From 95e24c767c46807546d65f4ffd5514aa5199e0e4 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Fri, 27 Mar 2026 19:21:27 +0100 Subject: [PATCH] fix: cache ELF file descriptors per file, not per process When profiling many forked processes sharing the same ELF files (e.g. libc), report_module() was opening a fresh fd for each process's Dwfl session. With N forks and M shared libraries, this resulted in N*M open file descriptors. Fix: add _fd_cache (unordered_map) to DsoHdr, which is shared across all processes in UnwindState. The first time an ELF file is registered, it is opened and cached. Subsequent processes dup() the cached fd for dwfl ownership transfer. elfutils uses MAP_PRIVATE mmaps so the OS page cache already deduplicates physical pages; the dup() avoids redundant open() syscalls and fd exhaustion. Add STATS_ELF_FDS metric (elf.fds) to track the fd cache size each profiling cycle, observable in the notice-level stats output. Co-Authored-By: Claude Sonnet 4.6 --- include/ddprof_module_lib.hpp | 10 ++- include/ddprof_stats.hpp | 3 +- include/dso_hdr.hpp | 13 ++++ include/dwfl_wrapper.hpp | 5 +- src/ddprof_module_lib.cc | 20 +++++- src/ddprof_worker.cc | 2 + src/dso_hdr.cc | 22 ++++++ src/dwfl_wrapper.cc | 8 +-- src/unwind_dwfl.cc | 3 +- test/dwfl_module-ut.cc | 122 ++++++++++++++++++---------------- 10 files changed, 139 insertions(+), 69 deletions(-) diff --git a/include/ddprof_module_lib.hpp b/include/ddprof_module_lib.hpp index 902cbc0c6..8515af0ac 100644 --- a/include/ddprof_module_lib.hpp +++ b/include/ddprof_module_lib.hpp @@ -11,8 +11,10 @@ #include "dso.hpp" #include "dso_hdr.hpp" #include "dwfl_internals.hpp" +#include "unique_fd.hpp" #include +#include namespace ddprof { @@ -30,9 +32,13 @@ struct Segment { }; // From a dso object (and the matching file), attach the module to the dwfl -// object, return the associated Dwfl_Module +// object, return the associated Dwfl_Module. +// fd_cache is keyed by FileInfoId_t; the first call for a given file opens it +// and caches the fd, subsequent calls dup() it — avoiding redundant opens +// across processes that share the same ELF file. DDRes report_module(Dwfl *dwfl, ProcessAddress_t pc, const Dso &dso, - const FileInfoValue &fileInfoValue, DDProfMod &ddprof_mod); + const FileInfoValue &fileInfoValue, DDProfMod &ddprof_mod, + std::unordered_map &fd_cache); std::optional find_build_id(const char *filepath); diff --git a/include/ddprof_stats.hpp b/include/ddprof_stats.hpp index df318e4d0..b8e4ec28c 100644 --- a/include/ddprof_stats.hpp +++ b/include/ddprof_stats.hpp @@ -42,7 +42,8 @@ namespace ddprof { X(PPROF_SIZE, "pprof.size", STAT_GAUGE) \ X(PROFILE_DURATION, "profile.duration_ms", STAT_GAUGE) \ X(AGGREGATION_AVG_TIME, "aggregation.avg_time_ns", STAT_GAUGE) \ - X(BACKPOPULATE_COUNT, "backpopulate.count", STAT_GAUGE) + X(BACKPOPULATE_COUNT, "backpopulate.count", STAT_GAUGE) \ + X(ELF_FDS, "elf.fds", STAT_GAUGE) // Expand the enum/index for the individual stats enum DDPROF_STATS : uint8_t { STATS_TABLE(X_ENUM) STATS_LEN }; diff --git a/include/dso_hdr.hpp b/include/dso_hdr.hpp index 391c172b0..95abdbc78 100644 --- a/include/dso_hdr.hpp +++ b/include/dso_hdr.hpp @@ -17,6 +17,7 @@ #include "ddres_def.hpp" #include "dso.hpp" #include "perf_clock.hpp" +#include "unique_fd.hpp" namespace ddprof { @@ -204,6 +205,15 @@ class DsoHdr { int clear_unvisited(const std::unordered_set &visited_pids); + // Cache of open file descriptors keyed by FileInfoId_t (one fd per unique + // ELF file, shared across all processes). Callers must dup() before + // transferring ownership (e.g. to dwfl). + std::unordered_map &get_fd_cache() { + return _fd_cache; + } + + size_t get_fd_cache_size() const { return _fd_cache.size(); } + private: // erase range of elements static void erase_range(DsoMap &map, DsoRange range, const Dso &new_mapping); @@ -222,6 +232,9 @@ class DsoHdr { DsoStats _stats; FileInfoInodeMap _file_info_inode_map; FileInfoVector _file_info_vector; + // One open fd per unique ELF file (keyed by FileInfoId_t). Avoids reopening + // the same file for every profiled process (forked or not). + std::unordered_map _fd_cache; std::string _path_to_proc; // /proc files can be mounted at various places // (whole host profiling) int _dd_profiling_fd; diff --git a/include/dwfl_wrapper.hpp b/include/dwfl_wrapper.hpp index 5116ebab4..ed236e64d 100644 --- a/include/dwfl_wrapper.hpp +++ b/include/dwfl_wrapper.hpp @@ -12,8 +12,10 @@ #include "ddres.hpp" #include "dso.hpp" #include "dso_hdr.hpp" +#include "unique_fd.hpp" #include +#include extern "C" { struct Dwfl; @@ -42,7 +44,8 @@ struct DwflWrapper { // safe get DDRes register_mod(ProcessAddress_t pc, const Dso &dso, - const FileInfoValue &fileInfoValue, DDProfMod **mod); + const FileInfoValue &fileInfoValue, DDProfMod **mod, + std::unordered_map &fd_cache); ~DwflWrapper(); diff --git a/src/ddprof_module_lib.cc b/src/ddprof_module_lib.cc index 088281976..c245ecff2 100644 --- a/src/ddprof_module_lib.cc +++ b/src/ddprof_module_lib.cc @@ -212,7 +212,8 @@ DDRes compute_elf_bias(Elf *elf, const std::string &filepath, const Dso &dso, } // namespace DDRes report_module(Dwfl *dwfl, ProcessAddress_t pc, const Dso &dso, - const FileInfoValue &fileInfoValue, DDProfMod &ddprof_mod) { + const FileInfoValue &fileInfoValue, DDProfMod &ddprof_mod, + std::unordered_map &fd_cache) { const std::string &filepath = fileInfoValue.get_path(); const char *module_name = strrchr(filepath.c_str(), '/') + 1; if (fileInfoValue.errored()) { // avoid bouncing on errors @@ -237,11 +238,24 @@ DDRes report_module(Dwfl *dwfl, ProcessAddress_t pc, const Dso &dso, return ddres_warn(DD_WHAT_MODULE); } - UniqueFd fd_holder{::open(filepath.c_str(), O_RDONLY | O_CLOEXEC)}; - if (!fd_holder) { + // Get or populate the per-file cached fd (shared across all processes). + auto &cached_fd = fd_cache[fileInfoValue.get_id()]; + if (!cached_fd) { + cached_fd = UniqueFd{::open(filepath.c_str(), O_RDONLY | O_CLOEXEC)}; + LG_DBG("[Mod] Opened and cached fd for %s", filepath.c_str()); + } + if (!cached_fd) { LG_WRN("[Mod] Couldn't open fd to module (%s)", filepath.c_str()); return ddres_warn(DD_WHAT_MODULE); } + // dup with O_CLOEXEC preserved: plain dup() drops close-on-exec, which + // would leak ELF fds into exec'd children. F_DUPFD_CLOEXEC atomically + // duplicates and sets FD_CLOEXEC, matching the original open(O_CLOEXEC). + UniqueFd fd_holder{::fcntl(cached_fd.get(), F_DUPFD_CLOEXEC, 0)}; + if (!fd_holder) { + LG_WRN("[Mod] Couldn't dup fd to module (%s)", filepath.c_str()); + return ddres_warn(DD_WHAT_MODULE); + } LG_DBG("[Mod] Success opening %s, ", filepath.c_str()); // Load the file at a matching DSO address diff --git a/src/ddprof_worker.cc b/src/ddprof_worker.cc index aa1394370..34d1f1fb7 100644 --- a/src/ddprof_worker.cc +++ b/src/ddprof_worker.cc @@ -132,6 +132,8 @@ DDRes worker_update_stats(DDProfWorkerContext &worker_context, ddprof_stats_set(STATS_DSO_SIZE, dso_hdr.get_nb_dso()); ddprof_stats_set(STATS_BACKPOPULATE_COUNT, dso_hdr.stats().backpopulate_count()); + ddprof_stats_set(STATS_ELF_FDS, + static_cast(dso_hdr.get_fd_cache_size())); ddprof_stats_set( STATS_UNMATCHED_DEALLOCATION_COUNT, worker_context.live_allocation.get_nb_unmatched_deallocations()); diff --git a/src/dso_hdr.cc b/src/dso_hdr.cc index 550df2b40..1ae854c57 100644 --- a/src/dso_hdr.cc +++ b/src/dso_hdr.cc @@ -634,6 +634,28 @@ int DsoHdr::clear_unvisited(const std::unordered_set &visited_pids) { for (const auto &pid : pids_to_clear) { _pid_map.erase(pid); } + + // Prune _fd_cache: close fds for files no longer referenced by any live + // mapping, preventing unbounded growth when short-lived binaries/containers + // are observed over the profiler lifetime. + if (!_fd_cache.empty() && !pids_to_clear.empty()) { + std::unordered_set live_ids; + for (const auto &[pid, pid_mapping] : _pid_map) { + for (const auto &[addr, dso] : pid_mapping._map) { + if (dso._id > k_file_info_error) { + live_ids.insert(dso._id); + } + } + } + for (auto it = _fd_cache.begin(); it != _fd_cache.end();) { + if (!live_ids.contains(it->first)) { + it = _fd_cache.erase(it); + } else { + ++it; + } + } + } + return pids_to_clear.size(); } } // namespace ddprof diff --git a/src/dwfl_wrapper.cc b/src/dwfl_wrapper.cc index 81bf803d1..ef115a2ee 100644 --- a/src/dwfl_wrapper.cc +++ b/src/dwfl_wrapper.cc @@ -66,15 +66,15 @@ DDProfMod *DwflWrapper::unsafe_get(FileInfoId_t file_info_id) { return &it->second; } -DDRes DwflWrapper::register_mod(ProcessAddress_t pc, const Dso &dso, - const FileInfoValue &fileInfoValue, - DDProfMod **mod) { +DDRes DwflWrapper::register_mod( + ProcessAddress_t pc, const Dso &dso, const FileInfoValue &fileInfoValue, + DDProfMod **mod, std::unordered_map &fd_cache) { if (!_attached) { DDRES_RETURN_WARN_LOG(DD_WHAT_DWFL_LIB_ERROR, "dwfl not attached to pid %d", dso._pid); } DDProfMod new_mod; - DDRes res = report_module(_dwfl, pc, dso, fileInfoValue, new_mod); + DDRes res = report_module(_dwfl, pc, dso, fileInfoValue, new_mod, fd_cache); _inconsistent = new_mod._status == DDProfMod::kInconsistent; if (IsDDResNotOK(res)) { diff --git a/src/unwind_dwfl.cc b/src/unwind_dwfl.cc index 87bd3c455..f8c7a5cff 100644 --- a/src/unwind_dwfl.cc +++ b/src/unwind_dwfl.cc @@ -131,7 +131,8 @@ DDRes add_symbol(Dwfl_Frame *dwfl_frame, UnwindState *us) { // ensure unwinding backend has access to this module (and check // consistency) auto res = us->_dwfl_wrapper->register_mod(pc, find_res.first->second, - file_info_value, &ddprof_mod); + file_info_value, &ddprof_mod, + us->dso_hdr.get_fd_cache()); if (IsDDResOK(res)) { break; } diff --git a/test/dwfl_module-ut.cc b/test/dwfl_module-ut.cc index 10bbf5363..96f40ee0a 100644 --- a/test/dwfl_module-ut.cc +++ b/test/dwfl_module-ut.cc @@ -54,61 +54,67 @@ TEST(DwflModule, inconsistency_test) { int nb_fds_start = count_fds(my_pid); printf("-- Start open file descriptors: %d\n", nb_fds_start); LogHandle handle; - // Load DSOs from our unit test - ElfAddress_t ip = _THIS_IP_; - DsoHdr dso_hdr; - DsoHdr::DsoFindRes find_res = dso_hdr.dso_find_or_backpopulate(my_pid, ip); - UniqueElf unique_elf = create_elf_from_self(); - // Check that we found the DSO matching this IP - ASSERT_TRUE(find_res.second); + // Scope dso_hdr and dwfl_wrapper together so the fd cache (_fd_cache inside + // dso_hdr) is released before the fd count check below. { - DwflWrapper dwfl_wrapper; - dwfl_wrapper.attach(my_pid, unique_elf, nullptr); - // retrieve the map associated to pid - DsoHdr::DsoMap &dso_map = dso_hdr.get_pid_mapping(my_pid)._map; - for (auto it = dso_map.begin(); it != dso_map.end(); ++it) { - Dso &dso = it->second; - if (!has_relevant_path(dso._type) || !dso.is_executable()) { - continue; // skip non exec / non standard (anon/vdso...) - } - - FileInfoId_t file_info_id = dso_hdr.get_or_insert_file_info(dso); - ASSERT_TRUE(file_info_id > k_file_info_error); - - const FileInfoValue &file_info_value = - dso_hdr.get_file_info_value(file_info_id); - DDProfMod *ddprof_mod = nullptr; - auto res = dwfl_wrapper.register_mod(dso._start, it->second, - file_info_value, &ddprof_mod); - - ASSERT_TRUE(IsDDResOK(res)); - ASSERT_TRUE(ddprof_mod->_mod); - if (find_res.first == it) { - std::array elf_addr{ip - ddprof_mod->_sym_bias}; - blaze_symbolize_src_elf src_elf{ - .type_size = sizeof(blaze_symbolize_src_elf), - .path = dso._filename.c_str(), - .debug_syms = true, - .reserved = {}, - }; - const blaze_syms *blaze_syms = blaze_symbolize_elf_virt_offsets( - symbolizer, &src_elf, elf_addr.data(), elf_addr.size()); - - ASSERT_TRUE(blaze_syms); - ASSERT_GE(blaze_syms->cnt, 1); - defer { blaze_syms_free(blaze_syms); }; - // we don't have demangling at this step - EXPECT_TRUE( - strcmp("_ZN6ddprof34DwflModule_inconsistency_test_Test8TestBodyEv", - blaze_syms->syms[0].name) == 0); - // Only expect build-id on this binary (as we can not force it on - // others) - EXPECT_FALSE(ddprof_mod->_build_id.empty()); + // Load DSOs from our unit test + ElfAddress_t ip = _THIS_IP_; + DsoHdr dso_hdr; + DsoHdr::DsoFindRes find_res = dso_hdr.dso_find_or_backpopulate(my_pid, ip); + UniqueElf unique_elf = create_elf_from_self(); + // Check that we found the DSO matching this IP + ASSERT_TRUE(find_res.second); + { + DwflWrapper dwfl_wrapper; + dwfl_wrapper.attach(my_pid, unique_elf, nullptr); + // retrieve the map associated to pid + DsoHdr::DsoMap &dso_map = dso_hdr.get_pid_mapping(my_pid)._map; + for (auto it = dso_map.begin(); it != dso_map.end(); ++it) { + Dso &dso = it->second; + if (!has_relevant_path(dso._type) || !dso.is_executable()) { + continue; // skip non exec / non standard (anon/vdso...) + } + + FileInfoId_t file_info_id = dso_hdr.get_or_insert_file_info(dso); + ASSERT_TRUE(file_info_id > k_file_info_error); + + const FileInfoValue &file_info_value = + dso_hdr.get_file_info_value(file_info_id); + DDProfMod *ddprof_mod = nullptr; + auto res = + dwfl_wrapper.register_mod(dso._start, it->second, file_info_value, + &ddprof_mod, dso_hdr.get_fd_cache()); + + ASSERT_TRUE(IsDDResOK(res)); + ASSERT_TRUE(ddprof_mod->_mod); + if (find_res.first == it) { + std::array elf_addr{ip - ddprof_mod->_sym_bias}; + blaze_symbolize_src_elf src_elf{ + .type_size = sizeof(blaze_symbolize_src_elf), + .path = dso._filename.c_str(), + .debug_syms = true, + .reserved = {}, + }; + const blaze_syms *blaze_syms = blaze_symbolize_elf_virt_offsets( + symbolizer, &src_elf, elf_addr.data(), elf_addr.size()); + + ASSERT_TRUE(blaze_syms); + ASSERT_GE(blaze_syms->cnt, 1); + defer { blaze_syms_free(blaze_syms); }; + // we don't have demangling at this step + EXPECT_TRUE( + strcmp( + "_ZN6ddprof34DwflModule_inconsistency_test_Test8TestBodyEv", + blaze_syms->syms[0].name) == 0); + // Only expect build-id on this binary (as we can not force it on + // others) + EXPECT_FALSE(ddprof_mod->_build_id.empty()); + } + // check that we loaded all mods matching the DSOs + EXPECT_EQ(ddprof_mod->_status, DDProfMod::kUnknown); } - // check that we loaded all mods matching the DSOs - EXPECT_EQ(ddprof_mod->_status, DDProfMod::kUnknown); - } - } + } // dwfl_wrapper destroyed: dup'd fds closed + } // dso_hdr destroyed: fd_cache fds closed blaze_symbolizer_free(symbolizer); // int nb_fds_end = count_fds(my_pid); printf("-- End open file descriptors: %d\n", nb_fds_end); @@ -152,8 +158,9 @@ TEST(DwflModule, short_lived) { const FileInfoValue &file_info_value = dso_hdr.get_file_info_value(file_info_id); DDProfMod *ddprof_mod = nullptr; - auto res = dwfl_wrapper.register_mod(dso._start, it->second, - file_info_value, &ddprof_mod); + auto res = + dwfl_wrapper.register_mod(dso._start, it->second, file_info_value, + &ddprof_mod, dso_hdr.get_fd_cache()); ASSERT_TRUE(IsDDResOK(res)); ASSERT_TRUE(ddprof_mod->_mod); } @@ -189,8 +196,9 @@ TEST(DwflModule, short_lived) { const FileInfoValue &file_info_value = dso_hdr.get_file_info_value(file_info_id); DDProfMod *ddprof_mod = nullptr; - auto res = dwfl_wrapper.register_mod(dso._start, it->second, - file_info_value, &ddprof_mod); + auto res = + dwfl_wrapper.register_mod(dso._start, it->second, file_info_value, + &ddprof_mod, dso_hdr.get_fd_cache()); ASSERT_TRUE(IsDDResOK(res)); ASSERT_TRUE(ddprof_mod->_mod); }