diff --git a/include/ddprof_module_lib.hpp b/include/ddprof_module_lib.hpp index 902cbc0c..8515af0a 100644 --- a/include/ddprof_module_lib.hpp +++ b/include/ddprof_module_lib.hpp @@ -11,8 +11,10 @@ #include "dso.hpp" #include "dso_hdr.hpp" #include "dwfl_internals.hpp" +#include "unique_fd.hpp" #include +#include namespace ddprof { @@ -30,9 +32,13 @@ struct Segment { }; // From a dso object (and the matching file), attach the module to the dwfl -// object, return the associated Dwfl_Module +// object, return the associated Dwfl_Module. +// fd_cache is keyed by FileInfoId_t; the first call for a given file opens it +// and caches the fd, subsequent calls dup() it — avoiding redundant opens +// across processes that share the same ELF file. DDRes report_module(Dwfl *dwfl, ProcessAddress_t pc, const Dso &dso, - const FileInfoValue &fileInfoValue, DDProfMod &ddprof_mod); + const FileInfoValue &fileInfoValue, DDProfMod &ddprof_mod, + std::unordered_map &fd_cache); std::optional find_build_id(const char *filepath); diff --git a/include/ddprof_stats.hpp b/include/ddprof_stats.hpp index df318e4d..b8e4ec28 100644 --- a/include/ddprof_stats.hpp +++ b/include/ddprof_stats.hpp @@ -42,7 +42,8 @@ namespace ddprof { X(PPROF_SIZE, "pprof.size", STAT_GAUGE) \ X(PROFILE_DURATION, "profile.duration_ms", STAT_GAUGE) \ X(AGGREGATION_AVG_TIME, "aggregation.avg_time_ns", STAT_GAUGE) \ - X(BACKPOPULATE_COUNT, "backpopulate.count", STAT_GAUGE) + X(BACKPOPULATE_COUNT, "backpopulate.count", STAT_GAUGE) \ + X(ELF_FDS, "elf.fds", STAT_GAUGE) // Expand the enum/index for the individual stats enum DDPROF_STATS : uint8_t { STATS_TABLE(X_ENUM) STATS_LEN }; diff --git a/include/dso_hdr.hpp b/include/dso_hdr.hpp index 391c172b..95abdbc7 100644 --- a/include/dso_hdr.hpp +++ b/include/dso_hdr.hpp @@ -17,6 +17,7 @@ #include "ddres_def.hpp" #include "dso.hpp" #include "perf_clock.hpp" +#include "unique_fd.hpp" namespace ddprof { @@ -204,6 +205,15 @@ class DsoHdr { int clear_unvisited(const std::unordered_set &visited_pids); + // Cache of open file descriptors keyed by FileInfoId_t (one fd per unique + // ELF file, shared across all processes). Callers must dup() before + // transferring ownership (e.g. to dwfl). + std::unordered_map &get_fd_cache() { + return _fd_cache; + } + + size_t get_fd_cache_size() const { return _fd_cache.size(); } + private: // erase range of elements static void erase_range(DsoMap &map, DsoRange range, const Dso &new_mapping); @@ -222,6 +232,9 @@ class DsoHdr { DsoStats _stats; FileInfoInodeMap _file_info_inode_map; FileInfoVector _file_info_vector; + // One open fd per unique ELF file (keyed by FileInfoId_t). Avoids reopening + // the same file for every profiled process (forked or not). + std::unordered_map _fd_cache; std::string _path_to_proc; // /proc files can be mounted at various places // (whole host profiling) int _dd_profiling_fd; diff --git a/include/dwfl_wrapper.hpp b/include/dwfl_wrapper.hpp index 5116ebab..ed236e64 100644 --- a/include/dwfl_wrapper.hpp +++ b/include/dwfl_wrapper.hpp @@ -12,8 +12,10 @@ #include "ddres.hpp" #include "dso.hpp" #include "dso_hdr.hpp" +#include "unique_fd.hpp" #include +#include extern "C" { struct Dwfl; @@ -42,7 +44,8 @@ struct DwflWrapper { // safe get DDRes register_mod(ProcessAddress_t pc, const Dso &dso, - const FileInfoValue &fileInfoValue, DDProfMod **mod); + const FileInfoValue &fileInfoValue, DDProfMod **mod, + std::unordered_map &fd_cache); ~DwflWrapper(); diff --git a/src/ddprof_module_lib.cc b/src/ddprof_module_lib.cc index 08828197..c245ecff 100644 --- a/src/ddprof_module_lib.cc +++ b/src/ddprof_module_lib.cc @@ -212,7 +212,8 @@ DDRes compute_elf_bias(Elf *elf, const std::string &filepath, const Dso &dso, } // namespace DDRes report_module(Dwfl *dwfl, ProcessAddress_t pc, const Dso &dso, - const FileInfoValue &fileInfoValue, DDProfMod &ddprof_mod) { + const FileInfoValue &fileInfoValue, DDProfMod &ddprof_mod, + std::unordered_map &fd_cache) { const std::string &filepath = fileInfoValue.get_path(); const char *module_name = strrchr(filepath.c_str(), '/') + 1; if (fileInfoValue.errored()) { // avoid bouncing on errors @@ -237,11 +238,24 @@ DDRes report_module(Dwfl *dwfl, ProcessAddress_t pc, const Dso &dso, return ddres_warn(DD_WHAT_MODULE); } - UniqueFd fd_holder{::open(filepath.c_str(), O_RDONLY | O_CLOEXEC)}; - if (!fd_holder) { + // Get or populate the per-file cached fd (shared across all processes). + auto &cached_fd = fd_cache[fileInfoValue.get_id()]; + if (!cached_fd) { + cached_fd = UniqueFd{::open(filepath.c_str(), O_RDONLY | O_CLOEXEC)}; + LG_DBG("[Mod] Opened and cached fd for %s", filepath.c_str()); + } + if (!cached_fd) { LG_WRN("[Mod] Couldn't open fd to module (%s)", filepath.c_str()); return ddres_warn(DD_WHAT_MODULE); } + // dup with O_CLOEXEC preserved: plain dup() drops close-on-exec, which + // would leak ELF fds into exec'd children. F_DUPFD_CLOEXEC atomically + // duplicates and sets FD_CLOEXEC, matching the original open(O_CLOEXEC). + UniqueFd fd_holder{::fcntl(cached_fd.get(), F_DUPFD_CLOEXEC, 0)}; + if (!fd_holder) { + LG_WRN("[Mod] Couldn't dup fd to module (%s)", filepath.c_str()); + return ddres_warn(DD_WHAT_MODULE); + } LG_DBG("[Mod] Success opening %s, ", filepath.c_str()); // Load the file at a matching DSO address diff --git a/src/ddprof_worker.cc b/src/ddprof_worker.cc index aa139437..34d1f1fb 100644 --- a/src/ddprof_worker.cc +++ b/src/ddprof_worker.cc @@ -132,6 +132,8 @@ DDRes worker_update_stats(DDProfWorkerContext &worker_context, ddprof_stats_set(STATS_DSO_SIZE, dso_hdr.get_nb_dso()); ddprof_stats_set(STATS_BACKPOPULATE_COUNT, dso_hdr.stats().backpopulate_count()); + ddprof_stats_set(STATS_ELF_FDS, + static_cast(dso_hdr.get_fd_cache_size())); ddprof_stats_set( STATS_UNMATCHED_DEALLOCATION_COUNT, worker_context.live_allocation.get_nb_unmatched_deallocations()); diff --git a/src/dso_hdr.cc b/src/dso_hdr.cc index 550df2b4..1ae854c5 100644 --- a/src/dso_hdr.cc +++ b/src/dso_hdr.cc @@ -634,6 +634,28 @@ int DsoHdr::clear_unvisited(const std::unordered_set &visited_pids) { for (const auto &pid : pids_to_clear) { _pid_map.erase(pid); } + + // Prune _fd_cache: close fds for files no longer referenced by any live + // mapping, preventing unbounded growth when short-lived binaries/containers + // are observed over the profiler lifetime. + if (!_fd_cache.empty() && !pids_to_clear.empty()) { + std::unordered_set live_ids; + for (const auto &[pid, pid_mapping] : _pid_map) { + for (const auto &[addr, dso] : pid_mapping._map) { + if (dso._id > k_file_info_error) { + live_ids.insert(dso._id); + } + } + } + for (auto it = _fd_cache.begin(); it != _fd_cache.end();) { + if (!live_ids.contains(it->first)) { + it = _fd_cache.erase(it); + } else { + ++it; + } + } + } + return pids_to_clear.size(); } } // namespace ddprof diff --git a/src/dwfl_wrapper.cc b/src/dwfl_wrapper.cc index 81bf803d..ef115a2e 100644 --- a/src/dwfl_wrapper.cc +++ b/src/dwfl_wrapper.cc @@ -66,15 +66,15 @@ DDProfMod *DwflWrapper::unsafe_get(FileInfoId_t file_info_id) { return &it->second; } -DDRes DwflWrapper::register_mod(ProcessAddress_t pc, const Dso &dso, - const FileInfoValue &fileInfoValue, - DDProfMod **mod) { +DDRes DwflWrapper::register_mod( + ProcessAddress_t pc, const Dso &dso, const FileInfoValue &fileInfoValue, + DDProfMod **mod, std::unordered_map &fd_cache) { if (!_attached) { DDRES_RETURN_WARN_LOG(DD_WHAT_DWFL_LIB_ERROR, "dwfl not attached to pid %d", dso._pid); } DDProfMod new_mod; - DDRes res = report_module(_dwfl, pc, dso, fileInfoValue, new_mod); + DDRes res = report_module(_dwfl, pc, dso, fileInfoValue, new_mod, fd_cache); _inconsistent = new_mod._status == DDProfMod::kInconsistent; if (IsDDResNotOK(res)) { diff --git a/src/unwind_dwfl.cc b/src/unwind_dwfl.cc index 87bd3c45..f8c7a5cf 100644 --- a/src/unwind_dwfl.cc +++ b/src/unwind_dwfl.cc @@ -131,7 +131,8 @@ DDRes add_symbol(Dwfl_Frame *dwfl_frame, UnwindState *us) { // ensure unwinding backend has access to this module (and check // consistency) auto res = us->_dwfl_wrapper->register_mod(pc, find_res.first->second, - file_info_value, &ddprof_mod); + file_info_value, &ddprof_mod, + us->dso_hdr.get_fd_cache()); if (IsDDResOK(res)) { break; } diff --git a/test/dwfl_module-ut.cc b/test/dwfl_module-ut.cc index 10bbf536..96f40ee0 100644 --- a/test/dwfl_module-ut.cc +++ b/test/dwfl_module-ut.cc @@ -54,61 +54,67 @@ TEST(DwflModule, inconsistency_test) { int nb_fds_start = count_fds(my_pid); printf("-- Start open file descriptors: %d\n", nb_fds_start); LogHandle handle; - // Load DSOs from our unit test - ElfAddress_t ip = _THIS_IP_; - DsoHdr dso_hdr; - DsoHdr::DsoFindRes find_res = dso_hdr.dso_find_or_backpopulate(my_pid, ip); - UniqueElf unique_elf = create_elf_from_self(); - // Check that we found the DSO matching this IP - ASSERT_TRUE(find_res.second); + // Scope dso_hdr and dwfl_wrapper together so the fd cache (_fd_cache inside + // dso_hdr) is released before the fd count check below. { - DwflWrapper dwfl_wrapper; - dwfl_wrapper.attach(my_pid, unique_elf, nullptr); - // retrieve the map associated to pid - DsoHdr::DsoMap &dso_map = dso_hdr.get_pid_mapping(my_pid)._map; - for (auto it = dso_map.begin(); it != dso_map.end(); ++it) { - Dso &dso = it->second; - if (!has_relevant_path(dso._type) || !dso.is_executable()) { - continue; // skip non exec / non standard (anon/vdso...) - } - - FileInfoId_t file_info_id = dso_hdr.get_or_insert_file_info(dso); - ASSERT_TRUE(file_info_id > k_file_info_error); - - const FileInfoValue &file_info_value = - dso_hdr.get_file_info_value(file_info_id); - DDProfMod *ddprof_mod = nullptr; - auto res = dwfl_wrapper.register_mod(dso._start, it->second, - file_info_value, &ddprof_mod); - - ASSERT_TRUE(IsDDResOK(res)); - ASSERT_TRUE(ddprof_mod->_mod); - if (find_res.first == it) { - std::array elf_addr{ip - ddprof_mod->_sym_bias}; - blaze_symbolize_src_elf src_elf{ - .type_size = sizeof(blaze_symbolize_src_elf), - .path = dso._filename.c_str(), - .debug_syms = true, - .reserved = {}, - }; - const blaze_syms *blaze_syms = blaze_symbolize_elf_virt_offsets( - symbolizer, &src_elf, elf_addr.data(), elf_addr.size()); - - ASSERT_TRUE(blaze_syms); - ASSERT_GE(blaze_syms->cnt, 1); - defer { blaze_syms_free(blaze_syms); }; - // we don't have demangling at this step - EXPECT_TRUE( - strcmp("_ZN6ddprof34DwflModule_inconsistency_test_Test8TestBodyEv", - blaze_syms->syms[0].name) == 0); - // Only expect build-id on this binary (as we can not force it on - // others) - EXPECT_FALSE(ddprof_mod->_build_id.empty()); + // Load DSOs from our unit test + ElfAddress_t ip = _THIS_IP_; + DsoHdr dso_hdr; + DsoHdr::DsoFindRes find_res = dso_hdr.dso_find_or_backpopulate(my_pid, ip); + UniqueElf unique_elf = create_elf_from_self(); + // Check that we found the DSO matching this IP + ASSERT_TRUE(find_res.second); + { + DwflWrapper dwfl_wrapper; + dwfl_wrapper.attach(my_pid, unique_elf, nullptr); + // retrieve the map associated to pid + DsoHdr::DsoMap &dso_map = dso_hdr.get_pid_mapping(my_pid)._map; + for (auto it = dso_map.begin(); it != dso_map.end(); ++it) { + Dso &dso = it->second; + if (!has_relevant_path(dso._type) || !dso.is_executable()) { + continue; // skip non exec / non standard (anon/vdso...) + } + + FileInfoId_t file_info_id = dso_hdr.get_or_insert_file_info(dso); + ASSERT_TRUE(file_info_id > k_file_info_error); + + const FileInfoValue &file_info_value = + dso_hdr.get_file_info_value(file_info_id); + DDProfMod *ddprof_mod = nullptr; + auto res = + dwfl_wrapper.register_mod(dso._start, it->second, file_info_value, + &ddprof_mod, dso_hdr.get_fd_cache()); + + ASSERT_TRUE(IsDDResOK(res)); + ASSERT_TRUE(ddprof_mod->_mod); + if (find_res.first == it) { + std::array elf_addr{ip - ddprof_mod->_sym_bias}; + blaze_symbolize_src_elf src_elf{ + .type_size = sizeof(blaze_symbolize_src_elf), + .path = dso._filename.c_str(), + .debug_syms = true, + .reserved = {}, + }; + const blaze_syms *blaze_syms = blaze_symbolize_elf_virt_offsets( + symbolizer, &src_elf, elf_addr.data(), elf_addr.size()); + + ASSERT_TRUE(blaze_syms); + ASSERT_GE(blaze_syms->cnt, 1); + defer { blaze_syms_free(blaze_syms); }; + // we don't have demangling at this step + EXPECT_TRUE( + strcmp( + "_ZN6ddprof34DwflModule_inconsistency_test_Test8TestBodyEv", + blaze_syms->syms[0].name) == 0); + // Only expect build-id on this binary (as we can not force it on + // others) + EXPECT_FALSE(ddprof_mod->_build_id.empty()); + } + // check that we loaded all mods matching the DSOs + EXPECT_EQ(ddprof_mod->_status, DDProfMod::kUnknown); } - // check that we loaded all mods matching the DSOs - EXPECT_EQ(ddprof_mod->_status, DDProfMod::kUnknown); - } - } + } // dwfl_wrapper destroyed: dup'd fds closed + } // dso_hdr destroyed: fd_cache fds closed blaze_symbolizer_free(symbolizer); // int nb_fds_end = count_fds(my_pid); printf("-- End open file descriptors: %d\n", nb_fds_end); @@ -152,8 +158,9 @@ TEST(DwflModule, short_lived) { const FileInfoValue &file_info_value = dso_hdr.get_file_info_value(file_info_id); DDProfMod *ddprof_mod = nullptr; - auto res = dwfl_wrapper.register_mod(dso._start, it->second, - file_info_value, &ddprof_mod); + auto res = + dwfl_wrapper.register_mod(dso._start, it->second, file_info_value, + &ddprof_mod, dso_hdr.get_fd_cache()); ASSERT_TRUE(IsDDResOK(res)); ASSERT_TRUE(ddprof_mod->_mod); } @@ -189,8 +196,9 @@ TEST(DwflModule, short_lived) { const FileInfoValue &file_info_value = dso_hdr.get_file_info_value(file_info_id); DDProfMod *ddprof_mod = nullptr; - auto res = dwfl_wrapper.register_mod(dso._start, it->second, - file_info_value, &ddprof_mod); + auto res = + dwfl_wrapper.register_mod(dso._start, it->second, file_info_value, + &ddprof_mod, dso_hdr.get_fd_cache()); ASSERT_TRUE(IsDDResOK(res)); ASSERT_TRUE(ddprof_mod->_mod); }