diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py
new file mode 100644
index 0000000000..4435b7e433
--- /dev/null
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import sys
+
+from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
+from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
+
+if IS_WINDOWS:
+    from cuda.pathfinder._dynamic_libs.load_dl_windows import load_with_system_search
+else:
+    from cuda.pathfinder._dynamic_libs.load_dl_linux import load_with_system_search
+
+
+def _probe_canary_abs_path(libname: str) -> str | None:
+    loaded: LoadedDL | None = load_with_system_search(libname)
+    if loaded is None:
+        return None
+    abs_path = loaded.abs_path
+    if not isinstance(abs_path, str):
+        return None
+    return abs_path
+
+
+def main(argv: list[str] | None = None) -> int:
+    args = sys.argv[1:] if argv is None else argv
+    if len(args) != 1:
+        return 2
+    print(json.dumps(_probe_canary_abs_path(args[0])))  # noqa: T201
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
index 65c9f4bf3c..6265992e4a 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
@@ -101,7 +101,7 @@ def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_
     for rel_path in rel_paths:
         for dirname in sorted(glob.glob(os.path.join(anchor_point, rel_path))):
             if os.path.isdir(dirname):
-                return dirname
+                return os.path.normpath(dirname)
     return None
 
 
@@ -152,6 +152,57 @@ def _find_dll_using_lib_dir(
     return None
 
 
+def _derive_ctk_root_linux(resolved_lib_path: str) -> str | None:
+    """Derive the CTK installation root from a resolved library path on Linux.
+
+    Standard system CTK layout: ``$CTK_ROOT/lib64/libfoo.so.XX``
+    (some installs use ``lib`` instead of ``lib64``).
+
+    Returns None if the path doesn't match a recognized layout.
+    """
+    lib_dir = os.path.dirname(resolved_lib_path)
+    basename = os.path.basename(lib_dir)
+    if basename in ("lib64", "lib"):
+        return os.path.dirname(lib_dir)
+    return None
+
+
+def _derive_ctk_root_windows(resolved_lib_path: str) -> str | None:
+    """Derive the CTK installation root from a resolved library path on Windows.
+
+    Handles two CTK layouts:
+    - CTK 13: ``$CTK_ROOT/bin/x64/foo.dll``
+    - CTK 12: ``$CTK_ROOT/bin/foo.dll``
+
+    Returns None if the path doesn't match a recognized layout.
+
+    Uses ``ntpath`` explicitly so the function is testable on any platform.
+    """
+    import ntpath
+
+    lib_dir = ntpath.dirname(resolved_lib_path)
+    basename = ntpath.basename(lib_dir).lower()
+    if basename == "x64":
+        parent = ntpath.dirname(lib_dir)
+        if ntpath.basename(parent).lower() == "bin":
+            return ntpath.dirname(parent)
+    elif basename == "bin":
+        return ntpath.dirname(lib_dir)
+    return None
+
+
+def derive_ctk_root(resolved_lib_path: str) -> str | None:
+    """Derive the CTK installation root from a resolved library path.
+
+    Given the absolute path of a loaded CTK shared library, walk up the
+    directory tree to find the CTK root. Returns None if the path doesn't
+    match any recognized CTK directory layout.
+    """
+    if IS_WINDOWS:
+        return _derive_ctk_root_windows(resolved_lib_path)
+    return _derive_ctk_root_linux(resolved_lib_path)
+
+
 class _FindNvidiaDynamicLib:
     def __init__(self, libname: str):
         self.libname = libname
@@ -185,6 +236,16 @@ def try_with_conda_prefix(self) -> str | None:
     def try_with_cuda_home(self) -> str | None:
         return self._find_using_lib_dir(_find_lib_dir_using_cuda_home(self.libname))
 
+    def try_via_ctk_root(self, ctk_root: str) -> str | None:
+        """Find the library under a derived CTK root directory.
+
+        Uses :func:`_find_lib_dir_using_anchor_point` which already knows
+        about non-standard sub-paths (e.g. ``nvvm/lib64`` for nvvm).
+        """
+        return self._find_using_lib_dir(
+            _find_lib_dir_using_anchor_point(self.libname, anchor_point=ctk_root, linux_lib_dir="lib64")
+        )
+
     def _find_using_lib_dir(self, lib_dir: str | None) -> str | None:
         if lib_dir is None:
             return None
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
index 3431c2f86b..c50e253810 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
@@ -2,10 +2,15 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import functools
+import json
 import struct
+import subprocess
 import sys
 
-from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import _FindNvidiaDynamicLib
+from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import (
+    _FindNvidiaDynamicLib,
+    derive_ctk_root,
+)
 from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL, load_dependencies
 from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
 
@@ -22,6 +27,73 @@
     load_with_system_search,
 )
 
+# Libs that reside on the standard linker path in system CTK installs.
+# Used to discover the CTK root when a lib with a non-standard path
+# (e.g. nvvm under $CTK_ROOT/nvvm/lib64) can't be found directly.
+_CTK_ROOT_CANARY_LIBNAMES = ("cudart",)
+
+
+def _resolve_system_loaded_abs_path_in_subprocess(libname: str) -> str | None:
+    """Resolve a library's system-search absolute path in a child process.
+
+    This keeps any side effects of loading the canary library scoped to the
+    child process instead of polluting the current process.
+    """
+    cmd = [
+        sys.executable,
+        "-m",
+        "cuda.pathfinder._dynamic_libs.canary_probe_subprocess",
+        libname,
+    ]
+    try:
+        result = subprocess.run(  # noqa: S603
+            cmd,
+            check=False,
+            capture_output=True,
+            text=True,
+            timeout=10.0,
+        )
+    except (OSError, subprocess.SubprocessError):
+        return None
+    if result.returncode != 0:
+        return None
+
+    # Read the final non-empty stdout line in case earlier lines are emitted.
+    lines = [line for line in result.stdout.splitlines() if line.strip()]
+    if not lines:
+        return None
+    try:
+        payload = json.loads(lines[-1])
+    except json.JSONDecodeError:
+        return None
+    if isinstance(payload, str):
+        return payload
+    return None
+
+
+def _try_ctk_root_canary(finder: _FindNvidiaDynamicLib) -> str | None:
+    """Derive the CTK root from a system-installed canary lib.
+
+    For libs like nvvm whose shared object doesn't reside on the standard
+    linker path, we locate a well-known CTK lib that IS on the linker path
+    via system search, derive the CTK installation root from its resolved
+    path, and then look for the target lib relative to that root.
+
+    The canary load is performed in a subprocess to avoid introducing loader
+    state into the current process.
+    """
+    for canary_libname in _CTK_ROOT_CANARY_LIBNAMES:
+        canary_abs_path = _resolve_system_loaded_abs_path_in_subprocess(canary_libname)
+        if canary_abs_path is None:
+            continue
+        ctk_root = derive_ctk_root(canary_abs_path)
+        if ctk_root is None:
+            continue
+        abs_path: str | None = finder.try_via_ctk_root(ctk_root)
+        if abs_path is not None:
+            return abs_path
+    return None
+
 
 def _load_lib_no_cache(libname: str) -> LoadedDL:
     finder = _FindNvidiaDynamicLib(libname)
@@ -50,11 +122,21 @@ def _load_lib_no_cache(libname: str) -> LoadedDL:
     loaded = load_with_system_search(libname)
     if loaded is not None:
         return loaded
+
     abs_path = finder.try_with_cuda_home()
-    if abs_path is None:
-        finder.raise_not_found_error()
-    else:
+    if abs_path is not None:
         found_via = "CUDA_HOME"
+    else:
+        # Canary probe: if the direct system search and CUDA_HOME both
+        # failed (e.g. nvvm isn't on the linker path and CUDA_HOME is
+        # unset), try to discover the CTK root by loading a well-known CTK
+        # lib in a subprocess, then look for the target lib relative to
+        # that root.
+        abs_path = _try_ctk_root_canary(finder)
+        if abs_path is not None:
+            found_via = "system-ctk-root"
+        else:
+            finder.raise_not_found_error()
 
     return load_with_abs_path(libname, abs_path, found_via)
 
@@ -123,6 +205,14 @@ def load_nvidia_dynamic_lib(libname: str) -> LoadedDL:
 
        - If set, use ``CUDA_HOME`` or ``CUDA_PATH`` (in that order).
 
+    5. **CTK root canary probe**
+
+       - For libraries whose shared object doesn't reside on the standard
+         linker path (e.g. ``libnvvm.so`` lives under ``$CTK_ROOT/nvvm/lib64``),
+         attempt to discover the CTK installation root by system-loading a
+         well-known CTK library (``cudart``) in a subprocess, then derive
+         the root from its resolved absolute path.
+
     Notes:
         The search is performed **per library**. There is currently no mechanism
         to guarantee that multiple libraries are all resolved from the same location.
diff --git a/cuda_pathfinder/tests/test_ctk_root_discovery.py b/cuda_pathfinder/tests/test_ctk_root_discovery.py
new file mode 100644
index 0000000000..71a61c86c0
--- /dev/null
+++ b/cuda_pathfinder/tests/test_ctk_root_discovery.py
@@ -0,0 +1,276 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+
+import pytest
+
+from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import (
+    _derive_ctk_root_linux,
+    _derive_ctk_root_windows,
+    _FindNvidiaDynamicLib,
+    derive_ctk_root,
+)
+from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
+from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import (
+    _load_lib_no_cache,
+    _try_ctk_root_canary,
+)
+from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
+
+_MODULE = "cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib"
+_FIND_MODULE = "cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib"
+
+
+# ---------------------------------------------------------------------------
+# Platform-aware test helpers
+# ---------------------------------------------------------------------------
+
+
+def _create_nvvm_in_ctk(ctk_root):
+    """Create a fake nvvm lib in the platform-appropriate CTK subdirectory."""
+    if IS_WINDOWS:
+        nvvm_dir = ctk_root / "nvvm" / "bin"
+        nvvm_dir.mkdir(parents=True)
+        nvvm_lib = nvvm_dir / "nvvm64.dll"
+    else:
+        nvvm_dir = ctk_root / "nvvm" / "lib64"
+        nvvm_dir.mkdir(parents=True)
+        nvvm_lib = nvvm_dir / "libnvvm.so"
+    nvvm_lib.write_bytes(b"fake")
+    return nvvm_lib
+
+
+def _create_cudart_in_ctk(ctk_root):
+    """Create a fake cudart lib in the platform-appropriate CTK subdirectory."""
+    if IS_WINDOWS:
+        lib_dir = ctk_root / "bin"
+        lib_dir.mkdir(parents=True)
+        lib_file = lib_dir / "cudart64_12.dll"
+    else:
+        lib_dir = ctk_root / "lib64"
+        lib_dir.mkdir(parents=True)
+        lib_file = lib_dir / "libcudart.so"
+    lib_file.write_bytes(b"fake")
+    return lib_file
+
+
+def _fake_canary_path(ctk_root):
+    """Return the path a system-loaded canary lib would resolve to."""
+    if IS_WINDOWS:
+        return str(ctk_root / "bin" / "cudart64_13.dll")
+    return str(ctk_root / "lib64" / "libcudart.so.13")
+
+
+# ---------------------------------------------------------------------------
+# derive_ctk_root
+# ---------------------------------------------------------------------------
+
+
+def test_derive_ctk_root_linux_lib64():
+    assert _derive_ctk_root_linux("/usr/local/cuda-13/lib64/libcudart.so.13") == "/usr/local/cuda-13"
+
+
+def test_derive_ctk_root_linux_lib():
+    assert _derive_ctk_root_linux("/opt/cuda/lib/libcudart.so.12") == "/opt/cuda"
+
+
+def test_derive_ctk_root_linux_unrecognized():
+    assert _derive_ctk_root_linux("/some/weird/path/libcudart.so.13") is None
+
+
+def test_derive_ctk_root_linux_root_level():
+    assert _derive_ctk_root_linux("/lib64/libcudart.so.13") == "/"
+
+
+def test_derive_ctk_root_windows_ctk13():
+    path = r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\bin\x64\cudart64_13.dll"
+    assert _derive_ctk_root_windows(path) == r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0"
+
+
+def test_derive_ctk_root_windows_ctk12():
+    path = r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin\cudart64_12.dll"
+    assert _derive_ctk_root_windows(path) == r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8"
+
+
+def test_derive_ctk_root_windows_unrecognized():
+    assert _derive_ctk_root_windows(r"C:\weird\cudart64_13.dll") is None
+
+
+def test_derive_ctk_root_windows_case_insensitive_bin():
+    assert _derive_ctk_root_windows(r"C:\CUDA\Bin\cudart64_12.dll") == r"C:\CUDA"
+
+
+def test_derive_ctk_root_windows_case_insensitive_x64():
+    assert _derive_ctk_root_windows(r"C:\CUDA\BIN\X64\cudart64_13.dll") == r"C:\CUDA"
+
+
+def test_derive_ctk_root_dispatches_to_linux(mocker):
+    mocker.patch(f"{_FIND_MODULE}.IS_WINDOWS", False)
derive_ctk_root("/usr/local/cuda/lib64/libcudart.so.13") == "/usr/local/cuda" + + +def test_derive_ctk_root_dispatches_to_windows(mocker): + mocker.patch(f"{_FIND_MODULE}.IS_WINDOWS", True) + assert derive_ctk_root(r"C:\CUDA\v13\bin\cudart64_13.dll") == r"C:\CUDA\v13" + + +# --------------------------------------------------------------------------- +# _FindNvidiaDynamicLib.try_via_ctk_root +# --------------------------------------------------------------------------- + + +def test_try_via_ctk_root_finds_nvvm(tmp_path): + ctk_root = tmp_path / "cuda-13" + nvvm_lib = _create_nvvm_in_ctk(ctk_root) + + assert _FindNvidiaDynamicLib("nvvm").try_via_ctk_root(str(ctk_root)) == str(nvvm_lib) + + +def test_try_via_ctk_root_returns_none_when_dir_missing(tmp_path): + ctk_root = tmp_path / "cuda-13" + ctk_root.mkdir() + + assert _FindNvidiaDynamicLib("nvvm").try_via_ctk_root(str(ctk_root)) is None + + +def test_try_via_ctk_root_regular_lib(tmp_path): + ctk_root = tmp_path / "cuda-13" + cudart_lib = _create_cudart_in_ctk(ctk_root) + + assert _FindNvidiaDynamicLib("cudart").try_via_ctk_root(str(ctk_root)) == str(cudart_lib) + + +# --------------------------------------------------------------------------- +# _try_ctk_root_canary +# --------------------------------------------------------------------------- + + +def _make_loaded_dl(path, found_via): + return LoadedDL(path, False, 0xDEAD, found_via) + + +def test_canary_finds_nvvm(tmp_path, mocker): + ctk_root = tmp_path / "cuda-13" + _create_cudart_in_ctk(ctk_root) + nvvm_lib = _create_nvvm_in_ctk(ctk_root) + + probe = mocker.patch( + f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", + return_value=_fake_canary_path(ctk_root), + ) + parent_system_loader = mocker.patch(f"{_MODULE}.load_with_system_search") + + assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) == str(nvvm_lib) + probe.assert_called_once_with("cudart") + parent_system_loader.assert_not_called() + + +def test_canary_returns_none_when_subprocess_probe_fails(mocker): + mocker.patch(f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", return_value=None) + assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None + + +def test_canary_returns_none_when_ctk_root_unrecognized(mocker): + mocker.patch( + f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", + return_value="/weird/path/libcudart.so.13", + ) + assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None + + +def test_canary_returns_none_when_nvvm_not_in_ctk_root(tmp_path, mocker): + ctk_root = tmp_path / "cuda-13" + # Create only the canary lib dir, not nvvm + _create_cudart_in_ctk(ctk_root) + + mocker.patch( + f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", + return_value=_fake_canary_path(ctk_root), + ) + assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None + + +def test_canary_skips_when_abs_path_none(mocker): + mocker.patch(f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", return_value=None) + assert _try_ctk_root_canary(_FindNvidiaDynamicLib("nvvm")) is None + + +# --------------------------------------------------------------------------- +# _load_lib_no_cache search-order +# --------------------------------------------------------------------------- + + +@pytest.fixture +def _isolate_load_cascade(mocker): + """Disable the search steps that run before system-search in _load_lib_no_cache. + + This lets the ordering tests focus on system-search, CUDA_HOME, and the + canary probe without needing a real site-packages or conda environment. 
+ """ + # No wheels installed + mocker.patch.object(_FindNvidiaDynamicLib, "try_site_packages", return_value=None) + # No conda env + mocker.patch.object(_FindNvidiaDynamicLib, "try_with_conda_prefix", return_value=None) + # Lib not already loaded by another component + mocker.patch(f"{_MODULE}.check_if_already_loaded_from_elsewhere", return_value=None) + # Skip transitive dependency loading + mocker.patch(f"{_MODULE}.load_dependencies") + + +@pytest.mark.usefixtures("_isolate_load_cascade") +def test_cuda_home_takes_priority_over_canary(tmp_path, mocker): + # Two competing CTK roots: one from CUDA_HOME, one the canary would find. + cuda_home_root = tmp_path / "cuda-home" + nvvm_home_lib = _create_nvvm_in_ctk(cuda_home_root) + + canary_root = tmp_path / "cuda-system" + _create_cudart_in_ctk(canary_root) + _create_nvvm_in_ctk(canary_root) + + canary_mock = mocker.MagicMock(return_value=_fake_canary_path(canary_root)) + + # System search finds nothing for nvvm. + mocker.patch(f"{_MODULE}.load_with_system_search", return_value=None) + # Canary subprocess probe would find cudart if consulted. + mocker.patch(f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", side_effect=canary_mock) + # CUDA_HOME points to a separate root that also has nvvm + mocker.patch(f"{_FIND_MODULE}.get_cuda_home_or_path", return_value=str(cuda_home_root)) + # Capture the final load call + mocker.patch( + f"{_MODULE}.load_with_abs_path", + side_effect=lambda _libname, path, via: _make_loaded_dl(path, via), + ) + + result = _load_lib_no_cache("nvvm") + + # CUDA_HOME must win; the canary should never have been consulted + assert result.found_via == "CUDA_HOME" + assert result.abs_path == str(nvvm_home_lib) + canary_mock.assert_not_called() + + +@pytest.mark.usefixtures("_isolate_load_cascade") +def test_canary_fires_only_after_all_earlier_steps_fail(tmp_path, mocker): + canary_root = tmp_path / "cuda-system" + _create_cudart_in_ctk(canary_root) + nvvm_lib = _create_nvvm_in_ctk(canary_root) + + # System search: nvvm not on linker path. + mocker.patch(f"{_MODULE}.load_with_system_search", return_value=None) + # Canary subprocess probe finds cudart under a system CTK root. + mocker.patch( + f"{_MODULE}._resolve_system_loaded_abs_path_in_subprocess", + return_value=_fake_canary_path(canary_root), + ) + # No CUDA_HOME set + mocker.patch(f"{_FIND_MODULE}.get_cuda_home_or_path", return_value=None) + # Capture the final load call + mocker.patch( + f"{_MODULE}.load_with_abs_path", + side_effect=lambda _libname, path, via: _make_loaded_dl(path, via), + ) + + result = _load_lib_no_cache("nvvm") + + assert result.found_via == "system-ctk-root" + assert result.abs_path == str(nvvm_lib)