Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
af1b908
Begin Cythonization of _program.py
Andy-Jost Feb 2, 2026
08a02c0
Extract Program helpers to module-level cdef functions
Andy-Jost Feb 2, 2026
a136180
Complete Cythonization of _program.py
Andy-Jost Feb 3, 2026
182feab
Extend test_object_protocols.py with Program and ObjectCode variations
Andy-Jost Feb 3, 2026
b9b90d6
Add NVRTC/NVVM resource handles and remove Program MNFF
Andy-Jost Feb 3, 2026
c15b12e
Add HANDLE_RETURN_NVRTC and HANDLE_RETURN_NVVM, simplify HANDLE_RETURN
Andy-Jost Feb 3, 2026
95f149a
Fix build errors, update tests, remove unused imports
Andy-Jost Feb 4, 2026
754e295
Merge remote-tracking branch 'origin/main' into cythonize-program
Andy-Jost Feb 5, 2026
ba0f2d3
Address review feedback: keep _can_load_generated_ptx private, update…
Andy-Jost Feb 6, 2026
95cfc6a
Merge remote-tracking branch 'origin/main' into cythonize-program
Andy-Jost Feb 6, 2026
583edee
Address review feedback: NVVMError inherits from nvvmError, clean up …
Andy-Jost Feb 9, 2026
3dbecfe
Add 0.6.x release notes with cuda-bindings build requirement change
Andy-Jost Feb 10, 2026
9edca0d
Merge remote-tracking branch 'origin/main' into cythonize-program
Andy-Jost Feb 10, 2026
4519992
Begin Cythonization of _linker.py
Andy-Jost Feb 10, 2026
320dc50
Add NvJitLinkHandle, CuLinkHandle RAII and HANDLE_RETURN_NVJITLINK
Andy-Jost Feb 11, 2026
2d19a69
Replace MNFF/weakref.finalize with RAII handle ownership in _linker
Andy-Jost Feb 11, 2026
7ad331a
Migrate linker to C-level calls with nogil
Andy-Jost Feb 11, 2026
0cfd977
Clean up module globals: remove _nvjitlink, use C-level enum ints
Andy-Jost Feb 11, 2026
82377ed
Reorganize _linker module per developer guide conventions
Andy-Jost Feb 11, 2026
efd6a2f
Merge remote-tracking branch 'origin/main' into cythonize-linker
Andy-Jost Feb 11, 2026
fc8e0c0
Fix cython-lint warnings in _linker.pyx
Andy-Jost Feb 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 71 additions & 4 deletions cuda_core/cuda/core/_cpp/resource_handles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,18 @@ decltype(&cuLibraryLoadData) p_cuLibraryLoadData = nullptr;
decltype(&cuLibraryUnload) p_cuLibraryUnload = nullptr;
decltype(&cuLibraryGetKernel) p_cuLibraryGetKernel = nullptr;

// Linker
decltype(&cuLinkDestroy) p_cuLinkDestroy = nullptr;

// NVRTC function pointers
decltype(&nvrtcDestroyProgram) p_nvrtcDestroyProgram = nullptr;

// NVVM function pointers (may be null if NVVM is not available)
NvvmDestroyProgramFn p_nvvmDestroyProgram = nullptr;

// nvJitLink function pointers (may be null if nvJitLink is not available)
NvJitLinkDestroyFn p_nvJitLinkDestroy = nullptr;

// ============================================================================
// GIL management helpers
// ============================================================================
Expand Down Expand Up @@ -805,19 +811,19 @@ NvrtcProgramHandle create_nvrtc_program_handle_ref(nvrtcProgram prog) {

namespace {
struct NvvmProgramBox {
nvvmProgram resource;
NvvmProgramValue resource;
};
} // namespace

NvvmProgramHandle create_nvvm_program_handle(nvvmProgram prog) {
auto box = std::shared_ptr<NvvmProgramBox>(
new NvvmProgramBox{prog},
new NvvmProgramBox{{prog}},
[](NvvmProgramBox* b) {
// Note: nvvmDestroyProgram takes nvvmProgram* and nulls it,
// but we're deleting the box anyway so nulling is harmless.
// If NVVM is not available, the function pointer is null.
if (p_nvvmDestroyProgram) {
p_nvvmDestroyProgram(&b->resource);
p_nvvmDestroyProgram(&b->resource.raw);
}
delete b;
}
Expand All @@ -826,8 +832,69 @@ NvvmProgramHandle create_nvvm_program_handle(nvvmProgram prog) {
}

NvvmProgramHandle create_nvvm_program_handle_ref(nvvmProgram prog) {
auto box = std::make_shared<NvvmProgramBox>(NvvmProgramBox{prog});
auto box = std::make_shared<NvvmProgramBox>(NvvmProgramBox{{prog}});
return NvvmProgramHandle(box, &box->resource);
}

// ============================================================================
// nvJitLink Handles
// ============================================================================

namespace {
// File-local storage for one nvJitLink handle. The create_nvjitlink_handle*
// factories below hold it in a shared_ptr and hand out a pointer to
// `resource`; the raw nvJitLink_t value is reachable as `resource.raw`.
struct NvJitLinkBox {
NvJitLinkValue resource;
};
} // namespace

NvJitLinkHandle create_nvjitlink_handle(nvJitLink_t handle) {
    // Cleanup executed when the last reference to the box is dropped.
    const auto release_box = [](NvJitLinkBox* owned) {
        // nvJitLinkDestroy receives the address of the stored handle and nulls
        // it; that is harmless because the box is deleted right afterwards.
        // The pointer is null when nvJitLink is not available, in which case
        // the destroy call is skipped and only the box is freed.
        if (p_nvJitLinkDestroy != nullptr) {
            p_nvJitLinkDestroy(&owned->resource.raw);
        }
        delete owned;
    };

    std::shared_ptr<NvJitLinkBox> owner(new NvJitLinkBox{{handle}}, release_box);

    // Share ownership of the box, but expose only the tagged handle value.
    return NvJitLinkHandle(owner, &owner->resource);
}

NvJitLinkHandle create_nvjitlink_handle_ref(nvJitLink_t handle) {
    // No custom deleter is installed here: releasing the last reference frees
    // the box only and never calls nvJitLinkDestroy on the wrapped handle.
    NvJitLinkBox borrowed{{handle}};
    const auto holder = std::make_shared<NvJitLinkBox>(borrowed);

    // Share ownership of the box, but expose only the tagged handle value.
    return NvJitLinkHandle(holder, &holder->resource);
}

// ============================================================================
// cuLink Handles
// ============================================================================

namespace {
// File-local storage for one CUlinkState. The create_culink_handle* factories
// below hold it in a shared_ptr and hand out a pointer to `resource`.
struct CuLinkBox {
CUlinkState resource;
};
} // namespace

CuLinkHandle create_culink_handle(CUlinkState state) {
    // Cleanup executed when the last reference to the box is dropped.
    const auto release_box = [](CuLinkBox* owned) {
        // cuLinkDestroy takes the CUlinkState by value (not by pointer).
        // Its result is deliberately ignored, as is usual in destructors.
        // The call is skipped while the function pointer is still null.
        if (p_cuLinkDestroy != nullptr) {
            p_cuLinkDestroy(owned->resource);
        }
        delete owned;
    };

    std::shared_ptr<CuLinkBox> owner(new CuLinkBox{state}, release_box);

    // Share ownership of the box, but expose only the stored CUlinkState.
    return CuLinkHandle(owner, &owner->resource);
}

CuLinkHandle create_culink_handle_ref(CUlinkState state) {
    // No custom deleter is installed here: releasing the last reference frees
    // the box only and never calls cuLinkDestroy on the wrapped state.
    CuLinkBox borrowed{state};
    const auto holder = std::make_shared<CuLinkBox>(borrowed);

    // Share ownership of the box, but expose only the stored CUlinkState.
    return CuLinkHandle(holder, &holder->resource);
}

} // namespace cuda_core
92 changes: 91 additions & 1 deletion cuda_core/cuda/core/_cpp/resource_handles.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,28 @@
// Use void* to match cuda.bindings.cynvvm's typedef
using nvvmProgram = void*;

// Forward declaration for nvJitLink - avoids nvJitLink.h dependency
// Use void* to match cuda.bindings.cynvjitlink's typedef
using nvJitLink_t = void*;

namespace cuda_core {

// ============================================================================
// TaggedHandle - make void*-based handle types distinct for overloading
//
// Both nvvmProgram and nvJitLink_t are void*, so shared_ptr<const void*>
// would be the same C++ type for both. TaggedHandle<T, Tag> wraps the raw
// value with a unique tag type, making each shared_ptr type distinct.
// ============================================================================

// T   - the underlying raw handle type being wrapped.
// Tag - integer that is not used by any member; it exists only so that two
//       instantiations over the same T are different types.
template<typename T, int Tag>
struct TaggedHandle {
// The wrapped raw handle value.
T raw;
};

// Distinct wrapper types for the two void*-based handles; the tags (0 and 1)
// only need to differ from each other.
using NvvmProgramValue = TaggedHandle<nvvmProgram, 0>;
using NvJitLinkValue = TaggedHandle<nvJitLink_t, 1>;

// ============================================================================
// Thread-local error handling
// ============================================================================
Expand Down Expand Up @@ -72,6 +92,9 @@ extern decltype(&cuLibraryLoadData) p_cuLibraryLoadData;
extern decltype(&cuLibraryUnload) p_cuLibraryUnload;
extern decltype(&cuLibraryGetKernel) p_cuLibraryGetKernel;

// Linker
extern decltype(&cuLinkDestroy) p_cuLinkDestroy;

// ============================================================================
// NVRTC function pointers
//
Expand All @@ -94,6 +117,19 @@ extern decltype(&nvrtcDestroyProgram) p_nvrtcDestroyProgram;
using NvvmDestroyProgramFn = int (*)(nvvmProgram*);
extern NvvmDestroyProgramFn p_nvvmDestroyProgram;

// ============================================================================
// nvJitLink function pointers
//
// These are populated by _resource_handles.pyx at module import time using
// function pointers extracted from cuda.bindings.cynvjitlink.__pyx_capi__.
// Note: May be null if nvJitLink is not available at runtime.
// ============================================================================

// Function pointer type for nvJitLinkDestroy (avoids nvJitLink.h dependency)
// Signature: nvJitLinkResult nvJitLinkDestroy(nvJitLinkHandle *handle)
using NvJitLinkDestroyFn = int (*)(nvJitLink_t*);
extern NvJitLinkDestroyFn p_nvJitLinkDestroy;

// ============================================================================
// Handle type aliases - expose only the raw CUDA resource
// ============================================================================
Expand All @@ -105,7 +141,9 @@ using MemoryPoolHandle = std::shared_ptr<const CUmemoryPool>;
using LibraryHandle = std::shared_ptr<const CUlibrary>;
using KernelHandle = std::shared_ptr<const CUkernel>;
using NvrtcProgramHandle = std::shared_ptr<const nvrtcProgram>;
using NvvmProgramHandle = std::shared_ptr<const nvvmProgram>;
using NvvmProgramHandle = std::shared_ptr<const NvvmProgramValue>;
using NvJitLinkHandle = std::shared_ptr<const NvJitLinkValue>;
using CuLinkHandle = std::shared_ptr<const CUlinkState>;

// ============================================================================
// Context handle functions
Expand Down Expand Up @@ -316,6 +354,33 @@ NvvmProgramHandle create_nvvm_program_handle(nvvmProgram prog);
// The program will NOT be destroyed when the handle is released.
NvvmProgramHandle create_nvvm_program_handle_ref(nvvmProgram prog);

// ============================================================================
// nvJitLink handle functions
// ============================================================================

// Create an owning nvJitLink handle.
// When the last reference is released, nvJitLinkDestroy is called.
// Use this to wrap a handle created via nvJitLinkCreate.
// Note: If nvJitLink is not available (p_nvJitLinkDestroy is null), nvJitLinkDestroy is not called; only the handle's internal storage is freed.
NvJitLinkHandle create_nvjitlink_handle(nvJitLink_t handle);

// Create a non-owning nvJitLink handle (references existing handle).
// The handle will NOT be destroyed when the last reference is released.
NvJitLinkHandle create_nvjitlink_handle_ref(nvJitLink_t handle);

// ============================================================================
// cuLink handle functions
// ============================================================================

// Create an owning cuLink handle.
// When the last reference is released, cuLinkDestroy is called.
// Use this to wrap a CUlinkState created via cuLinkCreate.
CuLinkHandle create_culink_handle(CUlinkState state);

// Create a non-owning cuLink handle (references existing CUlinkState).
// The handle will NOT be destroyed when the last reference is released.
CuLinkHandle create_culink_handle_ref(CUlinkState state);

// ============================================================================
// Overloaded helper functions to extract raw resources from handles
// ============================================================================
Expand Down Expand Up @@ -354,6 +419,14 @@ inline nvrtcProgram as_cu(const NvrtcProgramHandle& h) noexcept {
}

// Extract the raw nvvmProgram from a handle; an empty handle yields nullptr.
inline nvvmProgram as_cu(const NvvmProgramHandle& h) noexcept {
    if (!h) {
        return nullptr;
    }
    return h->raw;
}

// Extract the raw nvJitLink_t from a handle; an empty handle yields nullptr.
inline nvJitLink_t as_cu(const NvJitLinkHandle& h) noexcept {
    if (!h) {
        return nullptr;
    }
    return h->raw;
}

// Extract the raw CUlinkState from a handle; an empty handle yields nullptr.
inline CUlinkState as_cu(const CuLinkHandle& h) noexcept {
    if (!h) {
        return nullptr;
    }
    return *h;
}

Expand Down Expand Up @@ -395,6 +468,14 @@ inline std::intptr_t as_intptr(const NvvmProgramHandle& h) noexcept {
return reinterpret_cast<std::intptr_t>(as_cu(h));
}

// Return the raw nvJitLink handle (as produced by as_cu) reinterpreted as an
// integer of pointer width.
inline std::intptr_t as_intptr(const NvJitLinkHandle& h) noexcept {
    const auto raw_handle = as_cu(h);
    return reinterpret_cast<std::intptr_t>(raw_handle);
}

// Return the raw CUlinkState (as produced by as_cu) reinterpreted as an
// integer of pointer width.
inline std::intptr_t as_intptr(const CuLinkHandle& h) noexcept {
    const auto raw_state = as_cu(h);
    return reinterpret_cast<std::intptr_t>(raw_state);
}

// as_py() - convert handle to Python wrapper object (returns new reference)
namespace detail {
// n.b. class lookup is not cached to avoid deadlock hazard, see DESIGN.md
Expand Down Expand Up @@ -447,4 +528,13 @@ inline PyObject* as_py(const NvvmProgramHandle& h) noexcept {
return PyLong_FromSsize_t(as_intptr(h));
}

// Convert an nvJitLink handle to a Python integer holding its address value.
// The nvJitLink bindings use raw integers rather than wrapper classes, so no
// wrapper object is constructed here.
inline PyObject* as_py(const NvJitLinkHandle& h) noexcept {
    const auto address = as_intptr(h);
    return PyLong_FromSsize_t(address);
}

// Convert a cuLink handle to a Python object by passing its integer address to
// detail::make_py together with the module and class name of the driver
// binding's CUlinkState wrapper.
inline PyObject* as_py(const CuLinkHandle& h) noexcept {
    const auto address = as_intptr(h);
    return detail::make_py("cuda.bindings.driver", "CUlinkState", address);
}

} // namespace cuda_core
17 changes: 17 additions & 0 deletions cuda_core/cuda/core/_linker.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

from ._resource_handles cimport NvJitLinkHandle, CuLinkHandle


cdef class Linker:
    # Backend handles; both types are cimported from ._resource_handles.
    cdef NvJitLinkHandle _nvjitlink_handle
    cdef CuLinkHandle _culink_handle

    # Flag selecting between the nvJitLink handle and the cuLink handle.
    cdef bint _use_nvjitlink

    # formatted_options list (driver); None for nvjitlink; cleared in link()
    cdef object _drv_log_bufs

    # decoded log; None until link() or pre-link get_*_log()
    cdef str _info_log

    # decoded log; None until link() or pre-link get_*_log()
    cdef str _error_log

    # LinkerOptions
    cdef object _options

    # Slot that allows weak references to Linker instances.
    cdef object __weakref__
Loading
Loading