-
Notifications
You must be signed in to change notification settings - Fork 245
Open
Labels
cuda.core (Everything related to the cuda.core module), test (Improvements or additions to tests)
Description
Failures like the one shown below tend to be distracting, and it's probably very easy to avoid the distractions:
xref:
- Test fixes as required for QA #1567 (comment)
- https://github.com/NVIDIA/cuda-python/actions/runs/21995912807/job/63556542405?pr=1621
@Andy-Jost's suggestion:
`@pytest.mark.flaky(reruns=2)`
Other obvious idea: longer timeout?
Or both?
Example of full error for completeness:
=================================== FAILURES ===================================
_____________________ TestIpcReexport.test_main[DeviceMR] ______________________
self = <test_send_buffers.TestIpcReexport object at 0x478595203d0>
ipc_device = <Device 0 (Tesla T4)>
ipc_memory_resource = <cuda.core._memory._device_memory_resource.DeviceMemoryResource object at 0x478594efae0>
def test_main(self, ipc_device, ipc_memory_resource):
# Set up the device.
device = ipc_device
device.set_current()
# Allocate, fill a buffer.
mr = ipc_memory_resource
pgen = PatternGen(device, NBYTES)
buffer = mr.allocate(NBYTES)
pgen.fill_buffer(buffer, seed=0)
# Set up communication.
q_bc = mp.Queue()
event_b, event_c = [mp.Event() for _ in range(2)]
# Spawn B and C.
proc_b = mp.Process(target=self.process_b_main, args=(buffer, q_bc, event_b))
proc_c = mp.Process(target=self.process_c_main, args=(q_bc, event_c))
proc_b.start()
proc_c.start()
# Wait for C to signal completion then clean up.
event_c.wait(timeout=CHILD_TIMEOUT_SEC)
event_b.set() # b can finish now
proc_b.join(timeout=CHILD_TIMEOUT_SEC)
proc_c.join(timeout=CHILD_TIMEOUT_SEC)
assert proc_b.exitcode == 0
> assert proc_c.exitcode == 0
E AssertionError: assert 1 == 0
E + where 1 = <Process name='Process-25' pid=5129 parent=4876 stopped exitcode=1>.exitcode
buffer = <Buffer ptr=0x316000000 size=64>
device = <Device 0 (Tesla T4)>
event_b = <Event at 0x47858f9ab10 set>
event_c = <Event at 0x4785952cf90 unset>
ipc_device = <Device 0 (Tesla T4)>
ipc_memory_resource = <cuda.core._memory._device_memory_resource.DeviceMemoryResource object at 0x478594efae0>
mr = <cuda.core._memory._device_memory_resource.DeviceMemoryResource object at 0x478594efae0>
pgen = <helpers.buffers.PatternGen object at 0x47859dd1e10>
proc_b = <Process name='Process-24' pid=5128 parent=4876 stopped exitcode=0>
proc_c = <Process name='Process-25' pid=5129 parent=4876 stopped exitcode=1>
q_bc = <multiprocessing.queues.Queue object at 0x47859dd1910>
self = <test_send_buffers.TestIpcReexport object at 0x478595203d0>
tests/memory_ipc/test_send_buffers.py:97: AssertionError
----------------------------- Captured stderr call -----------------------------
Traceback (most recent call last):
File "/opt/hostedtoolcache/Python/3.14.2/x64-freethreaded/lib/python3.14t/multiprocessing/queues.py", line 262, in _feed
obj = _ForkingPickler.dumps(obj)
File "/opt/hostedtoolcache/Python/3.14.2/x64-freethreaded/lib/python3.14t/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
~~~~~~~~~~~~~~~~~~~~~~~^^^^^
File "cuda/core/_memory/_buffer.pyx", line 99, in cuda.core._memory._buffer.Buffer.__reduce__
return Buffer.from_ipc_descriptor, (self.memory_resource, self.get_ipc_descriptor())
File "cuda/core/_memory/_buffer.pyx", line 139, in cuda.core._memory._buffer.Buffer.get_ipc_descriptor
self._ipc_data = IPCDataForBuffer(_ipc.Buffer_get_ipc_descriptor(self), False)
File "cuda/core/_memory/_ipc.pyx", line 160, in cuda.core._memory._ipc.Buffer_get_ipc_descriptor
if not self.memory_resource.is_ipc_enabled:
AttributeError: 'NoneType' object has no attribute 'is_ipc_enabled'
Process Process-25:
Traceback (most recent call last):
File "/opt/hostedtoolcache/Python/3.14.2/x64-freethreaded/lib/python3.14t/multiprocessing/process.py", line 320, in _bootstrap
self.run()
~~~~~~~~^^
File "/opt/hostedtoolcache/Python/3.14.2/x64-freethreaded/lib/python3.14t/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/__w/cuda-python/cuda-python/cuda_core/tests/memory_ipc/test_send_buffers.py", line 121, in process_c_main
buffer = q_bc.get(timeout=CHILD_TIMEOUT_SEC)
File "/opt/hostedtoolcache/Python/3.14.2/x64-freethreaded/lib/python3.14t/multiprocessing/queues.py", line 112, in get
raise Empty
_queue.Empty
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
cuda.core (Everything related to the cuda.core module), test (Improvements or additions to tests)