Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions gpustack_runtime/detector/amd.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,17 @@ def detect(self) -> Devices | None:
dev_gpu_vram_usage = pyamdsmi.amdsmi_get_gpu_vram_usage(dev)
dev_mem = dev_gpu_vram_usage.get("vram_total")
dev_mem_used = dev_gpu_vram_usage.get("vram_used")
# On APUs with unified memory (e.g., AMD Strix Halo), VRAM
# reports only the BIOS carveout (~512 MiB); VIS_VRAM reports
# the full usable system memory. Use VIS_VRAM when larger.
with contextlib.suppress(pyrocmsmi.ROCMSMIError):
dev_mem_vis_vram = byte_to_mebibyte(
pyrocmsmi.rsmi_dev_memory_total_get(
dev_idx,
pyrocmsmi.RSMI_MEM_TYPE_VIS_VRAM,
),
)
dev_mem = max(dev_mem, dev_mem_vis_vram)
dev_ecc_count = pyamdsmi.amdsmi_get_gpu_ecc_count(
dev,
pyamdsmi.AmdSmiGpuBlock.UMC,
Expand All @@ -189,6 +200,17 @@ def detect(self) -> Devices | None:
dev_mem = byte_to_mebibyte( # byte to MiB
pyrocmsmi.rsmi_dev_memory_total_get(dev_idx),
)
# On APUs with unified memory (e.g., AMD Strix Halo), VRAM
# reports only the BIOS carveout (~512 MiB); VIS_VRAM reports
# the full usable system memory. Use VIS_VRAM when larger.
with contextlib.suppress(pyrocmsmi.ROCMSMIError):
dev_mem_vis_vram = byte_to_mebibyte(
pyrocmsmi.rsmi_dev_memory_total_get(
dev_idx,
pyrocmsmi.RSMI_MEM_TYPE_VIS_VRAM,
),
)
dev_mem = max(dev_mem, dev_mem_vis_vram)
dev_mem_used = byte_to_mebibyte( # byte to MiB
pyrocmsmi.rsmi_dev_memory_usage_get(dev_idx),
)
Expand Down
5 changes: 5 additions & 0 deletions gpustack_runtime/detector/pyrocmsmi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@
ROCMSMI_IOLINK_TYPE_XGMI = 2
ROCMSMI_IOLINK_TYPE_NUMIOLINKTYPES = 3

## Memory Types ##
# Mirrors the `rsmi_memory_type_t` enum of the ROCm SMI C library, whose
# members are declared in the order VRAM, VIS_VRAM, GTT (values 0, 1, 2).
# The numeric value is what the native library receives, so it must match
# the C header exactly; a swapped value silently queries a different pool.
# NOTE: value 2 selects GTT (system memory mapped for GPU use), not
# CPU-visible VRAM. Pass RSMI_MEM_TYPE_GTT when the system-memory-backed
# pool is wanted.
RSMI_MEM_TYPE_VRAM = 0
RSMI_MEM_TYPE_VIS_VRAM = 1
RSMI_MEM_TYPE_GTT = 2

## Error Codes ##
ROCMSMI_ERROR_UNINITIALIZED = -99997
ROCMSMI_ERROR_FUNCTION_NOT_FOUND = -99998
Expand Down
Loading