# Patch summary: fill DLDevice.device_id in dlpack_c() from
# cudaPointerGetAttributes() for arrays flagged `from_cai`, instead of the
# hard-coded 0; arrays without `from_cai` still get 0. Adds a Python-visible
# _dlpack_device_id() wrapper plus host and multi-GPU tests for it.
#
# NOTE(review): this patch text was recovered from a whitespace-collapsed
# copy. Line breaks and blank lines were rebuilt from the hunk headers; the
# indentation depth of nested lines is inferred -- check against the tree.
# NOTE(review): Cython pointer casts look like they were stripped in transit
# (`tensor.data = tensor_ptr`, the `stdlib.malloc(...)` assignment, and the
# `cudaPointerGetAttributes(&attributes, tensor_ptr)` argument) -- confirm
# against the original change before applying.
# NOTE(review): in dlpack_c, `dlm` is malloc'ed before `_dlpack_device_id_c`
# can raise; no free on that path is visible in these hunks -- TODO confirm.
diff --git a/python/cuvs/cuvs/common/cydlpack.pyx b/python/cuvs/cuvs/common/cydlpack.pyx
index aa3fbae5f2..4e11ac6142 100644
--- a/python/cuvs/cuvs/common/cydlpack.pyx
+++ b/python/cuvs/cuvs/common/cydlpack.pyx
@@ -1,11 +1,17 @@
 #
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 #
 # cython: language_level=3
 
 import numpy as np
 
+from cuda.bindings.cyruntime cimport (
+    cudaError,
+    cudaError_t,
+    cudaPointerAttributes,
+    cudaPointerGetAttributes,
+)
 from libc cimport stdlib
 from libc.stdint cimport uintptr_t
 
@@ -57,12 +63,36 @@ def dl_data_type_to_numpy(DLDataType dtype):
         raise ValueError(f"unknown DLDataTypeCode.code: {dtype.code}")
 
 
+cdef int _cuda_pointer_device_id(uintptr_t tensor_ptr) except *:
+    cdef cudaPointerAttributes attributes
+    cdef cudaError_t status = cudaPointerGetAttributes(
+        &attributes, tensor_ptr
+    )
+    if status != cudaError.cudaSuccess:
+        raise ValueError(
+            f"Unable to determine CUDA device for array pointer: {status}"
+        )
+    return attributes.device
+
+
+cdef int _dlpack_device_id_c(ary) except *:
+    cdef uintptr_t tensor_ptr = ary.ai_["data"][0]
+    if ary.from_cai:
+        return _cuda_pointer_device_id(tensor_ptr)
+    return 0
+
+
+def _dlpack_device_id(ary):
+    return _dlpack_device_id_c(ary)
+
+
 cdef DLManagedTensor* dlpack_c(ary):
     # todo(dgd): add checking options/parameters
     cdef DLDeviceType dev_type
     cdef DLDevice dev
     cdef DLDataType dtype
     cdef DLTensor tensor
+    cdef uintptr_t tensor_ptr = ary.ai_["data"][0]
     cdef DLManagedTensor* dlm = \
         stdlib.malloc(sizeof(DLManagedTensor))
 
@@ -72,7 +102,7 @@ cdef DLManagedTensor* dlpack_c(ary):
         dev_type = DLDeviceType.kDLCPU
 
     dev.device_type = dev_type
-    dev.device_id = 0
+    dev.device_id = _dlpack_device_id_c(ary)
 
     # todo (dgd): change to nice dict
     if ary.dtype == np.float32:
@@ -117,9 +147,6 @@ cdef DLManagedTensor* dlpack_c(ary):
     for i in range(ndim):
         shape[i] = ary.shape[i]
 
-    cdef uintptr_t tensor_ptr
-    tensor_ptr = ary.ai_["data"][0]
-
     tensor.data = tensor_ptr
     tensor.device = dev
     tensor.dtype = dtype
diff --git a/python/cuvs/cuvs/tests/test_device_tensor_view.py b/python/cuvs/cuvs/tests/test_device_tensor_view.py
index 611b0f3e55..cd00a3a013 100644
--- a/python/cuvs/cuvs/tests/test_device_tensor_view.py
+++ b/python/cuvs/cuvs/tests/test_device_tensor_view.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 #
 
@@ -6,10 +6,37 @@
 import numpy as np
 import pytest
 
+from pylibraft.common.cai_wrapper import wrap_array
+
+from cuvs.common.cydlpack import _dlpack_device_id
 from cuvs.common.device_tensor_view import DeviceTensorView
 from cuvs.tests.ann_utils import generate_data
 
 
+def has_multiple_gpus():
+    try:
+        return cp.cuda.runtime.getDeviceCount() > 1
+    except Exception:
+        return False
+
+
+requires_multiple_gpus = pytest.mark.skipif(
+    not has_multiple_gpus(), reason="Multi-GPU tests require multiple GPUs"
+)
+
+
+def test_dlpack_device_id_for_host_array():
+    ary = np.empty((4,), dtype=np.float32)
+    assert _dlpack_device_id(wrap_array(ary)) == 0
+
+
+@requires_multiple_gpus
+def test_dlpack_device_id_matches_cuda_array_device():
+    with cp.cuda.Device(1):
+        ary = cp.empty((4,), dtype=cp.float32)
+        assert _dlpack_device_id(wrap_array(ary)) == 1
+
+
 @pytest.mark.parametrize("dtype", [np.float32, np.int8, np.int32])
 def test_device_tensor_view(dtype):
     n_rows, n_cols = 1000, 64