From 907b9669f5f1880b7a6fd277802153635595bd64 Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Tue, 16 Jun 2026 09:23:12 -0700 Subject: [PATCH 1/8] [UR][L0] Add read-only flag to host memory registration Replace the placeholder TBD flag in the host memory registration flags enum with UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY and regenerate the API headers. The flag indicates that device access to the registered range is read-only. In the Level Zero v2 adapter, map the flag onto ZE_HOST_MEM_ALLOC_FLAG_MEM_READ_ONLY when calling zeMemAllocHost so the driver registers the external system memory range in read-only device-access mode. Assisted-By: Claude --- unified-runtime/include/unified-runtime/ur_api.h | 6 ++++-- unified-runtime/include/unified-runtime/ur_print.hpp | 12 ++++++------ .../scripts/core/exp-usm-host-alloc-register.yml | 7 +++++-- .../source/adapters/level_zero/v2/usm.cpp | 12 ++++++++++-- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/unified-runtime/include/unified-runtime/ur_api.h b/unified-runtime/include/unified-runtime/ur_api.h index 792607ab106c4..9d1d1162c40b6 100644 --- a/unified-runtime/include/unified-runtime/ur_api.h +++ b/unified-runtime/include/unified-runtime/ur_api.h @@ -11722,8 +11722,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMContextMemcpyExp( /// @brief USM host memory registration flags. typedef uint32_t ur_exp_usm_host_alloc_register_flags_t; typedef enum ur_exp_usm_host_alloc_register_flag_t { - /// Reserved for future use. - UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_TBD = UR_BIT(0), + /// Device access to the registered range is read-only. The behavior + /// is undefined if device code writes to a range registered with this + /// flag. + UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY = UR_BIT(0), /// @cond UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_FORCE_UINT32 = 0x7fffffff /// @endcond diff --git a/unified-runtime/include/unified-runtime/ur_print.hpp b/unified-runtime/include/unified-runtime/ur_print.hpp index d67c52cef8bd7..ff33df616ef12 100644 --- a/unified-runtime/include/unified-runtime/ur_print.hpp +++ b/unified-runtime/include/unified-runtime/ur_print.hpp @@ -12508,8 +12508,8 @@ inline ur_result_t printFlag(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, enum ur_exp_usm_host_alloc_register_flag_t value) { switch (value) { - case UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_TBD: - os << "UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_TBD"; + case UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY: + os << "UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY"; break; default: os << "unknown enumerator"; @@ -12528,15 +12528,15 @@ printFlag(std::ostream &os, uint32_t val = flag; bool first = true; - if ((val & UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_TBD) == - (uint32_t)UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_TBD) { - val ^= (uint32_t)UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_TBD; + if ((val & UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY) == + (uint32_t)UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY) { + val ^= (uint32_t)UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY; if (!first) { os << " | "; } else { first = false; } - os << UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_TBD; + os << UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY; } if (val != 0) { std::bitset<32> bits(val); diff --git a/unified-runtime/scripts/core/exp-usm-host-alloc-register.yml b/unified-runtime/scripts/core/exp-usm-host-alloc-register.yml index 173da23cff408..88df311477b53 100644 --- a/unified-runtime/scripts/core/exp-usm-host-alloc-register.yml +++ b/unified-runtime/scripts/core/exp-usm-host-alloc-register.yml @@ -29,8 +29,11 @@ type: enum desc: "USM host memory registration flags." name: $x_exp_usm_host_alloc_register_flags_t etors: - - name: TBD - desc: "Reserved for future use." + - name: READ_ONLY + desc: |- + Device access to the registered range is read-only. The behavior + is undefined if device code writes to a range registered with this + flag. --- #-------------------------------------------------------------------------- type: struct desc: "USM host memory registration properties." diff --git a/unified-runtime/source/adapters/level_zero/v2/usm.cpp b/unified-runtime/source/adapters/level_zero/v2/usm.cpp index 3bc216421e2d6..0aa56d18eaec9 100644 --- a/unified-runtime/source/adapters/level_zero/v2/usm.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/usm.cpp @@ -903,7 +903,7 @@ ur_result_t UR_APICALL urUSMContextMemcpyExp(ur_context_handle_t hContext, ur_result_t urUSMHostAllocRegisterExp( ur_context_handle_t hContext, void *pHostMem, size_t size, - const ur_exp_usm_host_alloc_register_properties_t * /*pProperties*/) { + const ur_exp_usm_host_alloc_register_properties_t *pProperties) { if (!hContext->getPlatform()->ZeExternalMemoryMappingExtensionSupported) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -912,8 +912,16 @@ ur_result_t urUSMHostAllocRegisterExp( ZE_STRUCTURE_TYPE_EXTERNAL_MEMMAP_SYSMEM_EXT_DESC, nullptr, pHostMem, size}; + // Map the read-only registration flag onto the Level Zero host allocation + // flag, telling the driver that device access to the range is read-only. + ze_host_mem_alloc_flags_t hostFlags = 0; + if (pProperties && + (pProperties->flags & UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY)) { + hostFlags |= ZE_HOST_MEM_ALLOC_FLAG_MEM_READ_ONLY; + } + ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC, - &sysMemDesc, 0}; + &sysMemDesc, hostFlags}; void *mappedMem = nullptr; ZE2UR_CALL(zeMemAllocHost, From f89d360cb7df2158e496089792b3d3bbb60f9eee Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Mon, 15 Jun 2026 10:53:28 -0700 Subject: [PATCH 2/8] [SYCL] Implement register_host_memory extension API - Add the sycl::ext::oneapi::experimental::register_host_memory and unregister_host_memory free functions defined by sycl_ext_oneapi_register_host_memory. - Implement read_only property. - Add a unit tests to verify argument forwarding to the UR host memory registration APIs. - Add e2e test that registers a page-aligned host allocation and checks that the pointer can be used in a kernel, that explicit copies to and from it work etc. Assisted-By: Claude --- .../experimental/register_host_memory.hpp | 82 +++++++ .../sycl/ext/oneapi/properties/property.hpp | 3 +- sycl/include/sycl/info/aspects.def | 1 + sycl/include/sycl/sycl.hpp | 1 + sycl/source/detail/device_impl.hpp | 5 + .../detail/ur_device_info_ret_types.inc | 1 + sycl/source/detail/usm/usm_impl.cpp | 78 +++++++ sycl/source/feature_test.hpp.in | 1 + sycl/test-e2e/USM/register_host_memory.cpp | 177 +++++++++++++++ sycl/test/abi/sycl_symbols_linux.dump | 2 + sycl/test/abi/sycl_symbols_windows.dump | 2 + sycl/unittests/Extensions/CMakeLists.txt | 1 + .../Extensions/RegisterHostMemory.cpp | 208 ++++++++++++++++++ 13 files changed, 561 insertions(+), 1 deletion(-) create mode 100644 sycl/include/sycl/ext/oneapi/experimental/register_host_memory.hpp create mode 100644 sycl/test-e2e/USM/register_host_memory.cpp create mode 100644 sycl/unittests/Extensions/RegisterHostMemory.cpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/register_host_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/register_host_memory.hpp new file mode 100644 index 0000000000000..4af270d1f1c57 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/register_host_memory.hpp @@ -0,0 +1,82 @@ +//==--- register_host_memory.hpp - SYCL host memory registration extension -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include // for __SYCL_EXPORT +#include + +#include // for size_t +#include // for uint32_t +#include + +namespace sycl { +inline namespace _V1 { +class context; + +namespace ext::oneapi::experimental { + +// Indicates that device code will only read from the registered range. Device +// writes to a range registered with this property are undefined behavior. +struct read_only_key + : detail::compile_time_property_key { + using value_t = property_value; +}; + +inline constexpr read_only_key::value_t read_only; + +namespace detail { +// Implementation-internal flags describing a host memory registration. They +// are translated to UR flags in the runtime library. +enum register_host_memory_flags : uint32_t { + register_host_memory_flag_read_only = 1u << 0, +}; + +// Non-templated implementation entry points, defined in the SYCL runtime +// library. Flags is a bitwise OR of register_host_memory_flags values. +__SYCL_EXPORT void register_host_memory(void *Ptr, size_t NumBytes, + const context &Ctxt, uint32_t Flags); +__SYCL_EXPORT void unregister_host_memory(void *Ptr, const context &Ctxt); + +// Lowers a compile-time property list to the runtime flags word. +template uint32_t getRegisterHostMemoryFlags() { + uint32_t Flags = 0; + if constexpr (std::decay_t::template has_property()) + Flags |= register_host_memory_flag_read_only; + return Flags; +} +} // namespace detail + +/// Registers the existing host memory range \p ptr of \p numBytes bytes with +/// \p ctxt so that it behaves like a USM host allocation. See +/// sycl_ext_oneapi_register_host_memory for the full semantics. +/// +/// \p ptr and \p numBytes must both be aligned to the host page size, \p ptr +/// must not be null, \p numBytes must not be zero, and every device in \p ctxt +/// must have aspect::ext_oneapi_register_host_memory. +template +std::enable_if_t>> +register_host_memory(void *ptr, size_t numBytes, const context &ctxt, + Properties props = {}) { + (void)props; + detail::register_host_memory( + ptr, numBytes, ctxt, detail::getRegisterHostMemoryFlags()); +} + +/// Unregisters a host memory range previously registered with +/// register_host_memory. \p ptr must be the exact base pointer that was passed +/// to register_host_memory with the same \p ctxt, and the registration must +/// still be in effect. This does not free or unmap the underlying host memory. +inline void unregister_host_memory(void *ptr, const context &ctxt) { + detail::unregister_host_memory(ptr, ctxt); +} + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index 3f6dde389b795..edb4e79be86a5 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -193,8 +193,9 @@ enum PropKind : uint32_t { ZeroInit = 48, FastLink = 49, PhysicalMemoryEnableIPC = 50, + RegisterHostMemoryReadOnly = 51, // PropKindSize must always be the last value. - PropKindSize = 51, + PropKindSize = 52, }; template struct PropertyToKind { diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 69ff4e02ee761..960493719e83f 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -91,3 +91,4 @@ __SYCL_ASPECT(ext_intel_xe_cores_per_cluster, 93) __SYCL_ASPECT(ext_intel_eus_per_xe_core, 94) __SYCL_ASPECT(ext_intel_max_lanes_per_hw_thread, 95) __SYCL_ASPECT(ext_oneapi_ipc_physical_memory, 96) +__SYCL_ASPECT(ext_oneapi_register_host_memory, 97) diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index bf3cd66cc102a..7104705d5fb82 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -126,6 +126,7 @@ can be disabled by setting SYCL_DISABLE_FSYCL_SYCLHPP_WARNING macro.") #include #include #include +#include #include #include #include diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 8ccd666880158..280a6d053c788 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -1391,6 +1391,11 @@ class device_impl { UR_DEVICE_INFO_EXTERNAL_MEMORY_IMPORT_SUPPORT_EXP>() .value_or(0); } + CASE(ext_oneapi_register_host_memory) { + return get_info_impl_nocheck< + UR_DEVICE_INFO_USM_HOST_ALLOC_REGISTER_SUPPORT_EXP>() + .value_or(0); + } CASE(ext_oneapi_external_semaphore_import) { return get_info_impl_nocheck< UR_DEVICE_INFO_EXTERNAL_SEMAPHORE_IMPORT_SUPPORT_EXP>() diff --git a/sycl/source/detail/ur_device_info_ret_types.inc b/sycl/source/detail/ur_device_info_ret_types.inc index b09099e1721d2..5dd0829a2584e 100644 --- a/sycl/source/detail/ur_device_info_ret_types.inc +++ b/sycl/source/detail/ur_device_info_ret_types.inc @@ -209,4 +209,5 @@ MAP(UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_IS_INTEGRATED_GPU, ur_bool_t) MAP(UR_DEVICE_INFO_DEVICE_WAIT_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_IPC_PHYSICAL_MEMORY_SUPPORT_EXP, ur_bool_t) +MAP(UR_DEVICE_INFO_USM_HOST_ALLOC_REGISTER_SUPPORT_EXP, ur_bool_t) // clang-format on diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index d4ae709903748..b4ede34b72490 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -609,6 +610,83 @@ void release_from_device_copy(const void *Ptr, const queue &Queue) { release_from_usm_device_copy(Ptr, Queue.get_context()); } +// Host memory registration APIs, see sycl_ext_oneapi_register_host_memory. + +namespace detail { + +// Throws errc::feature_not_supported unless every device in the context +// reports aspect::ext_oneapi_register_host_memory. +static void checkRegisterHostMemorySupport(const context &Ctxt) { + detail::context_impl &CtxtImpl = *detail::getSyclObjImpl(Ctxt); + for (detail::device_impl &Dev : CtxtImpl.getDevices()) { + if (!Dev.has(aspect::ext_oneapi_register_host_memory)) + throw sycl::exception( + make_error_code(errc::feature_not_supported), + "At least one device in the context does not support registering " + "host memory (aspect::ext_oneapi_register_host_memory)."); + } +} + +// Maps a failed UR result from the host memory registration APIs to a +// sycl::exception with the error code mandated by the extension specification. +// Invalid argument conditions map to errc::invalid; anything else is a backend +// error. +static void throwRegisterHostMemoryError(ur_result_t Err, const char *What) { + errc Code; + switch (Err) { + case UR_RESULT_ERROR_INVALID_NULL_POINTER: + case UR_RESULT_ERROR_INVALID_VALUE: + case UR_RESULT_ERROR_INVALID_ARGUMENT: + Code = errc::invalid; + break; + default: + Code = errc::runtime; + break; + } + throw detail::set_ur_error(sycl::exception(make_error_code(Code), What), Err); +} + +void register_host_memory(void *Ptr, size_t NumBytes, const context &Ctxt, + uint32_t Flags) { + if (Ptr == nullptr) + throw sycl::exception(make_error_code(errc::invalid), + "register_host_memory: pointer must not be null."); + if (NumBytes == 0) + throw sycl::exception(make_error_code(errc::invalid), + "register_host_memory: size must not be zero."); + checkRegisterHostMemorySupport(Ctxt); + + ur_exp_usm_host_alloc_register_properties_t Props = { + UR_STRUCTURE_TYPE_EXP_USM_HOST_ALLOC_REGISTER_PROPERTIES, + /*pNext=*/nullptr, + /*flags=*/0}; + if (Flags & register_host_memory_flag_read_only) + Props.flags |= UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY; + + auto [urCtx, Adapter] = get_ur_handles(Ctxt); + ur_result_t Err = + Adapter->call_nocheck( + urCtx, Ptr, NumBytes, &Props); + if (Err != UR_RESULT_SUCCESS) + throwRegisterHostMemoryError(Err, "register_host_memory failed."); +} + +void unregister_host_memory(void *Ptr, const context &Ctxt) { + if (Ptr == nullptr) + throw sycl::exception(make_error_code(errc::invalid), + "unregister_host_memory: pointer must not be null."); + checkRegisterHostMemorySupport(Ctxt); + + auto [urCtx, Adapter] = get_ur_handles(Ctxt); + ur_result_t Err = + Adapter->call_nocheck( + urCtx, Ptr); + if (Err != UR_RESULT_SUCCESS) + throwRegisterHostMemoryError(Err, "unregister_host_memory failed."); +} + +} // namespace detail + void *malloc_device(size_t numBytes, const device &syclDevice, const property_list &propList) { sycl::context ctxt = syclDevice.get_platform().khr_get_default_context(); diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index 35db9c3a97c89..364423910ae26 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -77,6 +77,7 @@ inline namespace _V1 { #define SYCL_EXT_ONEAPI_KERNEL_ARG_PROPERTIES 1 #define SYCL_EXT_ONEAPI_ANNOTATED_PTR 1 #define SYCL_EXT_ONEAPI_COPY_OPTIMIZE 1 +#define SYCL_EXT_ONEAPI_REGISTER_HOST_MEMORY 1 #define SYCL_EXT_ONEAPI_VIRTUAL_MEM 1 #define SYCL_EXT_ONEAPI_USM_MALLOC_PROPERTIES 1 #define SYCL_EXT_INTEL_CACHE_CONFIG 1 diff --git a/sycl/test-e2e/USM/register_host_memory.cpp b/sycl/test-e2e/USM/register_host_memory.cpp new file mode 100644 index 0000000000000..807c63afc6c80 --- /dev/null +++ b/sycl/test-e2e/USM/register_host_memory.cpp @@ -0,0 +1,177 @@ +// REQUIRES: aspect-ext_oneapi_register_host_memory + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// End-to-end test for sycl_ext_oneapi_register_host_memory. Registers a +// page-aligned host allocation and exercises: +// - using the registered pointer directly in device code, +// - explicit USM copies to and from the registered memory, +// - get_pointer_type reporting usm::alloc::host while registered, +// - error handling for null pointer and zero size, +// - registering with the read_only property and reading from it in device +// code (device writes to a read_only range are undefined behavior and are +// therefore not exercised). + +#include +#include +#include + +#include +#include +#include + +#if defined(_WIN32) +#include +#else +#include +#endif + +namespace syclexp = sycl::ext::oneapi::experimental; + +static size_t getHostPageSize() { +#if defined(_WIN32) + SYSTEM_INFO Info; + GetSystemInfo(&Info); + return static_cast(Info.dwPageSize); +#else + return static_cast(sysconf(_SC_PAGESIZE)); +#endif +} + +static void *allocatePageAligned(size_t Alignment, size_t Size) { +#if defined(_WIN32) + return _aligned_malloc(Size, Alignment); +#else + return aligned_alloc(Alignment, Size); +#endif +} + +static void freePageAligned(void *Ptr) { +#if defined(_WIN32) + _aligned_free(Ptr); +#else + free(Ptr); +#endif +} + +int main() { + sycl::queue Q; + sycl::context Ctxt = Q.get_context(); + + const size_t PageSize = getHostPageSize(); + const size_t NumElems = 1024; + // Round the byte size up to a multiple of the page size as required. + size_t NumBytes = NumElems * sizeof(int); + NumBytes = (NumBytes + PageSize - 1) & ~(PageSize - 1); + + int *Data = static_cast(allocatePageAligned(PageSize, NumBytes)); + assert(Data != nullptr && "host allocation failed"); + + // Error handling: null pointer and zero size must throw errc::invalid. + { + bool Threw = false; + try { + syclexp::register_host_memory(nullptr, NumBytes, Ctxt); + } catch (const sycl::exception &E) { + Threw = (E.code() == sycl::errc::invalid); + } + assert(Threw && "null pointer should throw errc::invalid"); + + Threw = false; + try { + syclexp::register_host_memory(Data, 0, Ctxt); + } catch (const sycl::exception &E) { + Threw = (E.code() == sycl::errc::invalid); + } + assert(Threw && "zero size should throw errc::invalid"); + + // An unaligned pointer must throw errc::invalid. + Threw = false; + try { + syclexp::register_host_memory(reinterpret_cast(Data) + 64, + NumBytes, Ctxt); + } catch (const sycl::exception &E) { + Threw = (E.code() == sycl::errc::invalid); + } + assert(Threw && "unaligned pointer should throw errc::invalid"); + + // A size that is not a multiple of the page size must throw errc::invalid. + Threw = false; + try { + syclexp::register_host_memory(Data, NumBytes + 1, Ctxt); + } catch (const sycl::exception &E) { + Threw = (E.code() == sycl::errc::invalid); + } + assert(Threw && "unaligned size should throw errc::invalid"); + + // A range whose end address would overflow the host address space must + // throw errc::invalid. + Threw = false; + void *TopPage = reinterpret_cast(static_cast(-1) & + ~(PageSize - 1)); + try { + syclexp::register_host_memory(TopPage, PageSize, Ctxt); + } catch (const sycl::exception &E) { + Threw = (E.code() == sycl::errc::invalid); + } + assert(Threw && "non-representable range should throw errc::invalid"); + } + + syclexp::register_host_memory(Data, NumBytes, Ctxt); + + // While registered, the pointer behaves like a USM host allocation. + assert(sycl::get_pointer_type(Data, Ctxt) == sycl::usm::alloc::host); + // Interior pointers are also reported as host allocations. + assert(sycl::get_pointer_type(Data + 1, Ctxt) == sycl::usm::alloc::host); + + // The registered pointer can be referenced directly from device code. + Q.parallel_for(NumElems, [=](sycl::id<1> I) { + Data[I] = static_cast(I.get(0)) * 2; + }).wait(); + for (size_t I = 0; I < NumElems; ++I) + assert(Data[I] == static_cast(I) * 2); + + // Explicit copies to and from the registered memory. + std::vector HostSrc(NumElems); + for (size_t I = 0; I < NumElems; ++I) + HostSrc[I] = static_cast(I) + 7; + + // Copy from unregistered host memory into the registered range. + Q.memcpy(Data, HostSrc.data(), NumElems * sizeof(int)).wait(); + for (size_t I = 0; I < NumElems; ++I) + assert(Data[I] == static_cast(I) + 7); + + // Copy from the registered range back out to plain host memory. + std::vector HostDst(NumElems, 0); + Q.memcpy(HostDst.data(), Data, NumElems * sizeof(int)).wait(); + for (size_t I = 0; I < NumElems; ++I) + assert(HostDst[I] == static_cast(I) + 7); + + syclexp::unregister_host_memory(Data, Ctxt); + + // Register the same range with the read_only property and have device code + // read (but never write) it, writing results to a separate allocation. + for (size_t I = 0; I < NumElems; ++I) + Data[I] = static_cast(I) + 1; + + syclexp::register_host_memory(Data, NumBytes, Ctxt, + syclexp::properties{syclexp::read_only}); + assert(sycl::get_pointer_type(Data, Ctxt) == sycl::usm::alloc::host); + + int *Out = sycl::malloc_host(NumElems, Q); + assert(Out != nullptr && "host allocation failed"); + Q.parallel_for(NumElems, [=](sycl::id<1> I) { + Out[I] = Data[I] * 2; + }).wait(); + for (size_t I = 0; I < NumElems; ++I) + assert(Out[I] == (static_cast(I) + 1) * 2); + + sycl::free(Out, Q); + syclexp::unregister_host_memory(Data, Ctxt); + + // The application still owns and must free the host memory. + freePageAligned(Data); + + return 0; +} diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 6f63c8165f746..f311bbb0e49ee 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3100,6 +3100,7 @@ _ZN4sycl3_V13ext6oneapi12experimental6detail14image_mem_implD1Ev _ZN4sycl3_V13ext6oneapi12experimental6detail14image_mem_implD2Ev _ZN4sycl3_V13ext6oneapi12experimental6detail17build_from_sourceERNS0_13kernel_bundleILNS0_12bundle_stateE3EEERKSt6vectorINS0_6deviceESaISA_EERKS9_INS0_6detail11string_viewESaISG_EEPNSF_6stringESK_ _ZN4sycl3_V13ext6oneapi12experimental6detail19compile_from_sourceERNS0_13kernel_bundleILNS0_12bundle_stateE3EEERKSt6vectorINS0_6deviceESaISA_EERKS9_INS0_6detail11string_viewESaISG_EEPNSF_6stringESK_ +_ZN4sycl3_V13ext6oneapi12experimental6detail20register_host_memoryEPvmRKNS0_7contextEj _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base11updateValueEPKNS3_14raw_kernel_argEm _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base11updateValueEPKvm _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base14updateAccessorEPKNS0_6detail16AccessorBaseHostE @@ -3109,6 +3110,7 @@ _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC1Ev _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC2ERKSt10shared_ptrINS4_22dynamic_parameter_implEE _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC2EmPKv _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC2Ev +_ZN4sycl3_V13ext6oneapi12experimental6detail22unregister_host_memoryEPvRKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental6detail24executable_command_graph12finalizeImplEv _ZN4sycl3_V13ext6oneapi12experimental6detail24executable_command_graph6updateERKNS3_13command_graphILNS3_11graph_stateE0EEE _ZN4sycl3_V13ext6oneapi12experimental6detail24executable_command_graph6updateERKNS3_4nodeE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 078c703d1d3aa..90e2803f77200 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4302,6 +4302,7 @@ ?reduGetMaxWGSize@detail@_V1@sycl@@YA_KAEAVhandler@23@_K@Z ?reduGetPreferredWGSize@detail@_V1@sycl@@YA_KAEAVhandler@23@_K@Z ?registerDynamicParameter@handler@_V1@sycl@@AEAAXPEAVdynamic_parameter_impl@detail@experimental@oneapi@ext@23@H@Z +?register_host_memory@detail@experimental@oneapi@ext@_V1@sycl@@YAXPEAX_KAEBVcontext@56@I@Z ?release_external_memory@experimental@oneapi@ext@_V1@sycl@@YAXUexternal_mem@12345@AEBVdevice@45@AEBVcontext@45@@Z ?release_external_memory@experimental@oneapi@ext@_V1@sycl@@YAXUexternal_mem@12345@AEBVqueue@45@@Z ?release_external_semaphore@experimental@oneapi@ext@_V1@sycl@@YAXUexternal_semaphore@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4386,6 +4387,7 @@ ?unmap@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KAEBVcontext@45@@Z ?unmap_external_image_memory@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@W4image_type@12345@AEBVdevice@45@AEBVcontext@45@@Z ?unmap_external_linear_memory@experimental@oneapi@ext@_V1@sycl@@YAXPEAXAEBVdevice@45@AEBVcontext@45@@Z +?unregister_host_memory@detail@experimental@oneapi@ext@_V1@sycl@@YAXPEAXAEBVcontext@56@@Z ?unsampledImageConstructorNotification@detail@_V1@sycl@@YAXPEAX0AEBV?$optional@W4image_target@_V1@sycl@@@std@@W4mode@access@23@PEBXIAEBUcode_location@123@@Z ?unsampledImageConstructorNotification@image_plain@detail@_V1@sycl@@IEAAXAEBUcode_location@234@PEAXPEBXIQEA_KW4image_format@34@@Z ?unsampledImageDestructorNotification@image_plain@detail@_V1@sycl@@IEAAXPEAX@Z diff --git a/sycl/unittests/Extensions/CMakeLists.txt b/sycl/unittests/Extensions/CMakeLists.txt index 63730d56ed088..9102a5e45668a 100644 --- a/sycl/unittests/Extensions/CMakeLists.txt +++ b/sycl/unittests/Extensions/CMakeLists.txt @@ -25,6 +25,7 @@ add_sycl_unittest(ExtensionsTests OBJECT DeviceInfo.cpp RootGroup.cpp USMPrefetch.cpp + RegisterHostMemory.cpp ) add_subdirectory(BindlessImages) diff --git a/sycl/unittests/Extensions/RegisterHostMemory.cpp b/sycl/unittests/Extensions/RegisterHostMemory.cpp new file mode 100644 index 0000000000000..3bc4aaaf09a49 --- /dev/null +++ b/sycl/unittests/Extensions/RegisterHostMemory.cpp @@ -0,0 +1,208 @@ +//==------------------------ RegisterHostMemory.cpp ------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Test that sycl_ext_oneapi_register_host_memory validates its arguments, +// honors the device aspect, and calls the UR host memory registration APIs +// with the correct arguments. + +#include + +#include +#include +#include +#include + +#include + +using namespace sycl; +namespace syclexp = sycl::ext::oneapi::experimental; + +namespace { + +// Whether the mock device should advertise support for host memory +// registration via aspect::ext_oneapi_register_host_memory. +thread_local bool DeviceSupportsRegister = true; + +// Captured arguments of the most recent UR register/unregister call. +thread_local void *LastRegisterPtr = nullptr; +thread_local size_t LastRegisterSize = 0; +thread_local void *LastUnregisterPtr = nullptr; +thread_local int RegisterCallCount = 0; +thread_local int UnregisterCallCount = 0; + +// Registration flags captured from the most recent register call. +thread_local ur_exp_usm_host_alloc_register_flags_t LastRegisterFlags = 0; + +// Result code the register/unregister mock should return, to exercise the +// UR-result-to-errc mapping in the runtime. +thread_local ur_result_t RegisterResult = UR_RESULT_SUCCESS; +thread_local ur_result_t UnregisterResult = UR_RESULT_SUCCESS; + +ur_result_t redefinedDeviceGetInfo(void *pParams) { + auto Params = *static_cast(pParams); + if (*Params.ppropName == UR_DEVICE_INFO_USM_HOST_ALLOC_REGISTER_SUPPORT_EXP) { + if (*Params.ppPropValue) + *static_cast(*Params.ppPropValue) = DeviceSupportsRegister; + if (*Params.ppPropSizeRet) + **Params.ppPropSizeRet = sizeof(ur_bool_t); + return UR_RESULT_SUCCESS; + } + return sycl::unittest::MockAdapter::mock_urDeviceGetInfo(pParams); +} + +ur_result_t redefinedHostAllocRegister(void *pParams) { + auto Params = *static_cast(pParams); + LastRegisterPtr = *Params.ppHostMem; + LastRegisterSize = *Params.psize; + LastRegisterFlags = + *Params.ppProperties ? (*Params.ppProperties)->flags : 0; + ++RegisterCallCount; + return RegisterResult; +} + +ur_result_t redefinedHostAllocUnregister(void *pParams) { + auto Params = + *static_cast(pParams); + LastUnregisterPtr = *Params.ppHostMem; + ++UnregisterCallCount; + return UnregisterResult; +} + +class RegisterHostMemoryTests : public ::testing::Test { +public: + RegisterHostMemoryTests() : Mock{}, Ctxt{platform().get_devices()[0]} {} + +protected: + void SetUp() override { + DeviceSupportsRegister = true; + LastRegisterPtr = nullptr; + LastRegisterSize = 0; + LastUnregisterPtr = nullptr; + RegisterCallCount = 0; + UnregisterCallCount = 0; + RegisterResult = UR_RESULT_SUCCESS; + UnregisterResult = UR_RESULT_SUCCESS; + LastRegisterFlags = 0; + mock::getCallbacks().set_replace_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); + mock::getCallbacks().set_replace_callback("urUSMHostAllocRegisterExp", + &redefinedHostAllocRegister); + mock::getCallbacks().set_replace_callback("urUSMHostAllocUnregisterExp", + &redefinedHostAllocUnregister); + } + + unittest::UrMock<> Mock; + context Ctxt; +}; + +// A successful registration forwards the exact pointer and size to UR and a +// matching unregistration forwards the same pointer. +TEST_F(RegisterHostMemoryTests, RegisterAndUnregisterForwardArguments) { + int Storage = 0; + void *Ptr = &Storage; + constexpr size_t Size = 4096; + + syclexp::register_host_memory(Ptr, Size, Ctxt); + EXPECT_EQ(RegisterCallCount, 1); + EXPECT_EQ(LastRegisterPtr, Ptr); + EXPECT_EQ(LastRegisterSize, Size); + // No properties passed: no registration flags should be set. + EXPECT_EQ(LastRegisterFlags, 0u); + + syclexp::unregister_host_memory(Ptr, Ctxt); + EXPECT_EQ(UnregisterCallCount, 1); + EXPECT_EQ(LastUnregisterPtr, Ptr); +} + +// The read_only property is lowered to the UR read-only registration flag. +TEST_F(RegisterHostMemoryTests, ReadOnlyPropertyLowersToFlag) { + int Storage = 0; + void *Ptr = &Storage; + constexpr size_t Size = 4096; + + syclexp::register_host_memory(Ptr, Size, Ctxt, + syclexp::properties{syclexp::read_only}); + EXPECT_EQ(RegisterCallCount, 1); + EXPECT_TRUE(LastRegisterFlags & + UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY); + + syclexp::unregister_host_memory(Ptr, Ctxt); + EXPECT_EQ(UnregisterCallCount, 1); +} + +// A null pointer is rejected with errc::invalid before reaching UR. +TEST_F(RegisterHostMemoryTests, NullPointerThrowsInvalid) { + try { + syclexp::register_host_memory(nullptr, 4096, Ctxt); + FAIL() << "Expected an exception."; + } catch (const sycl::exception &E) { + EXPECT_EQ(E.code(), make_error_code(errc::invalid)); + } + EXPECT_EQ(RegisterCallCount, 0); +} + +// A zero size is rejected with errc::invalid before reaching UR. +TEST_F(RegisterHostMemoryTests, ZeroSizeThrowsInvalid) { + int Storage = 0; + try { + syclexp::register_host_memory(&Storage, 0, Ctxt); + FAIL() << "Expected an exception."; + } catch (const sycl::exception &E) { + EXPECT_EQ(E.code(), make_error_code(errc::invalid)); + } + EXPECT_EQ(RegisterCallCount, 0); +} + +// Unregistering a null pointer is rejected with errc::invalid. +TEST_F(RegisterHostMemoryTests, UnregisterNullThrowsInvalid) { + try { + syclexp::unregister_host_memory(nullptr, Ctxt); + FAIL() << "Expected an exception."; + } catch (const sycl::exception &E) { + EXPECT_EQ(E.code(), make_error_code(errc::invalid)); + } + EXPECT_EQ(UnregisterCallCount, 0); +} + +// When no device in the context supports the feature, registration throws +// errc::feature_not_supported and does not reach UR. +TEST_F(RegisterHostMemoryTests, UnsupportedDeviceThrowsFeatureNotSupported) { + DeviceSupportsRegister = false; + int Storage = 0; + try { + syclexp::register_host_memory(&Storage, 4096, Ctxt); + FAIL() << "Expected an exception."; + } catch (const sycl::exception &E) { + EXPECT_EQ(E.code(), make_error_code(errc::feature_not_supported)); + } + EXPECT_EQ(RegisterCallCount, 0); +} + +// The runtime maps a UR INVALID_VALUE result from either registration API to +// errc::invalid. The result is injected via the mock to test the mapping in +// isolation. +TEST_F(RegisterHostMemoryTests, BackendInvalidValueMapsToInvalid) { + int Storage = 0; + + RegisterResult = UR_RESULT_ERROR_INVALID_VALUE; + try { + syclexp::register_host_memory(&Storage, 4096, Ctxt); + FAIL() << "Expected an exception."; + } catch (const sycl::exception &E) { + EXPECT_EQ(E.code(), make_error_code(errc::invalid)); + } + + UnregisterResult = UR_RESULT_ERROR_INVALID_VALUE; + try { + syclexp::unregister_host_memory(&Storage, Ctxt); + FAIL() << "Expected an exception."; + } catch (const sycl::exception &E) { + EXPECT_EQ(E.code(), make_error_code(errc::invalid)); + } +} + +} // namespace From 9cd5f527e4fb0658c4e64b5d453d064ae9dae600 Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Tue, 23 Jun 2026 07:28:38 -0700 Subject: [PATCH 3/8] Format --- .../sycl/ext/oneapi/experimental/register_host_memory.hpp | 8 ++++---- sycl/include/sycl/sycl.hpp | 2 +- sycl/test-e2e/USM/register_host_memory.cpp | 8 +++----- sycl/unittests/Extensions/RegisterHostMemory.cpp | 6 +++--- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/register_host_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/register_host_memory.hpp index 4af270d1f1c57..85fb45b78d59c 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/register_host_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/register_host_memory.hpp @@ -23,9 +23,8 @@ namespace ext::oneapi::experimental { // Indicates that device code will only read from the registered range. Device // writes to a range registered with this property are undefined behavior. -struct read_only_key - : detail::compile_time_property_key { +struct read_only_key : detail::compile_time_property_key< + detail::PropKind::RegisterHostMemoryReadOnly> { using value_t = property_value; }; @@ -47,7 +46,8 @@ __SYCL_EXPORT void unregister_host_memory(void *Ptr, const context &Ctxt); // Lowers a compile-time property list to the runtime flags word. template uint32_t getRegisterHostMemoryFlags() { uint32_t Flags = 0; - if constexpr (std::decay_t::template has_property()) + if constexpr (std::decay_t::template has_property< + read_only_key>()) Flags |= register_host_memory_flag_read_only; return Flags; } diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 7104705d5fb82..664fecf9f1184 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -126,8 +126,8 @@ can be disabled by setting SYCL_DISABLE_FSYCL_SYCLHPP_WARNING macro.") #include #include #include -#include #include +#include #include #include #include diff --git a/sycl/test-e2e/USM/register_host_memory.cpp b/sycl/test-e2e/USM/register_host_memory.cpp index 807c63afc6c80..97b43789b618a 100644 --- a/sycl/test-e2e/USM/register_host_memory.cpp +++ b/sycl/test-e2e/USM/register_host_memory.cpp @@ -108,8 +108,8 @@ int main() { // A range whose end address would overflow the host address space must // throw errc::invalid. Threw = false; - void *TopPage = reinterpret_cast(static_cast(-1) & - ~(PageSize - 1)); + void *TopPage = + reinterpret_cast(static_cast(-1) & ~(PageSize - 1)); try { syclexp::register_host_memory(TopPage, PageSize, Ctxt); } catch (const sycl::exception &E) { @@ -161,9 +161,7 @@ int main() { int *Out = sycl::malloc_host(NumElems, Q); assert(Out != nullptr && "host allocation failed"); - Q.parallel_for(NumElems, [=](sycl::id<1> I) { - Out[I] = Data[I] * 2; - }).wait(); + Q.parallel_for(NumElems, [=](sycl::id<1> I) { Out[I] = Data[I] * 2; }).wait(); for (size_t I = 0; I < NumElems; ++I) assert(Out[I] == (static_cast(I) + 1) * 2); diff --git a/sycl/unittests/Extensions/RegisterHostMemory.cpp b/sycl/unittests/Extensions/RegisterHostMemory.cpp index 3bc4aaaf09a49..ec763bad28649 100644 --- a/sycl/unittests/Extensions/RegisterHostMemory.cpp +++ b/sycl/unittests/Extensions/RegisterHostMemory.cpp @@ -55,11 +55,11 @@ ur_result_t redefinedDeviceGetInfo(void *pParams) { } ur_result_t redefinedHostAllocRegister(void *pParams) { - auto Params = *static_cast(pParams); + auto Params = + *static_cast(pParams); LastRegisterPtr = *Params.ppHostMem; LastRegisterSize = *Params.psize; - LastRegisterFlags = - *Params.ppProperties ? (*Params.ppProperties)->flags : 0; + LastRegisterFlags = *Params.ppProperties ? (*Params.ppProperties)->flags : 0; ++RegisterCallCount; return RegisterResult; } From 7fc552de8c540a323270dd5d5f5a0800ef8d6859 Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Tue, 23 Jun 2026 08:19:24 -0700 Subject: [PATCH 4/8] Update td for aspect --- llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 32121db8306e0..2c5903a033301 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -105,6 +105,7 @@ def Aspectext_intel_xe_cores_per_cluster : Aspect<"ext_intel_xe_cores_per_cluste def Aspectext_intel_eus_per_xe_core : Aspect<"ext_intel_eus_per_xe_core">; def Aspectext_intel_max_lanes_per_hw_thread : Aspect<"ext_intel_max_lanes_per_hw_thread">; def Aspectext_oneapi_ipc_physical_memory : Aspect<"ext_oneapi_ipc_physical_memory">; +def Aspectext_oneapi_register_host_memory : Aspect<"ext_oneapi_register_host_memory">; // Deprecated aspects def AspectInt64_base_atomics : Aspect<"int64_base_atomics">; @@ -191,7 +192,8 @@ def : TargetInfo<"__TestAspectList", Aspectext_intel_xe_cores_per_cluster, Aspectext_intel_eus_per_xe_core, Aspectext_intel_max_lanes_per_hw_thread, - Aspectext_oneapi_ipc_physical_memory], + Aspectext_oneapi_ipc_physical_memory, + Aspectext_oneapi_register_host_memory], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT // match. From 7429eeaaf032b9e7ea66d24494ac4269e885e218 Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Tue, 23 Jun 2026 15:29:27 -0700 Subject: [PATCH 5/8] Address review --- sycl/source/detail/usm/usm_impl.cpp | 32 +++++++++++ sycl/test-e2e/USM/register_host_memory.cpp | 1 + .../Extensions/RegisterHostMemory.cpp | 56 ++++++++++++++----- 3 files changed, 75 insertions(+), 14 deletions(-) diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index b4ede34b72490..15b3dcba9695c 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -20,9 +20,17 @@ #include #include +#include #include +#include #include +#ifdef _WIN32 +#include +#else +#include +#endif + #ifdef XPTI_ENABLE_INSTRUMENTATION // Include the headers necessary for emitting // traces using the trace framework @@ -614,6 +622,16 @@ void release_from_device_copy(const void *Ptr, const queue &Queue) { namespace detail { +static size_t getHostPageSize() { +#ifdef _WIN32 + SYSTEM_INFO Info; + GetSystemInfo(&Info); + return static_cast(Info.dwPageSize); +#else + return static_cast(sysconf(_SC_PAGESIZE)); +#endif +} + // Throws errc::feature_not_supported unless every device in the context // reports aspect::ext_oneapi_register_host_memory. static void checkRegisterHostMemorySupport(const context &Ctxt) { @@ -654,6 +672,20 @@ void register_host_memory(void *Ptr, size_t NumBytes, const context &Ctxt, if (NumBytes == 0) throw sycl::exception(make_error_code(errc::invalid), "register_host_memory: size must not be zero."); + const size_t PageSize = getHostPageSize(); + if (reinterpret_cast(Ptr) % PageSize != 0) + throw sycl::exception( + make_error_code(errc::invalid), + "register_host_memory: pointer must be aligned to the host page size."); + if (NumBytes % PageSize != 0) + throw sycl::exception( + make_error_code(errc::invalid), + "register_host_memory: size must be a multiple of the host page size."); + if (NumBytes > + std::numeric_limits::max() - reinterpret_cast(Ptr)) + throw sycl::exception(make_error_code(errc::invalid), + "register_host_memory: range is not representable in " + "the host address space."); checkRegisterHostMemorySupport(Ctxt); ur_exp_usm_host_alloc_register_properties_t Props = { diff --git a/sycl/test-e2e/USM/register_host_memory.cpp b/sycl/test-e2e/USM/register_host_memory.cpp index 97b43789b618a..d1b4c2348b6af 100644 --- a/sycl/test-e2e/USM/register_host_memory.cpp +++ b/sycl/test-e2e/USM/register_host_memory.cpp @@ -23,6 +23,7 @@ #if defined(_WIN32) #include +#include #else #include #endif diff --git a/sycl/unittests/Extensions/RegisterHostMemory.cpp b/sycl/unittests/Extensions/RegisterHostMemory.cpp index ec763bad28649..af255e1c75c00 100644 --- a/sycl/unittests/Extensions/RegisterHostMemory.cpp +++ b/sycl/unittests/Extensions/RegisterHostMemory.cpp @@ -13,16 +13,33 @@ #include #include +#include #include #include #include +#ifdef _WIN32 +#include +#else +#include +#endif + using namespace sycl; namespace syclexp = sycl::ext::oneapi::experimental; namespace { +static size_t getHostPageSize() { +#ifdef _WIN32 + SYSTEM_INFO Info; + GetSystemInfo(&Info); + return static_cast(Info.dwPageSize); +#else + return static_cast(sysconf(_SC_PAGESIZE)); +#endif +} + // Whether the mock device should advertise support for host memory // registration via aspect::ext_oneapi_register_host_memory. thread_local bool DeviceSupportsRegister = true; @@ -102,29 +119,31 @@ class RegisterHostMemoryTests : public ::testing::Test { // A successful registration forwards the exact pointer and size to UR and a // matching unregistration forwards the same pointer. TEST_F(RegisterHostMemoryTests, RegisterAndUnregisterForwardArguments) { - int Storage = 0; - void *Ptr = &Storage; - constexpr size_t Size = 4096; + const size_t PageSize = getHostPageSize(); + void *Ptr = detail::OSUtil::alignedAlloc(PageSize, PageSize); + ASSERT_NE(Ptr, nullptr); - syclexp::register_host_memory(Ptr, Size, Ctxt); + syclexp::register_host_memory(Ptr, PageSize, Ctxt); EXPECT_EQ(RegisterCallCount, 1); EXPECT_EQ(LastRegisterPtr, Ptr); - EXPECT_EQ(LastRegisterSize, Size); + EXPECT_EQ(LastRegisterSize, PageSize); // No properties passed: no registration flags should be set. EXPECT_EQ(LastRegisterFlags, 0u); syclexp::unregister_host_memory(Ptr, Ctxt); EXPECT_EQ(UnregisterCallCount, 1); EXPECT_EQ(LastUnregisterPtr, Ptr); + + detail::OSUtil::alignedFree(Ptr); } // The read_only property is lowered to the UR read-only registration flag. TEST_F(RegisterHostMemoryTests, ReadOnlyPropertyLowersToFlag) { - int Storage = 0; - void *Ptr = &Storage; - constexpr size_t Size = 4096; + const size_t PageSize = getHostPageSize(); + void *Ptr = detail::OSUtil::alignedAlloc(PageSize, PageSize); + ASSERT_NE(Ptr, nullptr); - syclexp::register_host_memory(Ptr, Size, Ctxt, + syclexp::register_host_memory(Ptr, PageSize, Ctxt, syclexp::properties{syclexp::read_only}); EXPECT_EQ(RegisterCallCount, 1); EXPECT_TRUE(LastRegisterFlags & @@ -132,6 +151,8 @@ TEST_F(RegisterHostMemoryTests, ReadOnlyPropertyLowersToFlag) { syclexp::unregister_host_memory(Ptr, Ctxt); EXPECT_EQ(UnregisterCallCount, 1); + + detail::OSUtil::alignedFree(Ptr); } // A null pointer is rejected with errc::invalid before reaching UR. @@ -172,25 +193,30 @@ TEST_F(RegisterHostMemoryTests, UnregisterNullThrowsInvalid) { // errc::feature_not_supported and does not reach UR. TEST_F(RegisterHostMemoryTests, UnsupportedDeviceThrowsFeatureNotSupported) { DeviceSupportsRegister = false; - int Storage = 0; + const size_t PageSize = getHostPageSize(); + void *Ptr = detail::OSUtil::alignedAlloc(PageSize, PageSize); + ASSERT_NE(Ptr, nullptr); try { - syclexp::register_host_memory(&Storage, 4096, Ctxt); + syclexp::register_host_memory(Ptr, PageSize, Ctxt); FAIL() << "Expected an exception."; } catch (const sycl::exception &E) { EXPECT_EQ(E.code(), make_error_code(errc::feature_not_supported)); } EXPECT_EQ(RegisterCallCount, 0); + detail::OSUtil::alignedFree(Ptr); } // The runtime maps a UR INVALID_VALUE result from either registration API to // errc::invalid. The result is injected via the mock to test the mapping in // isolation. TEST_F(RegisterHostMemoryTests, BackendInvalidValueMapsToInvalid) { - int Storage = 0; + const size_t PageSize = getHostPageSize(); + void *Ptr = detail::OSUtil::alignedAlloc(PageSize, PageSize); + ASSERT_NE(Ptr, nullptr); RegisterResult = UR_RESULT_ERROR_INVALID_VALUE; try { - syclexp::register_host_memory(&Storage, 4096, Ctxt); + syclexp::register_host_memory(Ptr, PageSize, Ctxt); FAIL() << "Expected an exception."; } catch (const sycl::exception &E) { EXPECT_EQ(E.code(), make_error_code(errc::invalid)); @@ -198,11 +224,13 @@ TEST_F(RegisterHostMemoryTests, BackendInvalidValueMapsToInvalid) { UnregisterResult = UR_RESULT_ERROR_INVALID_VALUE; try { - syclexp::unregister_host_memory(&Storage, Ctxt); + syclexp::unregister_host_memory(Ptr, Ctxt); FAIL() << "Expected an exception."; } catch (const sycl::exception &E) { EXPECT_EQ(E.code(), make_error_code(errc::invalid)); } + + detail::OSUtil::alignedFree(Ptr); } } // namespace From 58b0c761df6dbb519c6907a23a5f10f938998c5b Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Wed, 24 Jun 2026 13:03:28 -0700 Subject: [PATCH 6/8] Require v2 --- sycl/test-e2e/USM/register_host_memory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/test-e2e/USM/register_host_memory.cpp b/sycl/test-e2e/USM/register_host_memory.cpp index d1b4c2348b6af..4f0ca89a16697 100644 --- a/sycl/test-e2e/USM/register_host_memory.cpp +++ b/sycl/test-e2e/USM/register_host_memory.cpp @@ -1,4 +1,5 @@ // REQUIRES: aspect-ext_oneapi_register_host_memory +// REQUIRES: level_zero_v2_adapter // RUN: %{build} -o %t.out // RUN: %{run} %t.out From e14282cc7d2ab726071ab5c873006e72a3f09e5b Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Thu, 25 Jun 2026 11:46:19 -0700 Subject: [PATCH 7/8] Address review --- sycl/source/detail/usm/usm_impl.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index 15b3dcba9695c..04c7ef6b80c70 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -623,13 +623,16 @@ void release_from_device_copy(const void *Ptr, const queue &Queue) { namespace detail { static size_t getHostPageSize() { + static const size_t PageSize = []() { #ifdef _WIN32 - SYSTEM_INFO Info; - GetSystemInfo(&Info); - return static_cast(Info.dwPageSize); + SYSTEM_INFO Info; + GetSystemInfo(&Info); + return static_cast(Info.dwPageSize); #else - return static_cast(sysconf(_SC_PAGESIZE)); + return static_cast(sysconf(_SC_PAGESIZE)); #endif + }(); + return PageSize; } // Throws errc::feature_not_supported unless every device in the context From c9bb13b170f86d1b15c238d72681656b0d20d745 Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Thu, 25 Jun 2026 12:05:50 -0700 Subject: [PATCH 8/8] Fix --- sycl/source/detail/usm/usm_impl.cpp | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index 04c7ef6b80c70..0d688c51d7e45 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -622,19 +622,6 @@ void release_from_device_copy(const void *Ptr, const queue &Queue) { namespace detail { -static size_t getHostPageSize() { - static const size_t PageSize = []() { -#ifdef _WIN32 - SYSTEM_INFO Info; - GetSystemInfo(&Info); - return static_cast(Info.dwPageSize); -#else - return static_cast(sysconf(_SC_PAGESIZE)); -#endif - }(); - return PageSize; -} - // Throws errc::feature_not_supported unless every device in the context // reports aspect::ext_oneapi_register_host_memory. static void checkRegisterHostMemorySupport(const context &Ctxt) { @@ -675,7 +662,15 @@ void register_host_memory(void *Ptr, size_t NumBytes, const context &Ctxt, if (NumBytes == 0) throw sycl::exception(make_error_code(errc::invalid), "register_host_memory: size must not be zero."); - const size_t PageSize = getHostPageSize(); + static const size_t PageSize = []() { +#ifdef _WIN32 + SYSTEM_INFO Info; + GetSystemInfo(&Info); + return static_cast(Info.dwPageSize); +#else + return static_cast(sysconf(_SC_PAGESIZE)); +#endif + }(); if (reinterpret_cast(Ptr) % PageSize != 0) throw sycl::exception( make_error_code(errc::invalid),