From 57ca600591b081ac2bd26dcd137bd55e0d86b5a1 Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Tue, 5 May 2026 12:41:01 -0700 Subject: [PATCH] Move tsl/platform/* interface headers to xla/tsl/platform/*. This is to fix a layering issue without introducing circular dependencies when using bzlmod. The implementations in xla/tsl/platform/default need direct access to the interface headers from the same repository (i.e. not from TSL `exports_files`) in order to propagate the correct `CcInfo` for determining proper include paths. Adding a direct dep back into `tsl/platform` would introduce a circular dependency. PiperOrigin-RevId: 910851800 --- tsl/platform/context.h | 28 +- tsl/platform/cpu_info.cc | 610 ----------------------------- tsl/platform/cpu_info.h | 182 +-------- tsl/platform/demangle.h | 15 +- tsl/platform/error_logging.h | 12 +- tsl/platform/host_info.h | 56 +-- tsl/platform/human_readable_json.h | 33 +- tsl/platform/init_main.h | 22 +- tsl/platform/load_library.h | 20 +- tsl/platform/mem.h | 101 +---- tsl/platform/net.h | 53 +-- tsl/platform/numa.h | 44 +-- tsl/platform/snappy.h | 37 +- tsl/platform/tracing.cc | 2 +- tsl/platform/tracing.h | 133 +------ 15 files changed, 23 insertions(+), 1325 deletions(-) delete mode 100644 tsl/platform/cpu_info.cc diff --git a/tsl/platform/context.h b/tsl/platform/context.h index 525436757..43682a08a 100644 --- a/tsl/platform/context.h +++ b/tsl/platform/context.h @@ -16,32 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_TSL_PLATFORM_CONTEXT_H_ #define TENSORFLOW_TSL_PLATFORM_CONTEXT_H_ -#include "tsl/platform/platform.h" - -namespace tsl { - -enum class ContextKind { - // Initial state with default (empty) values. - kDefault, - // Initial state inherited from the creating or scheduling thread. - kThread, -}; - -// Context is a container for request-specific information that should be passed -// to threads that perform related work. The default constructor should capture -// all relevant context. -class Context; - -// Scoped object that sets the current thread's context until the object is -// destroyed. -class WithContext; - -} // namespace tsl - -#if defined(PLATFORM_GOOGLE) -#include "xla/tsl/platform/google/context.h" // IWYU pragma: export -#else -#include "xla/tsl/platform/default/context.h" // IWYU pragma: export -#endif +#include "xla/tsl/platform/context.h" #endif // TENSORFLOW_TSL_PLATFORM_CONTEXT_H_ diff --git a/tsl/platform/cpu_info.cc b/tsl/platform/cpu_info.cc deleted file mode 100644 index 71f7f8f56..000000000 --- a/tsl/platform/cpu_info.cc +++ /dev/null @@ -1,610 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tsl/platform/cpu_info.h" - -// Required for cross compile with clang -#ifdef PLATFORM_WINDOWS -#include -#endif - -#include - -#include "absl/base/call_once.h" -#include "xla/tsl/platform/logging.h" -#include "xla/tsl/platform/types.h" -#include "tsl/platform/platform.h" -#if defined(PLATFORM_IS_X86) -#include // NOLINT -#endif -#if defined(PLATFORM_IS_ARM64) && !defined(__APPLE__) && !defined(__OpenBSD__) -#include /* Get HWCAP bits from asm/hwcap.h */ -#include -#ifndef HWCAP_CPUID -#define HWCAP_CPUID (1 << 11) -#endif -#include -#endif // PLATFORM_IS_ARM64 && !__APPLE__ && !__OpenBSD__ - -// SIMD extension querying is only available on x86. -#ifdef PLATFORM_IS_X86 -#ifdef PLATFORM_WINDOWS -// Visual Studio defines a builtin function for CPUID, so use that if possible. -#define GETCPUID(a, b, c, d, a_inp, c_inp) \ - { \ - int cpu_info[4] = {-1}; \ - __cpuidex(cpu_info, a_inp, c_inp); \ - a = cpu_info[0]; \ - b = cpu_info[1]; \ - c = cpu_info[2]; \ - d = cpu_info[3]; \ - } -#else -// Otherwise use gcc-format assembler to implement the underlying instructions. -#define GETCPUID(a, b, c, d, a_inp, c_inp) \ - asm("mov %%rbx, %%rdi\n" \ - "cpuid\n" \ - "xchg %%rdi, %%rbx\n" \ - : "=a"(a), "=D"(b), "=c"(c), "=d"(d) \ - : "a"(a_inp), "2"(c_inp)) -#endif -#endif - -namespace tsl { -namespace port { -namespace { - -#ifdef PLATFORM_IS_X86 -class CPUIDInfo; -void InitCPUIDInfo(); - -CPUIDInfo *cpuid = nullptr; - -#ifdef PLATFORM_WINDOWS -// Visual Studio defines a builtin function, so use that if possible. -int GetXCR0EAX() { return _xgetbv(0); } -#else -int GetXCR0EAX() { - int eax, edx; - asm("XGETBV" : "=a"(eax), "=d"(edx) : "c"(0)); - return eax; -} -#endif - -// Structure for basic CPUID info -class CPUIDInfo { - public: - CPUIDInfo() - : have_adx_(0), - have_aes_(0), - have_amx_bf16_(0), - have_amx_fp16_(0), - have_amx_int8_(0), - have_amx_tile_(0), - have_avx_(0), - have_avx2_(0), - have_avx512f_(0), - have_avx512cd_(0), - have_avx512er_(0), - have_avx512pf_(0), - have_avx512vl_(0), - have_avx512bw_(0), - have_avx512dq_(0), - have_avx512vbmi_(0), - have_avx512ifma_(0), - have_avx512_4vnniw_(0), - have_avx512_4fmaps_(0), - have_avx512_bf16_(0), - have_avx512_fp16_(0), - have_avx512_vnni_(0), - have_avx_vnni_(0), - have_avx_vnni_int8_(0), - have_avx_ne_convert_(0), - have_bmi1_(0), - have_bmi2_(0), - have_cmov_(0), - have_cmpxchg16b_(0), - have_cmpxchg8b_(0), - have_f16c_(0), - have_fma_(0), - have_mmx_(0), - have_pclmulqdq_(0), - have_popcnt_(0), - have_prefetchw_(0), - have_prefetchwt1_(0), - have_rdrand_(0), - have_rdseed_(0), - have_smap_(0), - have_sse_(0), - have_sse2_(0), - have_sse3_(0), - have_sse4_1_(0), - have_sse4_2_(0), - have_ssse3_(0), - have_hypervisor_(0) {} - - static void Initialize() { - // Initialize cpuid struct - CHECK(cpuid == nullptr) << __func__ << " ran more than once"; - cpuid = new CPUIDInfo; - - uint32_t eax, ebx, ecx, edx; - - // Get vendor string (issue CPUID with eax = 0) - GETCPUID(eax, ebx, ecx, edx, 0, 0); - cpuid->vendor_str_.append(reinterpret_cast(&ebx), 4); - cpuid->vendor_str_.append(reinterpret_cast(&edx), 4); - cpuid->vendor_str_.append(reinterpret_cast(&ecx), 4); - - // To get general information and extended features we send eax = 1 and - // ecx = 0 to cpuid. The response is returned in eax, ebx, ecx and edx. - // (See Intel 64 and IA-32 Architectures Software Developer's Manual - // Volume 2A: Instruction Set Reference, A-M CPUID). - GETCPUID(eax, ebx, ecx, edx, 1, 0); - - cpuid->model_num_ = static_cast((eax >> 4) & 0xf); - cpuid->family_ = static_cast((eax >> 8) & 0xf); - - cpuid->have_aes_ = (ecx >> 25) & 0x1; - cpuid->have_cmov_ = (edx >> 15) & 0x1; - cpuid->have_cmpxchg16b_ = (ecx >> 13) & 0x1; - cpuid->have_cmpxchg8b_ = (edx >> 8) & 0x1; - cpuid->have_mmx_ = (edx >> 23) & 0x1; - cpuid->have_pclmulqdq_ = (ecx >> 1) & 0x1; - cpuid->have_popcnt_ = (ecx >> 23) & 0x1; - cpuid->have_rdrand_ = (ecx >> 30) & 0x1; - cpuid->have_sse2_ = (edx >> 26) & 0x1; - cpuid->have_sse3_ = ecx & 0x1; - cpuid->have_sse4_1_ = (ecx >> 19) & 0x1; - cpuid->have_sse4_2_ = (ecx >> 20) & 0x1; - cpuid->have_sse_ = (edx >> 25) & 0x1; - cpuid->have_ssse3_ = (ecx >> 9) & 0x1; - cpuid->have_hypervisor_ = (ecx >> 31) & 1; - - const uint64_t xcr0_xmm_mask = 0x2; - const uint64_t xcr0_ymm_mask = 0x4; - const uint64_t xcr0_maskreg_mask = 0x20; - const uint64_t xcr0_zmm0_15_mask = 0x40; - const uint64_t xcr0_zmm16_31_mask = 0x80; - - const uint64_t xcr0_avx_mask = xcr0_xmm_mask | xcr0_ymm_mask; - const uint64_t xcr0_avx512_mask = xcr0_avx_mask | xcr0_maskreg_mask | - xcr0_zmm0_15_mask | xcr0_zmm16_31_mask; - - const bool have_avx = - // Does the OS support XGETBV instruction use by applications? - ((ecx >> 27) & 0x1) && - // Does the OS save/restore XMM and YMM state? - ((GetXCR0EAX() & xcr0_avx_mask) == xcr0_avx_mask) && - // Is AVX supported in hardware? - ((ecx >> 28) & 0x1); - - const bool have_avx512 = - // Does the OS support XGETBV instruction use by applications? - ((ecx >> 27) & 0x1) && - // Does the OS save/restore ZMM state? - ((GetXCR0EAX() & xcr0_avx512_mask) == xcr0_avx512_mask); - - cpuid->have_avx_ = have_avx; - cpuid->have_fma_ = have_avx && ((ecx >> 12) & 0x1); - cpuid->have_f16c_ = have_avx && ((ecx >> 29) & 0x1); - - // Get standard level 7 structured extension features (issue CPUID with - // eax = 7 and ecx = 0), which is required to check for AVX2 support as - // well as other Haswell (and beyond) features. (See Intel 64 and IA-32 - // Architectures Software Developer's Manual Volume 2A: Instruction Set - // Reference, A-M CPUID). - GETCPUID(eax, ebx, ecx, edx, 7, 0); - const uint32_t kMaxNumSubLeaves = eax; - - cpuid->have_adx_ = (ebx >> 19) & 0x1; - cpuid->have_avx2_ = have_avx && ((ebx >> 5) & 0x1); - cpuid->have_bmi1_ = (ebx >> 3) & 0x1; - cpuid->have_bmi2_ = (ebx >> 8) & 0x1; - cpuid->have_prefetchwt1_ = ecx & 0x1; - cpuid->have_rdseed_ = (ebx >> 18) & 0x1; - cpuid->have_smap_ = (ebx >> 20) & 0x1; - - cpuid->have_avx512f_ = have_avx512 && ((ebx >> 16) & 0x1); - cpuid->have_avx512cd_ = have_avx512 && ((ebx >> 28) & 0x1); - cpuid->have_avx512er_ = have_avx512 && ((ebx >> 27) & 0x1); - cpuid->have_avx512pf_ = have_avx512 && ((ebx >> 26) & 0x1); - cpuid->have_avx512vl_ = have_avx512 && ((ebx >> 31) & 0x1); - cpuid->have_avx512bw_ = have_avx512 && ((ebx >> 30) & 0x1); - cpuid->have_avx512dq_ = have_avx512 && ((ebx >> 17) & 0x1); - cpuid->have_avx512vbmi_ = have_avx512 && ((ecx >> 1) & 0x1); - cpuid->have_avx512ifma_ = have_avx512 && ((ebx >> 21) & 0x1); - cpuid->have_avx512_4vnniw_ = have_avx512 && ((edx >> 2) & 0x1); - cpuid->have_avx512_4fmaps_ = have_avx512 && ((edx >> 3) & 0x1); - cpuid->have_avx512_vnni_ = have_avx512 && ((ecx >> 11) & 0x1); - - // The latest Intel 64 and IA-32 Architectures Software Developer's Manual - // Volume 2A (December 2021) does not have information on AMX yet. We use - // the information from Xbyak in oneDNN. - // https://github.com/oneapi-src/oneDNN/blob/acf8d214cedfe7e24c9446bacc1f9f648c9273f8/src/cpu/x64/xbyak/xbyak_util.h#L536-L538 - cpuid->have_amx_tile_ = (edx >> 24) & 0x1; - cpuid->have_amx_int8_ = (edx >> 25) & 0x1; - cpuid->have_amx_bf16_ = (edx >> 22) & 0x1; - - // Check for avx512_fp16 using information from Xbyak in oneDNN: - // https://github.com/oneapi-src/oneDNN/blob/acf8d214cedfe7e24c9446bacc1f9f648c9273f8/src/cpu/x64/xbyak/xbyak_util.h#L516 - cpuid->have_avx512_fp16_ = have_avx512 && ((edx >> 23) & 0x1); - - // Get more Structured Extended Feature info by issuing CPUID with - // sub-leaf = 1 (eax = 7, ecx = 1) - if (kMaxNumSubLeaves >= 1) { - GETCPUID(eax, ebx, ecx, edx, 7, 1); - cpuid->have_avx_vnni_ = (eax >> 4) & 0x1; - cpuid->have_avx512_bf16_ = have_avx512 && ((eax >> 5) & 0x1); - cpuid->have_amx_fp16_ = (eax >> 21) & 0x1; - cpuid->have_avx_vnni_int8_ = (edx >> 4) & 0x1; - cpuid->have_avx_ne_convert_ = (edx >> 5) & 0x1; - } - } - - static bool TestFeature(CPUFeature feature) { - InitCPUIDInfo(); - // clang-format off - switch (feature) { - case ADX: return cpuid->have_adx_; - case AES: return cpuid->have_aes_; - case AMX_BF16: return cpuid->have_amx_bf16_; - case AMX_FP16: return cpuid->have_amx_fp16_; - case AMX_INT8: return cpuid->have_amx_int8_; - case AMX_TILE: return cpuid->have_amx_tile_; - case AVX2: return cpuid->have_avx2_; - case AVX: return cpuid->have_avx_; - case AVX512F: return cpuid->have_avx512f_; - case AVX512CD: return cpuid->have_avx512cd_; - case AVX512PF: return cpuid->have_avx512pf_; - case AVX512ER: return cpuid->have_avx512er_; - case AVX512VL: return cpuid->have_avx512vl_; - case AVX512BW: return cpuid->have_avx512bw_; - case AVX512DQ: return cpuid->have_avx512dq_; - case AVX512VBMI: return cpuid->have_avx512vbmi_; - case AVX512IFMA: return cpuid->have_avx512ifma_; - case AVX512_4VNNIW: return cpuid->have_avx512_4vnniw_; - case AVX512_4FMAPS: return cpuid->have_avx512_4fmaps_; - case AVX512_BF16: return cpuid->have_avx512_bf16_; - case AVX512_FP16: return cpuid->have_avx512_fp16_; - case AVX512_VNNI: return cpuid->have_avx512_vnni_; - case AVX_VNNI: return cpuid->have_avx_vnni_; - case AVX_VNNI_INT8: return cpuid->have_avx_vnni_int8_; - case AVX_NE_CONVERT: return cpuid->have_avx_ne_convert_; - case BMI1: return cpuid->have_bmi1_; - case BMI2: return cpuid->have_bmi2_; - case CMOV: return cpuid->have_cmov_; - case CMPXCHG16B: return cpuid->have_cmpxchg16b_; - case CMPXCHG8B: return cpuid->have_cmpxchg8b_; - case F16C: return cpuid->have_f16c_; - case FMA: return cpuid->have_fma_; - case MMX: return cpuid->have_mmx_; - case PCLMULQDQ: return cpuid->have_pclmulqdq_; - case POPCNT: return cpuid->have_popcnt_; - case PREFETCHW: return cpuid->have_prefetchw_; - case PREFETCHWT1: return cpuid->have_prefetchwt1_; - case RDRAND: return cpuid->have_rdrand_; - case RDSEED: return cpuid->have_rdseed_; - case SMAP: return cpuid->have_smap_; - case SSE2: return cpuid->have_sse2_; - case SSE3: return cpuid->have_sse3_; - case SSE4_1: return cpuid->have_sse4_1_; - case SSE4_2: return cpuid->have_sse4_2_; - case SSE: return cpuid->have_sse_; - case SSSE3: return cpuid->have_ssse3_; - case HYPERVISOR: return cpuid->have_hypervisor_; - default: - break; - } - // clang-format on - return false; - } - - std::string vendor_str() const { return vendor_str_; } - int family() const { return family_; } - int model_num() { return model_num_; } - - private: - int have_adx_ : 1; - int have_aes_ : 1; - int have_amx_bf16_ : 1; - int have_amx_fp16_ : 1; - int have_amx_int8_ : 1; - int have_amx_tile_ : 1; - int have_avx_ : 1; - int have_avx2_ : 1; - int have_avx512f_ : 1; - int have_avx512cd_ : 1; - int have_avx512er_ : 1; - int have_avx512pf_ : 1; - int have_avx512vl_ : 1; - int have_avx512bw_ : 1; - int have_avx512dq_ : 1; - int have_avx512vbmi_ : 1; - int have_avx512ifma_ : 1; - int have_avx512_4vnniw_ : 1; - int have_avx512_4fmaps_ : 1; - int have_avx512_bf16_ : 1; - int have_avx512_fp16_ : 1; - int have_avx512_vnni_ : 1; - int have_avx_vnni_ : 1; - int have_avx_vnni_int8_ : 1; - int have_avx_ne_convert_ : 1; - int have_bmi1_ : 1; - int have_bmi2_ : 1; - int have_cmov_ : 1; - int have_cmpxchg16b_ : 1; - int have_cmpxchg8b_ : 1; - int have_f16c_ : 1; - int have_fma_ : 1; - int have_mmx_ : 1; - int have_pclmulqdq_ : 1; - int have_popcnt_ : 1; - int have_prefetchw_ : 1; - int have_prefetchwt1_ : 1; - int have_rdrand_ : 1; - int have_rdseed_ : 1; - int have_smap_ : 1; - int have_sse_ : 1; - int have_sse2_ : 1; - int have_sse3_ : 1; - int have_sse4_1_ : 1; - int have_sse4_2_ : 1; - int have_ssse3_ : 1; - int have_hypervisor_ : 1; - std::string vendor_str_; - int family_; - int model_num_; -}; - -absl::once_flag cpuid_once_flag; - -void InitCPUIDInfo() { - // This ensures that CPUIDInfo::Initialize() is called exactly - // once regardless of how many threads concurrently call us - absl::call_once(cpuid_once_flag, CPUIDInfo::Initialize); -} - -#endif // PLATFORM_IS_X86 - -#if defined(PLATFORM_IS_ARM64) && !defined(__APPLE__) && !defined(__OpenBSD__) - -class CPUIDInfo; -void InitCPUIDInfo(); -void InitCPUIDFeatureInfo(); - -CPUIDInfo *cpuid = nullptr; - -// Structure for basic CPUID info. -class CPUIDInfo { - public: - CPUIDInfo() - : implementer_(0), - variant_(0), - cpunum_(0), - is_arm_neoverse_v1_(0), - is_arm_neoverse_n1_(0), - has_bf16_(0) {} - - static void Initialize() { - // Initialize CPUIDInfo pointer. - if (cpuid != nullptr) return; - - cpuid = new CPUIDInfo; - // Make sure CPUID registers are available before reading them. - if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) { - return; - } - - int present_cpu = -1; -#ifndef PLATFORM_WINDOWS - std::ifstream CPUspresent; - CPUspresent.open("/sys/devices/system/cpu/present", std::ios::in); - if (CPUspresent.is_open()) { - std::string line; - if (static_cast(getline(CPUspresent, line))) { - // We just need to find one CPU that is active - // from which we can read MIDR register to find - // implement, variant and revision information. - auto ending = line.end(); - for (auto i = line.begin(); i < line.end(); ++i) { - if (*i == '-' || *i == ',') { - ending = i; - break; - } - } - line.erase(ending, line.end()); - // That should be the fist number. - present_cpu = std::stoi(line); - } - } -#endif // !PLATFORM_WINDOWS - - if (present_cpu == -1) { - return; - } - -#ifndef PLATFORM_WINDOWS - std::stringstream str; - str << "/sys/devices/system/cpu/cpu" << present_cpu - << "/regs/identification/midr_el1"; - std::ifstream midr_el1_file(str.str(), std::ios::in); - if (midr_el1_file.is_open()) { - std::string line; - if (static_cast(getline(midr_el1_file, line))) { - uint32 midr_el1 = std::stoul(line, nullptr, 16); - - // Unpack variant and CPU ID. - // Reference: - // https://developer.arm.com/documentation/101427/0101/Register-descriptions/AArch64-system-registers/MIDR-EL1--Main-ID-Register--EL1. - cpuid->implementer_ = (midr_el1 >> 24) & 0xFF; - cpuid->variant_ = (midr_el1 >> 20) & 0xF; - cpuid->cpunum_ = (midr_el1 >> 4) & 0xFFF; - if (cpuid->implementer_ == 0x41) { - switch (cpuid->cpunum_) { - case 0xd40: // ARM NEOVERSE V1 - cpuid->is_arm_neoverse_v1_ = 1; - break; - case 0xd0c: // ARM NEOVERSE N1 - cpuid->is_arm_neoverse_n1_ = 1; - break; - default: - break; - } - } - } - } -#endif // !PLATFORM_WINDOWS - } - static void InitializeCPUFeature() { - // Initialize CPUIDInfo pointer. - if (cpuid != nullptr) return; - - cpuid = new CPUIDInfo; - - const uint32_t hwcaps2 = getauxval(AT_HWCAP2); - cpuid->has_bf16_ = IsFeatureSupported(hwcaps2, kHwcap2Bf16); - } - - int implementer() const { return implementer_; } - int cpunum() const { return cpunum_; } - - static bool TestAarch64CPU(Aarch64CPU cpu) { - InitCPUIDInfo(); - // clang-format off - switch (cpu) { - case ARM_NEOVERSE_V1: - return cpuid->is_arm_neoverse_v1_; - default: - return false; - } - // clang-format on - return false; - } - - static bool IsFeatureSupported(uint64_t features, uint64_t feature_mask) { - return (features & feature_mask); - } - static bool TestAarch64Feature(CPUFeature feature) { - InitCPUIDFeatureInfo(); - switch (feature) { - case AARCH64_BF16: - return cpuid->has_bf16_; - default: - break; - } - return false; - } - - private: - static constexpr uint64_t kHwcap2Bf16 = 1ull << 14; - int implementer_; - int variant_; - int cpunum_; - int is_arm_neoverse_v1_; // ARM NEOVERSE V1 - int is_arm_neoverse_n1_; // ARM NEOVERSE N1 - int has_bf16_; -}; - -absl::once_flag cpuid_once_flag; - -void InitCPUIDInfo() { - absl::call_once(cpuid_once_flag, CPUIDInfo::Initialize); -} - -void InitCPUIDFeatureInfo() { - absl::call_once(cpuid_once_flag, CPUIDInfo::InitializeCPUFeature); -} - -#endif // PLATFORM_IS_ARM64 && !__APPLE__ && !__OpenBSD__ - -} // namespace - -bool TestCPUFeature(CPUFeature feature) { -#ifdef PLATFORM_IS_X86 - return CPUIDInfo::TestFeature(feature); -#elif defined(PLATFORM_IS_ARM64) && !defined(__APPLE__) && !defined(__OpenBSD__) - return CPUIDInfo::TestAarch64Feature(feature); -#else - return false; -#endif -} - -bool TestAarch64CPU(Aarch64CPU cpu) { -#if defined(PLATFORM_IS_ARM64) && !defined(__APPLE__) && !defined(__OpenBSD__) - return CPUIDInfo::TestAarch64CPU(cpu); -#else - return false; -#endif -} - -std::string CPUVendorIDString() { -#ifdef PLATFORM_IS_X86 - InitCPUIDInfo(); - return cpuid->vendor_str(); -#else - return ""; -#endif -} - -int CPUFamily() { -#ifdef PLATFORM_IS_X86 - InitCPUIDInfo(); - return cpuid->family(); -#elif defined(PLATFORM_IS_ARM64) && !defined(__APPLE__) && !defined(__OpenBSD__) - InitCPUIDInfo(); - return cpuid->implementer(); -#else - return 0; -#endif -} - -int CPUModelNum() { -#ifdef PLATFORM_IS_X86 - InitCPUIDInfo(); - return cpuid->model_num(); -#elif defined(PLATFORM_IS_ARM64) && !defined(__APPLE__) && !defined(__OpenBSD__) - InitCPUIDInfo(); - return cpuid->cpunum(); -#else - return 0; -#endif -} - -int CPUIDNumSMT() { -#ifdef PLATFORM_IS_X86 - // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration - // https://software.intel.com/en-us/articles/intel-sdm (Vol 3A) - // Section: Detecting Hardware Multi-threads Support and Topology - // Uses CPUID Leaf 11 to enumerate system topology on Intel x86 architectures - // Other cases not supported - uint32_t eax, ebx, ecx, edx; - // Check if system supports Leaf 11 - GETCPUID(eax, ebx, ecx, edx, 0, 0); - if (eax >= 11) { - // 1) Leaf 11 available? CPUID.(EAX=11, ECX=0):EBX != 0 - // 2) SMT_Mask_Width = CPUID.(EAX=11, ECX=0):EAX[4:0] if CPUID.(EAX=11, - // ECX=0):ECX[15:8] is 1 - GETCPUID(eax, ebx, ecx, edx, 11, 0); - if (ebx != 0 && ((ecx & 0xff00) >> 8) == 1) { - return 1 << (eax & 0x1f); // 2 ^ SMT_Mask_Width - } - } -#endif // PLATFORM_IS_X86 - return 0; -} - -} // namespace port -} // namespace tsl diff --git a/tsl/platform/cpu_info.h b/tsl/platform/cpu_info.h index 30eca5c41..d1590ff19 100644 --- a/tsl/platform/cpu_info.h +++ b/tsl/platform/cpu_info.h @@ -16,186 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_TSL_PLATFORM_CPU_INFO_H_ #define TENSORFLOW_TSL_PLATFORM_CPU_INFO_H_ -#include - -// TODO(ahentz): This is not strictly required here but, for historical -// reasons, many people depend on cpu_info.h in order to use kLittleEndian. -#include "xla/tsl/platform/byte_order.h" -#include "tsl/platform/platform.h" - -#if defined(_MSC_VER) -// included so __cpuidex function is available for GETCPUID on Windows -#include -#endif - -namespace tsl { -namespace port { - -// Returns an estimate of the number of schedulable CPUs for this -// process. Usually, it's constant throughout the lifetime of a -// process, but it might change if the underlying cluster management -// software can change it dynamically. If the underlying call fails, a default -// value (e.g. `4`) may be returned. -int NumSchedulableCPUs(); - -// Returns an estimate for the maximum parallelism for this process. -// Applications should avoid running more than this number of threads with -// intensive workloads concurrently to avoid performance degradation and -// contention. -// This value is either the number of schedulable CPUs, or a value specific to -// the underlying cluster management. Applications should assume this value can -// change throughout the lifetime of the process. This function must not be -// called during initialization, i.e., before main() has started. -int MaxParallelism(); - -// Returns an estimate for the maximum parallelism for this process on the -// provided numa node, or any numa node if `numa_node` is kNUMANoAffinity. -// See MaxParallelism() for more information. -int MaxParallelism(int numa_node); - -// Returns the total number of CPUs on the system. This number should -// not change even if the underlying cluster management software may -// change the number of schedulable CPUs. Unlike `NumSchedulableCPUs`, if the -// underlying call fails, an invalid value of -1 will be returned; -// the user must check for validity. -static constexpr int kUnknownCPU = -1; -int NumTotalCPUs(); - -// Returns the id of the current CPU. Returns -1 if the current CPU cannot be -// identified. If successful, the return value will be in [0, NumTotalCPUs()). -int GetCurrentCPU(); - -// Returns an estimate of the number of hyperthreads per physical core -// on the CPU -int NumHyperthreadsPerCore(); - -// Mostly ISA related features that we care about -// Do not change numeric assignments. -enum CPUFeature { - //===--------------------------------------------------------------------===// - // x86 features - //===--------------------------------------------------------------------===// - MMX = 0, - SSE = 1, - SSE2 = 2, - SSE3 = 3, - SSSE3 = 4, - SSE4_1 = 5, - SSE4_2 = 6, - CMOV = 7, - CMPXCHG8B = 8, - CMPXCHG16B = 9, - POPCNT = 10, - AES = 11, - AVX = 12, - RDRAND = 13, - AVX2 = 14, - FMA = 15, - F16C = 16, - PCLMULQDQ = 17, - RDSEED = 18, - ADX = 19, - SMAP = 20, - - // Prefetch Vector Data Into Caches with Intent to Write and T1 Hint - // http://www.felixcloutier.com/x86/PREFETCHWT1.html. - // You probably want PREFETCHW instead. - PREFETCHWT1 = 21, - - BMI1 = 22, - BMI2 = 23, - HYPERVISOR = 25, // 0 when on a real CPU, 1 on (well-behaved) hypervisor. - - // Prefetch Data into Caches in Anticipation of a Write (3D Now!). - // http://www.felixcloutier.com/x86/PREFETCHW.html - PREFETCHW = 26, - - // AVX-512: 512-bit vectors (plus masking, etc.) in Knights Landing, - // Skylake, Xeon, etc. Each of these entries is a different subset of - // instructions, various combinations of which occur on various CPU types. - AVX512F = 27, // Foundation - AVX512CD = 28, // Conflict detection - AVX512ER = 29, // Exponential and reciprocal - AVX512PF = 30, // Prefetching - AVX512VL = 31, // Shorter vector lengths - AVX512BW = 32, // Byte and word - AVX512DQ = 33, // Dword and qword - AVX512VBMI = 34, // Bit manipulation - AVX512IFMA = 35, // Integer multiply-add - AVX512_4VNNIW = 36, // Integer neural network (Intel Xeon Phi only) - AVX512_4FMAPS = 37, // Floating point neural network (Intel Xeon Phi only) - AVX512_VNNI = 38, // Integer neural network - AVX512_BF16 = 39, // Bfloat16 neural network - - // AVX version of AVX512_VNNI in CPUs such as Alder Lake and Sapphire Rapids. - AVX_VNNI = 40, // Integer neural network - - // AMX: Advanced Matrix Extension in Sapphire Rapids. - // Perform matrix multiplication on the Tile Matrix Multiply (TMUL) unit, - // supporting two popular data types in neural networks, int8 and bfloat16. - AMX_TILE = 41, // Tile configuration and load/store - AMX_INT8 = 42, // Int8 tile matrix multiplication - AMX_BF16 = 43, // Bfloat16 tile matrix multiplication - - AVX512_FP16 = 44, // Float16 neural network - AMX_FP16 = 45, // Float16 tile matrix multiplication - AVX_NE_CONVERT = 46, // Instructions for faster bfloat16, float16 convert. - AVX_VNNI_INT8 = 47, // VNNI instructions for combinations of u8, s8 dtypes. - - //===--------------------------------------------------------------------===// - // AArch64 features - //===--------------------------------------------------------------------===// - AARCH64_NEON = 1000, - AARCH64_SVE = 1001, - AARCH64_SVE2 = 1002, - AARCH64_BF16 = 1003, // BF16 on AArch64 systems -}; - -enum Aarch64CPU { - ARM_NEOVERSE_N1 = 0, // ARM NEOVERSE N1 - ARM_NEOVERSE_V1 = 1, // ARM NEOVERSE V1 -}; -// Checks whether the current AArch64 processor is supported. -bool TestAarch64CPU(Aarch64CPU cpu); - -// Checks whether the current processor supports one of the features above. -// Checks CPU registers to return hardware capabilities. -bool TestCPUFeature(CPUFeature feature); - -// Checks whether the current processor is x86. -constexpr bool IsX86CPU() { -#ifdef PLATFORM_IS_X86 - return true; -#else - return false; -#endif -} - -// Checks whether the current processor is aarch64. -constexpr bool IsAarch64CPU() { -#if defined(PLATFORM_IS_ARM64) && !defined(__APPLE__) && !defined(__OpenBSD__) - return true; -#else - return false; -#endif -} - -// Returns CPU Vendor string (i.e. 'GenuineIntel', 'AuthenticAMD', etc.) -std::string CPUVendorIDString(); - -// Returns CPU family. -int CPUFamily(); - -// Returns CPU model number. -int CPUModelNum(); - -// Returns nominal core processor cycles per second of each processor. -double NominalCPUFrequency(); - -// Returns num of hyperthreads per physical core -int CPUIDNumSMT(); - -} // namespace port -} // namespace tsl +#include "xla/tsl/platform/cpu_info.h" #endif // TENSORFLOW_TSL_PLATFORM_CPU_INFO_H_ diff --git a/tsl/platform/demangle.h b/tsl/platform/demangle.h index 8171be236..9501268ba 100644 --- a/tsl/platform/demangle.h +++ b/tsl/platform/demangle.h @@ -16,19 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_TSL_PLATFORM_DEMANGLE_H_ #define TENSORFLOW_TSL_PLATFORM_DEMANGLE_H_ -#include "xla/tsl/platform/types.h" - -namespace tsl { -namespace port { - -// If the compiler supports, demangle a mangled symbol name and return -// the demangled name. Otherwise, returns 'mangled' as is. -std::string Demangle(const char* mangled); -inline std::string Demangle(const std::string mangled) { - return Demangle(mangled.c_str()); -} - -} // namespace port -} // namespace tsl +#include "xla/tsl/platform/demangle.h" #endif // TENSORFLOW_TSL_PLATFORM_DEMANGLE_H_ diff --git a/tsl/platform/error_logging.h b/tsl/platform/error_logging.h index 0ee471d0f..ffca83860 100644 --- a/tsl/platform/error_logging.h +++ b/tsl/platform/error_logging.h @@ -1,4 +1,4 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,14 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_TSL_PLATFORM_ERROR_LOGGING_H_ #define TENSORFLOW_TSL_PLATFORM_ERROR_LOGGING_H_ -#include "absl/status/status.h" -#include "absl/strings/string_view.h" - -namespace tsl::error_logging { - -absl::Status Log(absl::string_view component, absl::string_view subcomponent, - absl::string_view error_msg); - -} +#include "xla/tsl/platform/error_logging.h" #endif // TENSORFLOW_TSL_PLATFORM_ERROR_LOGGING_H_ diff --git a/tsl/platform/host_info.h b/tsl/platform/host_info.h index a791a85db..8ffd8158c 100644 --- a/tsl/platform/host_info.h +++ b/tsl/platform/host_info.h @@ -1,4 +1,4 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,58 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_TSL_PLATFORM_HOST_INFO_H_ #define TENSORFLOW_TSL_PLATFORM_HOST_INFO_H_ -#include -#include -#include -#include - -#include "absl/strings/string_view.h" - -namespace tsl { -namespace port { - -// Statistical data of IO operations performed by the job. -struct IOStatistics { - struct Distribution { - uint64_t count = 0; - double mean = 0.0; - double std_dev = 0.0; - }; - // Distribution of round trip IO latency in microseconds. - Distribution roundtrip_latency_usec; - // Distribution of data received by IO reads in bytes. - Distribution response_bytes; -}; - -// Return the hostname of the machine on which this process is running. -std::string Hostname(); - -// Return the job name as a string if it exists, otherwise return an empty -// string. -std::string JobName(); - -// Returns the Borg job UID as an int64_t if it exists. Otherwise return -1. -int64_t JobUid(); - -// Returns the Borg task ID as an int64_t if it exists. Otherwise return -1. -int64_t TaskId(); - -// Retrieves the host file read statistics. -IOStatistics GetIOStatistics(); - -// Returns a copy of a given hostname that does not include domain or port. -// Examples: -// "foo.example.com:80" -> "foo" -// "foo:80" -> "foo" -inline std::string StripDomainAndPort(absl::string_view hoststring) { - size_t strip_from = hoststring.find_first_of(".:"); - if (strip_from != absl::string_view::npos) { - return std::string(hoststring.substr(0, strip_from)); - } - return std::string(hoststring); -} - -} // namespace port -} // namespace tsl +#include "xla/tsl/platform/host_info.h" #endif // TENSORFLOW_TSL_PLATFORM_HOST_INFO_H_ diff --git a/tsl/platform/human_readable_json.h b/tsl/platform/human_readable_json.h index 897a6b110..48d410f1c 100644 --- a/tsl/platform/human_readable_json.h +++ b/tsl/platform/human_readable_json.h @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,35 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_TSL_PLATFORM_HUMAN_READABLE_JSON_H_ #define TENSORFLOW_TSL_PLATFORM_HUMAN_READABLE_JSON_H_ -#include - -#include "absl/status/status.h" -#include "absl/status/statusor.h" -#include "xla/tsl/platform/types.h" -#include "tsl/platform/protobuf.h" - -namespace tsl { - -// Converts a proto to a JSON-like string that's meant to be human-readable -// but still machine-parseable. -// -// This string may not be strictly JSON-compliant, but it must be parsable by -// HumanReadableJSONToProto. -// -// When ignore_accuracy_loss = true, this function may ignore JavaScript -// accuracy loss with large integers. -absl::StatusOr ProtoToHumanReadableJson( - const protobuf::Message& proto, bool ignore_accuracy_loss); -absl::StatusOr ProtoToHumanReadableJson( - const protobuf::MessageLite& proto, bool ignore_accuracy_loss); - -// Converts a string produced by ProtoToHumanReadableJSON to a protobuf. Not -// guaranteed to work for general JSON. -absl::Status HumanReadableJsonToProto(const std::string& str, - protobuf::Message* proto); -absl::Status HumanReadableJsonToProto(const std::string& str, - protobuf::MessageLite* proto); - -} // namespace tsl +#include "xla/tsl/platform/human_readable_json.h" #endif // TENSORFLOW_TSL_PLATFORM_HUMAN_READABLE_JSON_H_ diff --git a/tsl/platform/init_main.h b/tsl/platform/init_main.h index 98b7cd67b..d887aa93a 100644 --- a/tsl/platform/init_main.h +++ b/tsl/platform/init_main.h @@ -1,4 +1,4 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,24 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_TSL_PLATFORM_INIT_MAIN_H_ #define TENSORFLOW_TSL_PLATFORM_INIT_MAIN_H_ -#include -#include - -#include "absl/time/time.h" -#include "xla/tsl/platform/macros.h" - -namespace tsl { -namespace port { - -void InitMain(const char* usage, int* argc, char*** argv); - -TF_EXPORT const std::vector& GetArgvs(); - -TF_EXPORT const char* GetArgv0(); - -absl::Duration GetUptime(); - -} // namespace port -} // namespace tsl +#include "xla/tsl/platform/init_main.h" #endif // TENSORFLOW_TSL_PLATFORM_INIT_MAIN_H_ diff --git a/tsl/platform/load_library.h b/tsl/platform/load_library.h index 5a42f2a34..470598a02 100644 --- a/tsl/platform/load_library.h +++ b/tsl/platform/load_library.h @@ -1,4 +1,4 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,22 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_TSL_PLATFORM_LOAD_LIBRARY_H_ #define TENSORFLOW_TSL_PLATFORM_LOAD_LIBRARY_H_ -#include - -#include "absl/status/status.h" - -namespace tsl { - -namespace internal { - -absl::Status LoadDynamicLibrary(const char* library_filename, void** handle); -absl::Status GetSymbolFromLibrary(void* handle, const char* symbol_name, - void** symbol); -std::string FormatLibraryFileName(const std::string& name, - const std::string& version); - -} // namespace internal - -} // namespace tsl +#include "xla/tsl/platform/load_library.h" #endif // TENSORFLOW_TSL_PLATFORM_LOAD_LIBRARY_H_ diff --git a/tsl/platform/mem.h b/tsl/platform/mem.h index d2b3286f0..362216dc0 100644 --- a/tsl/platform/mem.h +++ b/tsl/platform/mem.h @@ -1,4 +1,4 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,103 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_TSL_PLATFORM_MEM_H_ #define TENSORFLOW_TSL_PLATFORM_MEM_H_ -#include -#include -#include - -// TODO(cwhipkey): remove this when callers use annotations directly. -#include "absl/base/macros.h" -#include "xla/tsl/platform/dynamic_annotations.h" -#include "xla/tsl/platform/types.h" -#include "tsl/platform/platform.h" - -namespace tsl { -namespace port { - -// Aligned allocation/deallocation. `minimum_alignment` must be a power of 2 -// and a multiple of sizeof(void*). -void* AlignedMalloc(size_t size, std::align_val_t minimum_alignment); -ABSL_DEPRECATE_AND_INLINE() -inline void* AlignedMalloc(size_t size, int minimum_alignment) { - return AlignedMalloc(size, static_cast(minimum_alignment)); -} -void AlignedFree(void* aligned_memory); -void AlignedSizedFree(void* aligned_memory, size_t size, - std::align_val_t minimum_alignment); -ABSL_DEPRECATE_AND_INLINE() -inline void AlignedSizedFree(void* aligned_memory, size_t alignment, - size_t size) { - AlignedSizedFree(aligned_memory, size, - static_cast(alignment)); -} - -// An allocator that allocates memory with the given minimum alignment. -template -struct AlignedAllocator { - using value_type = T; - - value_type* allocate(size_t n) { - return static_cast( - AlignedMalloc(n * sizeof(value_type), minimum_alignment)); - } - - void deallocate(value_type* p, size_t n) { - return AlignedSizedFree(p, n, minimum_alignment); - } -}; - -void* Malloc(size_t size); -void* Realloc(void* ptr, size_t size); -void Free(void* ptr); - -// Tries to release num_bytes of free memory back to the operating -// system for reuse. Use this routine with caution -- to get this -// memory back may require faulting pages back in by the OS, and -// that may be slow. -// -// Currently, if a malloc implementation does not support this -// routine, this routine is a no-op. -void MallocExtension_ReleaseToSystem(std::size_t num_bytes); - -// Returns the actual number N of bytes reserved by the malloc for the -// pointer p. This number may be equal to or greater than the number -// of bytes requested when p was allocated. -// -// This routine is just useful for statistics collection. The -// client must *not* read or write from the extra bytes that are -// indicated by this call. -// -// Example, suppose the client gets memory by calling -// p = malloc(10) -// and GetAllocatedSize(p) may return 16. The client must only use the -// first 10 bytes p[0..9], and not attempt to read or write p[10..15]. -// -// Currently, if a malloc implementation does not support this -// routine, this routine returns 0. -std::size_t MallocExtension_GetAllocatedSize(const void* p); - -struct MemoryInfo { - int64_t total = 0; - int64_t free = 0; -}; - -struct MemoryBandwidthInfo { - int64_t bw_used = 0; // memory bandwidth used across all CPU (in MBs/second) -}; - -// Retrieves the host memory information. If any of the fields in the returned -// MemoryInfo structure is INT64_MAX, it means such information is not -// available. -MemoryInfo GetMemoryInfo(); - -// Retrieves the host memory bandwidth information. If any field in the returned -// structure is INT64_MAX, it means such information is not available. -MemoryBandwidthInfo GetMemoryBandwidthInfo(); - -// Returns the amount of RAM available in bytes, or INT64_MAX if unknown. -static inline int64_t AvailableRam() { return GetMemoryInfo().free; } - -} // namespace port -} // namespace tsl +#include "xla/tsl/platform/mem.h" #endif // TENSORFLOW_TSL_PLATFORM_MEM_H_ diff --git a/tsl/platform/net.h b/tsl/platform/net.h index e42ec6b91..46339a49d 100644 --- a/tsl/platform/net.h +++ b/tsl/platform/net.h @@ -16,57 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_TSL_PLATFORM_NET_H_ #define TENSORFLOW_TSL_PLATFORM_NET_H_ -#include - -#include "absl/base/macros.h" -#include "absl/strings/str_cat.h" -namespace tsl { -namespace net { - -// Checks whether the given port is available for binding to a TCP or UDP -// socket. If the port is available, returns true. Otherwise, returns false. If -// error is not null, sets error to a string describing the error. -bool IsPortAvailable(int* port, bool is_tcp, std::string* error); - -inline bool IsPortAvailable(int port, bool is_tcp, std::string* error) { - if (port <= 0) { - if (error != nullptr) { - *error = - absl::StrCat("Invalid port number: ", port, ". Port must be > 0."); - } - return false; - } - return IsPortAvailable(&port, is_tcp, error); -} -// Return a port number that is not currently bound to any TCP or UDP port. -// On success returns the assigned port number. Otherwise returns -1. -int PickUnusedPort(); - -// Same as PickUnusedPort(), but fails a CHECK() if a port can't be found. In -// that case, the error message is logged to FATAL. -int PickUnusedPortOrDie(); - -// Relinquish a claim on the given port which was previously returned by -// PickUnusedPort[OrDie](). This allows PickUnusedPort[OrDie]() to return -// the given port to another caller in the future. Since the number of -// ports the portserver will give to a process is limited (typically 200), -// recycling ports after they are no longer needed can help avoid -// exhausting them. 'port' must be a positive number that was previously -// returned by PickUnusedPort[OrDie](), and not yet recycled, otherwise an -// abort may occur. -void RecycleUnusedPort(int port); -} // namespace net - -namespace internal { -ABSL_DEPRECATE_AND_INLINE() -inline int PickUnusedPort() { return tsl::net::PickUnusedPort(); } - -ABSL_DEPRECATE_AND_INLINE() -inline int PickUnusedPortOrDie() { return tsl::net::PickUnusedPortOrDie(); } - -ABSL_DEPRECATE_AND_INLINE() -inline void RecycleUnusedPort(int port) { tsl::net::RecycleUnusedPort(port); } -} // namespace internal -} // namespace tsl +#include "xla/tsl/platform/net.h" #endif // TENSORFLOW_TSL_PLATFORM_NET_H_ diff --git a/tsl/platform/numa.h b/tsl/platform/numa.h index 85d16a4b3..a219722e7 100644 --- a/tsl/platform/numa.h +++ b/tsl/platform/numa.h @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,46 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_TSL_PLATFORM_NUMA_H_ #define TENSORFLOW_TSL_PLATFORM_NUMA_H_ -#include +#include "xla/tsl/platform/numa.h" -namespace tsl { -namespace port { - -// Returns true iff NUMA functions are supported. -bool NUMAEnabled(); - -// Returns the number of NUMA nodes present with respect to CPU operations. -// Typically this will be the number of sockets where some RAM has greater -// affinity with one socket than another. -int NUMANumNodes(); - -static const int kNUMANoAffinity = -1; - -// If possible sets affinity of the current thread to the specified NUMA node. -// If node == kNUMANoAffinity removes affinity to any particular node. -void NUMASetThreadNodeAffinity(int node); - -// Returns NUMA node affinity of the current thread, kNUMANoAffinity if none. -int NUMAGetThreadNodeAffinity(); - -// Like AlignedMalloc, but allocates memory with affinity to the specified NUMA -// node. -// -// Notes: -// 1. node must be >= 0 and < NUMANumNodes. -// 1. minimum_alignment must a factor of system page size, the memory -// returned will be page-aligned. -// 2. This function is likely significantly slower than AlignedMalloc -// and should not be used for lots of small allocations. It makes more -// sense as a backing allocator for BFCAllocator, PoolAllocator, or similar. -void* NUMAMalloc(int node, size_t size, int minimum_alignment); - -// Memory allocated by NUMAMalloc must be freed via NUMAFree. -void NUMAFree(void* ptr, size_t size); - -// Returns NUMA node affinity of memory address, kNUMANoAffinity if none. -int NUMAGetMemAffinity(const void* ptr); - -} // namespace port -} // namespace tsl #endif // TENSORFLOW_TSL_PLATFORM_NUMA_H_ diff --git a/tsl/platform/snappy.h b/tsl/platform/snappy.h index aa292e190..588925f7d 100644 --- a/tsl/platform/snappy.h +++ b/tsl/platform/snappy.h @@ -1,4 +1,4 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,39 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_TSL_PLATFORM_SNAPPY_H_ #define TENSORFLOW_TSL_PLATFORM_SNAPPY_H_ -#include "xla/tsl/platform/types.h" - -#if !defined(PLATFORM_WINDOWS) -#include -namespace tsl { -using ::iovec; // NOLINT(misc-unused-using-decls) -} // namespace tsl -#else -namespace tsl { -struct iovec { - void* iov_base; - size_t iov_len; -}; -} // namespace tsl -#endif - -namespace tsl { -namespace port { - -// Snappy compression/decompression support -bool Snappy_Compress(const char* input, size_t length, std::string* output); - -bool Snappy_CompressFromIOVec(const struct iovec* iov, - size_t uncompressed_length, std::string* output); - -bool Snappy_GetUncompressedLength(const char* input, size_t length, - size_t* result); -bool Snappy_Uncompress(const char* input, size_t length, char* output); - -bool Snappy_UncompressToIOVec(const char* compressed, size_t compressed_length, - const struct iovec* iov, size_t iov_cnt); - -} // namespace port -} // namespace tsl +#include "xla/tsl/platform/snappy.h" #endif // TENSORFLOW_TSL_PLATFORM_SNAPPY_H_ diff --git a/tsl/platform/tracing.cc b/tsl/platform/tracing.cc index 66a239de0..1190afe72 100644 --- a/tsl/platform/tracing.cc +++ b/tsl/platform/tracing.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tsl/platform/tracing.h" +#include "xla/tsl/platform/tracing.h" #include #include diff --git a/tsl/platform/tracing.h b/tsl/platform/tracing.h index 89728d4d3..7fff1c519 100644 --- a/tsl/platform/tracing.h +++ b/tsl/platform/tracing.h @@ -1,4 +1,4 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,135 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_TSL_PLATFORM_TRACING_H_ #define TENSORFLOW_TSL_PLATFORM_TRACING_H_ -// Tracing interface - -#include - -#include "xla/tsl/platform/macros.h" -#include "xla/tsl/platform/types.h" -#include "tsl/platform/platform.h" -#include "tsl/platform/stringpiece.h" - -namespace tsl { -namespace tracing { - -// This enumeration contains the identifiers of all TensorFlow CPU profiler -// events. It must be kept in sync with the code in GetEventCategoryName(). -enum struct EventCategory : unsigned { - kScheduleClosure = 0, - kRunClosure = 1, - kCompute = 2, - kNumCategories = 3 // sentinel - keep last -}; -constexpr unsigned GetNumEventCategories() { - return static_cast(EventCategory::kNumCategories); -} -const char* GetEventCategoryName(EventCategory); - -// Interface for CPU profiler events. -class EventCollector { - public: - virtual ~EventCollector() {} - virtual void RecordEvent(uint64_t arg) const = 0; - virtual void StartRegion(uint64_t arg) const = 0; - virtual void StopRegion() const = 0; - - // Annotates the current thread with a name. - static void SetCurrentThreadName(const char* name); - // Returns whether event collection is enabled. - static bool IsEnabled(); - - private: - friend void SetEventCollector(EventCategory, const EventCollector*); - friend const EventCollector* GetEventCollector(EventCategory); - - static std::array instances_; -}; -// Set the callback for RecordEvent and ScopedRegion of category. -// Not thread safe. Only call while EventCollector::IsEnabled returns false. -void SetEventCollector(EventCategory category, const EventCollector* collector); - -// Returns the callback for RecordEvent and ScopedRegion of category if -// EventCollector::IsEnabled(), otherwise returns null. -inline const EventCollector* GetEventCollector(EventCategory category) { - if (EventCollector::IsEnabled()) { - return EventCollector::instances_[static_cast(category)]; - } - return nullptr; -} - -// Returns a unique id to pass to RecordEvent/ScopedRegion. Never returns zero. -uint64_t GetUniqueArg(); - -// Returns an id for name to pass to RecordEvent/ScopedRegion. -uint64_t GetArgForName(absl::string_view name); - -// Records an atomic event through the currently registered EventCollector. -inline void RecordEvent(EventCategory category, uint64_t arg) { - if (auto collector = GetEventCollector(category)) { - collector->RecordEvent(arg); - } -} - -// Records an event for the duration of the instance lifetime through the -// currently registered EventCollector. -class ScopedRegion { - public: - ScopedRegion(ScopedRegion&& other) noexcept // Move-constructible. - : collector_(other.collector_) { - other.collector_ = nullptr; - } - - ScopedRegion(EventCategory category, uint64_t arg) - : collector_(GetEventCollector(category)) { - if (collector_) { - collector_->StartRegion(arg); - } - } - - // Same as ScopedRegion(category, GetUniqueArg()), but faster if - // EventCollector::IsEnabled() returns false. - explicit ScopedRegion(EventCategory category) - : collector_(GetEventCollector(category)) { - if (collector_) { - collector_->StartRegion(GetUniqueArg()); - } - } - - // Same as ScopedRegion(category, GetArgForName(name)), but faster if - // EventCollector::IsEnabled() returns false. - ScopedRegion(EventCategory category, absl::string_view name) - : collector_(GetEventCollector(category)) { - if (collector_) { - collector_->StartRegion(GetArgForName(name)); - } - } - - ~ScopedRegion() { - if (collector_ && EventCollector::IsEnabled()) { - collector_->StopRegion(); - } - } - - bool IsEnabled() const { return collector_ != nullptr; } - - private: - ScopedRegion(const ScopedRegion&) = delete; - void operator=(const ScopedRegion&) = delete; - - const EventCollector* collector_; -}; - -// Return the pathname of the directory where we are writing log files. -const char* GetLogDir(); - -} // namespace tracing -} // namespace tsl - -#if defined(PLATFORM_GOOGLE) -#include "xla/tsl/platform/google/tracing_impl.h" -#else -#include "xla/tsl/platform/default/tracing_impl.h" -#endif +#include "xla/tsl/platform/tracing.h" #endif // TENSORFLOW_TSL_PLATFORM_TRACING_H_