From 57ca600591b081ac2bd26dcd137bd55e0d86b5a1 Mon Sep 17 00:00:00 2001
From: Antonio Sanchez <cantonios@google.com>
Date: Tue, 5 May 2026 12:41:01 -0700
Subject: [PATCH] Move tsl/platform/* interface headers to xla/tsl/platform/*.

This is to fix a layering issue without introducing circular dependencies
when using bzlmod.  The implementations in xla/tsl/platform/default
need direct access to the interface headers from the same repository
(i.e. not from TSL `exports_files`) in order to propagate the correct
`CcInfo` for determining proper include paths.  Adding a direct dep
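back into `tsl/platform` would introduce a circular dependency.

The old `tsl/platform/*.h` headers are kept as thin forwarding headers
that only include their `xla/tsl/platform/*` counterparts, and the
`tsl/platform/cpu_info.cc` implementation file is deleted from this
location.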

PiperOrigin-RevId: 910851800
---
 tsl/platform/context.h             |  28 +-
 tsl/platform/cpu_info.cc           | 610 -----------------------------
 tsl/platform/cpu_info.h            | 182 +--------
 tsl/platform/demangle.h            |  15 +-
 tsl/platform/error_logging.h       |  12 +-
 tsl/platform/host_info.h           |  56 +--
 tsl/platform/human_readable_json.h |  33 +-
 tsl/platform/init_main.h           |  22 +-
 tsl/platform/load_library.h        |  20 +-
 tsl/platform/mem.h                 | 101 +----
 tsl/platform/net.h                 |  53 +--
 tsl/platform/numa.h                |  44 +--
 tsl/platform/snappy.h              |  37 +-
 tsl/platform/tracing.cc            |   2 +-
 tsl/platform/tracing.h             | 133 +------
 15 files changed, 23 insertions(+), 1325 deletions(-)
 delete mode 100644 tsl/platform/cpu_info.cc
diff --git a/tsl/platform/context.h b/tsl/platform/context.h
index 525436757..43682a08a 100644
--- a/tsl/platform/context.h
+++ b/tsl/platform/context.h
@@ -16,32 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_TSL_PLATFORM_CONTEXT_H_
 #define TENSORFLOW_TSL_PLATFORM_CONTEXT_H_
 
-#include "tsl/platform/platform.h"
-
-namespace tsl {
-
-enum class ContextKind {
-  // Initial state with default (empty) values.
-  kDefault,
-  // Initial state inherited from the creating or scheduling thread.
-  kThread,
-};
-
-// Context is a container for request-specific information that should be passed
-// to threads that perform related work. The default constructor should capture
-// all relevant context.
-class Context;
-
-// Scoped object that sets the current thread's context until the object is
-// destroyed.
-class WithContext;
-
-}  // namespace tsl
-
-#if defined(PLATFORM_GOOGLE)
-#include "xla/tsl/platform/google/context.h"  // IWYU pragma: export
-#else
-#include "xla/tsl/platform/default/context.h"  // IWYU pragma: export
-#endif
+#include "xla/tsl/platform/context.h"
 
 #endif  // TENSORFLOW_TSL_PLATFORM_CONTEXT_H_
diff --git a/tsl/platform/cpu_info.cc b/tsl/platform/cpu_info.cc
deleted file mode 100644
index 71f7f8f56..000000000
--- a/tsl/platform/cpu_info.cc
+++ /dev/null
@@ -1,610 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tsl/platform/cpu_info.h"
-
-// Required for cross compile with clang
-#ifdef PLATFORM_WINDOWS
-#include <intrin.h>
-#endif
-
-#include <string>
-
-#include "absl/base/call_once.h"
-#include "xla/tsl/platform/logging.h"
-#include "xla/tsl/platform/types.h"
-#include "tsl/platform/platform.h"
-#if defined(PLATFORM_IS_X86)
-#include <mutex>  // NOLINT
-#endif
-#if defined(PLATFORM_IS_ARM64) && !defined(__APPLE__) && !defined(__OpenBSD__)
-#include <asm/hwcap.h> /* Get HWCAP bits from asm/hwcap.h */
-#include <sys/auxv.h>
-#ifndef HWCAP_CPUID
-#define HWCAP_CPUID (1 << 11)
-#endif
-#include <fstream>
-#endif  // PLATFORM_IS_ARM64 && !__APPLE__ && !__OpenBSD__
-
-// SIMD extension querying is only available on x86.
-#ifdef PLATFORM_IS_X86
-#ifdef PLATFORM_WINDOWS
-// Visual Studio defines a builtin function for CPUID, so use that if possible.
-#define GETCPUID(a, b, c, d, a_inp, c_inp) \
-  {                                        \
-    int cpu_info[4] = {-1};                \
-    __cpuidex(cpu_info, a_inp, c_inp);     \
-    a = cpu_info[0];                       \
-    b = cpu_info[1];                       \
-    c = cpu_info[2];                       \
-    d = cpu_info[3];                       \
-  }
-#else
-// Otherwise use gcc-format assembler to implement the underlying instructions.
-#define GETCPUID(a, b, c, d, a_inp, c_inp) \
-  asm("mov %%rbx, %%rdi\n"                 \
-      "cpuid\n"                            \
-      "xchg %%rdi, %%rbx\n"                \
-      : "=a"(a), "=D"(b), "=c"(c), "=d"(d) \
-      : "a"(a_inp), "2"(c_inp))
-#endif
-#endif
-
-namespace tsl {
-namespace port {
-namespace {
-
-#ifdef PLATFORM_IS_X86
-class CPUIDInfo;
-void InitCPUIDInfo();
-
-CPUIDInfo *cpuid = nullptr;
-
-#ifdef PLATFORM_WINDOWS
-// Visual Studio defines a builtin function, so use that if possible.
-int GetXCR0EAX() { return _xgetbv(0); }
-#else
-int GetXCR0EAX() {
-  int eax, edx;
-  asm("XGETBV" : "=a"(eax), "=d"(edx) : "c"(0));
-  return eax;
-}
-#endif
-
-// Structure for basic CPUID info
-class CPUIDInfo {
- public:
-  CPUIDInfo()
-      : have_adx_(0),
-        have_aes_(0),
-        have_amx_bf16_(0),
-        have_amx_fp16_(0),
-        have_amx_int8_(0),
-        have_amx_tile_(0),
-        have_avx_(0),
-        have_avx2_(0),
-        have_avx512f_(0),
-        have_avx512cd_(0),
-        have_avx512er_(0),
-        have_avx512pf_(0),
-        have_avx512vl_(0),
-        have_avx512bw_(0),
-        have_avx512dq_(0),
-        have_avx512vbmi_(0),
-        have_avx512ifma_(0),
-        have_avx512_4vnniw_(0),
-        have_avx512_4fmaps_(0),
-        have_avx512_bf16_(0),
-        have_avx512_fp16_(0),
-        have_avx512_vnni_(0),
-        have_avx_vnni_(0),
-        have_avx_vnni_int8_(0),
-        have_avx_ne_convert_(0),
-        have_bmi1_(0),
-        have_bmi2_(0),
-        have_cmov_(0),
-        have_cmpxchg16b_(0),
-        have_cmpxchg8b_(0),
-        have_f16c_(0),
-        have_fma_(0),
-        have_mmx_(0),
-        have_pclmulqdq_(0),
-        have_popcnt_(0),
-        have_prefetchw_(0),
-        have_prefetchwt1_(0),
-        have_rdrand_(0),
-        have_rdseed_(0),
-        have_smap_(0),
-        have_sse_(0),
-        have_sse2_(0),
-        have_sse3_(0),
-        have_sse4_1_(0),
-        have_sse4_2_(0),
-        have_ssse3_(0),
-        have_hypervisor_(0) {}
-
-  static void Initialize() {
-    // Initialize cpuid struct
-    CHECK(cpuid == nullptr) << __func__ << " ran more than once";
-    cpuid = new CPUIDInfo;
-
-    uint32_t eax, ebx, ecx, edx;
-
-    // Get vendor string (issue CPUID with eax = 0)
-    GETCPUID(eax, ebx, ecx, edx, 0, 0);
-    cpuid->vendor_str_.append(reinterpret_cast<char *>(&ebx), 4);
-    cpuid->vendor_str_.append(reinterpret_cast<char *>(&edx), 4);
-    cpuid->vendor_str_.append(reinterpret_cast<char *>(&ecx), 4);
-
-    // To get general information and extended features we send eax = 1 and
-    // ecx = 0 to cpuid.  The response is returned in eax, ebx, ecx and edx.
-    // (See Intel 64 and IA-32 Architectures Software Developer's Manual
-    // Volume 2A: Instruction Set Reference, A-M CPUID).
-    GETCPUID(eax, ebx, ecx, edx, 1, 0);
-
-    cpuid->model_num_ = static_cast<int>((eax >> 4) & 0xf);
-    cpuid->family_ = static_cast<int>((eax >> 8) & 0xf);
-
-    cpuid->have_aes_ = (ecx >> 25) & 0x1;
-    cpuid->have_cmov_ = (edx >> 15) & 0x1;
-    cpuid->have_cmpxchg16b_ = (ecx >> 13) & 0x1;
-    cpuid->have_cmpxchg8b_ = (edx >> 8) & 0x1;
-    cpuid->have_mmx_ = (edx >> 23) & 0x1;
-    cpuid->have_pclmulqdq_ = (ecx >> 1) & 0x1;
-    cpuid->have_popcnt_ = (ecx >> 23) & 0x1;
-    cpuid->have_rdrand_ = (ecx >> 30) & 0x1;
-    cpuid->have_sse2_ = (edx >> 26) & 0x1;
-    cpuid->have_sse3_ = ecx & 0x1;
-    cpuid->have_sse4_1_ = (ecx >> 19) & 0x1;
-    cpuid->have_sse4_2_ = (ecx >> 20) & 0x1;
-    cpuid->have_sse_ = (edx >> 25) & 0x1;
-    cpuid->have_ssse3_ = (ecx >> 9) & 0x1;
-    cpuid->have_hypervisor_ = (ecx >> 31) & 1;
-
-    const uint64_t xcr0_xmm_mask = 0x2;
-    const uint64_t xcr0_ymm_mask = 0x4;
-    const uint64_t xcr0_maskreg_mask = 0x20;
-    const uint64_t xcr0_zmm0_15_mask = 0x40;
-    const uint64_t xcr0_zmm16_31_mask = 0x80;
-
-    const uint64_t xcr0_avx_mask = xcr0_xmm_mask | xcr0_ymm_mask;
-    const uint64_t xcr0_avx512_mask = xcr0_avx_mask | xcr0_maskreg_mask |
-                                      xcr0_zmm0_15_mask | xcr0_zmm16_31_mask;
-
-    const bool have_avx =
-        // Does the OS support XGETBV instruction use by applications?
-        ((ecx >> 27) & 0x1) &&
-        // Does the OS save/restore XMM and YMM state?
-        ((GetXCR0EAX() & xcr0_avx_mask) == xcr0_avx_mask) &&
-        // Is AVX supported in hardware?
-        ((ecx >> 28) & 0x1);
-
-    const bool have_avx512 =
-        // Does the OS support XGETBV instruction use by applications?
-        ((ecx >> 27) & 0x1) &&
-        // Does the OS save/restore ZMM state?
-        ((GetXCR0EAX() & xcr0_avx512_mask) == xcr0_avx512_mask);
-
-    cpuid->have_avx_ = have_avx;
-    cpuid->have_fma_ = have_avx && ((ecx >> 12) & 0x1);
-    cpuid->have_f16c_ = have_avx && ((ecx >> 29) & 0x1);
-
-    // Get standard level 7 structured extension features (issue CPUID with
-    // eax = 7 and ecx = 0), which is required to check for AVX2 support as
-    // well as other Haswell (and beyond) features.  (See Intel 64 and IA-32
-    // Architectures Software Developer's Manual Volume 2A: Instruction Set
-    // Reference, A-M CPUID).
-    GETCPUID(eax, ebx, ecx, edx, 7, 0);
-    const uint32_t kMaxNumSubLeaves = eax;
-
-    cpuid->have_adx_ = (ebx >> 19) & 0x1;
-    cpuid->have_avx2_ = have_avx && ((ebx >> 5) & 0x1);
-    cpuid->have_bmi1_ = (ebx >> 3) & 0x1;
-    cpuid->have_bmi2_ = (ebx >> 8) & 0x1;
-    cpuid->have_prefetchwt1_ = ecx & 0x1;
-    cpuid->have_rdseed_ = (ebx >> 18) & 0x1;
-    cpuid->have_smap_ = (ebx >> 20) & 0x1;
-
-    cpuid->have_avx512f_ = have_avx512 && ((ebx >> 16) & 0x1);
-    cpuid->have_avx512cd_ = have_avx512 && ((ebx >> 28) & 0x1);
-    cpuid->have_avx512er_ = have_avx512 && ((ebx >> 27) & 0x1);
-    cpuid->have_avx512pf_ = have_avx512 && ((ebx >> 26) & 0x1);
-    cpuid->have_avx512vl_ = have_avx512 && ((ebx >> 31) & 0x1);
-    cpuid->have_avx512bw_ = have_avx512 && ((ebx >> 30) & 0x1);
-    cpuid->have_avx512dq_ = have_avx512 && ((ebx >> 17) & 0x1);
-    cpuid->have_avx512vbmi_ = have_avx512 && ((ecx >> 1) & 0x1);
-    cpuid->have_avx512ifma_ = have_avx512 && ((ebx >> 21) & 0x1);
-    cpuid->have_avx512_4vnniw_ = have_avx512 && ((edx >> 2) & 0x1);
-    cpuid->have_avx512_4fmaps_ = have_avx512 && ((edx >> 3) & 0x1);
-    cpuid->have_avx512_vnni_ = have_avx512 && ((ecx >> 11) & 0x1);
-
-    // The latest Intel 64 and IA-32 Architectures Software Developer's Manual
-    // Volume 2A (December 2021) does not have information on AMX yet. We use
-    // the information from Xbyak in oneDNN.
-    // https://github.com/oneapi-src/oneDNN/blob/acf8d214cedfe7e24c9446bacc1f9f648c9273f8/src/cpu/x64/xbyak/xbyak_util.h#L536-L538
-    cpuid->have_amx_tile_ = (edx >> 24) & 0x1;
-    cpuid->have_amx_int8_ = (edx >> 25) & 0x1;
-    cpuid->have_amx_bf16_ = (edx >> 22) & 0x1;
-
-    // Check for avx512_fp16 using information from Xbyak in oneDNN:
-    // https://github.com/oneapi-src/oneDNN/blob/acf8d214cedfe7e24c9446bacc1f9f648c9273f8/src/cpu/x64/xbyak/xbyak_util.h#L516
-    cpuid->have_avx512_fp16_ = have_avx512 && ((edx >> 23) & 0x1);
-
-    // Get more Structured Extended Feature info by issuing CPUID with
-    // sub-leaf = 1 (eax = 7, ecx = 1)
-    if (kMaxNumSubLeaves >= 1) {
-      GETCPUID(eax, ebx, ecx, edx, 7, 1);
-      cpuid->have_avx_vnni_ = (eax >> 4) & 0x1;
-      cpuid->have_avx512_bf16_ = have_avx512 && ((eax >> 5) & 0x1);
-      cpuid->have_amx_fp16_ = (eax >> 21) & 0x1;
-      cpuid->have_avx_vnni_int8_ = (edx >> 4) & 0x1;
-      cpuid->have_avx_ne_convert_ = (edx >> 5) & 0x1;
-    }
-  }
-
-  static bool TestFeature(CPUFeature feature) {
-    InitCPUIDInfo();
-    // clang-format off
-    switch (feature) {
-      case ADX:           return cpuid->have_adx_;
-      case AES:           return cpuid->have_aes_;
-      case AMX_BF16:      return cpuid->have_amx_bf16_;
-      case AMX_FP16:      return cpuid->have_amx_fp16_;
-      case AMX_INT8:      return cpuid->have_amx_int8_;
-      case AMX_TILE:      return cpuid->have_amx_tile_;
-      case AVX2:          return cpuid->have_avx2_;
-      case AVX:           return cpuid->have_avx_;
-      case AVX512F:       return cpuid->have_avx512f_;
-      case AVX512CD:      return cpuid->have_avx512cd_;
-      case AVX512PF:      return cpuid->have_avx512pf_;
-      case AVX512ER:      return cpuid->have_avx512er_;
-      case AVX512VL:      return cpuid->have_avx512vl_;
-      case AVX512BW:      return cpuid->have_avx512bw_;
-      case AVX512DQ:      return cpuid->have_avx512dq_;
-      case AVX512VBMI:    return cpuid->have_avx512vbmi_;
-      case AVX512IFMA:    return cpuid->have_avx512ifma_;
-      case AVX512_4VNNIW: return cpuid->have_avx512_4vnniw_;
-      case AVX512_4FMAPS: return cpuid->have_avx512_4fmaps_;
-      case AVX512_BF16:   return cpuid->have_avx512_bf16_;
-      case AVX512_FP16:   return cpuid->have_avx512_fp16_;
-      case AVX512_VNNI:   return cpuid->have_avx512_vnni_;
-      case AVX_VNNI:      return cpuid->have_avx_vnni_;
-      case AVX_VNNI_INT8:  return cpuid->have_avx_vnni_int8_;
-      case AVX_NE_CONVERT: return cpuid->have_avx_ne_convert_;
-      case BMI1:          return cpuid->have_bmi1_;
-      case BMI2:          return cpuid->have_bmi2_;
-      case CMOV:          return cpuid->have_cmov_;
-      case CMPXCHG16B:    return cpuid->have_cmpxchg16b_;
-      case CMPXCHG8B:     return cpuid->have_cmpxchg8b_;
-      case F16C:          return cpuid->have_f16c_;
-      case FMA:           return cpuid->have_fma_;
-      case MMX:           return cpuid->have_mmx_;
-      case PCLMULQDQ:     return cpuid->have_pclmulqdq_;
-      case POPCNT:        return cpuid->have_popcnt_;
-      case PREFETCHW:     return cpuid->have_prefetchw_;
-      case PREFETCHWT1:   return cpuid->have_prefetchwt1_;
-      case RDRAND:        return cpuid->have_rdrand_;
-      case RDSEED:        return cpuid->have_rdseed_;
-      case SMAP:          return cpuid->have_smap_;
-      case SSE2:          return cpuid->have_sse2_;
-      case SSE3:          return cpuid->have_sse3_;
-      case SSE4_1:        return cpuid->have_sse4_1_;
-      case SSE4_2:        return cpuid->have_sse4_2_;
-      case SSE:           return cpuid->have_sse_;
-      case SSSE3:         return cpuid->have_ssse3_;
-      case HYPERVISOR:    return cpuid->have_hypervisor_;
-      default:
-        break;
-    }
-    // clang-format on
-    return false;
-  }
-
-  std::string vendor_str() const { return vendor_str_; }
-  int family() const { return family_; }
-  int model_num() { return model_num_; }
-
- private:
-  int have_adx_ : 1;
-  int have_aes_ : 1;
-  int have_amx_bf16_ : 1;
-  int have_amx_fp16_ : 1;
-  int have_amx_int8_ : 1;
-  int have_amx_tile_ : 1;
-  int have_avx_ : 1;
-  int have_avx2_ : 1;
-  int have_avx512f_ : 1;
-  int have_avx512cd_ : 1;
-  int have_avx512er_ : 1;
-  int have_avx512pf_ : 1;
-  int have_avx512vl_ : 1;
-  int have_avx512bw_ : 1;
-  int have_avx512dq_ : 1;
-  int have_avx512vbmi_ : 1;
-  int have_avx512ifma_ : 1;
-  int have_avx512_4vnniw_ : 1;
-  int have_avx512_4fmaps_ : 1;
-  int have_avx512_bf16_ : 1;
-  int have_avx512_fp16_ : 1;
-  int have_avx512_vnni_ : 1;
-  int have_avx_vnni_ : 1;
-  int have_avx_vnni_int8_ : 1;
-  int have_avx_ne_convert_ : 1;
-  int have_bmi1_ : 1;
-  int have_bmi2_ : 1;
-  int have_cmov_ : 1;
-  int have_cmpxchg16b_ : 1;
-  int have_cmpxchg8b_ : 1;
-  int have_f16c_ : 1;
-  int have_fma_ : 1;
-  int have_mmx_ : 1;
-  int have_pclmulqdq_ : 1;
-  int have_popcnt_ : 1;
-  int have_prefetchw_ : 1;
-  int have_prefetchwt1_ : 1;
-  int have_rdrand_ : 1;
-  int have_rdseed_ : 1;
-  int have_smap_ : 1;
-  int have_sse_ : 1;
-  int have_sse2_ : 1;
-  int have_sse3_ : 1;
-  int have_sse4_1_ : 1;
-  int have_sse4_2_ : 1;
-  int have_ssse3_ : 1;
-  int have_hypervisor_ : 1;
-  std::string vendor_str_;
-  int family_;
-  int model_num_;
-};
-
-absl::once_flag cpuid_once_flag;
-
-void InitCPUIDInfo() {
-  // This ensures that CPUIDInfo::Initialize() is called exactly
-  // once regardless of how many threads concurrently call us
-  absl::call_once(cpuid_once_flag, CPUIDInfo::Initialize);
-}
-
-#endif  // PLATFORM_IS_X86
-
-#if defined(PLATFORM_IS_ARM64) && !defined(__APPLE__) && !defined(__OpenBSD__)
-
-class CPUIDInfo;
-void InitCPUIDInfo();
-void InitCPUIDFeatureInfo();
-
-CPUIDInfo *cpuid = nullptr;
-
-// Structure for basic CPUID info.
-class CPUIDInfo {
- public:
-  CPUIDInfo()
-      : implementer_(0),
-        variant_(0),
-        cpunum_(0),
-        is_arm_neoverse_v1_(0),
-        is_arm_neoverse_n1_(0),
-        has_bf16_(0) {}
-
-  static void Initialize() {
-    // Initialize CPUIDInfo pointer.
-    if (cpuid != nullptr) return;
-
-    cpuid = new CPUIDInfo;
-    // Make sure CPUID registers are available before reading them.
-    if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
-      return;
-    }
-
-    int present_cpu = -1;
-#ifndef PLATFORM_WINDOWS
-    std::ifstream CPUspresent;
-    CPUspresent.open("/sys/devices/system/cpu/present", std::ios::in);
-    if (CPUspresent.is_open()) {
-      std::string line;
-      if (static_cast<bool>(getline(CPUspresent, line))) {
-        // We just need to find one CPU that is active
-        // from which we can read MIDR register to find
-        // implement, variant and revision information.
-        auto ending = line.end();
-        for (auto i = line.begin(); i < line.end(); ++i) {
-          if (*i == '-' || *i == ',') {
-            ending = i;
-            break;
-          }
-        }
-        line.erase(ending, line.end());
-        // That should be the fist number.
-        present_cpu = std::stoi(line);
-      }
-    }
-#endif  // !PLATFORM_WINDOWS
-
-    if (present_cpu == -1) {
-      return;
-    }
-
-#ifndef PLATFORM_WINDOWS
-    std::stringstream str;
-    str << "/sys/devices/system/cpu/cpu" << present_cpu
-        << "/regs/identification/midr_el1";
-    std::ifstream midr_el1_file(str.str(), std::ios::in);
-    if (midr_el1_file.is_open()) {
-      std::string line;
-      if (static_cast<bool>(getline(midr_el1_file, line))) {
-        uint32 midr_el1 = std::stoul(line, nullptr, 16);
-
-        // Unpack variant and CPU ID.
-        // Reference:
-        // https://developer.arm.com/documentation/101427/0101/Register-descriptions/AArch64-system-registers/MIDR-EL1--Main-ID-Register--EL1.
-        cpuid->implementer_ = (midr_el1 >> 24) & 0xFF;
-        cpuid->variant_ = (midr_el1 >> 20) & 0xF;
-        cpuid->cpunum_ = (midr_el1 >> 4) & 0xFFF;
-        if (cpuid->implementer_ == 0x41) {
-          switch (cpuid->cpunum_) {
-            case 0xd40:  // ARM NEOVERSE V1
-              cpuid->is_arm_neoverse_v1_ = 1;
-              break;
-            case 0xd0c:  // ARM NEOVERSE N1
-              cpuid->is_arm_neoverse_n1_ = 1;
-              break;
-            default:
-              break;
-          }
-        }
-      }
-    }
-#endif  // !PLATFORM_WINDOWS
-  }
-  static void InitializeCPUFeature() {
-    // Initialize CPUIDInfo pointer.
-    if (cpuid != nullptr) return;
-
-    cpuid = new CPUIDInfo;
-
-    const uint32_t hwcaps2 = getauxval(AT_HWCAP2);
-    cpuid->has_bf16_ = IsFeatureSupported(hwcaps2, kHwcap2Bf16);
-  }
-
-  int implementer() const { return implementer_; }
-  int cpunum() const { return cpunum_; }
-
-  static bool TestAarch64CPU(Aarch64CPU cpu) {
-    InitCPUIDInfo();
-    // clang-format off
-    switch (cpu) {
-      case ARM_NEOVERSE_V1:
-        return cpuid->is_arm_neoverse_v1_;
-      default:
-        return false;
-    }
-    // clang-format on
-    return false;
-  }
-
-  static bool IsFeatureSupported(uint64_t features, uint64_t feature_mask) {
-    return (features & feature_mask);
-  }
-  static bool TestAarch64Feature(CPUFeature feature) {
-    InitCPUIDFeatureInfo();
-    switch (feature) {
-      case AARCH64_BF16:
-        return cpuid->has_bf16_;
-      default:
-        break;
-    }
-    return false;
-  }
-
- private:
-  static constexpr uint64_t kHwcap2Bf16 = 1ull << 14;
-  int implementer_;
-  int variant_;
-  int cpunum_;
-  int is_arm_neoverse_v1_;  // ARM NEOVERSE V1
-  int is_arm_neoverse_n1_;  // ARM NEOVERSE N1
-  int has_bf16_;
-};
-
-absl::once_flag cpuid_once_flag;
-
-void InitCPUIDInfo() {
-  absl::call_once(cpuid_once_flag, CPUIDInfo::Initialize);
-}
-
-void InitCPUIDFeatureInfo() {
-  absl::call_once(cpuid_once_flag, CPUIDInfo::InitializeCPUFeature);
-}
-
-#endif  // PLATFORM_IS_ARM64 && !__APPLE__ && !__OpenBSD__
-
-}  // namespace
-
-bool TestCPUFeature(CPUFeature feature) {
-#ifdef PLATFORM_IS_X86
-  return CPUIDInfo::TestFeature(feature);
-#elif defined(PLATFORM_IS_ARM64) && !defined(__APPLE__) && !defined(__OpenBSD__)
-  return CPUIDInfo::TestAarch64Feature(feature);
-#else
-  return false;
-#endif
-}
-
-bool TestAarch64CPU(Aarch64CPU cpu) {
-#if defined(PLATFORM_IS_ARM64) && !defined(__APPLE__) && !defined(__OpenBSD__)
-  return CPUIDInfo::TestAarch64CPU(cpu);
-#else
-  return false;
-#endif
-}
-
-std::string CPUVendorIDString() {
-#ifdef PLATFORM_IS_X86
-  InitCPUIDInfo();
-  return cpuid->vendor_str();
-#else
-  return "";
-#endif
-}
-
-int CPUFamily() {
-#ifdef PLATFORM_IS_X86
-  InitCPUIDInfo();
-  return cpuid->family();
-#elif defined(PLATFORM_IS_ARM64) && !defined(__APPLE__) && !defined(__OpenBSD__)
-  InitCPUIDInfo();
-  return cpuid->implementer();
-#else
-  return 0;
-#endif
-}
-
-int CPUModelNum() {
-#ifdef PLATFORM_IS_X86
-  InitCPUIDInfo();
-  return cpuid->model_num();
-#elif defined(PLATFORM_IS_ARM64) && !defined(__APPLE__) && !defined(__OpenBSD__)
-  InitCPUIDInfo();
-  return cpuid->cpunum();
-#else
-  return 0;
-#endif
-}
-
-int CPUIDNumSMT() {
-#ifdef PLATFORM_IS_X86
-  // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
-  // https://software.intel.com/en-us/articles/intel-sdm (Vol 3A)
-  // Section: Detecting Hardware Multi-threads Support and Topology
-  // Uses CPUID Leaf 11 to enumerate system topology on Intel x86 architectures
-  // Other cases not supported
-  uint32_t eax, ebx, ecx, edx;
-  // Check if system supports Leaf 11
-  GETCPUID(eax, ebx, ecx, edx, 0, 0);
-  if (eax >= 11) {
-    // 1) Leaf 11 available? CPUID.(EAX=11, ECX=0):EBX != 0
-    // 2) SMT_Mask_Width = CPUID.(EAX=11, ECX=0):EAX[4:0] if CPUID.(EAX=11,
-    // ECX=0):ECX[15:8] is 1
-    GETCPUID(eax, ebx, ecx, edx, 11, 0);
-    if (ebx != 0 && ((ecx & 0xff00) >> 8) == 1) {
-      return 1 << (eax & 0x1f);  // 2 ^ SMT_Mask_Width
-    }
-  }
-#endif  // PLATFORM_IS_X86
-  return 0;
-}
-
-}  // namespace port
-}  // namespace tsl
diff --git a/tsl/platform/cpu_info.h b/tsl/platform/cpu_info.h
index 30eca5c41..d1590ff19 100644
--- a/tsl/platform/cpu_info.h
+++ b/tsl/platform/cpu_info.h
@@ -16,186 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_TSL_PLATFORM_CPU_INFO_H_
 #define TENSORFLOW_TSL_PLATFORM_CPU_INFO_H_
 
-#include <string>
-
-// TODO(ahentz): This is not strictly required here but, for historical
-// reasons, many people depend on cpu_info.h in order to use kLittleEndian.
-#include "xla/tsl/platform/byte_order.h"
-#include "tsl/platform/platform.h"
-
-#if defined(_MSC_VER)
-// included so __cpuidex function is available for GETCPUID on Windows
-#include <intrin.h>
-#endif
-
-namespace tsl {
-namespace port {
-
-// Returns an estimate of the number of schedulable CPUs for this
-// process.  Usually, it's constant throughout the lifetime of a
-// process, but it might change if the underlying cluster management
-// software can change it dynamically.  If the underlying call fails, a default
-// value (e.g. `4`) may be returned.
-int NumSchedulableCPUs();
-
-// Returns an estimate for the maximum parallelism for this process.
-// Applications should avoid running more than this number of threads with
-// intensive workloads concurrently to avoid performance degradation and
-// contention.
-// This value is either the number of schedulable CPUs, or a value specific to
-// the underlying cluster management. Applications should assume this value can
-// change throughout the lifetime of the process. This function must not be
-// called during initialization, i.e., before main() has started.
-int MaxParallelism();
-
-// Returns an estimate for the maximum parallelism for this process on the
-// provided numa node, or any numa node if `numa_node` is kNUMANoAffinity.
-// See MaxParallelism() for more information.
-int MaxParallelism(int numa_node);
-
-// Returns the total number of CPUs on the system.  This number should
-// not change even if the underlying cluster management software may
-// change the number of schedulable CPUs.  Unlike `NumSchedulableCPUs`, if the
-// underlying call fails, an invalid value of -1 will be returned;
-// the user must check for validity.
-static constexpr int kUnknownCPU = -1;
-int NumTotalCPUs();
-
-// Returns the id of the current CPU.  Returns -1 if the current CPU cannot be
-// identified.  If successful, the return value will be in [0, NumTotalCPUs()).
-int GetCurrentCPU();
-
-// Returns an estimate of the number of hyperthreads per physical core
-// on the CPU
-int NumHyperthreadsPerCore();
-
-// Mostly ISA related features that we care about
-// Do not change numeric assignments.
-enum CPUFeature {
-  //===--------------------------------------------------------------------===//
-  // x86 features
-  //===--------------------------------------------------------------------===//
-  MMX = 0,
-  SSE = 1,
-  SSE2 = 2,
-  SSE3 = 3,
-  SSSE3 = 4,
-  SSE4_1 = 5,
-  SSE4_2 = 6,
-  CMOV = 7,
-  CMPXCHG8B = 8,
-  CMPXCHG16B = 9,
-  POPCNT = 10,
-  AES = 11,
-  AVX = 12,
-  RDRAND = 13,
-  AVX2 = 14,
-  FMA = 15,
-  F16C = 16,
-  PCLMULQDQ = 17,
-  RDSEED = 18,
-  ADX = 19,
-  SMAP = 20,
-
-  // Prefetch Vector Data Into Caches with Intent to Write and T1 Hint
-  // http://www.felixcloutier.com/x86/PREFETCHWT1.html.
-  // You probably want PREFETCHW instead.
-  PREFETCHWT1 = 21,
-
-  BMI1 = 22,
-  BMI2 = 23,
-  HYPERVISOR = 25,  // 0 when on a real CPU, 1 on (well-behaved) hypervisor.
-
-  // Prefetch Data into Caches in Anticipation of a Write (3D Now!).
-  // http://www.felixcloutier.com/x86/PREFETCHW.html
-  PREFETCHW = 26,
-
-  // AVX-512: 512-bit vectors (plus masking, etc.) in Knights Landing,
-  // Skylake, Xeon, etc. Each of these entries is a different subset of
-  // instructions, various combinations of which occur on various CPU types.
-  AVX512F = 27,        // Foundation
-  AVX512CD = 28,       // Conflict detection
-  AVX512ER = 29,       // Exponential and reciprocal
-  AVX512PF = 30,       // Prefetching
-  AVX512VL = 31,       // Shorter vector lengths
-  AVX512BW = 32,       // Byte and word
-  AVX512DQ = 33,       // Dword and qword
-  AVX512VBMI = 34,     // Bit manipulation
-  AVX512IFMA = 35,     // Integer multiply-add
-  AVX512_4VNNIW = 36,  // Integer neural network (Intel Xeon Phi only)
-  AVX512_4FMAPS = 37,  // Floating point neural network (Intel Xeon Phi only)
-  AVX512_VNNI = 38,    // Integer neural network
-  AVX512_BF16 = 39,    // Bfloat16 neural network
-
-  // AVX version of AVX512_VNNI in CPUs such as Alder Lake and Sapphire Rapids.
-  AVX_VNNI = 40,  // Integer neural network
-
-  // AMX: Advanced Matrix Extension in Sapphire Rapids.
-  // Perform matrix multiplication on the Tile Matrix Multiply (TMUL) unit,
-  // supporting two popular data types in neural networks, int8 and bfloat16.
-  AMX_TILE = 41,  // Tile configuration and load/store
-  AMX_INT8 = 42,  // Int8 tile matrix multiplication
-  AMX_BF16 = 43,  // Bfloat16 tile matrix multiplication
-
-  AVX512_FP16 = 44,     // Float16 neural network
-  AMX_FP16 = 45,        // Float16 tile matrix multiplication
-  AVX_NE_CONVERT = 46,  // Instructions for faster bfloat16, float16 convert.
-  AVX_VNNI_INT8 = 47,   // VNNI instructions for combinations of u8, s8 dtypes.
-
-  //===--------------------------------------------------------------------===//
-  // AArch64 features
-  //===--------------------------------------------------------------------===//
-  AARCH64_NEON = 1000,
-  AARCH64_SVE = 1001,
-  AARCH64_SVE2 = 1002,
-  AARCH64_BF16 = 1003,  // BF16 on AArch64 systems
-};
-
-enum Aarch64CPU {
-  ARM_NEOVERSE_N1 = 0,  // ARM NEOVERSE N1
-  ARM_NEOVERSE_V1 = 1,  // ARM NEOVERSE V1
-};
-// Checks whether the current AArch64 processor is supported.
-bool TestAarch64CPU(Aarch64CPU cpu);
-
-// Checks whether the current processor supports one of the features above.
-// Checks CPU registers to return hardware capabilities.
-bool TestCPUFeature(CPUFeature feature);
-
-// Checks whether the current processor is x86.
-constexpr bool IsX86CPU() {
-#ifdef PLATFORM_IS_X86
-  return true;
-#else
-  return false;
-#endif
-}
-
-// Checks whether the current processor is aarch64.
-constexpr bool IsAarch64CPU() {
-#if defined(PLATFORM_IS_ARM64) && !defined(__APPLE__) && !defined(__OpenBSD__)
-  return true;
-#else
-  return false;
-#endif
-}
-
-// Returns CPU Vendor string (i.e. 'GenuineIntel', 'AuthenticAMD', etc.)
-std::string CPUVendorIDString();
-
-// Returns CPU family.
-int CPUFamily();
-
-// Returns CPU model number.
-int CPUModelNum();
-
-// Returns nominal core processor cycles per second of each processor.
-double NominalCPUFrequency();
-
-// Returns num of hyperthreads per physical core
-int CPUIDNumSMT();
-
-}  // namespace port
-}  // namespace tsl
+#include "xla/tsl/platform/cpu_info.h"
 
 #endif  // TENSORFLOW_TSL_PLATFORM_CPU_INFO_H_
diff --git a/tsl/platform/demangle.h b/tsl/platform/demangle.h
index 8171be236..9501268ba 100644
--- a/tsl/platform/demangle.h
+++ b/tsl/platform/demangle.h
@@ -16,19 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_TSL_PLATFORM_DEMANGLE_H_
 #define TENSORFLOW_TSL_PLATFORM_DEMANGLE_H_
 
-#include "xla/tsl/platform/types.h"
-
-namespace tsl {
-namespace port {
-
-// If the compiler supports, demangle a mangled symbol name and return
-// the demangled name. Otherwise, returns 'mangled' as is.
-std::string Demangle(const char* mangled);
-inline std::string Demangle(const std::string mangled) {
-  return Demangle(mangled.c_str());
-}
-
-}  // namespace port
-}  // namespace tsl
+#include "xla/tsl/platform/demangle.h"
 
 #endif  // TENSORFLOW_TSL_PLATFORM_DEMANGLE_H_
diff --git a/tsl/platform/error_logging.h b/tsl/platform/error_logging.h
index 0ee471d0f..ffca83860 100644
--- a/tsl/platform/error_logging.h
+++ b/tsl/platform/error_logging.h
@@ -1,4 +1,4 @@
-/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,14 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_TSL_PLATFORM_ERROR_LOGGING_H_
 #define TENSORFLOW_TSL_PLATFORM_ERROR_LOGGING_H_
 
-#include "absl/status/status.h"
-#include "absl/strings/string_view.h"
-
-namespace tsl::error_logging {
-
-absl::Status Log(absl::string_view component, absl::string_view subcomponent,
-                 absl::string_view error_msg);
-
-}
+#include "xla/tsl/platform/error_logging.h"
 
 #endif  // TENSORFLOW_TSL_PLATFORM_ERROR_LOGGING_H_
diff --git a/tsl/platform/host_info.h b/tsl/platform/host_info.h
index a791a85db..8ffd8158c 100644
--- a/tsl/platform/host_info.h
+++ b/tsl/platform/host_info.h
@@ -1,4 +1,4 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,58 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_TSL_PLATFORM_HOST_INFO_H_
 #define TENSORFLOW_TSL_PLATFORM_HOST_INFO_H_
 
-#include <cstddef>
-#include <cstdint>
-#include <cstring>
-#include <string>
-
-#include "absl/strings/string_view.h"
-
-namespace tsl {
-namespace port {
-
-// Statistical data of IO operations performed by the job.
-struct IOStatistics {
-  struct Distribution {
-    uint64_t count = 0;
-    double mean = 0.0;
-    double std_dev = 0.0;
-  };
-  // Distribution of round trip IO latency in microseconds.
-  Distribution roundtrip_latency_usec;
-  // Distribution of data received by IO reads in bytes.
-  Distribution response_bytes;
-};
-
-// Return the hostname of the machine on which this process is running.
-std::string Hostname();
-
-// Return the job name as a string if it exists, otherwise return an empty
-// string.
-std::string JobName();
-
-// Returns the Borg job UID as an int64_t if it exists. Otherwise return -1.
-int64_t JobUid();
-
-// Returns the Borg task ID as an int64_t if it exists. Otherwise return -1.
-int64_t TaskId();
-
-// Retrieves the host file read statistics.
-IOStatistics GetIOStatistics();
-
-// Returns a copy of a given hostname that does not include domain or port.
-// Examples:
-//  "foo.example.com:80" -> "foo"
-//  "foo:80" -> "foo"
-inline std::string StripDomainAndPort(absl::string_view hoststring) {
-  size_t strip_from = hoststring.find_first_of(".:");
-  if (strip_from != absl::string_view::npos) {
-    return std::string(hoststring.substr(0, strip_from));
-  }
-  return std::string(hoststring);
-}
-
-}  // namespace port
-}  // namespace tsl
+#include "xla/tsl/platform/host_info.h"
 
 #endif  // TENSORFLOW_TSL_PLATFORM_HOST_INFO_H_
diff --git a/tsl/platform/human_readable_json.h b/tsl/platform/human_readable_json.h
index 897a6b110..48d410f1c 100644
--- a/tsl/platform/human_readable_json.h
+++ b/tsl/platform/human_readable_json.h
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,35 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_TSL_PLATFORM_HUMAN_READABLE_JSON_H_
 #define TENSORFLOW_TSL_PLATFORM_HUMAN_READABLE_JSON_H_
 
-#include <string>
-
-#include "absl/status/status.h"
-#include "absl/status/statusor.h"
-#include "xla/tsl/platform/types.h"
-#include "tsl/platform/protobuf.h"
-
-namespace tsl {
-
-// Converts a proto to a JSON-like string that's meant to be human-readable
-// but still machine-parseable.
-//
-// This string may not be strictly JSON-compliant, but it must be parsable by
-// HumanReadableJSONToProto.
-//
-// When ignore_accuracy_loss = true, this function may ignore JavaScript
-// accuracy loss with large integers.
-absl::StatusOr<std::string> ProtoToHumanReadableJson(
-    const protobuf::Message& proto, bool ignore_accuracy_loss);
-absl::StatusOr<std::string> ProtoToHumanReadableJson(
-    const protobuf::MessageLite& proto, bool ignore_accuracy_loss);
-
-// Converts a string produced by ProtoToHumanReadableJSON to a protobuf.  Not
-// guaranteed to work for general JSON.
-absl::Status HumanReadableJsonToProto(const std::string& str,
-                                      protobuf::Message* proto);
-absl::Status HumanReadableJsonToProto(const std::string& str,
-                                      protobuf::MessageLite* proto);
-
-}  // namespace tsl
+#include "xla/tsl/platform/human_readable_json.h"
 
 #endif  // TENSORFLOW_TSL_PLATFORM_HUMAN_READABLE_JSON_H_
diff --git a/tsl/platform/init_main.h b/tsl/platform/init_main.h
index 98b7cd67b..d887aa93a 100644
--- a/tsl/platform/init_main.h
+++ b/tsl/platform/init_main.h
@@ -1,4 +1,4 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,24 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_TSL_PLATFORM_INIT_MAIN_H_
 #define TENSORFLOW_TSL_PLATFORM_INIT_MAIN_H_
 
-#include <string>
-#include <vector>
-
-#include "absl/time/time.h"
-#include "xla/tsl/platform/macros.h"
-
-namespace tsl {
-namespace port {
-
-void InitMain(const char* usage, int* argc, char*** argv);
-
-TF_EXPORT const std::vector<std::string>& GetArgvs();
-
-TF_EXPORT const char* GetArgv0();
-
-absl::Duration GetUptime();
-
-}  // namespace port
-}  // namespace tsl
+#include "xla/tsl/platform/init_main.h"
 
 #endif  // TENSORFLOW_TSL_PLATFORM_INIT_MAIN_H_
diff --git a/tsl/platform/load_library.h b/tsl/platform/load_library.h
index 5a42f2a34..470598a02 100644
--- a/tsl/platform/load_library.h
+++ b/tsl/platform/load_library.h
@@ -1,4 +1,4 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,22 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_TSL_PLATFORM_LOAD_LIBRARY_H_
 #define TENSORFLOW_TSL_PLATFORM_LOAD_LIBRARY_H_
 
-#include <string>
-
-#include "absl/status/status.h"
-
-namespace tsl {
-
-namespace internal {
-
-absl::Status LoadDynamicLibrary(const char* library_filename, void** handle);
-absl::Status GetSymbolFromLibrary(void* handle, const char* symbol_name,
-                                  void** symbol);
-std::string FormatLibraryFileName(const std::string& name,
-                                  const std::string& version);
-
-}  // namespace internal
-
-}  // namespace tsl
+#include "xla/tsl/platform/load_library.h"
 
 #endif  // TENSORFLOW_TSL_PLATFORM_LOAD_LIBRARY_H_
diff --git a/tsl/platform/mem.h b/tsl/platform/mem.h
index d2b3286f0..362216dc0 100644
--- a/tsl/platform/mem.h
+++ b/tsl/platform/mem.h
@@ -1,4 +1,4 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,103 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_TSL_PLATFORM_MEM_H_
 #define TENSORFLOW_TSL_PLATFORM_MEM_H_
 
-#include <cstddef>
-#include <cstdint>
-#include <new>
-
-// TODO(cwhipkey): remove this when callers use annotations directly.
-#include "absl/base/macros.h"
-#include "xla/tsl/platform/dynamic_annotations.h"
-#include "xla/tsl/platform/types.h"
-#include "tsl/platform/platform.h"
-
-namespace tsl {
-namespace port {
-
-// Aligned allocation/deallocation. `minimum_alignment` must be a power of 2
-// and a multiple of sizeof(void*).
-void* AlignedMalloc(size_t size, std::align_val_t minimum_alignment);
-ABSL_DEPRECATE_AND_INLINE()
-inline void* AlignedMalloc(size_t size, int minimum_alignment) {
-  return AlignedMalloc(size, static_cast<std::align_val_t>(minimum_alignment));
-}
-void AlignedFree(void* aligned_memory);
-void AlignedSizedFree(void* aligned_memory, size_t size,
-                      std::align_val_t minimum_alignment);
-ABSL_DEPRECATE_AND_INLINE()
-inline void AlignedSizedFree(void* aligned_memory, size_t alignment,
-                             size_t size) {
-  AlignedSizedFree(aligned_memory, size,
-                   static_cast<std::align_val_t>(alignment));
-}
-
-// An allocator that allocates memory with the given minimum alignment.
-template <class T, std::align_val_t minimum_alignment>
-struct AlignedAllocator {
-  using value_type = T;
-
-  value_type* allocate(size_t n) {
-    return static_cast<value_type*>(
-        AlignedMalloc(n * sizeof(value_type), minimum_alignment));
-  }
-
-  void deallocate(value_type* p, size_t n) {
-    return AlignedSizedFree(p, n, minimum_alignment);
-  }
-};
-
-void* Malloc(size_t size);
-void* Realloc(void* ptr, size_t size);
-void Free(void* ptr);
-
-// Tries to release num_bytes of free memory back to the operating
-// system for reuse.  Use this routine with caution -- to get this
-// memory back may require faulting pages back in by the OS, and
-// that may be slow.
-//
-// Currently, if a malloc implementation does not support this
-// routine, this routine is a no-op.
-void MallocExtension_ReleaseToSystem(std::size_t num_bytes);
-
-// Returns the actual number N of bytes reserved by the malloc for the
-// pointer p.  This number may be equal to or greater than the number
-// of bytes requested when p was allocated.
-//
-// This routine is just useful for statistics collection.  The
-// client must *not* read or write from the extra bytes that are
-// indicated by this call.
-//
-// Example, suppose the client gets memory by calling
-//    p = malloc(10)
-// and GetAllocatedSize(p) may return 16.  The client must only use the
-// first 10 bytes p[0..9], and not attempt to read or write p[10..15].
-//
-// Currently, if a malloc implementation does not support this
-// routine, this routine returns 0.
-std::size_t MallocExtension_GetAllocatedSize(const void* p);
-
-struct MemoryInfo {
-  int64_t total = 0;
-  int64_t free = 0;
-};
-
-struct MemoryBandwidthInfo {
-  int64_t bw_used = 0;  // memory bandwidth used across all CPU (in MBs/second)
-};
-
-// Retrieves the host memory information. If any of the fields in the returned
-// MemoryInfo structure is INT64_MAX, it means such information is not
-// available.
-MemoryInfo GetMemoryInfo();
-
-// Retrieves the host memory bandwidth information. If any field in the returned
-// structure is INT64_MAX, it means such information is not available.
-MemoryBandwidthInfo GetMemoryBandwidthInfo();
-
-// Returns the amount of RAM available in bytes, or INT64_MAX if unknown.
-static inline int64_t AvailableRam() { return GetMemoryInfo().free; }
-
-}  // namespace port
-}  // namespace tsl
+#include "xla/tsl/platform/mem.h"
 
 #endif  // TENSORFLOW_TSL_PLATFORM_MEM_H_
diff --git a/tsl/platform/net.h b/tsl/platform/net.h
index e42ec6b91..46339a49d 100644
--- a/tsl/platform/net.h
+++ b/tsl/platform/net.h
@@ -16,57 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_TSL_PLATFORM_NET_H_
 #define TENSORFLOW_TSL_PLATFORM_NET_H_
 
-#include <string>
-
-#include "absl/base/macros.h"
-#include "absl/strings/str_cat.h"
-namespace tsl {
-namespace net {
-
-// Checks whether the given port is available for binding to a TCP or UDP
-// socket. If the port is available, returns true. Otherwise, returns false. If
-// error is not null, sets error to a string describing the error.
-bool IsPortAvailable(int* port, bool is_tcp, std::string* error);
-
-inline bool IsPortAvailable(int port, bool is_tcp, std::string* error) {
-  if (port <= 0) {
-    if (error != nullptr) {
-      *error =
-          absl::StrCat("Invalid port number: ", port, ". Port must be > 0.");
-    }
-    return false;
-  }
-  return IsPortAvailable(&port, is_tcp, error);
-}
-// Return a port number that is not currently bound to any TCP or UDP port.
-// On success returns the assigned port number. Otherwise returns -1.
-int PickUnusedPort();
-
-// Same as PickUnusedPort(), but fails a CHECK() if a port can't be found. In
-// that case, the error message is logged to FATAL.
-int PickUnusedPortOrDie();
-
-// Relinquish a claim on the given port which was previously returned by
-// PickUnusedPort[OrDie](). This allows PickUnusedPort[OrDie]() to return
-// the given port to another caller in the future. Since the number of
-// ports the portserver will give to a process is limited (typically 200),
-// recycling ports after they are no longer needed can help avoid
-// exhausting them. 'port' must be a positive number that was previously
-// returned by PickUnusedPort[OrDie](), and not yet recycled, otherwise an
-// abort may occur.
-void RecycleUnusedPort(int port);
-}  // namespace net
-
-namespace internal {
-ABSL_DEPRECATE_AND_INLINE()
-inline int PickUnusedPort() { return tsl::net::PickUnusedPort(); }
-
-ABSL_DEPRECATE_AND_INLINE()
-inline int PickUnusedPortOrDie() { return tsl::net::PickUnusedPortOrDie(); }
-
-ABSL_DEPRECATE_AND_INLINE()
-inline void RecycleUnusedPort(int port) { tsl::net::RecycleUnusedPort(port); }
-}  // namespace internal
-}  // namespace tsl
+#include "xla/tsl/platform/net.h"
 
 #endif  // TENSORFLOW_TSL_PLATFORM_NET_H_
diff --git a/tsl/platform/numa.h b/tsl/platform/numa.h
index 85d16a4b3..a219722e7 100644
--- a/tsl/platform/numa.h
+++ b/tsl/platform/numa.h
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,46 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_TSL_PLATFORM_NUMA_H_
 #define TENSORFLOW_TSL_PLATFORM_NUMA_H_
 
-#include <cstddef>
+#include "xla/tsl/platform/numa.h"
 
-namespace tsl {
-namespace port {
-
-// Returns true iff NUMA functions are supported.
-bool NUMAEnabled();
-
-// Returns the number of NUMA nodes present with respect to CPU operations.
-// Typically this will be the number of sockets where some RAM has greater
-// affinity with one socket than another.
-int NUMANumNodes();
-
-static const int kNUMANoAffinity = -1;
-
-// If possible sets affinity of the current thread to the specified NUMA node.
-// If node == kNUMANoAffinity removes affinity to any particular node.
-void NUMASetThreadNodeAffinity(int node);
-
-// Returns NUMA node affinity of the current thread, kNUMANoAffinity if none.
-int NUMAGetThreadNodeAffinity();
-
-// Like AlignedMalloc, but allocates memory with affinity to the specified NUMA
-// node.
-//
-// Notes:
-//  1. node must be >= 0 and < NUMANumNodes.
-//  1. minimum_alignment must a factor of system page size, the memory
-//     returned will be page-aligned.
-//  2. This function is likely significantly slower than AlignedMalloc
-//     and should not be used for lots of small allocations.  It makes more
-//     sense as a backing allocator for BFCAllocator, PoolAllocator, or similar.
-void* NUMAMalloc(int node, size_t size, int minimum_alignment);
-
-// Memory allocated by NUMAMalloc must be freed via NUMAFree.
-void NUMAFree(void* ptr, size_t size);
-
-// Returns NUMA node affinity of memory address, kNUMANoAffinity if none.
-int NUMAGetMemAffinity(const void* ptr);
-
-}  // namespace port
-}  // namespace tsl
 #endif  // TENSORFLOW_TSL_PLATFORM_NUMA_H_
diff --git a/tsl/platform/snappy.h b/tsl/platform/snappy.h
index aa292e190..588925f7d 100644
--- a/tsl/platform/snappy.h
+++ b/tsl/platform/snappy.h
@@ -1,4 +1,4 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,39 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_TSL_PLATFORM_SNAPPY_H_
 #define TENSORFLOW_TSL_PLATFORM_SNAPPY_H_
 
-#include "xla/tsl/platform/types.h"
-
-#if !defined(PLATFORM_WINDOWS)
-#include <sys/uio.h>
-namespace tsl {
-using ::iovec;  // NOLINT(misc-unused-using-decls)
-}  // namespace tsl
-#else
-namespace tsl {
-struct iovec {
-  void* iov_base;
-  size_t iov_len;
-};
-}  // namespace tsl
-#endif
-
-namespace tsl {
-namespace port {
-
-// Snappy compression/decompression support
-bool Snappy_Compress(const char* input, size_t length, std::string* output);
-
-bool Snappy_CompressFromIOVec(const struct iovec* iov,
-                              size_t uncompressed_length, std::string* output);
-
-bool Snappy_GetUncompressedLength(const char* input, size_t length,
-                                  size_t* result);
-bool Snappy_Uncompress(const char* input, size_t length, char* output);
-
-bool Snappy_UncompressToIOVec(const char* compressed, size_t compressed_length,
-                              const struct iovec* iov, size_t iov_cnt);
-
-}  // namespace port
-}  // namespace tsl
+#include "xla/tsl/platform/snappy.h"
 
 #endif  // TENSORFLOW_TSL_PLATFORM_SNAPPY_H_
diff --git a/tsl/platform/tracing.cc b/tsl/platform/tracing.cc
index 66a239de0..1190afe72 100644
--- a/tsl/platform/tracing.cc
+++ b/tsl/platform/tracing.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tsl/platform/tracing.h"
+#include "xla/tsl/platform/tracing.h"
 
 #include <array>
 #include <atomic>
diff --git a/tsl/platform/tracing.h b/tsl/platform/tracing.h
index 89728d4d3..7fff1c519 100644
--- a/tsl/platform/tracing.h
+++ b/tsl/platform/tracing.h
@@ -1,4 +1,4 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,135 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_TSL_PLATFORM_TRACING_H_
 #define TENSORFLOW_TSL_PLATFORM_TRACING_H_
 
-// Tracing interface
-
-#include <array>
-
-#include "xla/tsl/platform/macros.h"
-#include "xla/tsl/platform/types.h"
-#include "tsl/platform/platform.h"
-#include "tsl/platform/stringpiece.h"
-
-namespace tsl {
-namespace tracing {
-
-// This enumeration contains the identifiers of all TensorFlow CPU profiler
-// events. It must be kept in sync with the code in GetEventCategoryName().
-enum struct EventCategory : unsigned {
-  kScheduleClosure = 0,
-  kRunClosure = 1,
-  kCompute = 2,
-  kNumCategories = 3  // sentinel - keep last
-};
-constexpr unsigned GetNumEventCategories() {
-  return static_cast<unsigned>(EventCategory::kNumCategories);
-}
-const char* GetEventCategoryName(EventCategory);
-
-// Interface for CPU profiler events.
-class EventCollector {
- public:
-  virtual ~EventCollector() {}
-  virtual void RecordEvent(uint64_t arg) const = 0;
-  virtual void StartRegion(uint64_t arg) const = 0;
-  virtual void StopRegion() const = 0;
-
-  // Annotates the current thread with a name.
-  static void SetCurrentThreadName(const char* name);
-  // Returns whether event collection is enabled.
-  static bool IsEnabled();
-
- private:
-  friend void SetEventCollector(EventCategory, const EventCollector*);
-  friend const EventCollector* GetEventCollector(EventCategory);
-
-  static std::array<const EventCollector*, GetNumEventCategories()> instances_;
-};
-// Set the callback for RecordEvent and ScopedRegion of category.
-// Not thread safe. Only call while EventCollector::IsEnabled returns false.
-void SetEventCollector(EventCategory category, const EventCollector* collector);
-
-// Returns the callback for RecordEvent and ScopedRegion of category if
-// EventCollector::IsEnabled(), otherwise returns null.
-inline const EventCollector* GetEventCollector(EventCategory category) {
-  if (EventCollector::IsEnabled()) {
-    return EventCollector::instances_[static_cast<unsigned>(category)];
-  }
-  return nullptr;
-}
-
-// Returns a unique id to pass to RecordEvent/ScopedRegion. Never returns zero.
-uint64_t GetUniqueArg();
-
-// Returns an id for name to pass to RecordEvent/ScopedRegion.
-uint64_t GetArgForName(absl::string_view name);
-
-// Records an atomic event through the currently registered EventCollector.
-inline void RecordEvent(EventCategory category, uint64_t arg) {
-  if (auto collector = GetEventCollector(category)) {
-    collector->RecordEvent(arg);
-  }
-}
-
-// Records an event for the duration of the instance lifetime through the
-// currently registered EventCollector.
-class ScopedRegion {
- public:
-  ScopedRegion(ScopedRegion&& other) noexcept  // Move-constructible.
-      : collector_(other.collector_) {
-    other.collector_ = nullptr;
-  }
-
-  ScopedRegion(EventCategory category, uint64_t arg)
-      : collector_(GetEventCollector(category)) {
-    if (collector_) {
-      collector_->StartRegion(arg);
-    }
-  }
-
-  // Same as ScopedRegion(category, GetUniqueArg()), but faster if
-  // EventCollector::IsEnabled() returns false.
-  explicit ScopedRegion(EventCategory category)
-      : collector_(GetEventCollector(category)) {
-    if (collector_) {
-      collector_->StartRegion(GetUniqueArg());
-    }
-  }
-
-  // Same as ScopedRegion(category, GetArgForName(name)), but faster if
-  // EventCollector::IsEnabled() returns false.
-  ScopedRegion(EventCategory category, absl::string_view name)
-      : collector_(GetEventCollector(category)) {
-    if (collector_) {
-      collector_->StartRegion(GetArgForName(name));
-    }
-  }
-
-  ~ScopedRegion() {
-    if (collector_ && EventCollector::IsEnabled()) {
-      collector_->StopRegion();
-    }
-  }
-
-  bool IsEnabled() const { return collector_ != nullptr; }
-
- private:
-  ScopedRegion(const ScopedRegion&) = delete;
-  void operator=(const ScopedRegion&) = delete;
-
-  const EventCollector* collector_;
-};
-
-// Return the pathname of the directory where we are writing log files.
-const char* GetLogDir();
-
-}  // namespace tracing
-}  // namespace tsl
-
-#if defined(PLATFORM_GOOGLE)
-#include "xla/tsl/platform/google/tracing_impl.h"
-#else
-#include "xla/tsl/platform/default/tracing_impl.h"
-#endif
+#include "xla/tsl/platform/tracing.h"
 
 #endif  // TENSORFLOW_TSL_PLATFORM_TRACING_H_