Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions paddle/common/backend_header.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@
#include <cuda_bf16.h>
#endif

#if defined(PADDLE_WITH_HIP) && defined(__HIPCC__)
#define PADDLE_HIP_BF16
#include <hip/hip_bfloat16.h>
#endif

#ifndef PADDLE_WITH_HIP
#if !defined(_WIN32)
#define PADDLE_ALIGN(x) __attribute__((aligned(x)))
Expand Down
48 changes: 25 additions & 23 deletions paddle/phi/common/bfloat16.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,21 +82,14 @@ struct PADDLE_ALIGN(2) bfloat16 {
~bfloat16() = default;

// Converts a float to bfloat16 and stores the resulting raw 16-bit pattern
// in `x`.
//
// Backend selection is one flat chain so that every branch is reachable:
//   - PADDLE_CUDA_BF16: convert with the CUDA intrinsic __float2bfloat16.
//   - PADDLE_HIP_BF16:  construct a hip_bfloat16 and copy its `data` field.
//   - otherwise:        fall back to cpu_float_to_bfloat16.
// There is deliberately no outer PADDLE_WITH_HIP bit-copy path: it would
// shadow the PADDLE_HIP_BF16 branch and would have to read the float through
// a uint32_t pointer, which violates the strict-aliasing rule.
HOSTDEVICE inline explicit bfloat16(float val) {
#if defined(PADDLE_CUDA_BF16)
  __nv_bfloat16 tmp = __float2bfloat16(val);
  x = *reinterpret_cast<uint16_t*>(&tmp);
#elif defined(PADDLE_HIP_BF16)
  hip_bfloat16 tmp(val);
  x = tmp.data;
#else
  x = cpu_float_to_bfloat16(val);
#endif
}

Expand All @@ -106,6 +99,10 @@ struct PADDLE_ALIGN(2) bfloat16 {
}
#endif

#ifdef PADDLE_HIP_BF16
// Builds a bfloat16 from a HIP bfloat16 by copying its raw `data` bits;
// no numeric conversion takes place.
HOSTDEVICE inline explicit bfloat16(const hip_bfloat16& val) : x(val.data) {}
#endif

// Generic converting constructor: casts `val` to float, runs it through the
// float constructor, and takes over the resulting raw bits.
template <class T>
HOSTDEVICE inline explicit bfloat16(const T& val) {
  const bfloat16 converted(static_cast<float>(val));
  x = converted.x;
}
Expand All @@ -118,6 +115,13 @@ struct PADDLE_ALIGN(2) bfloat16 {
}
#endif

#ifdef PADDLE_HIP_BF16
// Assigns from a HIP bfloat16 by copying its raw `data` bits.
// Returns *this so assignments can be chained.
HOSTDEVICE inline bfloat16& operator=(const hip_bfloat16& val) {
  this->x = val.data;
  return *this;
}
#endif

HOSTDEVICE inline bfloat16& operator=(bool b) {
x = b ? 0x3f80 : 0;
return *this;
Expand Down Expand Up @@ -175,26 +179,16 @@ struct PADDLE_ALIGN(2) bfloat16 {

// Conversion operators
// Converts the stored bfloat16 bits to float.
//
// Backend selection is one flat, balanced #if chain:
//   - PADDLE_CUDA_BF16: view `x` as __nv_bfloat16 and call __bfloat162float.
//   - PADDLE_HIP_BF16:  go through to_hip_bfloat16() and hip_bfloat16's own
//                       float conversion.
//   - otherwise:        copy the 16 bits into a zero-initialised float.
// There is deliberately no outer PADDLE_WITH_HIP path: it would shadow the
// PADDLE_HIP_BF16 branch and would have to reinterpret a uint32_t as float
// through a pointer cast, which violates the strict-aliasing rule.
HOSTDEVICE inline operator float() const {
#if defined(PADDLE_CUDA_BF16)
  return __bfloat162float(*reinterpret_cast<const __nv_bfloat16*>(&x));
#elif defined(PADDLE_HIP_BF16)
  return static_cast<float>(to_hip_bfloat16());
#else
  float val = 0.f;
  uint16_t temp = x;
  // The two bytes are written at byte offset 2 of the float; the low two
  // bytes stay zero. NOTE(review): this places the bits in the float's high
  // half only on little-endian targets — confirm no big-endian builds exist.
  std::memcpy(
      reinterpret_cast<char*>(&val) + 2, reinterpret_cast<char*>(&temp), 2);
  return val;
#endif
}

Expand All @@ -204,6 +198,14 @@ struct PADDLE_ALIGN(2) bfloat16 {
}
#endif

#if defined(PADDLE_HIP_BF16)
// Repackages the raw bits of this value as a hip_bfloat16 by writing `x`
// into its `data` field; no numeric conversion takes place.
HOSTDEVICE inline hip_bfloat16 to_hip_bfloat16() const {
  hip_bfloat16 result;
  result.data = x;
  return result;
}
#endif

// Truthiness test: bit 15 (the bfloat16 sign bit) is masked off first, so
// both +0 and -0 evaluate to false and every other bit pattern to true.
HOSTDEVICE inline explicit operator bool() const {
  const auto magnitude_bits = x & 0x7fff;
  return magnitude_bits != 0;
}

HOSTDEVICE inline explicit operator int8_t() const {
Expand Down