From df04d563e0d734b13b56383a48a282abdde492fc Mon Sep 17 00:00:00 2001
From: jichang
Date: Wed, 9 Jan 2019 19:01:17 -0800
Subject: [PATCH] Fix data incorrect for reduceSum function

---
 paddle/fluid/platform/cuda_device_function.h | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/paddle/fluid/platform/cuda_device_function.h b/paddle/fluid/platform/cuda_device_function.h
index 220093b79eb360..b218eeeaf5febf 100644
--- a/paddle/fluid/platform/cuda_device_function.h
+++ b/paddle/fluid/platform/cuda_device_function.h
@@ -101,6 +101,42 @@ HOSTDEVICE T Infinity() {
   return INFINITY;
 }
 
+#ifdef PADDLE_WITH_HIP
+// Block-wide sum for HIP builds, accumulated serially by thread 0.
+//
+// Each thread publishes `val` to shared memory at index `tid`; after a
+// block barrier, thread 0 adds shm[1..len-1] to its own `val`.
+//
+//   val: this thread's contribution.
+//   tid: slot index for this thread; must be in [0, 1024).
+//   len: number of slots to sum; must not exceed 1024.
+//
+// Only the thread with tid == 0 returns the total; every other thread
+// returns its own `val` unchanged. The function contains block barriers,
+// so every thread of the block must call it.
+template <typename T>
+__device__ T reduceSum(T val, int tid, int len) {
+  // Capacity of the scratch buffer (32 * 32 slots).
+  const int kMaxBlockThreads = 32 * 32;
+  __shared__ T shm[kMaxBlockThreads];
+  shm[tid] = val;
+
+  // Make every thread's slot visible before thread 0 reads them.
+  __syncthreads();
+
+  if (tid == 0) {
+    for (int i = 1; i < len; i++) val += shm[i];
+  }
+
+  // shm is a static buffer shared by every call of this instantiation:
+  // hold all threads here until thread 0 has finished reading, otherwise a
+  // thread that races ahead into the next reduceSum call could overwrite a
+  // slot that has not been summed yet.
+  __syncthreads();
+
+  return val;
+}
+#else
 template <typename T>
 __device__ T reduceSum(T val, int tid, int len) {
   // NOTE(zcd): The warp size should be taken from the
@@ -134,6 +170,7 @@ __device__ T reduceSum(T val, int tid, int len) {
   }
   return val;
 }
+#endif
 
 }  // namespace platform
 }  // namespace paddle