We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent aae8198 commit aa34cc0 (Copy full SHA for aa34cc0)
1 file changed
backends/gpu/lib/kernels/blas_kernels.cc
@@ -222,7 +222,10 @@ static Error BlasTrsmBatch(
222
const void** a_array = const_cast<const void**>(b_array + batchCount);
223
224
auto side_mode = wrapper::BlasSideMode::FromOpaqueValue(*sideMode);
225
- int32_t a_num_elements = side_mode == CUBLAS_SIDE_LEFT ? m * m : n * n;
+ int32_t a_num_elements = n * n;
226
+ if ((platform == wrapper::Platform::CUDA && side_mode == CUBLAS_SIDE_LEFT) ||
227
+ (platform == wrapper::Platform::ROCm && side_mode == rocblas_side_left))
228
+ a_num_elements = m * m;
229
ptrdiff_t a_batch_stride_bytes = *data_type_size_bytes * a_num_elements;
230
ptrdiff_t b_batch_stride_bytes = *data_type_size_bytes * m * n;
231
const char* a_ptr = static_cast<const char*>(A.pointer().raw(platform));
0 commit comments