Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions encoding/lib/cpu/nms_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ std::vector<at::Tensor> Non_Max_Suppression_CPU(
rawMask[i] = 0;
}
++pos;
while(pos < (1+batch)*num_boxes-1 and (rawMask[pos] == 0))
while(pos < (1+batch)*num_boxes-1 && (rawMask[pos] == 0))
++pos;
}
}
Expand All @@ -89,7 +89,7 @@ std::vector<at::Tensor> Non_Max_Suppression_CPU(
rawMask[i] = 0;
}
++pos;
while(pos < (1+batch)*num_boxes-1 and (rawMask[pos] == 0))
while(pos < (1+batch)*num_boxes-1 && (rawMask[pos] == 0))
++pos;
}
}
Expand Down
14 changes: 7 additions & 7 deletions encoding/lib/gpu/lib_ssd.cu
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ void reduce_val_idx(int N, volatile float *vals, volatile int *idx) {
**/
template <int BLOCK_SIZE, int MAX_BBOXES_PER_BLOCK>
__global__
void encode(const int N_img, const float4 *bbox_in, const long *labels_in, const int *offsets,
void encode(const int N_img, const float4 *bbox_in, const int64_t *labels_in, const int *offsets,
const int M, const float4 *dboxes, // const float *ious,
const float criteria, uint8_t *workspace, float4 *bbox_out, long *label_out) {
const float criteria, uint8_t *workspace, float4 *bbox_out, int64_t *label_out) {

// Each block will take a single image's IoU set
const int img = blockIdx.x;
Expand Down Expand Up @@ -250,7 +250,7 @@ void encode(const int N_img, const float4 *bbox_in, const long *labels_in, const
/**
# filter IoU > 0.5
masks = best_dbox_ious > criteria
labels_out = torch.zeros(self.nboxes, dtype=torch.long)
labels_out = torch.zeros(self.nboxes, dtype=torch.int64)
#print(maxloc.shape, labels_in.shape, labels_out.shape)
labels_out[masks] = labels_in[best_dbox_idx[masks]]
bboxes_out = self.dboxes.clone()
Expand Down Expand Up @@ -323,7 +323,7 @@ void encode(const int N_img, const float4 *bbox_in, const long *labels_in, const

# filter IoU > 0.5
masks = best_dbox_ious > criteria
labels_out = torch.zeros(self.nboxes, dtype=torch.long)
labels_out = torch.zeros(self.nboxes, dtype=torch.int64)
#print(maxloc.shape, labels_in.shape, labels_out.shape)
labels_out[masks] = labels_in[best_dbox_idx[masks]]
bboxes_out = self.dboxes.clone()
Expand Down Expand Up @@ -373,7 +373,7 @@ std::vector<at::Tensor> box_encoder(const int N_img,
#ifdef DEBUG
printf("%d x %d\n", N_img * M, 4);
// at::Tensor bbox_out = dbox.scalar_type().tensor({N_img * M, 4});
printf("allocating %lu bytes for output labels\n", N_img*M*sizeof(long));
printf("allocating %lu bytes for output labels\n", N_img*M*sizeof(int64_t));
#endif
at::Tensor labels_out = at::empty({N_img * M}, labels_input.options());
C10_CUDA_CHECK(cudaGetLastError());
Expand All @@ -397,14 +397,14 @@ std::vector<at::Tensor> box_encoder(const int N_img,
const int THREADS_PER_BLOCK = 256;
encode<THREADS_PER_BLOCK, 256><<<N_img, THREADS_PER_BLOCK, 0, stream.stream()>>>(N_img,
(float4*)bbox_input.data_ptr<float>(),
labels_input.data_ptr<long>(),
labels_input.data_ptr<int64_t>(),
bbox_offsets.data_ptr<int>(),
M,
(float4*)dbox.data_ptr<float>(),
criteria,
workspace.data_ptr<uint8_t>(),
(float4*)bbox_out.data_ptr<float>(),
labels_out.data_ptr<long>());
labels_out.data_ptr<int64_t>());

C10_CUDA_CHECK(cudaGetLastError());
return {bbox_out, labels_out};
Expand Down