zhanghang1989 · RongLiu-Leo · Jun 1, 2024
diff --git a/encoding/lib/cpu/nms_cpu.cpp b/encoding/lib/cpu/nms_cpu.cpp
@@ -67,7 +67,7 @@ std::vector<at::Tensor> Non_Max_Suppression_CPU(
             rawMask[i] = 0;
         }
         ++pos;
-        while(pos < (1+batch)*num_boxes-1 and (rawMask[pos] == 0))
+        while(pos < (1+batch)*num_boxes-1 && (rawMask[pos] == 0))
           ++pos;
       }
     }
@@ -89,7 +89,7 @@ std::vector<at::Tensor> Non_Max_Suppression_CPU(
             rawMask[i] = 0;
         }
         ++pos;
-        while(pos < (1+batch)*num_boxes-1 and (rawMask[pos] == 0))
+        while(pos < (1+batch)*num_boxes-1 && (rawMask[pos] == 0))
           ++pos;
       }
     }

diff --git a/encoding/lib/gpu/lib_ssd.cu b/encoding/lib/gpu/lib_ssd.cu
@@ -111,9 +111,9 @@ void reduce_val_idx(int N, volatile float *vals, volatile int *idx) {
  **/
 template <int BLOCK_SIZE, int MAX_BBOXES_PER_BLOCK>
 __global__
-void encode(const int N_img, const float4 *bbox_in, const long *labels_in, const int *offsets,
+void encode(const int N_img, const float4 *bbox_in, const int64_t *labels_in, const int *offsets,
             const int M, const float4 *dboxes, // const float *ious,
-            const float criteria, uint8_t *workspace, float4 *bbox_out, long *label_out) {
+            const float criteria, uint8_t *workspace, float4 *bbox_out, int64_t *label_out) {
 
   // Each block will take a single image's IoU set
   const int img = blockIdx.x;
@@ -250,7 +250,7 @@ void encode(const int N_img, const float4 *bbox_in, const long *labels_in, const
   /**
         # filter IoU > 0.5
         masks = best_dbox_ious > criteria
-        labels_out = torch.zeros(self.nboxes, dtype=torch.long)
+        labels_out = torch.zeros(self.nboxes, dtype=torch.int64)
         #print(maxloc.shape, labels_in.shape, labels_out.shape)
         labels_out[masks] = labels_in[best_dbox_idx[masks]]
         bboxes_out = self.dboxes.clone()
@@ -323,7 +323,7 @@ void encode(const int N_img, const float4 *bbox_in, const long *labels_in, const
 
         # filter IoU > 0.5
         masks = best_dbox_ious > criteria
-        labels_out = torch.zeros(self.nboxes, dtype=torch.long)
+        labels_out = torch.zeros(self.nboxes, dtype=torch.int64)
         #print(maxloc.shape, labels_in.shape, labels_out.shape)
         labels_out[masks] = labels_in[best_dbox_idx[masks]]
         bboxes_out = self.dboxes.clone()
@@ -373,7 +373,7 @@ std::vector<at::Tensor> box_encoder(const int N_img,
 #ifdef DEBUG
   printf("%d x %d\n", N_img * M, 4);
   // at::Tensor bbox_out = dbox.scalar_type().tensor({N_img * M, 4});
-  printf("allocating %lu bytes for output labels\n", N_img*M*sizeof(long));
+  printf("allocating %lu bytes for output labels\n", N_img*M*sizeof(int64_t));
 #endif
   at::Tensor labels_out = at::empty({N_img * M}, labels_input.options());
   C10_CUDA_CHECK(cudaGetLastError());
@@ -397,14 +397,14 @@ std::vector<at::Tensor> box_encoder(const int N_img,
   const int THREADS_PER_BLOCK = 256;
   encode<THREADS_PER_BLOCK, 256><<<N_img, THREADS_PER_BLOCK, 0, stream.stream()>>>(N_img,
                       (float4*)bbox_input.data_ptr<float>(),
-                      labels_input.data_ptr<long>(),
+                      labels_input.data_ptr<int64_t>(),
                       bbox_offsets.data_ptr<int>(),
                       M,
                       (float4*)dbox.data_ptr<float>(),
                       criteria,
                       workspace.data_ptr<uint8_t>(),
                       (float4*)bbox_out.data_ptr<float>(),
-                      labels_out.data_ptr<long>());
+                      labels_out.data_ptr<int64_t>());
 
   C10_CUDA_CHECK(cudaGetLastError());
   return {bbox_out, labels_out};