// Source: grid_sampler/test_cuda_kernel.cpp (high-cloud/grid_sampler, master branch, GitHub)

#include <iostream>
#include <vector>
#include <cuda_runtime.h>

// C-linkage entry point of the grid sampler; the definition lives outside this
// file.
//
// Layouts as used by main() in this file:
//   input  : input_batch * input_channels * input_height * input_width floats,
//            indexed as (n, c, h, w) with w varying fastest.
//   grid   : input_batch * output_height * output_width * 2 floats, holding an
//            (x, y) pair per output pixel.
//   output : input_batch * input_channels * output_height * output_width
//            floats, indexed as (n, c, h, w).
//
// NOTE(review): the declaration does not say whether the three pointers must
// address host or device memory -- confirm against the implementation.
// NOTE(review): only the value 0 is exercised here for mode, padding_mode and
// align_corners (annotated at the call site as bilinear, zeros padding and
// align_corners = false); the meaning of other values is not visible here.
extern "C" void grid_sampler_cuda(
    const float* input,
    const float* grid,
    float* output,
    int input_batch,
    int input_channels,
    int input_height,
    int input_width,
    int output_height,
    int output_width,
    int mode,
    int padding_mode,
    int align_corners,
    cudaStream_t stream = 0);  // callers may omit the stream; it defaults to 0

int main() {
    std::cout << "=== CUDA Grid Sampler Kernel 测试 ===" << std::endl;

    // 测试参数
    int batch = 1;
    int channels = 2;
    int input_height = 3;
    int input_width = 3;
    int output_height = 2;
    int output_width = 2;

    // 创建测试数据
    std::vector<float> input(batch * channels * input_height * input_width);
    std::vector<float> grid(batch * output_height * output_width * 2);
    std::vector<float> output(batch * channels * output_height * output_width);

    // 填充输入数据
    for (int n = 0; n < batch; n++) {
        for (int c = 0; c < channels; c++) {
            for (int h = 0; h < input_height; h++) {
                for (int w = 0; w < input_width; w++) {
                    int idx = n * channels * input_height * input_width +
                             c * input_height * input_width +
                             h * input_width + w;
                    input[idx] = static_cast<float>(n * 1000 + c * 100 + h * 10 + w);
                }
            }
        }
    }

    // 填充网格数据 (从[-1, 1]映射)
    for (int n = 0; n < batch; n++) {
        for (int h = 0; h < output_height; h++) {
            for (int w = 0; w < output_width; w++) {
                int idx = n * output_height * output_width * 2 +
                         h * output_width * 2 + w * 2;
                grid[idx] = (w * 2.0f / (output_width - 1)) - 1.0f;  // x
                grid[idx + 1] = (h * 2.0f / (output_height - 1)) - 1.0f;  // y
            }
        }
    }

    // 打印输入数据
    std::cout << "输入数据:" << std::endl;
    for (int n = 0; n < batch; n++) {
        std::cout << "批次 " << n << ":" << std::endl;
        for (int c = 0; c < channels; c++) {
            std::cout << "  通道 " << c << ":" << std::endl;
            for (int h = 0; h < input_height; h++) {
                std::cout << "    ";
                for (int w = 0; w < input_width; w++) {
                    int idx = n * channels * input_height * input_width +
                             c * input_height * input_width +
                             h * input_width + w;
                    std::cout << input[idx] << " ";
                }
                std::cout << std::endl;
            }
        }
    }

    // 打印网格数据
    std::cout << "网格数据:" << std::endl;
    for (int n = 0; n < batch; n++) {
        std::cout << "批次 " << n << ":" << std::endl;
        for (int h = 0; h < output_height; h++) {
            std::cout << "    ";
            for (int w = 0; w < output_width; w++) {
                int idx = n * output_height * output_width * 2 +
                         h * output_width * 2 + w * 2;
                std::cout << "(" << grid[idx] << ", " << grid[idx + 1] << ") ";
            }
            std::cout << std::endl;
        }
    }

    // 调用CUDA kernel
    std::cout << "调用CUDA kernel..." << std::endl;

    grid_sampler_cuda(
        input.data(), grid.data(), output.data(),
        batch, channels, input_height, input_width,
        output_height, output_width,
        0,  // bilinear mode
        0,  // zeros padding
        0   // align_corners = false
    );

    // 检查CUDA错误
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        std::cerr << "CUDA错误: " << cudaGetErrorString(err) << std::endl;
        return 1;
    }

    // 打印输出数据
    std::cout << "输出数据:" << std::endl;
    for (int n = 0; n < batch; n++) {
        std::cout << "批次 " << n << ":" << std::endl;
        for (int c = 0; c < channels; c++) {
            std::cout << "  通道 " << c << ":" << std::endl;
            for (int h = 0; h < output_height; h++) {
                std::cout << "    ";
                for (int w = 0; w < output_width; w++) {
                    int idx = n * channels * output_height * output_width +
                             c * output_height * output_width +
                             h * output_width + w;
                    std::cout << output[idx] << " ";
                }
                std::cout << std::endl;
            }
        }
    }

    std::cout << "测试完成！" << std::endl;
    return 0;
}