diff --git a/.github/workflows/ci-deeploy-testing.yml b/.github/workflows/ci-deeploy-testing.yml index a44d557265..c6e262ee40 100644 --- a/.github/workflows/ci-deeploy-testing.yml +++ b/.github/workflows/ci-deeploy-testing.yml @@ -40,19 +40,19 @@ jobs: include: - name: fail-input0 platform: Generic - test: testTypeInferenceDifferentTypes + test: Others/TypeInference type_map: "A=int8_t B=int8_t C=int8_t" offset_map: "A=0 B=0 C=0" shouldFail: true - name: fail-input2 platform: Generic - test: testTypeInferenceDifferentTypes + test: Others/TypeInference type_map: "A=int16_t B=int8_t C=int16_t" offset_map: "A=0 B=0 C=0" shouldFail: true - name: pass platform: Generic - test: testTypeInferenceDifferentTypes + test: Others/TypeInference type_map: "A=int16_t B=int8_t C=int32_t" offset_map: "A=0 B=0 C=0" shouldFail: false diff --git a/.github/workflows/ci-deeploy.yml b/.github/workflows/ci-deeploy.yml index 429e9c2027..20ee91e9fb 100644 --- a/.github/workflows/ci-deeploy.yml +++ b/.github/workflows/ci-deeploy.yml @@ -60,10 +60,10 @@ jobs: shell: bash run: | cd DeeployTest - python testMVP.py -t Tests/CCT/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=75000 --memAllocStrategy=MiniMalloc - python testMVP.py -t Tests/CCT/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=60000 --memAllocStrategy=MiniMalloc --shouldFail - python testMVP.py -t Tests/CCT/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=90000 --memAllocStrategy=TetrisRandom - python testMVP.py -t Tests/CCT/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=75000 --memAllocStrategy=TetrisRandom --shouldFail + python testMVP.py -t Tests/Models/CCT/FP32/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=75000 --memAllocStrategy=MiniMalloc + python testMVP.py -t Tests/Models/CCT/FP32/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=60000 --memAllocStrategy=MiniMalloc --shouldFail + python testMVP.py -t Tests/Models/CCT/FP32/CCT_1_16_16_8 
-p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=90000 --memAllocStrategy=TetrisRandom + python testMVP.py -t Tests/Models/CCT/FP32/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=75000 --memAllocStrategy=TetrisRandom --shouldFail deeploy-state-serialization: needs: select-env @@ -82,10 +82,10 @@ jobs: shell: bash run: | cd DeeployTest - python deeployStateEqualityTest.py -t ./Tests/simpleRegression -p QEMU-ARM - python deeployStateEqualityTest.py -t ./Tests/simpleRegression -p Siracusa - python deeployStateEqualityTest.py -t ./Tests/simpleRegression -p MemPool - python deeployStateEqualityTest.py -t ./Tests/simpleRegression -p Generic + python deeployStateEqualityTest.py -t ./Tests/Others/SimpleRegression -p QEMU-ARM + python deeployStateEqualityTest.py -t ./Tests/Others/SimpleRegression -p Siracusa + python deeployStateEqualityTest.py -t ./Tests/Others/SimpleRegression -p MemPool + python deeployStateEqualityTest.py -t ./Tests/Others/SimpleRegression -p Generic deeploy-memory-level-extension: needs: select-env @@ -104,10 +104,10 @@ jobs: shell: bash run: | cd DeeployTest - python testMemoryLevelExtension.py -t ./Tests/simpleRegression -p QEMU-ARM - python testMemoryLevelExtension.py -t ./Tests/simpleRegression -p Siracusa - python testMemoryLevelExtension.py -t ./Tests/simpleRegression -p MemPool - python testMemoryLevelExtension.py -t ./Tests/simpleRegression -p Generic + python testMemoryLevelExtension.py -t ./Tests/Others/SimpleRegression -p QEMU-ARM + python testMemoryLevelExtension.py -t ./Tests/Others/SimpleRegression -p Siracusa + python testMemoryLevelExtension.py -t ./Tests/Others/SimpleRegression -p MemPool + python testMemoryLevelExtension.py -t ./Tests/Others/SimpleRegression -p Generic deeploy-tiler-extension: needs: select-env @@ -126,14 +126,14 @@ jobs: shell: bash run: | cd DeeployTest - python testTilerExtension.py -p Siracusa -t ./Tests/simpleRegression - python testTilerExtension.py -p Siracusa -t ./Tests/simpleCNN - python 
testTilerExtension.py -p Siracusa -t ./Tests/testMatMul - python testTilerExtension.py -p Siracusa -t ./Tests/testMaxPool - python testTilerExtension.py -p Siracusa -t ./Tests/simpleRegression --l1 2000 --shouldFail - python testTilerExtension.py -p Siracusa -t ./Tests/simpleCNN --l1 2000 --shouldFail - python testTilerExtension.py -p Siracusa -t ./Tests/testMatMul --l1 2000 --shouldFail - python testTilerExtension.py -p Siracusa -t ./Tests/testMaxPool --l1 2000 --shouldFail + python testTilerExtension.py -p Siracusa -t ./Tests/Others/SimpleRegression + python testTilerExtension.py -p Siracusa -t ./Tests/Models/simpleCNN + python testTilerExtension.py -p Siracusa -t ./Tests/IntKernels/MatMul/regular + python testTilerExtension.py -p Siracusa -t ./Tests/IntKernels/MaxPool + python testTilerExtension.py -p Siracusa -t ./Tests/Others/SimpleRegression --l1 2000 --shouldFail + python testTilerExtension.py -p Siracusa -t ./Tests/Models/simpleCNN --l1 2000 --shouldFail + python testTilerExtension.py -p Siracusa -t ./Tests/IntKernels/MatMul/regular --l1 2000 --shouldFail + python testTilerExtension.py -p Siracusa -t ./Tests/IntKernels/MaxPool --l1 2000 --shouldFail deeploy-memory-allocation-extension: needs: select-env @@ -152,12 +152,12 @@ jobs: shell: bash run: | cd DeeployTest - python testTilerExtension.py -p Siracusa -t ./Tests/simpleRegression - python testTilerExtension.py -p Siracusa -t ./Tests/simpleCNN - python testTilerExtension.py -p Siracusa -t ./Tests/miniMobileNet - python testTilerExtension.py -p Siracusa -t ./Tests/miniMobileNetv2 - python testTilerExtension.py -p Siracusa -t ./Tests/testMatMul - python testTilerExtension.py -p Siracusa -t ./Tests/testMaxPool + python testTilerExtension.py -p Siracusa -t ./Tests/Others/SimpleRegression + python testTilerExtension.py -p Siracusa -t ./Tests/Models/simpleCNN + python testTilerExtension.py -p Siracusa -t ./Tests/Models/miniMobileNet + python testTilerExtension.py -p Siracusa -t ./Tests/Models/miniMobileNetv2 + 
python testTilerExtension.py -p Siracusa -t ./Tests/IntKernels/MatMul/regular + python testTilerExtension.py -p Siracusa -t ./Tests/IntKernels/MaxPool deeploy-typing: needs: select-env @@ -195,9 +195,9 @@ jobs: shell: bash run: | cd DeeployTest - python testPrintInputOutputTransformation.py -p Generic -t ./Tests/simpleRegression - python testPrintInputOutputTransformation.py -p Siracusa -t ./Tests/simpleRegression - python testDebugPrintPass.py -p Generic -t ./Tests/simpleRegression + python testPrintInputOutputTransformation.py -p Generic -t ./Tests/Others/SimpleRegression + python testPrintInputOutputTransformation.py -p Siracusa -t ./Tests/Others/SimpleRegression + python testDebugPrintPass.py -p Generic -t ./Tests/Others/SimpleRegression deeploy-regex-matching: needs: select-env diff --git a/.github/workflows/ci-platform-chimera.yml b/.github/workflows/ci-platform-chimera.yml index 79db97abd0..35c6bc586f 100644 --- a/.github/workflows/ci-platform-chimera.yml +++ b/.github/workflows/ci-platform-chimera.yml @@ -36,6 +36,6 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} test-names: | - Adder + IntKernels/Add/Regular simulators: | gvsoc diff --git a/.github/workflows/ci-platform-cortexm.yml b/.github/workflows/ci-platform-cortexm.yml index f9020f3646..57ab5ff7d8 100644 --- a/.github/workflows/ci-platform-cortexm.yml +++ b/.github/workflows/ci-platform-cortexm.yml @@ -36,17 +36,17 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} test-names: | - Adder - MultIO - test1DPad - test2DPad - testMatMul - testMatMulAdd - testMaxPool - testRQConv - testReduceSum - testReduceMean - testSlice + IntKernels/Add/Regular + IntKernels/Add/MultIO + IntKernels/Pad/1D + IntKernels/Pad/2D + IntKernels/MatMul/regular + IntKernels/MatMul/add + IntKernels/MaxPool + Others/RQConv + IntKernels/ReduceSum + IntKernels/ReduceMean + IntKernels/Slice cortexm-models: needs: 
select-env @@ -55,5 +55,5 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} test-names: | - simpleRegression - WaveFormer + Others/SimpleRegression + Models/WaveFormer diff --git a/.github/workflows/ci-platform-generic.yml b/.github/workflows/ci-platform-generic.yml index fb39a9bd53..5e726c325d 100644 --- a/.github/workflows/ci-platform-generic.yml +++ b/.github/workflows/ci-platform-generic.yml @@ -36,50 +36,89 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} test-names: | - Adder - MultIO - test1DConvolution - test2DConvolution - test1DDWConvolution - test2DDWConvolution - test1DPad - test2DPad - testGEMM - testMatMul - testMatMulAdd - testMaxPool - testRQConv - testRQMatMul - testReduceSum - testReduceMean - testSlice - testRequantizedDWConv - test2DRequantizedConv - iSoftmax - testFloatAdder - testFloatGEMM - testFloat2DConvolution - testFloat2DConvolutionBias - testFloat2DConvolutionZeroBias - testFloatLayerNorm - testFloatDiv - testFloat2DDWConvolution - testFloat2DDWConvolutionBias - testFloat2DDWConvolutionZeroBias - testFloatRelu - testFloatMaxPool - testFloatMatmul - testFloatReshapeWithSkipConnection - testFloatSoftmax - testFloatTranspose - testFloatMul - testFloatPowScalar - testFloatPowVector - testFloatSqrt - testFloatRMSNorm - Quant - Dequant - QuantizedLinear + FP32Kernels/Activations/ReLU + FP32Kernels/Activations/Softmax/Regular + + FP32Kernels/Add/regular + + FP32Kernels/Conv2D/DWBias + FP32Kernels/Conv2D/DWNoBias + FP32Kernels/Conv2D/DWZeroValuedBias + + FP32Kernels/Conv2D/RegularBias + FP32Kernels/Conv2D/RegularNoBias + FP32Kernels/Conv2D/RegularZeroValuedBias + + FP32Kernels/Div + FP32Kernels/GEMM/regular + FP32Kernels/MatMul + FP32Kernels/MaxPool + FP32Kernels/Mul + + FP32Kernels/Norm/LayerNorm + FP32Kernels/Norm/RMSNorm + + FP32Kernels/Pow/Scalar + FP32Kernels/Pow/Vector + + FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean + 
FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean_Add + FP32Kernels/ReduceMean/KeepDims/AllAxes + FP32Kernels/ReduceMean/KeepDims/Axes1_2_3 + FP32Kernels/ReduceMean/KeepDims/Axes1_3 + FP32Kernels/ReduceMean/KeepDims/Axes2_1 + FP32Kernels/ReduceMean/KeepDims/Axis0 + FP32Kernels/ReduceMean/KeepDims/Axis2 + FP32Kernels/ReduceMean/KeepDims/ReduceMean_Add + + FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean + FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean_Add + FP32Kernels/ReduceMean/NoKeepDims/AllAxes + FP32Kernels/ReduceMean/NoKeepDims/Axes1_2_3 + FP32Kernels/ReduceMean/NoKeepDims/Axes1_3 + FP32Kernels/ReduceMean/NoKeepDims/Axes2_1 + FP32Kernels/ReduceMean/NoKeepDims/Axis0 + FP32Kernels/ReduceMean/NoKeepDims/Axis2 + FP32Kernels/ReduceMean/NoKeepDims/ReduceMean_Add + + FP32Kernels/SkipConnection/ReshapeWithSkipConnection + FP32Kernels/Sqrt + FP32Kernels/Transpose + + IntKernels/Activations/Softmax/Regular + + IntKernels/Add/MultIO + IntKernels/Add/Regular + + IntKernels/Conv/1D/DW + IntKernels/Conv/1D/Regular + + IntKernels/Conv/2D/DW + IntKernels/Conv/2D/Regular + + IntKernels/GEMM + + IntKernels/MatMul/add + IntKernels/MatMul/regular + + IntKernels/MaxPool + + IntKernels/Pad/1D + IntKernels/Pad/2D + + IntKernels/ReduceMean + IntKernels/ReduceSum + IntKernels/Slice + + Models/TinyViT/5M/Layers/FP32/ReduceMean + + Others/Dequant + Others/Quant + Others/QuantizedLinear + Others/RequantizedConv2D + Others/RequantizedDWConv + Others/RQConv + Others/RQMatMul generic-models: needs: select-env @@ -88,16 +127,20 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} test-names: | - simpleRegression - WaveFormer - simpleCNN - ICCT - ICCT_ITA - ICCT_8 - ICCT_ITA_8 - miniMobileNet - miniMobileNetv2 - CCT/CCT_1_16_16_8 - CCT/CCT_2_32_32_128_Opset20 - testFloatDemoTinyViT - Autoencoder1D + Models/Autoencoder1D + + Models/CCT/FP32/CCT_1_16_16_8 + Models/CCT/FP32/CCT_2_32_32_128_Opset20 + Models/CCT/Int/ICCT + Models/CCT/Int/ICCT_8 
+ Models/CCT/Int/ICCT_ITA + Models/CCT/Int/ICCT_ITA_8 + + Models/miniMobileNet + Models/miniMobileNetv2 + + Models/simpleCNN + Models/TinyViT/Demo + Models/WaveFormer + + Others/SimpleRegression diff --git a/.github/workflows/ci-platform-mempool.yml b/.github/workflows/ci-platform-mempool.yml index f7394c04da..60d76b8233 100644 --- a/.github/workflows/ci-platform-mempool.yml +++ b/.github/workflows/ci-platform-mempool.yml @@ -36,26 +36,35 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} test-names: | - Adder - MultIO - test1DConvolution - test2DConvolution - test1DDWConvolution - test2DDWConvolution - test1DPad - test2DPad - testGEMM - testMatMul - testMatMulAdd - testMaxPool - testRQConv - testRQGEMM - testRQMatMul - testReduceSum - testReduceMean - testSlice - testRequantizedDWConv - test2DRequantizedConv + IntKernels/Add/MultIO + IntKernels/Add/Regular + + IntKernels/Conv/1D/DW + IntKernels/Conv/1D/Regular + + IntKernels/Conv/2D/DW + IntKernels/Conv/2D/Regular + + IntKernels/GEMM + + IntKernels/MatMul/add + IntKernels/MatMul/regular + + IntKernels/MaxPool + + IntKernels/Pad/1D + IntKernels/Pad/2D + + IntKernels/ReduceMean + IntKernels/ReduceSum + + IntKernels/Slice + + Others/RequantizedConv2D + Others/RequantizedDWConv + Others/RQConv + Others/RQGEMM + Others/RQMatMul mempool-models: needs: select-env @@ -64,10 +73,13 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} test-names: | - simpleRegression - simpleCNN - ICCT - ICCT_ITA - ICCT_8 - miniMobileNet - miniMobileNetv2 + Models/CCT/Int/ICCT + Models/CCT/Int/ICCT_8 + Models/CCT/Int/ICCT_ITA + + Models/miniMobileNet + Models/miniMobileNetv2 + + Models/simpleCNN + + Others/SimpleRegression diff --git a/.github/workflows/ci-platform-siracusa-neureka-tiled.yml b/.github/workflows/ci-platform-siracusa-neureka-tiled.yml index e9f920931a..161f6aa1d3 100644 --- 
a/.github/workflows/ci-platform-siracusa-neureka-tiled.yml +++ b/.github/workflows/ci-platform-siracusa-neureka-tiled.yml @@ -37,10 +37,10 @@ jobs: docker-image: ${{ needs.select-env.outputs.image }} tests-config: | [ - {"name":"testRequantizedLinear","L1":[16000]}, - {"name":"testPointwise","L1":[32000]}, - {"name":"testPointwiseConvBNReLU","L1":[32000]}, - {"name":"testPointwiseUnsignedWeights","L1":[32000]} + {"name":"Others/RequantizedLinear","L1":[16000]}, + {"name":"IntKernels/Conv/2D/PW/regular","L1":[32000]}, + {"name":"IntKernels/Conv/2D/PW/convBNReLU","L1":[32000]}, + {"name":"IntKernels/Conv/2D/PW/unsignedWeights","L1":[32000]} ] num-cores: 8 @@ -52,10 +52,10 @@ jobs: docker-image: ${{ needs.select-env.outputs.image }} tests-config: | [ - {"name":"testRequantizedLinear","L1":[16000]}, - {"name":"testPointwise","L1":[32000]}, - {"name":"testPointwiseConvBNReLU","L1":[32000]}, - {"name":"testPointwiseUnsignedWeights","L1":[32000]} + {"name":"Others/RequantizedLinear","L1":[16000]}, + {"name":"IntKernels/Conv/2D/PW/regular","L1":[32000]}, + {"name":"IntKernels/Conv/2D/PW/convBNReLU","L1":[32000]}, + {"name":"IntKernels/Conv/2D/PW/unsignedWeights","L1":[32000]} ] num-cores: 8 # double buffer enabled: @@ -67,10 +67,10 @@ jobs: fail-fast: false matrix: test-data: - - { name: "miniMobileNet", L1: [2000] } # LMACAN: 1000 leads to non-2d transfers in L3! - - { name: "Attention", L1: [2500] } - - { name: "Transformer", L1: [15000] } - - { name: "microLlama/microLlama1", L1: [10000] } + - { name: "Models/miniMobileNet", L1: [2000] } # LMACAN: 1000 leads to non-2d transfers in L3! 
+ - { name: "IntKernels/Attention", L1: [2500] } + - { name: "Others/Transformer", L1: [15000] } + - { name: "Models/microLlama/microLlama1", L1: [10000] } num-cores: [8] default-memory-level: ["L3"] uses: ./.github/workflows/_runner-siracusa-neureka-tiled.yml @@ -88,9 +88,9 @@ jobs: fail-fast: false matrix: test-data: - - { name: "miniMobileNet", L1: [2000] } # LMACAN note - - { name: "Attention", L1: [5000] } - - { name: "Transformer", L1: [30000] } + - { name: "Models/miniMobileNet", L1: [2000] } # LMACAN note + - { name: "IntKernels/Attention", L1: [5000] } + - { name: "Others/Transformer", L1: [30000] } num-cores: [8] double-buffer: [true] default-memory-level: ["L3"] @@ -112,10 +112,10 @@ jobs: docker-image: ${{ needs.select-env.outputs.image }} tests-config: | [ - {"name":"testRequantizedLinear","L1":[16000]}, - {"name":"testPointwise","L1":[32000]}, - {"name":"testPointwiseConvBNReLU","L1":[32000]}, - {"name":"testPointwiseUnsignedWeights","L1":[32000]} + {"name":"Others/RequantizedLinear","L1":[16000]}, + {"name":"IntKernels/Conv/2D/PW/regular","L1":[32000]}, + {"name":"IntKernels/Conv/2D/PW/convBNReLU","L1":[32000]}, + {"name":"IntKernels/Conv/2D/PW/unsignedWeights","L1":[32000]} ] num-cores: 8 neureka-wmem: true @@ -126,10 +126,10 @@ jobs: fail-fast: false matrix: test-data: - - { name: "miniMobileNet", L1: [2000] } # LMACAN note - - { name: "Attention", L1: [3500] } - # - { name: "Transformer", L1: [30000] } - - { name: "microLlama/microLlama1", L1: [10000] } + - { name: "Models/miniMobileNet", L1: [2000] } # LMACAN note + - { name: "IntKernels/Attention", L1: [3500] } + # - { name: "Others/Transformer", L1: [30000] } + - { name: "Models/microLlama/microLlama1", L1: [10000] } num-cores: [8] double-buffer: [true] default-memory-level: ["L3"] diff --git a/.github/workflows/ci-platform-siracusa-tiled.yml b/.github/workflows/ci-platform-siracusa-tiled.yml index dc52f6ad7f..06a149c694 100644 --- a/.github/workflows/ci-platform-siracusa-tiled.yml +++ 
b/.github/workflows/ci-platform-siracusa-tiled.yml @@ -37,36 +37,62 @@ jobs: docker-image: ${{ needs.select-env.outputs.image }} tests-config: | [ - {"name":"testMatMul","L1":[64000,32000,16000]}, - {"name":"test2DRequantizedConv","L1":[8000,6000,4000]}, - {"name":"test2DRequantizedStriddedPaddedConv","L1":[600]}, - {"name":"testRequantizedDWConv","L1":[2561]}, - {"name":"iSoftmax","L1":[800,500,300]}, - {"name":"testConcat","L1":[32000,16000,8000]}, - {"name":"testRMSNorm","L1":[2048,1024,512]}, - {"name":"Hardswish","L1":[750]}, - {"name":"RQHardswish","L1":[750]}, - {"name":"testFloatGEMM","L1":[8000]}, - - {"name":"testFloat2DConvolution","L1":[1600]}, - {"name":"testFloat2DConvolutionBias","L1":[6600]}, - {"name":"testFloat2DConvolutionZeroBias","L1":[6600]}, - - {"name":"testFloat2DDWConvolution","L1":[7200]}, - {"name":"testFloat2DDWConvolutionBias","L1":[7200]}, - {"name":"testFloat2DDWConvolutionZeroBias","L1":[7200]}, - - {"name":"testFloatLayerNorm","L1":[2000]}, - {"name":"testFloatMaxPool","L1":[2000]}, - {"name":"testFloatMatmul","L1":[2000]}, - {"name":"testFloatRelu","L1":[2000]}, - {"name":"testFloatReshapeWithSkipConnection","L1":[1400]}, - {"name":"testFloatSoftmax","L1":[4000]}, - {"name":"testFloatTranspose","L1":[2000]}, - {"name":"testFloatMul","L1":[2000]}, - {"name":"largeFloatAdd","L1":[220000]}, - {"name":"testRQGEMMwBatch","L1":[20000]}, - {"name":"testMatMulBatch","L1":[20000]} + {"name":"FP32Kernels/Activations/ReLU","L1":[2000]}, + {"name":"FP32Kernels/Activations/Softmax/Regular","L1":[4000]}, + + {"name":"FP32Kernels/Add/large","L1":[220000]}, + + {"name":"FP32Kernels/Conv2D/DWBias","L1":[7200]}, + {"name":"FP32Kernels/Conv2D/DWNoBias","L1":[7200]}, + {"name":"FP32Kernels/Conv2D/DWZeroValuedBias","L1":[7200]}, + {"name":"FP32Kernels/Conv2D/RegularBias","L1":[6600]}, + {"name":"FP32Kernels/Conv2D/RegularNoBias","L1":[1600]}, + {"name":"FP32Kernels/Conv2D/RegularZeroValuedBias","L1":[6600]}, + + 
{"name":"FP32Kernels/GEMM/regular","L1":[8000]}, + {"name":"FP32Kernels/MatMul","L1":[2000]}, + {"name":"FP32Kernels/MaxPool","L1":[2000]}, + {"name":"FP32Kernels/Mul","L1":[2000]}, + {"name":"FP32Kernels/Norm/LayerNorm","L1":[2000]}, + + {"name":"FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean","L1":[8000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean_Add","L1":[8000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/AllAxes","L1":[50000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/Axes1_2_3","L1":[50000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/Axes1_3","L1":[5000,50000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/Axes2_1","L1":[6200,50000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/Axis0","L1":[8400,50000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/Axis2","L1":[8400,50000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/ReduceMean_Add","L1":[8000]}, + + {"name":"FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean","L1":[8000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean_Add","L1":[8000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/AllAxes","L1":[50000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/Axes1_2_3","L1":[50000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/Axes1_3","L1":[5000,50000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/Axes2_1","L1":[6200,50000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/Axis0","L1":[8400,50000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/Axis2","L1":[8400,50000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/ReduceMean_Add","L1":[8000]}, + + {"name":"FP32Kernels/SkipConnection/ReshapeWithSkipConnection","L1":[1400]}, + {"name":"FP32Kernels/Transpose","L1":[2000]}, + + {"name":"IntKernels/Activations/Hardswish","L1":[750]}, + {"name":"IntKernels/Activations/Softmax/Regular","L1":[800,500,300]}, + + {"name":"IntKernels/Concat","L1":[32000,16000,8000]}, + + {"name":"IntKernels/MatMul/batch","L1":[20000]}, + {"name":"IntKernels/MatMul/regular","L1":[64000,32000,16000]}, + + 
{"name":"IntKernels/RMSNorm","L1":[2048,1024,512]}, + + {"name":"Others/RequantizedConv2D","L1":[8000,6000,4000]}, + {"name":"Others/RequantizedDWConv","L1":[2561]}, + {"name":"Others/RequantizedStriddedPaddedConv2D","L1":[600]}, + {"name":"Others/RQGEMMwBatch","L1":[20000]}, + {"name":"Others/RQHardswish","L1":[750]} ] num-cores: 8 @@ -78,32 +104,55 @@ jobs: docker-image: ${{ needs.select-env.outputs.image }} tests-config: | [ - {"name":"testMatMul","L1":[64000,32000,16000]}, - {"name":"test2DRequantizedConv","L1":[8000,6000,5000]}, - {"name":"testRequantizedDWConv","L1":[5121]}, - {"name":"iSoftmax","L1":[1600,1000,600]}, - {"name":"testConcat","L1":[64000,32000,16000]}, - {"name":"testRMSNorm","L1":[4096,2048,1024]}, - {"name":"Hardswish","L1":[750]}, - {"name":"RQHardswish","L1":[800]}, - {"name":"testFloatGEMM","L1":[8000]}, - - {"name":"testFloat2DConvolution","L1":[2000]}, - {"name":"testFloat2DConvolutionBias","L1":[8800]}, - {"name":"testFloat2DConvolutionZeroBias","L1":[8800]}, - - {"name":"testFloat2DDWConvolution","L1":[9800]}, - {"name":"testFloat2DDWConvolutionBias","L1":[10000]}, - {"name":"testFloat2DDWConvolutionZeroBias","L1":[9800]}, - - {"name":"testFloatLayerNorm","L1":[2000]}, - {"name":"testFloatMaxPool","L1":[5000]}, - {"name":"testFloatMatmul","L1":[5000]}, - {"name":"testFloatRelu","L1":[20]}, - {"name":"testFloatReshapeWithSkipConnection","L1":[2600]}, - {"name":"testFloatSoftmax","L1":[8000]}, - {"name":"testFloatTranspose","L1":[2000]}, - {"name":"testFloatMul","L1":[2000]} + {"name":"FP32Kernels/Activations/ReLU","L1":[20]}, + {"name":"FP32Kernels/Activations/Softmax/Regular","L1":[8000]}, + + {"name":"FP32Kernels/Conv2D/DWBias","L1":[10000]}, + {"name":"FP32Kernels/Conv2D/DWNoBias","L1":[9800]}, + {"name":"FP32Kernels/Conv2D/DWZeroValuedBias","L1":[9800]}, + {"name":"FP32Kernels/Conv2D/RegularBias","L1":[8800]}, + {"name":"FP32Kernels/Conv2D/RegularNoBias","L1":[2000]}, + 
{"name":"FP32Kernels/Conv2D/RegularZeroValuedBias","L1":[8800]}, + + {"name":"FP32Kernels/GEMM/regular","L1":[8000]}, + {"name":"FP32Kernels/MatMul","L1":[5000]}, + {"name":"FP32Kernels/MaxPool","L1":[5000]}, + {"name":"FP32Kernels/Mul","L1":[2000]}, + {"name":"FP32Kernels/Norm/LayerNorm","L1":[2000]}, + + {"name":"FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean","L1":[8000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean_Add","L1":[8000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/AllAxes","L1":[100000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/Axes1_2_3","L1":[100000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/Axes1_3","L1":[10000,50000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/Axes2_1","L1":[13000,50000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/Axis0","L1":[17000,50000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/Axis2","L1":[17000,50000]}, + {"name":"FP32Kernels/ReduceMean/KeepDims/ReduceMean_Add","L1":[8000]}, + + {"name":"FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean","L1":[8000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean_Add","L1":[8000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/AllAxes","L1":[100000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/Axes1_2_3","L1":[100000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/Axes1_3","L1":[10000,50000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/Axes2_1","L1":[13000,50000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/Axis0","L1":[17000,50000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/Axis2","L1":[17000,50000]}, + {"name":"FP32Kernels/ReduceMean/NoKeepDims/ReduceMean_Add","L1":[8000]}, + + {"name":"FP32Kernels/SkipConnection/ReshapeWithSkipConnection","L1":[2600]}, + {"name":"FP32Kernels/Transpose","L1":[2000]}, + + {"name":"IntKernels/Activations/Hardswish","L1":[750]}, + {"name":"IntKernels/Activations/Softmax/Regular","L1":[1600,1000,600]}, + + {"name":"IntKernels/Concat","L1":[64000,32000,16000]}, + 
{"name":"IntKernels/MatMul/regular","L1":[64000,32000,16000]}, + {"name":"IntKernels/RMSNorm","L1":[4096,2048,1024]}, + + {"name":"Others/RequantizedConv2D","L1":[8000,6000,5000]}, + {"name":"Others/RequantizedDWConv","L1":[5121]}, + {"name":"Others/RQHardswish","L1":[800]} ] num-cores: 8 double-buffer: true @@ -114,32 +163,40 @@ jobs: fail-fast: false matrix: test-data: - - name: "simpleRegression" - L1: [45000, 30000, 15000] - - name: "miniMobileNet" - L1: [60000, 12000, 6000, 3000] - - name: "miniMobileNetv2" - L1: [60000, 16000, 12000, 8000] - - name: "Attention" + - name: "IntKernels/Attention" L1: [60000, 10000, 5000] - - name: "microLlama/microLlama1" + + - name: "Models/CCT/FP32/CCT_1_16_16_8" + L1: [2000, 64000] + + - name: "Models/microLlama/microLlama1" L1: [60000, 10000, 5000] - - name: "microLlama/microLlama8" + - name: "Models/microLlama/microLlama8" L1: [60000, 10000, 5000] - - name: "microLlama/microLlama8_parallel" + - name: "Models/microLlama/microLlama8_parallel" L1: [60000, 10000, 5000] - - name: "MLPerf/KeywordSpotting" + + - name: "Models/miniMobileNet" + L1: [60000, 12000, 6000, 3000] + - name: "Models/miniMobileNetv2" + L1: [60000, 16000, 12000, 8000] + + - name: "Models/MLPerf/AnomalyDetection" L1: [64000] - - name: "MLPerf/ImageClassification" + - name: "Models/MLPerf/ImageClassification" L1: [64000] - - name: "MLPerf/AnomalyDetection" + - name: "Models/MLPerf/KeywordSpotting" L1: [64000] - - name: "CCT/CCT_1_16_16_8" - L1: [2000, 64000] - - name: "testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8" - L1: [4000, 64000] - - name: "testFloatDemoTinyViT" + + - name: "Models/TinyViT/5M/Layers/FP32/ReduceMean" + L1: [200, 40000] + - name: "Models/TinyViT/Demo" L1: [4000] + + - name: "Others/SimpleRegression" + L1: [45000, 30000, 15000] + - name: "Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8" + L1: [4000, 64000] num-cores: [8] uses: ./.github/workflows/_runner-siracusa-tiled.yml with: @@ -155,24 +212,31 @@ jobs: fail-fast: false 
matrix: test-data: - - name: "simpleRegression" - L1: [45000, 30000, 16000] # SCHEREMO note - - name: "miniMobileNet" - L1: [60000, 12000, 6000] # SCHEREMO note - - name: "miniMobileNetv2" - L1: [60000, 16000, 12000, 8000] - - name: "Attention" + - name: "IntKernels/Attention" L1: [60000, 10000, 5000, 2500] - - name: "Transformer" - L1: [60000, 30000, 15000] - - name: "microLlama/microLlama1" - L1: [60000, 10000, 5000] - - name: "CCT/CCT_2_32_32_128" + + - name: "Models/CCT/FP32/CCT_2_32_32_128" L1: [64000, 128000] - - name: "testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128" - L1: [32000, 64000] - - name: "testFloatDemoTinyViT" + + - name: "Models/microLlama/microLlama1" + L1: [60000, 10000, 5000] + + - name: "Models/miniMobileNet" + L1: [60000, 12000, 6000] # SCHEREMO note + - name: "Models/miniMobileNetv2" + L1: [60000, 16000, 12000, 8000] + + - name: "Models/TinyViT/5M/Layers/FP32/ReduceMean" + L1: [200, 40000] + - name: "Models/TinyViT/Demo" L1: [4000] + + - name: "Others/SimpleRegression" + L1: [45000, 30000, 16000] # SCHEREMO note + - name: "Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128" + L1: [32000, 64000] + - name: "Others/Transformer" + L1: [60000, 30000, 15000] num-cores: [8] default-memory-level: ["L3"] uses: ./.github/workflows/_runner-siracusa-tiled.yml @@ -184,35 +248,41 @@ jobs: L1: ${{ toJson(matrix.test-data.L1) }} default-memory-level: ${{ matrix.default-memory-level }} - # TEMPORARILY DISABLE L3 TRANSFER DUE TO DRIVER BUG CAUSING SPORADIC CRASH siracusa-models-tiled-doublebuffer-L3: needs: select-env strategy: fail-fast: false matrix: test-data: - - name: "simpleRegression" - L1: [60000, 45000, 30000] - - name: "miniMobileNet" - L1: [60000, 24000, 12000, 6000] - - name: "miniMobileNetv2" - L1: [60000, 32000, 24000, 16000] - - name: "Attention" + - name: "IntKernels/Attention" L1: [60000, 20000, 10000, 5000] - - name: "Transformer" - L1: [60000, 30000, 15000] - - name: "microLlama/microLlama1" + + - name: 
"Models/CCT/FP32/CCT_2_32_32_128" + L1: [64000, 128000] + + - name: "Models/microLlama/microLlama1" L1: [60000, 20000, 10000] - - name: "microLlama/microLlama8" + - name: "Models/microLlama/microLlama8" L1: [60000, 20000, 10000] - - name: "microLlama/microLlama8_parallel" + - name: "Models/microLlama/microLlama8_parallel" L1: [60000, 20000, 10000] - - name: "CCT/CCT_2_32_32_128" - L1: [64000, 128000] - - name: "testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128" - L1: [8000, 64000] - - name: "testFloatDemoTinyViT" + + - name: "Models/miniMobileNet" + L1: [60000, 24000, 12000, 6000] + - name: "Models/miniMobileNetv2" + L1: [60000, 32000, 24000, 16000] + + - name: "Models/TinyViT/5M/Layers/FP32/ReduceMean" + L1: [200, 40000] + - name: "Models/TinyViT/Demo" L1: [4000] + + - name: "Others/SimpleRegression" + L1: [60000, 45000, 30000] + - name: "Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128" + L1: [8000, 64000] + - name: "Others/Transformer" + L1: [60000, 30000, 15000] num-cores: [8] double-buffer: [true] default-memory-level: ["L3"] diff --git a/.github/workflows/ci-platform-siracusa.yml b/.github/workflows/ci-platform-siracusa.yml index f59f7fa884..162f0bc18b 100644 --- a/.github/workflows/ci-platform-siracusa.yml +++ b/.github/workflows/ci-platform-siracusa.yml @@ -36,47 +36,79 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} test-names: | - Adder - MultIO - test1DPad - test2DPad - testMatMul - testMatMulAdd - testRequantizedDWConv - test2DRequantizedConv - iSoftmax - testConcat - testRMSNorm - trueIntegerDivSandwich - Hardswish - RQHardswish - testBacktracking - testFloatAdder - testFloatGEMM - - testFloat2DConvolution - testFloat2DConvolutionBias - testFloat2DConvolutionZeroBias - - testFloat2DDWConvolution - testFloat2DDWConvolutionBias - testFloat2DDWConvolutionZeroBias - - testFloatLayerNorm - testFloatRelu - testFloatMaxPool - testFloatMatmul - testFloatSoftmax - testFloatTranspose - 
testFloatMul - Quant - Dequant - testFloatReduceSum - testFloatReshapeWithSkipConnection - testFloatSoftmaxGrad - testFloatSoftmaxCrossEntropy - testFloatSoftmaxCrossEntropyGrad - QuantizedLinear + FP32Kernels/Activations/ReLU + + FP32Kernels/Activations/Softmax/CrossEntropy + FP32Kernels/Activations/Softmax/CrossEntropyGrad + FP32Kernels/Activations/Softmax/Grad + FP32Kernels/Activations/Softmax/Regular + + FP32Kernels/Add/regular + + FP32Kernels/Conv2D/DWBias + FP32Kernels/Conv2D/DWNoBias + FP32Kernels/Conv2D/DWZeroValuedBias + FP32Kernels/Conv2D/RegularBias + FP32Kernels/Conv2D/RegularNoBias + FP32Kernels/Conv2D/RegularZeroValuedBias + + FP32Kernels/GEMM/regular + FP32Kernels/MatMul + FP32Kernels/MaxPool + FP32Kernels/Mul + FP32Kernels/Norm/LayerNorm + + FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean + FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean_Add + FP32Kernels/ReduceMean/KeepDims/AllAxes + FP32Kernels/ReduceMean/KeepDims/Axes1_2_3 + FP32Kernels/ReduceMean/KeepDims/Axes1_3 + FP32Kernels/ReduceMean/KeepDims/Axes2_1 + FP32Kernels/ReduceMean/KeepDims/Axis0 + FP32Kernels/ReduceMean/KeepDims/Axis2 + FP32Kernels/ReduceMean/KeepDims/ReduceMean_Add + + FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean + FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean_Add + FP32Kernels/ReduceMean/NoKeepDims/AllAxes + FP32Kernels/ReduceMean/NoKeepDims/Axes1_2_3 + FP32Kernels/ReduceMean/NoKeepDims/Axes1_3 + FP32Kernels/ReduceMean/NoKeepDims/Axes2_1 + FP32Kernels/ReduceMean/NoKeepDims/Axis0 + FP32Kernels/ReduceMean/NoKeepDims/Axis2 + FP32Kernels/ReduceMean/NoKeepDims/ReduceMean_Add + + FP32Kernels/ReduceSum + FP32Kernels/SkipConnection/ReshapeWithSkipConnection + + FP32Kernels/Transpose + + IntKernels/Activations/Hardswish + IntKernels/Activations/Softmax/Regular + + IntKernels/Add/MultIO + IntKernels/Add/Regular + + IntKernels/Concat + + IntKernels/MatMul/add + IntKernels/MatMul/regular + + IntKernels/Pad/1D + IntKernels/Pad/2D + + IntKernels/RMSNorm + + 
Models/TinyViT/5M/Layers/FP32/ReduceMean + + Others/Backtracking + Others/Dequant + Others/Quant + Others/QuantizedLinear + Others/RequantizedConv2D + Others/RequantizedDWConv + Others/RQHardswish + Others/trueIntegerDivSandwich num-cores: 8 siracusa-models: @@ -86,15 +118,20 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} test-names: | - simpleRegression - miniMobileNet - miniMobileNetv2 - Attention - MLPerf/KeywordSpotting - MLPerf/ImageClassification - MLPerf/AnomalyDetection - CCT/CCT_1_16_16_8 - CCT/CCT_2_32_32_128_Opset20 - testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8 - testFloatDemoTinyViT + IntKernels/Attention + + Models/CCT/FP32/CCT_1_16_16_8 + Models/CCT/FP32/CCT_2_32_32_128_Opset20 + + Models/miniMobileNet + Models/miniMobileNetv2 + + Models/MLPerf/KeywordSpotting + Models/MLPerf/ImageClassification + Models/MLPerf/AnomalyDetection + + Models/TinyViT/Demo + + Others/SimpleRegression + Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8 num-cores: 8 diff --git a/.github/workflows/ci-platform-snitch-tiled.yml b/.github/workflows/ci-platform-snitch-tiled.yml index 71d6a93009..0df2f5611a 100644 --- a/.github/workflows/ci-platform-snitch-tiled.yml +++ b/.github/workflows/ci-platform-snitch-tiled.yml @@ -37,14 +37,17 @@ jobs: docker-image: ${{ needs.select-env.outputs.image }} tests-config: | [ - {"name":"TestiNoNorm","L1":[5000,10000]}, - {"name":"TestAdderLarge","L1":[5000,10000]}, - {"name":"TestiSoftmaxLarge","L1":[5000,10000]}, - {"name":"testRQGEMM","L1":[2000,5000]}, - {"name":"testFloatSoftmax","L1":[2000,5000,10000]}, - {"name":"TestRQAdd","L1":[5000,10000]}, - {"name":"testFloatGEMM","L1":[2000,5000,10000]}, - {"name":"testFloatGEMMtransB","L1":[2000,5000,10000]} + {"name":"IntKernels/Add/Large","L1":[5000,10000]}, + {"name":"IntKernels/Activations/Softmax/Large","L1":[5000,10000]}, + + {"name":"FP32Kernels/Activations/Softmax/Regular","L1":[2000,5000,10000]}, + + 
{"name":"FP32Kernels/GEMM/regular","L1":[2000,5000,10000]}, + {"name":"FP32Kernels/GEMM/transB","L1":[2000,5000,10000]}, + + {"name":"Others/iNoNorm","L1":[5000,10000]}, + {"name":"Others/RQAdd","L1":[5000,10000]}, + {"name":"Others/RQGEMM","L1":[2000,5000]} ] simulators: | gvsoc diff --git a/.github/workflows/ci-platform-snitch.yml b/.github/workflows/ci-platform-snitch.yml index 3968ba3201..94a87baeae 100644 --- a/.github/workflows/ci-platform-snitch.yml +++ b/.github/workflows/ci-platform-snitch.yml @@ -36,16 +36,20 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} test-names: | - Adder - iSoftmax - TestiNoNorm - TestAdderLarge - TestiSoftmaxLarge - testMatMul - testRQGEMM - TestRQAdd - testRQGEMMTransB - testFloatSoftmax + FP32Kernels/Activations/Softmax/Regular + + IntKernels/Add/Large + IntKernels/Add/Regular + + IntKernels/Activations/Softmax/Large + IntKernels/Activations/Softmax/Regular + + IntKernels/MatMul/regular + + Others/iNoNorm + Others/RQGEMM + Others/RQAdd + Others/RQGEMMTransB num-cores: 9 simulators: | gvsoc diff --git a/.github/workflows/ci-platform-softhier.yml b/.github/workflows/ci-platform-softhier.yml index 959dca131b..0cc442d505 100644 --- a/.github/workflows/ci-platform-softhier.yml +++ b/.github/workflows/ci-platform-softhier.yml @@ -36,4 +36,4 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} test-names: | - Adder + IntKernels/Add/Regular diff --git a/.github/workflows/infra-generate-ccache.yml b/.github/workflows/infra-generate-ccache.yml index 721f09870b..5ad33ca542 100644 --- a/.github/workflows/infra-generate-ccache.yml +++ b/.github/workflows/infra-generate-ccache.yml @@ -34,15 +34,15 @@ jobs: cd DeeployTest mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache - python testRunner_generic.py -t ./Tests/Adder - python testRunner_mempool.py -t ./Tests/Adder - python testRunner_cortexm.py -t ./Tests/Adder - python 
testRunner_snitch.py -t ./Tests/Adder - python testRunner_tiled_snitch.py -t ./Tests/Adder - python testRunner_siracusa.py -t ./Tests/Adder - python testRunner_tiled_siracusa.py -t ./Tests/Adder - python testRunner_tiled_siracusa_w_neureka.py -t ./Tests/Adder - python testRunner_chimera.py -t ./Tests/Adder + python testRunner_generic.py -t ./Tests/IntKernels/Add/Regular + python testRunner_mempool.py -t ./Tests/IntKernels/Add/Regular + python testRunner_cortexm.py -t ./Tests/IntKernels/Add/Regular + python testRunner_snitch.py -t ./Tests/IntKernels/Add/Regular + python testRunner_tiled_snitch.py -t ./Tests/IntKernels/Add/Regular + python testRunner_siracusa.py -t ./Tests/IntKernels/Add/Regular + python testRunner_tiled_siracusa.py -t ./Tests/IntKernels/Add/Regular + python testRunner_tiled_siracusa_w_neureka.py -t ./Tests/IntKernels/Add/Regular + python testRunner_chimera.py -t ./Tests/IntKernels/Add/Regular - name: Clean and Upload CCache uses: actions/cache@v4 with: diff --git a/.gitignore b/.gitignore index e0e99b33ba..dc93328e4a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,36 +2,41 @@ # # SPDX-License-Identifier: Apache-2.0 +# Editor and OS files *~ -__pycache__ -build -dist -**/*.egg* +*# *.vscode .DS_Store -*.html -!docs/_templates/* -*.csv + +# Python +__pycache__ +.venv/* +.mypy_cache .ipynb_checkpoints/ -*# -install/ +**/*.egg* *.pkl *.data -*# + +# Build artifacts +build +dist +install/ +compile_commands.json toolchain/**/*/ + +# Node package.json package-lock.json -.mypy_cache node_modules -.venv/* - -compile_commands.json - +# Documentation docs/_autosummary docs/_build +*.html +!docs/_templates/* +*.csv - +# DeeployTest DeeployTest/TestFiles/ DeeployTest/Tests/**/*.txt DeeployTest/**/BUILD/* diff --git a/.vscode/launch.json b/.vscode/launch.json index f889ab29d1..554ca8cd91 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -67,7 +67,7 @@ }, "labelTransform": { "text": "${fileDirname}", - "find": ".*[\\/]", + "find" : 
"${workspaceFolder}/DeeployTest/Tests/", "replace": "" }, "valueTransform": { diff --git a/CHANGELOG.md b/CHANGELOG.md index 821dbaec51..4e33026a58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid ## Unreleased (Planned Release Target: v0.2.1) ### List of Pull Requests +- FP32 ReduceMean operator improvement [#137](https://github.com/pulp-platform/Deeploy/pull/137) - Support for RMSNorm (Pow and Sqrt operators) [#136](https://github.com/pulp-platform/Deeploy/pull/136) - Demo TinyViT compatibility with tiled Siracusa [#124](https://github.com/pulp-platform/Deeploy/pull/124) - TinyViT on non-tiled Siracusa [#117](https://github.com/pulp-platform/Deeploy/pull/117) @@ -27,6 +28,10 @@ This file contains the changelog for the Deeploy project. The changelog is divid - Fix bias hoisting in generic GEMM with no bias [#126](https://github.com/pulp-platform/Deeploy/pull/126) ### Added +- Support for unknown number of data dimensions in the tiler +- Parallelization support for the FP32 ReduceMean operator on PULPOpen +- Extensive testing for the ReduceMean operator +- Pass to remove ReduceMean operators that don't change data content, but only its shape - Support for RMSNorm operation via operator decomposition. - Added `Pow` (Power) and `Sqrt` (Square Root) operation support (Parsers, Layers, Bindings, Templates, and FP32 Kernels) for the Generic platform. - Support for input tiling for PULP FP regular and DW conv 2D. @@ -78,6 +83,8 @@ This file contains the changelog for the Deeploy project. 
The changelog is divid - annotateNCores method to PULPDeployer that adds an `n_cores` key to all PULPClusterEngine templates' operatorRepresentations ### Changed +- Structure of Tests subdir for improved ordering +- Structure of .gitignore file for improved ordering - Decreased L1 maximal memory limit for CI pipeline tests where compatible thanks to the implementation of Conv2D input tiling support. - Reduced size of reshape & skip connection test, for non-tiled Siracusa memory compatibility. - Replaced platform-specific tags (`*-amd64`, `*-arm64`) with direct digest references in `Noelware/docker-manifest-action`. @@ -118,6 +125,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid - changed `_mapNode` to `_selectEngine` which reduces the responsibility of that function to, as the name states, just engine selection ### Fixed +- Fixed ReduceMean parallelization and tiling issues described in Issue [#134](https://github.com/pulp-platform/Deeploy/issues/134). - Fixed PULP FP32 regular and DW Conv2D, and MatMul tile constraints. - Fixed type casting for tiling code generation. - Fixed bug in buffer name identification in code generation for tests with L3 default memory level. 
diff --git a/Deeploy/CommonExtensions/CodeTransformationPasses/Closure.py b/Deeploy/CommonExtensions/CodeTransformationPasses/Closure.py index 41073ad646..70a91fd0ce 100644 --- a/Deeploy/CommonExtensions/CodeTransformationPasses/Closure.py +++ b/Deeploy/CommonExtensions/CodeTransformationPasses/Closure.py @@ -20,7 +20,10 @@ _closureTemplate = NodeTemplate(""" static void ${closureName}(void* ${closureName}_args){ // CLOSURE ARG CAST +% if len(closureStructArgs.value) > 0: ${closureStructArgs.typeName}* args = (${closureStructArgs.typeName}*) ${closureStructArgName}; +% endif + % for argName, argType in closureStructArgs.value.items(): ${argType.typeName} ${argName} = args->${argName}; % endfor diff --git a/Deeploy/CommonExtensions/CodeTransformationPasses/IntrospectiveCodeTransformation.py b/Deeploy/CommonExtensions/CodeTransformationPasses/IntrospectiveCodeTransformation.py index 99b8eb132b..7e682b2644 100644 --- a/Deeploy/CommonExtensions/CodeTransformationPasses/IntrospectiveCodeTransformation.py +++ b/Deeploy/CommonExtensions/CodeTransformationPasses/IntrospectiveCodeTransformation.py @@ -142,6 +142,27 @@ def _extractDynamicExpressions(self, operatorRepresentation[expr] for expr in makoExpressions if expr in operatorRepresentation ] + # Add in mako expressions that are accessed through pageargs + # Required for unknown number of data dimensions + for expr in makoExpressions: + if expr.startswith("pageargs["): + # Extract key inside pageargs[] + key = expr[len("pageargs["):-1] + assert key.startswith("'") or key.startswith( + "\""), f"pageargs key must begin with a string literal, got: {key}" + + # Extract initial string literal (between first 2 " or ' characters) + quoteChar = key[0] + endIdx = key.find(quoteChar, 1) + key = key[1:endIdx] + + assert endIdx != -1, f"pageargs key missing closing quote: {expr}" + + # Search for all expressions that begin with the given key + for exprKey in operatorRepresentation.keys(): + if exprKey.startswith(key): + 
representedExpressions.append(operatorRepresentation[exprKey]) + # Filter buffers from expressions references = [expr for expr in representedExpressions if ctxt.is_buffer(expr)] diff --git a/Deeploy/CommonExtensions/OptimizationPasses/TopologyOptimizationPasses/LoweringOptimizationPasses.py b/Deeploy/CommonExtensions/OptimizationPasses/TopologyOptimizationPasses/LoweringOptimizationPasses.py index a87a641d30..aba6740d49 100644 --- a/Deeploy/CommonExtensions/OptimizationPasses/TopologyOptimizationPasses/LoweringOptimizationPasses.py +++ b/Deeploy/CommonExtensions/OptimizationPasses/TopologyOptimizationPasses/LoweringOptimizationPasses.py @@ -521,3 +521,32 @@ def __init__(self): graph = _singleNodePattern("Conv") name = "_REMOVE_EMPTY_CONV_BIAS_PASS" super().__init__(graph, _remove_empty_conv_bias_fun, name) + + +def _remove_only_singleton_reduce_mean(graph: gs.Graph, match: Match, name: str): + node = next(iter((match.nodes_map.values()))) + + # Keep node if only one in the graph + if len(graph.nodes) == 1: + return graph + + # Delete node if it only reduces over singleton dimensions (ONNX names the attribute 'axes'; opset >= 18 passes it as input 1) + if 'axes' in node.attrs: + axis = node.attrs['axes'] + else: + axis = node.inputs[1].values + + # Check if shape information is available + if node.inputs[0].shape is not None and all(node.inputs[0].shape[ax] == 1 for ax in axis): + graph.deleteNode(node) + + return graph + + +@contextagnostic +class RemoveOnlySingletonReduceMeanPass(ReplaceSequentialPatternPass): + + def __init__(self): + graph = _singleNodePattern("ReduceMean") + name = "_REMOVE_ONLY_SINGLETON_REDUCE_MEAN_PASS" + super().__init__(graph, _remove_only_singleton_reduce_mean, name) diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 1807864dfc..ec2ed6270f 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -208,10 +208,15 @@ NodeBinding(ReduceMeanChecker([PointerClass(type)], [PointerClass(type)]), ReduceMeanTemplate.referenceTemplate, 
BasicTransformer) for type in SignedIntegerDataTypes ] + [ + # ONNX OPSET < 18 NodeBinding(ReduceMeanChecker([PointerClass(float_type), PointerClass(integer_type)], [PointerClass(float_type)]), FloatReduceMeanTemplate.referenceTemplate, BasicTransformer) for integer_type in SignedIntegerDataTypes for float_type in FloatDataTypes +] + [ + # ONNX OPSET >= 18 + NodeBinding(ReduceMeanChecker([PointerClass(float_type)], [PointerClass(float_type)]), + FloatReduceMeanTemplate.referenceTemplate, BasicTransformer) for float_type in FloatDataTypes ] BasicReduceSumBindings = [ diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index bc69e64dae..a303e6d14b 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -141,9 +141,12 @@ def parseNodeCtxt(self, ctxt.hoistConstant(axesTensor) node.inputs.append(axesTensor) if len(node.inputs) <= 4: - values = np.ones((self.operatorRepresentation['dims'])) + values = np.ones((self.operatorRepresentation['dims']), dtype = np.int64) stepsTensor = gs.Constant(f'{node.name}_Steps_Tensor', values = values) + ctxt.hoistConstant(stepsTensor) + ctxt.addUser(stepsTensor.name, node) + node.inputs.append(stepsTensor) self.operatorRepresentation['starts'] = node.inputs[1].name @@ -529,9 +532,14 @@ def __init__(self): super().__init__() def parseNode(self, node: gs.Node) -> bool: - if len(node.inputs) == 2: - # Float node, requiring 2 inputs (ONNX opset version >= 18) - wellFormed = all(['keepdims' in node.attrs, len(node.inputs) == 2, len(node.outputs) == 1]) + if 1 <= len(node.inputs) and ("axes" not in node.attrs): + # Float node, requiring 1 or 2 inputs (ONNX opset version >= 18). + # "axes" input is optional. + # If axes is not provided, then reduction will happen over all dimensions. 
+ # + # WARNING: noop_with_empty_axes attribute not handled + + wellFormed = all(['keepdims' in node.attrs, 1 <= len(node.inputs) <= 2, len(node.outputs) == 1]) if wellFormed: self.operatorRepresentation['keepdims'] = int(node.attrs['keepdims']) @@ -546,23 +554,47 @@ def parseNodeCtxt(self, node: gs.Node, channels_first: bool = True) -> Tuple[NetworkContext, bool]: - if len(node.inputs) == 2: + if 1 <= len(node.inputs) and ("axes" not in node.attrs): + # Extract context information for Float ReduceMean node (ONNX opset version >= 18) data_in = ctxt.lookup(node.inputs[0].name) data_out = ctxt.lookup(node.outputs[0].name) - axes = ctxt.lookup(node.inputs[1].name) + # Extract axes as numpy sorted array + # If not provided, according to ONNX specification, reduction will happen over all dimensions + if len(node.inputs) == 2: + axes = ctxt.lookup(node.inputs[1].name) + + # Mark the axes variable to be excluded from the context, since only used in the template, as part of the operator representation + axes._live = False + axes._deploy = False + # Normalize negative axes to [0, rank) so index comparisons work, then sort + axes = np.mod(axes.values, len(data_in.shape)) + axes.sort() + else: + axes = np.array(list(range(len(data_in.shape)))) + + # Remove axes reduced over singleton dimensions + # Keep first axis if only singleton dimensions are reduced + nonSingletonAxes = [] + for axis in axes: + if data_in.shape[axis] != 1: + nonSingletonAxes.append(axis) + if len(nonSingletonAxes) == 0: + nonSingletonAxes.append(axes[0]) + axes = np.array(nonSingletonAxes) + + # Update operator representation self.operatorRepresentation['data_in'] = data_in.name self.operatorRepresentation['data_out'] = data_out.name + self.operatorRepresentation['data_in_shape'] = data_in.shape self.operatorRepresentation['data_out_shape'] = data_out.shape + self.operatorRepresentation['size'] = np.prod(data_in.shape) - self.operatorRepresentation['axisLength'] = data_in.shape[axes.values[0]] - self.operatorRepresentation['axes'] = axes.values - # Mark the axes variable to be excluded from the 
context, since only used in the template, as part of the operator representation - axes._live = False - axes._deploy = False + self.operatorRepresentation['axes'] = axes + self.operatorRepresentation['axisLength'] = data_in.shape[axes[0]] return ctxt, True else: diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index 785d932776..7a842baba3 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ - RemoveEmptyConvBiasPass + RemoveEmptyConvBiasPass, RemoveOnlySingletonReduceMeanPass from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NodeMapper, NodeTemplate, \ StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicBatchNormBindings, BasicConcatBindings, \ @@ -164,6 +164,7 @@ class GenericStructBuffer(StructBuffer): ExtractPaddingFromConvPass(), ExtractPaddingFromPoolPass(), RemoveEmptyConvBiasPass(), + RemoveOnlySingletonReduceMeanPass(), # DebugPrintPass(r'.*[Mm]at[Mm]ul.*', position = 'after'), ], name = "GenericOptimizer") diff --git a/Deeploy/Targets/Generic/Templates/FloatReduceMeanTemplate.py b/Deeploy/Targets/Generic/Templates/FloatReduceMeanTemplate.py index 005b0b8893..88ffc32de8 100644 --- a/Deeploy/Targets/Generic/Templates/FloatReduceMeanTemplate.py +++ b/Deeploy/Targets/Generic/Templates/FloatReduceMeanTemplate.py @@ -17,66 +17,78 @@ def alignToContext(self, ctxt: NetworkContext, data_in = ctxt.lookup(operatorRepresentation['data_in']) data_out = ctxt.lookup(operatorRepresentation['data_out']) + operatorRepresentation['input_offset'] = 0 if hasattr(data_in, "_signed") and hasattr(data_in, "nLevels"): operatorRepresentation['input_offset'] = (data_in._signed == 0) * int(data_in.nLevels / 2) + 
operatorRepresentation['output_offset'] = 0 if hasattr(data_out, "_signed") and hasattr(data_out, "nLevels"): - operatorRepresentation['output_offset'] = -(data_out._signed == 0) * int(data_in.nLevels / 2) + operatorRepresentation['output_offset'] = -(data_out._signed == 0) * int(data_out.nLevels / 2) return ctxt, operatorRepresentation, [] referenceTemplate = _FloatReduceMeanTemplate(""" -// FloatReduceMean (Name: ${nodeName}, Op: ${nodeOp}) -BEGIN_SINGLE_CORE -${data_out_type.referencedType.typeName} ${data_out}_accumulator = 0; +## =============== Compute required variables =============== <% - +## Compute the total number of elements being reduced in one axis reduceLength = 1 + for i, axis in enumerate(axes): if axis < 0: axes[i] += len(data_in_shape) reduceLength = reduceLength * data_in_shape[axis] -%> -<% - shapeStr = '' - accessStr = '' -%> -% for idx, i in enumerate(data_in_shape[1:]): -<% + +## Compute the remaining dimensions after reduction +restDims = set(list(range(len(data_in_shape)))).difference(set(axes)) + +## =============== Prepare shape and access strings =============== +## shapeStr is going to have the [d1][d2]... format +## accessStr is going to have the [i_0][i_1]... 
format +shapeStr = '' +accessStr = '' + +for idx, i in enumerate(data_in_shape[1:]): shapeStr += '['+str(i)+']' -%> -% endfor -% for j in range(len(data_in_shape)): -<% + +for j in range(len(data_in_shape)): accessStr += '[i_'+str(j)+']' %> -% endfor + +## =============== Start of the actual template =============== +## Prepare variables +// ReduceMean (Name: ${nodeName}, Op: ${nodeOp}) +BEGIN_SINGLE_CORE + +${data_out_type.referencedType.typeName} ${data_out}_accumulator = 0; ${data_out_type.typeName} dummy_${data_out} = ${data_out}; -<% -restDims = set(list(range(len(data_in_shape)))).difference(set(axes)) -%> +## Iterate through non-reduced dimensions % for i in list(restDims): -for(uint32_t i_${i} = 0; i_${i}<${data_in_shape[i]}; i_${i}++){ +for(uint32_t i_${i} = 0; i_${i}<${data_in_shape[i]}; i_${i}++) { % endfor +## Initialize accumulator ${data_out}_accumulator = ${input_offset}*${reduceLength}; + +## Iterate through reduced dimensions and accumulate % for i in list(axes): -for(uint32_t i_${i} = 0; i_${i}<${data_in_shape[i]}; i_${i}++){ +for(uint32_t i_${i} = 0; i_${i}<${data_in_shape[i]}; i_${i}++) { % endfor ${data_out}_accumulator += ((${data_in_type.referencedType.typeName} (*)${shapeStr})${data_in})${accessStr}; - % for i in range(len(axes)): } % endfor + +## Write back the mean value % if keepdims: -*dummy_${data_out}++ = (${data_out_type.referencedType.typeName}) ((${data_out}_accumulator / ${reduceLength} + ${output_offset}); +*dummy_${data_out}++ = (${data_out_type.referencedType.typeName}) (${data_out}_accumulator / ${reduceLength} + ${output_offset}); % else: *dummy_${data_out}++ = (${data_out_type.referencedType.typeName}) (${data_out}_accumulator / ${reduceLength}); % endif % for i in range(len(restDims)): } % endfor + END_SINGLE_CORE """) diff --git a/Deeploy/Targets/PULPOpen/Bindings.py b/Deeploy/Targets/PULPOpen/Bindings.py index 35e7230fb8..b24d9706af 100644 --- a/Deeploy/Targets/PULPOpen/Bindings.py +++ 
b/Deeploy/Targets/PULPOpen/Bindings.py @@ -14,8 +14,8 @@ from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeTemplate from Deeploy.FutureExtension.Bindings.AutoFutureBinding import AutoFutureBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration -from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, DequantTemplate, FloatReduceMeanTemplate, \ - FloatReduceSumTemplate, GatherTemplate, QuantTemplate, RQSiGELUTemplate, SliceTemplate, iHardswishTemplate +from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, DequantTemplate, FloatReduceSumTemplate, \ + GatherTemplate, QuantTemplate, RQSiGELUTemplate, SliceTemplate, iHardswishTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DequantChecker, \ GatherChecker, GELUChecker, GEMMChecker, HardswishChecker, LayerNormChecker, MatMulChecker, MulChecker, \ QuantChecker, ReduceMeanChecker, ReluChecker, ReshapeChecker, RQAddChecker, RQHardswishChecker, SGDChecker, \ @@ -29,10 +29,10 @@ from Deeploy.Targets.PULPOpen.DMA.MchanDma import MchanDma from Deeploy.Targets.PULPOpen.Templates import ConvTemplate, DMASliceTemplate, FloatAddTemplate, FloatConvTemplate, \ FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, \ - FloatMulTemplate, FloatReluTemplate, FloatSoftmaxTemplate, GEMMTemplate, MatrixVectorTemplate, MaxPool2DTemplate, \ - MulTemplate, ReduceMeanTemplate, RequantShiftTemplate, ReshapeTemplate, RQAddTemplate, RQSiHardswishTemplate, \ - SGDTemplate, SoftmaxCrossEntropyLossTemplate, TallGEMMTemplate, TransposeTemplate, UniformRequantShiftTemplate, \ - iRMSNormTemplate, iSoftmaxTemplate + FloatMulTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, GEMMTemplate, \ + MatrixVectorTemplate, MaxPool2DTemplate, MulTemplate, ReduceMeanTemplate, RequantShiftTemplate, ReshapeTemplate, \ + 
RQAddTemplate, RQSiHardswishTemplate, SGDTemplate, SoftmaxCrossEntropyLossTemplate, TallGEMMTemplate, \ + TransposeTemplate, UniformRequantShiftTemplate, iRMSNormTemplate, iSoftmaxTemplate from Deeploy.Targets.PULPOpen.TypeCheckers import PULPConvChecker, PULPLinearChecker, PULPMaxPoolChecker, \ PULPRequantShiftChecker from Deeploy.TilingExtension.CodeTransformationPasses.TilingVariableReplacement import TilingVariableReplacement, \ @@ -298,7 +298,7 @@ ClusterTransformer) for type in IntegerDataTypes ] + [ NodeBinding(ReduceMeanChecker([PointerClass(float_type), PointerClass(integer_type)], [PointerClass(float_type)]), - FloatReduceMeanTemplate.referenceTemplate, ClusterTransformer) + FloatReduceMeanTemplate.referenceTemplate, ForkTransformer) for integer_type in SignedIntegerDataTypes for float_type in FloatDataTypes ] diff --git a/Deeploy/Targets/PULPOpen/Parsers.py b/Deeploy/Targets/PULPOpen/Parsers.py index ab99fcabc6..5c5951eaba 100644 --- a/Deeploy/Targets/PULPOpen/Parsers.py +++ b/Deeploy/Targets/PULPOpen/Parsers.py @@ -8,8 +8,8 @@ import onnx_graphsurgeon as gs from Deeploy.DeeployTypes import NetworkContext -from Deeploy.Targets.Generic.Parsers import Conv2DParser, GEMMParser, RQSConv1DParser, RQSConv2DParser, \ - RQSParserInterface +from Deeploy.Targets.Generic.Parsers import Conv2DParser, GEMMParser, ReduceMeanParser, RQSConv1DParser, \ + RQSConv2DParser, RQSParserInterface class PULPConv2DParser(RQSConv2DParser): @@ -462,3 +462,26 @@ def parseNodeCtxt(self, return ctxt, False return newCtxt, True + + +class PULPReduceMeanParser(ReduceMeanParser): + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + # Inherit the generic ReduceMean parsing + newCtxt, ret = super().parseNodeCtxt(ctxt, node, channels_first) + + if ret: + # Add to operator representation the non-reduced dimensions for tiling purposes + originalInputShape = 
newCtxt.lookup(self.operatorRepresentation['data_in']).shape + reducedAxes = self.operatorRepresentation['axes'] + + for ax in range(len(originalInputShape)): + if ax not in reducedAxes: + self.operatorRepresentation['dim_in_' + str(ax)] = originalInputShape[ax] + + return newCtxt, True + else: + return ctxt, False diff --git a/Deeploy/Targets/PULPOpen/Platform.py b/Deeploy/Targets/PULPOpen/Platform.py index 133670da02..2939742cea 100644 --- a/Deeploy/Targets/PULPOpen/Platform.py +++ b/Deeploy/Targets/PULPOpen/Platform.py @@ -6,7 +6,7 @@ import onnx_graphsurgeon as gs from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ - RemoveEmptyConvBiasPass + RemoveEmptyConvBiasPass, RemoveOnlySingletonReduceMeanPass from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NetworkContext, NodeMapper, \ NodeTemplate, StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel @@ -20,10 +20,10 @@ TransposeLayer, iHardswishLayer, iRMSNormLayer from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, DequantParser, FlattenParser, GatherParser, \ GELUParser, GEMMParser, LayerNormParser, MatMulParser, MaxPool2DParser, MulParser, Pad1DParser, Pad2DParser, \ - QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQAddParser, \ - RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, SGDParser, SliceParser, \ - SoftmaxCrossEntropyLossGradParser, SoftmaxCrossEntropyLossParser, SoftmaxGradParser, SoftmaxParser, \ - TransposeParser, UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, iRMSNormParser, iSoftmaxParser + QuantParser, ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQAddParser, RQIntegerDivParser, \ + RQSiGELUParser, RQSiHardswishParser, SGDParser, SliceParser, SoftmaxCrossEntropyLossGradParser, \ + 
SoftmaxCrossEntropyLossParser, SoftmaxGradParser, SoftmaxParser, TransposeParser, UniformRequantShiftParser, \ + UnsqueezeParser, iHardswishParser, iRMSNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate as BasicAllocateTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, IntegerDivRequantMergePass, \ MergeConstAddAndRequantPass, MergeTrueIntegerDivRequantShiftPass, QuantPatternPass, RQSSplitPass, \ @@ -33,7 +33,7 @@ from Deeploy.Targets.PULPOpen.Layers import PULPRQSConvLayer, PULPRQSGEMMLayer from Deeploy.Targets.PULPOpen.Parsers import PULPConv1DParser, PULPConv2DParser, PULPDWConv1DParser, \ PULPDWConv2DParser, PULPFPConv2DParser, PULPFPDWConv2DParser, PULPGEMMParser, PULPMatrixVecParser, \ - PULPTallGEMMParser + PULPReduceMeanParser, PULPTallGEMMParser from Deeploy.Targets.PULPOpen.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.PULPOpen.Tiler import PULPAddTilingReadyBindings, PULPConcatTilingReadyBindings, \ PULPConv2DTilingReadyBindings, PULPDWConv2DTilingReadyBindings, PULPFlattenTilingReadyBindings, \ @@ -65,7 +65,7 @@ RequantShiftMapper = NodeMapper(RequantShiftParser(), PULPRQSTilingReadyBindings) UniformRequantShiftMapper = NodeMapper(UniformRequantShiftParser(), PULPUniformRQSTilingReadyBindings) -ReduceMeanMapper = NodeMapper(ReduceMeanParser(), PULPReduceMeanTilingReadyBindings) +ReduceMeanMapper = NodeMapper(PULPReduceMeanParser(), PULPReduceMeanTilingReadyBindings) ReduceSumMapper = NodeMapper(ReduceSumParser(), PULPReduceSumTilingReadyBindings) MatMulMapper = NodeMapper(MatMulParser(), PULPMatMulTilingReadyBindings) RQIntegerDivMapper = NodeMapper(RQIntegerDivParser(), [BasicRQIntegerDivBinding]) @@ -234,6 +234,7 @@ class PULPStructBuffer(StructBuffer): PULPMatMulRequantMergePass(), PULPAddRequantMergePass(), RemoveEmptyConvBiasPass(), + RemoveOnlySingletonReduceMeanPass(), ], name = "PULPOptimizer") diff --git 
a/Deeploy/Targets/PULPOpen/Templates/FloatReduceMeanTemplate.py b/Deeploy/Targets/PULPOpen/Templates/FloatReduceMeanTemplate.py new file mode 100644 index 0000000000..62e1110f79 --- /dev/null +++ b/Deeploy/Targets/PULPOpen/Templates/FloatReduceMeanTemplate.py @@ -0,0 +1,142 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from typing import Dict, List, Tuple + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _FloatReduceMeanTemplate(NodeTemplate): + ''' + WARNING: This version of parallelization is optimized for the TinyViT ReduceMean layers + (49 elements in the reduced axis). Greater sizes of the reduced axis may benefit + from different parallelization and tiling strategies. + ''' + + def __init__(self, templateStr): + super().__init__(templateStr) + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + data_out = ctxt.lookup(operatorRepresentation['data_out']) + + operatorRepresentation['input_offset'] = 0 + if hasattr(data_in, "_signed") and hasattr(data_in, "nLevels"): + operatorRepresentation['input_offset'] = (data_in._signed == 0) * int(data_in.nLevels / 2) + + operatorRepresentation['output_offset'] = 0 + if hasattr(data_out, "_signed") and hasattr(data_out, "nLevels"): + operatorRepresentation['output_offset'] = -(data_out._signed == 0) * int(data_out.nLevels / 2) + + for ax in range(len(operatorRepresentation['data_in_shape'])): + if ax not in operatorRepresentation['axes']: + _ = operatorRepresentation['dim_in_' + str(ax)] + + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _FloatReduceMeanTemplate(""" +## =============== Perform necessary precomputations =============== +<% +# Update input shape based on tiling +new_data_in_shape = data_in_shape.copy() + +for i in 
range(len(new_data_in_shape)): + if i not in axes: + new_data_in_shape[i] = pageargs['dim_in_' + str(i)] + +# Compute the total number of elements being reduced in one axis +reduceLength = 1 +for i, axis in enumerate(axes): + if axis < 0: + axes[i] += len(data_in_shape) + reduceLength = reduceLength * data_in_shape[axis] + +# Compute the remaining dimensions after reduction +# Order them for more efficient parallelization +# (heuristically working on the largest non-tiled stride last, +# since it's impossible to get tiling information here) +restDims = list(set(list(range(len(data_in_shape)))).difference(set(axes))) +restDims = sorted(restDims, key=lambda x: data_in_shape[x]) + +dataSize = new_data_in_shape[restDims[-1]] + +# =============== Prepare shape and access strings =============== +# shapeStr is going to have the [d1][d2]... format +# accessStr is going to have the [i_0][i_1]... format +shapeStr = '' +accessStr = '' + +data_out_str = "0" +data_out_str_prod = "1" + +for idx, i in enumerate(new_data_in_shape[1:]): + if isinstance(i, str): + shapeStr += '[*' + i + ']' + else: + shapeStr += '[' + str(i) + ']' + +for j in range(len(data_in_shape)): + accessStr += '[i_' + str(j) + ']' + +for k in sorted(restDims, reverse=True): + data_out_str += ' + i_' + str(k) + '*' + str(data_out_str_prod) + if isinstance(new_data_in_shape[k], str): + data_out_str_prod += "* *(" + new_data_in_shape[k] + ")" + else: + data_out_str_prod += "* " + str(new_data_in_shape[k]) +%> + +## =============== Start of the actual template =============== +// ReduceMean (Name: ${nodeName}, Op: ${nodeOp}) +## Get core information +uint32_t core_id = pi_core_id(); +uint32_t log2Core = (uint32_t) LOG2(NUM_CORES); + +## Split into chunks for each core +% if isinstance(dataSize, str): +uint32_t chunk = (*(${dataSize}) >> log2Core) + ((*(${dataSize}) & (NUM_CORES - 1)) != 0); +uint32_t chunk_start = MIN(chunk * core_id, *(${dataSize})); +uint32_t chunk_stop = MIN(chunk_start + chunk, 
*(${dataSize})); +% else: +uint32_t chunk = (${dataSize}U >> log2Core) + ((${dataSize}U & (NUM_CORES - 1)) != 0); +uint32_t chunk_start = MIN(chunk * core_id, ${dataSize}U); +uint32_t chunk_stop = MIN(chunk_start + chunk, ${dataSize}U); +% endif + +## Iterate through non-reduced dimensions +## Keep the last dimension for parallelization +% for i in list(restDims[:-1]): +% if isinstance(pageargs['dim_in_' + str(i)], str): +for(uint32_t i_${i} = 0; i_${i} < *${pageargs['dim_in_' + str(i)]}; i_${i}++) { +% else: +for(uint32_t i_${i} = 0; i_${i} < ${pageargs['dim_in_' + str(i)]}; i_${i}++) { +% endif +% endfor +for(uint32_t i_${restDims[-1]} = chunk_start; i_${restDims[-1]} < chunk_stop; i_${restDims[-1]}++) { +## Initialize accumulator +uint32_t out_idx = ${data_out_str}; +${data_out}[out_idx] = ${input_offset}*${reduceLength}; + +## Iterate through reduced dimensions and accumulate +% for i in list(axes): +for(uint32_t i_${i} = 0; i_${i} < ${data_in_shape[i]}; i_${i}++) { +% endfor +${data_out}[out_idx] += ((${data_in_type.referencedType.typeName} (*)${shapeStr})${data_in})${accessStr}; +% for i in range(len(axes)): +} +% endfor + +## Write back the mean value +% if keepdims: +${data_out}[out_idx] = (${data_out_type.referencedType.typeName}) (${data_out}[out_idx] / ${reduceLength} + ${output_offset}); +% else: +${data_out}[out_idx] = (${data_out_type.referencedType.typeName}) (${data_out}[out_idx] / ${reduceLength}); +% endif +% for i in range(len(restDims)): +} +% endfor +""") diff --git a/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py b/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py index 1f7149e1e8..1cfdcb6185 100644 --- a/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py @@ -50,7 +50,7 @@ def alignToContext(self, ctxt: NetworkContext, // LMACAN: In some edge cases sporadic errors happen if this loop is not added. 
// We believe this is due to missing bubbles in the pipeline that break operator forwarding. // Breaking test: -// `python testRunner_tiled_siracusa.py -t=Tests/Transformer --defaultMemLevel=L3 --doublebuffer --l1=30000` +// `python testRunner_tiled_siracusa.py -t=Tests/Others/Transformer --defaultMemLevel=L3 --doublebuffer --l1=30000` #pragma unroll 1 for(int k=0;k<3;k++){ asm volatile("nop" ::); diff --git a/Deeploy/Targets/PULPOpen/TileConstraints/DWConvTileConstraint.py b/Deeploy/Targets/PULPOpen/TileConstraints/DWConvTileConstraint.py index 6824d6f710..bb0e3ed6ee 100644 --- a/Deeploy/Targets/PULPOpen/TileConstraints/DWConvTileConstraint.py +++ b/Deeploy/Targets/PULPOpen/TileConstraints/DWConvTileConstraint.py @@ -239,6 +239,7 @@ class DWConv2DTileConstraint(Conv2DTileConstraint): @staticmethod def addPolicyConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel: + # Inherit from regular Conv2D policy constraints tilerModel = Conv2DTileConstraint.addPolicyConstraint(tilerModel, parseDict, ctxt) # Add constraint for relationship between in and out number of channels diff --git a/Deeploy/Targets/PULPOpen/TileConstraints/ReduceMeanConstraint.py b/Deeploy/Targets/PULPOpen/TileConstraints/ReduceMeanConstraint.py index 0c5de8397f..b991de80dd 100644 --- a/Deeploy/Targets/PULPOpen/TileConstraints/ReduceMeanConstraint.py +++ b/Deeploy/Targets/PULPOpen/TileConstraints/ReduceMeanConstraint.py @@ -4,11 +4,10 @@ from typing import Dict, List, Tuple, Union -import numpy as np from ortools.constraint_solver.pywrapcp import IntVar from Deeploy.AbstractDataTypes import PointerClass -from Deeploy.CommonExtensions.DataTypes import uint16_t +from Deeploy.CommonExtensions.DataTypes import uint32_t from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation from Deeploy.TilingExtension.MemoryConstraints import NodeMemoryConstraint from Deeploy.TilingExtension.TileConstraint import TileConstraint @@ -18,6 +17,11 @@ class 
ReduceMeanTileConstraint(TileConstraint): + ''' + WARNING: This version of tiling is optimized for the TinyViT ReduceMean layers + (49 elements in the reduced axis). Greater sizes of the reduced axis may benefit + from different parallelization and tiling strategies. + ''' @staticmethod def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel: @@ -35,7 +39,7 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw tilerModel.addTensorDimToModel(ctxt, bufferName) # ===== ADD CONSTRAINTS ===== - # Add constraints for the I/O dimensions + # Add constraints for the relationship between the I/O dimensions # Iterate over input axes and maintain an output index pointer inputShape = parseDict['data_in_shape'] output_idx = 0 @@ -57,11 +61,44 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw return tilerModel + @staticmethod + def addPolicyConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel: + # ===== GET NECESSARY INFORMATION ===== + # Get I/O buffer names + inputBufferName = parseDict['data_in'] + + # Get other necessary information + inputShape = parseDict['data_in_shape'] + reduceAxes = parseDict['axes'] + nonReducedDims = [ax for ax in range(len(inputShape)) if ax not in reduceAxes] + + if len(nonReducedDims) > 0: + biggestNonReducedDim = max(nonReducedDims, key = lambda ax: inputShape[ax]) + else: + biggestNonReducedDim = -1 # No non-reduced dimensions + + # ===== ADD CONSTRAINTS ===== + # Kernel parallelized only on biggest non-reduced dimension, + # so tile only on that dimension + for ax in range(len(inputShape)): + dimVar = tilerModel.getTensorDimVar(tensorName = inputBufferName, dimIdx = ax) + if ax != biggestNonReducedDim: + # This is not the biggest non-reduced dimension, force no tiling + tilerModel.addConstraint(dimVar == inputShape[ax]) + + return tilerModel + @staticmethod def constructSymbolicNodeRep(tilerModel: 
TilerModel, parseDict: Dict, ctxt: NetworkContext) -> Dict[str, Union[int, IntVar]]: symbolicParseDict = parseDict.copy() + inputBuffer = ctxt.lookup(name = parseDict['data_in']) + for ax in range(len(parseDict['data_in_shape'])): + if ax not in parseDict['axes']: + dimVar = tilerModel.getTensorDimVar(tensorName = inputBuffer.name, dimIdx = ax) + symbolicParseDict['dim_in_' + str(ax)] = dimVar + return symbolicParseDict @staticmethod @@ -106,33 +143,14 @@ def serializeTilingSolution( inputBaseOffsets, outputBaseOffsets = cls.extractBaseAddr(tilingSolution, targetMemLevel, operatorRepresentation, addrNames) - # Prepare replacement lists for the elements inside the operator representation, - # for the cubes to be computed further down in this function - - # ~~~~~ SEE ISSUE #134: https://github.com/pulp-platform/Deeploy/issues/134 ~~~~~ - # Freeze tiling input and output tiling for now - replacements: Dict[str, List[int]] = { - # "data_in_shape": [], - # "data_out_shape": [], - "size": [], - } - - replacementTypes = { - # "data_in_shape": [ - # PointerClass(uint16_t), - # PointerClass(uint16_t), - # PointerClass(uint16_t), - # PointerClass(uint16_t) - # ], - # "data_out_shape": [ - # PointerClass(uint16_t), - # PointerClass(uint16_t), - # PointerClass(uint16_t), - # PointerClass(uint16_t) - # ], - "size": PointerClass(uint16_t), - } - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # Prepare replacements for non-reduced input sizes + replacements: Dict[str, List[int]] = dict() + replacementTypes = dict() + + for ax in range(len(operatorRepresentation['data_in_shape'])): + if ax not in operatorRepresentation['axes']: + replacements["dim_in_" + str(ax)] = [] + replacementTypes["dim_in_" + str(ax)] = PointerClass(uint32_t) # Prepare loading schedule lists inputLoadSchedule = [] @@ -144,13 +162,10 @@ def serializeTilingSolution( in_cube = ReduceMeanTileConstraint.computeInputCubeFromOutputCube(out_cube, parseDict = 
operatorRepresentation) - # Append replacement elements - # ~~~~~ SEE ISSUE #134: https://github.com/pulp-platform/Deeploy/issues/134 ~~~~~ - # Freeze tiling input and output tiling for now - # replacements["data_in_shape"].append(list(in_cube.dims).copy()) - # replacements["data_out_shape"].append(list(out_cube.dims).copy()) - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - replacements["size"].append(int(np.prod(out_cube.dims))) + # Add replacements for non-reduced input sizes + for ax in range(len(operatorRepresentation['data_in_shape'])): + if ax not in operatorRepresentation['axes']: + replacements["dim_in_" + str(ax)].append(in_cube.dims[ax]) # Append new cubes inputLoadSchedule.append({"data_in": in_cube}) diff --git a/Deeploy/Targets/PULPOpen/TileConstraints/SliceConstraint.py b/Deeploy/Targets/PULPOpen/TileConstraints/SliceConstraint.py index 5309300659..53c45df1c3 100644 --- a/Deeploy/Targets/PULPOpen/TileConstraints/SliceConstraint.py +++ b/Deeploy/Targets/PULPOpen/TileConstraints/SliceConstraint.py @@ -113,36 +113,25 @@ def serializeTilingSolution( # Prepare replacement lists for the elements inside the operator representation, # for the cubes to be computed further down in this function - # ~~~~~ SEE ISSUE #134: https://github.com/pulp-platform/Deeploy/issues/134 ~~~~~ - # Freeze tiling input and output tiling for now + # Build replacementTypes based on the actual number of dimensions + data_in_shape_type = [PointerClass(uint16_t) for _ in range(len(operatorRepresentation['data_in_shape']))] + data_out_shape_type = [PointerClass(uint16_t) for _ in range(len(operatorRepresentation['data_out_shape']))] + replacements = { - # "data_in_shape": [], - # "data_out_shape": [], - # "starts": [[ - # 0, - # ] * len(operatorRepresentation['axes'])] * len(outputCubes), - # "ends": [], + "data_in_shape": [], + "data_out_shape": [], + "starts": [], + "ends": [], "data_in_size": [], } replacementTypes = { - # "data_in_shape": [ - # 
PointerClass(uint16_t), - # PointerClass(uint16_t), - # PointerClass(uint16_t), - # PointerClass(uint16_t) - # ], - # "data_out_shape": [ - # PointerClass(uint16_t), - # PointerClass(uint16_t), - # PointerClass(uint16_t), - # PointerClass(uint16_t) - # ], - # "starts": PointerClass(uint16_t), - # "ends": PointerClass(uint16_t), + "data_in_shape": data_in_shape_type, + "data_out_shape": data_out_shape_type, + "starts": PointerClass(uint16_t), + "ends": PointerClass(uint16_t), "data_in_size": PointerClass(uint16_t), } - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Prepare loading schedule lists inputLoadSchedule = [] @@ -152,20 +141,18 @@ def serializeTilingSolution( # Compute input cube in_cube = SliceTileConstraint.computeInputCubeFromOutputCube(out_cube, parseDict = operatorRepresentation) - # Compute new ends for replacement + # Compute new starts and ends for replacement + new_starts = list() new_ends = list() for ax in operatorRepresentation['axes']: + new_starts.append(in_cube.offset[ax]) new_ends.append(in_cube.offset[ax] + in_cube.dims[ax]) - # Append replacement elements - - # ~~~~~ SEE ISSUE #134: https://github.com/pulp-platform/Deeploy/issues/134 ~~~~~ - # Freeze tiling input and output tiling for now - # replacements["data_in_shape"].append(list(in_cube.dims).copy()) - # replacements["data_out_shape"].append(list(out_cube.dims).copy()) - # replacements["ends"].append(new_ends) - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - + # Append replacement elements (using tuples so they can be hashed by minimizeVariableReplacement) + replacements["data_in_shape"].append(tuple(in_cube.dims)) + replacements["data_out_shape"].append(tuple(out_cube.dims)) + replacements["starts"].append(tuple(new_starts)) + replacements["ends"].append(tuple(new_ends)) replacements["data_in_size"].append(int(np.prod(in_cube.dims))) # Append new cubes diff --git a/DeeployTest/Tests/testFloatGelu/inputs.npz 
b/DeeployTest/Tests/FP32Kernels/Activations/GELU/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatGelu/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Activations/GELU/inputs.npz diff --git a/DeeployTest/Tests/testFloatGelu/network.onnx b/DeeployTest/Tests/FP32Kernels/Activations/GELU/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatGelu/network.onnx rename to DeeployTest/Tests/FP32Kernels/Activations/GELU/network.onnx diff --git a/DeeployTest/Tests/testFloatGelu/outputs.npz b/DeeployTest/Tests/FP32Kernels/Activations/GELU/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatGelu/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Activations/GELU/outputs.npz diff --git a/DeeployTest/Tests/testFloatRelu/inputs.npz b/DeeployTest/Tests/FP32Kernels/Activations/ReLU/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatRelu/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Activations/ReLU/inputs.npz diff --git a/DeeployTest/Tests/testFloatRelu/network.onnx b/DeeployTest/Tests/FP32Kernels/Activations/ReLU/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatRelu/network.onnx rename to DeeployTest/Tests/FP32Kernels/Activations/ReLU/network.onnx diff --git a/DeeployTest/Tests/testFloatRelu/outputs.npz b/DeeployTest/Tests/FP32Kernels/Activations/ReLU/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatRelu/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Activations/ReLU/outputs.npz diff --git a/DeeployTest/Tests/testFloatSoftmaxCrossEntropy/inputs.npz b/DeeployTest/Tests/FP32Kernels/Activations/Softmax/CrossEntropy/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatSoftmaxCrossEntropy/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Activations/Softmax/CrossEntropy/inputs.npz diff --git a/DeeployTest/Tests/testFloatSoftmaxCrossEntropy/network.onnx 
b/DeeployTest/Tests/FP32Kernels/Activations/Softmax/CrossEntropy/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatSoftmaxCrossEntropy/network.onnx rename to DeeployTest/Tests/FP32Kernels/Activations/Softmax/CrossEntropy/network.onnx diff --git a/DeeployTest/Tests/testFloatSoftmaxCrossEntropy/outputs.npz b/DeeployTest/Tests/FP32Kernels/Activations/Softmax/CrossEntropy/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatSoftmaxCrossEntropy/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Activations/Softmax/CrossEntropy/outputs.npz diff --git a/DeeployTest/Tests/testFloatSoftmaxCrossEntropyGrad/inputs.npz b/DeeployTest/Tests/FP32Kernels/Activations/Softmax/CrossEntropyGrad/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatSoftmaxCrossEntropyGrad/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Activations/Softmax/CrossEntropyGrad/inputs.npz diff --git a/DeeployTest/Tests/testFloatSoftmaxCrossEntropyGrad/network.onnx b/DeeployTest/Tests/FP32Kernels/Activations/Softmax/CrossEntropyGrad/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatSoftmaxCrossEntropyGrad/network.onnx rename to DeeployTest/Tests/FP32Kernels/Activations/Softmax/CrossEntropyGrad/network.onnx diff --git a/DeeployTest/Tests/testFloatSoftmaxCrossEntropyGrad/outputs.npz b/DeeployTest/Tests/FP32Kernels/Activations/Softmax/CrossEntropyGrad/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatSoftmaxCrossEntropyGrad/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Activations/Softmax/CrossEntropyGrad/outputs.npz diff --git a/DeeployTest/Tests/testFloatSoftmaxGrad/inputs.npz b/DeeployTest/Tests/FP32Kernels/Activations/Softmax/Grad/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatSoftmaxGrad/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Activations/Softmax/Grad/inputs.npz diff --git a/DeeployTest/Tests/testFloatSoftmaxGrad/network.onnx 
b/DeeployTest/Tests/FP32Kernels/Activations/Softmax/Grad/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatSoftmaxGrad/network.onnx rename to DeeployTest/Tests/FP32Kernels/Activations/Softmax/Grad/network.onnx diff --git a/DeeployTest/Tests/testFloatSoftmaxGrad/outputs.npz b/DeeployTest/Tests/FP32Kernels/Activations/Softmax/Grad/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatSoftmaxGrad/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Activations/Softmax/Grad/outputs.npz diff --git a/DeeployTest/Tests/testFloatSoftmax/inputs.npz b/DeeployTest/Tests/FP32Kernels/Activations/Softmax/Regular/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatSoftmax/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Activations/Softmax/Regular/inputs.npz diff --git a/DeeployTest/Tests/testFloatSoftmax/network.onnx b/DeeployTest/Tests/FP32Kernels/Activations/Softmax/Regular/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatSoftmax/network.onnx rename to DeeployTest/Tests/FP32Kernels/Activations/Softmax/Regular/network.onnx diff --git a/DeeployTest/Tests/testFloatSoftmax/outputs.npz b/DeeployTest/Tests/FP32Kernels/Activations/Softmax/Regular/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatSoftmax/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Activations/Softmax/Regular/outputs.npz diff --git a/DeeployTest/Tests/largeFloatAdd/inputs.npz b/DeeployTest/Tests/FP32Kernels/Add/large/inputs.npz similarity index 100% rename from DeeployTest/Tests/largeFloatAdd/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Add/large/inputs.npz diff --git a/DeeployTest/Tests/largeFloatAdd/network.onnx b/DeeployTest/Tests/FP32Kernels/Add/large/network.onnx similarity index 100% rename from DeeployTest/Tests/largeFloatAdd/network.onnx rename to DeeployTest/Tests/FP32Kernels/Add/large/network.onnx diff --git a/DeeployTest/Tests/largeFloatAdd/outputs.npz 
b/DeeployTest/Tests/FP32Kernels/Add/large/outputs.npz similarity index 100% rename from DeeployTest/Tests/largeFloatAdd/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Add/large/outputs.npz diff --git a/DeeployTest/Tests/TestiNoNorm/activations.npz b/DeeployTest/Tests/FP32Kernels/Add/regular/activations.npz similarity index 100% rename from DeeployTest/Tests/TestiNoNorm/activations.npz rename to DeeployTest/Tests/FP32Kernels/Add/regular/activations.npz diff --git a/DeeployTest/Tests/testFloatAdder/inputs.npz b/DeeployTest/Tests/FP32Kernels/Add/regular/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatAdder/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Add/regular/inputs.npz diff --git a/DeeployTest/Tests/testFloatAdder/network.onnx b/DeeployTest/Tests/FP32Kernels/Add/regular/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatAdder/network.onnx rename to DeeployTest/Tests/FP32Kernels/Add/regular/network.onnx diff --git a/DeeployTest/Tests/testFloatAdder/outputs.npz b/DeeployTest/Tests/FP32Kernels/Add/regular/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatAdder/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Add/regular/outputs.npz diff --git a/DeeployTest/Tests/testFloat2DDWConvolutionBias/inputs.npz b/DeeployTest/Tests/FP32Kernels/Conv2D/DWBias/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloat2DDWConvolutionBias/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Conv2D/DWBias/inputs.npz diff --git a/DeeployTest/Tests/testFloat2DDWConvolutionBias/network.onnx b/DeeployTest/Tests/FP32Kernels/Conv2D/DWBias/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloat2DDWConvolutionBias/network.onnx rename to DeeployTest/Tests/FP32Kernels/Conv2D/DWBias/network.onnx diff --git a/DeeployTest/Tests/testFloat2DDWConvolutionBias/outputs.npz b/DeeployTest/Tests/FP32Kernels/Conv2D/DWBias/outputs.npz similarity index 100% rename from 
DeeployTest/Tests/testFloat2DDWConvolutionBias/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Conv2D/DWBias/outputs.npz diff --git a/DeeployTest/Tests/testFloat2DDWConvolution/inputs.npz b/DeeployTest/Tests/FP32Kernels/Conv2D/DWNoBias/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloat2DDWConvolution/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Conv2D/DWNoBias/inputs.npz diff --git a/DeeployTest/Tests/testFloat2DDWConvolution/network.onnx b/DeeployTest/Tests/FP32Kernels/Conv2D/DWNoBias/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloat2DDWConvolution/network.onnx rename to DeeployTest/Tests/FP32Kernels/Conv2D/DWNoBias/network.onnx diff --git a/DeeployTest/Tests/testFloat2DDWConvolution/outputs.npz b/DeeployTest/Tests/FP32Kernels/Conv2D/DWNoBias/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloat2DDWConvolution/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Conv2D/DWNoBias/outputs.npz diff --git a/DeeployTest/Tests/testFloat2DDWConvolutionZeroBias/inputs.npz b/DeeployTest/Tests/FP32Kernels/Conv2D/DWZeroValuedBias/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloat2DDWConvolutionZeroBias/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Conv2D/DWZeroValuedBias/inputs.npz diff --git a/DeeployTest/Tests/testFloat2DDWConvolutionZeroBias/network.onnx b/DeeployTest/Tests/FP32Kernels/Conv2D/DWZeroValuedBias/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloat2DDWConvolutionZeroBias/network.onnx rename to DeeployTest/Tests/FP32Kernels/Conv2D/DWZeroValuedBias/network.onnx diff --git a/DeeployTest/Tests/testFloat2DDWConvolutionZeroBias/outputs.npz b/DeeployTest/Tests/FP32Kernels/Conv2D/DWZeroValuedBias/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloat2DDWConvolutionZeroBias/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Conv2D/DWZeroValuedBias/outputs.npz diff --git a/DeeployTest/Tests/testFloat2DConvolutionBias/inputs.npz 
b/DeeployTest/Tests/FP32Kernels/Conv2D/RegularBias/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloat2DConvolutionBias/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Conv2D/RegularBias/inputs.npz diff --git a/DeeployTest/Tests/testFloat2DConvolutionBias/network.onnx b/DeeployTest/Tests/FP32Kernels/Conv2D/RegularBias/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloat2DConvolutionBias/network.onnx rename to DeeployTest/Tests/FP32Kernels/Conv2D/RegularBias/network.onnx diff --git a/DeeployTest/Tests/testFloat2DConvolutionBias/outputs.npz b/DeeployTest/Tests/FP32Kernels/Conv2D/RegularBias/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloat2DConvolutionBias/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Conv2D/RegularBias/outputs.npz diff --git a/DeeployTest/Tests/testFloat2DConvolution/inputs.npz b/DeeployTest/Tests/FP32Kernels/Conv2D/RegularNoBias/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloat2DConvolution/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Conv2D/RegularNoBias/inputs.npz diff --git a/DeeployTest/Tests/testFloat2DConvolution/network.onnx b/DeeployTest/Tests/FP32Kernels/Conv2D/RegularNoBias/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloat2DConvolution/network.onnx rename to DeeployTest/Tests/FP32Kernels/Conv2D/RegularNoBias/network.onnx diff --git a/DeeployTest/Tests/testFloat2DConvolution/outputs.npz b/DeeployTest/Tests/FP32Kernels/Conv2D/RegularNoBias/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloat2DConvolution/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Conv2D/RegularNoBias/outputs.npz diff --git a/DeeployTest/Tests/testFloat2DConvolutionZeroBias/inputs.npz b/DeeployTest/Tests/FP32Kernels/Conv2D/RegularZeroValuedBias/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloat2DConvolutionZeroBias/inputs.npz rename to 
DeeployTest/Tests/FP32Kernels/Conv2D/RegularZeroValuedBias/inputs.npz diff --git a/DeeployTest/Tests/testFloat2DConvolutionZeroBias/network.onnx b/DeeployTest/Tests/FP32Kernels/Conv2D/RegularZeroValuedBias/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloat2DConvolutionZeroBias/network.onnx rename to DeeployTest/Tests/FP32Kernels/Conv2D/RegularZeroValuedBias/network.onnx diff --git a/DeeployTest/Tests/testFloat2DConvolutionZeroBias/outputs.npz b/DeeployTest/Tests/FP32Kernels/Conv2D/RegularZeroValuedBias/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloat2DConvolutionZeroBias/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Conv2D/RegularZeroValuedBias/outputs.npz diff --git a/DeeployTest/Tests/testFloatDiv/inputs.npz b/DeeployTest/Tests/FP32Kernels/Div/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatDiv/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Div/inputs.npz diff --git a/DeeployTest/Tests/testFloatDiv/network.onnx b/DeeployTest/Tests/FP32Kernels/Div/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatDiv/network.onnx rename to DeeployTest/Tests/FP32Kernels/Div/network.onnx diff --git a/DeeployTest/Tests/testFloatDiv/outputs.npz b/DeeployTest/Tests/FP32Kernels/Div/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatDiv/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Div/outputs.npz diff --git a/DeeployTest/Tests/testFloatGEMMnobias/inputs.npz b/DeeployTest/Tests/FP32Kernels/GEMM/noBias/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatGEMMnobias/inputs.npz rename to DeeployTest/Tests/FP32Kernels/GEMM/noBias/inputs.npz diff --git a/DeeployTest/Tests/testFloatGEMMnobias/network.onnx b/DeeployTest/Tests/FP32Kernels/GEMM/noBias/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatGEMMnobias/network.onnx rename to DeeployTest/Tests/FP32Kernels/GEMM/noBias/network.onnx diff --git 
a/DeeployTest/Tests/testFloatGEMMnobias/outputs.npz b/DeeployTest/Tests/FP32Kernels/GEMM/noBias/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatGEMMnobias/outputs.npz rename to DeeployTest/Tests/FP32Kernels/GEMM/noBias/outputs.npz diff --git a/DeeployTest/Tests/testFloatGEMM/inputs.npz b/DeeployTest/Tests/FP32Kernels/GEMM/regular/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatGEMM/inputs.npz rename to DeeployTest/Tests/FP32Kernels/GEMM/regular/inputs.npz diff --git a/DeeployTest/Tests/testFloatGEMM/network.onnx b/DeeployTest/Tests/FP32Kernels/GEMM/regular/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatGEMM/network.onnx rename to DeeployTest/Tests/FP32Kernels/GEMM/regular/network.onnx diff --git a/DeeployTest/Tests/testFloatGEMM/outputs.npz b/DeeployTest/Tests/FP32Kernels/GEMM/regular/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatGEMM/outputs.npz rename to DeeployTest/Tests/FP32Kernels/GEMM/regular/outputs.npz diff --git a/DeeployTest/Tests/testFloatGEMMtransB/inputs.npz b/DeeployTest/Tests/FP32Kernels/GEMM/transB/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatGEMMtransB/inputs.npz rename to DeeployTest/Tests/FP32Kernels/GEMM/transB/inputs.npz diff --git a/DeeployTest/Tests/testFloatGEMMtransB/network.onnx b/DeeployTest/Tests/FP32Kernels/GEMM/transB/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatGEMMtransB/network.onnx rename to DeeployTest/Tests/FP32Kernels/GEMM/transB/network.onnx diff --git a/DeeployTest/Tests/testFloatGEMMtransB/outputs.npz b/DeeployTest/Tests/FP32Kernels/GEMM/transB/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatGEMMtransB/outputs.npz rename to DeeployTest/Tests/FP32Kernels/GEMM/transB/outputs.npz diff --git a/DeeployTest/Tests/testFloatMatmul/inputs.npz b/DeeployTest/Tests/FP32Kernels/MatMul/inputs.npz similarity index 100% rename from 
DeeployTest/Tests/testFloatMatmul/inputs.npz rename to DeeployTest/Tests/FP32Kernels/MatMul/inputs.npz diff --git a/DeeployTest/Tests/testFloatMatmul/network.onnx b/DeeployTest/Tests/FP32Kernels/MatMul/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatMatmul/network.onnx rename to DeeployTest/Tests/FP32Kernels/MatMul/network.onnx diff --git a/DeeployTest/Tests/testFloatMatmul/outputs.npz b/DeeployTest/Tests/FP32Kernels/MatMul/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatMatmul/outputs.npz rename to DeeployTest/Tests/FP32Kernels/MatMul/outputs.npz diff --git a/DeeployTest/Tests/testFloatMaxPool/inputs.npz b/DeeployTest/Tests/FP32Kernels/MaxPool/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatMaxPool/inputs.npz rename to DeeployTest/Tests/FP32Kernels/MaxPool/inputs.npz diff --git a/DeeployTest/Tests/testFloatMaxPool/network.onnx b/DeeployTest/Tests/FP32Kernels/MaxPool/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatMaxPool/network.onnx rename to DeeployTest/Tests/FP32Kernels/MaxPool/network.onnx diff --git a/DeeployTest/Tests/testFloatMaxPool/outputs.npz b/DeeployTest/Tests/FP32Kernels/MaxPool/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatMaxPool/outputs.npz rename to DeeployTest/Tests/FP32Kernels/MaxPool/outputs.npz diff --git a/DeeployTest/Tests/testFloatMul/inputs.npz b/DeeployTest/Tests/FP32Kernels/Mul/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatMul/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Mul/inputs.npz diff --git a/DeeployTest/Tests/testFloatMul/network.onnx b/DeeployTest/Tests/FP32Kernels/Mul/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatMul/network.onnx rename to DeeployTest/Tests/FP32Kernels/Mul/network.onnx diff --git a/DeeployTest/Tests/testFloatMul/outputs.npz b/DeeployTest/Tests/FP32Kernels/Mul/outputs.npz similarity index 100% rename from 
DeeployTest/Tests/testFloatMul/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Mul/outputs.npz diff --git a/DeeployTest/Tests/testFloatLayerNorm/inputs.npz b/DeeployTest/Tests/FP32Kernels/Norm/LayerNorm/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatLayerNorm/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Norm/LayerNorm/inputs.npz diff --git a/DeeployTest/Tests/testFloatLayerNorm/network.onnx b/DeeployTest/Tests/FP32Kernels/Norm/LayerNorm/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatLayerNorm/network.onnx rename to DeeployTest/Tests/FP32Kernels/Norm/LayerNorm/network.onnx diff --git a/DeeployTest/Tests/testFloatLayerNorm/outputs.npz b/DeeployTest/Tests/FP32Kernels/Norm/LayerNorm/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatLayerNorm/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Norm/LayerNorm/outputs.npz diff --git a/DeeployTest/Tests/testFloatRMSNorm/inputs.npz b/DeeployTest/Tests/FP32Kernels/Norm/RMSNorm/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatRMSNorm/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Norm/RMSNorm/inputs.npz diff --git a/DeeployTest/Tests/testFloatRMSNorm/network.onnx b/DeeployTest/Tests/FP32Kernels/Norm/RMSNorm/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatRMSNorm/network.onnx rename to DeeployTest/Tests/FP32Kernels/Norm/RMSNorm/network.onnx diff --git a/DeeployTest/Tests/testFloatRMSNorm/outputs.npz b/DeeployTest/Tests/FP32Kernels/Norm/RMSNorm/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatRMSNorm/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Norm/RMSNorm/outputs.npz diff --git a/DeeployTest/Tests/testFloat2DPadding/inputs.npz b/DeeployTest/Tests/FP32Kernels/Pad2D/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloat2DPadding/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Pad2D/inputs.npz diff --git 
a/DeeployTest/Tests/testFloat2DPadding/network.onnx b/DeeployTest/Tests/FP32Kernels/Pad2D/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloat2DPadding/network.onnx rename to DeeployTest/Tests/FP32Kernels/Pad2D/network.onnx diff --git a/DeeployTest/Tests/testFloat2DPadding/outputs.npz b/DeeployTest/Tests/FP32Kernels/Pad2D/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloat2DPadding/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Pad2D/outputs.npz diff --git a/DeeployTest/Tests/testFloatPowScalar/inputs.npz b/DeeployTest/Tests/FP32Kernels/Pow/Scalar/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatPowScalar/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Pow/Scalar/inputs.npz diff --git a/DeeployTest/Tests/testFloatPowScalar/network.onnx b/DeeployTest/Tests/FP32Kernels/Pow/Scalar/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatPowScalar/network.onnx rename to DeeployTest/Tests/FP32Kernels/Pow/Scalar/network.onnx diff --git a/DeeployTest/Tests/testFloatPowScalar/outputs.npz b/DeeployTest/Tests/FP32Kernels/Pow/Scalar/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatPowScalar/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Pow/Scalar/outputs.npz diff --git a/DeeployTest/Tests/testFloatPowVector/inputs.npz b/DeeployTest/Tests/FP32Kernels/Pow/Vector/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatPowVector/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Pow/Vector/inputs.npz diff --git a/DeeployTest/Tests/testFloatPowVector/network.onnx b/DeeployTest/Tests/FP32Kernels/Pow/Vector/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatPowVector/network.onnx rename to DeeployTest/Tests/FP32Kernels/Pow/Vector/network.onnx diff --git a/DeeployTest/Tests/testFloatPowVector/outputs.npz b/DeeployTest/Tests/FP32Kernels/Pow/Vector/outputs.npz similarity index 100% rename from 
DeeployTest/Tests/testFloatPowVector/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Pow/Vector/outputs.npz diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean/inputs.npz new file mode 100644 index 0000000000..42c1f86253 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean/network.onnx new file mode 100644 index 0000000000..5bb612d31c Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean/outputs.npz new file mode 100644 index 0000000000..48b72a636f Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean_Add/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean_Add/inputs.npz new file mode 100644 index 0000000000..42c1f86253 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean_Add/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean_Add/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean_Add/network.onnx new file mode 100644 index 0000000000..8675da8b4e Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean_Add/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean_Add/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean_Add/outputs.npz new file mode 100644 index 
0000000000..241904dd35 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Add_ReduceMean_Add/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/AllAxes/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/AllAxes/inputs.npz new file mode 100644 index 0000000000..8970f698ad Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/AllAxes/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/AllAxes/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/AllAxes/network.onnx new file mode 100644 index 0000000000..c8c9d011f7 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/AllAxes/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/AllAxes/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/AllAxes/outputs.npz new file mode 100644 index 0000000000..13ebdd8585 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/AllAxes/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_2_3/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_2_3/inputs.npz new file mode 100644 index 0000000000..8970f698ad Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_2_3/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_2_3/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_2_3/network.onnx new file mode 100644 index 0000000000..589670ea8e Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_2_3/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_2_3/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_2_3/outputs.npz new file mode 100644 index 0000000000..13ebdd8585 Binary files /dev/null and 
b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_2_3/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_3/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_3/inputs.npz new file mode 100644 index 0000000000..42c1f86253 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_3/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_3/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_3/network.onnx new file mode 100644 index 0000000000..aeaf210c69 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_3/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_3/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_3/outputs.npz new file mode 100644 index 0000000000..b39a9200bd Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes1_3/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes2_1/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes2_1/inputs.npz new file mode 100644 index 0000000000..42c1f86253 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes2_1/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes2_1/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes2_1/network.onnx new file mode 100644 index 0000000000..7941bad80c Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes2_1/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes2_1/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes2_1/outputs.npz new file mode 100644 index 0000000000..b108ed17bf Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axes2_1/outputs.npz differ diff --git 
a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis0/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis0/inputs.npz new file mode 100644 index 0000000000..42c1f86253 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis0/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis0/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis0/network.onnx new file mode 100644 index 0000000000..3bc8a3a93a Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis0/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis0/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis0/outputs.npz new file mode 100644 index 0000000000..21761d2281 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis0/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis2/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis2/inputs.npz new file mode 100644 index 0000000000..42c1f86253 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis2/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis2/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis2/network.onnx new file mode 100644 index 0000000000..1f6be48e8e Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis2/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis2/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis2/outputs.npz new file mode 100644 index 0000000000..180cd2795f Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/Axis2/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/ReduceMean_Add/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/ReduceMean_Add/inputs.npz new file mode 
100644 index 0000000000..42c1f86253 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/ReduceMean_Add/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/ReduceMean_Add/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/ReduceMean_Add/network.onnx new file mode 100644 index 0000000000..babf226169 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/ReduceMean_Add/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/ReduceMean_Add/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/ReduceMean_Add/outputs.npz new file mode 100644 index 0000000000..48b72a636f Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/KeepDims/ReduceMean_Add/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean/inputs.npz new file mode 100644 index 0000000000..42c1f86253 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean/network.onnx new file mode 100644 index 0000000000..3efac8fdf7 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean/outputs.npz new file mode 100644 index 0000000000..3eef9f240f Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean_Add/inputs.npz 
b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean_Add/inputs.npz new file mode 100644 index 0000000000..42c1f86253 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean_Add/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean_Add/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean_Add/network.onnx new file mode 100644 index 0000000000..887b122ea8 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean_Add/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean_Add/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean_Add/outputs.npz new file mode 100644 index 0000000000..4a4bc781c8 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Add_ReduceMean_Add/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/AllAxes/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/AllAxes/inputs.npz new file mode 100644 index 0000000000..8970f698ad Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/AllAxes/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/AllAxes/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/AllAxes/network.onnx new file mode 100644 index 0000000000..5526be4a80 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/AllAxes/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/AllAxes/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/AllAxes/outputs.npz new file mode 100644 index 0000000000..e2e66e5824 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/AllAxes/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_2_3/inputs.npz 
b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_2_3/inputs.npz new file mode 100644 index 0000000000..8970f698ad Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_2_3/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_2_3/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_2_3/network.onnx new file mode 100644 index 0000000000..b21abffd28 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_2_3/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_2_3/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_2_3/outputs.npz new file mode 100644 index 0000000000..abe06e5a28 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_2_3/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_3/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_3/inputs.npz new file mode 100644 index 0000000000..42c1f86253 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_3/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_3/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_3/network.onnx new file mode 100644 index 0000000000..524e462371 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_3/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_3/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_3/outputs.npz new file mode 100644 index 0000000000..7de7741ef8 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes1_3/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes2_1/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes2_1/inputs.npz new file 
mode 100644 index 0000000000..42c1f86253 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes2_1/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes2_1/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes2_1/network.onnx new file mode 100644 index 0000000000..24a7ad68d6 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes2_1/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes2_1/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes2_1/outputs.npz new file mode 100644 index 0000000000..9db04e582f Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axes2_1/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis0/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis0/inputs.npz new file mode 100644 index 0000000000..42c1f86253 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis0/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis0/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis0/network.onnx new file mode 100644 index 0000000000..249858b03b Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis0/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis0/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis0/outputs.npz new file mode 100644 index 0000000000..847ceaf4ec Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis0/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis2/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis2/inputs.npz new file mode 100644 index 0000000000..42c1f86253 Binary files /dev/null and 
b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis2/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis2/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis2/network.onnx new file mode 100644 index 0000000000..496f7a9e18 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis2/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis2/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis2/outputs.npz new file mode 100644 index 0000000000..95e79e2a23 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/Axis2/outputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/ReduceMean_Add/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/ReduceMean_Add/inputs.npz new file mode 100644 index 0000000000..42c1f86253 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/ReduceMean_Add/inputs.npz differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/ReduceMean_Add/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/ReduceMean_Add/network.onnx new file mode 100644 index 0000000000..40188be091 Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/ReduceMean_Add/network.onnx differ diff --git a/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/ReduceMean_Add/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/ReduceMean_Add/outputs.npz new file mode 100644 index 0000000000..3eef9f240f Binary files /dev/null and b/DeeployTest/Tests/FP32Kernels/ReduceMean/NoKeepDims/ReduceMean_Add/outputs.npz differ diff --git a/DeeployTest/Tests/testFloatReduceSum/inputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceSum/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatReduceSum/inputs.npz rename to DeeployTest/Tests/FP32Kernels/ReduceSum/inputs.npz diff --git 
a/DeeployTest/Tests/testFloatReduceSum/network.onnx b/DeeployTest/Tests/FP32Kernels/ReduceSum/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatReduceSum/network.onnx rename to DeeployTest/Tests/FP32Kernels/ReduceSum/network.onnx diff --git a/DeeployTest/Tests/testFloatReduceSum/outputs.npz b/DeeployTest/Tests/FP32Kernels/ReduceSum/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatReduceSum/outputs.npz rename to DeeployTest/Tests/FP32Kernels/ReduceSum/outputs.npz diff --git a/DeeployTest/Tests/testFloatReshape/inputs.npz b/DeeployTest/Tests/FP32Kernels/Reshape/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatReshape/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Reshape/inputs.npz diff --git a/DeeployTest/Tests/testFloatReshape/network.onnx b/DeeployTest/Tests/FP32Kernels/Reshape/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatReshape/network.onnx rename to DeeployTest/Tests/FP32Kernels/Reshape/network.onnx diff --git a/DeeployTest/Tests/testFloatReshape/outputs.npz b/DeeployTest/Tests/FP32Kernels/Reshape/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatReshape/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Reshape/outputs.npz diff --git a/DeeployTest/Tests/testFloatReshapeWithSkipConnection/inputs.npz b/DeeployTest/Tests/FP32Kernels/SkipConnection/ReshapeWithSkipConnection/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatReshapeWithSkipConnection/inputs.npz rename to DeeployTest/Tests/FP32Kernels/SkipConnection/ReshapeWithSkipConnection/inputs.npz diff --git a/DeeployTest/Tests/testFloatReshapeWithSkipConnection/network.onnx b/DeeployTest/Tests/FP32Kernels/SkipConnection/ReshapeWithSkipConnection/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatReshapeWithSkipConnection/network.onnx rename to DeeployTest/Tests/FP32Kernels/SkipConnection/ReshapeWithSkipConnection/network.onnx diff --git 
a/DeeployTest/Tests/testFloatReshapeWithSkipConnection/outputs.npz b/DeeployTest/Tests/FP32Kernels/SkipConnection/ReshapeWithSkipConnection/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatReshapeWithSkipConnection/outputs.npz rename to DeeployTest/Tests/FP32Kernels/SkipConnection/ReshapeWithSkipConnection/outputs.npz diff --git a/DeeployTest/Tests/testFloatSqrt/inputs.npz b/DeeployTest/Tests/FP32Kernels/Sqrt/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatSqrt/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Sqrt/inputs.npz diff --git a/DeeployTest/Tests/testFloatSqrt/network.onnx b/DeeployTest/Tests/FP32Kernels/Sqrt/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatSqrt/network.onnx rename to DeeployTest/Tests/FP32Kernels/Sqrt/network.onnx diff --git a/DeeployTest/Tests/testFloatSqrt/outputs.npz b/DeeployTest/Tests/FP32Kernels/Sqrt/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatSqrt/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Sqrt/outputs.npz diff --git a/DeeployTest/Tests/testFloatSqueeze/inputs.npz b/DeeployTest/Tests/FP32Kernels/Squeeze/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatSqueeze/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Squeeze/inputs.npz diff --git a/DeeployTest/Tests/testFloatSqueeze/network.onnx b/DeeployTest/Tests/FP32Kernels/Squeeze/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatSqueeze/network.onnx rename to DeeployTest/Tests/FP32Kernels/Squeeze/network.onnx diff --git a/DeeployTest/Tests/testFloatSqueeze/outputs.npz b/DeeployTest/Tests/FP32Kernels/Squeeze/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatSqueeze/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Squeeze/outputs.npz diff --git a/DeeployTest/Tests/testFloatTranspose/inputs.npz b/DeeployTest/Tests/FP32Kernels/Transpose/inputs.npz similarity index 100% rename from 
DeeployTest/Tests/testFloatTranspose/inputs.npz rename to DeeployTest/Tests/FP32Kernels/Transpose/inputs.npz diff --git a/DeeployTest/Tests/testFloatTranspose/network.onnx b/DeeployTest/Tests/FP32Kernels/Transpose/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatTranspose/network.onnx rename to DeeployTest/Tests/FP32Kernels/Transpose/network.onnx diff --git a/DeeployTest/Tests/testFloatTranspose/outputs.npz b/DeeployTest/Tests/FP32Kernels/Transpose/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatTranspose/outputs.npz rename to DeeployTest/Tests/FP32Kernels/Transpose/outputs.npz diff --git a/DeeployTest/Tests/Hardswish/inputs.npz b/DeeployTest/Tests/IntKernels/Activations/Hardswish/inputs.npz similarity index 100% rename from DeeployTest/Tests/Hardswish/inputs.npz rename to DeeployTest/Tests/IntKernels/Activations/Hardswish/inputs.npz diff --git a/DeeployTest/Tests/Hardswish/network.onnx b/DeeployTest/Tests/IntKernels/Activations/Hardswish/network.onnx similarity index 100% rename from DeeployTest/Tests/Hardswish/network.onnx rename to DeeployTest/Tests/IntKernels/Activations/Hardswish/network.onnx diff --git a/DeeployTest/Tests/Hardswish/outputs.npz b/DeeployTest/Tests/IntKernels/Activations/Hardswish/outputs.npz similarity index 100% rename from DeeployTest/Tests/Hardswish/outputs.npz rename to DeeployTest/Tests/IntKernels/Activations/Hardswish/outputs.npz diff --git a/DeeployTest/Tests/TestiSoftmaxLarge/activations.npz b/DeeployTest/Tests/IntKernels/Activations/Softmax/Large/activations.npz similarity index 100% rename from DeeployTest/Tests/TestiSoftmaxLarge/activations.npz rename to DeeployTest/Tests/IntKernels/Activations/Softmax/Large/activations.npz diff --git a/DeeployTest/Tests/TestiSoftmaxLarge/inputs.npz b/DeeployTest/Tests/IntKernels/Activations/Softmax/Large/inputs.npz similarity index 100% rename from DeeployTest/Tests/TestiSoftmaxLarge/inputs.npz rename to 
DeeployTest/Tests/IntKernels/Activations/Softmax/Large/inputs.npz diff --git a/DeeployTest/Tests/TestiSoftmaxLarge/network.onnx b/DeeployTest/Tests/IntKernels/Activations/Softmax/Large/network.onnx similarity index 100% rename from DeeployTest/Tests/TestiSoftmaxLarge/network.onnx rename to DeeployTest/Tests/IntKernels/Activations/Softmax/Large/network.onnx diff --git a/DeeployTest/Tests/TestiSoftmaxLarge/outputs.npz b/DeeployTest/Tests/IntKernels/Activations/Softmax/Large/outputs.npz similarity index 100% rename from DeeployTest/Tests/TestiSoftmaxLarge/outputs.npz rename to DeeployTest/Tests/IntKernels/Activations/Softmax/Large/outputs.npz diff --git a/DeeployTest/Tests/iSoftmax/activations.npz b/DeeployTest/Tests/IntKernels/Activations/Softmax/Regular/activations.npz similarity index 100% rename from DeeployTest/Tests/iSoftmax/activations.npz rename to DeeployTest/Tests/IntKernels/Activations/Softmax/Regular/activations.npz diff --git a/DeeployTest/Tests/iSoftmax/inputs.npz b/DeeployTest/Tests/IntKernels/Activations/Softmax/Regular/inputs.npz similarity index 100% rename from DeeployTest/Tests/iSoftmax/inputs.npz rename to DeeployTest/Tests/IntKernels/Activations/Softmax/Regular/inputs.npz diff --git a/DeeployTest/Tests/iSoftmax/network.onnx b/DeeployTest/Tests/IntKernels/Activations/Softmax/Regular/network.onnx similarity index 100% rename from DeeployTest/Tests/iSoftmax/network.onnx rename to DeeployTest/Tests/IntKernels/Activations/Softmax/Regular/network.onnx diff --git a/DeeployTest/Tests/iSoftmax/outputs.npz b/DeeployTest/Tests/IntKernels/Activations/Softmax/Regular/outputs.npz similarity index 100% rename from DeeployTest/Tests/iSoftmax/outputs.npz rename to DeeployTest/Tests/IntKernels/Activations/Softmax/Regular/outputs.npz diff --git a/DeeployTest/Tests/TestAdderLarge/inputs.npz b/DeeployTest/Tests/IntKernels/Add/Large/inputs.npz similarity index 100% rename from DeeployTest/Tests/TestAdderLarge/inputs.npz rename to 
DeeployTest/Tests/IntKernels/Add/Large/inputs.npz diff --git a/DeeployTest/Tests/TestAdderLarge/network.onnx b/DeeployTest/Tests/IntKernels/Add/Large/network.onnx similarity index 100% rename from DeeployTest/Tests/TestAdderLarge/network.onnx rename to DeeployTest/Tests/IntKernels/Add/Large/network.onnx diff --git a/DeeployTest/Tests/TestAdderLarge/outputs.npz b/DeeployTest/Tests/IntKernels/Add/Large/outputs.npz similarity index 100% rename from DeeployTest/Tests/TestAdderLarge/outputs.npz rename to DeeployTest/Tests/IntKernels/Add/Large/outputs.npz diff --git a/DeeployTest/Tests/MultIO/inputs.npz b/DeeployTest/Tests/IntKernels/Add/MultIO/inputs.npz similarity index 100% rename from DeeployTest/Tests/MultIO/inputs.npz rename to DeeployTest/Tests/IntKernels/Add/MultIO/inputs.npz diff --git a/DeeployTest/Tests/MultIO/network.onnx b/DeeployTest/Tests/IntKernels/Add/MultIO/network.onnx similarity index 100% rename from DeeployTest/Tests/MultIO/network.onnx rename to DeeployTest/Tests/IntKernels/Add/MultIO/network.onnx diff --git a/DeeployTest/Tests/MultIO/outputs.npz b/DeeployTest/Tests/IntKernels/Add/MultIO/outputs.npz similarity index 100% rename from DeeployTest/Tests/MultIO/outputs.npz rename to DeeployTest/Tests/IntKernels/Add/MultIO/outputs.npz diff --git a/DeeployTest/Tests/Adder/inputs.npz b/DeeployTest/Tests/IntKernels/Add/Regular/inputs.npz similarity index 100% rename from DeeployTest/Tests/Adder/inputs.npz rename to DeeployTest/Tests/IntKernels/Add/Regular/inputs.npz diff --git a/DeeployTest/Tests/Adder/network.onnx b/DeeployTest/Tests/IntKernels/Add/Regular/network.onnx similarity index 100% rename from DeeployTest/Tests/Adder/network.onnx rename to DeeployTest/Tests/IntKernels/Add/Regular/network.onnx diff --git a/DeeployTest/Tests/Adder/outputs.npz b/DeeployTest/Tests/IntKernels/Add/Regular/outputs.npz similarity index 100% rename from DeeployTest/Tests/Adder/outputs.npz rename to DeeployTest/Tests/IntKernels/Add/Regular/outputs.npz diff --git 
a/DeeployTest/Tests/Attention/activations.npz b/DeeployTest/Tests/IntKernels/Attention/activations.npz similarity index 100% rename from DeeployTest/Tests/Attention/activations.npz rename to DeeployTest/Tests/IntKernels/Attention/activations.npz diff --git a/DeeployTest/Tests/Attention/inputs.npz b/DeeployTest/Tests/IntKernels/Attention/inputs.npz similarity index 100% rename from DeeployTest/Tests/Attention/inputs.npz rename to DeeployTest/Tests/IntKernels/Attention/inputs.npz diff --git a/DeeployTest/Tests/Attention/network.onnx b/DeeployTest/Tests/IntKernels/Attention/network.onnx similarity index 100% rename from DeeployTest/Tests/Attention/network.onnx rename to DeeployTest/Tests/IntKernels/Attention/network.onnx diff --git a/DeeployTest/Tests/Attention/outputs.npz b/DeeployTest/Tests/IntKernels/Attention/outputs.npz similarity index 100% rename from DeeployTest/Tests/Attention/outputs.npz rename to DeeployTest/Tests/IntKernels/Attention/outputs.npz diff --git a/DeeployTest/Tests/testBacktracking/activations.npz b/DeeployTest/Tests/IntKernels/Concat/activations.npz similarity index 100% rename from DeeployTest/Tests/testBacktracking/activations.npz rename to DeeployTest/Tests/IntKernels/Concat/activations.npz diff --git a/DeeployTest/Tests/testConcat/inputs.npz b/DeeployTest/Tests/IntKernels/Concat/inputs.npz similarity index 100% rename from DeeployTest/Tests/testConcat/inputs.npz rename to DeeployTest/Tests/IntKernels/Concat/inputs.npz diff --git a/DeeployTest/Tests/testConcat/network.onnx b/DeeployTest/Tests/IntKernels/Concat/network.onnx similarity index 100% rename from DeeployTest/Tests/testConcat/network.onnx rename to DeeployTest/Tests/IntKernels/Concat/network.onnx diff --git a/DeeployTest/Tests/testConcat/outputs.npz b/DeeployTest/Tests/IntKernels/Concat/outputs.npz similarity index 100% rename from DeeployTest/Tests/testConcat/outputs.npz rename to DeeployTest/Tests/IntKernels/Concat/outputs.npz diff --git 
a/DeeployTest/Tests/test1DDWConvolution/inputs.npz b/DeeployTest/Tests/IntKernels/Conv/1D/DW/inputs.npz similarity index 100% rename from DeeployTest/Tests/test1DDWConvolution/inputs.npz rename to DeeployTest/Tests/IntKernels/Conv/1D/DW/inputs.npz diff --git a/DeeployTest/Tests/test1DDWConvolution/network.onnx b/DeeployTest/Tests/IntKernels/Conv/1D/DW/network.onnx similarity index 100% rename from DeeployTest/Tests/test1DDWConvolution/network.onnx rename to DeeployTest/Tests/IntKernels/Conv/1D/DW/network.onnx diff --git a/DeeployTest/Tests/test1DDWConvolution/outputs.npz b/DeeployTest/Tests/IntKernels/Conv/1D/DW/outputs.npz similarity index 100% rename from DeeployTest/Tests/test1DDWConvolution/outputs.npz rename to DeeployTest/Tests/IntKernels/Conv/1D/DW/outputs.npz diff --git a/DeeployTest/Tests/test1DConvolution/inputs.npz b/DeeployTest/Tests/IntKernels/Conv/1D/Regular/inputs.npz similarity index 100% rename from DeeployTest/Tests/test1DConvolution/inputs.npz rename to DeeployTest/Tests/IntKernels/Conv/1D/Regular/inputs.npz diff --git a/DeeployTest/Tests/test1DConvolution/network.onnx b/DeeployTest/Tests/IntKernels/Conv/1D/Regular/network.onnx similarity index 100% rename from DeeployTest/Tests/test1DConvolution/network.onnx rename to DeeployTest/Tests/IntKernels/Conv/1D/Regular/network.onnx diff --git a/DeeployTest/Tests/test1DConvolution/outputs.npz b/DeeployTest/Tests/IntKernels/Conv/1D/Regular/outputs.npz similarity index 100% rename from DeeployTest/Tests/test1DConvolution/outputs.npz rename to DeeployTest/Tests/IntKernels/Conv/1D/Regular/outputs.npz diff --git a/DeeployTest/Tests/test2DDWConvolution/inputs.npz b/DeeployTest/Tests/IntKernels/Conv/2D/DW/inputs.npz similarity index 100% rename from DeeployTest/Tests/test2DDWConvolution/inputs.npz rename to DeeployTest/Tests/IntKernels/Conv/2D/DW/inputs.npz diff --git a/DeeployTest/Tests/test2DDWConvolution/network.onnx b/DeeployTest/Tests/IntKernels/Conv/2D/DW/network.onnx similarity index 100% rename from 
DeeployTest/Tests/test2DDWConvolution/network.onnx rename to DeeployTest/Tests/IntKernels/Conv/2D/DW/network.onnx diff --git a/DeeployTest/Tests/test2DDWConvolution/outputs.npz b/DeeployTest/Tests/IntKernels/Conv/2D/DW/outputs.npz similarity index 100% rename from DeeployTest/Tests/test2DDWConvolution/outputs.npz rename to DeeployTest/Tests/IntKernels/Conv/2D/DW/outputs.npz diff --git a/DeeployTest/Tests/testPointwiseConvBNReLU/activations.npz b/DeeployTest/Tests/IntKernels/Conv/2D/PW/convBNReLU/activations.npz similarity index 100% rename from DeeployTest/Tests/testPointwiseConvBNReLU/activations.npz rename to DeeployTest/Tests/IntKernels/Conv/2D/PW/convBNReLU/activations.npz diff --git a/DeeployTest/Tests/testPointwiseConvBNReLU/inputs.npz b/DeeployTest/Tests/IntKernels/Conv/2D/PW/convBNReLU/inputs.npz similarity index 100% rename from DeeployTest/Tests/testPointwiseConvBNReLU/inputs.npz rename to DeeployTest/Tests/IntKernels/Conv/2D/PW/convBNReLU/inputs.npz diff --git a/DeeployTest/Tests/testPointwiseConvBNReLU/network.onnx b/DeeployTest/Tests/IntKernels/Conv/2D/PW/convBNReLU/network.onnx similarity index 100% rename from DeeployTest/Tests/testPointwiseConvBNReLU/network.onnx rename to DeeployTest/Tests/IntKernels/Conv/2D/PW/convBNReLU/network.onnx diff --git a/DeeployTest/Tests/testPointwiseConvBNReLU/outputs.npz b/DeeployTest/Tests/IntKernels/Conv/2D/PW/convBNReLU/outputs.npz similarity index 100% rename from DeeployTest/Tests/testPointwiseConvBNReLU/outputs.npz rename to DeeployTest/Tests/IntKernels/Conv/2D/PW/convBNReLU/outputs.npz diff --git a/DeeployTest/Tests/testPointwise/activations.npz b/DeeployTest/Tests/IntKernels/Conv/2D/PW/regular/activations.npz similarity index 100% rename from DeeployTest/Tests/testPointwise/activations.npz rename to DeeployTest/Tests/IntKernels/Conv/2D/PW/regular/activations.npz diff --git a/DeeployTest/Tests/testPointwise/inputs.npz b/DeeployTest/Tests/IntKernels/Conv/2D/PW/regular/inputs.npz similarity index 100% rename from 
DeeployTest/Tests/testPointwise/inputs.npz rename to DeeployTest/Tests/IntKernels/Conv/2D/PW/regular/inputs.npz diff --git a/DeeployTest/Tests/testPointwise/network.onnx b/DeeployTest/Tests/IntKernels/Conv/2D/PW/regular/network.onnx similarity index 100% rename from DeeployTest/Tests/testPointwise/network.onnx rename to DeeployTest/Tests/IntKernels/Conv/2D/PW/regular/network.onnx diff --git a/DeeployTest/Tests/testPointwise/outputs.npz b/DeeployTest/Tests/IntKernels/Conv/2D/PW/regular/outputs.npz similarity index 100% rename from DeeployTest/Tests/testPointwise/outputs.npz rename to DeeployTest/Tests/IntKernels/Conv/2D/PW/regular/outputs.npz diff --git a/DeeployTest/Tests/testPointwiseUnsignedWeights/activations.npz b/DeeployTest/Tests/IntKernels/Conv/2D/PW/unsignedWeights/activations.npz similarity index 100% rename from DeeployTest/Tests/testPointwiseUnsignedWeights/activations.npz rename to DeeployTest/Tests/IntKernels/Conv/2D/PW/unsignedWeights/activations.npz diff --git a/DeeployTest/Tests/testPointwiseUnsignedWeights/inputs.npz b/DeeployTest/Tests/IntKernels/Conv/2D/PW/unsignedWeights/inputs.npz similarity index 100% rename from DeeployTest/Tests/testPointwiseUnsignedWeights/inputs.npz rename to DeeployTest/Tests/IntKernels/Conv/2D/PW/unsignedWeights/inputs.npz diff --git a/DeeployTest/Tests/testPointwiseUnsignedWeights/network.onnx b/DeeployTest/Tests/IntKernels/Conv/2D/PW/unsignedWeights/network.onnx similarity index 100% rename from DeeployTest/Tests/testPointwiseUnsignedWeights/network.onnx rename to DeeployTest/Tests/IntKernels/Conv/2D/PW/unsignedWeights/network.onnx diff --git a/DeeployTest/Tests/testPointwiseUnsignedWeights/outputs.npz b/DeeployTest/Tests/IntKernels/Conv/2D/PW/unsignedWeights/outputs.npz similarity index 100% rename from DeeployTest/Tests/testPointwiseUnsignedWeights/outputs.npz rename to DeeployTest/Tests/IntKernels/Conv/2D/PW/unsignedWeights/outputs.npz diff --git a/DeeployTest/Tests/test2DConvolution/inputs.npz 
b/DeeployTest/Tests/IntKernels/Conv/2D/Regular/inputs.npz similarity index 100% rename from DeeployTest/Tests/test2DConvolution/inputs.npz rename to DeeployTest/Tests/IntKernels/Conv/2D/Regular/inputs.npz diff --git a/DeeployTest/Tests/test2DConvolution/network.onnx b/DeeployTest/Tests/IntKernels/Conv/2D/Regular/network.onnx similarity index 100% rename from DeeployTest/Tests/test2DConvolution/network.onnx rename to DeeployTest/Tests/IntKernels/Conv/2D/Regular/network.onnx diff --git a/DeeployTest/Tests/test2DConvolution/outputs.npz b/DeeployTest/Tests/IntKernels/Conv/2D/Regular/outputs.npz similarity index 100% rename from DeeployTest/Tests/test2DConvolution/outputs.npz rename to DeeployTest/Tests/IntKernels/Conv/2D/Regular/outputs.npz diff --git a/DeeployTest/Tests/testGEMM/inputs.npz b/DeeployTest/Tests/IntKernels/GEMM/inputs.npz similarity index 100% rename from DeeployTest/Tests/testGEMM/inputs.npz rename to DeeployTest/Tests/IntKernels/GEMM/inputs.npz diff --git a/DeeployTest/Tests/testGEMM/network.onnx b/DeeployTest/Tests/IntKernels/GEMM/network.onnx similarity index 100% rename from DeeployTest/Tests/testGEMM/network.onnx rename to DeeployTest/Tests/IntKernels/GEMM/network.onnx diff --git a/DeeployTest/Tests/testGEMM/outputs.npz b/DeeployTest/Tests/IntKernels/GEMM/outputs.npz similarity index 100% rename from DeeployTest/Tests/testGEMM/outputs.npz rename to DeeployTest/Tests/IntKernels/GEMM/outputs.npz diff --git a/DeeployTest/Tests/testRemoveIdentityOp/inputIdentity/inputs.npz b/DeeployTest/Tests/IntKernels/Identity/inputIdentity/inputs.npz similarity index 100% rename from DeeployTest/Tests/testRemoveIdentityOp/inputIdentity/inputs.npz rename to DeeployTest/Tests/IntKernels/Identity/inputIdentity/inputs.npz diff --git a/DeeployTest/Tests/testRemoveIdentityOp/inputIdentity/network.onnx b/DeeployTest/Tests/IntKernels/Identity/inputIdentity/network.onnx similarity index 100% rename from DeeployTest/Tests/testRemoveIdentityOp/inputIdentity/network.onnx rename 
to DeeployTest/Tests/IntKernels/Identity/inputIdentity/network.onnx diff --git a/DeeployTest/Tests/testRemoveIdentityOp/inputIdentity/outputs.npz b/DeeployTest/Tests/IntKernels/Identity/inputIdentity/outputs.npz similarity index 100% rename from DeeployTest/Tests/testRemoveIdentityOp/inputIdentity/outputs.npz rename to DeeployTest/Tests/IntKernels/Identity/inputIdentity/outputs.npz diff --git a/DeeployTest/Tests/testRemoveIdentityOp/multiOutputIdentity/inputs.npz b/DeeployTest/Tests/IntKernels/Identity/multiOutputIdentity/inputs.npz similarity index 100% rename from DeeployTest/Tests/testRemoveIdentityOp/multiOutputIdentity/inputs.npz rename to DeeployTest/Tests/IntKernels/Identity/multiOutputIdentity/inputs.npz diff --git a/DeeployTest/Tests/testRemoveIdentityOp/multiOutputIdentity/network.onnx b/DeeployTest/Tests/IntKernels/Identity/multiOutputIdentity/network.onnx similarity index 100% rename from DeeployTest/Tests/testRemoveIdentityOp/multiOutputIdentity/network.onnx rename to DeeployTest/Tests/IntKernels/Identity/multiOutputIdentity/network.onnx diff --git a/DeeployTest/Tests/testRemoveIdentityOp/multiOutputIdentity/outputs.npz b/DeeployTest/Tests/IntKernels/Identity/multiOutputIdentity/outputs.npz similarity index 100% rename from DeeployTest/Tests/testRemoveIdentityOp/multiOutputIdentity/outputs.npz rename to DeeployTest/Tests/IntKernels/Identity/multiOutputIdentity/outputs.npz diff --git a/DeeployTest/Tests/testRemoveIdentityOp/outputIdentity/inputs.npz b/DeeployTest/Tests/IntKernels/Identity/outputIdentity/inputs.npz similarity index 100% rename from DeeployTest/Tests/testRemoveIdentityOp/outputIdentity/inputs.npz rename to DeeployTest/Tests/IntKernels/Identity/outputIdentity/inputs.npz diff --git a/DeeployTest/Tests/testRemoveIdentityOp/outputIdentity/network.onnx b/DeeployTest/Tests/IntKernels/Identity/outputIdentity/network.onnx similarity index 100% rename from DeeployTest/Tests/testRemoveIdentityOp/outputIdentity/network.onnx rename to 
DeeployTest/Tests/IntKernels/Identity/outputIdentity/network.onnx diff --git a/DeeployTest/Tests/testRemoveIdentityOp/outputIdentity/outputs.npz b/DeeployTest/Tests/IntKernels/Identity/outputIdentity/outputs.npz similarity index 100% rename from DeeployTest/Tests/testRemoveIdentityOp/outputIdentity/outputs.npz rename to DeeployTest/Tests/IntKernels/Identity/outputIdentity/outputs.npz diff --git a/DeeployTest/Tests/testMatMulAdd/inputs.npz b/DeeployTest/Tests/IntKernels/MatMul/add/inputs.npz similarity index 100% rename from DeeployTest/Tests/testMatMulAdd/inputs.npz rename to DeeployTest/Tests/IntKernels/MatMul/add/inputs.npz diff --git a/DeeployTest/Tests/testMatMulAdd/network.onnx b/DeeployTest/Tests/IntKernels/MatMul/add/network.onnx similarity index 100% rename from DeeployTest/Tests/testMatMulAdd/network.onnx rename to DeeployTest/Tests/IntKernels/MatMul/add/network.onnx diff --git a/DeeployTest/Tests/testMatMulAdd/outputs.npz b/DeeployTest/Tests/IntKernels/MatMul/add/outputs.npz similarity index 100% rename from DeeployTest/Tests/testMatMulAdd/outputs.npz rename to DeeployTest/Tests/IntKernels/MatMul/add/outputs.npz diff --git a/DeeployTest/Tests/testMatMulBatch/inputs.npz b/DeeployTest/Tests/IntKernels/MatMul/batch/inputs.npz similarity index 100% rename from DeeployTest/Tests/testMatMulBatch/inputs.npz rename to DeeployTest/Tests/IntKernels/MatMul/batch/inputs.npz diff --git a/DeeployTest/Tests/testMatMulBatch/network.onnx b/DeeployTest/Tests/IntKernels/MatMul/batch/network.onnx similarity index 100% rename from DeeployTest/Tests/testMatMulBatch/network.onnx rename to DeeployTest/Tests/IntKernels/MatMul/batch/network.onnx diff --git a/DeeployTest/Tests/testMatMulBatch/outputs.npz b/DeeployTest/Tests/IntKernels/MatMul/batch/outputs.npz similarity index 100% rename from DeeployTest/Tests/testMatMulBatch/outputs.npz rename to DeeployTest/Tests/IntKernels/MatMul/batch/outputs.npz diff --git a/DeeployTest/Tests/testMatMul/inputs.npz 
b/DeeployTest/Tests/IntKernels/MatMul/regular/inputs.npz similarity index 100% rename from DeeployTest/Tests/testMatMul/inputs.npz rename to DeeployTest/Tests/IntKernels/MatMul/regular/inputs.npz diff --git a/DeeployTest/Tests/testMatMul/network.onnx b/DeeployTest/Tests/IntKernels/MatMul/regular/network.onnx similarity index 100% rename from DeeployTest/Tests/testMatMul/network.onnx rename to DeeployTest/Tests/IntKernels/MatMul/regular/network.onnx diff --git a/DeeployTest/Tests/testMatMul/outputs.npz b/DeeployTest/Tests/IntKernels/MatMul/regular/outputs.npz similarity index 100% rename from DeeployTest/Tests/testMatMul/outputs.npz rename to DeeployTest/Tests/IntKernels/MatMul/regular/outputs.npz diff --git a/DeeployTest/Tests/testMaxPool/inputs.npz b/DeeployTest/Tests/IntKernels/MaxPool/inputs.npz similarity index 100% rename from DeeployTest/Tests/testMaxPool/inputs.npz rename to DeeployTest/Tests/IntKernels/MaxPool/inputs.npz diff --git a/DeeployTest/Tests/testMaxPool/network.onnx b/DeeployTest/Tests/IntKernels/MaxPool/network.onnx similarity index 100% rename from DeeployTest/Tests/testMaxPool/network.onnx rename to DeeployTest/Tests/IntKernels/MaxPool/network.onnx diff --git a/DeeployTest/Tests/testMaxPool/outputs.npz b/DeeployTest/Tests/IntKernels/MaxPool/outputs.npz similarity index 100% rename from DeeployTest/Tests/testMaxPool/outputs.npz rename to DeeployTest/Tests/IntKernels/MaxPool/outputs.npz diff --git a/DeeployTest/Tests/test1DPad/inputs.npz b/DeeployTest/Tests/IntKernels/Pad/1D/inputs.npz similarity index 100% rename from DeeployTest/Tests/test1DPad/inputs.npz rename to DeeployTest/Tests/IntKernels/Pad/1D/inputs.npz diff --git a/DeeployTest/Tests/test1DPad/network.onnx b/DeeployTest/Tests/IntKernels/Pad/1D/network.onnx similarity index 100% rename from DeeployTest/Tests/test1DPad/network.onnx rename to DeeployTest/Tests/IntKernels/Pad/1D/network.onnx diff --git a/DeeployTest/Tests/test1DPad/outputs.npz 
b/DeeployTest/Tests/IntKernels/Pad/1D/outputs.npz similarity index 100% rename from DeeployTest/Tests/test1DPad/outputs.npz rename to DeeployTest/Tests/IntKernels/Pad/1D/outputs.npz diff --git a/DeeployTest/Tests/test2DPad/inputs.npz b/DeeployTest/Tests/IntKernels/Pad/2D/inputs.npz similarity index 100% rename from DeeployTest/Tests/test2DPad/inputs.npz rename to DeeployTest/Tests/IntKernels/Pad/2D/inputs.npz diff --git a/DeeployTest/Tests/test2DPad/network.onnx b/DeeployTest/Tests/IntKernels/Pad/2D/network.onnx similarity index 100% rename from DeeployTest/Tests/test2DPad/network.onnx rename to DeeployTest/Tests/IntKernels/Pad/2D/network.onnx diff --git a/DeeployTest/Tests/test2DPad/outputs.npz b/DeeployTest/Tests/IntKernels/Pad/2D/outputs.npz similarity index 100% rename from DeeployTest/Tests/test2DPad/outputs.npz rename to DeeployTest/Tests/IntKernels/Pad/2D/outputs.npz diff --git a/DeeployTest/Tests/testRMSNorm/activations.npz b/DeeployTest/Tests/IntKernels/RMSNorm/activations.npz similarity index 100% rename from DeeployTest/Tests/testRMSNorm/activations.npz rename to DeeployTest/Tests/IntKernels/RMSNorm/activations.npz diff --git a/DeeployTest/Tests/testRMSNorm/inputs.npz b/DeeployTest/Tests/IntKernels/RMSNorm/inputs.npz similarity index 100% rename from DeeployTest/Tests/testRMSNorm/inputs.npz rename to DeeployTest/Tests/IntKernels/RMSNorm/inputs.npz diff --git a/DeeployTest/Tests/testRMSNorm/network.onnx b/DeeployTest/Tests/IntKernels/RMSNorm/network.onnx similarity index 100% rename from DeeployTest/Tests/testRMSNorm/network.onnx rename to DeeployTest/Tests/IntKernels/RMSNorm/network.onnx diff --git a/DeeployTest/Tests/testRMSNorm/outputs.npz b/DeeployTest/Tests/IntKernels/RMSNorm/outputs.npz similarity index 100% rename from DeeployTest/Tests/testRMSNorm/outputs.npz rename to DeeployTest/Tests/IntKernels/RMSNorm/outputs.npz diff --git a/DeeployTest/Tests/testReduceMean/inputs.npz b/DeeployTest/Tests/IntKernels/ReduceMean/inputs.npz similarity index 100% 
rename from DeeployTest/Tests/testReduceMean/inputs.npz rename to DeeployTest/Tests/IntKernels/ReduceMean/inputs.npz diff --git a/DeeployTest/Tests/testReduceMean/network.onnx b/DeeployTest/Tests/IntKernels/ReduceMean/network.onnx similarity index 100% rename from DeeployTest/Tests/testReduceMean/network.onnx rename to DeeployTest/Tests/IntKernels/ReduceMean/network.onnx diff --git a/DeeployTest/Tests/testReduceMean/outputs.npz b/DeeployTest/Tests/IntKernels/ReduceMean/outputs.npz similarity index 100% rename from DeeployTest/Tests/testReduceMean/outputs.npz rename to DeeployTest/Tests/IntKernels/ReduceMean/outputs.npz diff --git a/DeeployTest/Tests/testReduceSum/inputs.npz b/DeeployTest/Tests/IntKernels/ReduceSum/inputs.npz similarity index 100% rename from DeeployTest/Tests/testReduceSum/inputs.npz rename to DeeployTest/Tests/IntKernels/ReduceSum/inputs.npz diff --git a/DeeployTest/Tests/testReduceSum/network.onnx b/DeeployTest/Tests/IntKernels/ReduceSum/network.onnx similarity index 100% rename from DeeployTest/Tests/testReduceSum/network.onnx rename to DeeployTest/Tests/IntKernels/ReduceSum/network.onnx diff --git a/DeeployTest/Tests/testReduceSum/outputs.npz b/DeeployTest/Tests/IntKernels/ReduceSum/outputs.npz similarity index 100% rename from DeeployTest/Tests/testReduceSum/outputs.npz rename to DeeployTest/Tests/IntKernels/ReduceSum/outputs.npz diff --git a/DeeployTest/Tests/testConcat/activations.npz b/DeeployTest/Tests/IntKernels/Slice/activations.npz similarity index 100% rename from DeeployTest/Tests/testConcat/activations.npz rename to DeeployTest/Tests/IntKernels/Slice/activations.npz diff --git a/DeeployTest/Tests/testSlice/inputs.npz b/DeeployTest/Tests/IntKernels/Slice/inputs.npz similarity index 100% rename from DeeployTest/Tests/testSlice/inputs.npz rename to DeeployTest/Tests/IntKernels/Slice/inputs.npz diff --git a/DeeployTest/Tests/testSlice/network.onnx b/DeeployTest/Tests/IntKernels/Slice/network.onnx similarity index 100% rename from 
DeeployTest/Tests/testSlice/network.onnx rename to DeeployTest/Tests/IntKernels/Slice/network.onnx diff --git a/DeeployTest/Tests/testSlice/outputs.npz b/DeeployTest/Tests/IntKernels/Slice/outputs.npz similarity index 100% rename from DeeployTest/Tests/testSlice/outputs.npz rename to DeeployTest/Tests/IntKernels/Slice/outputs.npz diff --git a/DeeployTest/Tests/Autoencoder1D/inputs.npz b/DeeployTest/Tests/Models/Autoencoder1D/inputs.npz similarity index 100% rename from DeeployTest/Tests/Autoencoder1D/inputs.npz rename to DeeployTest/Tests/Models/Autoencoder1D/inputs.npz diff --git a/DeeployTest/Tests/Autoencoder1D/network.onnx b/DeeployTest/Tests/Models/Autoencoder1D/network.onnx similarity index 100% rename from DeeployTest/Tests/Autoencoder1D/network.onnx rename to DeeployTest/Tests/Models/Autoencoder1D/network.onnx diff --git a/DeeployTest/Tests/Autoencoder1D/outputs.npz b/DeeployTest/Tests/Models/Autoencoder1D/outputs.npz similarity index 100% rename from DeeployTest/Tests/Autoencoder1D/outputs.npz rename to DeeployTest/Tests/Models/Autoencoder1D/outputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_128/inputs.npz b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_128/inputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_16_16_128/inputs.npz rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_128/inputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_128/network.onnx b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_128/network.onnx similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_16_16_128/network.onnx rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_128/network.onnx diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_128/outputs.npz b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_128/outputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_16_16_128/outputs.npz rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_128/outputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_32/inputs.npz 
b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_32/inputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_16_16_32/inputs.npz rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_32/inputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_32/network.onnx b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_32/network.onnx similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_16_16_32/network.onnx rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_32/network.onnx diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_32/outputs.npz b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_32/outputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_16_16_32/outputs.npz rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_32/outputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_64/inputs.npz b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_64/inputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_16_16_64/inputs.npz rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_64/inputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_64/network.onnx b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_64/network.onnx similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_16_16_64/network.onnx rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_64/network.onnx diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_64/outputs.npz b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_64/outputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_16_16_64/outputs.npz rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_64/outputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_8/inputs.npz b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_8/inputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_16_16_8/inputs.npz rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_8/inputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_8/network.onnx 
b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_8/network.onnx similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_16_16_8/network.onnx rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_8/network.onnx diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_8/outputs.npz b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_8/outputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_16_16_8/outputs.npz rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_16_16_8/outputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_1_32_32_32/inputs.npz b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_32_32_32/inputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_32_32_32/inputs.npz rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_32_32_32/inputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_1_32_32_32/network.onnx b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_32_32_32/network.onnx similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_32_32_32/network.onnx rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_32_32_32/network.onnx diff --git a/DeeployTest/Tests/CCT/CCT_1_32_32_32/outputs.npz b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_32_32_32/outputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_32_32_32/outputs.npz rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_32_32_32/outputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_1_32_32_8/inputs.npz b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_32_32_8/inputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_32_32_8/inputs.npz rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_32_32_8/inputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_1_32_32_8/network.onnx b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_32_32_8/network.onnx similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_32_32_8/network.onnx rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_32_32_8/network.onnx diff --git a/DeeployTest/Tests/CCT/CCT_1_32_32_8/outputs.npz 
b/DeeployTest/Tests/Models/CCT/FP32/CCT_1_32_32_8/outputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_1_32_32_8/outputs.npz rename to DeeployTest/Tests/Models/CCT/FP32/CCT_1_32_32_8/outputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_2_32_32_128/inputs.npz b/DeeployTest/Tests/Models/CCT/FP32/CCT_2_32_32_128/inputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_2_32_32_128/inputs.npz rename to DeeployTest/Tests/Models/CCT/FP32/CCT_2_32_32_128/inputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_2_32_32_128/network.onnx b/DeeployTest/Tests/Models/CCT/FP32/CCT_2_32_32_128/network.onnx similarity index 100% rename from DeeployTest/Tests/CCT/CCT_2_32_32_128/network.onnx rename to DeeployTest/Tests/Models/CCT/FP32/CCT_2_32_32_128/network.onnx diff --git a/DeeployTest/Tests/CCT/CCT_2_32_32_128/outputs.npz b/DeeployTest/Tests/Models/CCT/FP32/CCT_2_32_32_128/outputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_2_32_32_128/outputs.npz rename to DeeployTest/Tests/Models/CCT/FP32/CCT_2_32_32_128/outputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_2_32_32_128_Opset20/inputs.npz b/DeeployTest/Tests/Models/CCT/FP32/CCT_2_32_32_128_Opset20/inputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_2_32_32_128_Opset20/inputs.npz rename to DeeployTest/Tests/Models/CCT/FP32/CCT_2_32_32_128_Opset20/inputs.npz diff --git a/DeeployTest/Tests/CCT/CCT_2_32_32_128_Opset20/network.onnx b/DeeployTest/Tests/Models/CCT/FP32/CCT_2_32_32_128_Opset20/network.onnx similarity index 100% rename from DeeployTest/Tests/CCT/CCT_2_32_32_128_Opset20/network.onnx rename to DeeployTest/Tests/Models/CCT/FP32/CCT_2_32_32_128_Opset20/network.onnx diff --git a/DeeployTest/Tests/CCT/CCT_2_32_32_128_Opset20/outputs.npz b/DeeployTest/Tests/Models/CCT/FP32/CCT_2_32_32_128_Opset20/outputs.npz similarity index 100% rename from DeeployTest/Tests/CCT/CCT_2_32_32_128_Opset20/outputs.npz rename to 
DeeployTest/Tests/Models/CCT/FP32/CCT_2_32_32_128_Opset20/outputs.npz diff --git a/DeeployTest/Tests/ICCT/activations.npz b/DeeployTest/Tests/Models/CCT/Int/ICCT/activations.npz similarity index 100% rename from DeeployTest/Tests/ICCT/activations.npz rename to DeeployTest/Tests/Models/CCT/Int/ICCT/activations.npz diff --git a/DeeployTest/Tests/ICCT/inputs.npz b/DeeployTest/Tests/Models/CCT/Int/ICCT/inputs.npz similarity index 100% rename from DeeployTest/Tests/ICCT/inputs.npz rename to DeeployTest/Tests/Models/CCT/Int/ICCT/inputs.npz diff --git a/DeeployTest/Tests/ICCT/network.onnx b/DeeployTest/Tests/Models/CCT/Int/ICCT/network.onnx similarity index 100% rename from DeeployTest/Tests/ICCT/network.onnx rename to DeeployTest/Tests/Models/CCT/Int/ICCT/network.onnx diff --git a/DeeployTest/Tests/ICCT/outputs.npz b/DeeployTest/Tests/Models/CCT/Int/ICCT/outputs.npz similarity index 100% rename from DeeployTest/Tests/ICCT/outputs.npz rename to DeeployTest/Tests/Models/CCT/Int/ICCT/outputs.npz diff --git a/DeeployTest/Tests/ICCT_8/activations.npz b/DeeployTest/Tests/Models/CCT/Int/ICCT_8/activations.npz similarity index 100% rename from DeeployTest/Tests/ICCT_8/activations.npz rename to DeeployTest/Tests/Models/CCT/Int/ICCT_8/activations.npz diff --git a/DeeployTest/Tests/ICCT_8/inputs.npz b/DeeployTest/Tests/Models/CCT/Int/ICCT_8/inputs.npz similarity index 100% rename from DeeployTest/Tests/ICCT_8/inputs.npz rename to DeeployTest/Tests/Models/CCT/Int/ICCT_8/inputs.npz diff --git a/DeeployTest/Tests/ICCT_8/network.onnx b/DeeployTest/Tests/Models/CCT/Int/ICCT_8/network.onnx similarity index 100% rename from DeeployTest/Tests/ICCT_8/network.onnx rename to DeeployTest/Tests/Models/CCT/Int/ICCT_8/network.onnx diff --git a/DeeployTest/Tests/ICCT_8/outputs.npz b/DeeployTest/Tests/Models/CCT/Int/ICCT_8/outputs.npz similarity index 100% rename from DeeployTest/Tests/ICCT_8/outputs.npz rename to DeeployTest/Tests/Models/CCT/Int/ICCT_8/outputs.npz diff --git 
a/DeeployTest/Tests/ICCT_ITA/activations.npz b/DeeployTest/Tests/Models/CCT/Int/ICCT_ITA/activations.npz similarity index 100% rename from DeeployTest/Tests/ICCT_ITA/activations.npz rename to DeeployTest/Tests/Models/CCT/Int/ICCT_ITA/activations.npz diff --git a/DeeployTest/Tests/ICCT_ITA/inputs.npz b/DeeployTest/Tests/Models/CCT/Int/ICCT_ITA/inputs.npz similarity index 100% rename from DeeployTest/Tests/ICCT_ITA/inputs.npz rename to DeeployTest/Tests/Models/CCT/Int/ICCT_ITA/inputs.npz diff --git a/DeeployTest/Tests/ICCT_ITA/network.onnx b/DeeployTest/Tests/Models/CCT/Int/ICCT_ITA/network.onnx similarity index 100% rename from DeeployTest/Tests/ICCT_ITA/network.onnx rename to DeeployTest/Tests/Models/CCT/Int/ICCT_ITA/network.onnx diff --git a/DeeployTest/Tests/ICCT_ITA/outputs.npz b/DeeployTest/Tests/Models/CCT/Int/ICCT_ITA/outputs.npz similarity index 100% rename from DeeployTest/Tests/ICCT_ITA/outputs.npz rename to DeeployTest/Tests/Models/CCT/Int/ICCT_ITA/outputs.npz diff --git a/DeeployTest/Tests/ICCT_ITA_8/activations.npz b/DeeployTest/Tests/Models/CCT/Int/ICCT_ITA_8/activations.npz similarity index 100% rename from DeeployTest/Tests/ICCT_ITA_8/activations.npz rename to DeeployTest/Tests/Models/CCT/Int/ICCT_ITA_8/activations.npz diff --git a/DeeployTest/Tests/ICCT_ITA_8/inputs.npz b/DeeployTest/Tests/Models/CCT/Int/ICCT_ITA_8/inputs.npz similarity index 100% rename from DeeployTest/Tests/ICCT_ITA_8/inputs.npz rename to DeeployTest/Tests/Models/CCT/Int/ICCT_ITA_8/inputs.npz diff --git a/DeeployTest/Tests/ICCT_ITA_8/network.onnx b/DeeployTest/Tests/Models/CCT/Int/ICCT_ITA_8/network.onnx similarity index 100% rename from DeeployTest/Tests/ICCT_ITA_8/network.onnx rename to DeeployTest/Tests/Models/CCT/Int/ICCT_ITA_8/network.onnx diff --git a/DeeployTest/Tests/ICCT_ITA_8/outputs.npz b/DeeployTest/Tests/Models/CCT/Int/ICCT_ITA_8/outputs.npz similarity index 100% rename from DeeployTest/Tests/ICCT_ITA_8/outputs.npz rename to 
DeeployTest/Tests/Models/CCT/Int/ICCT_ITA_8/outputs.npz diff --git a/DeeployTest/Tests/EEGFormer/activations.npz b/DeeployTest/Tests/Models/EEGFormer/activations.npz similarity index 100% rename from DeeployTest/Tests/EEGFormer/activations.npz rename to DeeployTest/Tests/Models/EEGFormer/activations.npz diff --git a/DeeployTest/Tests/EEGFormer/inputs.npz b/DeeployTest/Tests/Models/EEGFormer/inputs.npz similarity index 100% rename from DeeployTest/Tests/EEGFormer/inputs.npz rename to DeeployTest/Tests/Models/EEGFormer/inputs.npz diff --git a/DeeployTest/Tests/EEGFormer/network.onnx b/DeeployTest/Tests/Models/EEGFormer/network.onnx similarity index 100% rename from DeeployTest/Tests/EEGFormer/network.onnx rename to DeeployTest/Tests/Models/EEGFormer/network.onnx diff --git a/DeeployTest/Tests/EEGFormer/outputs.npz b/DeeployTest/Tests/Models/EEGFormer/outputs.npz similarity index 100% rename from DeeployTest/Tests/EEGFormer/outputs.npz rename to DeeployTest/Tests/Models/EEGFormer/outputs.npz diff --git a/DeeployTest/Tests/MLPerf/AnomalyDetection/activations.npz b/DeeployTest/Tests/Models/MLPerf/AnomalyDetection/activations.npz similarity index 100% rename from DeeployTest/Tests/MLPerf/AnomalyDetection/activations.npz rename to DeeployTest/Tests/Models/MLPerf/AnomalyDetection/activations.npz diff --git a/DeeployTest/Tests/MLPerf/AnomalyDetection/inputs.npz b/DeeployTest/Tests/Models/MLPerf/AnomalyDetection/inputs.npz similarity index 100% rename from DeeployTest/Tests/MLPerf/AnomalyDetection/inputs.npz rename to DeeployTest/Tests/Models/MLPerf/AnomalyDetection/inputs.npz diff --git a/DeeployTest/Tests/MLPerf/AnomalyDetection/network.onnx b/DeeployTest/Tests/Models/MLPerf/AnomalyDetection/network.onnx similarity index 100% rename from DeeployTest/Tests/MLPerf/AnomalyDetection/network.onnx rename to DeeployTest/Tests/Models/MLPerf/AnomalyDetection/network.onnx diff --git a/DeeployTest/Tests/MLPerf/AnomalyDetection/outputs.npz 
b/DeeployTest/Tests/Models/MLPerf/AnomalyDetection/outputs.npz similarity index 100% rename from DeeployTest/Tests/MLPerf/AnomalyDetection/outputs.npz rename to DeeployTest/Tests/Models/MLPerf/AnomalyDetection/outputs.npz diff --git a/DeeployTest/Tests/MLPerf/ImageClassification/activations.npz b/DeeployTest/Tests/Models/MLPerf/ImageClassification/activations.npz similarity index 100% rename from DeeployTest/Tests/MLPerf/ImageClassification/activations.npz rename to DeeployTest/Tests/Models/MLPerf/ImageClassification/activations.npz diff --git a/DeeployTest/Tests/MLPerf/ImageClassification/inputs.npz b/DeeployTest/Tests/Models/MLPerf/ImageClassification/inputs.npz similarity index 100% rename from DeeployTest/Tests/MLPerf/ImageClassification/inputs.npz rename to DeeployTest/Tests/Models/MLPerf/ImageClassification/inputs.npz diff --git a/DeeployTest/Tests/MLPerf/ImageClassification/network.onnx b/DeeployTest/Tests/Models/MLPerf/ImageClassification/network.onnx similarity index 100% rename from DeeployTest/Tests/MLPerf/ImageClassification/network.onnx rename to DeeployTest/Tests/Models/MLPerf/ImageClassification/network.onnx diff --git a/DeeployTest/Tests/MLPerf/ImageClassification/outputs.npz b/DeeployTest/Tests/Models/MLPerf/ImageClassification/outputs.npz similarity index 100% rename from DeeployTest/Tests/MLPerf/ImageClassification/outputs.npz rename to DeeployTest/Tests/Models/MLPerf/ImageClassification/outputs.npz diff --git a/DeeployTest/Tests/MLPerf/KeywordSpotting/activations.npz b/DeeployTest/Tests/Models/MLPerf/KeywordSpotting/activations.npz similarity index 100% rename from DeeployTest/Tests/MLPerf/KeywordSpotting/activations.npz rename to DeeployTest/Tests/Models/MLPerf/KeywordSpotting/activations.npz diff --git a/DeeployTest/Tests/MLPerf/KeywordSpotting/inputs.npz b/DeeployTest/Tests/Models/MLPerf/KeywordSpotting/inputs.npz similarity index 100% rename from DeeployTest/Tests/MLPerf/KeywordSpotting/inputs.npz rename to 
DeeployTest/Tests/Models/MLPerf/KeywordSpotting/inputs.npz diff --git a/DeeployTest/Tests/MLPerf/KeywordSpotting/network.onnx b/DeeployTest/Tests/Models/MLPerf/KeywordSpotting/network.onnx similarity index 100% rename from DeeployTest/Tests/MLPerf/KeywordSpotting/network.onnx rename to DeeployTest/Tests/Models/MLPerf/KeywordSpotting/network.onnx diff --git a/DeeployTest/Tests/MLPerf/KeywordSpotting/outputs.npz b/DeeployTest/Tests/Models/MLPerf/KeywordSpotting/outputs.npz similarity index 100% rename from DeeployTest/Tests/MLPerf/KeywordSpotting/outputs.npz rename to DeeployTest/Tests/Models/MLPerf/KeywordSpotting/outputs.npz diff --git a/DeeployTest/Tests/MLPerf/VisualWakeWords/activations.npz b/DeeployTest/Tests/Models/MLPerf/VisualWakeWords/activations.npz similarity index 100% rename from DeeployTest/Tests/MLPerf/VisualWakeWords/activations.npz rename to DeeployTest/Tests/Models/MLPerf/VisualWakeWords/activations.npz diff --git a/DeeployTest/Tests/MLPerf/VisualWakeWords/inputs.npz b/DeeployTest/Tests/Models/MLPerf/VisualWakeWords/inputs.npz similarity index 100% rename from DeeployTest/Tests/MLPerf/VisualWakeWords/inputs.npz rename to DeeployTest/Tests/Models/MLPerf/VisualWakeWords/inputs.npz diff --git a/DeeployTest/Tests/MLPerf/VisualWakeWords/network.onnx b/DeeployTest/Tests/Models/MLPerf/VisualWakeWords/network.onnx similarity index 100% rename from DeeployTest/Tests/MLPerf/VisualWakeWords/network.onnx rename to DeeployTest/Tests/Models/MLPerf/VisualWakeWords/network.onnx diff --git a/DeeployTest/Tests/MLPerf/VisualWakeWords/outputs.npz b/DeeployTest/Tests/Models/MLPerf/VisualWakeWords/outputs.npz similarity index 100% rename from DeeployTest/Tests/MLPerf/VisualWakeWords/outputs.npz rename to DeeployTest/Tests/Models/MLPerf/VisualWakeWords/outputs.npz diff --git a/DeeployTest/Tests/MobileNetv2/activations.npz b/DeeployTest/Tests/Models/MobileNetv2/activations.npz similarity index 100% rename from DeeployTest/Tests/MobileNetv2/activations.npz rename to 
DeeployTest/Tests/Models/MobileNetv2/activations.npz diff --git a/DeeployTest/Tests/MobileNetv2/inputs.npz b/DeeployTest/Tests/Models/MobileNetv2/inputs.npz similarity index 100% rename from DeeployTest/Tests/MobileNetv2/inputs.npz rename to DeeployTest/Tests/Models/MobileNetv2/inputs.npz diff --git a/DeeployTest/Tests/MobileNetv2/network.onnx b/DeeployTest/Tests/Models/MobileNetv2/network.onnx similarity index 100% rename from DeeployTest/Tests/MobileNetv2/network.onnx rename to DeeployTest/Tests/Models/MobileNetv2/network.onnx diff --git a/DeeployTest/Tests/MobileNetv2/outputs.npz b/DeeployTest/Tests/Models/MobileNetv2/outputs.npz similarity index 100% rename from DeeployTest/Tests/MobileNetv2/outputs.npz rename to DeeployTest/Tests/Models/MobileNetv2/outputs.npz diff --git a/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/ReduceMean/inputs.npz b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/ReduceMean/inputs.npz new file mode 100644 index 0000000000..cc4264b282 Binary files /dev/null and b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/ReduceMean/inputs.npz differ diff --git a/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/ReduceMean/network.onnx b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/ReduceMean/network.onnx new file mode 100644 index 0000000000..6f07b14c01 Binary files /dev/null and b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/ReduceMean/network.onnx differ diff --git a/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/ReduceMean/outputs.npz b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/ReduceMean/outputs.npz new file mode 100644 index 0000000000..7142ff12b2 Binary files /dev/null and b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/ReduceMean/outputs.npz differ diff --git a/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_0/inputs.npz b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_0/inputs.npz new file mode 100644 index 0000000000..f1ec4dedb9 Binary files /dev/null and 
b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_0/inputs.npz differ diff --git a/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_0/network.onnx b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_0/network.onnx new file mode 100644 index 0000000000..0e779d5ff3 Binary files /dev/null and b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_0/network.onnx differ diff --git a/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_0/outputs.npz b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_0/outputs.npz new file mode 100644 index 0000000000..a1a672e4a2 Binary files /dev/null and b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_0/outputs.npz differ diff --git a/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_1/inputs.npz b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_1/inputs.npz new file mode 100644 index 0000000000..d67a59cd3b Binary files /dev/null and b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_1/inputs.npz differ diff --git a/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_1/network.onnx b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_1/network.onnx new file mode 100644 index 0000000000..95a35bdd1b Binary files /dev/null and b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_1/network.onnx differ diff --git a/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_1/outputs.npz b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_1/outputs.npz new file mode 100644 index 0000000000..c147288a02 Binary files /dev/null and b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_1/outputs.npz differ diff --git a/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_2/inputs.npz b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_2/inputs.npz new file mode 100644 index 
0000000000..a3aa88f0f0 Binary files /dev/null and b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_2/inputs.npz differ diff --git a/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_2/network.onnx b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_2/network.onnx new file mode 100644 index 0000000000..9248e3f647 --- /dev/null +++ b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_2/network.onnx @@ -0,0 +1,29 @@ + deeploy_test_gen:Ø +1starts_c"Constant* +value*: Bstarts  +-ends_c"Constant* +value* :@Bends  +-axes_c"Constant* +value* :Baxes  +/steps_c"Constant* +value*:Bsteps  +E +input +starts_c +ends_c +axes_c +steps_coutput +slice_node"Slice +SliceGraphZ +input + + +1 + +`b +output + + +1 + + B \ No newline at end of file diff --git a/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_2/outputs.npz b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_2/outputs.npz new file mode 100644 index 0000000000..e774993297 Binary files /dev/null and b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_2/outputs.npz differ diff --git a/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_3/inputs.npz b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_3/inputs.npz new file mode 100644 index 0000000000..443b869916 Binary files /dev/null and b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_3/inputs.npz differ diff --git a/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_3/network.onnx b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_3/network.onnx new file mode 100644 index 0000000000..45f6347db0 --- /dev/null +++ b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_3/network.onnx @@ -0,0 +1,29 @@ + deeploy_test_gen:Ø +1starts_c"Constant* +value*:@Bstarts  +-ends_c"Constant* +value* :`Bends  +-axes_c"Constant* +value* :Baxes  +/steps_c"Constant* +value*:Bsteps  +E 
+input +starts_c +ends_c +axes_c +steps_coutput +slice_node"Slice +SliceGraphZ +input + + +1 + +`b +output + + +1 + + B \ No newline at end of file diff --git a/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_3/outputs.npz b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_3/outputs.npz new file mode 100644 index 0000000000..ab9681c221 Binary files /dev/null and b/DeeployTest/Tests/Models/TinyViT/5M/Layers/FP32/Slice/slice_layer_3/outputs.npz differ diff --git a/DeeployTest/Tests/testFloatDemoTinyViT/inputs.npz b/DeeployTest/Tests/Models/TinyViT/Demo/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatDemoTinyViT/inputs.npz rename to DeeployTest/Tests/Models/TinyViT/Demo/inputs.npz diff --git a/DeeployTest/Tests/testFloatDemoTinyViT/network.onnx b/DeeployTest/Tests/Models/TinyViT/Demo/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatDemoTinyViT/network.onnx rename to DeeployTest/Tests/Models/TinyViT/Demo/network.onnx diff --git a/DeeployTest/Tests/testFloatDemoTinyViT/outputs.npz b/DeeployTest/Tests/Models/TinyViT/Demo/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatDemoTinyViT/outputs.npz rename to DeeployTest/Tests/Models/TinyViT/Demo/outputs.npz diff --git a/DeeployTest/Tests/WaveFormer/inputs.npz b/DeeployTest/Tests/Models/WaveFormer/inputs.npz similarity index 100% rename from DeeployTest/Tests/WaveFormer/inputs.npz rename to DeeployTest/Tests/Models/WaveFormer/inputs.npz diff --git a/DeeployTest/Tests/WaveFormer/network.onnx b/DeeployTest/Tests/Models/WaveFormer/network.onnx similarity index 100% rename from DeeployTest/Tests/WaveFormer/network.onnx rename to DeeployTest/Tests/Models/WaveFormer/network.onnx diff --git a/DeeployTest/Tests/WaveFormer/outputs.npz b/DeeployTest/Tests/Models/WaveFormer/outputs.npz similarity index 100% rename from DeeployTest/Tests/WaveFormer/outputs.npz rename to DeeployTest/Tests/Models/WaveFormer/outputs.npz diff --git 
a/DeeployTest/Tests/microLlama/microLlama1/activations.npz b/DeeployTest/Tests/Models/microLlama/microLlama1/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama1/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama1/activations.npz diff --git a/DeeployTest/Tests/microLlama/microLlama1/inputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama1/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama1/inputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama1/inputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama1/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama1/network.onnx similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama1/network.onnx rename to DeeployTest/Tests/Models/microLlama/microLlama1/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama1/outputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama1/outputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama1/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama1/outputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama128/activations.npz b/DeeployTest/Tests/Models/microLlama/microLlama128/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama128/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama128/activations.npz diff --git a/DeeployTest/Tests/microLlama/microLlama128/inputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama128/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama128/inputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama128/inputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama128/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama128/network.onnx similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama128/network.onnx rename to 
DeeployTest/Tests/Models/microLlama/microLlama128/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama128/outputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama128/outputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama128/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama128/outputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama16/activations.npz b/DeeployTest/Tests/Models/microLlama/microLlama16/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama16/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama16/activations.npz diff --git a/DeeployTest/Tests/microLlama/microLlama16/inputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama16/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama16/inputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama16/inputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama16/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama16/network.onnx similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama16/network.onnx rename to DeeployTest/Tests/Models/microLlama/microLlama16/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama16/outputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama16/outputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama16/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama16/outputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama16_parallel/activations.npz b/DeeployTest/Tests/Models/microLlama/microLlama16_parallel/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama16_parallel/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama16_parallel/activations.npz diff --git a/DeeployTest/Tests/microLlama/microLlama16_parallel/inputs.npz 
b/DeeployTest/Tests/Models/microLlama/microLlama16_parallel/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama16_parallel/inputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama16_parallel/inputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama16_parallel/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama16_parallel/network.onnx similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama16_parallel/network.onnx rename to DeeployTest/Tests/Models/microLlama/microLlama16_parallel/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama16_parallel/outputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama16_parallel/outputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama16_parallel/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama16_parallel/outputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama1_parallel/activations.npz b/DeeployTest/Tests/Models/microLlama/microLlama1_parallel/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama1_parallel/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama1_parallel/activations.npz diff --git a/DeeployTest/Tests/microLlama/microLlama1_parallel/inputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama1_parallel/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama1_parallel/inputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama1_parallel/inputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama1_parallel/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama1_parallel/network.onnx similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama1_parallel/network.onnx rename to DeeployTest/Tests/Models/microLlama/microLlama1_parallel/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama1_parallel/outputs.npz 
b/DeeployTest/Tests/Models/microLlama/microLlama1_parallel/outputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama1_parallel/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama1_parallel/outputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama2/activations.npz b/DeeployTest/Tests/Models/microLlama/microLlama2/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama2/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama2/activations.npz diff --git a/DeeployTest/Tests/microLlama/microLlama2/inputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama2/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama2/inputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama2/inputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama2/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama2/network.onnx similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama2/network.onnx rename to DeeployTest/Tests/Models/microLlama/microLlama2/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama2/outputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama2/outputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama2/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama2/outputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama256/activations.npz b/DeeployTest/Tests/Models/microLlama/microLlama256/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama256/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama256/activations.npz diff --git a/DeeployTest/Tests/microLlama/microLlama256/inputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama256/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama256/inputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama256/inputs.npz 
diff --git a/DeeployTest/Tests/microLlama/microLlama256/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama256/network.onnx similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama256/network.onnx rename to DeeployTest/Tests/Models/microLlama/microLlama256/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama256/outputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama256/outputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama256/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama256/outputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama2_parallel/activations.npz b/DeeployTest/Tests/Models/microLlama/microLlama2_parallel/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama2_parallel/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama2_parallel/activations.npz diff --git a/DeeployTest/Tests/microLlama/microLlama2_parallel/inputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama2_parallel/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama2_parallel/inputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama2_parallel/inputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama2_parallel/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama2_parallel/network.onnx similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama2_parallel/network.onnx rename to DeeployTest/Tests/Models/microLlama/microLlama2_parallel/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama2_parallel/outputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama2_parallel/outputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama2_parallel/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama2_parallel/outputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama32/activations.npz 
b/DeeployTest/Tests/Models/microLlama/microLlama32/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama32/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama32/activations.npz diff --git a/DeeployTest/Tests/microLlama/microLlama32/inputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama32/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama32/inputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama32/inputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama32/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama32/network.onnx similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama32/network.onnx rename to DeeployTest/Tests/Models/microLlama/microLlama32/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama32/outputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama32/outputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama32/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama32/outputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama32_parallel/activations.npz b/DeeployTest/Tests/Models/microLlama/microLlama32_parallel/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama32_parallel/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama32_parallel/activations.npz diff --git a/DeeployTest/Tests/microLlama/microLlama32_parallel/inputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama32_parallel/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama32_parallel/inputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama32_parallel/inputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama32_parallel/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama32_parallel/network.onnx similarity index 100% rename from 
DeeployTest/Tests/microLlama/microLlama32_parallel/network.onnx rename to DeeployTest/Tests/Models/microLlama/microLlama32_parallel/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama32_parallel/outputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama32_parallel/outputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama32_parallel/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama32_parallel/outputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama4/activations.npz b/DeeployTest/Tests/Models/microLlama/microLlama4/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama4/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama4/activations.npz diff --git a/DeeployTest/Tests/microLlama/microLlama4/inputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama4/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama4/inputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama4/inputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama4/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama4/network.onnx similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama4/network.onnx rename to DeeployTest/Tests/Models/microLlama/microLlama4/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama4/outputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama4/outputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama4/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama4/outputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama4_parallel/activations.npz b/DeeployTest/Tests/Models/microLlama/microLlama4_parallel/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama4_parallel/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama4_parallel/activations.npz diff --git 
a/DeeployTest/Tests/microLlama/microLlama4_parallel/inputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama4_parallel/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama4_parallel/inputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama4_parallel/inputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama4_parallel/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama4_parallel/network.onnx similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama4_parallel/network.onnx rename to DeeployTest/Tests/Models/microLlama/microLlama4_parallel/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama4_parallel/outputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama4_parallel/outputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama4_parallel/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama4_parallel/outputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama64/activations.npz b/DeeployTest/Tests/Models/microLlama/microLlama64/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama64/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama64/activations.npz diff --git a/DeeployTest/Tests/microLlama/microLlama64/inputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama64/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama64/inputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama64/inputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama64/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama64/network.onnx similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama64/network.onnx rename to DeeployTest/Tests/Models/microLlama/microLlama64/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama64/outputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama64/outputs.npz similarity index 100% rename from 
DeeployTest/Tests/microLlama/microLlama64/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama64/outputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama64_parallel/activations.npz b/DeeployTest/Tests/Models/microLlama/microLlama64_parallel/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama64_parallel/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama64_parallel/activations.npz diff --git a/DeeployTest/Tests/microLlama/microLlama64_parallel/inputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama64_parallel/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama64_parallel/inputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama64_parallel/inputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama64_parallel/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama64_parallel/network.onnx similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama64_parallel/network.onnx rename to DeeployTest/Tests/Models/microLlama/microLlama64_parallel/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama64_parallel/outputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama64_parallel/outputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama64_parallel/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama64_parallel/outputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama8/activations.npz b/DeeployTest/Tests/Models/microLlama/microLlama8/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama8/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama8/activations.npz diff --git a/DeeployTest/Tests/microLlama/microLlama8/inputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama8/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama8/inputs.npz rename to 
DeeployTest/Tests/Models/microLlama/microLlama8/inputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama8/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama8/network.onnx similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama8/network.onnx rename to DeeployTest/Tests/Models/microLlama/microLlama8/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama8/outputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama8/outputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama8/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama8/outputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama8_parallel/activations.npz b/DeeployTest/Tests/Models/microLlama/microLlama8_parallel/activations.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama8_parallel/activations.npz rename to DeeployTest/Tests/Models/microLlama/microLlama8_parallel/activations.npz diff --git a/DeeployTest/Tests/microLlama/microLlama8_parallel/inputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama8_parallel/inputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama8_parallel/inputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama8_parallel/inputs.npz diff --git a/DeeployTest/Tests/microLlama/microLlama8_parallel/network.onnx b/DeeployTest/Tests/Models/microLlama/microLlama8_parallel/network.onnx similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama8_parallel/network.onnx rename to DeeployTest/Tests/Models/microLlama/microLlama8_parallel/network.onnx diff --git a/DeeployTest/Tests/microLlama/microLlama8_parallel/outputs.npz b/DeeployTest/Tests/Models/microLlama/microLlama8_parallel/outputs.npz similarity index 100% rename from DeeployTest/Tests/microLlama/microLlama8_parallel/outputs.npz rename to DeeployTest/Tests/Models/microLlama/microLlama8_parallel/outputs.npz diff --git a/DeeployTest/Tests/miniMobileNet/activations.npz 
b/DeeployTest/Tests/Models/miniMobileNet/activations.npz similarity index 100% rename from DeeployTest/Tests/miniMobileNet/activations.npz rename to DeeployTest/Tests/Models/miniMobileNet/activations.npz diff --git a/DeeployTest/Tests/miniMobileNet/inputs.npz b/DeeployTest/Tests/Models/miniMobileNet/inputs.npz similarity index 100% rename from DeeployTest/Tests/miniMobileNet/inputs.npz rename to DeeployTest/Tests/Models/miniMobileNet/inputs.npz diff --git a/DeeployTest/Tests/miniMobileNet/network.onnx b/DeeployTest/Tests/Models/miniMobileNet/network.onnx similarity index 100% rename from DeeployTest/Tests/miniMobileNet/network.onnx rename to DeeployTest/Tests/Models/miniMobileNet/network.onnx diff --git a/DeeployTest/Tests/miniMobileNet/outputs.npz b/DeeployTest/Tests/Models/miniMobileNet/outputs.npz similarity index 100% rename from DeeployTest/Tests/miniMobileNet/outputs.npz rename to DeeployTest/Tests/Models/miniMobileNet/outputs.npz diff --git a/DeeployTest/Tests/miniMobileNetv2/activations.npz b/DeeployTest/Tests/Models/miniMobileNetv2/activations.npz similarity index 100% rename from DeeployTest/Tests/miniMobileNetv2/activations.npz rename to DeeployTest/Tests/Models/miniMobileNetv2/activations.npz diff --git a/DeeployTest/Tests/miniMobileNetv2/inputs.npz b/DeeployTest/Tests/Models/miniMobileNetv2/inputs.npz similarity index 100% rename from DeeployTest/Tests/miniMobileNetv2/inputs.npz rename to DeeployTest/Tests/Models/miniMobileNetv2/inputs.npz diff --git a/DeeployTest/Tests/miniMobileNetv2/network.onnx b/DeeployTest/Tests/Models/miniMobileNetv2/network.onnx similarity index 100% rename from DeeployTest/Tests/miniMobileNetv2/network.onnx rename to DeeployTest/Tests/Models/miniMobileNetv2/network.onnx diff --git a/DeeployTest/Tests/miniMobileNetv2/outputs.npz b/DeeployTest/Tests/Models/miniMobileNetv2/outputs.npz similarity index 100% rename from DeeployTest/Tests/miniMobileNetv2/outputs.npz rename to DeeployTest/Tests/Models/miniMobileNetv2/outputs.npz diff 
--git a/DeeployTest/Tests/simpleCNN/activations.npz b/DeeployTest/Tests/Models/simpleCNN/activations.npz similarity index 100% rename from DeeployTest/Tests/simpleCNN/activations.npz rename to DeeployTest/Tests/Models/simpleCNN/activations.npz diff --git a/DeeployTest/Tests/simpleCNN/inputs.npz b/DeeployTest/Tests/Models/simpleCNN/inputs.npz similarity index 100% rename from DeeployTest/Tests/simpleCNN/inputs.npz rename to DeeployTest/Tests/Models/simpleCNN/inputs.npz diff --git a/DeeployTest/Tests/simpleCNN/network.onnx b/DeeployTest/Tests/Models/simpleCNN/network.onnx similarity index 100% rename from DeeployTest/Tests/simpleCNN/network.onnx rename to DeeployTest/Tests/Models/simpleCNN/network.onnx diff --git a/DeeployTest/Tests/simpleCNN/outputs.npz b/DeeployTest/Tests/Models/simpleCNN/outputs.npz similarity index 100% rename from DeeployTest/Tests/simpleCNN/outputs.npz rename to DeeployTest/Tests/Models/simpleCNN/outputs.npz diff --git a/DeeployTest/Tests/testFloatAdder/activations.npz b/DeeployTest/Tests/Others/Backtracking/activations.npz similarity index 100% rename from DeeployTest/Tests/testFloatAdder/activations.npz rename to DeeployTest/Tests/Others/Backtracking/activations.npz diff --git a/DeeployTest/Tests/testBacktracking/inputs.npz b/DeeployTest/Tests/Others/Backtracking/inputs.npz similarity index 100% rename from DeeployTest/Tests/testBacktracking/inputs.npz rename to DeeployTest/Tests/Others/Backtracking/inputs.npz diff --git a/DeeployTest/Tests/testBacktracking/network.onnx b/DeeployTest/Tests/Others/Backtracking/network.onnx similarity index 100% rename from DeeployTest/Tests/testBacktracking/network.onnx rename to DeeployTest/Tests/Others/Backtracking/network.onnx diff --git a/DeeployTest/Tests/testBacktracking/outputs.npz b/DeeployTest/Tests/Others/Backtracking/outputs.npz similarity index 100% rename from DeeployTest/Tests/testBacktracking/outputs.npz rename to DeeployTest/Tests/Others/Backtracking/outputs.npz diff --git 
a/DeeployTest/Tests/Dequant/inputs.npz b/DeeployTest/Tests/Others/Dequant/inputs.npz similarity index 100% rename from DeeployTest/Tests/Dequant/inputs.npz rename to DeeployTest/Tests/Others/Dequant/inputs.npz diff --git a/DeeployTest/Tests/Dequant/network.onnx b/DeeployTest/Tests/Others/Dequant/network.onnx similarity index 100% rename from DeeployTest/Tests/Dequant/network.onnx rename to DeeployTest/Tests/Others/Dequant/network.onnx diff --git a/DeeployTest/Tests/Dequant/outputs.npz b/DeeployTest/Tests/Others/Dequant/outputs.npz similarity index 100% rename from DeeployTest/Tests/Dequant/outputs.npz rename to DeeployTest/Tests/Others/Dequant/outputs.npz diff --git a/DeeployTest/Tests/testFloatSGD/inputs.npz b/DeeployTest/Tests/Others/FloatSGD/inputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatSGD/inputs.npz rename to DeeployTest/Tests/Others/FloatSGD/inputs.npz diff --git a/DeeployTest/Tests/testFloatSGD/network.onnx b/DeeployTest/Tests/Others/FloatSGD/network.onnx similarity index 100% rename from DeeployTest/Tests/testFloatSGD/network.onnx rename to DeeployTest/Tests/Others/FloatSGD/network.onnx diff --git a/DeeployTest/Tests/testFloatSGD/outputs.npz b/DeeployTest/Tests/Others/FloatSGD/outputs.npz similarity index 100% rename from DeeployTest/Tests/testFloatSGD/outputs.npz rename to DeeployTest/Tests/Others/FloatSGD/outputs.npz diff --git a/DeeployTest/Tests/Quant/inputs.npz b/DeeployTest/Tests/Others/Quant/inputs.npz similarity index 100% rename from DeeployTest/Tests/Quant/inputs.npz rename to DeeployTest/Tests/Others/Quant/inputs.npz diff --git a/DeeployTest/Tests/Quant/network.onnx b/DeeployTest/Tests/Others/Quant/network.onnx similarity index 100% rename from DeeployTest/Tests/Quant/network.onnx rename to DeeployTest/Tests/Others/Quant/network.onnx diff --git a/DeeployTest/Tests/Quant/outputs.npz b/DeeployTest/Tests/Others/Quant/outputs.npz similarity index 100% rename from DeeployTest/Tests/Quant/outputs.npz rename to 
DeeployTest/Tests/Others/Quant/outputs.npz diff --git a/DeeployTest/Tests/QuantizedLinear/inputs.npz b/DeeployTest/Tests/Others/QuantizedLinear/inputs.npz similarity index 100% rename from DeeployTest/Tests/QuantizedLinear/inputs.npz rename to DeeployTest/Tests/Others/QuantizedLinear/inputs.npz diff --git a/DeeployTest/Tests/QuantizedLinear/network.onnx b/DeeployTest/Tests/Others/QuantizedLinear/network.onnx similarity index 100% rename from DeeployTest/Tests/QuantizedLinear/network.onnx rename to DeeployTest/Tests/Others/QuantizedLinear/network.onnx diff --git a/DeeployTest/Tests/QuantizedLinear/outputs.npz b/DeeployTest/Tests/Others/QuantizedLinear/outputs.npz similarity index 100% rename from DeeployTest/Tests/QuantizedLinear/outputs.npz rename to DeeployTest/Tests/Others/QuantizedLinear/outputs.npz diff --git a/DeeployTest/Tests/TestRQAdd/activations.npz b/DeeployTest/Tests/Others/RQAdd/activations.npz similarity index 100% rename from DeeployTest/Tests/TestRQAdd/activations.npz rename to DeeployTest/Tests/Others/RQAdd/activations.npz diff --git a/DeeployTest/Tests/TestRQAdd/inputs.npz b/DeeployTest/Tests/Others/RQAdd/inputs.npz similarity index 100% rename from DeeployTest/Tests/TestRQAdd/inputs.npz rename to DeeployTest/Tests/Others/RQAdd/inputs.npz diff --git a/DeeployTest/Tests/TestRQAdd/network.onnx b/DeeployTest/Tests/Others/RQAdd/network.onnx similarity index 100% rename from DeeployTest/Tests/TestRQAdd/network.onnx rename to DeeployTest/Tests/Others/RQAdd/network.onnx diff --git a/DeeployTest/Tests/TestRQAdd/outputs.npz b/DeeployTest/Tests/Others/RQAdd/outputs.npz similarity index 100% rename from DeeployTest/Tests/TestRQAdd/outputs.npz rename to DeeployTest/Tests/Others/RQAdd/outputs.npz diff --git a/DeeployTest/Tests/testRQConv/inputs.npz b/DeeployTest/Tests/Others/RQConv/inputs.npz similarity index 100% rename from DeeployTest/Tests/testRQConv/inputs.npz rename to DeeployTest/Tests/Others/RQConv/inputs.npz diff --git 
a/DeeployTest/Tests/testRQConv/network.onnx b/DeeployTest/Tests/Others/RQConv/network.onnx similarity index 100% rename from DeeployTest/Tests/testRQConv/network.onnx rename to DeeployTest/Tests/Others/RQConv/network.onnx diff --git a/DeeployTest/Tests/testRQConv/outputs.npz b/DeeployTest/Tests/Others/RQConv/outputs.npz similarity index 100% rename from DeeployTest/Tests/testRQConv/outputs.npz rename to DeeployTest/Tests/Others/RQConv/outputs.npz diff --git a/DeeployTest/Tests/testRQGEMM/inputs.npz b/DeeployTest/Tests/Others/RQGEMM/inputs.npz similarity index 100% rename from DeeployTest/Tests/testRQGEMM/inputs.npz rename to DeeployTest/Tests/Others/RQGEMM/inputs.npz diff --git a/DeeployTest/Tests/testRQGEMM/network.onnx b/DeeployTest/Tests/Others/RQGEMM/network.onnx similarity index 100% rename from DeeployTest/Tests/testRQGEMM/network.onnx rename to DeeployTest/Tests/Others/RQGEMM/network.onnx diff --git a/DeeployTest/Tests/testRQGEMM/outputs.npz b/DeeployTest/Tests/Others/RQGEMM/outputs.npz similarity index 100% rename from DeeployTest/Tests/testRQGEMM/outputs.npz rename to DeeployTest/Tests/Others/RQGEMM/outputs.npz diff --git a/DeeployTest/Tests/testRQGEMMTransB/inputs.npz b/DeeployTest/Tests/Others/RQGEMMTransB/inputs.npz similarity index 100% rename from DeeployTest/Tests/testRQGEMMTransB/inputs.npz rename to DeeployTest/Tests/Others/RQGEMMTransB/inputs.npz diff --git a/DeeployTest/Tests/testRQGEMMTransB/network.onnx b/DeeployTest/Tests/Others/RQGEMMTransB/network.onnx similarity index 100% rename from DeeployTest/Tests/testRQGEMMTransB/network.onnx rename to DeeployTest/Tests/Others/RQGEMMTransB/network.onnx diff --git a/DeeployTest/Tests/testRQGEMMTransB/outputs.npz b/DeeployTest/Tests/Others/RQGEMMTransB/outputs.npz similarity index 100% rename from DeeployTest/Tests/testRQGEMMTransB/outputs.npz rename to DeeployTest/Tests/Others/RQGEMMTransB/outputs.npz diff --git a/DeeployTest/Tests/testRQGEMMwBatch/inputs.npz 
b/DeeployTest/Tests/Others/RQGEMMwBatch/inputs.npz similarity index 100% rename from DeeployTest/Tests/testRQGEMMwBatch/inputs.npz rename to DeeployTest/Tests/Others/RQGEMMwBatch/inputs.npz diff --git a/DeeployTest/Tests/testRQGEMMwBatch/network.onnx b/DeeployTest/Tests/Others/RQGEMMwBatch/network.onnx similarity index 100% rename from DeeployTest/Tests/testRQGEMMwBatch/network.onnx rename to DeeployTest/Tests/Others/RQGEMMwBatch/network.onnx diff --git a/DeeployTest/Tests/testRQGEMMwBatch/outputs.npz b/DeeployTest/Tests/Others/RQGEMMwBatch/outputs.npz similarity index 100% rename from DeeployTest/Tests/testRQGEMMwBatch/outputs.npz rename to DeeployTest/Tests/Others/RQGEMMwBatch/outputs.npz diff --git a/DeeployTest/Tests/RQHardswish/inputs.npz b/DeeployTest/Tests/Others/RQHardswish/inputs.npz similarity index 100% rename from DeeployTest/Tests/RQHardswish/inputs.npz rename to DeeployTest/Tests/Others/RQHardswish/inputs.npz diff --git a/DeeployTest/Tests/RQHardswish/network.onnx b/DeeployTest/Tests/Others/RQHardswish/network.onnx similarity index 100% rename from DeeployTest/Tests/RQHardswish/network.onnx rename to DeeployTest/Tests/Others/RQHardswish/network.onnx diff --git a/DeeployTest/Tests/RQHardswish/outputs.npz b/DeeployTest/Tests/Others/RQHardswish/outputs.npz similarity index 100% rename from DeeployTest/Tests/RQHardswish/outputs.npz rename to DeeployTest/Tests/Others/RQHardswish/outputs.npz diff --git a/DeeployTest/Tests/testRQMatMul/inputs.npz b/DeeployTest/Tests/Others/RQMatMul/inputs.npz similarity index 100% rename from DeeployTest/Tests/testRQMatMul/inputs.npz rename to DeeployTest/Tests/Others/RQMatMul/inputs.npz diff --git a/DeeployTest/Tests/testRQMatMul/network.onnx b/DeeployTest/Tests/Others/RQMatMul/network.onnx similarity index 100% rename from DeeployTest/Tests/testRQMatMul/network.onnx rename to DeeployTest/Tests/Others/RQMatMul/network.onnx diff --git a/DeeployTest/Tests/testRQMatMul/outputs.npz 
b/DeeployTest/Tests/Others/RQMatMul/outputs.npz similarity index 100% rename from DeeployTest/Tests/testRQMatMul/outputs.npz rename to DeeployTest/Tests/Others/RQMatMul/outputs.npz diff --git a/DeeployTest/Tests/test2DRequantizedConv/activations.npz b/DeeployTest/Tests/Others/RequantizedConv2D/activations.npz similarity index 100% rename from DeeployTest/Tests/test2DRequantizedConv/activations.npz rename to DeeployTest/Tests/Others/RequantizedConv2D/activations.npz diff --git a/DeeployTest/Tests/test2DRequantizedConv/inputs.npz b/DeeployTest/Tests/Others/RequantizedConv2D/inputs.npz similarity index 100% rename from DeeployTest/Tests/test2DRequantizedConv/inputs.npz rename to DeeployTest/Tests/Others/RequantizedConv2D/inputs.npz diff --git a/DeeployTest/Tests/test2DRequantizedConv/network.onnx b/DeeployTest/Tests/Others/RequantizedConv2D/network.onnx similarity index 100% rename from DeeployTest/Tests/test2DRequantizedConv/network.onnx rename to DeeployTest/Tests/Others/RequantizedConv2D/network.onnx diff --git a/DeeployTest/Tests/test2DRequantizedConv/outputs.npz b/DeeployTest/Tests/Others/RequantizedConv2D/outputs.npz similarity index 100% rename from DeeployTest/Tests/test2DRequantizedConv/outputs.npz rename to DeeployTest/Tests/Others/RequantizedConv2D/outputs.npz diff --git a/DeeployTest/Tests/testRequantizedDWConv/activations.npz b/DeeployTest/Tests/Others/RequantizedDWConv/activations.npz similarity index 100% rename from DeeployTest/Tests/testRequantizedDWConv/activations.npz rename to DeeployTest/Tests/Others/RequantizedDWConv/activations.npz diff --git a/DeeployTest/Tests/testRequantizedDWConv/inputs.npz b/DeeployTest/Tests/Others/RequantizedDWConv/inputs.npz similarity index 100% rename from DeeployTest/Tests/testRequantizedDWConv/inputs.npz rename to DeeployTest/Tests/Others/RequantizedDWConv/inputs.npz diff --git a/DeeployTest/Tests/testRequantizedDWConv/network.onnx b/DeeployTest/Tests/Others/RequantizedDWConv/network.onnx similarity index 100% rename 
from DeeployTest/Tests/testRequantizedDWConv/network.onnx rename to DeeployTest/Tests/Others/RequantizedDWConv/network.onnx diff --git a/DeeployTest/Tests/testRequantizedDWConv/outputs.npz b/DeeployTest/Tests/Others/RequantizedDWConv/outputs.npz similarity index 100% rename from DeeployTest/Tests/testRequantizedDWConv/outputs.npz rename to DeeployTest/Tests/Others/RequantizedDWConv/outputs.npz diff --git a/DeeployTest/Tests/testRequantizedLinear/activations.npz b/DeeployTest/Tests/Others/RequantizedLinear/activations.npz similarity index 100% rename from DeeployTest/Tests/testRequantizedLinear/activations.npz rename to DeeployTest/Tests/Others/RequantizedLinear/activations.npz diff --git a/DeeployTest/Tests/testRequantizedLinear/inputs.npz b/DeeployTest/Tests/Others/RequantizedLinear/inputs.npz similarity index 100% rename from DeeployTest/Tests/testRequantizedLinear/inputs.npz rename to DeeployTest/Tests/Others/RequantizedLinear/inputs.npz diff --git a/DeeployTest/Tests/testRequantizedLinear/network.onnx b/DeeployTest/Tests/Others/RequantizedLinear/network.onnx similarity index 100% rename from DeeployTest/Tests/testRequantizedLinear/network.onnx rename to DeeployTest/Tests/Others/RequantizedLinear/network.onnx diff --git a/DeeployTest/Tests/testRequantizedLinear/outputs.npz b/DeeployTest/Tests/Others/RequantizedLinear/outputs.npz similarity index 100% rename from DeeployTest/Tests/testRequantizedLinear/outputs.npz rename to DeeployTest/Tests/Others/RequantizedLinear/outputs.npz diff --git a/DeeployTest/Tests/test2DRequantizedStriddedPaddedConv/inputs.npz b/DeeployTest/Tests/Others/RequantizedStriddedPaddedConv2D/inputs.npz similarity index 100% rename from DeeployTest/Tests/test2DRequantizedStriddedPaddedConv/inputs.npz rename to DeeployTest/Tests/Others/RequantizedStriddedPaddedConv2D/inputs.npz diff --git a/DeeployTest/Tests/test2DRequantizedStriddedPaddedConv/network.onnx b/DeeployTest/Tests/Others/RequantizedStriddedPaddedConv2D/network.onnx similarity index 
100% rename from DeeployTest/Tests/test2DRequantizedStriddedPaddedConv/network.onnx rename to DeeployTest/Tests/Others/RequantizedStriddedPaddedConv2D/network.onnx diff --git a/DeeployTest/Tests/test2DRequantizedStriddedPaddedConv/outputs.npz b/DeeployTest/Tests/Others/RequantizedStriddedPaddedConv2D/outputs.npz similarity index 100% rename from DeeployTest/Tests/test2DRequantizedStriddedPaddedConv/outputs.npz rename to DeeployTest/Tests/Others/RequantizedStriddedPaddedConv2D/outputs.npz diff --git a/DeeployTest/Tests/simpleRegression/activations.npz b/DeeployTest/Tests/Others/SimpleRegression/activations.npz similarity index 100% rename from DeeployTest/Tests/simpleRegression/activations.npz rename to DeeployTest/Tests/Others/SimpleRegression/activations.npz diff --git a/DeeployTest/Tests/simpleRegression/inputs.npz b/DeeployTest/Tests/Others/SimpleRegression/inputs.npz similarity index 100% rename from DeeployTest/Tests/simpleRegression/inputs.npz rename to DeeployTest/Tests/Others/SimpleRegression/inputs.npz diff --git a/DeeployTest/Tests/simpleRegression/network.onnx b/DeeployTest/Tests/Others/SimpleRegression/network.onnx similarity index 100% rename from DeeployTest/Tests/simpleRegression/network.onnx rename to DeeployTest/Tests/Others/SimpleRegression/network.onnx diff --git a/DeeployTest/Tests/simpleRegression/outputs.npz b/DeeployTest/Tests/Others/SimpleRegression/outputs.npz similarity index 100% rename from DeeployTest/Tests/simpleRegression/outputs.npz rename to DeeployTest/Tests/Others/SimpleRegression/outputs.npz diff --git a/DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128/inputs.npz b/DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128/inputs.npz similarity index 100% rename from DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128/inputs.npz rename to DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128/inputs.npz diff --git 
a/DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128/network.onnx b/DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128/network.onnx similarity index 100% rename from DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128/network.onnx rename to DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128/network.onnx diff --git a/DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128/outputs.npz b/DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128/outputs.npz similarity index 100% rename from DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128/outputs.npz rename to DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128/outputs.npz diff --git a/DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_16/inputs.npz b/DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_16/inputs.npz similarity index 100% rename from DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_16/inputs.npz rename to DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_16/inputs.npz diff --git a/DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_16/network.onnx b/DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_16/network.onnx similarity index 100% rename from DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_16/network.onnx rename to DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_16/network.onnx diff --git a/DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_16/outputs.npz b/DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_16/outputs.npz similarity index 100% rename from DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_16/outputs.npz rename to DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_16/outputs.npz diff --git 
a/DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_32/inputs.npz b/DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_32/inputs.npz similarity index 100% rename from DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_32/inputs.npz rename to DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_32/inputs.npz diff --git a/DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_32/network.onnx b/DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_32/network.onnx similarity index 100% rename from DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_32/network.onnx rename to DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_32/network.onnx diff --git a/DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_32/outputs.npz b/DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_32/outputs.npz similarity index 100% rename from DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_32/outputs.npz rename to DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_32/outputs.npz diff --git a/DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_64/inputs.npz b/DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_64/inputs.npz similarity index 100% rename from DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_64/inputs.npz rename to DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_64/inputs.npz diff --git a/DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_64/network.onnx b/DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_64/network.onnx similarity index 100% rename from DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_64/network.onnx rename to DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_64/network.onnx diff --git 
a/DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_64/outputs.npz b/DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_64/outputs.npz similarity index 100% rename from DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_64/outputs.npz rename to DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_64/outputs.npz diff --git a/DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8/inputs.npz b/DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8/inputs.npz similarity index 100% rename from DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8/inputs.npz rename to DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8/inputs.npz diff --git a/DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8/network.onnx b/DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8/network.onnx similarity index 100% rename from DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8/network.onnx rename to DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8/network.onnx diff --git a/DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8/outputs.npz b/DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8/outputs.npz similarity index 100% rename from DeeployTest/Tests/testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8/outputs.npz rename to DeeployTest/Tests/Others/TrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8/outputs.npz diff --git a/DeeployTest/Tests/Transformer/activations.npz b/DeeployTest/Tests/Others/Transformer/activations.npz similarity index 100% rename from DeeployTest/Tests/Transformer/activations.npz rename to DeeployTest/Tests/Others/Transformer/activations.npz diff --git a/DeeployTest/Tests/Transformer/inputs.npz b/DeeployTest/Tests/Others/Transformer/inputs.npz similarity index 100% rename from 
DeeployTest/Tests/Transformer/inputs.npz rename to DeeployTest/Tests/Others/Transformer/inputs.npz diff --git a/DeeployTest/Tests/Transformer/network.onnx b/DeeployTest/Tests/Others/Transformer/network.onnx similarity index 100% rename from DeeployTest/Tests/Transformer/network.onnx rename to DeeployTest/Tests/Others/Transformer/network.onnx diff --git a/DeeployTest/Tests/Transformer/outputs.npz b/DeeployTest/Tests/Others/Transformer/outputs.npz similarity index 100% rename from DeeployTest/Tests/Transformer/outputs.npz rename to DeeployTest/Tests/Others/Transformer/outputs.npz diff --git a/DeeployTest/Tests/testTypeInferenceDifferentTypes/inputs.npz b/DeeployTest/Tests/Others/TypeInference/inputs.npz similarity index 100% rename from DeeployTest/Tests/testTypeInferenceDifferentTypes/inputs.npz rename to DeeployTest/Tests/Others/TypeInference/inputs.npz diff --git a/DeeployTest/Tests/testTypeInferenceDifferentTypes/network.onnx b/DeeployTest/Tests/Others/TypeInference/network.onnx similarity index 100% rename from DeeployTest/Tests/testTypeInferenceDifferentTypes/network.onnx rename to DeeployTest/Tests/Others/TypeInference/network.onnx diff --git a/DeeployTest/Tests/testTypeInferenceDifferentTypes/outputs.npz b/DeeployTest/Tests/Others/TypeInference/outputs.npz similarity index 100% rename from DeeployTest/Tests/testTypeInferenceDifferentTypes/outputs.npz rename to DeeployTest/Tests/Others/TypeInference/outputs.npz diff --git a/DeeployTest/Tests/testSlice/activations.npz b/DeeployTest/Tests/Others/iNoNorm/activations.npz similarity index 100% rename from DeeployTest/Tests/testSlice/activations.npz rename to DeeployTest/Tests/Others/iNoNorm/activations.npz diff --git a/DeeployTest/Tests/TestiNoNorm/inputs.npz b/DeeployTest/Tests/Others/iNoNorm/inputs.npz similarity index 100% rename from DeeployTest/Tests/TestiNoNorm/inputs.npz rename to DeeployTest/Tests/Others/iNoNorm/inputs.npz diff --git a/DeeployTest/Tests/TestiNoNorm/network.onnx 
b/DeeployTest/Tests/Others/iNoNorm/network.onnx similarity index 100% rename from DeeployTest/Tests/TestiNoNorm/network.onnx rename to DeeployTest/Tests/Others/iNoNorm/network.onnx diff --git a/DeeployTest/Tests/TestiNoNorm/outputs.npz b/DeeployTest/Tests/Others/iNoNorm/outputs.npz similarity index 100% rename from DeeployTest/Tests/TestiNoNorm/outputs.npz rename to DeeployTest/Tests/Others/iNoNorm/outputs.npz diff --git a/DeeployTest/Tests/trueIntegerDivSandwich/activations.npz b/DeeployTest/Tests/Others/trueIntegerDivSandwich/activations.npz similarity index 100% rename from DeeployTest/Tests/trueIntegerDivSandwich/activations.npz rename to DeeployTest/Tests/Others/trueIntegerDivSandwich/activations.npz diff --git a/DeeployTest/Tests/trueIntegerDivSandwich/inputs.npz b/DeeployTest/Tests/Others/trueIntegerDivSandwich/inputs.npz similarity index 100% rename from DeeployTest/Tests/trueIntegerDivSandwich/inputs.npz rename to DeeployTest/Tests/Others/trueIntegerDivSandwich/inputs.npz diff --git a/DeeployTest/Tests/trueIntegerDivSandwich/network.onnx b/DeeployTest/Tests/Others/trueIntegerDivSandwich/network.onnx similarity index 100% rename from DeeployTest/Tests/trueIntegerDivSandwich/network.onnx rename to DeeployTest/Tests/Others/trueIntegerDivSandwich/network.onnx diff --git a/DeeployTest/Tests/trueIntegerDivSandwich/outputs.npz b/DeeployTest/Tests/Others/trueIntegerDivSandwich/outputs.npz similarity index 100% rename from DeeployTest/Tests/trueIntegerDivSandwich/outputs.npz rename to DeeployTest/Tests/Others/trueIntegerDivSandwich/outputs.npz diff --git a/DeeployTest/deeployStateEqualityTest.py b/DeeployTest/deeployStateEqualityTest.py index 6a3f5cdebe..f7068259a5 100644 --- a/DeeployTest/deeployStateEqualityTest.py +++ b/DeeployTest/deeployStateEqualityTest.py @@ -24,7 +24,7 @@ metavar = 'testdir', dest = 'dir', type = str, - default = './Tests/simpleRegression', + default = './Tests/Others/SimpleRegression', help = 'Set the regression test\n') 
parser.add_argument('-d', metavar = 'dumpdir', diff --git a/DeeployTest/generateNetwork.py b/DeeployTest/generateNetwork.py index cf8acf05db..5ce5726cc1 100644 --- a/DeeployTest/generateNetwork.py +++ b/DeeployTest/generateNetwork.py @@ -134,8 +134,8 @@ def generateNetwork(args): log.debug(f"Deployer: {deployer}") if not isinstance( - platform, CMSISPlatform - ) and not "simpleCNN" in args.dir and not "testRQMatMul" in args.dir and not "testRQGEMM" in args.dir: + platform, + CMSISPlatform) and not "simpleCNN" in args.dir and not "RQMatMul" in args.dir and not "RQGEMM" in args.dir: deployer.loweringOptimizer.passes.insert(0, EmulateCMSISRequantPass()) verbosityCfg = _NoVerbosity diff --git a/DeeployTest/testComponentGraph.py b/DeeployTest/testComponentGraph.py index 7875074092..91470e6432 100644 --- a/DeeployTest/testComponentGraph.py +++ b/DeeployTest/testComponentGraph.py @@ -11,7 +11,7 @@ from Deeploy.ComponentGraph import extractComponentGraph, extractComponentsFromComponentGraph if __name__ == "__main__": - test_dir = "Tests/WaveFormer" + test_dir = "Tests/Models/WaveFormer" colors = ["red", "green", "blue", "yellow"] component_color = "red" color_attr = "color" diff --git a/DeeployTest/testEngineAwareOptimizerWrapper.py b/DeeployTest/testEngineAwareOptimizerWrapper.py index 81c2d30d27..9e684345c0 100644 --- a/DeeployTest/testEngineAwareOptimizerWrapper.py +++ b/DeeployTest/testEngineAwareOptimizerWrapper.py @@ -22,7 +22,7 @@ def _test_partial_coloring(): - test_dir = "Tests/simpleRegression" + test_dir = "Tests/Others/SimpleRegression" model = onnx.load(os.path.join(test_dir, "network.onnx")) graph = gs.import_onnx(model).toposort() @@ -79,7 +79,7 @@ def _test_pass(_pass: TopologyOptimizationPass, graph: gs.Graph, engineName: str def _test_passes(): - test_dir = "Tests/simpleRegression" + test_dir = "Tests/Others/SimpleRegression" model = onnx.load(os.path.join(test_dir, "network.onnx")) graph = gs.import_onnx(model).toposort() passes = [ diff --git 
a/DeeployTest/testRegexMatching.py b/DeeployTest/testRegexMatching.py index ec91a01e97..eeadbdb7e9 100644 --- a/DeeployTest/testRegexMatching.py +++ b/DeeployTest/testRegexMatching.py @@ -42,7 +42,7 @@ def __init__(self): if __name__ == "__main__": optimizer = TopologyOptimizer([ConvTestPass()]) - model = onnx.load_model('Tests/simpleCNN/network.onnx') + model = onnx.load_model('Tests/Models/simpleCNN/network.onnx') graph = gs.import_onnx(model) match_count = 0 diff --git a/DeeployTest/testReplaceInsertSubgraph.py b/DeeployTest/testReplaceInsertSubgraph.py index c3129fff76..df00f87c67 100644 --- a/DeeployTest/testReplaceInsertSubgraph.py +++ b/DeeployTest/testReplaceInsertSubgraph.py @@ -11,7 +11,7 @@ from Deeploy.OptimizationPasses.TopologyOptimizationPasses.PULPPasses import PULPConvRequantMergePass if __name__ == "__main__": - test_dir = "Tests/simpleRegression" + test_dir = "Tests/Others/SimpleRegression" model = onnx.load(os.path.join(test_dir, "network.onnx")) graph = gs.import_onnx(model).toposort() diff --git a/DeeployTest/testSlice_PULP.py b/DeeployTest/testSlice_PULP.py index 0dbda95840..00ff905f9f 100644 --- a/DeeployTest/testSlice_PULP.py +++ b/DeeployTest/testSlice_PULP.py @@ -41,11 +41,11 @@ signProp = False - onnx_graph = onnx.load_model('./Tests/testSlice/network.onnx') + onnx_graph = onnx.load_model('./Tests/IntKernels/Slice/network.onnx') graph = gs.import_onnx(onnx_graph) - inputs = np.load('./Tests/testSlice/inputs.npz') - outputs = np.load(f'./Tests/testSlice/outputs.npz') + inputs = np.load('./Tests/IntKernels/Slice/inputs.npz') + outputs = np.load(f'./Tests/IntKernels/Slice/outputs.npz') tensors = graph.tensors() # Load as int64 and infer types later @@ -99,18 +99,18 @@ if not isFloat and not buffer._signed: values -= buffer.nLevels // 2 - generateTestNetwork(deployer, test_inputs, test_outputs, 'TEST_SIRACUSA/Tests/testSlice', _NoVerbosity) + generateTestNetwork(deployer, test_inputs, test_outputs, 'TEST_SIRACUSA/Tests/IntKernels/Slice', 
_NoVerbosity) os.system( - f"$CMAKE -DTOOLCHAIN={args.toolchain} -DTOOLCHAIN_INSTALL_DIR={_TOOLCHAIN_DIR} -DTESTNAME=testSlice -DGENERATED_SOURCE=TEST_SIRACUSA/Tests/testSlice -Dplatform=Siracusa -B TEST_SIRACUSA/build -DNUM_CORES=1 .." + f"$CMAKE -DTOOLCHAIN={args.toolchain} -DTOOLCHAIN_INSTALL_DIR={_TOOLCHAIN_DIR} -DTESTNAME=Slice -DGENERATED_SOURCE=TEST_SIRACUSA/Tests/IntKernels/Slice -Dplatform=Siracusa -B TEST_SIRACUSA/build -DNUM_CORES=1 .." ) - process = subprocess.Popen(["$CMAKE --build TEST_SIRACUSA/build --target gvsoc_testSlice"], + process = subprocess.Popen(["$CMAKE --build TEST_SIRACUSA/build --target gvsoc_Slice"], stdout = subprocess.PIPE, stderr = subprocess.STDOUT, shell = True, encoding = 'utf-8') fileHandle = open('out.txt', 'a') - fileHandle.write(f"################## Testing Tests/testSlice on SIRACUSA Platform ##################\n") + fileHandle.write(f"################## Testing Tests/IntKernels/Slice on SIRACUSA Platform ##################\n") result = "" while True: @@ -127,4 +127,4 @@ fileHandle.close() if not "Errors: 0 out of " in result: - raise RuntimeError(f"Found an error in Tests/testSlice") + raise RuntimeError(f"Found an error in Tests/IntKernels/Slice") diff --git a/docs/install.md b/docs/install.md index 4499ba1ef0..f2749867b4 100644 --- a/docs/install.md +++ b/docs/install.md @@ -84,7 +84,7 @@ For example, you can run ``` cd DeeployTest -python testRunner_generic.py -t Tests/simpleRegression +python testRunner_generic.py -t Tests/Others/SimpleRegression ``` to run the `simpleRegression` test on your workstation. Various other tests are available and compatibility between tests and platforms is tested in the `.gitlab-ci.yml` file. 
diff --git a/docs/tutorials/introduction.md b/docs/tutorials/introduction.md index e2a54f9f6f..1e638bd8a8 100644 --- a/docs/tutorials/introduction.md +++ b/docs/tutorials/introduction.md @@ -38,11 +38,11 @@ From the `DeeployTest` folder, you can use the `testRunner` to compile ONNXs and To validate your installation, you can run a simple Add node on each platform: ``` -python testRunner_generic.py -t Tests/Adder -python testRunner_cortexm.py -t Tests/Adder -python testRunner_mempool.py -t Tests/Adder -python testRunner_snitch.py -t Tests/Adder/ -python testRunner_siracusa.py -t Tests/Adder --cores=8 +python testRunner_generic.py -t Tests/IntKernels/Add/Regular +python testRunner_cortexm.py -t Tests/IntKernels/Add/Regular +python testRunner_mempool.py -t Tests/IntKernels/Add/Regular +python testRunner_snitch.py -t Tests/IntKernels/Add/Regular/ +python testRunner_siracusa.py -t Tests/IntKernels/Add/Regular --cores=8 ``` Once all these basic tests are passed, we can jump into the basics of Deeploy. @@ -67,9 +67,9 @@ The figure below gives an overview of the deployment stack. As you can see, ther You can visualize the ONNX graphs using [Netron](https://netron.app/). Either use the web interface or install the python package with `pip install netron`. -> ✅ **Task:** Visualize the ONNX graph of the `Adder`, `MobileNetv2`, and `Transformer` +> ✅ **Task:** Visualize the ONNX graph of the `IntKernels/Add/Regular`, `Models/MobileNetv2`, and `Others/Transformer` -The ONNX graphs are in `DeeployTest/Tests//network.onnx`. The networks are increasing in complexity, `Adder` is a single node network for unit testing, while `MobileNetv2` is a simple sequential network mostly made of convolutions. Finally, the `Transformer` network showcases a typical transformer block used in Encoder and Decoder networks. If you want to peek at a complex network, you can visualize `microLlama/microLlama128`. +The ONNX graphs are in `DeeployTest/Tests//network.onnx`. 
The networks are increasing in complexity, `IntKernels/Add/Regular` is a single node network for unit testing, while `Models/MobileNetv2` is a simple sequential network mostly made of convolutions. Finally, the `Others/Transformer` network showcases a typical transformer block used in Encoder and Decoder networks. If you want to peek at a complex network, you can visualize `Models/microLlama/microLlama128`. Now that we understand Deeploy's input, let's check the output-generated code! @@ -77,15 +77,15 @@ Now that we understand Deeploy's input, let's check the output-generated code! The generated code is located in the following directory: `DeeployTest/TEST_/Tests`, and the `Network.c` file is the interesting one. -The generated code is trivial for the `Adder` graph; we simply use the template for the `Add` node of the Generic platform. You can find the template declaration in `Deeploy/Targets/Generic/Templates/AddTemplate.py`. +The generated code is trivial for the `IntKernels/Add/Regular` graph; we simply use the template for the `Add` node of the Generic platform. You can find the template declaration in `Deeploy/Targets/Generic/Templates/AddTemplate.py`. -Now, if you want to look at something a bit more complex, run `python testRunner_generic.py -t ./Tests/miniMobileNetv2` (from `DeeployTest`) and look at the generated code. There are two interesting points you can notice: +Now, if you want to look at something a bit more complex, run `python testRunner_generic.py -t ./Tests/Models/miniMobileNetv2` (from `DeeployTest`) and look at the generated code. There are two interesting points you can notice: - We hoist the constants at the top of the file. -- In the `RunNetwork` function, we sequentially have node templates to execute the operands and malloc/free to manage the memory. You can open the ONNX graph of `miniMobileNetv2` on the side to try to match the nodes of the graph with their generated code. 
+- In the `RunNetwork` function, we sequentially have node templates to execute the operands and malloc/free to manage the memory. You can open the ONNX graph of `Models/miniMobileNetv2` on the side to try to match the nodes of the graph with their generated code. > ✅ **Task:** Visualize the effect of passes on the ONNX graph for the Siracusa platform. -Deeploy applies passes on the ONNX graph to transform its topology and optimize its execution. Let's visualize the effect of the passes used in the Siracusa Platform. First, let's execute our `miniMobileNetv2` on Siracusa with `python testRunner_siracusa.py -t ./Tests/miniMobileNetv2`. You can find the original ONNX graph at `DeeployTest/Tests/miniMobileNetv2/network.onnx`, and the transformed ONNX graph at `DeeployTest/TEST_SIRACUSA/Tests/miniMobileNetv2/deeployStates/backend_post_binding.onnx`. Open both ONNX graphs side by side to compare them. +Deeploy applies passes on the ONNX graph to transform its topology and optimize its execution. Let's visualize the effect of the passes used in the Siracusa Platform. First, let's execute our `miniMobileNetv2` on Siracusa with `python testRunner_siracusa.py -t ./Tests/Models/miniMobileNetv2`. You can find the original ONNX graph at `DeeployTest/Tests/Models/miniMobileNetv2/network.onnx`, and the transformed ONNX graph at `DeeployTest/TEST_SIRACUSA/Tests/Models/miniMobileNetv2/deeployStates/backend_post_binding.onnx`. Open both ONNX graphs side by side to compare them. You can notice the effect of two passes on the graph: - One pass fuses the `Conv` and `RequantShift` nodes. This is a common technique named [Operator Fusion](https://medium.com/data-science/how-pytorch-2-0-accelerates-deep-learning-with-operator-fusion-and-cpu-gpu-code-generation-35132a85bd26) and used in many DNN compilers. @@ -140,7 +140,7 @@ Now that you understand the hardware and the kind of workload we want to execute
Solution - > If you run `python testRunner_siracusa.py -t Tests/microLlama/microLlama128 --cores=1` and then `python testRunner_siracusa.py -t Tests/microLlama/microLlama128 --cores=8`, you should measure a runtime of ~16,1M cycles for 1 core and 3.1M cycles for 8 cores. + > If you run `python testRunner_siracusa.py -t Tests/Models/microLlama/microLlama128 --cores=1` and then `python testRunner_siracusa.py -t Tests/Models/microLlama/microLlama128 --cores=8`, you should measure a runtime of ~16.1M cycles for 1 core and 3.1M cycles for 8 cores. > > The speedup ratio is obtained via $\frac{\text{Runtime 1 cores}}{\text{Runtime 8 cores}} = 5.2$. Hence, using 8 cores instead of 1 leads to a 5.2 times speedup. > @@ -162,9 +162,9 @@ The good news is that Deeploy can already do that! So, let's generate and run so
Solution - > Bad configuration: `python testRunner_tiled_siracusa.py -t Tests/microLlama/microLlama64_parallel --cores=8 --l1 8000 --defaultMemLevel=L2` -> Runtime: 47.5 MCycles + > Bad configuration: `python testRunner_tiled_siracusa.py -t Tests/Models/microLlama/microLlama64_parallel --cores=8 --l1 8000 --defaultMemLevel=L2` -> Runtime: 47.5 MCycles > - > Good configuration `python testRunner_tiled_siracusa.py -t Tests/microLlama/microLlama64_parallel --cores=8 --l1 64000 --defaultMemLevel=L2`: -> Runtime: 35.3 MCycles + > Good configuration: `python testRunner_tiled_siracusa.py -t Tests/Models/microLlama/microLlama64_parallel --cores=8 --l1 64000 --defaultMemLevel=L2` -> Runtime: 35.3 MCycles > > Justification: As the size of the L1 memory gets smaller, tiles also get smaller and smaller. Smaller tiles usually mean that it's harder to keep the core properly utilized. @@ -199,7 +199,7 @@ To use the NPU, you can use the `testRunner_tiled_siracusa_w_neureka.py`. The Li > The runtime in parallel mode with NPU is obtained with: > >` - python testRunner_tiled_siracusa_w_neureka.py -t Tests/microLlama/microLlama64_parallel --cores=8 --l1 64000 --defaultMemLevel=L2 + python testRunner_tiled_siracusa_w_neureka.py -t Tests/Models/microLlama/microLlama64_parallel --cores=8 --l1 64000 --defaultMemLevel=L2 ` > > And returns 28.6 MCycles of runtime. The runtime without NPU was measured above and is 35.3 MCycles. Hence, the speedup is ~1.23 times.