Skip to content

Commit cfd9151

Browse files
committed
Update and add optimal parameters for several GPU architectures
1 parent 4ea5007 commit cfd9151

2 files changed

Lines changed: 121 additions & 117 deletions

File tree

Lines changed: 115 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -1,115 +1,115 @@
1-
Architecture,default,default_cpu,MI100,VEGA,TAHITI,TESLA,FERMI,PASCAL,KEPLER,AMPERE,TURING,ADA,OPENCL,RDNA,MI210,BLACKWELL
2-
,,,,,,,,,,,,,,,,
3-
CORE:,,,,,,,,,,,,,,,,
4-
WARP_SIZE,0,,64,64,32,32,32,32,32,32,32,32,32,32,64,32
5-
THREAD_COUNT_DEFAULT,256,,256,256,,,,,,512,512,512,256,512,512,512
6-
,,,,,,,,,,,,,,,,
7-
LB:,,,,,,,,,,,,,,,,
8-
GPUTPCCreateTrackingData,256,,"[256, 7]","[192, 2]",,,,,,384,256,256,,,,384
9-
GPUTPCTrackletConstructor,256,,"[768, 8]","[512, 10]","[256, 2]","[256, 1]","[256, 2]","[1024, 2]","[512, 4]","[256, 2]","[256, 2]","[256, 2]",,,,768
10-
GPUTPCTrackletSelector,256,,"[384, 5]","[192, 10]","[256, 3]","[256, 1]","[256, 3]","[512, 4]","[256, 3]","[192, 3]","[192, 3]","[192, 3]",,,,992
11-
GPUTPCNeighboursFinder,256,,"[192, 8]","[960, 8]",256,256,256,512,256,"[640, 1]","[640, 1]","[640, 1]",,,,992
12-
GPUTPCNeighboursCleaner,256,,"[128, 5]","[384, 9]",256,256,256,256,256,512,512,512,,,,672
13-
GPUTPCExtrapolationTracking,256,,"[256, 7]","[256, 2]",,,,,,"[128, 4]","[192, 2]","[192, 2]",,,,896
14-
GPUTRDTrackerKernels_gpuVersion,512,,,,,,,,,,,,,,,
15-
GPUTPCCreateOccupancyMap_fill,256,,,,,,,,,,,,,,,
16-
GPUTPCCreateOccupancyMap_fold,256,,,,,,,,,,,,,,,
17-
GPUTRDTrackerKernels_o2Version,512,,,,,,,,,,,,,,,
18-
GPUTPCCompressionKernels_step0attached,256,,"[128, 1]","[64, 2]",,,,,,"[64, 2]",128,128,,,,"[96, 3]"
19-
GPUTPCCompressionKernels_step1unattached,256,,"[512, 2]","[512, 2]",,,,,,"[512, 3]","[512, 2]","[512, 2]",,,,"[512, 2]"
20-
GPUTPCDecompressionKernels_step0attached,256,,"[128, 2]","[128, 2]",,,,,,"[32, 1]","[32, 1]","[32, 1]",,,,"[32, 1]"
21-
GPUTPCDecompressionKernels_step1unattached,256,,"[64, 2]","[64, 2]",,,,,,"[32, 1]","[32, 1]","[32, 1]",,,,"[32, 1]"
22-
GPUTPCDecompressionUtilKernels_sortPerSectorRow,256,,,,,,,,,,,,,,,
23-
GPUTPCDecompressionUtilKernels_countFilteredClusters,256,,,,,,,,,,,,,,,
24-
GPUTPCDecompressionUtilKernels_storeFilteredClusters,256,,,,,,,,,,,,,,,
25-
GPUTPCCFDecodeZS,"[128, 4]",,"[64, 4]","[64, 1]",,,,,,"[64, 10]","[64, 8]","[64, 8]",,,,"[64, 10]"
26-
GPUTPCCFDecodeZSLink,"""GPUCA_WARP_SIZE""",,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,,,,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,,"""GPUCA_WARP_SIZE"""
27-
GPUTPCCFDecodeZSDenseLink,"""GPUCA_WARP_SIZE""",,"[""GPUCA_WARP_SIZE"", 4]","[""GPUCA_WARP_SIZE"", 14]",,,,,,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,,"[""GPUCA_WARP_SIZE"", 8]"
28-
GPUTPCCFGather,"[1024, 1]",,"[1024, 5]","[1024, 1]",,,,,,"[1024, 1]","[1024, 1]","[1024, 1]",,,,"[1024, 1]"
29-
COMPRESSION_GATHER,1024,,1024,1024,,,,,,1024,1024,1024,,,,
30-
GPUTPCGMMergerTrackFit,256,,"[192, 2]","[64, 7]",,,,,,"[64, 4]","[32, 8]","[32, 8]",,,,"[64, 8]"
31-
GPUTPCGMMergerFollowLoopers,256,,"[256, 5]","[256, 4]",,,,,,"[64, 12]","[128, 4]","[128, 4]",,,,"[224, 3]"
32-
GPUTPCGMMergerSectorRefit,256,,"[64, 4]","[256, 2]",,,,,,"[32, 6]","[64, 5]","[64, 5]",,,,"[32, 10]"
33-
GPUTPCGMMergerUnpackResetIds,256,,256,256,,,,,,256,256,256,,,,256
34-
GPUTPCGMMergerUnpackGlobal,256,,256,256,,,,,,256,256,256,,,,256
35-
GPUTPCGMMergerResolve_step0,256,,512,256,,,,,,256,256,256,,,,256
36-
GPUTPCGMMergerResolve_step1,256,,512,256,,,,,,256,256,256,,,,256
37-
GPUTPCGMMergerResolve_step2,256,,512,256,,,,,,256,256,256,,,,256
38-
GPUTPCGMMergerResolve_step3,256,,512,256,,,,,,256,256,256,,,,256
39-
GPUTPCGMMergerResolve_step4,256,,512,256,,,,,,"[256, 4]","[256, 4]","[256, 4]",,,,"[256, 4]"
40-
GPUTPCGMMergerClearLinks,256,,256,256,,,,,,256,256,256,,,,256
41-
GPUTPCGMMergerMergeWithinPrepare,256,,256,256,,,,,,256,256,256,,,,256
42-
GPUTPCGMMergerMergeSectorsPrepare,256,,256,256,,,,,,"[256, 2]","[256, 2]","[256, 2]",,,,"[256, 2]"
43-
GPUTPCGMMergerMergeBorders_step0,256,,512,256,,,,,,192,192,192,,,,192
44-
GPUTPCGMMergerMergeBorders_step2,256,,512,256,,,,,,"[64, 2]",256,256,,,,"[64, 2]"
45-
GPUTPCGMMergerMergeCE,256,,512,256,,,,,,256,256,256,,,,256
46-
GPUTPCGMMergerLinkExtrapolatedTracks,256,,256,256,,,,,,256,256,256,,,,256
47-
GPUTPCGMMergerCollect,256,,"[768, 1]","[1024, 1]",,,,,,"[256, 2]","[128, 2]","[128, 2]",,,,"[288, 1]"
48-
GPUTPCGMMergerSortTracksPrepare,256,,256,256,,,,,,256,256,256,,,,256
49-
GPUTPCGMMergerPrepareForFit_step0,256,,256,256,,,,,,256,256,256,,,,256
50-
GPUTPCGMMergerPrepareForFit_step1,256,,256,256,,,,,,256,256,256,,,,256
51-
GPUTPCGMMergerPrepareForFit_step2,256,,256,256,,,,,,256,256,256,,,,256
52-
GPUTPCGMMergerFinalize_step0,256,,,256,,,,,,,,,,,,256
53-
GPUTPCGMMergerFinalize_step1,256,,,256,,,,,,,,,,,,256
54-
GPUTPCGMMergerFinalize_step2,256,,,256,,,,,,,,,,,,256
55-
GPUTPCGMMergerMergeLoopers_step0,256,,,,,,,,,,,,,,,256
56-
GPUTPCGMMergerMergeLoopers_step1,256,,,,,,,,,,,,,,,256
57-
GPUTPCGMMergerMergeLoopers_step2,256,,,,,,,,,,,,,,,256
58-
GPUTPCGMO2Output_prepare,256,,,,,,,,,,,,,,,256
59-
GPUTPCGMO2Output_output,256,,,,,,,,,,,,,,,256
60-
GPUTPCStartHitsFinder,256,,"[1024, 2]","[1024, 7]",256,256,256,256,256,512,512,512,,,,608
61-
GPUTPCStartHitsSorter,256,,"[1024, 5]","[512, 7]",256,256,256,256,256,"[512, 1]","[512, 1]","[512, 1]",,,,608
62-
GPUTPCCFCheckPadBaseline,576,,"[576, 2]","[576, 2]",,,,,,"[576, 2]",,,,,,"[576, 2]"
63-
GPUTPCCFHIPTailConnector,256,,256,256,,,,,,256,
64-
GPUTPCCFHIPClusterizer,256,,256,256,,,,,,256,
65-
GPUTPCCFChargeMapFiller_fillIndexMap,512,,512,512,,,,,,448,,,,,,448
66-
GPUTPCCFChargeMapFiller_fillFromDigits,512,,512,512,,,,,,448,,,,,,448
67-
GPUTPCCFChargeMapFiller_findFragmentStart,512,,512,512,,,,,,448,,,,,,448
68-
GPUTPCCFPeakFinder,512,,"[512, 9]","[512, 4]",,,,,,128,,,,,,"[128, 5]"
69-
GPUTPCCFNoiseSuppression,512,,512,512,,,,,,448,,,,,,
70-
GPUTPCCFDeconvolution,512,,"[512, 5]","[512, 5]",,,,,,384,,,,,,384
71-
GPUTPCCFClusterizer,512,,"[448, 3]","[512, 2]",,,,,,448,,,,,,"[160, 5]"
72-
GPUTPCNNClusterizerKernels,512,,,,,,,,,,,,,,,
73-
GPUTrackingRefitKernel_mode0asGPU,256,,,,,,,,,,,,,,,256
74-
GPUTrackingRefitKernel_mode1asTrackParCov,256,,,,,,,,,,,,,,,256
75-
GPUMemClean16,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,,,,,,,
76-
GPUitoa,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,,,,,,,
77-
GPUTPCCFNoiseSuppression_noiseSuppression,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,,,,,,448
78-
GPUTPCCFNoiseSuppression_updatePeaks,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,,,,,,448
79-
GPUTPCNNClusterizerKernels_runCfClusterizer,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,
80-
GPUTPCNNClusterizerKernels_fillInputNNCPU,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,
81-
GPUTPCNNClusterizerKernels_fillInputNNGPU,1024,,,,,,,,,,,,,,,
82-
GPUTPCNNClusterizerKernels_determineClass1Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,
83-
GPUTPCNNClusterizerKernels_determineClass2Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,
84-
GPUTPCNNClusterizerKernels_publishClass1Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,
85-
GPUTPCNNClusterizerKernels_publishClass2Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,
86-
GPUTPCNNClusterizerKernels_publishDeconvolutionFlags,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,
87-
GPUTPCCFStreamCompaction_scanStart,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE"""
88-
GPUTPCCFStreamCompaction_scanUp,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE"""
89-
GPUTPCCFStreamCompaction_scanTop,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE"""
90-
GPUTPCCFStreamCompaction_scanDown,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE"""
91-
GPUTPCCFStreamCompaction_compactDigits,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE"""
92-
GPUTPCCompressionGatherKernels_unbuffered,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,,
93-
GPUTPCCompressionGatherKernels_buffered32,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,,
94-
GPUTPCCompressionGatherKernels_buffered64,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,,
95-
GPUTPCCompressionGatherKernels_buffered128,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,,
96-
GPUTPCCompressionGatherKernels_multiBlock,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,,
97-
GPUTPCGMMergerFinalize_0,256,,256,,,,,,,256,256,256,,,,256
98-
GPUTPCGMMergerFinalize_1,256,,256,,,,,,,256,256,256,,,,256
99-
GPUTPCGMMergerFinalize_2,256,,256,,,,,,,256,256,256,,,,256
100-
,,,,,,,,,,,,,,,,
101-
PAR:,,,,,,,,,,,,,,,,
102-
AMD_EUS_PER_CU,0,0,4,4,,,,,,,,,,,,0
103-
SORT_STARTHITS,1,0,,,,,,,,,,,,,,1
104-
NEIGHBOURS_FINDER_MAX_NNEIGHUP,6,0,10,4,,,,,,4,4,4,,,,2
105-
NEIGHBOURS_FINDER_UNROLL_GLOBAL,4,0,4,2,,,,,,,,,,,,2
106-
NEIGHBOURS_FINDER_UNROLL_SHARED,1,0,0,0,,,,,,,,,,,,1
107-
TRACKLET_SELECTOR_HITS_REG_SIZE,12,0,9,27,,,,,,20,20,20,,,,2
108-
ALTERNATE_BORDER_SORT,1,0,1,1,,,,,,1,1,1,,,,1
109-
SORT_BEFORE_FIT,1,0,1,1,,,,,,1,1,1,,,,1
110-
NO_ATOMIC_PRECHECK,0,0,1,1,,,,,,1,1,1,,,,1
111-
DEDX_STORAGE_TYPE,"""uint16_t""","""float""","""uint16_t""","""uint16_t""",,,,,,"""uint16_t""","""uint16_t""","""uint16_t""",,,,"""uint16_t"""
112-
MERGER_INTERPOLATION_ERROR_TYPE,"""half""","""float""","""half""","""half""",,,,,,"""half""","""half""","""half""",,,,"""half"""
113-
COMP_GATHER_KERNEL,4,0,4,4,,,,,,4,4,4,,,,4
114-
COMP_GATHER_MODE,3,0,3,3,,,,,,3,3,3,,,,3
115-
CF_SCAN_WORKGROUP_SIZE,512,0,,,,,,,,,,,,,,
1+
Architecture,default,default_cpu,MI100,VEGA,TAHITI,TESLA,FERMI,PASCAL,KEPLER,AMPERE,TURING,HOPPER,ADA,OPENCL,RDNA,MI210,BLACKWELL,MI300
2+
,,,,,,,,,,,,,,,,,,
3+
CORE:,,,,,,,,,,,,,,,,,,
4+
WARP_SIZE,0,,64,64,32,32,32,32,32,32,32,,32,32,64,64,32,64
5+
THREAD_COUNT_DEFAULT,256,,256,256,,,,,,512,512,,512,256,512,512,512,
6+
,,,,,,,,,,,,,,,,,,
7+
LB:,,,,,,,,,,,,,,,,,,
8+
GPUTPCCreateTrackingData,256,,"[256, 7]","[192, 2]",,,,,,"[224, 7]",256,"[128, 14]",416,,"[64, 21]",,448,"[320, 2]"
9+
GPUTPCTrackletConstructor,256,,"[768, 8]","[512, 10]","[256, 2]","[256, 1]","[256, 2]","[1024, 2]","[512, 4]",1024,"[256, 2]",1024,"[1024, 1]",,"[768, 2]",,"[768, 1]",512
10+
GPUTPCTrackletSelector,256,,"[384, 5]","[192, 10]","[256, 3]","[256, 1]","[256, 3]","[512, 4]","[256, 3]","[288, 3]","[192, 3]","[544, 1]","[32, 2]",,"[384, 3]",,"[32, 18]","[256, 6]"
11+
GPUTPCNeighboursFinder,256,,"[192, 8]","[960, 8]",256,256,256,512,256,864,"[640, 1]","[512, 2]","[736, 1]",,"[480, 3]",,"[896, 1]","[704, 1]"
12+
GPUTPCNeighboursCleaner,256,,"[128, 5]","[384, 9]",256,256,256,256,256,544,512,"[192, 9]","[512, 1]",,"[384, 5]",,832,"[640, 1]"
13+
GPUTPCExtrapolationTracking,256,,"[256, 7]","[256, 2]",,,,,,"[352, 4]","[192, 2]","[896, 1]","[352, 1]",,"[1024, 1]",,"[224, 2]",1024
14+
GPUTRDTrackerKernels_gpuVersion,512,,,,,,,,,512,,512,512,,512,,512,512
15+
GPUTPCCreateOccupancyMap_fill,256,,,,,,,,,256,,256,256,,256,,256,256
16+
GPUTPCCreateOccupancyMap_fold,256,,,,,,,,,256,,256,256,,256,,256,256
17+
GPUTRDTrackerKernels_o2Version,512,,,,,,,,,512,,512,512,,512,,512,512
18+
GPUTPCCompressionKernels_step0attached,256,,"[128, 1]","[64, 2]",,,,,,"[160, 2]",128,"[448, 1]",352,,"[1024, 1]",,352,"[128, 4]"
19+
GPUTPCCompressionKernels_step1unattached,256,,"[512, 2]","[512, 2]",,,,,,"[288, 4]","[512, 2]","[256, 4]","[512, 2]",,"[512, 3]",,"[512, 2]","[512, 3]"
20+
GPUTPCDecompressionKernels_step0attached,256,,"[128, 2]","[128, 2]",,,,,,"[32, 1]","[32, 1]","[32, 1]","[32, 1]",,"[128, 1]",,"[32, 1]","[128, 1]"
21+
GPUTPCDecompressionKernels_step1unattached,256,,"[64, 2]","[64, 2]",,,,,,"[32, 1]","[32, 1]","[32, 1]","[32, 1]",,"[64, 1]",,"[32, 1]","[64, 1]"
22+
GPUTPCDecompressionUtilKernels_sortPerSectorRow,256,,,,,,,,,256,,256,256,,256,,256,256
23+
GPUTPCDecompressionUtilKernels_countFilteredClusters,256,,,,,,,,,256,,256,256,,256,,256,256
24+
GPUTPCDecompressionUtilKernels_storeFilteredClusters,256,,,,,,,,,256,,256,256,,256,,256,256
25+
GPUTPCCFDecodeZS,"[128, 4]",,"[64, 4]","[64, 1]",,,,,,"[32, 10]","[64, 8]","[32, 10]","[32, 10]",,"[64, 1]",,"[32, 10]","[64, 1]"
26+
GPUTPCCFDecodeZSLink,"""GPUCA_WARP_SIZE""",,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,,,,32,"""GPUCA_WARP_SIZE""",32,32,,64,,32,64
27+
GPUTPCCFDecodeZSDenseLink,"""GPUCA_WARP_SIZE""",,"[""GPUCA_WARP_SIZE"", 4]","[""GPUCA_WARP_SIZE"", 14]",,,,,,"[32, 14]","""GPUCA_WARP_SIZE""","[32, 22]","[32, 22]",,"[64, 17]",,"[32, 6]","[64, 5]"
28+
GPUTPCCFGather,"[1024, 1]",,"[1024, 5]","[1024, 1]",,,,,,"[160, 11]","[1024, 1]",736,896,,"[928, 1]",,"[128, 12]","[320, 2]"
29+
COMPRESSION_GATHER,1024,,1024,1024,,,,,,1024,1024,,1024,,,,,
30+
GPUTPCGMMergerTrackFit,256,,"[192, 2]","[64, 7]",,,,,,"[32, 16]","[32, 8]","[32, 14]","[160, 2]",,"[32, 24]",,512,"[64, 6]"
31+
GPUTPCGMMergerFollowLoopers,256,,"[256, 5]","[256, 4]",,,,,,"[256, 4]","[128, 4]","[1024, 1]",640,,"[128, 16]",,"[1024, 1]","[256, 7]"
32+
GPUTPCGMMergerSectorRefit,256,,"[64, 4]","[256, 2]",,,,,,"[32, 8]","[64, 5]","[32, 7]","[32, 7]",,"[32, 20]",,"[32, 11]","[64, 4]"
33+
GPUTPCGMMergerUnpackResetIds,256,,256,256,,,,,,256,256,256,256,,256,,256,256
34+
GPUTPCGMMergerUnpackGlobal,256,,256,256,,,,,,256,256,256,256,,256,,256,256
35+
GPUTPCGMMergerResolve_step0,256,,512,256,,,,,,256,256,256,256,,256,,256,256
36+
GPUTPCGMMergerResolve_step1,256,,512,256,,,,,,256,256,256,256,,256,,256,256
37+
GPUTPCGMMergerResolve_step2,256,,512,256,,,,,,256,256,256,256,,256,,256,256
38+
GPUTPCGMMergerResolve_step3,256,,512,256,,,,,,256,256,256,256,,256,,256,256
39+
GPUTPCGMMergerResolve_step4,256,,512,256,,,,,,"[256, 4]","[256, 4]","[256, 4]","[256, 4]",,256,,"[256, 4]",256
40+
GPUTPCGMMergerClearLinks,256,,256,256,,,,,,256,256,256,256,,256,,256,256
41+
GPUTPCGMMergerMergeWithinPrepare,256,,256,256,,,,,,256,256,256,256,,256,,256,256
42+
GPUTPCGMMergerMergeSectorsPrepare,256,,256,256,,,,,,"[256, 2]","[256, 2]","[256, 2]","[256, 2]",,256,,"[256, 2]",256
43+
GPUTPCGMMergerMergeBorders_step0,256,,512,256,,,,,,192,192,192,192,,256,,192,256
44+
GPUTPCGMMergerMergeBorders_step2,256,,512,256,,,,,,"[64, 2]",256,"[64, 2]","[64, 2]",,256,,"[64, 2]",256
45+
GPUTPCGMMergerMergeCE,256,,512,256,,,,,,256,256,256,256,,256,,256,256
46+
GPUTPCGMMergerLinkExtrapolatedTracks,256,,256,256,,,,,,256,256,256,256,,256,,256,256
47+
GPUTPCGMMergerCollect,256,,"[768, 1]","[1024, 1]",,,,,,"[864, 1]","[128, 2]","[896, 1]",128,,1024,,416,"[384, 4]"
48+
GPUTPCGMMergerSortTracksPrepare,256,,256,256,,,,,,256,256,256,256,,256,,256,256
49+
GPUTPCGMMergerPrepareForFit_step0,256,,256,256,,,,,,256,256,256,256,,256,,256,256
50+
GPUTPCGMMergerPrepareForFit_step1,256,,256,256,,,,,,256,256,256,256,,256,,256,256
51+
GPUTPCGMMergerPrepareForFit_step2,256,,256,256,,,,,,256,256,256,256,,256,,256,256
52+
GPUTPCGMMergerFinalize_step0,256,,,256,,,,,,256,,256,256,,256,,256,256
53+
GPUTPCGMMergerFinalize_step1,256,,,256,,,,,,256,,256,256,,256,,256,256
54+
GPUTPCGMMergerFinalize_step2,256,,,256,,,,,,256,,256,256,,256,,256,256
55+
GPUTPCGMMergerMergeLoopers_step0,256,,,,,,,,,256,,256,256,,256,,256,256
56+
GPUTPCGMMergerMergeLoopers_step1,256,,,,,,,,,256,,256,256,,256,,256,256
57+
GPUTPCGMMergerMergeLoopers_step2,256,,,,,,,,,256,,256,256,,256,,256,256
58+
GPUTPCGMO2Output_prepare,256,,,,,,,,,256,,256,256,,256,,256,256
59+
GPUTPCGMO2Output_output,256,,,,,,,,,256,,256,256,,256,,256,256
60+
GPUTPCStartHitsFinder,256,,"[1024, 2]","[1024, 7]",256,256,256,256,256,"[224, 1]",512,"[416, 4]",928,,"[320, 5]",,992,"[448, 3]"
61+
GPUTPCStartHitsSorter,256,,"[1024, 5]","[512, 7]",256,256,256,256,256,"[320, 2]","[512, 1]","[864, 1]","[96, 2]",,"[192, 5]",,"[64, 17]","[448, 1]"
62+
GPUTPCCFCheckPadBaseline,576,,"[576, 2]","[576, 2]",,,,,,"[576, 3]",,"[576, 1]","[576, 1]",,"[576, 2]",,576,576
63+
GPUTPCCFHIPTailConnector,256,,256,256,,,,,,"[224, 2]",,"[320, 5]","[704, 1]",,"[128, 7]",,"[192, 5]","[448, 4]"
64+
GPUTPCCFHIPClusterizer,256,,256,256,,,,,,"[288, 5]",,"[480, 3]","[448, 3]",,352,,"[128, 4]","[512, 3]"
65+
GPUTPCCFChargeMapFiller_fillIndexMap,512,,512,512,,,,,,448,,448,448,,512,,448,512
66+
GPUTPCCFChargeMapFiller_fillFromDigits,512,,512,512,,,,,,448,,448,448,,512,,448,512
67+
GPUTPCCFChargeMapFiller_findFragmentStart,512,,512,512,,,,,,448,,448,448,,512,,448,512
68+
GPUTPCCFPeakFinder,512,,"[512, 9]","[512, 4]",,,,,,416,,992,"[672, 1]",,"[384, 2]",,"[480, 3]","[192, 10]"
69+
GPUTPCCFNoiseSuppression,512,,512,512,,,,,,608,,896,480,,160,,64,448
70+
GPUTPCCFDeconvolution,512,,"[512, 5]","[512, 5]",,,,,,"[480, 4]",,224,512,,480,,"[704, 2]","[448, 3]"
71+
GPUTPCCFClusterizer,512,,"[448, 3]","[512, 2]",,,,,,"[608, 3]",,736,"[192, 3]",,576,,"[128, 6]","[832, 2]"
72+
GPUTPCNNClusterizerKernels,512,,,,,,,,,,,,,,,,,
73+
GPUTrackingRefitKernel_mode0asGPU,256,,,,,,,,,256,,256,256,,256,,256,256
74+
GPUTrackingRefitKernel_mode1asTrackParCov,256,,,,,,,,,256,,256,256,,256,,256,256
75+
GPUMemClean16,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,"[512, 1]",,"[512, 1]","[512, 1]",,"[256, 1]",,"[512, 1]","[256, 1]"
76+
GPUitoa,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,"[512, 1]",,"[512, 1]","[512, 1]",,"[256, 1]",,"[512, 1]","[256, 1]"
77+
GPUTPCCFNoiseSuppression_noiseSuppression,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,,,,,,,448,
78+
GPUTPCCFNoiseSuppression_updatePeaks,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,,,,,,,448,
79+
GPUTPCNNClusterizerKernels_runCfClusterizer,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,,,
80+
GPUTPCNNClusterizerKernels_fillInputNNCPU,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,,,
81+
GPUTPCNNClusterizerKernels_fillInputNNGPU,1024,,,,,,,,,,,,,,,,,
82+
GPUTPCNNClusterizerKernels_determineClass1Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,,,
83+
GPUTPCNNClusterizerKernels_determineClass2Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,,,
84+
GPUTPCNNClusterizerKernels_publishClass1Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,,,
85+
GPUTPCNNClusterizerKernels_publishClass2Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,,,
86+
GPUTPCNNClusterizerKernels_publishDeconvolutionFlags,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,,,
87+
GPUTPCCFStreamCompaction_scanStart,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",
88+
GPUTPCCFStreamCompaction_scanUp,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",
89+
GPUTPCCFStreamCompaction_scanTop,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",
90+
GPUTPCCFStreamCompaction_scanDown,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",
91+
GPUTPCCFStreamCompaction_compactDigits,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",
92+
GPUTPCCompressionGatherKernels_unbuffered,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,1024,,1024,1024,,1024,,1024,1024
93+
GPUTPCCompressionGatherKernels_buffered32,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,1024,,1024,1024,,1024,,1024,1024
94+
GPUTPCCompressionGatherKernels_buffered64,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,1024,,1024,1024,,1024,,1024,1024
95+
GPUTPCCompressionGatherKernels_buffered128,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,1024,,1024,1024,,1024,,1024,1024
96+
GPUTPCCompressionGatherKernels_multiBlock,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,1024,,1024,1024,,1024,,1024,1024
97+
GPUTPCGMMergerFinalize_0,256,,256,,,,,,,256,256,,256,,,,256,
98+
GPUTPCGMMergerFinalize_1,256,,256,,,,,,,256,256,,256,,,,256,
99+
GPUTPCGMMergerFinalize_2,256,,256,,,,,,,256,256,,256,,,,256,
100+
,,,,,,,,,,,,,,,,,,
101+
PAR:,,,,,,,,,,,,,,,,,,
102+
AMD_EUS_PER_CU,0,0,4,4,,,,,,,,,,,4,,0,4
103+
SORT_STARTHITS,1,0,,,,,,,,1,,1,1,,1,,1,1
104+
NEIGHBOURS_FINDER_MAX_NNEIGHUP,6,0,10,4,,,,,,4,4,4,4,,5,,4,5
105+
NEIGHBOURS_FINDER_UNROLL_GLOBAL,4,0,4,2,,,,,,2,,8,8,,4,,8,2
106+
NEIGHBOURS_FINDER_UNROLL_SHARED,1,0,0,0,,,,,,1,,1,0,,1,,1,1
107+
TRACKLET_SELECTOR_HITS_REG_SIZE,12,0,9,27,,,,,,20,20,20,20,,20,,20,20
108+
ALTERNATE_BORDER_SORT,1,0,1,1,,,,,,1,1,1,1,,1,,1,1
109+
SORT_BEFORE_FIT,1,0,1,1,,,,,,1,1,1,1,,1,,1,1
110+
NO_ATOMIC_PRECHECK,0,0,1,1,,,,,,1,1,1,1,,1,,1,1
111+
DEDX_STORAGE_TYPE,"""uint16_t""","""float""","""uint16_t""","""uint16_t""",,,,,,"""uint16_t""","""uint16_t""","""uint16_t""","""uint16_t""",,"""uint16_t""",,"""uint16_t""","""uint16_t"""
112+
MERGER_INTERPOLATION_ERROR_TYPE,"""half""","""float""","""half""","""half""",,,,,,"""half""","""half""","""half""","""half""",,"""half""",,"""half""","""half"""
113+
COMP_GATHER_KERNEL,4,0,4,4,,,,,,4,4,4,4,,4,,4,4
114+
COMP_GATHER_MODE,3,0,3,3,,,,,,3,3,3,3,,3,,3,3
115+
CF_SCAN_WORKGROUP_SIZE,512,0,,,,,,,,224,,992,448,,1024,,512,448

0 commit comments

Comments
 (0)