diff --git a/GPU/GPUTracking/Definitions/Parameters/GPUParameters.csv b/GPU/GPUTracking/Definitions/Parameters/GPUParameters.csv index 823a70b24565b..194f2933d0e21 100644 --- a/GPU/GPUTracking/Definitions/Parameters/GPUParameters.csv +++ b/GPU/GPUTracking/Definitions/Parameters/GPUParameters.csv @@ -1,115 +1,115 @@ -Architecture,default,default_cpu,MI100,VEGA,TAHITI,TESLA,FERMI,PASCAL,KEPLER,AMPERE,TURING,ADA,OPENCL,RDNA,MI210,BLACKWELL -,,,,,,,,,,,,,,,, -CORE:,,,,,,,,,,,,,,,, -WARP_SIZE,0,,64,64,32,32,32,32,32,32,32,32,32,32,64,32 -THREAD_COUNT_DEFAULT,256,,256,256,,,,,,512,512,512,256,512,512,512 -,,,,,,,,,,,,,,,, -LB:,,,,,,,,,,,,,,,, -GPUTPCCreateTrackingData,256,,"[256, 7]","[192, 2]",,,,,,384,256,256,,,,384 -GPUTPCTrackletConstructor,256,,"[768, 8]","[512, 10]","[256, 2]","[256, 1]","[256, 2]","[1024, 2]","[512, 4]","[256, 2]","[256, 2]","[256, 2]",,,,768 -GPUTPCTrackletSelector,256,,"[384, 5]","[192, 10]","[256, 3]","[256, 1]","[256, 3]","[512, 4]","[256, 3]","[192, 3]","[192, 3]","[192, 3]",,,,992 -GPUTPCNeighboursFinder,256,,"[192, 8]","[960, 8]",256,256,256,512,256,"[640, 1]","[640, 1]","[640, 1]",,,,992 -GPUTPCNeighboursCleaner,256,,"[128, 5]","[384, 9]",256,256,256,256,256,512,512,512,,,,672 -GPUTPCExtrapolationTracking,256,,"[256, 7]","[256, 2]",,,,,,"[128, 4]","[192, 2]","[192, 2]",,,,896 -GPUTRDTrackerKernels_gpuVersion,512,,,,,,,,,,,,,,, -GPUTPCCreateOccupancyMap_fill,256,,,,,,,,,,,,,,, -GPUTPCCreateOccupancyMap_fold,256,,,,,,,,,,,,,,, -GPUTRDTrackerKernels_o2Version,512,,,,,,,,,,,,,,, -GPUTPCCompressionKernels_step0attached,256,,"[128, 1]","[64, 2]",,,,,,"[64, 2]",128,128,,,,"[96, 3]" -GPUTPCCompressionKernels_step1unattached,256,,"[512, 2]","[512, 2]",,,,,,"[512, 3]","[512, 2]","[512, 2]",,,,"[512, 2]" -GPUTPCDecompressionKernels_step0attached,256,,"[128, 2]","[128, 2]",,,,,,"[32, 1]","[32, 1]","[32, 1]",,,,"[32, 1]" -GPUTPCDecompressionKernels_step1unattached,256,,"[64, 2]","[64, 2]",,,,,,"[32, 1]","[32, 1]","[32, 1]",,,,"[32, 1]" -GPUTPCDecompressionUtilKernels_sortPerSectorRow,256,,,,,,,,,,,,,,, -GPUTPCDecompressionUtilKernels_countFilteredClusters,256,,,,,,,,,,,,,,, -GPUTPCDecompressionUtilKernels_storeFilteredClusters,256,,,,,,,,,,,,,,, -GPUTPCCFDecodeZS,"[128, 4]",,"[64, 4]","[64, 1]",,,,,,"[64, 10]","[64, 8]","[64, 8]",,,,"[64, 10]" -GPUTPCCFDecodeZSLink,"""GPUCA_WARP_SIZE""",,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,,,,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,,"""GPUCA_WARP_SIZE""" -GPUTPCCFDecodeZSDenseLink,"""GPUCA_WARP_SIZE""",,"[""GPUCA_WARP_SIZE"", 4]","[""GPUCA_WARP_SIZE"", 14]",,,,,,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,,"[""GPUCA_WARP_SIZE"", 8]" -GPUTPCCFGather,"[1024, 1]",,"[1024, 5]","[1024, 1]",,,,,,"[1024, 1]","[1024, 1]","[1024, 1]",,,,"[1024, 1]" -COMPRESSION_GATHER,1024,,1024,1024,,,,,,1024,1024,1024,,,, -GPUTPCGMMergerTrackFit,256,,"[192, 2]","[64, 7]",,,,,,"[64, 4]","[32, 8]","[32, 8]",,,,"[64, 8]" -GPUTPCGMMergerFollowLoopers,256,,"[256, 5]","[256, 4]",,,,,,"[64, 12]","[128, 4]","[128, 4]",,,,"[224, 3]" -GPUTPCGMMergerSectorRefit,256,,"[64, 4]","[256, 2]",,,,,,"[32, 6]","[64, 5]","[64, 5]",,,,"[32, 10]" -GPUTPCGMMergerUnpackResetIds,256,,256,256,,,,,,256,256,256,,,,256 -GPUTPCGMMergerUnpackGlobal,256,,256,256,,,,,,256,256,256,,,,256 -GPUTPCGMMergerResolve_step0,256,,512,256,,,,,,256,256,256,,,,256 -GPUTPCGMMergerResolve_step1,256,,512,256,,,,,,256,256,256,,,,256 -GPUTPCGMMergerResolve_step2,256,,512,256,,,,,,256,256,256,,,,256 -GPUTPCGMMergerResolve_step3,256,,512,256,,,,,,256,256,256,,,,256 -GPUTPCGMMergerResolve_step4,256,,512,256,,,,,,"[256, 4]","[256, 4]","[256, 4]",,,,"[256, 4]" -GPUTPCGMMergerClearLinks,256,,256,256,,,,,,256,256,256,,,,256 -GPUTPCGMMergerMergeWithinPrepare,256,,256,256,,,,,,256,256,256,,,,256 -GPUTPCGMMergerMergeSectorsPrepare,256,,256,256,,,,,,"[256, 2]","[256, 2]","[256, 2]",,,,"[256, 2]" -GPUTPCGMMergerMergeBorders_step0,256,,512,256,,,,,,192,192,192,,,,192 -GPUTPCGMMergerMergeBorders_step2,256,,512,256,,,,,,"[64, 2]",256,256,,,,"[64, 2]" -GPUTPCGMMergerMergeCE,256,,512,256,,,,,,256,256,256,,,,256 -GPUTPCGMMergerLinkExtrapolatedTracks,256,,256,256,,,,,,256,256,256,,,,256 -GPUTPCGMMergerCollect,256,,"[768, 1]","[1024, 1]",,,,,,"[256, 2]","[128, 2]","[128, 2]",,,,"[288, 1]" -GPUTPCGMMergerSortTracksPrepare,256,,256,256,,,,,,256,256,256,,,,256 -GPUTPCGMMergerPrepareForFit_step0,256,,256,256,,,,,,256,256,256,,,,256 -GPUTPCGMMergerPrepareForFit_step1,256,,256,256,,,,,,256,256,256,,,,256 -GPUTPCGMMergerPrepareForFit_step2,256,,256,256,,,,,,256,256,256,,,,256 -GPUTPCGMMergerFinalize_step0,256,,,256,,,,,,,,,,,,256 -GPUTPCGMMergerFinalize_step1,256,,,256,,,,,,,,,,,,256 -GPUTPCGMMergerFinalize_step2,256,,,256,,,,,,,,,,,,256 -GPUTPCGMMergerMergeLoopers_step0,256,,,,,,,,,,,,,,,256 -GPUTPCGMMergerMergeLoopers_step1,256,,,,,,,,,,,,,,,256 -GPUTPCGMMergerMergeLoopers_step2,256,,,,,,,,,,,,,,,256 -GPUTPCGMO2Output_prepare,256,,,,,,,,,,,,,,,256 -GPUTPCGMO2Output_output,256,,,,,,,,,,,,,,,256 -GPUTPCStartHitsFinder,256,,"[1024, 2]","[1024, 7]",256,256,256,256,256,512,512,512,,,,608 -GPUTPCStartHitsSorter,256,,"[1024, 5]","[512, 7]",256,256,256,256,256,"[512, 1]","[512, 1]","[512, 1]",,,,608 -GPUTPCCFCheckPadBaseline,576,,"[576, 2]","[576, 2]",,,,,,"[576, 2]",,,,,,"[576, 2]" -GPUTPCCFHIPTailConnector,256,,256,256,,,,,,256, -GPUTPCCFHIPClusterizer,256,,256,256,,,,,,256, -GPUTPCCFChargeMapFiller_fillIndexMap,512,,512,512,,,,,,448,,,,,,448 -GPUTPCCFChargeMapFiller_fillFromDigits,512,,512,512,,,,,,448,,,,,,448 -GPUTPCCFChargeMapFiller_findFragmentStart,512,,512,512,,,,,,448,,,,,,448 -GPUTPCCFPeakFinder,512,,"[512, 9]","[512, 4]",,,,,,128,,,,,,"[128, 5]" -GPUTPCCFNoiseSuppression,512,,512,512,,,,,,448,,,,,, -GPUTPCCFDeconvolution,512,,"[512, 5]","[512, 5]",,,,,,384,,,,,,384 -GPUTPCCFClusterizer,512,,"[448, 3]","[512, 2]",,,,,,448,,,,,,"[160, 5]" -GPUTPCNNClusterizerKernels,512,,,,,,,,,,,,,,, -GPUTrackingRefitKernel_mode0asGPU,256,,,,,,,,,,,,,,,256 -GPUTrackingRefitKernel_mode1asTrackParCov,256,,,,,,,,,,,,,,,256 -GPUMemClean16,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,,,,,,, -GPUitoa,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,,,,,,, -GPUTPCCFNoiseSuppression_noiseSuppression,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,,,,,,448 -GPUTPCCFNoiseSuppression_updatePeaks,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,,,,,,448 -GPUTPCNNClusterizerKernels_runCfClusterizer,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,, -GPUTPCNNClusterizerKernels_fillInputNNCPU,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,, -GPUTPCNNClusterizerKernels_fillInputNNGPU,1024,,,,,,,,,,,,,,, -GPUTPCNNClusterizerKernels_determineClass1Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,, -GPUTPCNNClusterizerKernels_determineClass2Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,, -GPUTPCNNClusterizerKernels_publishClass1Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,, -GPUTPCNNClusterizerKernels_publishClass2Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,, -GPUTPCNNClusterizerKernels_publishDeconvolutionFlags,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,, -GPUTPCCFStreamCompaction_scanStart,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""" -GPUTPCCFStreamCompaction_scanUp,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""" -GPUTPCCFStreamCompaction_scanTop,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""" -GPUTPCCFStreamCompaction_scanDown,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""" -GPUTPCCFStreamCompaction_compactDigits,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""" -GPUTPCCompressionGatherKernels_unbuffered,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,, -GPUTPCCompressionGatherKernels_buffered32,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,, -GPUTPCCompressionGatherKernels_buffered64,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,, -GPUTPCCompressionGatherKernels_buffered128,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,, -GPUTPCCompressionGatherKernels_multiBlock,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,, -GPUTPCGMMergerFinalize_0,256,,256,,,,,,,256,256,256,,,,256 -GPUTPCGMMergerFinalize_1,256,,256,,,,,,,256,256,256,,,,256 -GPUTPCGMMergerFinalize_2,256,,256,,,,,,,256,256,256,,,,256 -,,,,,,,,,,,,,,,, -PAR:,,,,,,,,,,,,,,,, -AMD_EUS_PER_CU,0,0,4,4,,,,,,,,,,,,0 -SORT_STARTHITS,1,0,,,,,,,,,,,,,,1 -NEIGHBOURS_FINDER_MAX_NNEIGHUP,6,0,10,4,,,,,,4,4,4,,,,2 -NEIGHBOURS_FINDER_UNROLL_GLOBAL,4,0,4,2,,,,,,,,,,,,2 -NEIGHBOURS_FINDER_UNROLL_SHARED,1,0,0,0,,,,,,,,,,,,1 -TRACKLET_SELECTOR_HITS_REG_SIZE,12,0,9,27,,,,,,20,20,20,,,,2 -ALTERNATE_BORDER_SORT,1,0,1,1,,,,,,1,1,1,,,,1 -SORT_BEFORE_FIT,1,0,1,1,,,,,,1,1,1,,,,1 -NO_ATOMIC_PRECHECK,0,0,1,1,,,,,,1,1,1,,,,1 -DEDX_STORAGE_TYPE,"""uint16_t""","""float""","""uint16_t""","""uint16_t""",,,,,,"""uint16_t""","""uint16_t""","""uint16_t""",,,,"""uint16_t""" -MERGER_INTERPOLATION_ERROR_TYPE,"""half""","""float""","""half""","""half""",,,,,,"""half""","""half""","""half""",,,,"""half""" -COMP_GATHER_KERNEL,4,0,4,4,,,,,,4,4,4,,,,4 -COMP_GATHER_MODE,3,0,3,3,,,,,,3,3,3,,,,3 -CF_SCAN_WORKGROUP_SIZE,512,0,,,,,,,,,,,,,, +Architecture,default,default_cpu,MI100,VEGA,TAHITI,TESLA,FERMI,PASCAL,KEPLER,AMPERE,TURING,HOPPER,ADA,OPENCL,RDNA,MI210,BLACKWELL,MI300 +,,,,,,,,,,,,,,,,,, +CORE:,,,,,,,,,,,,,,,,,, +WARP_SIZE,0,,64,64,32,32,32,32,32,32,32,,32,32,64,64,32,64 +THREAD_COUNT_DEFAULT,256,,256,256,,,,,,512,512,,512,256,512,512,512, +,,,,,,,,,,,,,,,,,, +LB:,,,,,,,,,,,,,,,,,, +GPUTPCCreateTrackingData,256,,"[256, 7]","[192, 2]",,,,,,"[224, 7]",256,"[128, 14]",416,,"[64, 21]",,448,"[320, 2]" +GPUTPCTrackletConstructor,256,,"[768, 8]","[512, 10]","[256, 2]","[256, 1]","[256, 2]","[1024, 2]","[512, 4]",1024,"[256, 2]",1024,"[1024, 1]",,"[768, 2]",,"[768, 1]",512 +GPUTPCTrackletSelector,256,,"[384, 5]","[192, 10]","[256, 3]","[256, 1]","[256, 3]","[512, 4]","[256, 3]","[288, 3]","[192, 3]","[544, 1]","[32, 2]",,"[384, 3]",,"[32, 18]","[256, 6]" +GPUTPCNeighboursFinder,256,,"[192, 8]","[960, 8]",256,256,256,512,256,864,"[640, 1]","[512, 2]","[736, 1]",,"[480, 3]",,"[896, 1]","[704, 1]" +GPUTPCNeighboursCleaner,256,,"[128, 5]","[384, 9]",256,256,256,256,256,544,512,"[192, 9]","[512, 1]",,"[384, 5]",,832,"[640, 1]" +GPUTPCExtrapolationTracking,256,,"[256, 7]","[256, 2]",,,,,,"[352, 4]","[192, 2]","[896, 1]","[352, 1]",,"[1024, 1]",,"[224, 2]",1024 +GPUTRDTrackerKernels_gpuVersion,512,,,,,,,,,512,,512,512,,512,,512,512 +GPUTPCCreateOccupancyMap_fill,256,,,,,,,,,256,,256,256,,256,,256,256 +GPUTPCCreateOccupancyMap_fold,256,,,,,,,,,256,,256,256,,256,,256,256 +GPUTRDTrackerKernels_o2Version,512,,,,,,,,,512,,512,512,,512,,512,512 +GPUTPCCompressionKernels_step0attached,256,,"[128, 1]","[64, 2]",,,,,,"[160, 2]",128,"[448, 1]",352,,"[1024, 1]",,352,"[128, 4]" +GPUTPCCompressionKernels_step1unattached,256,,"[512, 2]","[512, 2]",,,,,,"[288, 4]","[512, 2]","[256, 4]","[512, 2]",,"[512, 3]",,"[512, 2]","[512, 3]" +GPUTPCDecompressionKernels_step0attached,256,,"[128, 2]","[128, 2]",,,,,,"[32, 1]","[32, 1]","[32, 1]","[32, 1]",,"[128, 1]",,"[32, 1]","[128, 1]" +GPUTPCDecompressionKernels_step1unattached,256,,"[64, 2]","[64, 2]",,,,,,"[32, 1]","[32, 1]","[32, 1]","[32, 1]",,"[64, 1]",,"[32, 1]","[64, 1]" +GPUTPCDecompressionUtilKernels_sortPerSectorRow,256,,,,,,,,,256,,256,256,,256,,256,256 +GPUTPCDecompressionUtilKernels_countFilteredClusters,256,,,,,,,,,256,,256,256,,256,,256,256 +GPUTPCDecompressionUtilKernels_storeFilteredClusters,256,,,,,,,,,256,,256,256,,256,,256,256 +GPUTPCCFDecodeZS,"[128, 4]",,"[64, 4]","[64, 1]",,,,,,"[32, 10]","[64, 8]","[32, 10]","[32, 10]",,"[64, 1]",,"[32, 10]","[64, 1]" +GPUTPCCFDecodeZSLink,"""GPUCA_WARP_SIZE""",,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,,,,32,"""GPUCA_WARP_SIZE""",32,32,,64,,32,64 +GPUTPCCFDecodeZSDenseLink,"""GPUCA_WARP_SIZE""",,"[""GPUCA_WARP_SIZE"", 4]","[""GPUCA_WARP_SIZE"", 14]",,,,,,"[32, 14]","""GPUCA_WARP_SIZE""","[32, 22]","[32, 22]",,"[64, 17]",,"[32, 6]","[64, 5]" +GPUTPCCFGather,"[1024, 1]",,"[1024, 5]","[1024, 1]",,,,,,"[160, 11]","[1024, 1]",736,896,,"[928, 1]",,"[128, 12]","[320, 2]" +COMPRESSION_GATHER,1024,,1024,1024,,,,,,1024,1024,,1024,,,,, +GPUTPCGMMergerTrackFit,256,,"[192, 2]","[64, 7]",,,,,,"[32, 16]","[32, 8]","[32, 14]","[160, 2]",,"[32, 24]",,512,"[64, 6]" +GPUTPCGMMergerFollowLoopers,256,,"[256, 5]","[256, 4]",,,,,,"[256, 4]","[128, 4]","[1024, 1]",640,,"[128, 16]",,"[1024, 1]","[256, 7]" +GPUTPCGMMergerSectorRefit,256,,"[64, 4]","[256, 2]",,,,,,"[32, 8]","[64, 5]","[32, 7]","[32, 7]",,"[32, 20]",,"[32, 11]","[64, 4]" +GPUTPCGMMergerUnpackResetIds,256,,256,256,,,,,,256,256,256,256,,256,,256,256 +GPUTPCGMMergerUnpackGlobal,256,,256,256,,,,,,256,256,256,256,,256,,256,256 +GPUTPCGMMergerResolve_step0,256,,512,256,,,,,,256,256,256,256,,256,,256,256 +GPUTPCGMMergerResolve_step1,256,,512,256,,,,,,256,256,256,256,,256,,256,256 +GPUTPCGMMergerResolve_step2,256,,512,256,,,,,,256,256,256,256,,256,,256,256 +GPUTPCGMMergerResolve_step3,256,,512,256,,,,,,256,256,256,256,,256,,256,256 +GPUTPCGMMergerResolve_step4,256,,512,256,,,,,,"[256, 4]","[256, 4]","[256, 4]","[256, 4]",,256,,"[256, 4]",256 +GPUTPCGMMergerClearLinks,256,,256,256,,,,,,256,256,256,256,,256,,256,256 +GPUTPCGMMergerMergeWithinPrepare,256,,256,256,,,,,,256,256,256,256,,256,,256,256 +GPUTPCGMMergerMergeSectorsPrepare,256,,256,256,,,,,,"[256, 2]","[256, 2]","[256, 2]","[256, 2]",,256,,"[256, 2]",256 +GPUTPCGMMergerMergeBorders_step0,256,,512,256,,,,,,192,192,192,192,,256,,192,256 +GPUTPCGMMergerMergeBorders_step2,256,,512,256,,,,,,"[64, 2]",256,"[64, 2]","[64, 2]",,256,,"[64, 2]",256 +GPUTPCGMMergerMergeCE,256,,512,256,,,,,,256,256,256,256,,256,,256,256 +GPUTPCGMMergerLinkExtrapolatedTracks,256,,256,256,,,,,,256,256,256,256,,256,,256,256 +GPUTPCGMMergerCollect,256,,"[768, 1]","[1024, 1]",,,,,,"[864, 1]","[128, 2]","[896, 1]",128,,1024,,416,"[384, 4]" +GPUTPCGMMergerSortTracksPrepare,256,,256,256,,,,,,256,256,256,256,,256,,256,256 +GPUTPCGMMergerPrepareForFit_step0,256,,256,256,,,,,,256,256,256,256,,256,,256,256 +GPUTPCGMMergerPrepareForFit_step1,256,,256,256,,,,,,256,256,256,256,,256,,256,256 +GPUTPCGMMergerPrepareForFit_step2,256,,256,256,,,,,,256,256,256,256,,256,,256,256 +GPUTPCGMMergerFinalize_step0,256,,,256,,,,,,256,,256,256,,256,,256,256 +GPUTPCGMMergerFinalize_step1,256,,,256,,,,,,256,,256,256,,256,,256,256 +GPUTPCGMMergerFinalize_step2,256,,,256,,,,,,256,,256,256,,256,,256,256 +GPUTPCGMMergerMergeLoopers_step0,256,,,,,,,,,256,,256,256,,256,,256,256 +GPUTPCGMMergerMergeLoopers_step1,256,,,,,,,,,256,,256,256,,256,,256,256 +GPUTPCGMMergerMergeLoopers_step2,256,,,,,,,,,256,,256,256,,256,,256,256 +GPUTPCGMO2Output_prepare,256,,,,,,,,,256,,256,256,,256,,256,256 +GPUTPCGMO2Output_output,256,,,,,,,,,256,,256,256,,256,,256,256 +GPUTPCStartHitsFinder,256,,"[1024, 2]","[1024, 7]",256,256,256,256,256,"[224, 1]",512,"[416, 4]",928,,"[320, 5]",,992,"[448, 3]" +GPUTPCStartHitsSorter,256,,"[1024, 5]","[512, 7]",256,256,256,256,256,"[320, 2]","[512, 1]","[864, 1]","[96, 2]",,"[192, 5]",,"[64, 17]","[448, 1]" +GPUTPCCFCheckPadBaseline,576,,"[576, 2]","[576, 2]",,,,,,"[576, 3]",,"[576, 1]","[576, 1]",,"[576, 2]",,576,576 +GPUTPCCFHIPTailConnector,256,,256,256,,,,,,"[224, 2]",,"[320, 5]","[704, 1]",,"[128, 7]",,"[192, 5]","[448, 4]" +GPUTPCCFHIPClusterizer,256,,256,256,,,,,,"[288, 5]",,"[480, 3]","[448, 3]",,352,,"[128, 4]","[512, 3]" +GPUTPCCFChargeMapFiller_fillIndexMap,512,,512,512,,,,,,448,,448,448,,512,,448,512 +GPUTPCCFChargeMapFiller_fillFromDigits,512,,512,512,,,,,,448,,448,448,,512,,448,512 +GPUTPCCFChargeMapFiller_findFragmentStart,512,,512,512,,,,,,448,,448,448,,512,,448,512 +GPUTPCCFPeakFinder,512,,"[512, 9]","[512, 4]",,,,,,416,,992,"[672, 1]",,"[384, 2]",,"[480, 3]","[192, 10]" +GPUTPCCFNoiseSuppression,512,,512,512,,,,,,608,,896,480,,160,,64,448 +GPUTPCCFDeconvolution,512,,"[512, 5]","[512, 5]",,,,,,"[480, 4]",,224,512,,480,,"[704, 2]","[448, 3]" +GPUTPCCFClusterizer,512,,"[448, 3]","[512, 2]",,,,,,"[608, 3]",,736,"[192, 3]",,576,,"[128, 6]","[832, 2]" +GPUTPCNNClusterizerKernels,512,,,,,,,,,,,,,,,,, +GPUTrackingRefitKernel_mode0asGPU,256,,,,,,,,,256,,256,256,,256,,256,256 +GPUTrackingRefitKernel_mode1asTrackParCov,256,,,,,,,,,256,,256,256,,256,,256,256 +GPUMemClean16,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,"[512, 1]",,"[512, 1]","[512, 1]",,"[256, 1]",,"[512, 1]","[256, 1]" +GPUitoa,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,"[512, 1]",,"[512, 1]","[512, 1]",,"[256, 1]",,"[512, 1]","[256, 1]" +GPUTPCCFNoiseSuppression_noiseSuppression,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,,,,,,,448, +GPUTPCCFNoiseSuppression_updatePeaks,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,,,,,,,448, +GPUTPCNNClusterizerKernels_runCfClusterizer,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,,, +GPUTPCNNClusterizerKernels_fillInputNNCPU,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,,, +GPUTPCNNClusterizerKernels_fillInputNNGPU,1024,,,,,,,,,,,,,,,,, +GPUTPCNNClusterizerKernels_determineClass1Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,,, +GPUTPCNNClusterizerKernels_determineClass2Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,,, +GPUTPCNNClusterizerKernels_publishClass1Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,,, +GPUTPCNNClusterizerKernels_publishClass2Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,,, +GPUTPCNNClusterizerKernels_publishDeconvolutionFlags,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,,, +GPUTPCCFStreamCompaction_scanStart,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""", +GPUTPCCFStreamCompaction_scanUp,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""", +GPUTPCCFStreamCompaction_scanTop,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""", +GPUTPCCFStreamCompaction_scanDown,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""", +GPUTPCCFStreamCompaction_compactDigits,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""", +GPUTPCCompressionGatherKernels_unbuffered,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,1024,,1024,1024,,1024,,1024,1024 +GPUTPCCompressionGatherKernels_buffered32,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,1024,,1024,1024,,1024,,1024,1024 +GPUTPCCompressionGatherKernels_buffered64,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,1024,,1024,1024,,1024,,1024,1024 +GPUTPCCompressionGatherKernels_buffered128,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,1024,,1024,1024,,1024,,1024,1024 +GPUTPCCompressionGatherKernels_multiBlock,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,1024,,1024,1024,,1024,,1024,1024 +GPUTPCGMMergerFinalize_0,256,,256,,,,,,,256,256,,256,,,,256, +GPUTPCGMMergerFinalize_1,256,,256,,,,,,,256,256,,256,,,,256, +GPUTPCGMMergerFinalize_2,256,,256,,,,,,,256,256,,256,,,,256, +,,,,,,,,,,,,,,,,,, +PAR:,,,,,,,,,,,,,,,,,, +AMD_EUS_PER_CU,0,0,4,4,,,,,,,,,,,4,,0,4 +SORT_STARTHITS,1,0,,,,,,,,1,,1,1,,1,,1,1 +NEIGHBOURS_FINDER_MAX_NNEIGHUP,6,0,10,4,,,,,,4,4,4,4,,5,,4,5 +NEIGHBOURS_FINDER_UNROLL_GLOBAL,4,0,4,2,,,,,,2,,8,8,,4,,8,2 +NEIGHBOURS_FINDER_UNROLL_SHARED,1,0,0,0,,,,,,1,,1,0,,1,,1,1 +TRACKLET_SELECTOR_HITS_REG_SIZE,12,0,9,27,,,,,,20,20,20,20,,20,,20,20 +ALTERNATE_BORDER_SORT,1,0,1,1,,,,,,1,1,1,1,,1,,1,1 +SORT_BEFORE_FIT,1,0,1,1,,,,,,1,1,1,1,,1,,1,1 +NO_ATOMIC_PRECHECK,0,0,1,1,,,,,,1,1,1,1,,1,,1,1 +DEDX_STORAGE_TYPE,"""uint16_t""","""float""","""uint16_t""","""uint16_t""",,,,,,"""uint16_t""","""uint16_t""","""uint16_t""","""uint16_t""",,"""uint16_t""",,"""uint16_t""","""uint16_t""" +MERGER_INTERPOLATION_ERROR_TYPE,"""half""","""float""","""half""","""half""",,,,,,"""half""","""half""","""half""","""half""",,"""half""",,"""half""","""half""" +COMP_GATHER_KERNEL,4,0,4,4,,,,,,4,4,4,4,,4,,4,4 +COMP_GATHER_MODE,3,0,3,3,,,,,,3,3,3,3,,3,,3,3 +CF_SCAN_WORKGROUP_SIZE,512,0,,,,,,,,224,,992,448,,1024,,512,448 diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index b229f46422eb8..1c3b997f64272 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -10,7 +10,7 @@ # or submit itself to any jurisdiction. # NOTE!!!! - Whenever this file is changed, move it over to alidist/resources -# FindO2GPU.cmake Version 16 +# FindO2GPU.cmake Version 17 set(CUDA_COMPUTETARGET_DEFAULT_FULL 80-real 86-real 89-real 120-real 75-virtual) set(HIP_AMDGPUTARGET_DEFAULT_FULL gfx906;gfx908) @@ -54,9 +54,11 @@ function(detect_gpu_arch backend) # Detect GPU architecture, optionally filterri endif() if(CUDA_FIRST_TARGET GREATER_EQUAL 120) set(CUDA_TARGET BLACKWELL) + elseif(CUDA_FIRST_TARGET GREATER_EQUAL 90) + set(CUDA_TARGET HOPPER) elseif(CUDA_FIRST_TARGET GREATER_EQUAL 89) set(CUDA_TARGET ADA) - elseif(CUDA_FIRST_TARGET GREATER_EQUAL 86) + elseif(CUDA_FIRST_TARGET GREATER_EQUAL 80) set(CUDA_TARGET AMPERE) elseif(CUDA_FIRST_TARGET GREATER_EQUAL 75) set(CUDA_TARGET TURING) @@ -81,6 +83,8 @@ function(detect_gpu_arch backend) # Detect GPU architecture, optionally filterri string(REGEX MATCH "....$" HIP_FIRST_TARGET_PADDED "0000${HIP_FIRST_TARGET}") if(HIP_FIRST_TARGET_PADDED STRGREATER_EQUAL "1000") set(HIP_TARGET RDNA) + elseif(HIP_FIRST_TARGET_PADDED STRGREATER_EQUAL "0940") + set(HIP_TARGET MI300) elseif(HIP_FIRST_TARGET_PADDED STRGREATER_EQUAL "090a") set(HIP_TARGET MI210) elseif(HIP_FIRST_TARGET_PADDED STRGREATER_EQUAL "0908")