From 40760096c50ef8defdf928638a4da07bd2eb17c7 Mon Sep 17 00:00:00 2001
From: David Rohr <drohr@jwdt.org>
Date: Sat, 13 Jun 2026 11:02:19 +0200
Subject: [PATCH] GPU: Process dEdx with full qTot range

---
 .../include/DataFormatsTPC/ClusterNative.h    | 27 +++++--------
 .../DataFormatsTPC/ClusterNativeHelper.h      |  2 +-
 Detectors/Align/src/AlignableDetectorTPC.cxx  | 12 +++---
 .../TPC/calibration/src/CalculatedEdx.cxx     |  2 +-
 .../calibration/src/CalibPadGainTracks.cxx    |  2 +-
 Detectors/TPC/calibration/src/TrackDump.cxx   |  2 +-
 .../src/HardwareClusterDecoder.cxx            |  2 +-
 .../reconstruction/test/testGPUCATracking.cxx |  2 +-
 .../Base/GPUReconstructionConvert.cxx         | 39 +------------------
 .../Base/GPUReconstructionConvert.h           |  1 -
 .../GPUTPCClusterStatistics.cxx               | 10 ++---
 .../GPUTPCCompressionKernels.cxx              |  6 +--
 GPU/GPUTracking/Global/GPUChainTracking.h     |  1 -
 .../GPUChainTrackingDebugAndProfiling.cxx     |  4 +-
 .../Global/GPUChainTrackingTransformation.cxx | 21 +---------
 GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx |  4 +-
 GPU/GPUTracking/Refit/GPUTrackingRefit.cxx    | 10 ++---
 .../Standalone/Benchmark/standalone.cxx       |  6 ---
 .../TPCClusterFinder/ClusterAccumulator.cxx   |  4 +-
 .../GPUTPCClusterFinderDump.cxx               |  3 +-
 20 files changed, 44 insertions(+), 116 deletions(-)

diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h
index 7939387bc76a8..fb81afdf67587 100644
--- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h
+++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h
@@ -73,7 +73,7 @@ struct ClusterNative {
   uint8_t sigmaTimePacked;  //< Sigma of the time in packed format
   uint8_t sigmaPadPacked;   //< Sigma of the pad in packed format
   uint16_t qMax;            //< QMax of the cluster
-  uint16_t qTot;            //< Total charge of the cluster
+  uint16_t qTotPacked;      //< Total charge of the cluster in packed format; values above maxRegularQtot mark a saturated cluster, use getQtot() for the decoded charge
 
   GPUd() static uint16_t packPad(float pad) { return (uint16_t)(pad * scalePadPacked + 0.5); }
   GPUd() static uint32_t packTime(float time) { return (uint32_t)(time * scaleTimePacked + 0.5); }
@@ -81,20 +81,13 @@ struct ClusterNative {
   GPUd() static float unpackTime(uint32_t time) { return float(time) * (1.f / scaleTimePacked); }
 
   GPUdDefault() ClusterNative() = default;
-  GPUd() ClusterNative(uint32_t time, uint8_t flags, uint16_t pad, uint8_t sigmaTime, uint8_t sigmaPad, uint16_t qmax, uint16_t qtot) : padPacked(pad), sigmaTimePacked(sigmaTime), sigmaPadPacked(sigmaPad), qMax(qmax), qTot(qtot)
+  GPUd() ClusterNative(uint32_t time, uint8_t flags, uint16_t pad, uint8_t sigmaTime, uint8_t sigmaPad, uint16_t qmax, uint16_t qtotPacked) : padPacked(pad), sigmaTimePacked(sigmaTime), sigmaPadPacked(sigmaPad), qMax(qmax), qTotPacked(qtotPacked)
   {
     setTimePackedFlags(time, flags);
   }
 
   GPUd() uint16_t getQmax() const { return qMax; }
-  GPUd() uint16_t getQtot() const
-  {
-    if (isSaturated()) [[unlikely]] {
-      auto sQtot = getSaturatedQtot();
-      return sQtot < USHRT_MAX ? sQtot : USHRT_MAX;
-    }
-    return qTot;
-  }
+  GPUd() uint32_t getQtot() const { return isSaturated() ? getSaturatedQtot() : (uint32_t)qTotPacked; } // Decoded total charge: saturated clusters are unpacked via getSaturatedQtot(), all others return qTotPacked as is
   GPUd() uint8_t getFlags() const { return timeFlagsPacked >> 24; }
   GPUd() uint32_t getTimePacked() const { return timeFlagsPacked & 0xFFFFFF; }
   GPUd() void setTimePackedFlags(uint32_t timePacked, uint8_t flags)
@@ -155,19 +148,19 @@ struct ClusterNative {
     sigmaPadPacked = tmp;
   }
 
-  GPUd() bool isSaturated() const { return qTot > maxRegularQtot; }
+  GPUd() bool isSaturated() const { return qTotPacked > maxRegularQtot; } // packed values above maxRegularQtot denote a saturated cluster
 
   GPUd() void setSaturatedQtot(uint32_t qtot)
   {
-    this->qTot = USHRT_MAX;
+    this->qTotPacked = USHRT_MAX; // value kept when qtot is not below maxSaturatedQtot, overwritten below otherwise
     if (qtot < maxSaturatedQtot) {
-      this->qTot = ((qtot + scaleSaturatedQtot / 2) / scaleSaturatedQtot) + maxRegularQtot;
+      this->qTotPacked = ((qtot + scaleSaturatedQtot / 2) / scaleSaturatedQtot) + maxRegularQtot; // round to units of scaleSaturatedQtot, then offset by maxRegularQtot
     }
   }
 
   GPUd() uint32_t getSaturatedQtot() const
   {
-    return uint32_t(qTot - maxRegularQtot) * scaleSaturatedQtot;
+    return uint32_t(qTotPacked - maxRegularQtot) * scaleSaturatedQtot; // undo the offset and scaling applied by setSaturatedQtot()
   }
 
   GPUd() void setSaturatedTailLength(uint32_t tail)
@@ -192,8 +185,8 @@ struct ClusterNative {
       return (this->sigmaPadPacked < rhs.sigmaPadPacked);
     } else if (this->qMax != rhs.qMax) {
       return (this->qMax < rhs.qMax);
-    } else if (this->qTot != rhs.qTot) {
-      return (this->qTot < rhs.qTot);
+    } else if (this->qTotPacked != rhs.qTotPacked) {
+      return (this->getQtot() != rhs.getQtot()) ? (this->getQtot() < rhs.getQtot()) : (this->qTotPacked < rhs.qTotPacked); // order by decoded charge; tie-break on the packed value, since different encodings may decode to the same charge and must not compare equivalent
     } else {
       return (this->getFlags() < rhs.getFlags());
     }
@@ -206,7 +199,7 @@ struct ClusterNative {
            this->sigmaTimePacked == rhs.sigmaTimePacked &&
            this->sigmaPadPacked == rhs.sigmaPadPacked &&
            this->qMax == rhs.qMax &&
-           this->qTot == rhs.qTot &&
+           this->qTotPacked == rhs.qTotPacked &&
            this->getFlags() == rhs.getFlags();
   }
 
diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNativeHelper.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNativeHelper.h
index b8d6a3e7a9428..c8f071c7cd416 100644
--- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNativeHelper.h
+++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNativeHelper.h
@@ -312,7 +312,7 @@ class ClusterNativeHelper
         sigmaTime = rhs.getSigmaTime();
         sigmaPad = rhs.getSigmaPad();
         qMax = rhs.qMax;
-        qTot = rhs.qTot;
+        qTot = rhs.qTotPacked;
         flags = rhs.getFlags();
         return *this;
       }
diff --git a/Detectors/Align/src/AlignableDetectorTPC.cxx b/Detectors/Align/src/AlignableDetectorTPC.cxx
index 980ded2d8ff2f..46473ce091c59 100644
--- a/Detectors/Align/src/AlignableDetectorTPC.cxx
+++ b/Detectors/Align/src/AlignableDetectorTPC.cxx
@@ -166,18 +166,18 @@ int AlignableDetectorTPC::processPoints(GIndex gid, int npntCut, bool inv)
           // mController->getTPCCorrMaps()->Transform(sector, row, cl->getPad(), cl->getTime(), x, y, z, tOffset);
           currentRow = row;
           currentSector = sector;
-          charge = cl->qTot;
+          charge = cl->getQtot();
           clusterState = nextState;
           combRow = row;
           LOGP(debug, "starting a supercluster at row {} of sector {} -> {},{},{}", currentRow, currentSector, x, y, z);
         } else {
           // float xx, yy, zz;
           // mController->getTPCCorrMaps()->Transform(sector, row, cl->getPad(), cl->getTime(), xx, yy, zz, tOffset);
-          x += xTmp * cl->qTot;
-          y += yTmp * cl->qTot;
-          z += zTmp * cl->qTot;
-          combRow += row * cl->qTot;
-          charge += cl->qTot;
+          x += xTmp * cl->getQtot();
+          y += yTmp * cl->getQtot();
+          z += zTmp * cl->getQtot();
+          combRow += row * cl->getQtot();
+          charge += cl->getQtot();
           clusterState |= nextState;
           npntCut--;
           LOGP(debug, "merging cluster #{} at row {} to a supercluster starting at row {} ", clusters + 1, row, currentRow);
diff --git a/Detectors/TPC/calibration/src/CalculatedEdx.cxx b/Detectors/TPC/calibration/src/CalculatedEdx.cxx
index 396214775eb76..18b2f6e3010c7 100644
--- a/Detectors/TPC/calibration/src/CalculatedEdx.cxx
+++ b/Detectors/TPC/calibration/src/CalculatedEdx.cxx
@@ -245,7 +245,7 @@ void CalculatedEdx::calculatedEdx(o2::tpc::TrackTPC& track, dEdxInfo& output, fl
     }
 
     // get charge values
-    float chargeTot = cl.qTot;
+    float chargeTot = cl.getQtot();
     float chargeMax = cl.qMax;
 
     // get threshold
diff --git a/Detectors/TPC/calibration/src/CalibPadGainTracks.cxx b/Detectors/TPC/calibration/src/CalibPadGainTracks.cxx
index 37400a28e4670..99c2f1b88af9a 100644
--- a/Detectors/TPC/calibration/src/CalibPadGainTracks.cxx
+++ b/Detectors/TPC/calibration/src/CalibPadGainTracks.cxx
@@ -100,7 +100,7 @@ void CalibPadGainTracks::processTrack(o2::tpc::TrackTPC track, o2::gpu::GPUO2Int
     }
 
     const int region = Mapper::REGION[rowIndex];
-    const float charge = (mChargeType == ChargeType::Max) ? cl.qMax : cl.qTot;
+    const float charge = (mChargeType == ChargeType::Max) ? cl.qMax : cl.getQtot();
     const float effectiveLength = mCalibTrackTopologyPol ? getTrackTopologyCorrectionPol(track, cl, region, charge) : getTrackTopologyCorrection(track, region);
 
     const unsigned char pad = std::clamp(static_cast<unsigned int>(cl.getPad() + 0.5f), static_cast<unsigned int>(0), Mapper::PADSPERROW[region][Mapper::getLocalRowFromGlobalRow(rowIndex)] - 1); // the left side of the pad is defined at e.g. 3.5 and the right side at 4.5
diff --git a/Detectors/TPC/calibration/src/TrackDump.cxx b/Detectors/TPC/calibration/src/TrackDump.cxx
index 72042a537dc5f..4a286d4d27149 100644
--- a/Detectors/TPC/calibration/src/TrackDump.cxx
+++ b/Detectors/TPC/calibration/src/TrackDump.cxx
@@ -73,7 +73,7 @@ void TrackDump::filter(const gsl::span<const TrackTPC> tracks, ClusterNativeAcce
       excludes[sector][padrow].emplace_back(clusterIndexInRow);
 
       if (clustersGlobal) {
-        auto& clGlobal = clustersGlobal->emplace_back(ClusterGlobal{clInfo.gx(), clInfo.gy(), cl.qMax, cl.qTot, sector, padrow});
+        auto& clGlobal = clustersGlobal->emplace_back(ClusterGlobal{clInfo.gx(), clInfo.gy(), cl.qMax, cl.getQtot(), sector, padrow});
       }
     }
   }
diff --git a/Detectors/TPC/reconstruction/src/HardwareClusterDecoder.cxx b/Detectors/TPC/reconstruction/src/HardwareClusterDecoder.cxx
index e2259cce59e50..3707c566944d2 100644
--- a/Detectors/TPC/reconstruction/src/HardwareClusterDecoder.cxx
+++ b/Detectors/TPC/reconstruction/src/HardwareClusterDecoder.cxx
@@ -87,7 +87,7 @@ int HardwareClusterDecoder::decodeClusters(std::vector<std::pair<const ClusterHa
             cOut.setSigmaPad(std::sqrt(cIn.getSigmaPad2()));
             cOut.setSigmaTime(std::sqrt(cIn.getSigmaTime2()));
             cOut.qMax = cIn.getQMax();
-            cOut.qTot = cIn.getQTot();
+            cOut.qTotPacked = cIn.getQTot();
             mIntegrator->integrateCluster(sector, padRowGlobal, pad, cIn.getQTot());
             if (outMCLabels) {
               auto& mcOut = outMCLabelContainers[containerRowCluster[sector][padRowGlobal]];
diff --git a/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx b/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx
index 20660473f4c37..9d44e5c8b5890 100644
--- a/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx
+++ b/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx
@@ -95,7 +95,7 @@ BOOST_AUTO_TEST_CASE(CATracking_test1)
     cont[i].clusters[0].setSigmaTime(1);
     cont[i].clusters[0].setSigmaPad(1);
     cont[i].clusters[0].qMax = 10;
-    cont[i].clusters[0].qTot = 50;
+    cont[i].clusters[0].qTotPacked = 50;
   }
   std::unique_ptr<ClusterNative[]> clusterBuffer;
   std::unique_ptr<ClusterNativeAccess> clusters = ClusterNativeHelper::createClusterNativeIndex(clusterBuffer, cont, nullptr, nullptr);
diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx
index 9ec1af55a7a62..0b5e15303f22a 100644
--- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx
+++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx
@@ -48,43 +48,6 @@ using namespace o2::tpc;
 using namespace o2::tpc::constants;
 using namespace std::string_literals;
 
-void GPUReconstructionConvert::ConvertNativeToClusterData(o2::tpc::ClusterNativeAccess* native, std::unique_ptr<GPUTPCClusterData[]>* clusters, uint32_t* nClusters, const TPCFastTransformPOD* transform, int32_t continuousMaxTimeBin)
-{
-  memset(nClusters, 0, NSECTORS * sizeof(nClusters[0]));
-  uint32_t offset = 0;
-  for (uint32_t i = 0; i < NSECTORS; i++) {
-    uint32_t nClSector = 0;
-    for (uint32_t j = 0; j < GPUTPCGeometry::NROWS; j++) {
-      nClSector += native->nClusters[i][j];
-    }
-    nClusters[i] = nClSector;
-    clusters[i].reset(new GPUTPCClusterData[nClSector]);
-    nClSector = 0;
-    for (uint32_t j = 0; j < GPUTPCGeometry::NROWS; j++) {
-      for (uint32_t k = 0; k < native->nClusters[i][j]; k++) {
-        const auto& clin = native->clusters[i][j][k];
-        float x = 0, y = 0, z = 0;
-        if (continuousMaxTimeBin == 0) {
-          transform->Transform(i, j, clin.getPad(), clin.getTime(), x, y, z);
-        } else {
-          transform->TransformInTimeFrame(i, j, clin.getPad(), clin.getTime(), x, y, z, continuousMaxTimeBin);
-        }
-        auto& clout = clusters[i].get()[nClSector];
-        clout.x = x;
-        clout.y = y;
-        clout.z = z;
-        clout.row = j;
-        clout.amp = clin.qTot;
-        clout.flags = clin.getFlags();
-        clout.id = offset + k;
-        nClSector++;
-      }
-      native->clusterOffset[i][j] = offset;
-      offset += native->nClusters[i][j];
-    }
-  }
-}
-
 void GPUReconstructionConvert::ConvertRun2RawToNative(o2::tpc::ClusterNativeAccess& native, std::unique_ptr<ClusterNative[]>& nativeBuffer, const AliHLTTPCRawCluster** rawClusters, uint32_t* nRawClusters)
 {
   memset((void*)&native, 0, sizeof(native));
@@ -110,7 +73,7 @@ void GPUReconstructionConvert::ConvertRun2RawToNative(o2::tpc::ClusterNativeAcce
       c.setSigmaTime(CAMath::Sqrt(org.GetSigmaTime2()));
       c.setSigmaPad(CAMath::Sqrt(org.GetSigmaPad2()));
       c.qMax = org.GetQMax();
-      c.qTot = org.GetCharge();
+      c.qTotPacked = org.GetCharge();
     }
   }
 }
diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.h b/GPU/GPUTracking/Base/GPUReconstructionConvert.h
index 17958303103a0..bcf621e379884 100644
--- a/GPU/GPUTracking/Base/GPUReconstructionConvert.h
+++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.h
@@ -50,7 +50,6 @@ class GPUReconstructionConvert
 {
  public:
   constexpr static uint32_t NSECTORS = o2::tpc::constants::MAXSECTOR;
-  static void ConvertNativeToClusterData(o2::tpc::ClusterNativeAccess* native, std::unique_ptr<GPUTPCClusterData[]>* clusters, uint32_t* nClusters, const TPCFastTransformPOD* transform, int32_t continuousMaxTimeBin = 0);
   static void ConvertRun2RawToNative(o2::tpc::ClusterNativeAccess& native, std::unique_ptr<o2::tpc::ClusterNative[]>& nativeBuffer, const AliHLTTPCRawCluster** rawClusters, uint32_t* nRawClusters);
   template <class S>
   static void RunZSEncoder(const S& in, std::unique_ptr<uint64_t[]>* outBuffer, uint32_t* outSizes, o2::raw::RawFileWriter* raw, const o2::InteractionRecord* ir, const GPUParam& param, int32_t version, bool verify, float threshold = 0.f, bool padding = false, std::function<void(std::vector<o2::tpc::Digit>&)> digitsFilter = nullptr);
diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx
index 3191067570b66..1cc7d0403879c 100644
--- a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx
+++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx
@@ -131,7 +131,7 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess*
             GPUTPCCompression::truncateSignificantBitsChargeMax(tmpClusters[k].qMax, param);
             GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaPadPacked, param);
             if (!tmpClusters[k].isSaturated()) [[likely]] {
-              GPUTPCCompression::truncateSignificantBitsCharge(tmpClusters[k].qTot, param);
+              GPUTPCCompression::truncateSignificantBitsCharge(tmpClusters[k].qTotPacked, param);
               GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaTimePacked, param);
             }
           }
@@ -140,10 +140,10 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess*
         for (uint32_t k = 0; k < clustersNative->nClusters[i][j]; k++) {
           const o2::tpc::ClusterNative& c1 = tmpClusters[k];
           const o2::tpc::ClusterNative& c2 = clustersNativeDecoded.clusters[i][j][k];
-          if (c1.timeFlagsPacked != c2.timeFlagsPacked || c1.padPacked != c2.padPacked || c1.sigmaTimePacked != c2.sigmaTimePacked || c1.sigmaPadPacked != c2.sigmaPadPacked || c1.qMax != c2.qMax || c1.qTot != c2.qTot) {
+          if (c1.timeFlagsPacked != c2.timeFlagsPacked || c1.padPacked != c2.padPacked || c1.sigmaTimePacked != c2.sigmaTimePacked || c1.sigmaPadPacked != c2.sigmaPadPacked || c1.qMax != c2.qMax || c1.qTotPacked != c2.qTotPacked) {
             if (decodingErrors++ < 100) {
-              GPUWarning("Cluster mismatch: sector %2u row %3u hit %5u: %6d %3d %4d %3d %3d %4d %4d", i, j, k, (int32_t)c1.getTimePacked(), (int32_t)c1.getFlags(), (int32_t)c1.padPacked, (int32_t)c1.sigmaTimePacked, (int32_t)c1.sigmaPadPacked, (int32_t)c1.qMax, (int32_t)c1.qTot);
-              GPUWarning("%45s %6d %3d %4d %3d %3d %4d %4d", "", (int32_t)c2.getTimePacked(), (int32_t)c2.getFlags(), (int32_t)c2.padPacked, (int32_t)c2.sigmaTimePacked, (int32_t)c2.sigmaPadPacked, (int32_t)c2.qMax, (int32_t)c2.qTot);
+              GPUWarning("Cluster mismatch: sector %2u row %3u hit %5u: %6d %3d %4d %3d %3d %4d %4d", i, j, k, (int32_t)c1.getTimePacked(), (int32_t)c1.getFlags(), (int32_t)c1.padPacked, (int32_t)c1.sigmaTimePacked, (int32_t)c1.sigmaPadPacked, (int32_t)c1.qMax, (int32_t)c1.qTotPacked);
+              GPUWarning("%45s %6d %3d %4d %3d %3d %4d %4d", "", (int32_t)c2.getTimePacked(), (int32_t)c2.getFlags(), (int32_t)c2.padPacked, (int32_t)c2.sigmaTimePacked, (int32_t)c2.sigmaPadPacked, (int32_t)c2.qMax, (int32_t)c2.qTotPacked);
             }
           }
         }
@@ -194,7 +194,7 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess*
       for (uint32_t j = 0; j < GPUTPCGeometry::NROWS; j++) {
         for (uint32_t k = 0; k < clustersNativeDecoded.nClusters[i][j]; k++) {
           const auto& cl = clustersNativeDecoded.clusters[i][j][k];
-          csv << i << ',' << j << ',' << cl.getTimePacked() << ',' << cl.padPacked << ',' << (uint32_t)cl.getFlags() << ',' << cl.qTot << ',' << cl.qMax << ',' << (uint32_t)cl.sigmaTimePacked << ',' << (uint32_t)cl.sigmaPadPacked << '\n';
+          csv << i << ',' << j << ',' << cl.getTimePacked() << ',' << cl.padPacked << ',' << (uint32_t)cl.getFlags() << ',' << cl.qTotPacked << ',' << cl.qMax << ',' << (uint32_t)cl.sigmaTimePacked << ',' << (uint32_t)cl.sigmaPadPacked << '\n';
         }
       }
     }
diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx
index bd42c2a2472d4..b499ea10e679b 100644
--- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx
+++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx
@@ -117,7 +117,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step0at
         float time = CAMath::Max(0.f, geo.LinearZ2Time(hit.sector, track.Z() + zOffset));
         c.timeResA[cidx] = (orgCl.getTimePacked() - orgCl.packTime(time)) & 0xFFFFFF;
       }
-      uint16_t qtot = orgCl.qTot, qmax = orgCl.qMax;
+      uint16_t qtot = orgCl.qTotPacked, qmax = orgCl.qMax;
       uint8_t sigmapad = orgCl.sigmaPadPacked, sigmatime = orgCl.sigmaTimePacked;
       if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionTruncate) {
         compressor.truncateSignificantBitsChargeMax(qmax, param);
@@ -184,7 +184,7 @@ GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<4>::opera
   if (mClsPtr[a].padPacked != mClsPtr[b].padPacked) {
     return mClsPtr[a].padPacked < mClsPtr[b].padPacked;
   }
-  return mClsPtr[a].qTot < mClsPtr[b].qTot;
+  return mClsPtr[a].qTotPacked < mClsPtr[b].qTotPacked;
 }
 
 GPUd() bool GPUTPCCompression::rejectCluster(int32_t idx, const GPUParam& GPUrestrict() param, const GPUTrackingInOutPointers& GPUrestrict() ioPtrs) const
@@ -296,7 +296,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1un
         int32_t preId = j != 0 ? (int32_t)sortBuffer[j - 1] : (totalCount != 0 ? (int32_t)smem.lastIndex : -1);
         GPUTPCCompression_EncodeUnattached(param.rec.tpc.compressionTypeMask, orgCl, c.timeDiffU[outidx], c.padDiffU[outidx], preId == -1 ? nullptr : &clusters->clusters[iSector][iRow][preId]);
 
-        uint16_t qtot = orgCl.qTot, qmax = orgCl.qMax;
+        uint16_t qtot = orgCl.qTotPacked, qmax = orgCl.qMax;
         uint8_t sigmapad = orgCl.sigmaPadPacked, sigmatime = orgCl.sigmaTimePacked;
         if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionTruncate) {
           compressor.truncateSignificantBitsChargeMax(qmax, param);
diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h
index 78a43856f00f1..3c3531530372a 100644
--- a/GPU/GPUTracking/Global/GPUChainTracking.h
+++ b/GPU/GPUTracking/Global/GPUChainTracking.h
@@ -148,7 +148,6 @@ class GPUChainTracking : public GPUChain
 
   // Converter / loader functions
   int32_t ConvertNativeToClusterData();
-  void ConvertNativeToClusterDataLegacy();
   void ConvertRun2RawToNative();
   void ConvertZSEncoder(int32_t version);
   void ConvertZSFilter(bool zs12bit);
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx
index 8f200d2c57a6d..7659d19693777 100644
--- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx
+++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx
@@ -317,7 +317,7 @@ void GPUChainTracking::RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* cluster
           o2::tpc::ClusterNative cl = clusters->clusters[iSector][iRow][k];
           bool keep = true;
           if (applyClusterCuts) {
-            keep = keep && cl.qTot > param().rec.tpc.cfQTotCutoff && cl.qMax > param().rec.tpc.cfQMaxCutoff;
+            keep = keep && cl.getQtot() > param().rec.tpc.cfQTotCutoff && cl.qMax > param().rec.tpc.cfQMaxCutoff;
             keep = keep && (!(cl.getFlags() & o2::tpc::ClusterNative::flagSingle) || ((cl.sigmaPadPacked || cl.qMax > param().rec.tpc.cfQMaxCutoffSinglePad) && (cl.sigmaTimePacked || cl.qMax > param().rec.tpc.cfQMaxCutoffSingleTime)));
           }
           if (param().tpcCutTimeBin > 0) {
@@ -353,7 +353,7 @@ void GPUChainTracking::DumpClusters(std::ostream& out, const o2::tpc::ClusterNat
       out << "  Row: " << i << ": " << clusters->nClusters[iSec][i] << " clusters:\n";
       for (uint32_t j = 0; j < clusters->nClusters[iSec][i]; j++) {
         const auto& cl = clusters->clusters[iSec][i][j];
-        out << "    " << std::hex << cl.timeFlagsPacked << std::dec << " " << cl.padPacked << " " << int32_t{cl.sigmaTimePacked} << " " << int32_t{cl.sigmaPadPacked} << " " << cl.qMax << " " << cl.qTot << "\n";
+        out << "    " << std::hex << cl.timeFlagsPacked << std::dec << " " << cl.padPacked << " " << int32_t{cl.sigmaTimePacked} << " " << int32_t{cl.sigmaPadPacked} << " " << cl.qMax << " " << cl.qTotPacked << "\n";
       }
     }
   }
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx
index 770997333aa23..46bcd6931c302 100644
--- a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx
+++ b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx
@@ -62,25 +62,6 @@ int32_t GPUChainTracking::ConvertNativeToClusterData()
   return 0;
 }
 
-void GPUChainTracking::ConvertNativeToClusterDataLegacy()
-{
-  ClusterNativeAccess* tmp = mIOMem.clusterNativeAccess.get();
-  if (tmp != mIOPtrs.clustersNative) {
-    *tmp = *mIOPtrs.clustersNative;
-  }
-  GPUReconstructionConvert::ConvertNativeToClusterData(mIOMem.clusterNativeAccess.get(), mIOMem.clusterData, mIOPtrs.nClusterData, processors()->calibObjects.fastTransform, param().continuousMaxTimeBin);
-  for (uint32_t i = 0; i < NSECTORS; i++) {
-    mIOPtrs.clusterData[i] = mIOMem.clusterData[i].get();
-    if (GetProcessingSettings().registerStandaloneInputMemory) {
-      if (mRec->registerMemoryForGPU(mIOMem.clusterData[i].get(), mIOPtrs.nClusterData[i] * sizeof(*mIOPtrs.clusterData[i]))) {
-        throw std::runtime_error("Error registering memory for GPU");
-      }
-    }
-  }
-  mIOPtrs.clustersNative = nullptr;
-  mIOMem.clustersNative.reset(nullptr);
-}
-
 void GPUChainTracking::ConvertRun2RawToNative()
 {
   GPUReconstructionConvert::ConvertRun2RawToNative(*mIOMem.clusterNativeAccess, mIOMem.clustersNative, mIOPtrs.rawClusters, mIOPtrs.nRawClusters);
@@ -143,7 +124,7 @@ int32_t GPUChainTracking::ForwardTPCDigits()
         c.setPad(d.getPad());
         c.setSigmaTime(1);
         c.setSigmaPad(1);
-        c.qTot = c.qMax = d.getChargeFloat();
+        c.qTotPacked = c.qMax = d.getChargeFloat();
         tmp[i][d.getRow()].emplace_back(c);
         nTotal++;
       }
diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx
index 53e7f6c918309..23842d8a1f859 100644
--- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx
+++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx
@@ -294,7 +294,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
               const int32_t clusterCount = (ihit - ihitMergeFirst) * wayDirection + 1;
               for (int32_t iTmp = ihitMergeFirst; iTmp != ihit + wayDirection; iTmp += wayDirection) {
                 const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num];
-                qtot += cl.qTot;
+                qtot += cl.getQtot();
                 qmax = CAMath::Max<float>(qmax, cl.qMax);
                 pad += cl.getPad();
                 relTime += cl.getTime();
@@ -441,7 +441,7 @@ GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t
     clusterState = 0;
     while (true) {
       const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[clusters[ihit].num];
-      float clamp = cl.qTot;
+      float clamp = cl.getQtot();
       float clx, cly, clz;
       merger->GetConstantMem()->calibObjects.fastTransform->Transform(clusters[ihit].sector, clusters[ihit].row, cl.getPad(), cl.getTime(), clx, cly, clz, mTOffset);
       float dy = cly - projY;
diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx
index f8bac8ce83718..de0525edcce2a 100644
--- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx
+++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx
@@ -290,7 +290,7 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov
           CADEBUG(printf("\tHit %3d/%3d Row %3d: Cluster Alpha %8.3f %3d, X %8.3f - Y %8.3f, Z %8.3f - State %d\n", ii, count, row, mPparam->Alpha(sector), (int32_t)sector, x, y, z, (int32_t)nextState));
           currentRow = row;
           currentSector = sector;
-          charge = cl->qTot;
+          charge = cl->getQtot();
           clusterState = nextState;
           time = cl->getTime();
           invSqrtCharge = CAMath::InvSqrt(cl->qMax);
@@ -299,10 +299,10 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov
           float xx, yy, zz;
           mPfastTransform->Transform(sector, row, cl->getPad(), cl->getTime(), xx, yy, zz, tOffset);
           CADEBUG(printf("\tHit %3d/%3d Row %3d: Cluster Alpha %8.3f %3d, X %8.3f - Y %8.3f, Z %8.3f - State %d\n", ii, count, row, mPparam->Alpha(sector), (int32_t)sector, xx, yy, zz, (int32_t)nextState));
-          x += xx * cl->qTot;
-          y += yy * cl->qTot;
-          z += zz * cl->qTot;
-          charge += cl->qTot;
+          x += xx * cl->getQtot();
+          y += yy * cl->getQtot();
+          z += zz * cl->getQtot();
+          charge += cl->getQtot();
           clusterState |= nextState;
         }
         cl = nullptr;
diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx
index cdb5358685670..a9fed2d4c9897 100644
--- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx
+++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx
@@ -566,12 +566,6 @@ int32_t ReadEvent(int32_t n)
     }
   }
 #endif
-  if (chainTracking->mIOPtrs.clustersNative && (configStandalone.TF.bunchSim || configStandalone.TF.nMerge || !configStandalone.runTransformation)) {
-    if (configStandalone.proc.debugLevel >= 2) {
-      printf("Converting Native to Legacy ClusterData for overlaying - WARNING: No raw clusters produced - Compression etc will not run!!!\n");
-    }
-    chainTracking->ConvertNativeToClusterDataLegacy();
-  }
   return 0;
 }
 
diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx
index a80283b91c940..bcf04d2a6de1b 100644
--- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx
+++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx
@@ -94,8 +94,8 @@ GPUd() bool ClusterAccumulator::toNative(const CfChargePos& pos, const Charge q,
     isEdgeCluster = pad == 0 || pad == GPUTPCGeometry::NPads(pos.row()) - 1;
   }
 
-  cn.qTot = CAMath::Float2UIntRn(mQtot);
-  if (cn.qTot <= param.rec.tpc.cfQTotCutoff) {
+  cn.qTotPacked = CAMath::Float2UIntRn(mQtot);
+  if (cn.qTotPacked <= param.rec.tpc.cfQTotCutoff) {
     return false;
   }
   cn.qMax = q; // cfQMaxCutoff check already done at PeakFinder level
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx
index 2b21af6a08bed..3b06db8efc1a3 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx
@@ -166,10 +166,9 @@ void GPUTPCClusterFinder::DumpClusters(std::ostream& out)
 
     out << "Row: " << i << ": " << N << "\n";
     for (const auto& cl : sortedCluster) {
-      uint32_t qTot = cl.qTot;
+      uint32_t qTot = cl.getQtot();
       uint32_t sigmaTime = cl.sigmaTimePacked;
       if (cl.isSaturated()) {
-        qTot = cl.getSaturatedQtot();
         sigmaTime = cl.getSaturatedTailLength();
       }
       out << std::hex << cl.timeFlagsPacked << std::dec << " " << cl.padPacked << " " << sigmaTime << " " << int32_t{cl.sigmaPadPacked} << " " << cl.qMax << " " << qTot << "\n";