From 0d77bb7b3a00f30322e7024b48fb7f353d00e0d3 Mon Sep 17 00:00:00 2001 From: Oliver Rietmann Date: Wed, 4 Feb 2026 19:21:17 +0100 Subject: [PATCH 1/2] correct result --- GPU/Common/GPUCommonAlgorithm.h | 18 +- GPU/Common/GPUCommonAlgorithmThrust.h | 14 +- GPU/Common/MemLayout.h | 341 ++++++++---------- GPU/GPUTracking/DataTypes/GPUDataTypes.h | 5 +- GPU/GPUTracking/Global/GPUChainTracking.cxx | 13 +- GPU/GPUTracking/Global/GPUChainTracking.h | 7 +- GPU/GPUTracking/Global/GPUChainTrackingIO.cxx | 8 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 54 +-- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 7 +- .../Merger/GPUTPCGMSectorTrack.cxx | 16 +- GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h | 9 +- .../SectorTracker/GPUTPCBaseTrackParam.h | 52 +-- .../GPUTPCExtrapolationTracking.cxx | 6 +- .../GPUTPCSectorDebugSortKernels.cxx | 6 +- GPU/GPUTracking/SectorTracker/GPUTPCTrack.h | 63 ++-- .../SectorTracker/GPUTPCTrackLinearisation.h | 4 +- .../SectorTracker/GPUTPCTrackParam.cxx | 10 +- .../SectorTracker/GPUTPCTrackParam.h | 35 +- .../SectorTracker/GPUTPCTracker.cxx | 9 +- GPU/GPUTracking/SectorTracker/GPUTPCTracker.h | 13 +- .../SectorTracker/GPUTPCTrackerDump.cxx | 2 +- .../SectorTracker/GPUTPCTracklet.h | 31 +- .../GPUTPCTrackletConstructor.cxx | 14 +- .../SectorTracker/GPUTPCTrackletConstructor.h | 11 +- .../SectorTracker/GPUTPCTrackletSelector.cxx | 2 +- .../display/render/GPUDisplayDraw.cxx | 2 +- 26 files changed, 344 insertions(+), 408 deletions(-) diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index db57e7ec06d4b..655c0291129ff 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -16,6 +16,7 @@ #define GPUCOMMONALGORITHM_H #include "GPUCommonDef.h" +#include "MemLayout.h" #if !defined(GPUCA_GPUCODE) // Could also enable custom search on the CPU, but it is not always faster, so we stick to std::sort #include @@ -28,19 +29,20 @@ namespace o2::gpu { class GPUCommonAlgorithm { + public: template - GPUd() static void sort(T* begin, T* end); + GPUd() static void sort(T begin, T end); template GPUd() static void sortInBlock(T* begin, T* end); template - GPUd() static void sortDeviceDynamic(T* begin, T* end); + GPUd() static void sortDeviceDynamic(T begin, T end); template - GPUd() static void sort(T* begin, T* end, const S& comp); + GPUd() static void sort(T begin, T end, const S& comp); template GPUd() static void sortInBlock(T* begin, T* end, const S& comp); template - GPUd() static void sortDeviceDynamic(T* begin, T* end, const S& comp); + GPUd() static void sortDeviceDynamic(T begin, T end, const S& comp); #ifndef __OPENCL__ template GPUh() static void sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp); @@ -224,7 +226,7 @@ namespace o2::gpu { template -GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end) +GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T begin, T end) { #ifndef GPUCA_GPUCODE GPUCommonAlgorithm::sort(begin, end); @@ -234,7 +236,7 @@ GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end) } template -GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end, const S& comp) +GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T begin, T end, const S& comp) { GPUCommonAlgorithm::sort(begin, end, comp); } @@ -248,7 +250,7 @@ namespace o2::gpu { template -GPUdi() void GPUCommonAlgorithm::sort(T* begin, T* end) +GPUdi() void GPUCommonAlgorithm::sort(T begin, T end) { #ifdef GPUCA_ALGORITHM_STD std::sort(begin, end); @@ -258,7 +260,7 @@ GPUdi() void GPUCommonAlgorithm::sort(T* begin, T* end) } template -GPUdi() void GPUCommonAlgorithm::sort(T* begin, T* end, const S& comp) +GPUdi() void GPUCommonAlgorithm::sort(T begin, T end, const S& comp) { #ifdef GPUCA_ALGORITHM_STD std::sort(begin, end, comp); diff --git a/GPU/Common/GPUCommonAlgorithmThrust.h b/GPU/Common/GPUCommonAlgorithmThrust.h index 7af3138d45490..fa842b485d746 100644 --- a/GPU/Common/GPUCommonAlgorithmThrust.h +++ b/GPU/Common/GPUCommonAlgorithmThrust.h @@ -19,12 +19,14 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wshadow" #include +#include #include #include #pragma GCC diagnostic pop #include "GPUCommonDef.h" #include "GPUCommonHelpers.h" +#include "GPUTPCTrack.h" #ifndef __HIPCC__ // CUDA #include @@ -81,19 +83,15 @@ GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end, const S& comp) */ template -GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end) +GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T begin, T end) { - thrust::device_ptr thrustBegin(begin); - thrust::device_ptr thrustEnd(end); - thrust::sort(GPUCA_THRUST_NAMESPACE::par, thrustBegin, thrustEnd); + thrust::sort(GPUCA_THRUST_NAMESPACE::par, begin, end); } template -GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end, const S& comp) +GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T begin, T end, const S& comp) { - thrust::device_ptr thrustBegin(begin); - thrust::device_ptr thrustEnd(end); - thrust::sort(GPUCA_THRUST_NAMESPACE::par, thrustBegin, thrustEnd, comp); + thrust::sort(GPUCA_THRUST_NAMESPACE::par, begin, end, comp); } #ifndef GPUCA_GPUCODE_COMPILEKERNELS diff --git a/GPU/Common/MemLayout.h b/GPU/Common/MemLayout.h index 53ca642192d65..1d4952904cdb1 100644 --- a/GPU/Common/MemLayout.h +++ b/GPU/Common/MemLayout.h @@ -1,257 +1,212 @@ #ifndef MEMLAYOUT_H #define MEMLAYOUT_H -#include "GPUCommonDefAPI.h" - namespace MemLayout { -template using value = T; +using size_t = decltype(sizeof(0)); +using ptrdiff_t = decltype(static_cast(nullptr) - static_cast(nullptr)); +template using value = T; template using reference = T&; -template using reference_restrict = T& GPUrestrict(); - template using const_reference = const T&; -template using const_reference_restrict = const T& GPUrestrict(); - template using pointer = T*; -template using pointer_restrtict = T* GPUrestrict(); - template using const_pointer = const T*; -template using const_pointer_restrict = const T* GPUrestrict(); - -using size_t = decltype(sizeof 0); -using ptrdiff_t = decltype(static_cast(nullptr) - static_cast(nullptr)); - -enum Flag { soa, aos }; - -// The types S, S, and S need to be aggregate constructible -template