diff --git a/.github/workflows/standalone-benchmark.yml b/.github/workflows/standalone-benchmark.yml index 0713ffc6a1ed7..9527e6a5a4403 100644 --- a/.github/workflows/standalone-benchmark.yml +++ b/.github/workflows/standalone-benchmark.yml @@ -73,6 +73,7 @@ jobs: STANDALONE_DIR: /root/standalone BUILD_DIR: /root/standalone/build ARTIFACT_FILE: /root/artifact.txt + ALIBUILD_O2_FORCE_GPU: 1 - name: Upload Artifact uses: actions/upload-artifact@v4 diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index db57e7ec06d4b..a1bbdd930b4cb 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -28,19 +28,20 @@ namespace o2::gpu { class GPUCommonAlgorithm { + public: template - GPUd() static void sort(T* begin, T* end); + GPUd() static void sort(T begin, T end); template GPUd() static void sortInBlock(T* begin, T* end); template - GPUd() static void sortDeviceDynamic(T* begin, T* end); + GPUd() static void sortDeviceDynamic(T begin, T end); template - GPUd() static void sort(T* begin, T* end, const S& comp); + GPUd() static void sort(T begin, T end, const S& comp); template GPUd() static void sortInBlock(T* begin, T* end, const S& comp); template - GPUd() static void sortDeviceDynamic(T* begin, T* end, const S& comp); + GPUd() static void sortDeviceDynamic(T begin, T end, const S& comp); #ifndef __OPENCL__ template GPUh() static void sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp); @@ -224,7 +225,7 @@ namespace o2::gpu { template -GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end) +GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T begin, T end) { #ifndef GPUCA_GPUCODE GPUCommonAlgorithm::sort(begin, end); @@ -234,7 +235,7 @@ GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end) } template -GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end, const S& comp) +GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T begin, T end, const S& comp) { GPUCommonAlgorithm::sort(begin, end, comp); } @@ -248,7 +249,7 @@ namespace o2::gpu { template -GPUdi() void GPUCommonAlgorithm::sort(T* begin, T* end) +GPUdi() void GPUCommonAlgorithm::sort(T begin, T end) { #ifdef GPUCA_ALGORITHM_STD std::sort(begin, end); @@ -258,7 +259,7 @@ GPUdi() void GPUCommonAlgorithm::sort(T* begin, T* end) } template -GPUdi() void GPUCommonAlgorithm::sort(T* begin, T* end, const S& comp) +GPUdi() void GPUCommonAlgorithm::sort(T begin, T end, const S& comp) { #ifdef GPUCA_ALGORITHM_STD std::sort(begin, end, comp); diff --git a/GPU/Common/GPUCommonAlgorithmThrust.h b/GPU/Common/GPUCommonAlgorithmThrust.h index 7af3138d45490..be74e2143ae37 100644 --- a/GPU/Common/GPUCommonAlgorithmThrust.h +++ b/GPU/Common/GPUCommonAlgorithmThrust.h @@ -25,6 +25,7 @@ #include "GPUCommonDef.h" #include "GPUCommonHelpers.h" +#include "GPUTPCTrack.h" #ifndef __HIPCC__ // CUDA #include @@ -81,19 +82,15 @@ GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end, const S& comp) */ template -GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end) +GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T begin, T end) { - thrust::device_ptr thrustBegin(begin); - thrust::device_ptr thrustEnd(end); - thrust::sort(GPUCA_THRUST_NAMESPACE::par, thrustBegin, thrustEnd); + thrust::sort(GPUCA_THRUST_NAMESPACE::par, begin, end); } template -GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end, const S& comp) +GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T begin, T end, const S& comp) { - thrust::device_ptr thrustBegin(begin); - thrust::device_ptr thrustEnd(end); - thrust::sort(GPUCA_THRUST_NAMESPACE::par, thrustBegin, thrustEnd, comp); + thrust::sort(GPUCA_THRUST_NAMESPACE::par, begin, end, comp); } #ifndef GPUCA_GPUCODE_COMPILEKERNELS diff --git a/GPU/Common/MemLayout.h b/GPU/Common/MemLayout.h index 53ca642192d65..250ad36435474 100644 --- a/GPU/Common/MemLayout.h +++ b/GPU/Common/MemLayout.h @@ -1,257 +1,308 @@ #ifndef MEMLAYOUT_H #define MEMLAYOUT_H -#include "GPUCommonDefAPI.h" - namespace MemLayout { -template using value = T; +using size_t = decltype(sizeof(0)); +using ptrdiff_t = decltype(static_cast(nullptr) - static_cast(nullptr)); +template using value = T; template using reference = T&; -template using reference_restrict = T& GPUrestrict(); - template using const_reference = const T&; -template using const_reference_restrict = const T& GPUrestrict(); - template using pointer = T*; -template using pointer_restrtict = T* GPUrestrict(); - template using const_pointer = const T*; -template using const_pointer_restrict = const T* GPUrestrict(); -using size_t = decltype(sizeof 0); -using ptrdiff_t = decltype(static_cast(nullptr) - static_cast(nullptr)); - -enum Flag { soa, aos }; - -// The types S, S, and S need to be aggregate constructible -template