From 58e60e6252fe75352e6c759b9fda92d8d767a8b3 Mon Sep 17 00:00:00 2001 From: Tobias Reiter Date: Tue, 11 Nov 2025 11:15:00 +0100 Subject: [PATCH 1/6] Limit number of reflections on GPU --- gpu/include/raygLaunchParams.hpp | 3 ++- gpu/include/raygPerRayData.hpp | 1 + gpu/include/raygTrace.hpp | 3 +++ gpu/pipelines/GeneralPipelineDisk.cu | 1 + gpu/pipelines/GeneralPipelineLine.cu | 1 + gpu/pipelines/GeneralPipelineTriangle.cu | 2 +- include/viennaray/rayTraceKernel.hpp | 2 +- include/viennaray/rayUtil.hpp | 1 + 8 files changed, 11 insertions(+), 3 deletions(-) diff --git a/gpu/include/raygLaunchParams.hpp b/gpu/include/raygLaunchParams.hpp index 9e0d18c..b09736d 100644 --- a/gpu/include/raygLaunchParams.hpp +++ b/gpu/include/raygLaunchParams.hpp @@ -25,6 +25,7 @@ struct LaunchParams { unsigned int numElements; // to determine result buffer index unsigned int *dataPerParticle; // to determine result buffer index + unsigned maxReflections = std::numeric_limits::max(); unsigned maxBoundaryHits = 1000; uint8_t particleIdx = 0; uint8_t particleType = 0; @@ -71,7 +72,7 @@ __device__ __forceinline__ bool continueRay(const LaunchParams &launchParams, const PerRayData &prd) { return prd.rayWeight > launchParams.rayWeightThreshold && prd.numBoundaryHits < launchParams.maxBoundaryHits && - prd.energy >= 0.f; + prd.energy >= 0.f && prd.numReflections < launchParams.maxReflections; } #endif diff --git a/gpu/include/raygPerRayData.hpp b/gpu/include/raygPerRayData.hpp index d8b4cd5..dbd7b8e 100644 --- a/gpu/include/raygPerRayData.hpp +++ b/gpu/include/raygPerRayData.hpp @@ -31,6 +31,7 @@ struct PerRayData { // Hit data unsigned int numBoundaryHits = 0; + unsigned int numReflections = 0; unsigned int primID = 0; // primID of closest hit float tMin = 1e20f; // distance to closest hit diff --git a/gpu/include/raygTrace.hpp b/gpu/include/raygTrace.hpp index 237ddbc..fd46218 100644 --- a/gpu/include/raygTrace.hpp +++ b/gpu/include/raygTrace.hpp @@ -107,6 +107,9 @@ template class Trace { // Threshold value for neighbor detection in disk-based geometries launchParams.tThreshold = 1.1 * gridDelta_; // TODO: find the best value + launchParams.maxReflections = config_.maxReflections; + launchParams.maxBoundaryHits = config_.maxBoundaryHits; + int numPointsPerDim = static_cast(std::sqrt(static_cast(launchParams.numElements))); diff --git a/gpu/pipelines/GeneralPipelineDisk.cu b/gpu/pipelines/GeneralPipelineDisk.cu index a6928ed..695c8d4 100644 --- a/gpu/pipelines/GeneralPipelineDisk.cu +++ b/gpu/pipelines/GeneralPipelineDisk.cu @@ -188,5 +188,6 @@ extern "C" __global__ void __raygen__() { 0, // missSBTIndex u0, u1); // Payload prd.totalCount = 0; // Reset PerRayData + prd.numReflections++; } } diff --git a/gpu/pipelines/GeneralPipelineLine.cu b/gpu/pipelines/GeneralPipelineLine.cu index 86a511f..aff1f23 100644 --- a/gpu/pipelines/GeneralPipelineLine.cu +++ b/gpu/pipelines/GeneralPipelineLine.cu @@ -125,5 +125,6 @@ extern "C" __global__ void __raygen__() { 0, // missSBTIndex u0, u1); // Payload prd.totalCount = 0; // Reset PerRayData + prd.numReflections++; } } diff --git a/gpu/pipelines/GeneralPipelineTriangle.cu b/gpu/pipelines/GeneralPipelineTriangle.cu index 2625c34..62fe89d 100644 --- a/gpu/pipelines/GeneralPipelineTriangle.cu +++ b/gpu/pipelines/GeneralPipelineTriangle.cu @@ -99,7 +99,7 @@ extern "C" __global__ void __raygen__() { 1, // SBT stride 0, // missSBTIndex u0, u1); // Payload - + prd.numReflections++; #ifdef COUNT_RAYS int *counter = reinterpret_cast(launchParams.customData); atomicAdd(counter, 1); diff --git a/include/viennaray/rayTraceKernel.hpp b/include/viennaray/rayTraceKernel.hpp index 5ac9694..fa39ce1 100644 --- a/include/viennaray/rayTraceKernel.hpp +++ b/include/viennaray/rayTraceKernel.hpp @@ -211,7 +211,7 @@ template class TraceKernel { /* -------- Boundary hit -------- */ if (rayHit.hit.geomID == boundaryID) { - if (++boundaryHits > 1000) { + if (++boundaryHits > config_.maxBoundaryHits) { // terminate ray if too many boundary hits ++raysTerminated; break; diff --git a/include/viennaray/rayUtil.hpp b/include/viennaray/rayUtil.hpp index 0220e1a..40da777 100644 --- a/include/viennaray/rayUtil.hpp +++ b/include/viennaray/rayUtil.hpp @@ -83,6 +83,7 @@ struct KernelConfig { size_t numRaysPerPoint = 1000; size_t numRaysFixed = 0; unsigned maxReflections = std::numeric_limits::max(); + unsigned maxBoundaryHits = 1000; unsigned rngSeed = 0; bool useRandomSeed = true; From 72586cd41a2fa2044c71eb6a020ad8fa3e9d2ce2 Mon Sep 17 00:00:00 2001 From: Tobias Reiter Date: Tue, 11 Nov 2025 11:22:19 +0100 Subject: [PATCH 2/6] Implement rejection control on GPU --- gpu/include/raygLaunchParams.hpp | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/gpu/include/raygLaunchParams.hpp b/gpu/include/raygLaunchParams.hpp index b09736d..5be299d 100644 --- a/gpu/include/raygLaunchParams.hpp +++ b/gpu/include/raygLaunchParams.hpp @@ -69,10 +69,31 @@ getIdxOffset(int dataIdx, const LaunchParams &launchParams) { } __device__ __forceinline__ bool continueRay(const LaunchParams &launchParams, - const PerRayData &prd) { - return prd.rayWeight > launchParams.rayWeightThreshold && - prd.numBoundaryHits < launchParams.maxBoundaryHits && - prd.energy >= 0.f && prd.numReflections < launchParams.maxReflections; + PerRayData &prd) { + if (prd.numReflections > launchParams.maxReflections || + prd.numBoundaryHits > launchParams.maxBoundaryHits) + return false; + + // If the weight of the ray is above a certain threshold, we always reflect. + // If the weight of the ray is below the threshold, we randomly decide to + // either kill the ray or increase its weight (in an unbiased way). + if (prd.rayWeight >= launchParams.rayWeightThreshold && prd.energy >= 0.f) + return true; + + // We want to set the weight of (the reflection of) the ray to the value of + // renewWeight. In order to stay unbiased we kill the reflection with a + // probability of (1 - rayWeight / renewWeight). + float renewWeight = 0.3f; + float rnd = getNextRand(&prd.RNGstate); + float killProbability = 1.f - prd.rayWeight / renewWeight; + if (rnd < killProbability) { + // kill the ray + return false; + } + // set rayWeight to new weight + prd.rayWeight = renewWeight; + // continue ray + return true; } #endif From d0cbcd4d9147b4b0dcd14fc4f04973fdd0141ed7 Mon Sep 17 00:00:00 2001 From: Tobias Reiter Date: Tue, 11 Nov 2025 12:01:30 +0100 Subject: [PATCH 3/6] Correct rejection control --- gpu/include/raygLaunchParams.hpp | 6 ++++++ include/viennaray/rayTraceKernel.hpp | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/gpu/include/raygLaunchParams.hpp b/gpu/include/raygLaunchParams.hpp index 5be299d..01be609 100644 --- a/gpu/include/raygLaunchParams.hpp +++ b/gpu/include/raygLaunchParams.hpp @@ -70,10 +70,16 @@ getIdxOffset(int dataIdx, const LaunchParams &launchParams) { __device__ __forceinline__ bool continueRay(const LaunchParams &launchParams, PerRayData &prd) { + if (prd.rayWeight <= 0.f || prd.energy < 0.f) + return false; + if (prd.numReflections > launchParams.maxReflections || prd.numBoundaryHits > launchParams.maxBoundaryHits) return false; + if (prd.numReflections > 1e4) + return false; + // If the weight of the ray is above a certain threshold, we always reflect. // If the weight of the ray is below the threshold, we randomly decide to // either kill the ray or increase its weight (in an unbiased way). diff --git a/include/viennaray/rayTraceKernel.hpp b/include/viennaray/rayTraceKernel.hpp index fa39ce1..4a81090 100644 --- a/include/viennaray/rayTraceKernel.hpp +++ b/include/viennaray/rayTraceKernel.hpp @@ -460,9 +460,9 @@ template class TraceKernel { // We want to set the weight of (the reflection of) the ray to the value of // renewWeight. In order to stay unbiased we kill the reflection with a // probability of (1 - rayWeight / renewWeight). - auto rnd = static_cast(rng() / RNG::max()); + std::uniform_real_distribution<> dist; auto killProbability = 1.0 - rayWeight / renewWeight; - if (rnd < killProbability) { + if (dist(rng) < killProbability) { // kill the ray return false; } From ec82a4504a506480fde75f4ffa2719ee8cdb88b1 Mon Sep 17 00:00:00 2001 From: Tobias Reiter Date: Tue, 11 Nov 2025 14:06:02 +0100 Subject: [PATCH 4/6] Simplify getPRD --- gpu/include/raygPerRayData.hpp | 4 ++-- gpu/pipelines/GeneralPipelineDisk.cu | 6 +++--- gpu/pipelines/GeneralPipelineLine.cu | 6 +++--- gpu/pipelines/GeneralPipelineTriangle.cu | 4 ++-- include/viennaray/rayTraceKernel.hpp | 8 ++++---- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/gpu/include/raygPerRayData.hpp b/gpu/include/raygPerRayData.hpp index dbd7b8e..395ca56 100644 --- a/gpu/include/raygPerRayData.hpp +++ b/gpu/include/raygPerRayData.hpp @@ -59,10 +59,10 @@ static __forceinline__ __device__ void packPointer(void *ptr, uint32_t &i0, i1 = uptr & 0x00000000ffffffff; } -template static __forceinline__ __device__ T *getPRD() { +static __forceinline__ __device__ PerRayData *getPRD() { const uint32_t u0 = optixGetPayload_0(); const uint32_t u1 = optixGetPayload_1(); - return reinterpret_cast(unpackPointer(u0, u1)); + return reinterpret_cast(unpackPointer(u0, u1)); } static __device__ void initializeRNGState(PerRayData *prd, diff --git a/gpu/pipelines/GeneralPipelineDisk.cu b/gpu/pipelines/GeneralPipelineDisk.cu index 695c8d4..027292f 100644 --- a/gpu/pipelines/GeneralPipelineDisk.cu +++ b/gpu/pipelines/GeneralPipelineDisk.cu @@ -22,7 +22,7 @@ extern "C" __constant__ viennaray::gpu::LaunchParams launchParams; extern "C" __global__ void __intersection__() { const HitSBTDataDisk *sbtData = (const HitSBTDataDisk *)optixGetSbtDataPointer(); - PerRayData *prd = (PerRayData *)getPRD(); + PerRayData *prd = getPRD(); // Get the index of the AABB box that was hit const unsigned int primID = optixGetPrimitiveIndex(); @@ -69,7 +69,7 @@ extern "C" __global__ void __intersection__() { extern "C" __global__ void __closesthit__() { const HitSBTDataDisk *sbtData = (const HitSBTDataDisk *)optixGetSbtDataPointer(); - PerRayData *prd = (PerRayData *)getPRD(); + PerRayData *prd = getPRD(); const unsigned int primID = optixGetPrimitiveIndex(); prd->tMin = optixGetRayTmax() - launchParams.tThreshold; @@ -143,7 +143,7 @@ extern "C" __global__ void __closesthit__() { } } -extern "C" __global__ void __miss__() { getPRD()->rayWeight = 0.f; } +extern "C" __global__ void __miss__() { getPRD()->rayWeight = 0.f; } extern "C" __global__ void __raygen__() { const uint3 idx = optixGetLaunchIndex(); diff --git a/gpu/pipelines/GeneralPipelineLine.cu b/gpu/pipelines/GeneralPipelineLine.cu index aff1f23..be4cdce 100644 --- a/gpu/pipelines/GeneralPipelineLine.cu +++ b/gpu/pipelines/GeneralPipelineLine.cu @@ -22,7 +22,7 @@ extern "C" __constant__ viennaray::gpu::LaunchParams launchParams; extern "C" __global__ void __intersection__() { const HitSBTDataLine *sbtData = (const HitSBTDataLine *)optixGetSbtDataPointer(); - PerRayData *prd = (PerRayData *)getPRD(); + PerRayData *prd = getPRD(); // Get the index of the AABB box that was hit const int primID = optixGetPrimitiveIndex(); @@ -50,7 +50,7 @@ extern "C" __global__ void __intersection__() { extern "C" __global__ void __closesthit__() { const HitSBTDataLine *sbtData = (const HitSBTDataLine *)optixGetSbtDataPointer(); - PerRayData *prd = (PerRayData *)getPRD(); + PerRayData *prd = getPRD(); const unsigned int primID = optixGetPrimitiveIndex(); prd->tMin = optixGetRayTmax(); @@ -80,7 +80,7 @@ extern "C" __global__ void __closesthit__() { } } -extern "C" __global__ void __miss__() { getPRD()->rayWeight = 0.f; } +extern "C" __global__ void __miss__() { getPRD()->rayWeight = 0.f; } extern "C" __global__ void __raygen__() { const uint3 idx = optixGetLaunchIndex(); diff --git a/gpu/pipelines/GeneralPipelineTriangle.cu b/gpu/pipelines/GeneralPipelineTriangle.cu index 62fe89d..655b991 100644 --- a/gpu/pipelines/GeneralPipelineTriangle.cu +++ b/gpu/pipelines/GeneralPipelineTriangle.cu @@ -24,7 +24,7 @@ extern "C" __constant__ viennaray::gpu::LaunchParams launchParams; extern "C" __global__ void __closesthit__() { const HitSBTDataTriangle *sbtData = (const HitSBTDataTriangle *)optixGetSbtDataPointer(); - PerRayData *prd = (PerRayData *)getPRD(); + PerRayData *prd = getPRD(); const unsigned int primID = optixGetPrimitiveIndex(); prd->tMin = optixGetRayTmax(); @@ -55,7 +55,7 @@ extern "C" __global__ void __closesthit__() { } } -extern "C" __global__ void __miss__() { getPRD()->rayWeight = 0.f; } +extern "C" __global__ void __miss__() { getPRD()->rayWeight = 0.f; } extern "C" __global__ void __raygen__() { const uint3 idx = optixGetLaunchIndex(); diff --git a/include/viennaray/rayTraceKernel.hpp b/include/viennaray/rayTraceKernel.hpp index 4a81090..95caf69 100644 --- a/include/viennaray/rayTraceKernel.hpp +++ b/include/viennaray/rayTraceKernel.hpp @@ -321,10 +321,6 @@ template class TraceKernel { if (rayWeight <= 0) { break; } - reflect = rejectionControl(rayWeight, initialRayWeight, rngState); - if (!reflect) { - break; - } if (++numReflections > config_.maxReflections) { // terminate ray if too many reflections break; @@ -334,6 +330,10 @@ template class TraceKernel { ++raysTerminated; break; } + reflect = rejectionControl(rayWeight, initialRayWeight, rngState); + if (!reflect) { + break; + } // Update ray direction and origin fillRayPosition(rayHit.ray, hitPoint); From ca93254b0dd96dc3ce89149d1d2ffb838905039c Mon Sep 17 00:00:00 2001 From: Tobias Reiter Date: Tue, 11 Nov 2025 14:25:28 +0100 Subject: [PATCH 5/6] Small fixes --- gpu/include/raygPerRayData.hpp | 2 +- gpu/include/raygReflection.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gpu/include/raygPerRayData.hpp b/gpu/include/raygPerRayData.hpp index 395ca56..e27d9a7 100644 --- a/gpu/include/raygPerRayData.hpp +++ b/gpu/include/raygPerRayData.hpp @@ -68,7 +68,7 @@ static __forceinline__ __device__ PerRayData *getPRD() { static __device__ void initializeRNGState(PerRayData *prd, unsigned int linearLaunchIndex, unsigned int seed) { - auto rngSeed = tea<4>(linearLaunchIndex, seed); + auto rngSeed = tea<3>(linearLaunchIndex, seed); curand_init(rngSeed, 0, 0, &prd->RNGstate); } #endif diff --git a/gpu/include/raygReflection.hpp b/gpu/include/raygReflection.hpp index c6e1bd5..277700e 100644 --- a/gpu/include/raygReflection.hpp +++ b/gpu/include/raygReflection.hpp @@ -28,7 +28,7 @@ __device__ __inline__ Vec3Df computeNormal(const void *sbtData, const Vec3Df &A = sbt->vertex[index[0]]; const Vec3Df &B = sbt->vertex[index[1]]; const Vec3Df &C = sbt->vertex[index[2]]; - return Normalize(CrossProduct(B - A, C - A)); + return Normalize(CrossProduct(B - A, C - A)); } break; case 1: { // Disks From fdb4152b59b98bafc55fd8bdc85a9657d202661b Mon Sep 17 00:00:00 2001 From: Tobias Reiter Date: Tue, 11 Nov 2025 17:16:10 +0100 Subject: [PATCH 6/6] Bump version --- CMakeLists.txt | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e2e5f0c..66970ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.20 FATAL_ERROR) project( ViennaRay LANGUAGES CXX - VERSION 3.7.1) + VERSION 3.7.2) # -------------------------------------------------------------------------------------------------------- # Library switches diff --git a/README.md b/README.md index 258f9ad..310230b 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ We recommend using [CPM.cmake](https://github.com/cpm-cmake/CPM.cmake) to consum * Installation with CPM ```cmake - CPMAddPackage("gh:viennatools/viennaray@3.7.1") # Use the latest release version + CPMAddPackage("gh:viennatools/viennaray@3.7.2") # Use the latest release version ``` * With a local installation