From 93a7896226ec7413f73a32cf51bb3fb655e495e1 Mon Sep 17 00:00:00 2001 From: reiter Date: Tue, 28 Oct 2025 15:47:46 +0100 Subject: [PATCH 1/9] Enable custom direction basis and optimize data types --- gpu/include/raygLaunchParams.hpp | 26 +++++++++++++----------- gpu/include/raygPerRayData.hpp | 14 ++++++------- gpu/include/raygSource.hpp | 21 +++++++++---------- gpu/include/raygTrace.hpp | 15 ++++++++------ gpu/pipelines/GeneralPipelineDisk.cu | 17 ++++++++++------ gpu/pipelines/GeneralPipelineLine.cu | 9 ++++++-- gpu/pipelines/GeneralPipelineTriangle.cu | 9 ++++++-- include/viennaray/rayParticle.hpp | 9 ++++++-- 8 files changed, 72 insertions(+), 48 deletions(-) diff --git a/gpu/include/raygLaunchParams.hpp b/gpu/include/raygLaunchParams.hpp index 0d454b4..d7cb856 100644 --- a/gpu/include/raygLaunchParams.hpp +++ b/gpu/include/raygLaunchParams.hpp @@ -15,32 +15,34 @@ __both__ __forceinline__ unsigned callableIndex(unsigned p, CallableSlot s) { struct LaunchParams { float *resultBuffer; + float rayWeightThreshold = 0.1f; + float tThreshold = 0.5f; + unsigned int seed = 0; - unsigned int numElements; - unsigned int *dataPerParticle; // to determine result buffer index bool periodicBoundary = false; - unsigned int maxBoundaryHits = 100; - unsigned int particleIdx = 0; - unsigned particleType = 0; - float gridDelta = 1.f; - float tThreshold = 0.5f; - int D = 3; // Dimension + unsigned int numElements; // to determine result buffer index + unsigned int *dataPerParticle; // to determine result buffer index + + uint16_t maxBoundaryHits = 100; + uint16_t particleIdx = 0; + uint16_t particleType = 0; + uint16_t D = 3; // Dimension - // std::unordered_map sticking; - int *materialIds; - float *materialSticking; float sticking = 1.f; float cosineExponent = 1.f; + int *materialIds; + float *materialSticking; void *customData; // source plane params - struct { + struct SourcePlane { viennacore::Vec2Df minPoint; viennacore::Vec2Df maxPoint; float planeHeight; std::array directionBasis; + bool customDirectionBasis = false; } source; OptixTraversableHandle traversable; diff --git a/gpu/include/raygPerRayData.hpp b/gpu/include/raygPerRayData.hpp index 7010d8b..02e58e9 100644 --- a/gpu/include/raygPerRayData.hpp +++ b/gpu/include/raygPerRayData.hpp @@ -22,15 +22,15 @@ struct PerRayData { float energy = 0.f; unsigned int numBoundaryHits = 0; - unsigned primID = 0; - float tMin = 1e20f; + unsigned int primID = 0; // primID of closest hit + float tMin = 1e20f; // distance to closest hit // Variables for neighbor intersections (overlapping disks and lines) - int TIndex[MAX_NEIGHBORS]; // Indices of neighbor hits - int ISCount = 0; // Number of hits starting from 1 - int tempCount = 0; // total intersections recorded - float tValues[MAX_NEIGHBORS]; // all intersection distances - int primIDs[MAX_NEIGHBORS]; // their primitive IDs + unsigned int TIndex[MAX_NEIGHBORS]; // Indices of neighbor hits + uint16_t ISCount = 0; // Number of hits starting from 1 + uint16_t tempCount = 0; // total intersections recorded + float tValues[MAX_NEIGHBORS]; // all intersection distances + unsigned int primIDs[MAX_NEIGHBORS]; // their primitive IDs bool hitFromBack = false; }; diff --git a/gpu/include/raygSource.hpp b/gpu/include/raygSource.hpp index 70c4dc4..8dd2b03 100644 --- a/gpu/include/raygSource.hpp +++ b/gpu/include/raygSource.hpp @@ -22,7 +22,7 @@ getOrthonormalBasis(const viennacore::Vec3Df &n) { } __device__ void initializeRayDirection(viennaray::gpu::PerRayData *prd, - const float power, const int D) { + const float power, const uint16_t D) { // source direction const float4 u = curand_uniform4(&prd->RNGstate); // (0,1] const float tt = powf(u.w, 2.f / (power + 1.f)); @@ -43,7 +43,7 @@ __device__ void initializeRayDirection(viennaray::gpu::PerRayData *prd, __device__ void initializeRayDirection(viennaray::gpu::PerRayData *prd, const float power, const std::array &basis, - const int D) { + const uint16_t D) { // source direction do { const float4 u = curand_uniform4(&prd->RNGstate); // (0,1] @@ -71,20 +71,19 @@ initializeRayDirection(viennaray::gpu::PerRayData *prd, const float power, __device__ void initializeRayPosition(viennaray::gpu::PerRayData *prd, - viennaray::gpu::LaunchParams *launchParams, const int D) { + const viennaray::gpu::LaunchParams::SourcePlane &source, + const uint16_t D) { const float4 u = curand_uniform4(&prd->RNGstate); // (0,1] - prd->pos[0] = launchParams->source.minPoint[0] + - u.x * (launchParams->source.maxPoint[0] - - launchParams->source.minPoint[0]); + prd->pos[0] = + source.minPoint[0] + u.x * (source.maxPoint[0] - source.minPoint[0]); if (D == 2) { - prd->pos[1] = launchParams->source.planeHeight; + prd->pos[1] = source.planeHeight; prd->pos[2] = 0.f; } else { - prd->pos[1] = launchParams->source.minPoint[1] + - u.y * (launchParams->source.maxPoint[1] - - launchParams->source.minPoint[1]); - prd->pos[2] = launchParams->source.planeHeight; + prd->pos[1] = + source.minPoint[1] + u.y * (source.maxPoint[1] - source.minPoint[1]); + prd->pos[2] = source.planeHeight; } } diff --git a/gpu/include/raygTrace.hpp b/gpu/include/raygTrace.hpp index 445e36a..d292a90 100644 --- a/gpu/include/raygTrace.hpp +++ b/gpu/include/raygTrace.hpp @@ -104,7 +104,6 @@ template class Trace { launchParams.seed = gen(rd); } - launchParams.gridDelta = gridDelta_; launchParams.tThreshold = 1.1 * gridDelta_; // TODO: find the best value int numPointsPerDim = @@ -181,11 +180,15 @@ template class Trace { launchParams.materialSticking = (float *)materialStickingBuffer_[i].dPointer(); } - Vec3Df direction{static_cast(particles_[i].direction[0]), - static_cast(particles_[i].direction[1]), - static_cast(particles_[i].direction[2])}; - launchParams.source.directionBasis = - rayInternal::getOrthonormalBasis(direction); + + if (particles_[i].useCustomDirection) { + Vec3Df direction{static_cast(particles_[i].direction[0]), + static_cast(particles_[i].direction[1]), + static_cast(particles_[i].direction[2])}; + launchParams.source.directionBasis = + rayInternal::getOrthonormalBasis(direction); + launchParams.source.customDirectionBasis = true; + } launchParamsBuffers[i].alloc(sizeof(launchParams)); launchParamsBuffers[i].upload(&launchParams, 1); diff --git a/gpu/pipelines/GeneralPipelineDisk.cu b/gpu/pipelines/GeneralPipelineDisk.cu index 09e5ed3..1ebbed1 100644 --- a/gpu/pipelines/GeneralPipelineDisk.cu +++ b/gpu/pipelines/GeneralPipelineDisk.cu @@ -28,7 +28,7 @@ extern "C" __global__ void __intersection__() { PerRayData *prd = (PerRayData *)getPRD(); // Get the index of the AABB box that was hit - const int primID = optixGetPrimitiveIndex(); + const unsigned int primID = optixGetPrimitiveIndex(); // Read geometric data from the primitive that is inside that AABB box const Vec3Df diskOrigin = sbtData->point[primID]; @@ -48,8 +48,7 @@ extern "C" __global__ void __intersection__() { float ddneg = DotProduct(diskOrigin, normal); float t = (ddneg - DotProduct(normal, prd->pos)) / prodOfDirections; // Avoid negative t or self intersections - valid &= t > 1e-4f; // Maybe lower this further, but 1e-4f works for now - + valid &= t > optixGetRayTmin(); const Vec3Df intersection = prd->pos + prd->dir * t; // Check if within disk radius @@ -94,7 +93,6 @@ extern "C" __global__ void __closesthit__() { } if (sbtData->base.isBoundary) { - prd->numBoundaryHits++; // This is effectively the miss shader if (launchParams.D == 2 && (primID == 2 || primID == 3)) { // bottom or top - ymin or ymax @@ -112,6 +110,8 @@ extern "C" __global__ void __closesthit__() { } else { reflectFromBoundary(prd, sbtData, launchParams.D); } + prd->numBoundaryHits++; + } else { // ------------- NEIGHBOR FILTERING --------------- // // Keep only hits close to tMin @@ -161,8 +161,13 @@ extern "C" __global__ void __raygen__() { initializeRNGState(&prd, linearLaunchIndex, launchParams.seed); // initialize ray position and direction - initializeRayPosition(&prd, &launchParams, launchParams.D); - initializeRayDirection(&prd, launchParams.cosineExponent, launchParams.D); + initializeRayPosition(&prd, launchParams.source, launchParams.D); + if (launchParams.source.customDirectionBasis) { + initializeRayDirection(&prd, launchParams.cosineExponent, + launchParams.source.directionBasis, launchParams.D); + } else { + initializeRayDirection(&prd, launchParams.cosineExponent, launchParams.D); + } unsigned callIdx = callableIndex(launchParams.particleType, CallableSlot::INIT); diff --git a/gpu/pipelines/GeneralPipelineLine.cu b/gpu/pipelines/GeneralPipelineLine.cu index ec456af..f6fb116 100644 --- a/gpu/pipelines/GeneralPipelineLine.cu +++ b/gpu/pipelines/GeneralPipelineLine.cu @@ -98,8 +98,13 @@ extern "C" __global__ void __raygen__() { initializeRNGState(&prd, linearLaunchIndex, launchParams.seed); // initialize ray position and direction - initializeRayPosition(&prd, &launchParams, launchParams.D); - initializeRayDirection(&prd, launchParams.cosineExponent, launchParams.D); + initializeRayPosition(&prd, launchParams.source, launchParams.D); + if (launchParams.source.customDirectionBasis) { + initializeRayDirection(&prd, launchParams.cosineExponent, + launchParams.source.directionBasis, launchParams.D); + } else { + initializeRayDirection(&prd, launchParams.cosineExponent, launchParams.D); + } unsigned callIdx = callableIndex(launchParams.particleType, CallableSlot::INIT); diff --git a/gpu/pipelines/GeneralPipelineTriangle.cu b/gpu/pipelines/GeneralPipelineTriangle.cu index 8011df6..37fe920 100644 --- a/gpu/pipelines/GeneralPipelineTriangle.cu +++ b/gpu/pipelines/GeneralPipelineTriangle.cu @@ -73,8 +73,13 @@ extern "C" __global__ void __raygen__() { initializeRNGState(&prd, linearLaunchIndex, launchParams.seed); // initialize ray position and direction - initializeRayPosition(&prd, &launchParams, launchParams.D); - initializeRayDirection(&prd, launchParams.cosineExponent, launchParams.D); + initializeRayPosition(&prd, launchParams.source, launchParams.D); + if (launchParams.source.customDirectionBasis) { + initializeRayDirection(&prd, launchParams.cosineExponent, + launchParams.source.directionBasis, launchParams.D); + } else { + initializeRayDirection(&prd, launchParams.cosineExponent, launchParams.D); + } unsigned callIdx = callableIndex(launchParams.particleType, CallableSlot::INIT); diff --git a/include/viennaray/rayParticle.hpp b/include/viennaray/rayParticle.hpp index 9d6bd8a..2d32aca 100644 --- a/include/viennaray/rayParticle.hpp +++ b/include/viennaray/rayParticle.hpp @@ -9,6 +9,11 @@ #include +#define VIENNARAY_PARTICLE_STOP \ + std::pair> { \ + 1., Vec3D { 0., 0., 0. } \ + } + namespace viennaray { using namespace viennacore; @@ -95,8 +100,7 @@ class Particle : public AbstractParticle { RNG &rngState) override { // return the sticking probability and direction after reflection for this // hit - return std::pair>{ - 1., Vec3D{0., 0., 0.}}; + return VIENNARAY_PARTICLE_STOP; } void surfaceCollision(NumericType rayWeight, const Vec3D &rayDir, const Vec3D &geomNormal, @@ -243,6 +247,7 @@ template struct Particle { std::unordered_map materialSticking; T cosineExponent = 1.; + bool useCustomDirection = false; Vec3D direction = {0., 0., -1.0}; }; } // namespace gpu From 827797b8a9f1f8b85c4eeb8acb9fe533c92bb21e Mon Sep 17 00:00:00 2001 From: reiter Date: Wed, 29 Oct 2025 09:57:54 +0100 Subject: [PATCH 2/9] Add ray load --- gpu/include/raygPerRayData.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gpu/include/raygPerRayData.hpp b/gpu/include/raygPerRayData.hpp index 02e58e9..d38e093 100644 --- a/gpu/include/raygPerRayData.hpp +++ b/gpu/include/raygPerRayData.hpp @@ -13,15 +13,16 @@ namespace viennaray::gpu { struct PerRayData { - float rayWeight = 1.f; viennacore::Vec3Df pos; viennacore::Vec3Df dir; + float rayWeight = 1.f; + float energy = 0.f; + float load = 0.f; + RNGState RNGstate; - float energy = 0.f; unsigned int numBoundaryHits = 0; - unsigned int primID = 0; // primID of closest hit float tMin = 1e20f; // distance to closest hit From 9ea38a656928790df3a468fcfd405166eb9e7cef Mon Sep 17 00:00:00 2001 From: reiter Date: Wed, 29 Oct 2025 15:49:51 +0100 Subject: [PATCH 3/9] Remove TIndex array from PRD --- gpu/include/raygLaunchParams.hpp | 8 ++++---- gpu/include/raygPerRayData.hpp | 9 ++++++--- gpu/pipelines/GeneralPipelineDisk.cu | 18 +++++++++--------- gpu/pipelines/GeneralPipelineLine.cu | 4 ++-- gpu/pipelines/GeneralPipelineTriangle.cu | 2 +- gpu/pipelines/Particle.cuh | 2 +- gpu/tests/deviceStructs/CMakeLists.txt | 8 ++++++++ gpu/tests/deviceStructs/deviceStructs.cpp | 15 +++++++++++++++ 8 files changed, 46 insertions(+), 20 deletions(-) create mode 100644 gpu/tests/deviceStructs/CMakeLists.txt create mode 100644 gpu/tests/deviceStructs/deviceStructs.cpp diff --git a/gpu/include/raygLaunchParams.hpp b/gpu/include/raygLaunchParams.hpp index d7cb856..f106d25 100644 --- a/gpu/include/raygLaunchParams.hpp +++ b/gpu/include/raygLaunchParams.hpp @@ -25,10 +25,10 @@ struct LaunchParams { unsigned int numElements; // to determine result buffer index unsigned int *dataPerParticle; // to determine result buffer index - uint16_t maxBoundaryHits = 100; - uint16_t particleIdx = 0; - uint16_t particleType = 0; - uint16_t D = 3; // Dimension + unsigned maxBoundaryHits = 100; + uint8_t particleIdx = 0; + uint8_t particleType = 0; + uint8_t D = 3; // Dimension float sticking = 1.f; float cosineExponent = 1.f; diff --git a/gpu/include/raygPerRayData.hpp b/gpu/include/raygPerRayData.hpp index d38e093..9a2bd14 100644 --- a/gpu/include/raygPerRayData.hpp +++ b/gpu/include/raygPerRayData.hpp @@ -13,23 +13,26 @@ namespace viennaray::gpu { struct PerRayData { + // Position and direction viennacore::Vec3Df pos; viennacore::Vec3Df dir; + // Simulation specific data float rayWeight = 1.f; float energy = 0.f; float load = 0.f; + // RNG RNGState RNGstate; + // Hit data unsigned int numBoundaryHits = 0; unsigned int primID = 0; // primID of closest hit float tMin = 1e20f; // distance to closest hit // Variables for neighbor intersections (overlapping disks and lines) - unsigned int TIndex[MAX_NEIGHBORS]; // Indices of neighbor hits - uint16_t ISCount = 0; // Number of hits starting from 1 - uint16_t tempCount = 0; // total intersections recorded + uint8_t ISCount = 0; // Number of hits starting from 1 + uint8_t totalCount = 0; // total intersections recorded float tValues[MAX_NEIGHBORS]; // all intersection distances unsigned int primIDs[MAX_NEIGHBORS]; // their primitive IDs bool hitFromBack = false; diff --git a/gpu/pipelines/GeneralPipelineDisk.cu b/gpu/pipelines/GeneralPipelineDisk.cu index 1ebbed1..bf332c0 100644 --- a/gpu/pipelines/GeneralPipelineDisk.cu +++ b/gpu/pipelines/GeneralPipelineDisk.cu @@ -58,10 +58,10 @@ extern "C" __global__ void __intersection__() { if (valid) { // Collect all intersections and filter neighbors in CH shader - if (!sbtData->base.isBoundary && prd->tempCount < MAX_NEIGHBORS) { - prd->tValues[prd->tempCount] = t; - prd->primIDs[prd->tempCount] = primID; - prd->tempCount++; + if (!sbtData->base.isBoundary && prd->totalCount < MAX_NEIGHBORS) { + prd->tValues[prd->totalCount] = t; + prd->primIDs[prd->totalCount] = primID; + prd->totalCount++; } // Has to pass a dummy t value so later intersections are not ignored @@ -105,27 +105,27 @@ extern "C" __global__ void __closesthit__() { return; } + prd->numBoundaryHits++; if (launchParams.periodicBoundary) { applyPeriodicBoundary(prd, sbtData, launchParams.D); } else { reflectFromBoundary(prd, sbtData, launchParams.D); } - prd->numBoundaryHits++; } else { // ------------- NEIGHBOR FILTERING --------------- // // Keep only hits close to tMin prd->ISCount = 0; - for (int i = 0; i < prd->tempCount; ++i) { + for (int i = 0; i < prd->totalCount; ++i) { if (fabsf(prd->tValues[i] - prd->tMin) < launchParams.tThreshold && prd->ISCount < MAX_NEIGHBORS) { - prd->TIndex[prd->ISCount++] = prd->primIDs[i]; + prd->primIDs[prd->ISCount++] = prd->primIDs[i]; } } // // CPU like neighbor detection // prd->ISCount = 0; - // for (int i = 0; i < prd->tempCount; ++i) { + // for (int i = 0; i < prd->totalCount; ++i) { // float distance = viennacore::Distance(sbtData->point[primID], // sbtData->point[prd->primIDs[i]]); // if (distance < 2 * sbtData->radius && prd->ISCount < MAX_NEIGHBORS) { @@ -191,6 +191,6 @@ extern "C" __global__ void __raygen__() { RAY_TYPE_COUNT, // SBT stride SURFACE_RAY_TYPE, // missSBTIndex u0, u1); - prd.tempCount = 0; // Reset PerRayData + prd.totalCount = 0; // Reset PerRayData } } diff --git a/gpu/pipelines/GeneralPipelineLine.cu b/gpu/pipelines/GeneralPipelineLine.cu index f6fb116..68aa94d 100644 --- a/gpu/pipelines/GeneralPipelineLine.cu +++ b/gpu/pipelines/GeneralPipelineLine.cu @@ -68,7 +68,7 @@ extern "C" __global__ void __closesthit__() { } } else { prd->ISCount = 1; - prd->TIndex[0] = primID; + prd->primIDs[0] = primID; // ------------- SURFACE COLLISION --------------- // unsigned callIdx = @@ -128,6 +128,6 @@ extern "C" __global__ void __raygen__() { RAY_TYPE_COUNT, // SBT stride SURFACE_RAY_TYPE, // missSBTIndex u0, u1); - prd.tempCount = 0; // Reset PerRayData + prd.totalCount = 0; // Reset PerRayData } } diff --git a/gpu/pipelines/GeneralPipelineTriangle.cu b/gpu/pipelines/GeneralPipelineTriangle.cu index 37fe920..625161c 100644 --- a/gpu/pipelines/GeneralPipelineTriangle.cu +++ b/gpu/pipelines/GeneralPipelineTriangle.cu @@ -42,7 +42,7 @@ extern "C" __global__ void __closesthit__() { } } else { prd->ISCount = 1; - prd->TIndex[0] = primID; + prd->primIDs[0] = primID; // ------------- SURFACE COLLISION --------------- // unsigned callIdx; diff --git a/gpu/pipelines/Particle.cuh b/gpu/pipelines/Particle.cuh index 4f7de70..bfc856c 100644 --- a/gpu/pipelines/Particle.cuh +++ b/gpu/pipelines/Particle.cuh @@ -16,7 +16,7 @@ __forceinline__ __device__ void particleCollision(viennaray::gpu::PerRayData *prd) { for (int i = 0; i < prd->ISCount; ++i) { atomicAdd(&launchParams.resultBuffer[getIdxOffset(0, launchParams) + - prd->TIndex[i]], + prd->primIDs[i]], prd->rayWeight); } } diff --git a/gpu/tests/deviceStructs/CMakeLists.txt b/gpu/tests/deviceStructs/CMakeLists.txt new file mode 100644 index 0000000..44a48f7 --- /dev/null +++ b/gpu/tests/deviceStructs/CMakeLists.txt @@ -0,0 +1,8 @@ +project(deviceStructs LANGUAGES CXX) + +add_executable(${PROJECT_NAME} "${PROJECT_NAME}.cpp" ${generated_files}) +target_include_directories(${PROJECT_NAME} PRIVATE ${OptiX_INCLUDE_DIR}) +target_link_libraries(${PROJECT_NAME} PRIVATE ViennaRay ${VIENNACORE_GPU_LIBS}) + +add_dependencies(ViennaRay-GPU_Tests ${PROJECT_NAME}) +add_test(NAME ${PROJECT_NAME} COMMAND $) diff --git a/gpu/tests/deviceStructs/deviceStructs.cpp b/gpu/tests/deviceStructs/deviceStructs.cpp new file mode 100644 index 0000000..60a80b8 --- /dev/null +++ b/gpu/tests/deviceStructs/deviceStructs.cpp @@ -0,0 +1,15 @@ +#include +#include + +#include + +int main() { + viennaray::gpu::PerRayData prd; + viennaray::gpu::LaunchParams launchParams; + + std::cout << "Size of PerRayData: " << sizeof(prd) << " bytes" << std::endl; + std::cout << "Size of LaunchParams: " << sizeof(launchParams) << " bytes" + << std::endl; + + return 0; +} \ No newline at end of file From 7b1953535965d39e40a4a7a526993297fb487139 Mon Sep 17 00:00:00 2001 From: reiter Date: Wed, 29 Oct 2025 17:03:21 +0100 Subject: [PATCH 4/9] Correct numBoundaryHits --- gpu/include/raygBoundary.hpp | 4 ++-- gpu/pipelines/GeneralPipelineDisk.cu | 1 - gpu/pipelines/GeneralPipelineLine.cu | 1 - gpu/pipelines/GeneralPipelineTriangle.cu | 1 - 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/gpu/include/raygBoundary.hpp b/gpu/include/raygBoundary.hpp index 5f423b6..a339080 100644 --- a/gpu/include/raygBoundary.hpp +++ b/gpu/include/raygBoundary.hpp @@ -16,6 +16,7 @@ __device__ __inline__ void reflectFromBoundary(viennaray::gpu::PerRayData *prd, const int D) { using namespace viennacore; const unsigned int primID = optixGetPrimitiveIndex(); + prd->numBoundaryHits++; if constexpr (std::is_same::value) { prd->pos = @@ -32,7 +33,6 @@ __device__ __inline__ void reflectFromBoundary(viennaray::gpu::PerRayData *prd, unsigned dim = primID / 4; prd->dir[dim] -= 2 * prd->dir[dim]; prd->pos[dim] = hsd->vertex[hsd->index[primID][0]][dim]; - prd->numBoundaryHits++; } else if constexpr (std::is_same::value) { prd->pos = prd->pos + prd->dir * (optixGetRayTmax()); @@ -47,6 +47,7 @@ applyPeriodicBoundary(viennaray::gpu::PerRayData *prd, const SBTData *hsd, const int D) { using namespace viennacore; const unsigned int primID = optixGetPrimitiveIndex(); + prd->numBoundaryHits++; if constexpr (std::is_same::value) { prd->pos = @@ -66,7 +67,6 @@ applyPeriodicBoundary(viennaray::gpu::PerRayData *prd, const SBTData *hsd, prd->pos = prd->pos + prd->dir * optixGetRayTmax(); unsigned dim = primID / 4; prd->pos[dim] = hsd->vertex[hsd->index[primID ^ 2][0]][dim]; - prd->numBoundaryHits++; } else if constexpr (std::is_same::value) { prd->pos = prd->pos + prd->dir * (optixGetRayTmax()); diff --git a/gpu/pipelines/GeneralPipelineDisk.cu b/gpu/pipelines/GeneralPipelineDisk.cu index bf332c0..d28a095 100644 --- a/gpu/pipelines/GeneralPipelineDisk.cu +++ b/gpu/pipelines/GeneralPipelineDisk.cu @@ -105,7 +105,6 @@ extern "C" __global__ void __closesthit__() { return; } - prd->numBoundaryHits++; if (launchParams.periodicBoundary) { applyPeriodicBoundary(prd, sbtData, launchParams.D); } else { diff --git a/gpu/pipelines/GeneralPipelineLine.cu b/gpu/pipelines/GeneralPipelineLine.cu index 68aa94d..84d2ca1 100644 --- a/gpu/pipelines/GeneralPipelineLine.cu +++ b/gpu/pipelines/GeneralPipelineLine.cu @@ -60,7 +60,6 @@ extern "C" __global__ void __closesthit__() { prd->primID = primID; if (sbtData->base.isBoundary) { - prd->numBoundaryHits++; if (launchParams.periodicBoundary) { applyPeriodicBoundary(prd, sbtData, launchParams.D); } else { diff --git a/gpu/pipelines/GeneralPipelineTriangle.cu b/gpu/pipelines/GeneralPipelineTriangle.cu index 625161c..77cd490 100644 --- a/gpu/pipelines/GeneralPipelineTriangle.cu +++ b/gpu/pipelines/GeneralPipelineTriangle.cu @@ -34,7 +34,6 @@ extern "C" __global__ void __closesthit__() { prd->primID = primID; if (sbtData->base.isBoundary) { - prd->numBoundaryHits++; if (launchParams.periodicBoundary) { applyPeriodicBoundary(prd, sbtData, launchParams.D); } else { From 972816e3250f0bc19b9db51755b94803280ce26f Mon Sep 17 00:00:00 2001 From: Tobias Reiter Date: Fri, 31 Oct 2025 14:32:26 +0100 Subject: [PATCH 5/9] Add option to precompute all normals --- gpu/include/raygLineGeometry.hpp | 2 ++ gpu/include/raygMesh.hpp | 2 ++ gpu/include/raygReflection.hpp | 15 ++++++++++----- gpu/include/raygSBTRecords.hpp | 2 +- gpu/include/raygTraceDisk.hpp | 4 ++-- gpu/include/raygTraceLine.hpp | 2 ++ gpu/include/raygTraceTriangle.hpp | 2 ++ gpu/include/raygTriangleGeometry.hpp | 3 +++ gpu/pipelines/GeneralPipelineDisk.cu | 4 ++-- 9 files changed, 26 insertions(+), 10 deletions(-) diff --git a/gpu/include/raygLineGeometry.hpp b/gpu/include/raygLineGeometry.hpp index 19b28e3..e7da715 100644 --- a/gpu/include/raygLineGeometry.hpp +++ b/gpu/include/raygLineGeometry.hpp @@ -15,6 +15,7 @@ template struct LineGeometry { // geometry CudaBuffer geometryNodesBuffer; CudaBuffer geometryLinesBuffer; + CudaBuffer geometryNormalsBuffer; // boundary CudaBuffer boundaryNodesBuffer; @@ -52,6 +53,7 @@ template struct LineGeometry { // upload the model to the device: the builder geometryNodesBuffer.allocUpload(mesh.nodes); geometryLinesBuffer.allocUpload(mesh.lines); + geometryNormalsBuffer.allocUpload(mesh.normals); // create local variables, because we need a *pointer* to the // device pointers diff --git a/gpu/include/raygMesh.hpp b/gpu/include/raygMesh.hpp index 57f4c7e..ec87938 100644 --- a/gpu/include/raygMesh.hpp +++ b/gpu/include/raygMesh.hpp @@ -13,6 +13,7 @@ using namespace viennacore; struct LineMesh { std::vector nodes; std::vector> lines; + std::vector normals; Vec3Df minimumExtent; Vec3Df maximumExtent; @@ -22,6 +23,7 @@ struct LineMesh { struct TriangleMesh { std::vector nodes; std::vector> triangles; + std::vector normals; Vec3Df minimumExtent; Vec3Df maximumExtent; diff --git a/gpu/include/raygReflection.hpp b/gpu/include/raygReflection.hpp index 0304274..9e6499b 100644 --- a/gpu/include/raygReflection.hpp +++ b/gpu/include/raygReflection.hpp @@ -13,11 +13,12 @@ using namespace viennaray::gpu; #ifdef __CUDACC__ __device__ __inline__ viennacore::Vec3Df computeNormal(const void *sbtData, const unsigned int primID) { + using namespace viennacore; const HitSBTDataBase *baseData = reinterpret_cast(sbtData); switch (baseData->geometryType) { case 0: { - using namespace viennacore; + // Triangles const HitSBTDataTriangle *sbt = reinterpret_cast(sbtData); const Vec3D &index = sbt->index[primID]; @@ -27,12 +28,11 @@ computeNormal(const void *sbtData, const unsigned int primID) { return Normalize(CrossProduct(B - A, C - A)); } break; case 1: { - const HitSBTDataDisk *sbt = - reinterpret_cast(sbtData); - return sbt->normal[primID]; + // Disks + return baseData->normal[primID]; } break; case 2: { - using namespace viennacore; + // Lines const HitSBTDataLine *sbt = reinterpret_cast(sbtData); Vec3Df p0 = sbt->nodes[sbt->lines[primID][0]]; @@ -48,6 +48,11 @@ computeNormal(const void *sbtData, const unsigned int primID) { } } +__device__ __forceinline__ viennacore::Vec3Df +getNormal(const void *sbtData, const unsigned int primID) { + return reinterpret_cast(sbtData)->normal[primID]; +} + static __device__ __forceinline__ void specularReflection(viennaray::gpu::PerRayData *prd, const viennacore::Vec3Df &geoNormal) { diff --git a/gpu/include/raygSBTRecords.hpp b/gpu/include/raygSBTRecords.hpp index d529b31..0381b8f 100644 --- a/gpu/include/raygSBTRecords.hpp +++ b/gpu/include/raygSBTRecords.hpp @@ -9,12 +9,12 @@ struct HitSBTDataBase { void *cellData; bool isBoundary; int geometryType; + viennacore::Vec3Df *normal; // optional normal buffer }; struct HitSBTDataDisk { HitSBTDataBase base; viennacore::Vec3Df *point; - viennacore::Vec3Df *normal; float radius; }; diff --git a/gpu/include/raygTraceDisk.hpp b/gpu/include/raygTraceDisk.hpp index 4f2213f..f234de6 100644 --- a/gpu/include/raygTraceDisk.hpp +++ b/gpu/include/raygTraceDisk.hpp @@ -177,13 +177,13 @@ template class TraceDisk : public Trace { optixSbtRecordPackHeader(hitgroupPG, &geometryHitgroupRecord); geometryHitgroupRecord.data.point = (Vec3Df *)diskGeometry.geometryPointBuffer.dPointer(); - geometryHitgroupRecord.data.normal = - (Vec3Df *)diskGeometry.geometryNormalBuffer.dPointer(); geometryHitgroupRecord.data.radius = diskMesh.radius; geometryHitgroupRecord.data.base.geometryType = 1; geometryHitgroupRecord.data.base.isBoundary = false; geometryHitgroupRecord.data.base.cellData = (void *)this->cellDataBuffer_.dPointer(); + geometryHitgroupRecord.data.base.normal = + (Vec3Df *)diskGeometry.geometryNormalBuffer.dPointer(); hitgroupRecords.push_back(geometryHitgroupRecord); // boundary hitgroup diff --git a/gpu/include/raygTraceLine.hpp b/gpu/include/raygTraceLine.hpp index db76c0e..1cc6b8c 100644 --- a/gpu/include/raygTraceLine.hpp +++ b/gpu/include/raygTraceLine.hpp @@ -172,6 +172,8 @@ template class TraceLine : public Trace { geometryHitgroupRecord.data.base.isBoundary = false; geometryHitgroupRecord.data.base.cellData = (void *)this->cellDataBuffer_.dPointer(); + geometryHitgroupRecord.data.base.normal = + (Vec3Df *)lineGeometry.geometryNormalsBuffer.dPointer(); hitgroupRecords.push_back(geometryHitgroupRecord); // boundary hitgroup diff --git a/gpu/include/raygTraceTriangle.hpp b/gpu/include/raygTraceTriangle.hpp index 80b40a7..df996ef 100644 --- a/gpu/include/raygTraceTriangle.hpp +++ b/gpu/include/raygTraceTriangle.hpp @@ -61,6 +61,8 @@ template class TraceTriangle : public Trace { geometryHitgroupRecord.data.base.isBoundary = false; geometryHitgroupRecord.data.base.cellData = (void *)this->cellDataBuffer_.dPointer(); + geometryHitgroupRecord.data.base.normal = + (Vec3Df *)triangleGeometry.geometryNormalBuffer.dPointer(); // add geometry hitgroup record hitgroupRecords.push_back(geometryHitgroupRecord); diff --git a/gpu/include/raygTriangleGeometry.hpp b/gpu/include/raygTriangleGeometry.hpp index 7d9a15b..582fd68 100644 --- a/gpu/include/raygTriangleGeometry.hpp +++ b/gpu/include/raygTriangleGeometry.hpp @@ -14,6 +14,7 @@ struct TriangleGeometry { // geometry CudaBuffer geometryVertexBuffer; CudaBuffer geometryIndexBuffer; + CudaBuffer geometryNormalBuffer; // boundary CudaBuffer boundaryVertexBuffer; @@ -42,6 +43,7 @@ struct TriangleGeometry { // upload the model to the device: the builder geometryVertexBuffer.allocUpload(mesh.nodes); geometryIndexBuffer.allocUpload(mesh.triangles); + geometryNormalBuffer.allocUpload(mesh.normals); // triangle inputs triangleInput[0] = {}; @@ -51,6 +53,7 @@ struct TriangleGeometry { // device pointers CUdeviceptr d_geoVertices = geometryVertexBuffer.dPointer(); CUdeviceptr d_geoIndices = geometryIndexBuffer.dPointer(); + CUdeviceptr d_geoNormals = geometryNormalBuffer.dPointer(); triangleInput[0].triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3; triangleInput[0].triangleArray.vertexStrideInBytes = sizeof(Vec3Df); diff --git a/gpu/pipelines/GeneralPipelineDisk.cu b/gpu/pipelines/GeneralPipelineDisk.cu index bf332c0..df47d1d 100644 --- a/gpu/pipelines/GeneralPipelineDisk.cu +++ b/gpu/pipelines/GeneralPipelineDisk.cu @@ -32,7 +32,7 @@ extern "C" __global__ void __intersection__() { // Read geometric data from the primitive that is inside that AABB box const Vec3Df diskOrigin = sbtData->point[primID]; - const Vec3Df normal = sbtData->normal[primID]; + const Vec3Df normal = sbtData->base.normal[primID]; const float radius = sbtData->radius; bool valid = true; @@ -78,7 +78,7 @@ extern "C" __global__ void __closesthit__() { prd->tMin = optixGetRayTmax() - launchParams.tThreshold; prd->primID = primID; - const Vec3Df normal = sbtData->normal[primID]; + const Vec3Df &normal = sbtData->base.normal[primID]; // If closest hit was on backside, let it through once if (DotProduct(prd->dir, normal) > 0.0f) { From 389c01648511b46f3be5a8e6fe500f88a47d6627 Mon Sep 17 00:00:00 2001 From: Tobias Reiter Date: Fri, 31 Oct 2025 14:42:09 +0100 Subject: [PATCH 6/9] Fix type --- gpu/include/raygTraceDisk.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu/include/raygTraceDisk.hpp b/gpu/include/raygTraceDisk.hpp index f234de6..504de11 100644 --- a/gpu/include/raygTraceDisk.hpp +++ b/gpu/include/raygTraceDisk.hpp @@ -191,7 +191,7 @@ template class TraceDisk : public Trace { optixSbtRecordPackHeader(hitgroupPG, &boundaryHitgroupRecord); boundaryHitgroupRecord.data.point = (Vec3Df *)diskGeometry.boundaryPointBuffer.dPointer(); - boundaryHitgroupRecord.data.normal = + boundaryHitgroupRecord.data.base.normal = (Vec3Df *)diskGeometry.boundaryNormalBuffer.dPointer(); boundaryHitgroupRecord.data.radius = diskGeometry.boundaryRadius; boundaryHitgroupRecord.data.base.geometryType = 1; From f23b1e090856b8c30a12fed78a81415e050be4f2 Mon Sep 17 00:00:00 2001 From: reiter Date: Mon, 3 Nov 2025 09:44:09 +0100 Subject: [PATCH 7/9] Small fixes --- gpu/include/raygRNG.hpp | 7 +++--- gpu/include/raygTrace.hpp | 45 ++++++++++++++++++--------------------- 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/gpu/include/raygRNG.hpp b/gpu/include/raygRNG.hpp index b4742b2..db6c302 100644 --- a/gpu/include/raygRNG.hpp +++ b/gpu/include/raygRNG.hpp @@ -37,10 +37,9 @@ __device__ __inline__ float getNextRand(viennaray::gpu::RNGState *state) { } __device__ __inline__ float getNormalDistRand(viennaray::gpu::RNGState *state) { - float u0 = curand_uniform(state); - float u1 = curand_uniform(state); - float r = sqrtf(-2.f * logf(u0)); - float theta = 2.f * M_PIf * u1; + float4 u0 = curand_uniform4(state); + float r = sqrtf(-2.f * logf(u0.x)); + float theta = 2.f * M_PIf * u0.y; return r * sinf(theta); } #endif diff --git a/gpu/include/raygTrace.hpp b/gpu/include/raygTrace.hpp index d292a90..237ddbc 100644 --- a/gpu/include/raygTrace.hpp +++ b/gpu/include/raygTrace.hpp @@ -89,7 +89,7 @@ template class Trace { .print(); } - // resize our cuda result buffer + // Resize our cuda result buffer resultBuffer.allocInit(launchParams.numElements * numFluxes_, float(0)); launchParams.resultBuffer = (float *)resultBuffer.dPointer(); @@ -104,6 +104,7 @@ template class Trace { launchParams.seed = gen(rd); } + // Threshold value for neighbor detection in disk-based geometries launchParams.tThreshold = 1.1 * gridDelta_; // TODO: find the best value int numPointsPerDim = @@ -220,30 +221,25 @@ template class Trace { } #endif - // std::cout << util::prettyDouble(numRays * particles.size()) << std::endl; - // float *temp = new float[launchParams.numElements]; - // resultBuffer.download(temp, launchParams.numElements); - // for (int i = 0; i < launchParams.numElements; i++) { - // std::cout << temp[i] << " "; - // } - // delete temp; - - // sync - maybe remove in future - // cudaDeviceSynchronize(); + // sync for (auto &s : streams) { CUDA_CHECK(StreamSynchronize(s)); CUDA_CHECK(StreamDestroy(s)); } - normalize(); + + normalize(); // on device results.resize(launchParams.numElements * numFluxes_); - // cudaDeviceSynchronize(); // download is sync anyway resultBuffer.download(results.data(), launchParams.numElements * numFluxes_); } void setElementData(CudaBuffer &passedCellDataBuffer, unsigned numData) { - assert(passedCellDataBuffer.sizeInBytes / sizeof(float) / numData == - launchParams.numElements); + if (passedCellDataBuffer.sizeInBytes / sizeof(float) / numData != + launchParams.numElements) { + Logger::getInstance() + .addWarning("Passed cell data does not match number of elements.") + .print(); + } cellDataBuffer_ = passedCellDataBuffer; numCellData = numData; } @@ -295,10 +291,9 @@ template class Trace { config_.useRandomSeed = false; } - void - setParticleCallableMap(std::tuple, - std::vector> - maps) { + void setParticleCallableMap( + std::tuple, + std::vector> const &maps) { particleMap_ = std::get<0>(maps); callableMap_ = std::get<1>(maps); } @@ -321,8 +316,6 @@ template class Trace { std::memcpy(flux, temp.data(), launchParams.numElements * sizeof(float)); } - virtual void smoothFlux(std::vector &flux, int smoothingNeighbors) {} - void setUseCellData(unsigned numData) { numCellData = numData; } void setPeriodicBoundary(const bool periodic) { @@ -391,9 +384,15 @@ template class Trace { launchParams.customData = (void *)d_params; } + virtual void smoothFlux(std::vector &flux, int smoothingNeighbors) {} + protected: - virtual void normalize() {} + // To be implemented by derived classes + virtual void normalize() = 0; + + virtual void buildHitGroups() = 0; +private: void initRayTracer() { context_->addModule(normModuleName); normKernelName.append(geometryType_ + "_f"); @@ -615,8 +614,6 @@ template class Trace { 1)); // nested traversable graph depth } - virtual void buildHitGroups() {} - /// constructs the shader binding table void generateSBT() { // build raygen record From ae2da6cd6f9e7badf4f38f496d51cc60ec9624b7 Mon Sep 17 00:00:00 2001 From: reiter Date: Mon, 3 Nov 2025 09:59:54 +0100 Subject: [PATCH 8/9] Bump version --- CMakeLists.txt | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c458dec..5e75248 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.20 FATAL_ERROR) project( ViennaRay LANGUAGES CXX - VERSION 3.6.0) + VERSION 3.6.1) # -------------------------------------------------------------------------------------------------------- # Library switches diff --git a/README.md b/README.md index 3a001c9..033caa9 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ We recommend using [CPM.cmake](https://github.com/cpm-cmake/CPM.cmake) to consum * Installation with CPM ```cmake - CPMAddPackage("gh:viennatools/viennaray@3.6.0") + CPMAddPackage("gh:viennatools/viennaray@3.6.1") ``` * With a local installation From 13c55abf35c5e0cdbda9b478ccd15b7a2fd7387e Mon Sep 17 00:00:00 2001 From: reiter Date: Mon, 3 Nov 2025 11:00:23 +0100 Subject: [PATCH 9/9] Remove unnecessary enums --- gpu/examples/CMakeLists.txt | 3 ++- gpu/include/raygPerRayData.hpp | 2 ++ gpu/include/raygReflection.hpp | 6 +++--- gpu/pipelines/GeneralPipelineDisk.cu | 12 +++++------- gpu/pipelines/GeneralPipelineLine.cu | 12 +++++------- gpu/pipelines/GeneralPipelineTriangle.cu | 10 ++++------ 6 files changed, 21 insertions(+), 24 deletions(-) diff --git a/gpu/examples/CMakeLists.txt b/gpu/examples/CMakeLists.txt index 44b0c53..9b8bde3 100644 --- a/gpu/examples/CMakeLists.txt +++ b/gpu/examples/CMakeLists.txt @@ -7,7 +7,8 @@ configure_file(Resources/trenchMesh.dat ${CMAKE_CURRENT_BINARY_DIR}/trenchMesh.d add_dependencies(ViennaRay-GPU_Examples ${target_name}) add_gpu_executable(GPU_trenchDisks target_name trenchDisks.cpp) -configure_file(${CMAKE_SOURCE_DIR}/examples/trench/trenchGrid3D.dat ${CMAKE_CURRENT_BINARY_DIR}/trenchGrid3D.dat COPYONLY) +configure_file(${CMAKE_SOURCE_DIR}/examples/trench/trenchGrid3D.dat + ${CMAKE_CURRENT_BINARY_DIR}/trenchGrid3D.dat COPYONLY) add_dependencies(ViennaRay-GPU_Examples ${target_name}) add_gpu_executable(GPU_trenchLines target_name trenchLines.cpp) diff --git a/gpu/include/raygPerRayData.hpp b/gpu/include/raygPerRayData.hpp index 9a2bd14..d826dc2 100644 --- a/gpu/include/raygPerRayData.hpp +++ b/gpu/include/raygPerRayData.hpp @@ -12,6 +12,8 @@ namespace viennaray::gpu { +// Per-ray data structure associated with each ray. Should be kept small to +// optimize memory usage and performance. struct PerRayData { // Position and direction viennacore::Vec3Df pos; diff --git a/gpu/include/raygReflection.hpp b/gpu/include/raygReflection.hpp index 9e6499b..7ef160b 100644 --- a/gpu/include/raygReflection.hpp +++ b/gpu/include/raygReflection.hpp @@ -168,7 +168,7 @@ PickRandomPointOnUnitSphere(viennaray::gpu::RNGState *state) { static __device__ void diffuseReflection(viennaray::gpu::PerRayData *prd, const viennacore::Vec3Df &geoNormal, - const int D) { + const uint8_t D) { using namespace viennacore; #ifndef VIENNARAY_TEST prd->pos = prd->pos + prd->tMin * prd->dir; @@ -183,7 +183,7 @@ static __device__ void diffuseReflection(viennaray::gpu::PerRayData *prd, } static __device__ void diffuseReflection(viennaray::gpu::PerRayData *prd, - const int D) { + const uint8_t D) { using namespace viennacore; const viennaray::gpu::HitSBTDataDisk *sbtData = @@ -195,7 +195,7 @@ static __device__ void diffuseReflection(viennaray::gpu::PerRayData *prd, static __device__ __forceinline__ void conedCosineReflection(viennaray::gpu::PerRayData *prd, const viennacore::Vec3Df &geomNormal, - const float maxConeAngle, const int D) { + const float maxConeAngle, const uint8_t D) { using namespace viennacore; // TODO: Is this needed? Done in the CPU version diff --git a/gpu/pipelines/GeneralPipelineDisk.cu b/gpu/pipelines/GeneralPipelineDisk.cu index 19f0536..ef165d6 100644 --- a/gpu/pipelines/GeneralPipelineDisk.cu +++ b/gpu/pipelines/GeneralPipelineDisk.cu @@ -20,8 +20,6 @@ using namespace viennacore; extern "C" __constant__ viennaray::gpu::LaunchParams launchParams; -enum { SURFACE_RAY_TYPE = 0, RAY_TYPE_COUNT }; - extern "C" __global__ void __intersection__() { const HitSBTDataDisk *sbtData = (const HitSBTDataDisk *)optixGetSbtDataPointer(); @@ -186,10 +184,10 @@ extern "C" __global__ void __raygen__() { 0.0f, // rayTime OptixVisibilityMask(255), OPTIX_RAY_FLAG_DISABLE_ANYHIT, // OPTIX_RAY_FLAG_NONE, - SURFACE_RAY_TYPE, // SBT offset - RAY_TYPE_COUNT, // SBT stride - SURFACE_RAY_TYPE, // missSBTIndex - u0, u1); - prd.totalCount = 0; // Reset PerRayData + 0, // SBT offset + 1, // SBT stride + 0, // missSBTIndex + u0, u1); // Payload + prd.totalCount = 0; // Reset PerRayData } } diff --git a/gpu/pipelines/GeneralPipelineLine.cu b/gpu/pipelines/GeneralPipelineLine.cu index 84d2ca1..0ddfa03 100644 --- a/gpu/pipelines/GeneralPipelineLine.cu +++ b/gpu/pipelines/GeneralPipelineLine.cu @@ -20,8 +20,6 @@ using namespace viennacore; extern "C" __constant__ viennaray::gpu::LaunchParams launchParams; -enum { SURFACE_RAY_TYPE = 0, RAY_TYPE_COUNT }; - extern "C" __global__ void __intersection__() { const HitSBTDataLine *sbtData = (const HitSBTDataLine *)optixGetSbtDataPointer(); @@ -123,10 +121,10 @@ extern "C" __global__ void __raygen__() { 0.0f, // rayTime OptixVisibilityMask(255), OPTIX_RAY_FLAG_DISABLE_ANYHIT, // OPTIX_RAY_FLAG_NONE, - SURFACE_RAY_TYPE, // SBT offset - RAY_TYPE_COUNT, // SBT stride - SURFACE_RAY_TYPE, // missSBTIndex - u0, u1); - prd.totalCount = 0; // Reset PerRayData + 0, // SBT offset + 1, // SBT stride + 0, // missSBTIndex + u0, u1); // Payload + prd.totalCount = 0; // Reset PerRayData } } diff --git a/gpu/pipelines/GeneralPipelineTriangle.cu b/gpu/pipelines/GeneralPipelineTriangle.cu index 77cd490..4f65bf9 100644 --- a/gpu/pipelines/GeneralPipelineTriangle.cu +++ b/gpu/pipelines/GeneralPipelineTriangle.cu @@ -22,8 +22,6 @@ using namespace viennacore; extern "C" __constant__ viennaray::gpu::LaunchParams launchParams; -enum { SURFACE_RAY_TYPE = 0, RAY_TYPE_COUNT }; - extern "C" __global__ void __closesthit__() { const HitSBTDataTriangle *sbtData = (const HitSBTDataTriangle *)optixGetSbtDataPointer(); @@ -98,10 +96,10 @@ extern "C" __global__ void __raygen__() { 0.0f, // rayTime OptixVisibilityMask(255), OPTIX_RAY_FLAG_DISABLE_ANYHIT, // OPTIX_RAY_FLAG_NONE, - SURFACE_RAY_TYPE, // SBT offset - RAY_TYPE_COUNT, // SBT stride - SURFACE_RAY_TYPE, // missSBTIndex - u0, u1); + 0, // SBT offset + 1, // SBT stride + 0, // missSBTIndex + u0, u1); // Payload #ifdef COUNT_RAYS int *counter = reinterpret_cast(launchParams.customData);