diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1f80d3b..ca117c3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -43,6 +43,12 @@ jobs: with: os: ${{ matrix.os }} + - name: ๐Ÿ“‹ Install MacOS Dependencies + shell: bash + if: ${{ matrix.os == 'macos-latest' }} + run: | + brew install embree tbb + - name: ๐Ÿฆฅ Cache Dependencies uses: actions/cache@v4 with: @@ -50,7 +56,7 @@ jobs: path: build - name: ๐Ÿ—๏ธ Compile - run: cmake -B build -DCPM_SOURCE_CACHE=deps-cache -DVIENNARAY_BUILD_TESTS=ON && cmake --build build --config ${{ matrix.config }} + run: cmake -DVIENNARAY_BUILD_TESTS=ON -B build && cmake --build build --config ${{ matrix.config }} - name: ๐Ÿงช Test - run: ctest -C ${{ matrix.config }} --test-dir build + run: ctest -E "Benchmark|Performance" -C ${{ matrix.config }} --test-dir build diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ded2ee..e2e5f0c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.20 FATAL_ERROR) project( ViennaRay LANGUAGES CXX - VERSION 3.7.0) + VERSION 3.7.1) # -------------------------------------------------------------------------------------------------------- # Library switches @@ -92,7 +92,7 @@ include("cmake/cpm.cmake") CPMAddPackage( NAME ViennaCore - VERSION 1.6.2 + VERSION 1.6.3 GIT_REPOSITORY "https://github.com/ViennaTools/ViennaCore" OPTIONS "VIENNACORE_USE_GPU ${VIENNARAY_USE_GPU}") diff --git a/README.md b/README.md index 101ea32..258f9ad 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ We recommend using [CPM.cmake](https://github.com/cpm-cmake/CPM.cmake) to consum * Installation with CPM ```cmake - CPMAddPackage("gh:viennatools/viennaray@3.7.0") + CPMAddPackage("gh:viennatools/viennaray@3.7.1") # Use the latest release version ``` * With a local installation diff --git a/examples/disk2D/disk2D.cpp b/examples/disk2D/disk2D.cpp index d27b80b..9834bd0 100644 --- a/examples/disk2D/disk2D.cpp +++ b/examples/disk2D/disk2D.cpp @@ -49,7 +49,12 @@ int main() { rayTracer.setNumberOfRaysPerPoint(2000); // Run the ray tracer + Timer timer; + timer.start(); rayTracer.apply(); + timer.finish(); + + std::cout << "Tracing time: " << timer.currentDuration / 1e9 << " s\n"; // Extract the normalized hit counts for each geometry point auto &flux = rayTracer.getLocalData().getVectorData("flux"); diff --git a/examples/triangle3D/CMakeLists.txt b/examples/triangle3D/CMakeLists.txt index 15e4b9d..af6f95a 100644 --- a/examples/triangle3D/CMakeLists.txt +++ b/examples/triangle3D/CMakeLists.txt @@ -2,7 +2,6 @@ project(triangle3D LANGUAGES CXX) add_executable(${PROJECT_NAME} "${PROJECT_NAME}.cpp") target_link_libraries(${PROJECT_NAME} PRIVATE ViennaRay) -configure_file(${CMAKE_SOURCE_DIR}/gpu/examples/Resources/trenchMesh.dat - ${CMAKE_CURRENT_BINARY_DIR}/trenchMesh.dat COPYONLY) +configure_file(trenchMesh.dat ${CMAKE_CURRENT_BINARY_DIR}/trenchMesh.dat COPYONLY) add_dependencies(ViennaRay_Examples ${PROJECT_NAME}) diff --git a/gpu/examples/Resources/trenchMesh.dat b/examples/triangle3D/trenchMesh.dat similarity index 99% rename from gpu/examples/Resources/trenchMesh.dat rename to examples/triangle3D/trenchMesh.dat index a8dd5ef..f611620 100644 --- a/gpu/examples/Resources/trenchMesh.dat +++ b/examples/triangle3D/trenchMesh.dat @@ -1,6 +1,6 @@ grid_delta 1 min_extent -25 -25 -40 -max_extent 25 25 2.22507e-308 +max_extent 25 25 0 n_nodes 6579 n_triangles 12800 n -7 -24 -40 diff --git a/gpu/examples/CMakeLists.txt b/gpu/examples/CMakeLists.txt index 470b201..f4dd7df 100644 --- a/gpu/examples/CMakeLists.txt +++ b/gpu/examples/CMakeLists.txt @@ -3,7 +3,8 @@ project(ViennaRay-GPU_Examples) add_custom_target(ViennaRay-GPU_Examples ALL) add_gpu_executable(GPU_trenchTriangles target_name trenchTriangles.cpp) -configure_file(Resources/trenchMesh.dat ${CMAKE_CURRENT_BINARY_DIR}/trenchMesh.dat COPYONLY) +configure_file(${CMAKE_SOURCE_DIR}/examples/triangle3D/trenchMesh.dat + ${CMAKE_CURRENT_BINARY_DIR}/trenchMesh.dat COPYONLY) add_dependencies(ViennaRay-GPU_Examples ${target_name}) add_gpu_executable(GPU_trenchDisks target_name trenchDisks.cpp) diff --git a/gpu/examples/Resources/CMakeLists.txt b/gpu/examples/Resources/CMakeLists.txt deleted file mode 100644 index a3efd4d..0000000 --- a/gpu/examples/Resources/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -configure_file(trenchMesh.dat ${CMAKE_CURRENT_BINARY_DIR}/trenchMesh.dat COPYONLY) diff --git a/gpu/include/raygBoundary.hpp b/gpu/include/raygBoundary.hpp index a339080..ef477e2 100644 --- a/gpu/include/raygBoundary.hpp +++ b/gpu/include/raygBoundary.hpp @@ -10,15 +10,18 @@ extern "C" __constant__ viennaray::gpu::LaunchParams launchParams; // this can only get compiled if included in a cuda kernel #ifdef __CUDACC__ + +namespace viennaray::gpu { + +using namespace viennacore; + template -__device__ __inline__ void reflectFromBoundary(viennaray::gpu::PerRayData *prd, - const SBTData *hsd, - const int D) { - using namespace viennacore; +__device__ __inline__ void +reflectFromBoundary(PerRayData *prd, const SBTData *hsd, const int D) { const unsigned int primID = optixGetPrimitiveIndex(); prd->numBoundaryHits++; - if constexpr (std::is_same::value) { + if constexpr (std::is_same::value) { prd->pos = prd->pos + prd->dir * (optixGetRayTmax() - launchParams.tThreshold); if (primID == 0 || primID == 1) { @@ -26,15 +29,12 @@ __device__ __inline__ void reflectFromBoundary(viennaray::gpu::PerRayData *prd, } else if ((primID == 2 || primID == 3) && D == 3) { prd->dir[1] -= 2 * prd->dir[1]; // y boundary } - } else if constexpr (std::is_same< - SBTData, - viennaray::gpu::HitSBTDataTriangle>::value) { + } else if constexpr (std::is_same::value) { prd->pos = prd->pos + prd->dir * optixGetRayTmax(); unsigned dim = primID / 4; prd->dir[dim] -= 2 * prd->dir[dim]; prd->pos[dim] = hsd->vertex[hsd->index[primID][0]][dim]; - } else if constexpr (std::is_same::value) { + } else if constexpr (std::is_same::value) { prd->pos = prd->pos + prd->dir * (optixGetRayTmax()); if (primID == 0 || primID == 1) // x boundary prd->dir[0] -= 2 * prd->dir[0]; @@ -43,13 +43,12 @@ __device__ __inline__ void reflectFromBoundary(viennaray::gpu::PerRayData *prd, template __device__ __inline__ void -applyPeriodicBoundary(viennaray::gpu::PerRayData *prd, const SBTData *hsd, - const int D) { +applyPeriodicBoundary(PerRayData *prd, const SBTData *hsd, const int D) { using namespace viennacore; const unsigned int primID = optixGetPrimitiveIndex(); prd->numBoundaryHits++; - if constexpr (std::is_same::value) { + if constexpr (std::is_same::value) { prd->pos = prd->pos + prd->dir * (optixGetRayTmax() - launchParams.tThreshold); if (primID == 0) { // xmin @@ -61,14 +60,11 @@ applyPeriodicBoundary(viennaray::gpu::PerRayData *prd, const SBTData *hsd, } else if (D == 3 && primID == 3) { // ymax prd->pos[1] = hsd->point[2][1]; } - } else if constexpr (std::is_same< - SBTData, - viennaray::gpu::HitSBTDataTriangle>::value) { + } else if constexpr (std::is_same::value) { prd->pos = prd->pos + prd->dir * optixGetRayTmax(); unsigned dim = primID / 4; prd->pos[dim] = hsd->vertex[hsd->index[primID ^ 2][0]][dim]; - } else if constexpr (std::is_same::value) { + } else if constexpr (std::is_same::value) { prd->pos = prd->pos + prd->dir * (optixGetRayTmax()); if (primID == 0) { // xmin prd->pos[0] = hsd->nodes[1][0]; @@ -77,4 +73,5 @@ applyPeriodicBoundary(viennaray::gpu::PerRayData *prd, const SBTData *hsd, } } } +} // namespace viennaray::gpu #endif diff --git a/gpu/include/raygPerRayData.hpp b/gpu/include/raygPerRayData.hpp index d826dc2..d8b4cd5 100644 --- a/gpu/include/raygPerRayData.hpp +++ b/gpu/include/raygPerRayData.hpp @@ -12,12 +12,14 @@ namespace viennaray::gpu { +using namespace viennacore; + // Per-ray data structure associated with each ray. Should be kept small to // optimize memory usage and performance. struct PerRayData { // Position and direction - viennacore::Vec3Df pos; - viennacore::Vec3Df dir; + Vec3Df pos; + Vec3Df dir; // Simulation specific data float rayWeight = 1.f; @@ -40,8 +42,6 @@ struct PerRayData { bool hitFromBack = false; }; -} // namespace viennaray::gpu - // this can only get compiled if included in a cuda kernel #ifdef __CUDACC__ static __forceinline__ __device__ void *unpackPointer(uint32_t i0, @@ -64,10 +64,12 @@ template static __forceinline__ __device__ T *getPRD() { return reinterpret_cast(unpackPointer(u0, u1)); } -static __device__ void initializeRNGState(viennaray::gpu::PerRayData *prd, +static __device__ void initializeRNGState(PerRayData *prd, unsigned int linearLaunchIndex, unsigned int seed) { auto rngSeed = tea<4>(linearLaunchIndex, seed); curand_init(rngSeed, 0, 0, &prd->RNGstate); } #endif + +} // namespace viennaray::gpu diff --git a/gpu/include/raygRNG.hpp b/gpu/include/raygRNG.hpp index db6c302..e4abd75 100644 --- a/gpu/include/raygRNG.hpp +++ b/gpu/include/raygRNG.hpp @@ -7,8 +7,6 @@ namespace viennaray::gpu { typedef curandStatePhilox4_32_10_t RNGState; -} - // Other possible RNGState types: // typedef curandStateXORWOW_t curtRNGState; // bad // typedef curandStateMRG32k3a_t curtRNGState // not tested @@ -17,10 +15,8 @@ typedef curandStatePhilox4_32_10_t RNGState; #ifdef __CUDACC__ template -static __device__ __inline__ unsigned int tea(unsigned int val0, - unsigned int val1) { - unsigned int v0 = val0; - unsigned int v1 = val1; +static __device__ __inline__ unsigned int tea(unsigned int v0, + unsigned int v1) { unsigned int s0 = 0; for (unsigned int n = 0; n < N; n++) { @@ -32,14 +28,16 @@ static __device__ __inline__ unsigned int tea(unsigned int val0, return v0; } -__device__ __inline__ float getNextRand(viennaray::gpu::RNGState *state) { +__device__ __inline__ float getNextRand(RNGState *state) { return (float)(curand_uniform(state)); } -__device__ __inline__ float getNormalDistRand(viennaray::gpu::RNGState *state) { +__device__ __inline__ float getNormalDistRand(RNGState *state) { float4 u0 = curand_uniform4(state); float r = sqrtf(-2.f * logf(u0.x)); float theta = 2.f * M_PIf * u0.y; return r * sinf(theta); } #endif + +} // namespace viennaray::gpu diff --git a/gpu/include/raygReflection.hpp b/gpu/include/raygReflection.hpp index c694498..c6e1bd5 100644 --- a/gpu/include/raygReflection.hpp +++ b/gpu/include/raygReflection.hpp @@ -1,5 +1,8 @@ #pragma once +// Device code for handling reflections on various geometry types + +#ifdef __CUDACC__ #include #include "raygPerRayData.hpp" @@ -8,12 +11,12 @@ #include -using namespace viennaray::gpu; +namespace viennaray::gpu { -#ifdef __CUDACC__ -__device__ __inline__ viennacore::Vec3Df -computeNormal(const void *sbtData, const unsigned int primID) { - using namespace viennacore; +using namespace viennacore; + +__device__ __inline__ Vec3Df computeNormal(const void *sbtData, + const unsigned int primID) { const HitSBTDataBase *baseData = reinterpret_cast(sbtData); switch (baseData->geometryType) { @@ -48,23 +51,20 @@ computeNormal(const void *sbtData, const unsigned int primID) { } } -__device__ __forceinline__ viennacore::Vec3Df -getNormal(const void *sbtData, const unsigned int primID) { +__device__ __forceinline__ Vec3Df getNormal(const void *sbtData, + const unsigned int primID) { return reinterpret_cast(sbtData)->normal[primID]; } static __device__ __forceinline__ void -specularReflection(viennaray::gpu::PerRayData *prd, - const viennacore::Vec3Df &geoNormal) { - using namespace viennacore; +specularReflection(PerRayData *prd, const Vec3Df &geoNormal) { #ifndef VIENNARAY_TEST prd->pos = prd->pos + prd->tMin * prd->dir; #endif prd->dir = prd->dir - (2 * DotProduct(prd->dir, geoNormal)) * geoNormal; } -static __device__ viennacore::Vec3Df -PickRandomPointOnUnitSphere(viennaray::gpu::RNGState *state) { +static __device__ Vec3Df PickRandomPointOnUnitSphere(RNGState *state) { const float4 u = curand_uniform4(state); // (0,1] const float z = 1.0f - 2.0f * u.x; // uniform in [-1,1] const float r2 = fmaxf(0.0f, 1.0f - z * z); @@ -72,13 +72,11 @@ PickRandomPointOnUnitSphere(viennaray::gpu::RNGState *state) { const float phi = 2.0f * M_PIf * u.y; float s, c; __sincosf(phi, &s, &c); // branch-free sin+cos - return viennacore::Vec3Df{r * c, r * s, z}; + return Vec3Df{r * c, r * s, z}; } -static __device__ void diffuseReflection(viennaray::gpu::PerRayData *prd, - const viennacore::Vec3Df &geoNormal, - const uint8_t D) { - using namespace viennacore; +static __device__ void +diffuseReflection(PerRayData *prd, const Vec3Df &geoNormal, const uint8_t D) { #ifndef VIENNARAY_TEST prd->pos = prd->pos + prd->tMin * prd->dir; #endif @@ -91,22 +89,17 @@ static __device__ void diffuseReflection(viennaray::gpu::PerRayData *prd, Normalize(prd->dir); } -static __device__ void diffuseReflection(viennaray::gpu::PerRayData *prd, - const uint8_t D) { - using namespace viennacore; +static __device__ void diffuseReflection(PerRayData *prd, const uint8_t D) { - const viennaray::gpu::HitSBTDataDisk *sbtData = - (const viennaray::gpu::HitSBTDataDisk *)optixGetSbtDataPointer(); + const HitSBTDataDisk *sbtData = + (const HitSBTDataDisk *)optixGetSbtDataPointer(); const Vec3Df geoNormal = computeNormal(sbtData, optixGetPrimitiveIndex()); diffuseReflection(prd, geoNormal, D); } static __device__ __forceinline__ void -conedCosineReflection(viennaray::gpu::PerRayData *prd, - const viennacore::Vec3Df &geomNormal, +conedCosineReflection(PerRayData *prd, const Vec3Df &geomNormal, const float maxConeAngle, const uint8_t D) { - using namespace viennacore; - // Calculate specular direction specularReflection(prd, geomNormal); @@ -165,4 +158,5 @@ conedCosineReflection(viennaray::gpu::PerRayData *prd, Normalize(dir); prd->dir = dir; } +} // namespace viennaray::gpu #endif diff --git a/gpu/include/raygSBTRecords.hpp b/gpu/include/raygSBTRecords.hpp index 0381b8f..64a0cc0 100644 --- a/gpu/include/raygSBTRecords.hpp +++ b/gpu/include/raygSBTRecords.hpp @@ -5,29 +5,31 @@ namespace viennaray::gpu { +using namespace viennacore; + struct HitSBTDataBase { void *cellData; bool isBoundary; int geometryType; - viennacore::Vec3Df *normal; // optional normal buffer + Vec3Df *normal; // optional normal buffer }; struct HitSBTDataDisk { HitSBTDataBase base; - viennacore::Vec3Df *point; + Vec3Df *point; float radius; }; struct HitSBTDataTriangle { HitSBTDataBase base; - viennacore::Vec3Df *vertex; - viennacore::Vec3D *index; + Vec3Df *vertex; + Vec3D *index; }; struct HitSBTDataLine { HitSBTDataBase base; - viennacore::Vec3Df *nodes; - viennacore::Vec2D *lines; + Vec3Df *nodes; + Vec2D *lines; }; // SBT record for a raygen program diff --git a/gpu/include/raygSource.hpp b/gpu/include/raygSource.hpp index 8dd2b03..f5bbcf2 100644 --- a/gpu/include/raygSource.hpp +++ b/gpu/include/raygSource.hpp @@ -1,28 +1,32 @@ #pragma once +#ifdef __CUDACC__ #include "raygLaunchParams.hpp" #include "raygPerRayData.hpp" #include -#ifdef __CUDACC__ -__device__ __forceinline__ std::array -getOrthonormalBasis(const viennacore::Vec3Df &n) { +namespace viennaray::gpu { + +using namespace viennacore; + +__device__ __forceinline__ std::array +getOrthonormalBasis(const Vec3Df &n) { // Frisvad 2012 (branchless) const float sign = copysignf(1.0f, n[2]); const float a = -1.0f / (sign + n[2]); const float b = n[0] * n[1] * a; - viennacore::Vec3Df t{1.0f + sign * n[0] * n[0] * a, sign * b, -sign * n[0]}; - viennacore::Vec3Df b2{b, sign + n[1] * n[1] * a, -n[1]}; + Vec3Df t{1.0f + sign * n[0] * n[0] * a, sign * b, -sign * n[0]}; + Vec3Df b2{b, sign + n[1] * n[1] * a, -n[1]}; // (t, b2) are already unit and orthogonal to n; no extra normalize needed. return {n, t, b2}; } -__device__ void initializeRayDirection(viennaray::gpu::PerRayData *prd, - const float power, const uint16_t D) { +__device__ void initializeRayDirection(PerRayData *prd, const float power, + const uint16_t D) { // source direction const float4 u = curand_uniform4(&prd->RNGstate); // (0,1] const float tt = powf(u.w, 2.f / (power + 1.f)); @@ -37,18 +41,17 @@ __device__ void initializeRayDirection(viennaray::gpu::PerRayData *prd, prd->dir[1] = s * sqrt1mtt; prd->dir[2] = -1.f * sqrtf(tt); } - viennacore::Normalize(prd->dir); + Normalize(prd->dir); } -__device__ void -initializeRayDirection(viennaray::gpu::PerRayData *prd, const float power, - const std::array &basis, - const uint16_t D) { +__device__ void initializeRayDirection(PerRayData *prd, const float power, + const std::array &basis, + const uint16_t D) { // source direction do { const float4 u = curand_uniform4(&prd->RNGstate); // (0,1] const float tt = powf(u.w, 2.f / (power + 1.f)); - viennacore::Vec3Df rndDirection; + Vec3Df rndDirection; rndDirection[0] = sqrtf(tt); float s, c, sqrt1mtt = sqrtf(1 - tt); __sincosf(2.f * M_PIf * u.x, &s, &c); @@ -66,13 +69,12 @@ initializeRayDirection(viennaray::gpu::PerRayData *prd, const float power, if (D == 2) prd->dir[2] = 0.f; - viennacore::Normalize(prd->dir); + Normalize(prd->dir); } -__device__ void -initializeRayPosition(viennaray::gpu::PerRayData *prd, - const viennaray::gpu::LaunchParams::SourcePlane &source, - const uint16_t D) { +__device__ void initializeRayPosition(PerRayData *prd, + const LaunchParams::SourcePlane &source, + const uint16_t D) { const float4 u = curand_uniform4(&prd->RNGstate); // (0,1] prd->pos[0] = source.minPoint[0] + u.x * (source.maxPoint[0] - source.minPoint[0]); @@ -88,9 +90,8 @@ initializeRayPosition(viennaray::gpu::PerRayData *prd, } // This is slightly faster because there is only one call to curand_uniform4 -__device__ void -initializeRayPositionAndDirection(viennaray::gpu::PerRayData *prd, - viennaray::gpu::LaunchParams *launchParams) { +__device__ void initializeRayPositionAndDirection(PerRayData *prd, + LaunchParams *launchParams) { const float4 u = curand_uniform4(&prd->RNGstate); // (0,1] prd->pos[0] = launchParams->source.minPoint[0] + u.x * (launchParams->source.maxPoint[0] - @@ -107,6 +108,7 @@ initializeRayPositionAndDirection(viennaray::gpu::PerRayData *prd, prd->dir[0] = c * sqrt1mtt; prd->dir[1] = s * sqrt1mtt; prd->dir[2] = -1.f * sqrtf(tt); - viennacore::Normalize(prd->dir); + Normalize(prd->dir); } +} // namespace viennaray::gpu #endif diff --git a/gpu/pipelines/GeneralPipelineDisk.cu b/gpu/pipelines/GeneralPipelineDisk.cu index ef165d6..a6928ed 100644 --- a/gpu/pipelines/GeneralPipelineDisk.cu +++ b/gpu/pipelines/GeneralPipelineDisk.cu @@ -16,7 +16,6 @@ #include using namespace viennaray::gpu; -using namespace viennacore; extern "C" __constant__ viennaray::gpu::LaunchParams launchParams; diff --git a/gpu/pipelines/GeneralPipelineLine.cu b/gpu/pipelines/GeneralPipelineLine.cu index 0ddfa03..86a511f 100644 --- a/gpu/pipelines/GeneralPipelineLine.cu +++ b/gpu/pipelines/GeneralPipelineLine.cu @@ -16,7 +16,6 @@ #include using namespace viennaray::gpu; -using namespace viennacore; extern "C" __constant__ viennaray::gpu::LaunchParams launchParams; diff --git a/gpu/pipelines/GeneralPipelineTriangle.cu b/gpu/pipelines/GeneralPipelineTriangle.cu index 4f65bf9..2625c34 100644 --- a/gpu/pipelines/GeneralPipelineTriangle.cu +++ b/gpu/pipelines/GeneralPipelineTriangle.cu @@ -18,7 +18,6 @@ // #define COUNT_RAYS using namespace viennaray::gpu; -using namespace viennacore; extern "C" __constant__ viennaray::gpu::LaunchParams launchParams; diff --git a/gpu/pipelines/Particle.cuh b/gpu/pipelines/Particle.cuh index b5e0389..1b6edf7 100644 --- a/gpu/pipelines/Particle.cuh +++ b/gpu/pipelines/Particle.cuh @@ -15,8 +15,9 @@ extern "C" __constant__ viennaray::gpu::LaunchParams launchParams; __forceinline__ __device__ void particleCollision(viennaray::gpu::PerRayData *prd) { for (int i = 0; i < prd->ISCount; ++i) { - atomicAdd(&launchParams.resultBuffer[getIdxOffset(0, launchParams) + - prd->primIDs[i]], + atomicAdd(&launchParams + .resultBuffer[viennaray::gpu::getIdxOffset(0, launchParams) + + prd->primIDs[i]], prd->rayWeight); } } @@ -25,8 +26,8 @@ __forceinline__ __device__ void particleReflection(const void *sbtData, viennaray::gpu::PerRayData *prd) { int materialId = launchParams.materialIds[prd->primID]; prd->rayWeight -= prd->rayWeight * launchParams.materialSticking[materialId]; - auto geoNormal = computeNormal(sbtData, prd->primID); - diffuseReflection(prd, geoNormal, launchParams.D); + auto geoNormal = viennaray::gpu::computeNormal(sbtData, prd->primID); + viennaray::gpu::diffuseReflection(prd, geoNormal, launchParams.D); } __forceinline__ __device__ void particleInit(viennaray::gpu::PerRayData *prd) { diff --git a/include/viennaray/rayGeometry.hpp b/include/viennaray/rayGeometry.hpp index 3e7c347..3ecb804 100644 --- a/include/viennaray/rayGeometry.hpp +++ b/include/viennaray/rayGeometry.hpp @@ -30,9 +30,13 @@ template class Geometry { virtual Vec3D getPrimNormal(const unsigned int primID) const = 0; - virtual std::array &getPrimRef(unsigned int primID) { return zero; } + virtual std::array &getPrimRef(unsigned int primID) { + assert(false && "Geometry: getPrimRef not implemented"); + return zero; + } virtual std::array &getNormalRef(unsigned int primID) { + assert(false && "Geometry: getNormalRef not implemented"); return *reinterpret_cast *>(zero.data()); } diff --git a/include/viennaray/rayGeometryDisk.hpp b/include/viennaray/rayGeometryDisk.hpp index 75a37fe..38a2a49 100644 --- a/include/viennaray/rayGeometryDisk.hpp +++ b/include/viennaray/rayGeometryDisk.hpp @@ -142,14 +142,15 @@ class GeometryDisk : public Geometry { (NumericType)normal.zz}; } - std::array &getPrimRef(unsigned int primID) { + std::array & + getPrimRef(unsigned int primID) override { assert(primID < this->numPrimitives_ && "Geometry: Prim ID out of bounds"); return *reinterpret_cast *>( &pPointBuffer_[primID]); } std::array & - getNormalRef(unsigned int primID) { + getNormalRef(unsigned int primID) override { assert(primID < this->numPrimitives_ && "Geometry: Prim ID out of bounds"); return *reinterpret_cast *>( &pNormalVecBuffer_[primID]); diff --git a/include/viennaray/rayGeometryTriangle.hpp b/include/viennaray/rayGeometryTriangle.hpp index 97f2a01..6b4a057 100644 --- a/include/viennaray/rayGeometryTriangle.hpp +++ b/include/viennaray/rayGeometryTriangle.hpp @@ -129,11 +129,10 @@ class GeometryTriangle : public Geometry { // *>(&pPointBuffer_[primID]); // } - // std::array &getNormalRef(unsigned int primID) override { - // assert(primID < this->numPrimitives_ && "Geometry: Prim ID out of - // bounds"); return *reinterpret_cast - // *>(&pTriangleBuffer_[primID]); - // } + std::array &getNormalRef(unsigned int primID) override { + assert(primID < this->numPrimitives_ && "Geometry: Prim ID out of bounds"); + return normals_[primID]; + } bool checkGeometryEmpty() const override { if (pPointBuffer_ == nullptr || pTriangleBuffer_ == nullptr || @@ -170,7 +169,7 @@ class GeometryTriangle : public Geometry { }; triangle_3u_t *pTriangleBuffer_ = nullptr; - std::vector> normals_; + std::vector normals_; std::vector areas_; }; diff --git a/include/viennaray/rayTraceDisk.hpp b/include/viennaray/rayTraceDisk.hpp index 7386641..23112f0 100644 --- a/include/viennaray/rayTraceDisk.hpp +++ b/include/viennaray/rayTraceDisk.hpp @@ -94,8 +94,9 @@ class TraceDisk : public Trace { /// Helper function to normalize the recorded flux in a post-processing step. /// The flux can be normalized to the source flux and the maximum recorded /// value. - void normalizeFlux(std::vector &flux, - NormalizationType norm = NormalizationType::SOURCE) { + void + normalizeFlux(std::vector &flux, + NormalizationType norm = NormalizationType::SOURCE) override { assert(flux.size() == geometry_.getNumPrimitives() && "Unequal number of points in normalizeFlux"); diff --git a/include/viennaray/rayTraceKernel.hpp b/include/viennaray/rayTraceKernel.hpp index 63ce4ec..1651a75 100644 --- a/include/viennaray/rayTraceKernel.hpp +++ b/include/viennaray/rayTraceKernel.hpp @@ -52,7 +52,9 @@ template class TraceKernel { size_t nonGeoHits = 0; size_t totalTraces = 0; size_t particleHits = 0; - size_t boundaryHits = 0; + size_t totalBoundaryHits = 0; + size_t totalReflections = 0; + size_t raysTerminated = 0; auto const lambda = pParticle_->getMeanFreePath(); const long long numRays = config_.numRaysFixed == 0 @@ -84,9 +86,9 @@ template class TraceKernel { Timer timer; timer.start(); -#pragma omp parallel reduction(+ : geoHits, nonGeoHits, totalTraces, \ - particleHits, boundaryHits) \ - shared(threadLocalData) +#pragma omp parallel reduction( \ + + : geoHits, nonGeoHits, totalTraces, particleHits, totalBoundaryHits, \ + totalReflections, raysTerminated) shared(threadLocalData) { rtcJoinCommitScene(rtcScene); @@ -124,6 +126,7 @@ template class TraceKernel { const NumericType initialRayWeight = pSource_->getInitialRayWeight(idx); NumericType rayWeight = initialRayWeight; unsigned int numReflections = 0; + unsigned int boundaryHits = 0; { particle->initNew(rngState); @@ -208,7 +211,11 @@ template class TraceKernel { /* -------- Boundary hit -------- */ if (rayHit.hit.geomID == boundaryID) { - ++boundaryHits; + if (++boundaryHits > 100) { + // terminate ray if too many boundary hits + ++raysTerminated; + break; + } boundary_.processHit(rayHit, reflect); continue; } @@ -322,12 +329,19 @@ template class TraceKernel { // terminate ray if too many reflections break; } + if (numReflections > 1e4) { + // terminate ray if too many reflections + ++raysTerminated; + break; + } // Update ray direction and origin fillRayPosition(rayHit.ray, hitPoint); fillRayDirection(rayHit.ray, stickingDirection.second); } while (reflect); + totalBoundaryHits += boundaryHits; + totalReflections += numReflections; } // end ray tracing for loop } // end parallel section @@ -408,9 +422,20 @@ template class TraceKernel { traceInfo_.nonGeometryHits = nonGeoHits; traceInfo_.geometryHits = geoHits; traceInfo_.particleHits = particleHits; - traceInfo_.boundaryHits = boundaryHits; + traceInfo_.boundaryHits = totalBoundaryHits; + traceInfo_.reflections = totalReflections; traceInfo_.time = static_cast(timer.currentDuration) * 1e-9; + if (raysTerminated > 1e3) { + Logger::getInstance() + .addWarning( + "A total of " + std::to_string(raysTerminated) + + " rays were terminated early due to excessive boundary hits or " + "reflections.") + .print(); + traceInfo_.warning = true; + } + rtcReleaseScene(rtcScene); } diff --git a/include/viennaray/rayUtil.hpp b/include/viennaray/rayUtil.hpp index 661f0c0..0220e1a 100644 --- a/include/viennaray/rayUtil.hpp +++ b/include/viennaray/rayUtil.hpp @@ -68,6 +68,7 @@ struct TraceInfo { size_t geometryHits = 0; size_t particleHits = 0; size_t boundaryHits = 0; + size_t reflections = 0; double time = 0.0; bool warning = false; bool error = false;