From 909928edca0f061d2b3e4c5b7f175cd4018fed86 Mon Sep 17 00:00:00 2001 From: Tobias Reiter Date: Tue, 21 Oct 2025 10:34:04 +0200 Subject: [PATCH 1/2] Add GPU disk example --- CMakeLists.txt | 1 + gpu/examples/CMakeLists.txt | 5 +- gpu/examples/trenchDisks.cpp | 65 +++++++++++++++++++ .../{trench.cpp => trenchTriangles.cpp} | 9 +-- gpu/include/raygMesh.hpp | 19 +++++- gpu/include/raygTrace.hpp | 52 ++++++--------- gpu/include/raygTraceDisk.hpp | 38 +++++++---- gpu/include/raygTraceLine.hpp | 24 ++++--- gpu/pipelines/GeneralPipelineTriangle.cu | 2 + 9 files changed, 153 insertions(+), 62 deletions(-) create mode 100644 gpu/examples/trenchDisks.cpp rename gpu/examples/{trench.cpp => trenchTriangles.cpp} (91%) diff --git a/CMakeLists.txt b/CMakeLists.txt index ee1103f..f233f8d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,6 +93,7 @@ include("cmake/cpm.cmake") CPMAddPackage( NAME ViennaCore VERSION 1.6.1 + GIT_TAG main GIT_REPOSITORY "https://github.com/ViennaTools/ViennaCore" OPTIONS "VIENNACORE_USE_GPU ${VIENNARAY_USE_GPU}") diff --git a/gpu/examples/CMakeLists.txt b/gpu/examples/CMakeLists.txt index cd04810..1a2e0b6 100644 --- a/gpu/examples/CMakeLists.txt +++ b/gpu/examples/CMakeLists.txt @@ -1,4 +1,7 @@ project(ViennaRay-GPU_Examples) -add_gpu_executable(trenchGPU target_name trench.cpp) +add_gpu_executable(GPU_trenchTriangles target_name trenchTriangles.cpp) configure_file(Resources/trenchMesh.dat ${CMAKE_CURRENT_BINARY_DIR}/trenchMesh.dat COPYONLY) + +add_gpu_executable(GPU_trenchDisks target_name trenchDisks.cpp) +configure_file(${CMAKE_SOURCE_DIR}/examples/trench/trenchGrid3D.dat ${CMAKE_CURRENT_BINARY_DIR}/trenchGrid3D.dat COPYONLY) diff --git a/gpu/examples/trenchDisks.cpp b/gpu/examples/trenchDisks.cpp new file mode 100644 index 0000000..79b9362 --- /dev/null +++ b/gpu/examples/trenchDisks.cpp @@ -0,0 +1,65 @@ +#include + +#include + +using namespace viennaray; + +int main() { + + omp_set_num_threads(16); + constexpr int D = 3; + using NumericType = float; + Logger::setLogLevel(LogLevel::DEBUG); + + auto context = DeviceContext::createContext("../../lib/ptx", 0); + // relative to build directory + + // Read stored geometry grid + NumericType gridDelta; + std::vector> points; + std::vector> normals; + rayInternal::readGridFromFile("trenchGrid3D.dat", gridDelta, points, normals); + + gpu::DiskMesh mesh; + mesh.points = points; + mesh.normals = normals; + mesh.gridDelta = static_cast(gridDelta); + mesh.computeBoundingBox(); + + std::vector materialIds(mesh.points.size(), 7); + for (int i = mesh.points.size() / 2; i < mesh.points.size(); ++i) { + materialIds[i] = 1; + } + + gpu::Particle particle; + particle.direction = {0.0f, 0.0f, -1.0f}; + particle.name = "Particle"; + particle.sticking = 1.f; + particle.dataLabels = {"particleFlux"}; + particle.materialSticking[7] = 1.f; + particle.materialSticking[1] = .1f; + + std::unordered_map pMap = {{"Particle", 0}}; + std::vector cMap = { + {0, gpu::CallableSlot::COLLISION, "__direct_callable__particleCollision"}, + {0, gpu::CallableSlot::REFLECTION, + "__direct_callable__particleReflection"}}; + + gpu::TraceDisk tracer(context); + tracer.setGeometry(mesh); + tracer.setMaterialIds(materialIds); + tracer.setCallables("CallableWrapper", context->modulePath); + tracer.setParticleCallableMap({pMap, cMap}); + tracer.setNumberOfRaysPerPoint(1000); + tracer.insertNextParticle(particle); + tracer.prepareParticlePrograms(); + + tracer.apply(); + + std::vector flux(mesh.points.size()); + tracer.getFlux(flux.data(), 0, 0, 1); + + rayInternal::writeVTK("trenchResult.vtk", points, flux); + + return 0; +} diff --git a/gpu/examples/trench.cpp b/gpu/examples/trenchTriangles.cpp similarity index 91% rename from gpu/examples/trench.cpp rename to gpu/examples/trenchTriangles.cpp index 536903b..8cfc55a 100644 --- a/gpu/examples/trench.cpp +++ b/gpu/examples/trenchTriangles.cpp @@ -1,12 +1,8 @@ -#include -#include -#include #include -#include #include -#define COUNT_RAYS +// #define COUNT_RAYS using namespace viennaray; @@ -43,7 +39,6 @@ int main(int argc, char **argv) { gpu::TraceTriangle tracer(context); tracer.setGeometry(mesh); tracer.setMaterialIds(materialIds); - tracer.setPipeline("GeneralPipeline", context->modulePath); tracer.setCallables("CallableWrapper", context->modulePath); tracer.setParticleCallableMap({pMap, cMap}); tracer.setNumberOfRaysPerPoint(100); @@ -63,8 +58,10 @@ int main(int argc, char **argv) { std::vector flux(mesh.triangles.size()); tracer.getFlux(flux.data(), 0, 0); +#ifdef COUNT_RAYS rayCountBuffer.download(&rayCount, 1); std::cout << "Trace count: " << rayCount << std::endl; +#endif tracer.freeBuffers(); diff --git a/gpu/include/raygMesh.hpp b/gpu/include/raygMesh.hpp index cc2bcef..a697fb5 100644 --- a/gpu/include/raygMesh.hpp +++ b/gpu/include/raygMesh.hpp @@ -34,8 +34,23 @@ struct DiskMesh { Vec3Df minimumExtent; Vec3Df maximumExtent; - float radius; - float gridDelta; + float radius = 0.f; + float gridDelta = 0.f; + + void computeBoundingBox() { + if (points.empty()) + return; + minimumExtent = points[0]; + maximumExtent = points[0]; + for (const auto &p : points) { + for (int d = 0; d < 3; ++d) { + if (p[d] < minimumExtent[d]) + minimumExtent[d] = p[d]; + if (p[d] > maximumExtent[d]) + maximumExtent[d] = p[d]; + } + } + } }; struct SphereMesh { diff --git a/gpu/include/raygTrace.hpp b/gpu/include/raygTrace.hpp index 2785494..3a54c71 100644 --- a/gpu/include/raygTrace.hpp +++ b/gpu/include/raygTrace.hpp @@ -52,27 +52,6 @@ template class Trace { ~Trace() { freeBuffers(); } - void setPipeline(std::string fileName, const std::filesystem::path &path) { - // check if filename ends in .optixir - if (fileName.find(".optixir") == std::string::npos) { - if (fileName.find(".ptx") == std::string::npos) - fileName += ".optixir"; - } - - std::filesystem::path p(fileName); - std::string base = p.stem().string(); - std::string ext = p.extension().string(); - std::string finalName = base + geometryType_ + ext; - - pipelineFile = path / finalName; - - if (!std::filesystem::exists(pipelineFile)) { - Logger::getInstance() - .addError("Pipeline file " + finalName + " not found.") - .print(); - } - } - void setCallables(std::string fileName, const std::filesystem::path &path) { // check if filename ends in .optixir if (fileName.find(".optixir") == std::string::npos) { @@ -440,8 +419,20 @@ template class Trace { char log[2048]; size_t sizeof_log = sizeof(log); + std::string pipelineFile = "GeneralPipeline" + geometryType_ + ".optixir"; + std::filesystem::path pipelinePath = context->modulePath / pipelineFile; + if (!std::filesystem::exists(pipelinePath)) { + Logger::getInstance() + .addError("Pipeline file " + pipelinePath.string() + " not found.") + .print(); + } - auto pipelineInput = getInputData(pipelineFile.c_str(), inputSize); + auto pipelineInput = getInputData(pipelinePath.c_str(), inputSize); + if (!pipelineInput) { + Logger::getInstance() + .addError("Pipeline file " + pipelinePath.string() + " not found.") + .print(); + } OPTIX_CHECK(optixModuleCreate(context->optix, &moduleCompileOptions, &pipelineCompileOptions, pipelineInput, @@ -453,13 +444,19 @@ template class Trace { size_t sizeof_log_callable = sizeof(logCallable); auto callableInput = getInputData(callableFile.c_str(), inputSize); + if (!callableInput) { + Logger::getInstance() + .addError("Callable file " + callableFile.string() + " not found.") + .print(); + } OPTIX_CHECK(optixModuleCreate(context->optix, &moduleCompileOptions, &pipelineCompileOptions, callableInput, inputSize, logCallable, &sizeof_log_callable, &moduleCallable)); - // if (sizeof_log_callable > 1) - // PRINT(logCallable); + // if (sizeof_log_callable > 1) { + // std::cout << "Callable module log: " << logCallable << std::endl; + // } } /// does all setup for the raygen program @@ -648,12 +645,8 @@ template class Trace { protected: // context for cuda kernels std::shared_ptr context; - std::filesystem::path pipelineFile; std::filesystem::path callableFile; - // Disk specific - PointNeighborhood pointNeighborhood_; - std::string geometryType_; std::unordered_map particleMap_; std::vector callableMap_; @@ -666,9 +659,6 @@ template class Trace { CudaBuffer areaBuffer; - Vec3Df minBox; - Vec3Df maxBox; - // particles unsigned int numRates = 0; std::vector> particles; diff --git a/gpu/include/raygTraceDisk.hpp b/gpu/include/raygTraceDisk.hpp index ee08bf9..207474b 100644 --- a/gpu/include/raygTraceDisk.hpp +++ b/gpu/include/raygTraceDisk.hpp @@ -2,6 +2,7 @@ #include "raygDiskGeometry.hpp" #include "raygTrace.hpp" + #include #include @@ -20,19 +21,28 @@ template class TraceDisk : public Trace { void setGeometry(const DiskMesh &passedMesh) { assert(context); - this->minBox = static_cast(passedMesh.minimumExtent); - this->maxBox = static_cast(passedMesh.maximumExtent); + diskMesh = passedMesh; + if (diskMesh.gridDelta <= 0.f) { + Logger::getInstance() + .addError("DiskMesh gridDelta must be positive and non-zero.") + .print(); + } + if (diskMesh.radius <= 0.f) { + diskMesh.radius = rayInternal::DiskFactor<3> * diskMesh.gridDelta; + } + + minBox = diskMesh.minimumExtent; + maxBox = diskMesh.maximumExtent; if constexpr (D == 2) { - this->minBox[2] = -passedMesh.gridDelta; - this->maxBox[2] = passedMesh.gridDelta; + minBox[2] = -diskMesh.gridDelta; + maxBox[2] = diskMesh.gridDelta; } - this->gridDelta = static_cast(passedMesh.gridDelta); + this->gridDelta = static_cast(diskMesh.gridDelta); launchParams.D = D; - diskMesh = passedMesh; - this->pointNeighborhood_.template init<3>( - passedMesh.points, 2 * passedMesh.radius, passedMesh.minimumExtent, - passedMesh.maximumExtent); - diskGeometry.buildAccel(*context, passedMesh, launchParams); + pointNeighborhood_.template init<3>(diskMesh.points, 2 * diskMesh.radius, + diskMesh.minimumExtent, + diskMesh.maximumExtent); + diskGeometry.buildAccel(*context, diskMesh, launchParams); } void smoothFlux(std::vector &flux, int smoothingNeighbors) override { @@ -40,7 +50,7 @@ template class TraceDisk : public Trace { PointNeighborhood pointNeighborhood; if (smoothingNeighbors == 1) { // re-use the neighborhood from setGeometry - pointNeighborhood = this->pointNeighborhood_; + pointNeighborhood = pointNeighborhood_; } else { // TODO: this will rebuild the neighborhood for every call // to getFlux (number of rates) // create a new neighborhood with a larger radius @@ -84,7 +94,7 @@ template class TraceDisk : public Trace { CUdeviceptr d_normals = diskGeometry.geometryNormalBuffer.dPointer(); // calculate areas on host and send to device for now - Vec2D bdBox = {this->minBox, this->maxBox}; + Vec2D bdBox = {minBox, maxBox}; std::vector areas(launchParams.numElements); DiskBoundingBoxXYIntersector bdDiskIntersector(bdBox); @@ -197,6 +207,10 @@ template class TraceDisk : public Trace { DiskMesh diskMesh; DiskGeometry diskGeometry; + PointNeighborhood pointNeighborhood_; + Vec3Df minBox; + Vec3Df maxBox; + using Trace::context; using Trace::geometryType_; diff --git a/gpu/include/raygTraceLine.hpp b/gpu/include/raygTraceLine.hpp index 5437a32..adf1ab0 100644 --- a/gpu/include/raygTraceLine.hpp +++ b/gpu/include/raygTraceLine.hpp @@ -18,8 +18,8 @@ template class TraceLine : public Trace { ~TraceLine() { lineGeometry.freeBuffers(); } void setGeometry(const LineMesh &passedMesh) { - this->minBox = static_cast(passedMesh.minimumExtent); - this->maxBox = static_cast(passedMesh.maximumExtent); + minBox = passedMesh.minimumExtent; + maxBox = passedMesh.maximumExtent; this->gridDelta = static_cast(passedMesh.gridDelta); lineMesh = passedMesh; launchParams.D = D; @@ -29,16 +29,15 @@ template class TraceLine : public Trace { midPoints[i] = 0.5f * (lineMesh.nodes[lineMesh.lines[i][0]] + lineMesh.nodes[lineMesh.lines[i][1]]); } - this->pointNeighborhood_.template init<3>( - midPoints, 1 * std::sqrt(2) * this->gridDelta, this->minBox, - this->maxBox); + pointNeighborhood_.template init<3>( + midPoints, 1 * std::sqrt(2) * this->gridDelta, minBox, maxBox); } void smoothFlux(std::vector &flux, int numNeighbors) override { PointNeighborhood pointNeighborhood; if (numNeighbors == 1) { // re-use the neighborhood from the setGeometry - pointNeighborhood = this->pointNeighborhood_; + pointNeighborhood = pointNeighborhood_; } else { // TODO: this will rebuild the neighborhood for every call // to getFlux (number of rates) @@ -50,11 +49,11 @@ template class TraceLine : public Trace { lineMesh.nodes[lineMesh.lines[i][1]]); } pointNeighborhood.template init<3>( - midPoints, numNeighbors * std::sqrt(2) * this->gridDelta, - this->minBox, this->maxBox); + midPoints, numNeighbors * std::sqrt(2) * this->gridDelta, minBox, + maxBox); } - assert(flux.size() == this->pointNeighborhood_.getNumPoints() && + assert(flux.size() == pointNeighborhood_.getNumPoints() && "Unequal number of points in smoothFlux"); auto oldFlux = flux; @@ -99,7 +98,7 @@ template class TraceLine : public Trace { CUdeviceptr d_lines = lineGeometry.geometryLinesBuffer.dPointer(); // calculate areas on host and send to device for now - Vec2D bdBox = {this->minBox, this->maxBox}; + Vec2D bdBox = {minBox, maxBox}; std::vector areas(launchParams.numElements, 0.f); // 0 = REFLECTIVE, 1 = PERIODIC, 2 = IGNORE @@ -192,9 +191,14 @@ template class TraceLine : public Trace { sbt.hitgroupRecordCount = 2; } +private: LineMesh lineMesh; LineGeometry lineGeometry; + PointNeighborhood pointNeighborhood_; + Vec3Df minBox; + Vec3Df maxBox; + using Trace::context; using Trace::geometryType_; diff --git a/gpu/pipelines/GeneralPipelineTriangle.cu b/gpu/pipelines/GeneralPipelineTriangle.cu index 7abfea6..e66710e 100644 --- a/gpu/pipelines/GeneralPipelineTriangle.cu +++ b/gpu/pipelines/GeneralPipelineTriangle.cu @@ -15,6 +15,8 @@ #include +// #define COUNT_RAYS + using namespace viennaray::gpu; using namespace viennacore; From 20bdc9330f81fffddb3a585e46ed06ef6fdd3f3d Mon Sep 17 00:00:00 2001 From: Tobias Reiter Date: Tue, 21 Oct 2025 11:25:30 +0200 Subject: [PATCH 2/2] Add line example, change mesh nodes --- gpu/examples/CMakeLists.txt | 7 + gpu/examples/trenchDisks.cpp | 10 +- gpu/examples/trenchLines.cpp | 60 +++++ gpu/include/raygDiskGeometry.hpp | 35 ++- gpu/include/raygMesh.hpp | 57 ++--- gpu/include/raygTrace.hpp | 319 ++++++++++++++------------- gpu/include/raygTraceDisk.hpp | 22 +- gpu/include/raygTraceLine.hpp | 20 +- gpu/include/raygTraceTriangle.hpp | 17 +- gpu/include/raygTriangleGeometry.hpp | 27 ++- include/viennaray/rayTraceKernel.hpp | 13 -- include/viennaray/rayUtil.hpp | 13 ++ 12 files changed, 324 insertions(+), 276 deletions(-) create mode 100644 gpu/examples/trenchLines.cpp diff --git a/gpu/examples/CMakeLists.txt b/gpu/examples/CMakeLists.txt index 1a2e0b6..44b0c53 100644 --- a/gpu/examples/CMakeLists.txt +++ b/gpu/examples/CMakeLists.txt @@ -1,7 +1,14 @@ project(ViennaRay-GPU_Examples) +add_custom_target(ViennaRay-GPU_Examples ALL) + add_gpu_executable(GPU_trenchTriangles target_name trenchTriangles.cpp) configure_file(Resources/trenchMesh.dat ${CMAKE_CURRENT_BINARY_DIR}/trenchMesh.dat COPYONLY) +add_dependencies(ViennaRay-GPU_Examples ${target_name}) add_gpu_executable(GPU_trenchDisks target_name trenchDisks.cpp) configure_file(${CMAKE_SOURCE_DIR}/examples/trench/trenchGrid3D.dat ${CMAKE_CURRENT_BINARY_DIR}/trenchGrid3D.dat COPYONLY) +add_dependencies(ViennaRay-GPU_Examples ${target_name}) + +add_gpu_executable(GPU_trenchLines target_name trenchLines.cpp) +add_dependencies(ViennaRay-GPU_Examples ${target_name}) diff --git a/gpu/examples/trenchDisks.cpp b/gpu/examples/trenchDisks.cpp index 79b9362..b16969b 100644 --- a/gpu/examples/trenchDisks.cpp +++ b/gpu/examples/trenchDisks.cpp @@ -21,13 +21,13 @@ int main() { rayInternal::readGridFromFile("trenchGrid3D.dat", gridDelta, points, normals); gpu::DiskMesh mesh; - mesh.points = points; + mesh.nodes = points; mesh.normals = normals; mesh.gridDelta = static_cast(gridDelta); - mesh.computeBoundingBox(); + computeBoundingBox(mesh); - std::vector materialIds(mesh.points.size(), 7); - for (int i = mesh.points.size() / 2; i < mesh.points.size(); ++i) { + std::vector materialIds(mesh.nodes.size(), 7); + for (int i = mesh.nodes.size() / 2; i < mesh.nodes.size(); ++i) { materialIds[i] = 1; } @@ -56,7 +56,7 @@ int main() { tracer.apply(); - std::vector flux(mesh.points.size()); + std::vector flux(mesh.nodes.size()); tracer.getFlux(flux.data(), 0, 0, 1); rayInternal::writeVTK("trenchResult.vtk", points, flux); diff --git a/gpu/examples/trenchLines.cpp b/gpu/examples/trenchLines.cpp new file mode 100644 index 0000000..fb9d9a9 --- /dev/null +++ b/gpu/examples/trenchLines.cpp @@ -0,0 +1,60 @@ +#include + +#include + +using namespace viennaray; + +int main() { + + omp_set_num_threads(16); + constexpr int D = 3; + using NumericType = float; + Logger::setLogLevel(LogLevel::DEBUG); + + auto context = DeviceContext::createContext("../../lib/ptx", 0); + // relative to build directory + + // Read stored geometry grid + NumericType gridDelta; + std::vector> points; + + gpu::LineMesh mesh; + mesh.nodes = points; + mesh.gridDelta = static_cast(gridDelta); + computeBoundingBox(mesh); + + std::vector materialIds(mesh.lines.size(), 7); + for (int i = mesh.lines.size() / 2; i < mesh.lines.size(); ++i) { + materialIds[i] = 1; + } + + gpu::Particle particle; + particle.direction = {0.0f, 0.0f, -1.0f}; + particle.name = "Particle"; + particle.sticking = 1.f; + particle.dataLabels = {"particleFlux"}; + particle.materialSticking[7] = 1.f; + particle.materialSticking[1] = .1f; + + std::unordered_map pMap = {{"Particle", 0}}; + std::vector cMap = { + {0, gpu::CallableSlot::COLLISION, "__direct_callable__particleCollision"}, + {0, gpu::CallableSlot::REFLECTION, + "__direct_callable__particleReflection"}}; + + gpu::TraceLine tracer(context); + tracer.setGeometry(mesh); + tracer.setMaterialIds(materialIds); + tracer.setCallables("CallableWrapper", context->modulePath); + tracer.setParticleCallableMap({pMap, cMap}); + tracer.setNumberOfRaysPerPoint(1000); + tracer.insertNextParticle(particle); + tracer.prepareParticlePrograms(); + + tracer.apply(); + + std::vector flux(mesh.lines.size()); + tracer.getFlux(flux.data(), 0, 0, 1); + + return 0; +} diff --git a/gpu/include/raygDiskGeometry.hpp b/gpu/include/raygDiskGeometry.hpp index 873504f..edff9e5 100644 --- a/gpu/include/raygDiskGeometry.hpp +++ b/gpu/include/raygDiskGeometry.hpp @@ -42,7 +42,7 @@ template struct DiskGeometry { launchParams.source.maxPoint[1] = mesh.maximumExtent[1]; launchParams.source.planeHeight = mesh.maximumExtent[2] + 2 * mesh.radius; } - launchParams.numElements = mesh.points.size(); + launchParams.numElements = mesh.nodes.size(); // 2 inputs: one for the geometry, one for the boundary std::array diskInput{}; @@ -50,7 +50,7 @@ template struct DiskGeometry { // ------------------- geometry input ------------------- // upload the model to the device: the builder - geometryPointBuffer.allocUpload(mesh.points); + geometryPointBuffer.allocUpload(mesh.nodes); geometryNormalBuffer.allocUpload(mesh.normals); // create local variables, because we need a *pointer* to the @@ -59,10 +59,10 @@ template struct DiskGeometry { CUdeviceptr d_geoNormals = geometryNormalBuffer.dPointer(); // AABB build input - std::vector aabb(mesh.points.size()); + std::vector aabb(mesh.nodes.size()); - for (size_t i = 0; i < mesh.points.size(); ++i) { - Vec3Df C = mesh.points[i]; + for (size_t i = 0; i < mesh.nodes.size(); ++i) { + Vec3Df C = mesh.nodes[i]; Vec3Df N = mesh.normals[i]; Normalize(N); @@ -89,7 +89,7 @@ template struct DiskGeometry { diskInput[0] = {}; diskInput[0].type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES; diskInput[0].customPrimitiveArray.aabbBuffers = &d_aabb; - diskInput[0].customPrimitiveArray.numPrimitives = mesh.points.size(); + diskInput[0].customPrimitiveArray.numPrimitives = mesh.nodes.size(); uint32_t diskInput_flags[1] = {OPTIX_GEOMETRY_FLAG_NONE}; diskInput[0].customPrimitiveArray.flags = diskInput_flags; @@ -104,7 +104,7 @@ template struct DiskGeometry { // ------------------------- boundary input ------------------------- auto boundaryMesh = makeBoundary(mesh); // upload the model to the device: the builder - boundaryPointBuffer.allocUpload(boundaryMesh.points); + boundaryPointBuffer.allocUpload(boundaryMesh.nodes); boundaryNormalBuffer.allocUpload(boundaryMesh.normals); // create local variables, because we need a *pointer* to the @@ -113,9 +113,9 @@ template struct DiskGeometry { CUdeviceptr d_boundNormals = boundaryNormalBuffer.dPointer(); // AABB build input for boundary disks - std::vector aabbBoundary(boundaryMesh.points.size()); - for (size_t i = 0; i < boundaryMesh.points.size(); ++i) { - Vec3Df C = boundaryMesh.points[i]; + std::vector aabbBoundary(boundaryMesh.nodes.size()); + for (size_t i = 0; i < boundaryMesh.nodes.size(); ++i) { + Vec3Df C = boundaryMesh.nodes[i]; Vec3Df N = boundaryMesh.normals[i]; Normalize(N); @@ -136,8 +136,7 @@ template struct DiskGeometry { diskInput[1] = {}; diskInput[1].type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES; diskInput[1].customPrimitiveArray.aabbBuffers = &d_aabbBoundary; - diskInput[1].customPrimitiveArray.numPrimitives = - boundaryMesh.points.size(); + diskInput[1].customPrimitiveArray.numPrimitives = boundaryMesh.nodes.size(); diskInput[1].customPrimitiveArray.flags = diskInput_flags; diskInput[1].customPrimitiveArray.numSbtRecords = 1; @@ -235,37 +234,37 @@ template struct DiskGeometry { // xmin - back 0 Vec3Df xminPoint = {bbMin[0], (bbMin[1] + bbMax[1]) / 2, (bbMin[2] + bbMax[2]) / 2}; - boundaryMesh.points.push_back(xminPoint); + boundaryMesh.nodes.push_back(xminPoint); boundaryMesh.normals.push_back({1, 0, 0}); // xmax - front 1 Vec3Df xmaxPoint = {bbMax[0], (bbMin[1] + bbMax[1]) / 2, (bbMin[2] + bbMax[2]) / 2}; - boundaryMesh.points.push_back(xmaxPoint); + boundaryMesh.nodes.push_back(xmaxPoint); boundaryMesh.normals.push_back({-1, 0, 0}); // ymin - left 2 Vec3Df yminPoint = {(bbMin[0] + bbMax[0]) / 2, bbMin[1], (bbMin[2] + bbMax[2]) / 2}; - boundaryMesh.points.push_back(yminPoint); + boundaryMesh.nodes.push_back(yminPoint); boundaryMesh.normals.push_back({0, 1, 0}); // ymax - right 3 Vec3Df ymaxPoint = {(bbMin[0] + bbMax[0]) / 2, bbMax[1], (bbMin[2] + bbMax[2]) / 2}; - boundaryMesh.points.push_back(ymaxPoint); + boundaryMesh.nodes.push_back(ymaxPoint); boundaryMesh.normals.push_back({0, -1, 0}); // zmin - bottom 4 Vec3Df zminPoint = {(bbMin[0] + bbMax[0]) / 2, (bbMin[1] + bbMax[1]) / 2, bbMin[2]}; - boundaryMesh.points.push_back(zminPoint); + boundaryMesh.nodes.push_back(zminPoint); boundaryMesh.normals.push_back({0, 0, 1}); // zmax - top 5 Vec3Df zmaxPoint = {(bbMin[0] + bbMax[0]) / 2, (bbMin[1] + bbMax[1]) / 2, bbMax[2]}; - boundaryMesh.points.push_back(zmaxPoint); + boundaryMesh.nodes.push_back(zmaxPoint); boundaryMesh.normals.push_back({0, 0, -1}); return boundaryMesh; diff --git a/gpu/include/raygMesh.hpp b/gpu/include/raygMesh.hpp index a697fb5..57f4c7e 100644 --- a/gpu/include/raygMesh.hpp +++ b/gpu/include/raygMesh.hpp @@ -16,60 +16,42 @@ struct LineMesh { Vec3Df minimumExtent; Vec3Df maximumExtent; - float gridDelta; + float gridDelta = 0.f; }; struct TriangleMesh { - std::vector vertices; + std::vector nodes; std::vector> triangles; Vec3Df minimumExtent; Vec3Df maximumExtent; - float gridDelta; + float gridDelta = 0.f; }; struct DiskMesh { - std::vector points; + std::vector nodes; std::vector normals; Vec3Df minimumExtent; Vec3Df maximumExtent; float radius = 0.f; float gridDelta = 0.f; +}; - void computeBoundingBox() { - if (points.empty()) - return; - minimumExtent = points[0]; - maximumExtent = points[0]; - for (const auto &p : points) { - for (int d = 0; d < 3; ++d) { - if (p[d] < minimumExtent[d]) - minimumExtent[d] = p[d]; - if (p[d] > maximumExtent[d]) - maximumExtent[d] = p[d]; - } +template void computeBoundingBox(MeshType &mesh) { + if (mesh.nodes.empty()) + return; + mesh.minimumExtent = mesh.nodes[0]; + mesh.maximumExtent = mesh.nodes[0]; + for (const auto &p : mesh.nodes) { + for (int d = 0; d < 3; ++d) { + if (p[d] < mesh.minimumExtent[d]) + mesh.minimumExtent[d] = p[d]; + if (p[d] > mesh.maximumExtent[d]) + mesh.maximumExtent[d] = p[d]; } } -}; - -struct SphereMesh { - std::vector vertices; - std::vector radii; - - Vec3Df minimumExtent; - Vec3Df maximumExtent; - float gridDelta; -}; - -struct OrientedPointCloud { - std::vector vertices; - std::vector normals; - - Vec3Df minimumExtent; - Vec3Df maximumExtent; - float gridDelta; -}; +} inline TriangleMesh readMeshFromFile(const std::string &fileName) { TriangleMesh mesh; @@ -105,13 +87,12 @@ inline TriangleMesh readMeshFromFile(const std::string &fileName) { size_t numTriangles; dataFile >> numTriangles; - mesh.vertices.resize(numPoints); + mesh.nodes.resize(numPoints); mesh.triangles.resize(numTriangles); for (size_t i = 0; i < numPoints; ++i) { dataFile >> id; assert(id == "n"); - dataFile >> mesh.vertices[i][0] >> mesh.vertices[i][1] >> - mesh.vertices[i][2]; + dataFile >> mesh.nodes[i][0] >> mesh.nodes[i][1] >> mesh.nodes[i][2]; } for (size_t i = 0; i < numTriangles; ++i) { dataFile >> id; diff --git a/gpu/include/raygTrace.hpp b/gpu/include/raygTrace.hpp index 3a54c71..fc062f6 100644 --- a/gpu/include/raygTrace.hpp +++ b/gpu/include/raygTrace.hpp @@ -29,18 +29,16 @@ using namespace viennacore; template class Trace { public: - /// constructor - performs all setup, including initializing - /// optix, creates module, pipeline, programs, SBT, etc. - explicit Trace(std::shared_ptr &passedContext, - std::string &&geometryType) - : context(passedContext), geometryType_(std::move(geometryType)) { + Trace(std::shared_ptr &passedContext, + std::string &&geometryType) + : context_(passedContext), geometryType_(std::move(geometryType)) { initRayTracer(); } - explicit Trace(std::string &&geometryType, unsigned deviceID = 0) + Trace(std::string &&geometryType, unsigned deviceID = 0) : geometryType_(std::move(geometryType)) { - context = DeviceContext::getContextFromRegistry(deviceID); - if (!context) { + context_ = DeviceContext::getContextFromRegistry(deviceID); + if (!context_) { Logger::getInstance() .addError("No context found for device ID " + std::to_string(deviceID) + @@ -64,9 +62,9 @@ template class Trace { std::string ext = p.extension().string(); std::string finalName = base + ext; - callableFile = path / finalName; + callableFile_ = path / finalName; - if (!std::filesystem::exists(callableFile)) { + if (!std::filesystem::exists(callableFile_)) { Logger::getInstance() .addError("Callable file " + finalName + " not found.") .print(); @@ -74,86 +72,91 @@ template class Trace { } void insertNextParticle(const Particle &particle) { - particles.push_back(particle); + particles_.push_back(particle); } void apply() { - // if (numCellData != 0 && cellDataBuffer.sizeInBytes == 0) { - // cellDataBuffer.allocInit(numCellData * launchParams.numElements, - // float(0)); - // } - assert(cellDataBuffer.sizeInBytes / sizeof(float) == - numCellData * launchParams.numElements); + if (particles_.empty()) { + Logger::getInstance() + .addError("No particles inserted. Use insertNextParticle first.") + .print(); + } + + if (cellDataBuffer_.sizeInBytes / sizeof(float) != + numCellData * launchParams.numElements) { + Logger::getInstance() + .addError("Cell data buffer size does not match the expected size.") + .print(); + } // resize our cuda result buffer - resultBuffer.allocInit(launchParams.numElements * numRates, float(0)); + resultBuffer.allocInit(launchParams.numElements * numFluxes_, float(0)); launchParams.resultBuffer = (float *)resultBuffer.dPointer(); - if (materialIdsBuffer.sizeInBytes != 0) { - launchParams.materialIds = (int *)materialIdsBuffer.dPointer(); + if (materialIdsBuffer_.sizeInBytes != 0) { + launchParams.materialIds = (int *)materialIdsBuffer_.dPointer(); } - if (useRandomSeed) { + launchParams.seed = config_.rngSeed + config_.runNumber++; + if (config_.useRandomSeed) { std::random_device rd; std::uniform_int_distribution gen; launchParams.seed = gen(rd); - } else { - launchParams.seed = runNumber++; } - launchParams.gridDelta = gridDelta; - launchParams.tThreshold = 1.1 * gridDelta; // TODO: find the best value + launchParams.gridDelta = gridDelta_; + launchParams.tThreshold = 1.1 * gridDelta_; // TODO: find the best value int numPointsPerDim = static_cast(std::sqrt(static_cast(launchParams.numElements))); - if (numberOfRaysFixed > 0) { + if (config_.numRaysFixed > 0) { numPointsPerDim = 1; - numberOfRaysPerPoint = numberOfRaysFixed; + config_.numRaysPerPoint = config_.numRaysFixed; } - numRays = numPointsPerDim * numPointsPerDim * numberOfRaysPerPoint; + numRays = numPointsPerDim * numPointsPerDim * config_.numRaysPerPoint; if (numRays > (1 << 29)) { Logger::getInstance() .addWarning("Too many rays for single launch: " + util::prettyDouble(numRays)) .print(); - numberOfRaysPerPoint = (1 << 29) / (numPointsPerDim * numPointsPerDim); - numRays = numPointsPerDim * numPointsPerDim * numberOfRaysPerPoint; + config_.numRaysPerPoint = (1 << 29) / (numPointsPerDim * numPointsPerDim); + numRays = numPointsPerDim * numPointsPerDim * config_.numRaysPerPoint; } Logger::getInstance() .addDebug("Number of rays: " + util::prettyDouble(numRays)) .print(); // set up material specific sticking probabilities - materialStickingBuffer.resize(particles.size()); - for (size_t i = 0; i < particles.size(); i++) { - if (!particles[i].materialSticking.empty()) { - if (uniqueMaterialIds.empty() || materialIdsBuffer.sizeInBytes == 0) { + materialStickingBuffer_.resize(particles_.size()); + for (size_t i = 0; i < particles_.size(); i++) { + if (!particles_[i].materialSticking.empty()) { + if (uniqueMaterialIds_.empty() || materialIdsBuffer_.sizeInBytes == 0) { Logger::getInstance() .addError("Material IDs not set, when using material dependent " "sticking.") .print(); } - std::vector materialSticking(uniqueMaterialIds.size()); + std::vector materialSticking(uniqueMaterialIds_.size()); unsigned currentId = 0; - for (auto &matId : uniqueMaterialIds) { - if (particles[i].materialSticking.find(matId) == - particles[i].materialSticking.end()) { + for (auto &matId : uniqueMaterialIds_) { + if (particles_[i].materialSticking.find(matId) == + particles_[i].materialSticking.end()) { materialSticking[currentId++] = - static_cast(particles[i].sticking); + static_cast(particles_[i].sticking); } else { materialSticking[currentId++] = - static_cast(particles[i].materialSticking[matId]); + static_cast(particles_[i].materialSticking[matId]); } } - materialStickingBuffer[i].allocUpload(materialSticking); + materialStickingBuffer_[i].allocUpload(materialSticking); } } // Every particle gets its own stream and launch parameters - std::vector streams(particles.size()); - launchParamsBuffers.resize(particles.size()); + std::vector streams(particles_.size()); + launchParamsBuffers.resize(particles_.size()); if (particleMap_.empty()) { Logger::getInstance() @@ -161,26 +164,26 @@ template class Trace { .print(); } - for (size_t i = 0; i < particles.size(); i++) { - auto it = particleMap_.find(particles[i].name); + for (size_t i = 0; i < particles_.size(); i++) { + auto it = particleMap_.find(particles_[i].name); if (it == particleMap_.end()) { Logger::getInstance() - .addError("Unknown particle name: " + particles[i].name) + .addError("Unknown particle name: " + particles_[i].name) .print(); } launchParams.particleType = it->second; launchParams.particleIdx = static_cast(i); launchParams.cosineExponent = - static_cast(particles[i].cosineExponent); - launchParams.sticking = static_cast(particles[i].sticking); - if (!particles[i].materialSticking.empty()) { - assert(materialStickingBuffer[i].sizeInBytes != 0); + static_cast(particles_[i].cosineExponent); + launchParams.sticking = static_cast(particles_[i].sticking); + if (!particles_[i].materialSticking.empty()) { + assert(materialStickingBuffer_[i].sizeInBytes != 0); launchParams.materialSticking = - (float *)materialStickingBuffer[i].dPointer(); + (float *)materialStickingBuffer_[i].dPointer(); } - Vec3Df direction{static_cast(particles[i].direction[0]), - static_cast(particles[i].direction[1]), - static_cast(particles[i].direction[2])}; + Vec3Df direction{static_cast(particles_[i].direction[0]), + static_cast(particles_[i].direction[1]), + static_cast(particles_[i].direction[2])}; launchParams.source.directionBasis = rayInternal::getOrthonormalBasis(direction); @@ -199,17 +202,17 @@ template class Trace { launchParamsBuffers[i].dPointer(), launchParamsBuffers[i].sizeInBytes, &sbt, /*! dimensions of the launch: */ - numberOfRaysPerPoint, numPointsPerDim, + config_.numRaysPerPoint, numPointsPerDim, numPointsPerDim)); } #else // Launch on multiple streams in release mode - for (size_t i = 0; i < particles.size(); i++) { - OPTIX_CHECK(optixLaunch(pipeline, streams[i], + for (size_t i = 0; i < particles_.size(); i++) { + OPTIX_CHECK(optixLaunch(pipeline_, streams[i], /*! parameters and SBT */ launchParamsBuffers[i].dPointer(), launchParamsBuffers[i].sizeInBytes, &sbt, /*! dimensions of the launch: */ - numberOfRaysPerPoint, numPointsPerDim, + config_.numRaysPerPoint, numPointsPerDim, numPointsPerDim)); } #endif @@ -229,15 +232,16 @@ template class Trace { CUDA_CHECK(StreamDestroy(s)); } normalize(); - results.resize(launchParams.numElements * numRates); + results.resize(launchParams.numElements * numFluxes_); // cudaDeviceSynchronize(); // download is sync anyway - resultBuffer.download(results.data(), launchParams.numElements * numRates); + resultBuffer.download(results.data(), + launchParams.numElements * numFluxes_); } void setElementData(CudaBuffer &passedCellDataBuffer, unsigned numData) { assert(passedCellDataBuffer.sizeInBytes / sizeof(float) / numData == launchParams.numElements); - cellDataBuffer = passedCellDataBuffer; + cellDataBuffer_ = passedCellDataBuffer; numCellData = numData; } @@ -247,13 +251,13 @@ template class Trace { assert(materialIds.size() == launchParams.numElements); if (mapToConsecutive) { - uniqueMaterialIds.clear(); + uniqueMaterialIds_.clear(); for (auto &matId : materialIds) { - uniqueMaterialIds.insert(static_cast(matId)); + uniqueMaterialIds_.insert(static_cast(matId)); } std::unordered_map materialIdMap; int currentId = 0; - for (auto &uniqueMaterialId : uniqueMaterialIds) { + for (auto &uniqueMaterialId : uniqueMaterialIds_) { materialIdMap[uniqueMaterialId] = currentId++; } assert(currentId == materialIdMap.size()); @@ -263,25 +267,30 @@ template class Trace { for (size_t i = 0; i < launchParams.numElements; i++) { materialIdsMapped[i] = materialIdMap[materialIds[i]]; } - materialIdsBuffer.allocUpload(materialIdsMapped); + materialIdsBuffer_.allocUpload(materialIdsMapped); } else { std::vector materialIdsMapped(launchParams.numElements); for (size_t i = 0; i < launchParams.numElements; i++) { materialIdsMapped[i] = static_cast(materialIds[i]); } - materialIdsBuffer.allocUpload(materialIdsMapped); + materialIdsBuffer_.allocUpload(materialIdsMapped); } } void setNumberOfRaysPerPoint(const size_t pNumRays) { - numberOfRaysPerPoint = pNumRays; + config_.numRaysPerPoint = pNumRays; } void setNumberOfRaysFixed(const size_t pNumRays) { - numberOfRaysFixed = pNumRays; + config_.numRaysFixed = pNumRays; } - void setUseRandomSeeds(const bool set) { useRandomSeed = set; } + void setUseRandomSeeds(const bool set) { config_.useRandomSeed = set; } + + void setRngSeed(const unsigned seed) { + config_.rngSeed = seed; + config_.useRandomSeed = false; + } void setParticleCallableMap(std::tuple, @@ -294,9 +303,9 @@ template class Trace { void getFlux(float *flux, int particleIdx, int dataIdx, int smoothingNeighbors = 0) { unsigned int offset = 0; - for (size_t i = 0; i < particles.size(); i++) { + for (size_t i = 0; i < particles_.size(); i++) { if (particleIdx > i) - offset += particles[i].dataLabels.size(); + offset += particles_[i].dataLabels.size(); } offset = (offset + dataIdx) * launchParams.numElements; std::vector temp(launchParams.numElements); @@ -321,20 +330,20 @@ template class Trace { missRecordBuffer.free(); raygenRecordBuffer.free(); directCallableRecordBuffer.free(); - dataPerParticleBuffer.free(); + dataPerParticleBuffer_.free(); for (auto &buffer : launchParamsBuffers) { buffer.free(); } - materialIdsBuffer.free(); - for (auto &buffer : materialStickingBuffer) { + materialIdsBuffer_.free(); + for (auto &buffer : materialStickingBuffer_) { buffer.free(); } - neighborsBuffer.free(); - areaBuffer.free(); + neighborsBuffer_.free(); + areaBuffer_.free(); } unsigned int prepareParticlePrograms() { - if (particles.empty()) { + if (particles_.empty()) { Logger::getInstance().addWarning("No particles defined.").print(); return 0; } @@ -346,31 +355,29 @@ template class Trace { createDirectCallablePrograms(); createPipelines(); - numRates = 0; + numFluxes_ = 0; std::vector dataPerParticle; - for (const auto &p : particles) { + for (const auto &p : particles_) { dataPerParticle.push_back(p.dataLabels.size()); - numRates += p.dataLabels.size(); + numFluxes_ += p.dataLabels.size(); } - dataPerParticleBuffer.allocUpload(dataPerParticle); + dataPerParticleBuffer_.allocUpload(dataPerParticle); launchParams.dataPerParticle = - (unsigned int *)dataPerParticleBuffer.dPointer(); + (unsigned int *)dataPerParticleBuffer_.dPointer(); Logger::getInstance() - .addDebug("Number of flux arrays: " + std::to_string(numRates)) + .addDebug("Number of flux arrays: " + std::to_string(numFluxes_)) .print(); - return numRates; + return numFluxes_; } - CudaBuffer &getData() { return cellDataBuffer; } + CudaBuffer &getData() { return cellDataBuffer_; } CudaBuffer &getResults() { return resultBuffer; } - [[nodiscard]] std::size_t getNumberOfRays() const { return numRays; } + std::vector> &getParticles() { return particles_; } - std::vector> &getParticles() { return particles; } - - [[nodiscard]] unsigned int getNumberOfRates() const { return numRates; } + [[nodiscard]] unsigned int getNumberOfRates() const { return numFluxes_; } [[nodiscard]] unsigned int getNumberOfElements() const { return launchParams.numElements; @@ -384,43 +391,43 @@ template class Trace { virtual void normalize() {} void initRayTracer() { - context->addModule(normModuleName); + context_->addModule(normModuleName); + normKernelName.append(geometryType_ + "_f"); // launchParamsBuffer.alloc(sizeof(launchParams)); - normKernelName.append(geometryType_ + "_"); - normKernelName.push_back(NumericType); + // normKernelName.push_back(NumericType); } /// Creates the modules that contain all the programs we are going to use. /// We use one module for the pipeline programs, and one for the direct /// callables void createModules() { - moduleCompileOptions.maxRegisterCount = + moduleCompileOptions_.maxRegisterCount = OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT; #ifndef NDEBUG - moduleCompileOptions.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0; - moduleCompileOptions.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; + moduleCompileOptions_.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0; + moduleCompileOptions_.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; #else - moduleCompileOptions.optLevel = OPTIX_COMPILE_OPTIMIZATION_DEFAULT; - moduleCompileOptions.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE; + moduleCompileOptions_.optLevel = OPTIX_COMPILE_OPTIMIZATION_DEFAULT; + moduleCompileOptions_.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE; #endif - pipelineCompileOptions = {}; - pipelineCompileOptions.traversableGraphFlags = + pipelineCompileOptions_ = {}; + pipelineCompileOptions_.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS; - pipelineCompileOptions.usesMotionBlur = false; - pipelineCompileOptions.numPayloadValues = 2; - pipelineCompileOptions.numAttributeValues = 0; - pipelineCompileOptions.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE; - pipelineCompileOptions.pipelineLaunchParamsVariableName = + pipelineCompileOptions_.usesMotionBlur = false; + pipelineCompileOptions_.numPayloadValues = 2; + pipelineCompileOptions_.numAttributeValues = 0; + pipelineCompileOptions_.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE; + pipelineCompileOptions_.pipelineLaunchParamsVariableName = globalParamsName.c_str(); - pipelineLinkOptions.maxTraceDepth = 1; + pipelineLinkOptions_.maxTraceDepth = 1; size_t inputSize = 0; char log[2048]; size_t sizeof_log = sizeof(log); std::string pipelineFile = "GeneralPipeline" + geometryType_ + ".optixir"; - std::filesystem::path pipelinePath = context->modulePath / pipelineFile; + std::filesystem::path pipelinePath = context_->modulePath / pipelineFile; if (!std::filesystem::exists(pipelinePath)) { Logger::getInstance() .addError("Pipeline file " + pipelinePath.string() + " not found.") @@ -434,26 +441,26 @@ template class Trace { .print(); } - OPTIX_CHECK(optixModuleCreate(context->optix, &moduleCompileOptions, - &pipelineCompileOptions, pipelineInput, - inputSize, log, &sizeof_log, &module)); + OPTIX_CHECK(optixModuleCreate(context_->optix, &moduleCompileOptions_, + &pipelineCompileOptions_, pipelineInput, + inputSize, log, &sizeof_log, &module_)); // if (sizeof_log > 1) // PRINT(log); char logCallable[2048]; size_t sizeof_log_callable = sizeof(logCallable); - auto callableInput = getInputData(callableFile.c_str(), inputSize); + auto callableInput = getInputData(callableFile_.c_str(), inputSize); if (!callableInput) { Logger::getInstance() - .addError("Callable file " + callableFile.string() + " not found.") + .addError("Callable file " + callableFile_.string() + " not found.") .print(); } - OPTIX_CHECK(optixModuleCreate(context->optix, &moduleCompileOptions, - &pipelineCompileOptions, callableInput, + OPTIX_CHECK(optixModuleCreate(context_->optix, &moduleCompileOptions_, + &pipelineCompileOptions_, callableInput, inputSize, logCallable, &sizeof_log_callable, - &moduleCallable)); + &moduleCallable_)); // if (sizeof_log_callable > 1) { // std::cout << "Callable module log: " << logCallable << std::endl; // } @@ -465,13 +472,13 @@ template class Trace { OptixProgramGroupOptions pgOptions = {}; OptixProgramGroupDesc pgDesc = {}; pgDesc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; - pgDesc.raygen.module = module; + pgDesc.raygen.module = module_; pgDesc.raygen.entryFunctionName = entryFunctionName.c_str(); // OptixProgramGroup raypg; char log[2048]; size_t sizeof_log = sizeof(log); - OPTIX_CHECK(optixProgramGroupCreate(context->optix, &pgDesc, 1, &pgOptions, + OPTIX_CHECK(optixProgramGroupCreate(context_->optix, &pgDesc, 1, &pgOptions, log, &sizeof_log, &raygenPG)); // if (sizeof_log > 1) // PRINT(log); @@ -483,13 +490,13 @@ template class Trace { OptixProgramGroupOptions pgOptions = {}; OptixProgramGroupDesc pgDesc = {}; pgDesc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS; - pgDesc.miss.module = module; + pgDesc.miss.module = module_; pgDesc.miss.entryFunctionName = entryFunctionName.c_str(); // OptixProgramGroup raypg; char log[2048]; size_t sizeof_log = sizeof(log); - OPTIX_CHECK(optixProgramGroupCreate(context->optix, &pgDesc, 1, &pgOptions, + OPTIX_CHECK(optixProgramGroupCreate(context_->optix, &pgDesc, 1, &pgOptions, log, &sizeof_log, &missPG)); // if (sizeof_log > 1) // PRINT(log); @@ -502,17 +509,17 @@ template class Trace { OptixProgramGroupOptions pgOptions = {}; OptixProgramGroupDesc pgDesc = {}; pgDesc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; - pgDesc.hitgroup.moduleCH = module; + pgDesc.hitgroup.moduleCH = module_; pgDesc.hitgroup.entryFunctionNameCH = entryFunctionNameCH.c_str(); if (geometryType_ != "Triangle") { - pgDesc.hitgroup.moduleIS = module; + pgDesc.hitgroup.moduleIS = module_; pgDesc.hitgroup.entryFunctionNameIS = entryFunctionNameIS.c_str(); } char log[2048]; size_t sizeof_log = sizeof(log); - OPTIX_CHECK(optixProgramGroupCreate(context->optix, &pgDesc, 1, &pgOptions, + OPTIX_CHECK(optixProgramGroupCreate(context_->optix, &pgDesc, 1, &pgOptions, log, &sizeof_log, &hitgroupPG)); // if (sizeof_log > 1) // PRINT(log); @@ -543,12 +550,12 @@ template class Trace { OptixProgramGroupOptions dcOptions = {}; OptixProgramGroupDesc dcDesc = {}; dcDesc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; - dcDesc.callables.moduleDC = moduleCallable; + dcDesc.callables.moduleDC = moduleCallable_; dcDesc.callables.entryFunctionNameDC = entryFunctionNames[i].c_str(); char log[2048]; size_t sizeof_log = sizeof(log); - OPTIX_CHECK(optixProgramGroupCreate(context->optix, &dcDesc, 1, + OPTIX_CHECK(optixProgramGroupCreate(context_->optix, &dcDesc, 1, &dcOptions, log, &sizeof_log, &directCallablePGs[i])); // if (sizeof_log > 1) @@ -569,10 +576,10 @@ template class Trace { char log[2048]; size_t sizeof_log = sizeof(log); - OPTIX_CHECK(optixPipelineCreate(context->optix, &pipelineCompileOptions, - &pipelineLinkOptions, programGroups.data(), + OPTIX_CHECK(optixPipelineCreate(context_->optix, &pipelineCompileOptions_, + &pipelineLinkOptions_, programGroups.data(), static_cast(programGroups.size()), log, - &sizeof_log, &pipeline)); + &sizeof_log, &pipeline_)); // #ifndef NDEBUG // if (sizeof_log > 1) // PRINT(log); @@ -580,7 +587,7 @@ template class Trace { OptixStackSizes stackSizes = {}; for (auto &pg : programGroups) { - optixUtilAccumulateStackSizes(pg, &stackSizes, pipeline); + optixUtilAccumulateStackSizes(pg, &stackSizes, pipeline_); } unsigned int dcStackFromTrav = 0; @@ -591,13 +598,13 @@ template class Trace { // or recursive tracing OPTIX_CHECK(optixUtilComputeStackSizes( &stackSizes, - pipelineLinkOptions.maxTraceDepth, // OptixTrace recursion depth - 0, // continuation callable depth - 1, // direct callable depth + pipelineLinkOptions_.maxTraceDepth, // OptixTrace recursion depth + 0, // continuation callable depth + 1, // direct callable depth &dcStackFromTrav, &dcStackFromState, &continuationStack)); OPTIX_CHECK(optixPipelineSetStackSize( - pipeline, + pipeline_, dcStackFromTrav, // stack size for DirectCallables from IS or AH. dcStackFromState, // stack size for DirectCallables from RG, MS or CH. continuationStack, // continuation stack size @@ -644,37 +651,37 @@ template class Trace { protected: // context for cuda kernels - std::shared_ptr context; - std::filesystem::path callableFile; + std::shared_ptr context_; + std::filesystem::path callableFile_; std::string geometryType_; std::unordered_map particleMap_; std::vector callableMap_; - std::set uniqueMaterialIds; - CudaBuffer materialIdsBuffer; + std::set uniqueMaterialIds_; + CudaBuffer materialIdsBuffer_; - CudaBuffer neighborsBuffer; - float gridDelta = 0.0f; + CudaBuffer neighborsBuffer_; + float gridDelta_ = 0.0f; - CudaBuffer areaBuffer; + CudaBuffer areaBuffer_; // particles - unsigned int numRates = 0; - std::vector> particles; - CudaBuffer dataPerParticleBuffer; // same for all particles - std::vector materialStickingBuffer; // different for particles + unsigned int numFluxes_ = 0; + std::vector> particles_; + CudaBuffer dataPerParticleBuffer_; // same for all particles + std::vector materialStickingBuffer_; // different for particles // sbt data - CudaBuffer cellDataBuffer; + CudaBuffer cellDataBuffer_; - OptixPipeline pipeline{}; - OptixPipelineCompileOptions pipelineCompileOptions = {}; - OptixPipelineLinkOptions pipelineLinkOptions = {}; + OptixPipeline pipeline_{}; + OptixPipelineCompileOptions pipelineCompileOptions_ = {}; + OptixPipelineLinkOptions pipelineLinkOptions_ = {}; - OptixModule module{}; - OptixModule moduleCallable{}; - OptixModuleCompileOptions moduleCompileOptions = {}; + OptixModule module_{}; + OptixModule moduleCallable_{}; + OptixModuleCompileOptions moduleCompileOptions_ = {}; // program groups, and the SBT built around OptixProgramGroup raygenPG; @@ -695,20 +702,14 @@ template class Trace { CudaBuffer resultBuffer; std::vector results; - bool useRandomSeed = false; - unsigned numCellData = 0; - unsigned numberOfRaysPerPoint = 3000; - unsigned numberOfRaysFixed = 0; - int runNumber = 0; + rayInternal::KernelConfig config_; - size_t numRays = 0; - std::string globalParamsName = "launchParams"; + size_t numRays; + unsigned numCellData = 0; + const std::string globalParamsName = "launchParams"; const std::string normModuleName = "normKernels.ptx"; std::string normKernelName = "normalize_surface_"; - - static constexpr char NumericType = - 'f'; // std::is_same_v ? 'f' : 'd'; }; } // namespace viennaray::gpu diff --git a/gpu/include/raygTraceDisk.hpp b/gpu/include/raygTraceDisk.hpp index 207474b..7cd3bdd 100644 --- a/gpu/include/raygTraceDisk.hpp +++ b/gpu/include/raygTraceDisk.hpp @@ -37,12 +37,12 @@ template class TraceDisk : public Trace { minBox[2] = -diskMesh.gridDelta; maxBox[2] = diskMesh.gridDelta; } - this->gridDelta = static_cast(diskMesh.gridDelta); + this->gridDelta_ = static_cast(diskMesh.gridDelta); launchParams.D = D; - pointNeighborhood_.template init<3>(diskMesh.points, 2 * diskMesh.radius, + pointNeighborhood_.template init<3>(diskMesh.nodes, 2 * diskMesh.radius, diskMesh.minimumExtent, diskMesh.maximumExtent); - diskGeometry.buildAccel(*context, diskMesh, launchParams); + diskGeometry.buildAccel(*context_, diskMesh, launchParams); } void smoothFlux(std::vector &flux, int smoothingNeighbors) override { @@ -55,7 +55,7 @@ template class TraceDisk : public Trace { // to getFlux (number of rates) // create a new neighborhood with a larger radius pointNeighborhood.template init<3>( - diskMesh.points, smoothingNeighbors * 2 * diskMesh.radius, + diskMesh.nodes, smoothingNeighbors * 2 * diskMesh.radius, diskMesh.minimumExtent, diskMesh.maximumExtent); } #pragma omp parallel for @@ -106,7 +106,7 @@ template class TraceDisk : public Trace { #pragma omp for for (long idx = 0; idx < launchParams.numElements; ++idx) { std::array disk{0.f, 0.f, 0.f, diskMesh.radius}; - Vec3Df coord = diskMesh.points[idx]; + Vec3Df coord = diskMesh.nodes[idx]; Vec3Df normal = diskMesh.normals[idx]; disk[0] = coord[0]; disk[1] = coord[1]; @@ -160,14 +160,14 @@ template class TraceDisk : public Trace { } } } - this->areaBuffer.allocUpload(areas); - CUdeviceptr d_areas = this->areaBuffer.dPointer(); + this->areaBuffer_.allocUpload(areas); + CUdeviceptr d_areas = this->areaBuffer_.dPointer(); void *kernel_args[] = { &d_data, &d_areas, &launchParams.numElements, - &sourceArea, &this->numRays, &this->numRates}; + &sourceArea, &this->numRays, &this->numFluxes_}; LaunchKernel::launch(this->normModuleName, this->normKernelName, - kernel_args, *context); + kernel_args, *context_); } void buildHitGroups() override { @@ -183,7 +183,7 @@ template class TraceDisk : public Trace { geometryHitgroupRecord.data.base.geometryType = 1; geometryHitgroupRecord.data.base.isBoundary = false; geometryHitgroupRecord.data.base.cellData = - (void *)this->cellDataBuffer.dPointer(); + (void *)this->cellDataBuffer_.dPointer(); hitgroupRecords.push_back(geometryHitgroupRecord); // boundary hitgroup @@ -211,7 +211,7 @@ template class TraceDisk : public Trace { Vec3Df minBox; Vec3Df maxBox; - using Trace::context; + using Trace::context_; using Trace::geometryType_; using Trace::launchParams; diff --git a/gpu/include/raygTraceLine.hpp b/gpu/include/raygTraceLine.hpp index adf1ab0..db76c0e 100644 --- a/gpu/include/raygTraceLine.hpp +++ b/gpu/include/raygTraceLine.hpp @@ -20,17 +20,17 @@ template class TraceLine : public Trace { void setGeometry(const LineMesh &passedMesh) { minBox = passedMesh.minimumExtent; maxBox = passedMesh.maximumExtent; - this->gridDelta = static_cast(passedMesh.gridDelta); + this->gridDelta_ = static_cast(passedMesh.gridDelta); lineMesh = passedMesh; launchParams.D = D; - lineGeometry.buildAccel(*context, passedMesh, launchParams); + lineGeometry.buildAccel(*context_, passedMesh, launchParams); std::vector midPoints(lineMesh.lines.size()); for (int i = 0; i < lineMesh.lines.size(); ++i) { midPoints[i] = 0.5f * (lineMesh.nodes[lineMesh.lines[i][0]] + lineMesh.nodes[lineMesh.lines[i][1]]); } pointNeighborhood_.template init<3>( - midPoints, 1 * std::sqrt(2) * this->gridDelta, minBox, maxBox); + midPoints, 1 * std::sqrt(2) * this->gridDelta_, minBox, maxBox); } void smoothFlux(std::vector &flux, int numNeighbors) override { @@ -49,7 +49,7 @@ template class TraceLine : public Trace { lineMesh.nodes[lineMesh.lines[i][1]]); } pointNeighborhood.template init<3>( - midPoints, numNeighbors * std::sqrt(2) * this->gridDelta, minBox, + midPoints, numNeighbors * std::sqrt(2) * this->gridDelta_, minBox, maxBox); } @@ -148,14 +148,14 @@ template class TraceLine : public Trace { } } - this->areaBuffer.allocUpload(areas); - CUdeviceptr d_areas = this->areaBuffer.dPointer(); + this->areaBuffer_.allocUpload(areas); + CUdeviceptr d_areas = this->areaBuffer_.dPointer(); void *kernel_args[] = { &d_data, &d_areas, &launchParams.numElements, - &sourceArea, &this->numRays, &this->numRates}; + &sourceArea, &this->numRays, &this->numFluxes_}; LaunchKernel::launch(this->normModuleName, this->normKernelName, - kernel_args, *context); + kernel_args, *context_); } void buildHitGroups() override { @@ -171,7 +171,7 @@ template class TraceLine : public Trace { geometryHitgroupRecord.data.base.geometryType = 2; geometryHitgroupRecord.data.base.isBoundary = false; geometryHitgroupRecord.data.base.cellData = - (void *)this->cellDataBuffer.dPointer(); + (void *)this->cellDataBuffer_.dPointer(); hitgroupRecords.push_back(geometryHitgroupRecord); // boundary hitgroup @@ -199,7 +199,7 @@ template class TraceLine : public Trace { Vec3Df minBox; Vec3Df maxBox; - using Trace::context; + using Trace::context_; using Trace::geometryType_; using Trace::launchParams; diff --git a/gpu/include/raygTraceTriangle.hpp b/gpu/include/raygTraceTriangle.hpp index 895b6c7..425be43 100644 --- a/gpu/include/raygTraceTriangle.hpp +++ b/gpu/include/raygTraceTriangle.hpp @@ -17,8 +17,8 @@ template class TraceTriangle : public Trace { ~TraceTriangle() { triangleGeometry.freeBuffers(); } void setGeometry(const TriangleMesh &passedMesh) { - assert(context); - triangleGeometry.buildAccel(*context, passedMesh, launchParams); + assert(context_); + triangleGeometry.buildAccel(*context_, passedMesh, launchParams); } void smoothFlux(std::vector &flux, int smoothingNeighbors) override {} @@ -39,11 +39,12 @@ template class TraceTriangle : public Trace { CUdeviceptr d_data = resultBuffer.dPointer(); CUdeviceptr d_vertex = triangleGeometry.geometryVertexBuffer.dPointer(); CUdeviceptr d_index = triangleGeometry.geometryIndexBuffer.dPointer(); - void *kernel_args[] = { - &d_data, &d_vertex, &d_index, &launchParams.numElements, - &sourceArea, &this->numRays, &this->numRates}; + void *kernel_args[] = {&d_data, &d_vertex, + &d_index, &launchParams.numElements, + &sourceArea, &this->numRays, + &this->numFluxes_}; LaunchKernel::launch(this->normModuleName, this->normKernelName, - kernel_args, *context); + kernel_args, *context_); } void buildHitGroups() override { @@ -58,7 +59,7 @@ template class TraceTriangle : public Trace { geometryHitgroupRecord.data.base.geometryType = 0; geometryHitgroupRecord.data.base.isBoundary = false; geometryHitgroupRecord.data.base.cellData = - (void *)this->cellDataBuffer.dPointer(); + (void *)this->cellDataBuffer_.dPointer(); hitgroupRecords.push_back(geometryHitgroupRecord); // boundary hitgroup @@ -81,7 +82,7 @@ template class TraceTriangle : public Trace { TriangleMesh triangleMesh; TriangleGeometry triangleGeometry; - using Trace::context; + using Trace::context_; using Trace::launchParams; using Trace::resultBuffer; diff --git a/gpu/include/raygTriangleGeometry.hpp b/gpu/include/raygTriangleGeometry.hpp index e58b5ff..2a061f5 100644 --- a/gpu/include/raygTriangleGeometry.hpp +++ b/gpu/include/raygTriangleGeometry.hpp @@ -41,7 +41,7 @@ struct TriangleGeometry { // ------------------- geometry input ------------------- // upload the model to the device: the builder - geometryVertexBuffer.allocUpload(mesh.vertices); + geometryVertexBuffer.allocUpload(mesh.nodes); geometryIndexBuffer.allocUpload(mesh.triangles); // triangle inputs @@ -56,7 +56,7 @@ struct TriangleGeometry { triangleInput[0].triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3; triangleInput[0].triangleArray.vertexStrideInBytes = sizeof(Vec3Df); triangleInput[0].triangleArray.numVertices = - (unsigned int)mesh.vertices.size(); + (unsigned int)mesh.nodes.size(); triangleInput[0].triangleArray.vertexBuffers = &d_geoVertices; triangleInput[0].triangleArray.indexFormat = @@ -77,7 +77,7 @@ struct TriangleGeometry { // ------------------------- boundary input ------------------------- auto boundaryMesh = makeBoundary(mesh); // upload the model to the device: the builder - boundaryVertexBuffer.allocUpload(boundaryMesh.vertices); + boundaryVertexBuffer.allocUpload(boundaryMesh.nodes); boundaryIndexBuffer.allocUpload(boundaryMesh.triangles); // triangle inputs @@ -91,8 +91,7 @@ struct TriangleGeometry { triangleInput[1].triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3; triangleInput[1].triangleArray.vertexStrideInBytes = sizeof(Vec3Df); - triangleInput[1].triangleArray.numVertices = - (int)boundaryMesh.vertices.size(); + triangleInput[1].triangleArray.numVertices = (int)boundaryMesh.nodes.size(); triangleInput[1].triangleArray.vertexBuffers = &d_boundVertices; triangleInput[1].triangleArray.indexFormat = @@ -173,19 +172,19 @@ struct TriangleGeometry { bbMax[2] += passedMesh.gridDelta; boundaryMesh.gridDelta = passedMesh.gridDelta; - boundaryMesh.vertices.reserve(8); + boundaryMesh.nodes.reserve(8); boundaryMesh.triangles.reserve(8); // bottom - boundaryMesh.vertices.push_back(Vec3Df{bbMin[0], bbMin[1], bbMin[2]}); - boundaryMesh.vertices.push_back(Vec3Df{bbMax[0], bbMin[1], bbMin[2]}); - boundaryMesh.vertices.push_back(Vec3Df{bbMax[0], bbMax[1], bbMin[2]}); - boundaryMesh.vertices.push_back(Vec3Df{bbMin[0], bbMax[1], bbMin[2]}); + boundaryMesh.nodes.push_back(Vec3Df{bbMin[0], bbMin[1], bbMin[2]}); + boundaryMesh.nodes.push_back(Vec3Df{bbMax[0], bbMin[1], bbMin[2]}); + boundaryMesh.nodes.push_back(Vec3Df{bbMax[0], bbMax[1], bbMin[2]}); + boundaryMesh.nodes.push_back(Vec3Df{bbMin[0], bbMax[1], bbMin[2]}); // top - boundaryMesh.vertices.push_back(Vec3Df{bbMin[0], bbMin[1], bbMax[2]}); - boundaryMesh.vertices.push_back(Vec3Df{bbMax[0], bbMin[1], bbMax[2]}); - boundaryMesh.vertices.push_back(Vec3Df{bbMax[0], bbMax[1], bbMax[2]}); - boundaryMesh.vertices.push_back(Vec3Df{bbMin[0], bbMax[1], bbMax[2]}); + boundaryMesh.nodes.push_back(Vec3Df{bbMin[0], bbMin[1], bbMax[2]}); + boundaryMesh.nodes.push_back(Vec3Df{bbMax[0], bbMin[1], bbMax[2]}); + boundaryMesh.nodes.push_back(Vec3Df{bbMax[0], bbMax[1], bbMax[2]}); + boundaryMesh.nodes.push_back(Vec3Df{bbMin[0], bbMax[1], bbMax[2]}); // x min max boundaryMesh.triangles.push_back(Vec3D{0, 3, 7}); // 0 diff --git a/include/viennaray/rayTraceKernel.hpp b/include/viennaray/rayTraceKernel.hpp index 447b1d8..9db193d 100644 --- a/include/viennaray/rayTraceKernel.hpp +++ b/include/viennaray/rayTraceKernel.hpp @@ -19,19 +19,6 @@ namespace rayInternal { using namespace viennaray; -struct KernelConfig { - size_t numRaysPerPoint = 1000; - size_t numRaysFixed = 0; - unsigned maxReflections = std::numeric_limits::max(); - unsigned rngSeed = 0; - - bool useRandomSeed = true; - bool calcFlux = true; - bool printProgress = false; - - unsigned runNumber = 1; -}; - template class TraceKernel { public: TraceKernel(RTCDevice &device, Geometry &geometry, diff --git a/include/viennaray/rayUtil.hpp b/include/viennaray/rayUtil.hpp index 890da47..70a75ab 100644 --- a/include/viennaray/rayUtil.hpp +++ b/include/viennaray/rayUtil.hpp @@ -78,6 +78,19 @@ namespace rayInternal { using namespace viennaray; using namespace viennacore; +struct KernelConfig { + size_t numRaysPerPoint = 1000; + size_t numRaysFixed = 0; + unsigned maxReflections = std::numeric_limits::max(); + unsigned rngSeed = 0; + + bool useRandomSeed = true; + bool calcFlux = true; + bool printProgress = false; + + unsigned runNumber = 1; +}; + // embree uses float internally using rtcNumericType = float;