From 882f13525ba0f4f9758e103d00cf4fc26ed1d175 Mon Sep 17 00:00:00 2001 From: Corey Williams Date: Tue, 6 Jan 2026 14:53:03 -0600 Subject: [PATCH] restarted PR --- include/nbl/asset/IAsset.h | 1 + include/nbl/asset/ICPUMeshPipeline.h | 145 +++++ .../nbl/builtin/hlsl/indirect_commands.hlsl | 3 + include/nbl/video/IGPUCommandBuffer.h | 24 +- include/nbl/video/IGPUCommandPool.h | 52 +- include/nbl/video/IGPUMeshPipeline.h | 165 +++++ include/nbl/video/ILogicalDevice.h | 34 +- include/nbl/video/asset_traits.h | 76 ++- src/nbl/CMakeLists.txt | 3 +- src/nbl/video/CVulkanCommandBuffer.cpp | 22 +- src/nbl/video/CVulkanCommandBuffer.h | 6 +- src/nbl/video/CVulkanLogicalDevice.cpp | 574 ++++++++++++------ src/nbl/video/CVulkanLogicalDevice.h | 16 +- src/nbl/video/CVulkanMeshPipeline.cpp | 27 + src/nbl/video/CVulkanMeshPipeline.h | 33 + src/nbl/video/CVulkanPhysicalDevice.cpp | 52 ++ src/nbl/video/IGPUCommandBuffer.cpp | 87 ++- src/nbl/video/ILogicalDevice.cpp | 441 ++++++++------ .../device_capabilities/device_features.json | 10 + .../device_capabilities/device_limits.json | 117 ++++ 20 files changed, 1461 insertions(+), 427 deletions(-) create mode 100644 include/nbl/asset/ICPUMeshPipeline.h create mode 100644 include/nbl/video/IGPUMeshPipeline.h create mode 100644 src/nbl/video/CVulkanMeshPipeline.cpp create mode 100644 src/nbl/video/CVulkanMeshPipeline.h diff --git a/include/nbl/asset/IAsset.h b/include/nbl/asset/IAsset.h index 7c6a33193d..bef9b151f6 100644 --- a/include/nbl/asset/IAsset.h +++ b/include/nbl/asset/IAsset.h @@ -95,6 +95,7 @@ class IAsset : virtual public core::IReferenceCounted ET_PIPELINE_CACHE = 1ull<<21, //!< asset::ICPUPipelineCache ET_SCENE = 1ull<<22, //!< reserved, to implement later ET_RAYTRACING_PIPELINE = 1ull << 23, //!< asset::ICPURayTracingPipeline + ET_MESH_PIPELINE = 1ull << 24, ET_IMPLEMENTATION_SPECIFIC_METADATA = 1ull<<31u, //!< lights, etc. //! Reserved special value used for things like terminating lists of this enum diff --git a/include/nbl/asset/ICPUMeshPipeline.h b/include/nbl/asset/ICPUMeshPipeline.h new file mode 100644 index 0000000000..1b48ed06f6 --- /dev/null +++ b/include/nbl/asset/ICPUMeshPipeline.h @@ -0,0 +1,145 @@ +#ifndef _NBL_I_CPU_MESH_PIPELINE_H_INCLUDED_ +#define _NBL_I_CPU_MESH_PIPELINE_H_INCLUDED_ + + +#include "nbl/asset/IMeshPipeline.h" +#include "nbl/asset/ICPURenderpass.h" +#include "nbl/asset/ICPUPipeline.h" + + +namespace nbl::asset +{ + +class ICPUMeshPipeline final : public ICPUPipeline> +{ + using pipeline_base_t = IMeshPipeline; + using base_t = ICPUPipeline; + + public: + + static core::smart_refctd_ptr create(ICPUPipelineLayout* layout, ICPURenderpass* renderpass = nullptr) + { + auto retval = new ICPUMeshPipeline(layout, renderpass); + return core::smart_refctd_ptr(retval,core::dont_grab); + } + + constexpr static inline auto AssetType = ET_MESH_PIPELINE; + inline E_TYPE getAssetType() const override { return AssetType; } + + inline const SCachedCreationParams& getCachedCreationParams() const + { + return pipeline_base_t::getCachedCreationParams(); + } + + inline SCachedCreationParams& getCachedCreationParams() + { + assert(isMutable()); + return m_params; + } + + inline std::span getSpecInfos(const hlsl::ShaderStage stage) const override final + { + switch (stage) { + case hlsl::ShaderStage::ESS_TASK: return { &m_specInfos[0], 1 }; + case hlsl::ShaderStage::ESS_MESH: return { &m_specInfos[1], 1 }; + case hlsl::ShaderStage::ESS_FRAGMENT: return { &m_specInfos[2], 1 }; + } + return {}; + } + + inline std::span getSpecInfos(const hlsl::ShaderStage stage) + { + return base_t::getSpecInfos(stage); + } + + SShaderSpecInfo* getSpecInfo(const hlsl::ShaderStage stage) + { + if (!isMutable()) return nullptr; + switch (stage) { + case hlsl::ShaderStage::ESS_TASK: return &m_specInfos[0]; + case hlsl::ShaderStage::ESS_MESH: return &m_specInfos[1]; + case hlsl::ShaderStage::ESS_FRAGMENT: return &m_specInfos[2]; + } + return nullptr; + } + + const SShaderSpecInfo* getSpecInfo(const hlsl::ShaderStage stage) const + { + const auto stageIndex = stageToIndex(stage); + if (stageIndex != -1) + return &m_specInfos[stageIndex]; + return nullptr; + } + + inline bool valid() const override + { + if (!m_layout) return false; + if (!m_layout->valid())return false; + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-dynamicRendering-06576 + if (!m_renderpass || m_params.subpassIx >= m_renderpass->getSubpassCount()) return false; + + core::bitflag stagePresence = {}; + for (auto shader_i = 0u; shader_i < m_specInfos.size(); shader_i++) + { + const auto& info = m_specInfos[shader_i]; + if (info.shader) + stagePresence |= indexToStage(shader_i); + } + return hasRequiredStages(stagePresence); + } + + protected: + using base_t::base_t; + virtual ~ICPUMeshPipeline() override = default; + + std::array m_specInfos; + + private: + explicit ICPUMeshPipeline(ICPUPipelineLayout* layout, ICPURenderpass* renderpass) + : base_t(layout, {}, renderpass) + {} + + static inline int8_t stageToIndex(const hlsl::ShaderStage stage) + { + const auto stageIx = hlsl::findLSB(stage); + if (stageIx < 0 || stageIx >= MESH_SHADER_STAGE_COUNT || hlsl::bitCount(stage)!=1) + return -1; + return stageIx; + } + + static inline hlsl::ShaderStage indexToStage(const int8_t index) + { + switch (index) { + case 0: return hlsl::ShaderStage::ESS_TASK; + case 1: return hlsl::ShaderStage::ESS_MESH; + case 2: return hlsl::ShaderStage::ESS_FRAGMENT; + } + return hlsl::ShaderStage::ESS_UNKNOWN; + } + + inline core::smart_refctd_ptr clone_impl(core::smart_refctd_ptr&& layout, uint32_t depth) const override final + { + auto* newPipeline = new ICPUMeshPipeline(layout.get(), m_renderpass.get()); + newPipeline->m_params = m_params; + + for (auto specInfo_i = 0u; specInfo_i < m_specInfos.size(); specInfo_i++) + { + newPipeline->m_specInfos[specInfo_i] = m_specInfos[specInfo_i].clone(depth); + } + + return core::smart_refctd_ptr(newPipeline, core::dont_grab); + } + + inline void visitDependents_impl(std::function visit) const override + { + if (!visit(m_layout.get())) return; + if (!visit(m_renderpass.get())) return; + for (const auto& info : m_specInfos) + if (!visit(info.shader.get())) return; + } +}; + +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/indirect_commands.hlsl b/include/nbl/builtin/hlsl/indirect_commands.hlsl index ca8418bde7..89f79e4f86 100644 --- a/include/nbl/builtin/hlsl/indirect_commands.hlsl +++ b/include/nbl/builtin/hlsl/indirect_commands.hlsl @@ -37,6 +37,9 @@ struct DispatchIndirectCommand_t uint32_t num_groups_z; }; +// in vulkan this struct is distinct from DispatchIndirect, but has the same data - https://docs.vulkan.org/refpages/latest/refpages/source/VkDrawMeshTasksIndirectCommandEXT.html +using DrawMeshTasksIndirectCommand_t = DispatchIndirectCommand_t; + struct TraceRaysIndirectCommand_t { uint64_t raygenShaderRecordAddress; diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index bb6460754a..63552efa20 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -328,8 +328,10 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject bool copyAccelerationStructureFromMemory(const AccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo); //! state setup - bool bindComputePipeline(const IGPUComputePipeline* const pipeline); bool bindGraphicsPipeline(const IGPUGraphicsPipeline* const pipeline); + bool bindComputePipeline(const IGPUComputePipeline* const pipeline); + bool bindMeshPipeline(const IGPUMeshPipeline* const pipeline); + bool bindRayTracingPipeline(const IGPURayTracingPipeline* const pipeline); bool bindDescriptorSets( const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, @@ -442,6 +444,12 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject } bool dispatchIndirect(const asset::SBufferBinding& binding); + bool drawMeshTasks(const uint32_t groupCountX, const uint32_t groupCountY = 1, const uint32_t groupCountZ = 1); + inline bool drawMeshTasks(const hlsl::vector groupCount) { + return drawMeshTasks(groupCount.x, groupCount.y, groupCount.z); + } + bool drawMeshTasksIndirect(const asset::SBufferBinding& binding, const uint32_t drawCount, uint32_t stride); + //! Begin/End RenderPasses struct SRenderpassBeginInfo { @@ -585,7 +593,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual const void* getNativeHandle() const = 0; inline const core::unordered_map& getBoundDescriptorSetsRecord() const { return m_boundDescriptorSetsRecord; } - const IGPUGraphicsPipeline* getBoundGraphicsPipeline() const { return m_boundGraphicsPipeline; } + const IGPUPipelineBase* getBoundGraphicsPipeline() const { return m_boundRasterizationPipeline; } const IGPUComputePipeline* getBoundComputePipeline() const { return m_boundComputePipeline; } const IGPURayTracingPipeline* getBoundRayTracingPipeline() const { return m_boundRayTracingPipeline; } @@ -670,8 +678,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool copyAccelerationStructureToMemory_impl(const IGPUAccelerationStructure* src, const asset::SBufferBinding& dst) = 0; virtual bool copyAccelerationStructureFromMemory_impl(const asset::SBufferBinding& src, IGPUAccelerationStructure* dst) = 0; - virtual bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) = 0; virtual bool bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) = 0; + virtual bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) = 0; + virtual bool bindMeshPipeline_impl(const IGPUMeshPipeline* const pipeline) = 0; virtual bool bindRayTracingPipeline_impl(const IGPURayTracingPipeline* const pipeline) = 0; virtual bool bindDescriptorSets_impl( const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, @@ -715,6 +724,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool drawIndirectCount_impl(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride) = 0; virtual bool drawIndexedIndirectCount_impl(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride) = 0; + virtual bool drawMeshTasks_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) = 0; + virtual bool drawMeshTasksIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) = 0; + virtual bool blitImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const std::span regions, const IGPUSampler::E_TEXTURE_FILTER filter) = 0; virtual bool resolveImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* pRegions) = 0; @@ -750,7 +762,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject m_boundDescriptorSetsRecord.clear(); m_TLASTrackingOps.clear(); - m_boundGraphicsPipeline= nullptr; + m_boundRasterizationPipeline= nullptr; m_boundComputePipeline= nullptr; m_boundRayTracingPipeline= nullptr; m_haveRtPipelineStackSize = false; @@ -768,7 +780,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject deleteCommandList(); m_boundDescriptorSetsRecord.clear(); m_TLASTrackingOps.clear(); - m_boundGraphicsPipeline= nullptr; + m_boundRasterizationPipeline= nullptr; m_boundComputePipeline= nullptr; m_boundRayTracingPipeline= nullptr; m_haveRtPipelineStackSize = false; @@ -929,7 +941,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject // operations as they'll be performed in order core::vector> m_TLASTrackingOps; - const IGPUGraphicsPipeline* m_boundGraphicsPipeline; + const IGPUPipelineBase* m_boundRasterizationPipeline; const IGPUComputePipeline* m_boundComputePipeline; const IGPURayTracingPipeline* m_boundRayTracingPipeline; diff --git a/include/nbl/video/IGPUCommandPool.h b/include/nbl/video/IGPUCommandPool.h index 0424ad83bd..35442b341b 100644 --- a/include/nbl/video/IGPUCommandPool.h +++ b/include/nbl/video/IGPUCommandPool.h @@ -8,8 +8,9 @@ #include "nbl/video/IEvent.h" #include "nbl/video/IGPUDescriptorSet.h" -#include "nbl/video/IGPUComputePipeline.h" #include "nbl/video/IGPUGraphicsPipeline.h" +#include "nbl/video/IGPUComputePipeline.h" +#include "nbl/video/IGPUMeshPipeline.h" #include "nbl/video/IGPURayTracingPipeline.h" #include "nbl/video/IGPUFramebuffer.h" #include "nbl/video/IQueryPool.h" @@ -125,7 +126,6 @@ class IGPUCommandPool : public IBackendObject class CBeginRenderPassCmd; class CPipelineBarrierCmd; class CBindDescriptorSetsCmd; - class CBindComputePipelineCmd; class CUpdateBufferCmd; class CResetQueryPoolCmd; class CWriteTimestampCmd; @@ -133,6 +133,9 @@ class IGPUCommandPool : public IBackendObject class CEndQueryCmd; class CCopyQueryPoolResultsCmd; class CBindGraphicsPipelineCmd; + class CBindComputePipelineCmd; + class CBindMeshPipelineCmd; + class CBindRayTracingPipelineCmd; class CPushConstantsCmd; class CBindVertexBuffersCmd; class CCopyBufferCmd; @@ -155,7 +158,6 @@ class IGPUCommandPool : public IBackendObject class CCopyAccelerationStructureToOrFromMemoryCmd; // for both vkCmdCopyAccelerationStructureToMemoryKHR and vkCmdCopyMemoryToAccelerationStructureKHR class CTraceRaysCmd; class CTraceRaysIndirectCmd; - class CBindRayTracingPipelineCmd; protected: IGPUCommandPool(core::smart_refctd_ptr&& dev, const core::bitflag _flags, const uint8_t _familyIx) @@ -529,15 +531,6 @@ class IGPUCommandPool::CBindDescriptorSetsCmd final : public IFixedSizeCommand m_sets[IGPUPipelineLayout::DESCRIPTOR_SET_COUNT]; }; -class IGPUCommandPool::CBindComputePipelineCmd final : public IFixedSizeCommand -{ - public: - CBindComputePipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} - - private: - core::smart_refctd_ptr m_pipeline; -}; - class IGPUCommandPool::CUpdateBufferCmd final : public IFixedSizeCommand { public: @@ -604,6 +597,33 @@ class IGPUCommandPool::CBindGraphicsPipelineCmd final : public IFixedSizeCommand core::smart_refctd_ptr m_pipeline; }; +class IGPUCommandPool::CBindComputePipelineCmd final : public IFixedSizeCommand +{ + public: + CBindComputePipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} + + private: + core::smart_refctd_ptr m_pipeline; +}; + +class IGPUCommandPool::CBindMeshPipelineCmd final : public IFixedSizeCommand +{ +public: + CBindMeshPipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} + +private: + core::smart_refctd_ptr m_pipeline; +}; + +class IGPUCommandPool::CBindRayTracingPipelineCmd final : public IFixedSizeCommand +{ + public: + CBindRayTracingPipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} + + private: + core::smart_refctd_ptr m_pipeline; +}; + class IGPUCommandPool::CPushConstantsCmd final : public IFixedSizeCommand { public: @@ -870,14 +890,6 @@ class IGPUCommandPool::CTraceRaysIndirectCmd final : public IFixedSizeCommand m_bindingBuffer; }; -class IGPUCommandPool::CBindRayTracingPipelineCmd final : public IFixedSizeCommand -{ - public: - CBindRayTracingPipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} - - private: - core::smart_refctd_ptr m_pipeline; -}; NBL_ENUM_ADD_BITWISE_OPERATORS(IGPUCommandPool::CREATE_FLAGS) } diff --git a/include/nbl/video/IGPUMeshPipeline.h b/include/nbl/video/IGPUMeshPipeline.h new file mode 100644 index 0000000000..7878704978 --- /dev/null +++ b/include/nbl/video/IGPUMeshPipeline.h @@ -0,0 +1,165 @@ +#ifndef _NBL_I_GPU_MESH_PIPELINE_H_INCLUDED_ +#define _NBL_I_GPU_MESH_PIPELINE_H_INCLUDED_ + +#include "nbl/asset/IMeshPipeline.h" + +#include "nbl/video/IGPUPipelineLayout.h" +#include "nbl/video/IGPURenderpass.h" +#include "nbl/video/IGPUPipeline.h" + +//related spec + +//i feel like this MIGHT get stuffed into graphicspipeline but idk + +/* +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-PrimitiveId-06264 +** If the pipeline requires pre-rasterization shader state, it includes a mesh shader and the fragment shader code reads from an input variable that is decorated with PrimitiveId, then the mesh shader code must write to a matching output variable, decorated with PrimitiveId, in all execution paths + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-07064 +* If renderPass is not VK_NULL_HANDLE, the pipeline is being created with pre-rasterization shader state, subpass viewMask is not 0, and multiviewMeshShader is not enabled, then pStages must not include a mesh shader + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-None-02322 +* If the pipeline requires pre-rasterization shader state, and there are any mesh shader stages in the pipeline there must not be any shader stage in the pipeline with a Xfb execution mode +*** whats a xfb + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-shaderMeshEnqueue-10187 +* If the shaderMeshEnqueue feature is not enabled, shaders specified by pStages must not declare the ShaderEnqueueAMDX capability +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-flags-10188 +* If flags does not include VK_PIPELINE_CREATE_LIBRARY_BIT_KHR, shaders specified by pStages must not declare the ShaderEnqueueAMDX capability +*** my understanding is nabla strictly controls it's extensions, so this shouldnt be an issue + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07065 +* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the +* pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY, or VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE +*** this one seems the most relevant + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07066 +* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the +* pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE, or VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07067 +* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_VERTEX_INPUT_EXT + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-07720 +* If renderPass is VK_NULL_HANDLE, the pipeline is being created with pre-rasterization shader state, and +* VkPipelineRenderingCreateInfo::viewMask is not 0, and multiviewMeshShader is not enabled, then pStages must not include a mesh shader + + +* theres 1 or 2 more about pipeline libraries, but im not going to worry about that +*/ + +namespace nbl::video +{ + + class IGPUMeshPipeline : public IGPUPipeline> + { + using pipeline_t = asset::IMeshPipeline; + + public: + struct SCreationParams final : public SPipelineCreationParams + { + public: + #define base_flag(F) static_cast(pipeline_t::FLAGS::F) + enum class FLAGS : uint64_t + { + NONE = base_flag(NONE), + DISABLE_OPTIMIZATIONS = base_flag(DISABLE_OPTIMIZATIONS), + ALLOW_DERIVATIVES = base_flag(ALLOW_DERIVATIVES), + VIEW_INDEX_FROM_DEVICE_INDEX = 1<<3, + FAIL_ON_PIPELINE_COMPILE_REQUIRED = base_flag(FAIL_ON_PIPELINE_COMPILE_REQUIRED), + EARLY_RETURN_ON_FAILURE = base_flag(EARLY_RETURN_ON_FAILURE), + }; + #undef base_flag + + inline SSpecializationValidationResult valid() const + { + if (!layout) + return {}; + SSpecializationValidationResult retval = { .count = 0,.dataSize = 0 }; + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-dynamicRendering-06576 + if (!renderpass || cached.subpassIx >= renderpass->getSubpassCount()) + return {}; + + // TODO: check rasterization samples, etc. + //rp->getCreationParameters().subpasses[i] + + core::bitflag stagePresence = {}; + + auto processSpecInfo = [&](const SShaderSpecInfo& specInfo, hlsl::ShaderStage stage) + { + if (!specInfo.shader) return true; + if (!specInfo.accumulateSpecializationValidationResult(&retval)) return false; + stagePresence |= stage; + return true; + }; + if (!processSpecInfo(taskShader, hlsl::ShaderStage::ESS_TASK)) return {}; + if (!processSpecInfo(meshShader, hlsl::ShaderStage::ESS_MESH)) return {}; + if (!processSpecInfo(fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT)) return {}; + + if (!hasRequiredStages(stagePresence)) + return {}; + + //if (!vertexShader.shader) return {}; //i dont quite understand why this line was in IGPUGraphics. checking if the shader itself was made correctly? + + return retval; + } + + inline core::bitflag getRequiredSubgroupStages() const + { + + core::bitflag stages = {}; + auto processSpecInfo = [&](const SShaderSpecInfo& spec, hlsl::ShaderStage stage) + { + if (spec.shader && spec.requiredSubgroupSize >= SUBGROUP_SIZE::REQUIRE_4) { + stages |= stage; + } + }; + processSpecInfo(taskShader, hlsl::ESS_TASK); + processSpecInfo(meshShader, hlsl::ESS_MESH); + processSpecInfo(fragmentShader, hlsl::ESS_FRAGMENT); + return stages; + } + + inline core::bitflag& getFlags() { return flags; } + + inline core::bitflag getFlags() const { return flags; } + + const IGPUPipelineLayout* layout = nullptr; + SShaderSpecInfo taskShader; + SShaderSpecInfo meshShader; + SShaderSpecInfo fragmentShader; + SCachedCreationParams cached = {}; + renderpass_t* renderpass = nullptr; + + // TODO: Could guess the required flags from SPIR-V introspection of declared caps + core::bitflag flags = FLAGS::NONE; + + inline uint32_t getShaderCount() const + { + uint32_t count = 0; //count = 2 and only check task shader?? + count += (taskShader.shader != nullptr); + count += (meshShader.shader != nullptr); + count += (fragmentShader.shader != nullptr); + return count; + } + }; + + inline core::bitflag getCreationFlags() const {return m_flags;} + + // Vulkan: const VkPipeline* + virtual const void* getNativeHandle() const = 0; + + protected: + // not explicit? + IGPUMeshPipeline(const SCreationParams& params) : + IGPUPipeline(core::smart_refctd_ptr(params.layout->getOriginDevice()), params.layout, params.cached, params.renderpass), m_flags(params.flags) + {} + virtual ~IGPUMeshPipeline() override = default; + + const core::bitflag m_flags; + }; + +} + +#endif \ No newline at end of file diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 180342e2d4..19d44b2486 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -1020,17 +1020,29 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe return createPipelineCache(initialData,notThreadsafe); } - bool createComputePipelines(IGPUPipelineCache* const pipelineCache, const std::span params, core::smart_refctd_ptr* const output); - bool createGraphicsPipelines( IGPUPipelineCache* const pipelineCache, const std::span params, core::smart_refctd_ptr* const output ); - bool createRayTracingPipelines(IGPUPipelineCache* const pipelineCache, - const std::span params, - core::smart_refctd_ptr* const output); + bool createComputePipelines( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output + ); + + bool createMeshPipelines( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output + ); + + bool createRayTracingPipelines( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output + ); // queries inline core::smart_refctd_ptr createQueryPool(const IQueryPool::SCreationParams& params) @@ -1276,16 +1288,22 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe } return retval; } + virtual void createGraphicsPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output, + const SSpecializationValidationResult& validation + ) = 0; virtual void createComputePipelines_impl( IGPUPipelineCache* const pipelineCache, const std::span createInfos, core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) = 0; - virtual void createGraphicsPipelines_impl( + virtual void createMeshPipelines_impl( IGPUPipelineCache* const pipelineCache, - const std::span params, - core::smart_refctd_ptr* const output, + const std::span params, + core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) = 0; virtual void createRayTracingPipelines_impl( diff --git a/include/nbl/video/asset_traits.h b/include/nbl/video/asset_traits.h index c4a6c25ca5..2ed08108b5 100644 --- a/include/nbl/video/asset_traits.h +++ b/include/nbl/video/asset_traits.h @@ -9,10 +9,6 @@ #include "nbl/video/IGPUBufferView.h" #include "nbl/asset/ICPUDescriptorSet.h" #include "nbl/video/IGPUDescriptorSet.h" -#include "nbl/asset/ICPUComputePipeline.h" -#include "nbl/video/IGPUComputePipeline.h" -#include "nbl/asset/ICPUGraphicsPipeline.h" -#include "nbl/video/IGPUGraphicsPipeline.h" #include "nbl/asset/ICPUSampler.h" #include "nbl/video/IGPUSampler.h" #include "nbl/asset/ICPUImageView.h" @@ -21,6 +17,13 @@ #include "nbl/video/IGPUAccelerationStructure.h" #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/video/IGPUPolygonGeometry.h" + +#include "nbl/asset/ICPUGraphicsPipeline.h" +#include "nbl/video/IGPUGraphicsPipeline.h" +#include "nbl/asset/ICPUComputePipeline.h" +#include "nbl/video/IGPUComputePipeline.h" +#include "nbl/asset/ICPUMeshPipeline.h" +#include "nbl/video/IGPUMeshPipeline.h" #include "nbl/asset/ICPURayTracingPipeline.h" #include "nbl/video/IGPURayTracingPipeline.h" @@ -96,19 +99,6 @@ struct asset_traits using lookup_t = const video_t*; }; -template<> -struct asset_traits -{ - // the asset type - using asset_t = asset::ICPUComputePipeline; - // Pipeline Layout references Descriptor Set Layouts - constexpr static inline bool HasChildren = true; - // the video type - using video_t = IGPUComputePipeline; - // lookup type - using lookup_t = const video_t*; -}; - template<> struct asset_traits @@ -123,19 +113,6 @@ struct asset_traits using lookup_t = const video_t*; }; -template<> -struct asset_traits -{ - // the asset type - using asset_t = asset::ICPUGraphicsPipeline; - // we reference a pipeline layout and a renderpass - constexpr static inline bool HasChildren = true; - // the video type - using video_t = IGPUGraphicsPipeline; - // lookup type - using lookup_t = const video_t*; -}; - template<> struct asset_traits @@ -246,6 +223,45 @@ struct asset_traits }; +template<> +struct asset_traits +{ + // the asset type + using asset_t = asset::ICPUGraphicsPipeline; + // we reference a pipeline layout and a renderpass + constexpr static inline bool HasChildren = true; + // the video type + using video_t = IGPUGraphicsPipeline; + // lookup type + using lookup_t = const video_t*; +}; + +template<> +struct asset_traits +{ + // the asset type + using asset_t = asset::ICPUComputePipeline; + // Pipeline Layout references Descriptor Set Layouts + constexpr static inline bool HasChildren = true; + // the video type + using video_t = IGPUComputePipeline; + // lookup type + using lookup_t = const video_t*; +}; + +template<> +struct asset_traits +{ + // the asset type + using asset_t = asset::ICPUMeshPipeline; + // Pipeline Layout references Descriptor Set Layouts + constexpr static inline bool HasChildren = true; + // the video type + using video_t = IGPUMeshPipeline; + // lookup type + using lookup_t = const video_t*; +}; + template<> struct asset_traits { diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 512633536f..359b2cc024 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -266,7 +266,6 @@ set(NBL_VIDEO_SOURCES video/CVulkanDescriptorSetLayout.cpp video/CVulkanPipelineLayout.cpp video/CVulkanPipelineCache.cpp - video/CVulkanComputePipeline.cpp video/CVulkanDescriptorPool.cpp video/CVulkanDescriptorSet.cpp video/CVulkanMemoryAllocation.cpp @@ -279,6 +278,8 @@ set(NBL_VIDEO_SOURCES video/CVulkanConnection.cpp video/CVulkanPhysicalDevice.cpp video/CVulkanGraphicsPipeline.cpp + video/CVulkanComputePipeline.cpp + video/CVulkanMeshPipeline.cpp video/CVulkanRayTracingPipeline.cpp video/CVulkanEvent.cpp video/CSurfaceVulkan.cpp diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index a55c3a1e7b..f33966588e 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -406,15 +406,21 @@ bool CVulkanCommandBuffer::copyAccelerationStructureFromMemory_impl(const asset: return true; } +bool CVulkanCommandBuffer::bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) +{ + getFunctionTable().vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, static_cast(pipeline)->getInternalObject()); + return true; +} + bool CVulkanCommandBuffer::bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) { getFunctionTable().vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, static_cast(pipeline)->getInternalObject()); return true; } -bool CVulkanCommandBuffer::bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) +bool CVulkanCommandBuffer::bindMeshPipeline_impl(const IGPUMeshPipeline* const pipeline) { - getFunctionTable().vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, static_cast(pipeline)->getInternalObject()); + getFunctionTable().vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, static_cast(pipeline)->getInternalObject()); return true; } @@ -639,6 +645,18 @@ bool CVulkanCommandBuffer::dispatchIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) +{ + getFunctionTable().vkCmdDrawMeshTasksIndirectEXT(m_cmdbuf, static_cast(binding.buffer.get())->getInternalObject(), binding.offset, drawCount, stride); + return true; +} + bool CVulkanCommandBuffer::beginRenderPass_impl(const SRenderpassBeginInfo& info, const SUBPASS_CONTENTS contents) { diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index 9383585b23..ba3925ffe2 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -181,8 +181,9 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool copyAccelerationStructureToMemory_impl(const IGPUAccelerationStructure* src, const asset::SBufferBinding& dst); bool copyAccelerationStructureFromMemory_impl(const asset::SBufferBinding& src, IGPUAccelerationStructure* dst); - bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) override; bool bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) override; + bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) override; + bool bindMeshPipeline_impl(const IGPUMeshPipeline* const pipeline) override; bool bindRayTracingPipeline_impl(const IGPURayTracingPipeline* const pipeline) override; bool bindDescriptorSets_impl(const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* const* const pDescriptorSets, const uint32_t dynamicOffsetCount = 0u, const uint32_t* const dynamicOffsets = nullptr) override; bool pushConstants_impl(const IGPUPipelineLayout* const layout, const core::bitflag stageFlags, const uint32_t offset, const uint32_t size, const void* const pValues) override; @@ -209,6 +210,9 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool dispatch_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) override; bool dispatchIndirect_impl(const asset::SBufferBinding& binding) override; + bool drawMeshTasks_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) override; + bool drawMeshTasksIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) override; + bool beginRenderPass_impl(const SRenderpassBeginInfo& info, SUBPASS_CONTENTS contents) override; bool nextSubpass_impl(const SUBPASS_CONTENTS contents) override; bool endRenderPass_impl() override; diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 5390b4c3fa..34a24d30d7 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1121,7 +1121,7 @@ VkPipelineShaderStageCreateInfo getVkShaderStageCreateInfoFrom( if (requireFullSubgroups) { - assert(stage==hlsl::ShaderStage::ESS_COMPUTE/*TODO: Or Mesh Or Task*/); + assert(stage == hlsl::ShaderStage::ESS_COMPUTE || stage == hlsl::ShaderStage::ESS_MESH || stage == hlsl::ShaderStage::ESS_TASK); retval.flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT; } } @@ -1176,8 +1176,8 @@ void CVulkanLogicalDevice::createComputePipelines_impl( for (const auto& info : createInfos) { initPipelineCreateInfo(outCreateInfo,info); - const auto& spec = info.shader; - outCreateInfo->stage = getVkShaderStageCreateInfoFrom(spec, hlsl::ShaderStage::ESS_COMPUTE, info.cached.requireFullSubgroups, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo, outSpecMapEntry, outSpecData); + + outCreateInfo->stage = getVkShaderStageCreateInfoFrom(info.shader, hlsl::ShaderStage::ESS_COMPUTE, info.cached.requireFullSubgroups, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo, outSpecMapEntry, outSpecData); outCreateInfo++; } auto vk_pipelines = reinterpret_cast(output); @@ -1202,26 +1202,136 @@ void CVulkanLogicalDevice::createComputePipelines_impl( std::fill_n(output,vk_createInfos.size(),nullptr); } -void CVulkanLogicalDevice::createGraphicsPipelines_impl( - IGPUPipelineCache* const pipelineCache, - const std::span createInfos, - core::smart_refctd_ptr* const output, - const SSpecializationValidationResult& validation -) -{ - auto getVkStencilOpStateFrom = [](const asset::SStencilOpParams& params)->VkStencilOpState - { - return { - .failOp = static_cast(params.failOp), - .passOp = static_cast(params.passOp), - .depthFailOp = static_cast(params.depthFailOp), - .compareOp = static_cast(params.compareOp) - }; +void PopulateViewport(VkPipelineViewportStateCreateInfo& outViewport, nbl::asset::SRasterizationParams const& raster) { + outViewport.viewportCount = raster.viewportCount; + // must be identical to viewport count unless VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT or VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT are used + outViewport.scissorCount = raster.viewportCount; +} + + +void PopulateRaster(VkPipelineRasterizationStateCreateInfo& outRaster, nbl::asset::SRasterizationParams const& raster) { + outRaster.depthClampEnable = raster.depthClampEnable; + outRaster.rasterizerDiscardEnable = raster.rasterizerDiscard; + outRaster.polygonMode = static_cast(raster.polygonMode); + outRaster.cullMode = static_cast(raster.faceCullingMode); + outRaster.frontFace = raster.frontFaceIsCCW ? VK_FRONT_FACE_COUNTER_CLOCKWISE : VK_FRONT_FACE_CLOCKWISE; + outRaster.depthBiasEnable = raster.depthBiasEnable; +} + +void PopulateMultisample(VkPipelineMultisampleStateCreateInfo& outMultisample, nbl::asset::SRasterizationParams const& raster) { + outMultisample.rasterizationSamples = static_cast(0x1 << raster.samplesLog2); + if (raster.minSampleShadingUnorm > 0) { + outMultisample.sampleShadingEnable = true; + outMultisample.minSampleShading = float(raster.minSampleShadingUnorm) / 255.f; + } + else { + outMultisample.sampleShadingEnable = false; + outMultisample.minSampleShading = 0.f; + } + outMultisample.pSampleMask = raster.sampleMask; + outMultisample.alphaToCoverageEnable = raster.alphaToCoverageEnable; + outMultisample.alphaToOneEnable = raster.alphaToOneEnable; +} +VkStencilOpState getVkStencilOpStateFrom(const asset::SStencilOpParams& params) { + return { + .failOp = static_cast(params.failOp), + .passOp = static_cast(params.passOp), + .depthFailOp = static_cast(params.depthFailOp), + .compareOp = static_cast(params.compareOp) }; +} - const auto& features = getEnabledFeatures(); +void PopulateDepthStencil(VkPipelineDepthStencilStateCreateInfo& outDepthStencil, nbl::asset::SRasterizationParams const& raster) { + outDepthStencil.depthTestEnable = raster.depthTestEnable(); + outDepthStencil.depthWriteEnable = raster.depthWriteEnable; + outDepthStencil.depthCompareOp = static_cast(raster.depthCompareOp); + outDepthStencil.depthBoundsTestEnable = raster.depthBoundsTestEnable; + outDepthStencil.stencilTestEnable = raster.stencilTestEnable(); + outDepthStencil.front = getVkStencilOpStateFrom(raster.frontStencilOps); + outDepthStencil.back = getVkStencilOpStateFrom(raster.backStencilOps); +} + +void PopulateColorBlend( + VkPipelineColorBlendStateCreateInfo& outColorBlend, + VkPipelineColorBlendAttachmentState*& outColorBlendAttachmentState, + nbl::asset::SBlendParams const& blend, + nbl::asset::IRenderpass::SCreationParams::SSubpassDescription const& subpass +) { + //outColorBlend->flags no attachment order access yet + outColorBlend.logicOpEnable = blend.logicOp != asset::ELO_NO_OP; + outColorBlend.logicOp = getVkLogicOpFromLogicOp(blend.logicOp); + outColorBlend.pAttachments = outColorBlendAttachmentState; + for (auto i = 0; i < IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments; i++) { + if (subpass.colorAttachments[i].render.used()) { + const auto& params = blend.blendParams[i]; + outColorBlendAttachmentState->blendEnable = params.blendEnabled(); + outColorBlendAttachmentState->srcColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.srcColorFactor)); + outColorBlendAttachmentState->dstColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstColorFactor)); + outColorBlendAttachmentState->colorBlendOp = getVkBlendOpFromBlendOp(static_cast(params.colorBlendOp)); + outColorBlendAttachmentState->srcAlphaBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.srcAlphaFactor)); + outColorBlendAttachmentState->dstAlphaBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstAlphaFactor)); + outColorBlendAttachmentState->alphaBlendOp = getVkBlendOpFromBlendOp(static_cast(params.alphaBlendOp)); + outColorBlendAttachmentState->colorWriteMask = getVkColorComponentFlagsFromColorWriteMask(params.colorWriteMask); + outColorBlendAttachmentState++; + //^that pointer iterator is how we ensure the attachments or consecutive + } + } + outColorBlend.attachmentCount = std::distance(outColorBlend.pAttachments, outColorBlendAttachmentState); +} + +template +void PopulateMeshGraphicsCommonData( + const std::span createInfos, + core::vector& vk_createInfos, + + core::vector& vk_viewportStates, + core::vector& vk_rasterizationStates, + core::vector& vk_multisampleStates, + core::vector& vk_depthStencilStates, + core::vector& vk_colorBlendStates, + core::vector& vk_colorBlendAttachmentStates, + + core::vector& vk_dynamicStates, + const VkPipelineDynamicStateCreateInfo& vk_dynamicStateCreateInfo +) { + //the main concern is lifetime, so don't want to construct, move, or copy anything in here + + auto outColorBlendAttachmentState = vk_colorBlendAttachmentStates.data(); //the pointer iterator is used - core::vector vk_dynamicStates = { + + for (uint32_t i = 0; i < createInfos.size(); i++) { //whats the maximum number of pipelines that can be created at once? uint32_t to be safe + auto& info = createInfos[i]; + const auto& blend = info.cached.blend; + const auto& raster = info.cached.rasterization; + const auto& subpass = info.renderpass->getCreationParameters().subpasses[info.cached.subpassIx]; + + initPipelineCreateInfo(&vk_createInfos[i], info); + + PopulateViewport(vk_viewportStates[i], raster); + PopulateRaster(vk_rasterizationStates[i], raster); + PopulateMultisample(vk_multisampleStates[i], raster); + PopulateDepthStencil(vk_depthStencilStates[i], raster); + PopulateColorBlend(vk_colorBlendStates[i], outColorBlendAttachmentState, blend, subpass); + //PopulateDynamicState(dynState, ?) + + + vk_createInfos[i].pViewportState = &vk_viewportStates[i]; + vk_createInfos[i].pRasterizationState = &vk_rasterizationStates[i]; + vk_createInfos[i].pMultisampleState = &vk_multisampleStates[i]; + vk_createInfos[i].pDepthStencilState = &vk_depthStencilStates[i]; + vk_createInfos[i].pColorBlendState = &vk_colorBlendStates[i]; + vk_createInfos[i].pDynamicState = &vk_dynamicStateCreateInfo; + vk_createInfos[i].renderPass = static_cast(info.renderpass)->getInternalObject(); + vk_createInfos[i].subpass = info.cached.subpassIx; + //handle + //index + //layout? + // ^ handled in initPipelineCreateInfo + } +} + +core::vector getDefaultDynamicStates(SPhysicalDeviceFeatures const& features) { + core::vector ret = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_LINE_WIDTH, @@ -1231,19 +1341,164 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE }; - if (features.depthBounds) - vk_dynamicStates.push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS); + if (features.depthBounds) { + ret.push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS); + } // TODO: VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT, VK_DYNAMIC_STATE_DISCARD_RECTANGLE_ENABLE_EXT, VK_DYNAMIC_STATE_DISCARD_RECTANGLE_MODE_EXT - - const VkPipelineDynamicStateCreateInfo vk_dynamicStateCreateInfo = { + + return ret; +} + +//maximum cleanliness,i tried it and im not a big fan +//struct CommonPipelineStruct { +// VkPipelineRasterizationStateCreateInfo vk_rasterizationStates{ VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0 }; +// VkPipelineMultisampleStateCreateInfo vk_multisampleStates{ VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,nullptr,0 }; +// VkPipelineDepthStencilStateCreateInfo vk_depthStencilStates{ VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,nullptr,0 }; +// VkPipelineColorBlendStateCreateInfo vk_colorBlendStates{ VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,nullptr,0 }; +// core::vector vk_colorBlendAttachmentStates{ IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments }; +//}; + + +void CVulkanLogicalDevice::createMeshPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span createInfos, + core::smart_refctd_ptr* const output, + const SSpecializationValidationResult& validation +) { + const auto& features = getEnabledFeatures(); + + const VkPipelineCache vk_pipelineCache = pipelineCache ? static_cast(pipelineCache)->getInternalObject() : VK_NULL_HANDLE; + + core::vector vk_createInfos(createInfos.size(), { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,nullptr }); + + core::vector vk_rasterizationStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_multisampleStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_depthStencilStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_colorBlendStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_colorBlendAttachmentStates(createInfos.size() * IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments); + + core::vector vk_dynamicStates = getDefaultDynamicStates(features); + + const VkPipelineDynamicStateCreateInfo vk_dynamicStateCreateInfo = { .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0u, .dynamicStateCount = static_cast(vk_dynamicStates.size()), .pDynamicStates = vk_dynamicStates.data() }; + core::vector vk_viewportStates(createInfos.size(), { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, // the extensions that interest us have a dynamic state variant anyway + .flags = 0, // must be 0 + .viewportCount = 0, + .pViewports = nullptr, + .scissorCount = 0, + .pScissors = nullptr, + }); - const VkPipelineCache vk_pipelineCache = pipelineCache ? static_cast(pipelineCache)->getInternalObject():VK_NULL_HANDLE; + PopulateMeshGraphicsCommonData( + createInfos, vk_createInfos, + + vk_viewportStates, + vk_rasterizationStates, + vk_multisampleStates, + vk_depthStencilStates, + vk_colorBlendStates, + vk_colorBlendAttachmentStates, + + vk_dynamicStates, vk_dynamicStateCreateInfo + ); + + //not used in mesh pipelines + for (auto& outCreateInfo : vk_createInfos) { + outCreateInfo.pVertexInputState = nullptr; + outCreateInfo.pInputAssemblyState = nullptr; + outCreateInfo.pTessellationState = nullptr; + } + auto outCreateInfo = vk_createInfos.data(); + + const auto maxShaderStages = createInfos.size() * IGPUMeshPipeline::MESH_SHADER_STAGE_COUNT; + core::vector vk_shaderStage(maxShaderStages, { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr }); + core::vector vk_shaderModule(maxShaderStages, { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0 }); + core::vector entryPoints(maxShaderStages); + core::vector vk_requiredSubgroupSize(maxShaderStages, { + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,nullptr }); + core::vector vk_specializationInfos(maxShaderStages, { 0,nullptr,0,nullptr }); + core::vector vk_specializationMapEntry(validation.count); + core::vector specializationData(validation.dataSize); + auto outShaderStage = vk_shaderStage.data(); + auto outEntryPoints = entryPoints.data(); + auto outShaderModule = vk_shaderModule.data(); + auto outRequiredSubgroupSize = vk_requiredSubgroupSize.data(); + auto outSpecInfo = vk_specializationInfos.data(); + auto outSpecMapEntry = vk_specializationMapEntry.data(); + auto outSpecData = specializationData.data(); + + //shader + for (const auto& info : createInfos) + { + outCreateInfo->pStages = outShaderStage; + auto processSpecShader = [&](IGPUPipelineBase::SShaderSpecInfo spec, hlsl::ShaderStage shaderStage) + { + if (spec.shader) + { + *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, + shaderStage, + false, + outShaderModule, + outEntryPoints, + outRequiredSubgroupSize, + outSpecInfo, + outSpecMapEntry, + outSpecData + ); + outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); + } + }; + processSpecShader(info.taskShader, hlsl::ShaderStage::ESS_TASK); + processSpecShader(info.meshShader, hlsl::ShaderStage::ESS_MESH); + processSpecShader(info.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); + + outCreateInfo++; + } + + auto vk_pipelines = reinterpret_cast(output); + std::stringstream debugNameBuilder; + if (m_devf.vk.vkCreateGraphicsPipelines(m_vkdev, vk_pipelineCache, vk_createInfos.size(), vk_createInfos.data(), nullptr, vk_pipelines) == VK_SUCCESS) + { + for (size_t i = 0ull; i < createInfos.size(); ++i) + { + const auto& createInfo = createInfos[i]; + const VkPipeline vk_pipeline = vk_pipelines[i]; + // break the lifetime cause of the aliasing + std::uninitialized_default_construct_n(output + i, 1); + output[i] = core::make_smart_refctd_ptr(createInfos[i], vk_pipeline); + debugNameBuilder.str(""); + auto buildDebugName = [&](const IGPUPipelineBase::SShaderSpecInfo& spec, hlsl::ShaderStage stage) + { + if (spec.shader != nullptr) + debugNameBuilder << spec.shader->getFilepathHint() << "(" << spec.entryPoint << "," << stage << ")\n"; + }; + buildDebugName(createInfo.taskShader, hlsl::ESS_TASK); + buildDebugName(createInfo.meshShader, hlsl::ESS_MESH); + buildDebugName(createInfo.fragmentShader, hlsl::ESS_FRAGMENT); + output[i]->setObjectDebugName(debugNameBuilder.str().c_str()); + } + } + else + std::fill_n(output, vk_createInfos.size(), nullptr); +} + +void CVulkanLogicalDevice::createGraphicsPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span createInfos, + core::smart_refctd_ptr* const output, + const SSpecializationValidationResult& validation +) +{ + const auto& features = getEnabledFeatures(); + + const VkPipelineCache vk_pipelineCache = pipelineCache ? static_cast(pipelineCache)->getInternalObject() : VK_NULL_HANDLE; // Interesting things to put in pNext: // - AttachmentSampleCountInfoAMD // - Graphics Pipeline Library styff @@ -1252,24 +1507,26 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( // - Discard Rectangle State // - Fragment Shading Rate State Creation Info // - Piepline Robustness - core::vector vk_createInfos(createInfos.size(),{VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,nullptr}); - const auto maxShaderStages = createInfos.size()*IGPUGraphicsPipeline::GRAPHICS_SHADER_STAGE_COUNT; - core::vector vk_shaderStage(maxShaderStages,{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr}); - core::vector vk_shaderModule(maxShaderStages,{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0}); - core::vector entryPoints(maxShaderStages); - core::vector vk_requiredSubgroupSize(maxShaderStages,{ - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,nullptr - }); - core::vector vk_specializationInfos(maxShaderStages,{0,nullptr,0,nullptr}); - core::vector vk_specializationMapEntry(validation.count); - core::vector specializationData(validation.dataSize); - core::vector vk_vertexInput(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_inputBinding(createInfos.size()*asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT); - core::vector vk_inputAttribute(createInfos.size()*asset::SVertexInputParams::MAX_VERTEX_ATTRIB_COUNT); - core::vector vk_inputAssembly(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_tessellation(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_viewportStates(createInfos.size(),{ + //maximum cleanliness, I create a struct that holds this for mesh and graphics? + core::vector vk_createInfos(createInfos.size(), { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,nullptr }); + + core::vector vk_rasterizationStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_multisampleStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_depthStencilStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_colorBlendStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_colorBlendAttachmentStates(createInfos.size() * IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments); + + core::vector vk_dynamicStates = getDefaultDynamicStates(features); + + const VkPipelineDynamicStateCreateInfo vk_dynamicStateCreateInfo = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0u, + .dynamicStateCount = static_cast(vk_dynamicStates.size()), + .pDynamicStates = vk_dynamicStates.data() + }; + core::vector vk_viewportStates(createInfos.size(), { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, .pNext = nullptr, // the extensions that interest us have a dynamic state variant anyway .flags = 0, // must be 0 @@ -1277,68 +1534,54 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( .pViewports = nullptr, .scissorCount = 0, .pScissors = nullptr, - }); - core::vector vk_rasterizationStates(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_multisampleStates(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_depthStencilStates(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_colorBlendStates(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_colorBlendAttachmentStates(createInfos.size()*IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments); + }); + + PopulateMeshGraphicsCommonData( + createInfos, vk_createInfos, + + vk_viewportStates, + vk_rasterizationStates, + vk_multisampleStates, + vk_depthStencilStates, + vk_colorBlendStates, + vk_colorBlendAttachmentStates, + + vk_dynamicStates, vk_dynamicStateCreateInfo + ); + + + core::vector vk_inputBinding(createInfos.size() * asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT); + core::vector vk_inputAttribute(createInfos.size() * asset::SVertexInputParams::MAX_VERTEX_ATTRIB_COUNT); + core::vector vk_inputAssembly(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_tessellation(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_vertexInput(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,nullptr,0 }); auto outCreateInfo = vk_createInfos.data(); - auto outShaderStage = vk_shaderStage.data(); - auto outEntryPoints = entryPoints.data(); - auto outShaderModule = vk_shaderModule.data(); - auto outRequiredSubgroupSize = vk_requiredSubgroupSize.data(); - auto outSpecInfo = vk_specializationInfos.data(); - auto outSpecMapEntry = vk_specializationMapEntry.data(); - auto outSpecData = specializationData.data(); auto outVertexInput = vk_vertexInput.data(); auto outInputBinding = vk_inputBinding.data(); auto outInputAttribute = vk_inputAttribute.data(); - auto outInputAssembly = vk_inputAssembly.data(); auto outTessellation = vk_tessellation.data(); - auto outViewport = vk_viewportStates.data(); - auto outRaster = vk_rasterizationStates.data(); - auto outMultisample = vk_multisampleStates.data(); - auto outDepthStencil = vk_depthStencilStates.data(); - auto outColorBlend = vk_colorBlendStates.data(); - auto outColorBlendAttachmentState = vk_colorBlendAttachmentStates.data(); + auto outInputAssembly = vk_inputAssembly.data(); + //ill acknowledge this additional looping is a little ugly + //input and tess for (const auto& info : createInfos) { - initPipelineCreateInfo(outCreateInfo,info); - outCreateInfo->pStages = outShaderStage; - auto processSpecShader = [&](IGPUPipelineBase::SShaderSpecInfo spec, hlsl::ShaderStage shaderStage) { - if (spec.shader) - { - *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, shaderStage, false, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo, outSpecMapEntry, outSpecData); - outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); - } - }; - processSpecShader(info.vertexShader, hlsl::ShaderStage::ESS_VERTEX); - processSpecShader(info.tesselationControlShader, hlsl::ShaderStage::ESS_TESSELLATION_CONTROL); - processSpecShader(info.tesselationEvaluationShader, hlsl::ShaderStage::ESS_TESSELLATION_EVALUATION); - processSpecShader(info.geometryShader, hlsl::ShaderStage::ESS_GEOMETRY); - processSpecShader(info.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); - - // when dealing with mesh shaders, the vertex input and assembly state will be null - { - { - const auto& vertexInputParams = info.cached.vertexInput; - outVertexInput->pVertexBindingDescriptions = outInputBinding; - for (auto b=0u; bpVertexBindingDescriptions = outInputBinding; + for (auto b = 0u; b < asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT; b++) + if (vertexInputParams.enabledBindingFlags & (1 << b)) { outInputBinding->binding = b; outInputBinding->stride = vertexInputParams.bindings[b].stride; outInputBinding->inputRate = static_cast(vertexInputParams.bindings[b].inputRate); outInputBinding++; } - outVertexInput->vertexBindingDescriptionCount = std::distance(outVertexInput->pVertexBindingDescriptions,outInputBinding); - outVertexInput->pVertexAttributeDescriptions = outInputAttribute; - for (auto l=0u; lvertexBindingDescriptionCount = std::distance(outVertexInput->pVertexBindingDescriptions, outInputBinding); + outVertexInput->pVertexAttributeDescriptions = outInputAttribute; + for (auto l = 0u; l < asset::SVertexInputParams::MAX_VERTEX_ATTRIB_COUNT; l++) + if (vertexInputParams.enabledAttribFlags & (1 << l)) { outInputAttribute->location = l; outInputAttribute->binding = vertexInputParams.attributes[l].binding; @@ -1346,16 +1589,15 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( outInputAttribute->offset = vertexInputParams.attributes[l].relativeOffset; outInputAttribute++; } - outVertexInput->vertexAttributeDescriptionCount = std::distance(outVertexInput->pVertexAttributeDescriptions,outInputAttribute); - } - outCreateInfo->pVertexInputState = outVertexInput++; - { - const auto& primAssParams = info.cached.primitiveAssembly; - outInputAssembly->topology = static_cast(primAssParams.primitiveType); - outInputAssembly->primitiveRestartEnable = primAssParams.primitiveRestartEnable; - } - outCreateInfo->pInputAssemblyState = outInputAssembly++; + outVertexInput->vertexAttributeDescriptionCount = std::distance(outVertexInput->pVertexAttributeDescriptions, outInputAttribute); } + outCreateInfo->pVertexInputState = outVertexInput++; + { + const auto& primAssParams = info.cached.primitiveAssembly; + outInputAssembly->topology = static_cast(primAssParams.primitiveType); + outInputAssembly->primitiveRestartEnable = primAssParams.primitiveRestartEnable; + } + outCreateInfo->pInputAssemblyState = outInputAssembly++; if (info.tesselationControlShader.shader || info.tesselationEvaluationShader.shader) { @@ -1363,96 +1605,76 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( outCreateInfo->pTessellationState = outTessellation++; } - const auto& raster = info.cached.rasterization; - { - outViewport->viewportCount = raster.viewportCount; - // must be identical to viewport count unless VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT or VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT are used - outViewport->scissorCount = raster.viewportCount; - outCreateInfo->pViewportState = outViewport++; - } - { - outRaster->depthClampEnable = raster.depthClampEnable; - outRaster->rasterizerDiscardEnable = raster.rasterizerDiscard; - outRaster->polygonMode = static_cast(raster.polygonMode); - outRaster->cullMode = static_cast(raster.faceCullingMode); - outRaster->frontFace = raster.frontFaceIsCCW ? VK_FRONT_FACE_COUNTER_CLOCKWISE:VK_FRONT_FACE_CLOCKWISE; - outRaster->depthBiasEnable = raster.depthBiasEnable; - outCreateInfo->pRasterizationState = outRaster++; - } - { - outMultisample->rasterizationSamples = static_cast(0x1<0) - { - outMultisample->sampleShadingEnable = true; - outMultisample->minSampleShading = float(raster.minSampleShadingUnorm)/255.f; - } - else - { - outMultisample->sampleShadingEnable = false; - outMultisample->minSampleShading = 0.f; - } - outMultisample->pSampleMask = raster.sampleMask; - outMultisample->alphaToCoverageEnable = raster.alphaToCoverageEnable; - outMultisample->alphaToOneEnable = raster.alphaToOneEnable; - outCreateInfo->pMultisampleState = outMultisample++; - } - { - //outDepthStencil->flags no attachment order access yet - outDepthStencil->depthTestEnable = raster.depthTestEnable(); - outDepthStencil->depthWriteEnable = raster.depthWriteEnable; - outDepthStencil->depthCompareOp = static_cast(raster.depthCompareOp); - outDepthStencil->depthBoundsTestEnable = raster.depthBoundsTestEnable; - outDepthStencil->stencilTestEnable = raster.stencilTestEnable(); - outDepthStencil->front = getVkStencilOpStateFrom(raster.frontStencilOps); - outDepthStencil->back = getVkStencilOpStateFrom(raster.backStencilOps); - outCreateInfo->pDepthStencilState = outDepthStencil++; - } - { - const auto& blend = info.cached.blend; - const auto& subpass = info.renderpass->getCreationParameters().subpasses[info.cached.subpassIx]; - //outColorBlend->flags no attachment order access yet - outColorBlend->logicOpEnable = blend.logicOp!=asset::ELO_NO_OP; - outColorBlend->logicOp = getVkLogicOpFromLogicOp(blend.logicOp); - outColorBlend->pAttachments = outColorBlendAttachmentState; - for (auto i=0; i vk_shaderStage(maxShaderStages, { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr }); + core::vector vk_shaderModule(maxShaderStages, { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0 }); + core::vector entryPoints(maxShaderStages); + core::vector vk_requiredSubgroupSize(maxShaderStages, { + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,nullptr + }); + core::vector vk_specializationInfos(maxShaderStages, { 0,nullptr,0,nullptr }); + core::vector vk_specializationMapEntry(validation.count); + core::vector specializationData(validation.dataSize); + + outCreateInfo = vk_createInfos.data(); + auto outShaderStage = vk_shaderStage.data(); + auto outEntryPoints = entryPoints.data(); + auto outShaderModule = vk_shaderModule.data(); + auto outRequiredSubgroupSize = vk_requiredSubgroupSize.data(); + auto outSpecInfo = vk_specializationInfos.data(); + auto outSpecMapEntry = vk_specializationMapEntry.data(); + auto outSpecData = specializationData.data(); + + //shader + for (const auto& info : createInfos) + { + outCreateInfo->pStages = outShaderStage; + auto processSpecShader = [&](IGPUPipelineBase::SShaderSpecInfo spec, hlsl::ShaderStage shaderStage) { - const auto& params = blend.blendParams[i]; - outColorBlendAttachmentState->blendEnable = params.blendEnabled(); - outColorBlendAttachmentState->srcColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.srcColorFactor)); - outColorBlendAttachmentState->dstColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstColorFactor)); - outColorBlendAttachmentState->colorBlendOp = getVkBlendOpFromBlendOp(static_cast(params.colorBlendOp)); - outColorBlendAttachmentState->srcAlphaBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.srcAlphaFactor)); - outColorBlendAttachmentState->dstAlphaBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstAlphaFactor)); - outColorBlendAttachmentState->alphaBlendOp = getVkBlendOpFromBlendOp(static_cast(params.alphaBlendOp)); - outColorBlendAttachmentState->colorWriteMask = getVkColorComponentFlagsFromColorWriteMask(params.colorWriteMask); - outColorBlendAttachmentState++; - } - outColorBlend->attachmentCount = std::distance(outColorBlend->pAttachments,outColorBlendAttachmentState); - outCreateInfo->pColorBlendState = outColorBlend++; - } - outCreateInfo->pDynamicState = &vk_dynamicStateCreateInfo; - outCreateInfo->renderPass = static_cast(info.renderpass)->getInternalObject(); - outCreateInfo->subpass = info.cached.subpassIx; + if (spec.shader) + { + *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, + shaderStage, + false, + outShaderModule, + outEntryPoints, + outRequiredSubgroupSize, + outSpecInfo, + outSpecMapEntry, + outSpecData + ); + outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); + } + }; + processSpecShader(info.vertexShader, hlsl::ShaderStage::ESS_VERTEX); + processSpecShader(info.tesselationControlShader, hlsl::ShaderStage::ESS_TESSELLATION_CONTROL); + processSpecShader(info.tesselationEvaluationShader, hlsl::ShaderStage::ESS_TESSELLATION_EVALUATION); + processSpecShader(info.geometryShader, hlsl::ShaderStage::ESS_GEOMETRY); + processSpecShader(info.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); + outCreateInfo++; } + auto vk_pipelines = reinterpret_cast(output); std::stringstream debugNameBuilder; - if (m_devf.vk.vkCreateGraphicsPipelines(m_vkdev,vk_pipelineCache,vk_createInfos.size(),vk_createInfos.data(),nullptr,vk_pipelines)==VK_SUCCESS) + if (m_devf.vk.vkCreateGraphicsPipelines(m_vkdev, vk_pipelineCache, vk_createInfos.size(), vk_createInfos.data(), nullptr, vk_pipelines) == VK_SUCCESS) { - for (size_t i=0ull; i(createInfos[i],vk_pipeline); + std::uninitialized_default_construct_n(output + i, 1); + output[i] = core::make_smart_refctd_ptr(createInfos[i], vk_pipeline); debugNameBuilder.str(""); auto buildDebugName = [&](const IGPUPipelineBase::SShaderSpecInfo& spec, hlsl::ShaderStage stage) - { - if (spec.shader != nullptr) - debugNameBuilder <getFilepathHint() << "(" << spec.entryPoint << "," << stage << ")\n"; - }; + { + if (spec.shader != nullptr) + debugNameBuilder << spec.shader->getFilepathHint() << "(" << spec.entryPoint << "," << stage << ")\n"; + }; buildDebugName(createInfo.vertexShader, hlsl::ESS_VERTEX); buildDebugName(createInfo.tesselationControlShader, hlsl::ESS_TESSELLATION_CONTROL); buildDebugName(createInfo.tesselationEvaluationShader, hlsl::ESS_TESSELLATION_EVALUATION); @@ -1462,7 +1684,7 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( } } else - std::fill_n(output,vk_createInfos.size(),nullptr); + std::fill_n(output, vk_createInfos.size(), nullptr); } void CVulkanLogicalDevice::createRayTracingPipelines_impl( diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index 4cc633ec55..ae0da50eaf 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -20,7 +20,6 @@ #include "nbl/video/CVulkanSampler.h" #include "nbl/video/CVulkanPipelineLayout.h" #include "nbl/video/CVulkanPipelineCache.h" -#include "nbl/video/CVulkanComputePipeline.h" #include "nbl/video/CVulkanDescriptorPool.h" #include "nbl/video/CVulkanDescriptorSet.h" #include "nbl/video/CVulkanMemoryAllocation.h" @@ -29,7 +28,10 @@ #include "nbl/video/CVulkanImage.h" #include "nbl/video/CVulkanDeferredOperation.h" #include "nbl/video/CVulkanAccelerationStructure.h" + #include "nbl/video/CVulkanGraphicsPipeline.h" +#include "nbl/video/CVulkanComputePipeline.h" +#include "nbl/video/CVulkanMeshPipeline.h" #include "nbl/video/CVulkanRayTracingPipeline.h" namespace nbl::video @@ -281,16 +283,22 @@ class CVulkanLogicalDevice final : public ILogicalDevice core::smart_refctd_ptr createFramebuffer_impl(IGPUFramebuffer::SCreationParams&& params) override; // pipelines + void createGraphicsPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output, + const SSpecializationValidationResult& validation + ) override; void createComputePipelines_impl( IGPUPipelineCache* const pipelineCache, const std::span createInfos, core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) override; - void createGraphicsPipelines_impl( + void createMeshPipelines_impl( IGPUPipelineCache* const pipelineCache, - const std::span params, - core::smart_refctd_ptr* const output, + const std::span params, + core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) override; diff --git a/src/nbl/video/CVulkanMeshPipeline.cpp b/src/nbl/video/CVulkanMeshPipeline.cpp new file mode 100644 index 0000000000..8fa3cc63eb --- /dev/null +++ b/src/nbl/video/CVulkanMeshPipeline.cpp @@ -0,0 +1,27 @@ +#include "nbl/video/CVulkanMeshPipeline.h" + +#include "nbl/video/CVulkanLogicalDevice.h" + +namespace nbl::video +{ + + CVulkanMeshPipeline::~CVulkanMeshPipeline() + { + const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); + auto* vk = vulkanDevice->getFunctionTable(); + vk->vk.vkDestroyPipeline(vulkanDevice->getInternalObject(), m_vkPipeline, nullptr); + } + void CVulkanMeshPipeline::setObjectDebugName(const char* label) const + { + IBackendObject::setObjectDebugName(label); + + if (vkSetDebugUtilsObjectNameEXT == 0) return; + + const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); + VkDebugUtilsObjectNameInfoEXT nameInfo = { VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, nullptr }; + nameInfo.objectType = VK_OBJECT_TYPE_PIPELINE; + nameInfo.objectHandle = reinterpret_cast(getInternalObject()); + nameInfo.pObjectName = getObjectDebugName(); + vkSetDebugUtilsObjectNameEXT(vulkanDevice->getInternalObject(), &nameInfo); + } +} \ No newline at end of file diff --git a/src/nbl/video/CVulkanMeshPipeline.h b/src/nbl/video/CVulkanMeshPipeline.h new file mode 100644 index 0000000000..39b11695a7 --- /dev/null +++ b/src/nbl/video/CVulkanMeshPipeline.h @@ -0,0 +1,33 @@ +#ifndef _NBL_C_VULKAN_MESH_PIPELINE_H_INCLUDED_ +#define _NBL_C_VULKAN_MESH_PIPELINE_H_INCLUDED_ + + +#include "nbl/video/IGPUMeshPipeline.h" + +#include + +namespace nbl::video +{ + + //potentially collapse this so Mesh just uses CVulkanGraphicsPipeline + //if thats done, BindMesh can go away +class CVulkanMeshPipeline final : public IGPUMeshPipeline +{ + public: + CVulkanMeshPipeline(const SCreationParams& params, const VkPipeline vk_pipeline) : + IGPUMeshPipeline(params), m_vkPipeline(vk_pipeline) {} + + inline const void* getNativeHandle() const override {return &m_vkPipeline;} + + inline VkPipeline getInternalObject() const {return m_vkPipeline;} + + void setObjectDebugName(const char* label) const override; //exists in compute but not in graphics + private: + ~CVulkanMeshPipeline(); + + const VkPipeline m_vkPipeline; +}; + +} + +#endif \ No newline at end of file diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index da86d7c9d9..163bd93026 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -732,6 +732,11 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart VkPhysicalDeviceCooperativeMatrixFeaturesKHR cooperativeMatrixFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR }; VkPhysicalDeviceMaintenance5FeaturesKHR maintenance5Features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_5_FEATURES_KHR }; VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT graphicsPipelineLibraryFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_FEATURES_EXT }; + VkPhysicalDeviceMeshShaderFeaturesEXT meshShaderFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT }; + + if (isExtensionSupported(VK_EXT_MESH_SHADER_EXTENSION_NAME)) { + addToPNextChain(&meshShaderFeatures); + } if (isExtensionSupported(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME)) addToPNextChain(&conditionalRenderingFeatures); @@ -818,6 +823,44 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart features.geometryShader = deviceFeatures.features.geometryShader; features.tessellationShader = deviceFeatures.features.tessellationShader; + //check if features are existant first + //potentially put a copy of VkPhysicalDeviceMeshShaderFeaturesEXT directly into features + //depends on the less obvious properties + if (isExtensionSupported(VK_EXT_MESH_SHADER_EXTENSION_NAME)) { + features.meshShader = meshShaderFeatures.meshShader; + features.taskShader = meshShaderFeatures.taskShader; + //TODO + //VkBool32 multiviewMeshShader; + //VkBool32 primitiveFragmentShadingRateMeshShader; + //VkBool32 meshShaderQueries; + + //VkPhysicalDeviceMeshShaderPropertiesEXT + //#define LIMIT_INIT_MESH(limitMemberName) properties.limits.limitMemberName = meshShaderProperties.limitMemberName + //LIMIT_INIT_MESH(maxTaskWorkGroupTotalCount); + //LIMIT_INIT_MESH(maxTaskWorkGroupInvocations); + //LIMIT_INIT_MESH(maxTaskPayloadSize); + //LIMIT_INIT_MESH(maxTaskSharedMemorySize); + //LIMIT_INIT_MESH(maxTaskPayloadAndSharedMemorySize); + //LIMIT_INIT_MESH(maxMeshWorkGroupInvocations); + //LIMIT_INIT_MESH(maxMeshSharedMemorySize); + //LIMIT_INIT_MESH(maxMeshPayloadAndSharedMemorySize); + //LIMIT_INIT_MESH(maxMeshOutputMemorySize); + //LIMIT_INIT_MESH(maxMeshOutputComponents); + //LIMIT_INIT_MESH(maxMeshOutputVertices); + //LIMIT_INIT_MESH(maxMeshOutputPrimitives); + //LIMIT_INIT_MESH(maxMeshOutputLayers); + //LIMIT_INIT_MESH(maxMeshMultiviewViewCount); + //LIMIT_INIT_MESH(maxMeshOutputPerVertexGranularity); + //LIMIT_INIT_MESH(maxMeshOutputPerPrimitiveGranularity); + + //for(uint8_t i = 0; i < 3; i++){ + // LIMIT_INIT_MESH(maxTaskWorkGroupCount[i]); + // LIMIT_INIT_MESH(maxTaskWorkGroupSize[i]); + // LIMIT_INIT_MESH(maxMeshWorkGroupCount[i]); + // LIMIT_INIT_MESH(maxMeshWorkGroupSize[i]); + //} + //#undef LIMIT_INIT_MESH + } if (!deviceFeatures.features.sampleRateShading || !deviceFeatures.features.dualSrcBlend) RETURN_NULL_PHYSICAL_DEVICE; properties.limits.logicOp = deviceFeatures.features.logicOp; @@ -1491,6 +1534,9 @@ core::smart_refctd_ptr CVulkanPhysicalDevice::createLogicalDevic enableExtensionIfAvailable(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME); + VkPhysicalDeviceMeshShaderFeaturesEXT meshShaderFeatures = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT, nullptr}; + REQUIRE_EXTENSION_IF(enabledFeatures.meshShader, VK_EXT_MESH_SHADER_EXTENSION_NAME, &meshShaderFeatures); + VkPhysicalDeviceAccelerationStructureFeaturesKHR accelerationStructureFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR,nullptr }; VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR rayTracingMaintenance1Features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_MAINTENANCE_1_FEATURES_KHR,nullptr }; REQUIRE_EXTENSION_IF(enabledFeatures.accelerationStructure,VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME,&accelerationStructureFeatures); // feature dependency taken care of @@ -1821,6 +1867,12 @@ core::smart_refctd_ptr CVulkanPhysicalDevice::createLogicalDevic //shaderSMBuiltinsFeaturesNV [LIMIT SO ENABLE EVERYTHING BY DEFAULT] representativeFragmentTestFeatures.representativeFragmentTest = enabledFeatures.representativeFragmentTest; + + meshShaderFeatures.taskShader = enabledFeatures.taskShader; + meshShaderFeatures.meshShader = enabledFeatures.meshShader; + meshShaderFeatures.primitiveFragmentShadingRateMeshShader = VK_FALSE;//needs to be explicitly set? + meshShaderFeatures.meshShaderQueries = VK_FALSE; + meshShaderFeatures.multiviewMeshShader = VK_FALSE; //shaderClockFeatures [LIMIT SO ENABLE EVERYTHING BY DEFAULT] diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 1f619666ab..1807829b1b 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -963,6 +963,32 @@ template NBL_API2 bool IGPUCommandBuffer::copyAccelerationStructureFromMemory(const IGPUTopLevelAccelerationStructure::DeviceCopyFromMemoryInfo&); +bool IGPUCommandBuffer::bindGraphicsPipeline(const IGPUGraphicsPipeline* const pipeline) +{ + // Because binding of the Gfx pipeline can happen outside of a Renderpass Scope, + // we cannot check renderpass-pipeline compatibility here. + // And checking before every drawcall would be performance suicide. + if (!checkStateBeforeRecording(queue_flags_t::GRAPHICS_BIT)) + return false; + + if (!pipeline || !this->isCompatibleDevicewise(pipeline)) + { + NBL_LOG_ERROR("incompatible pipeline device!"); + return false; + } + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(pipeline))) + { + NBL_LOG_ERROR("out of host memory!"); + return false; + } + + m_boundRasterizationPipeline = reinterpret_cast(pipeline); + + m_noCommands = false; + return bindGraphicsPipeline_impl(pipeline); +} + bool IGPUCommandBuffer::bindComputePipeline(const IGPUComputePipeline* const pipeline) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT)) @@ -988,7 +1014,7 @@ bool IGPUCommandBuffer::bindComputePipeline(const IGPUComputePipeline* const pip return true; } -bool IGPUCommandBuffer::bindGraphicsPipeline(const IGPUGraphicsPipeline* const pipeline) +bool IGPUCommandBuffer::bindMeshPipeline(const IGPUMeshPipeline* const pipeline) { // Because binding of the Gfx pipeline can happen outside of a Renderpass Scope, // we cannot check renderpass-pipeline compatibility here. @@ -1002,16 +1028,16 @@ bool IGPUCommandBuffer::bindGraphicsPipeline(const IGPUGraphicsPipeline* const p return false; } - if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(pipeline))) + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(pipeline))) { NBL_LOG_ERROR("out of host memory!"); return false; } - m_boundGraphicsPipeline = pipeline; + m_boundRasterizationPipeline = reinterpret_cast(pipeline); m_noCommands = false; - return bindGraphicsPipeline_impl(pipeline); + return bindMeshPipeline_impl(pipeline); } bool IGPUCommandBuffer::bindRayTracingPipeline(const IGPURayTracingPipeline* const pipeline) @@ -1462,6 +1488,59 @@ bool IGPUCommandBuffer::dispatchIndirect(const asset::SBufferBindinggetPhysicalDevice()->getLimits(); + if (groupCountX > limits.maxMeshWorkGroupCount[0] || groupCountY > limits.maxMeshWorkGroupCount[1] || groupCountZ > limits.maxMeshWorkGroupCount[2]) + { + NBL_LOG_ERROR("group counts (%d, %d, %d) exceeds maximum counts (%d, %d, %d)!", groupCountX, groupCountY, groupCountZ, limits.maxMeshWorkGroupCount[0], limits.maxMeshWorkGroupCount[1], limits.maxMeshWorkGroupCount[2]); + return false; + } + + m_noCommands = false; + return drawMeshTasks_impl(groupCountX, groupCountY, groupCountZ); +} + +bool IGPUCommandBuffer::drawMeshTasksIndirect(const asset::SBufferBinding& binding, const uint32_t drawCount, uint32_t stride) +{ + if (!checkStateBeforeRecording(queue_flags_t::GRAPHICS_BIT,RENDERPASS_SCOPE::INSIDE)) + return false; + if (invalidBufferBinding(binding,4u/*TODO: is it really 4?*/,IGPUBuffer::EUF_INDIRECT_BUFFER_BIT)){ + return false; + } + + if (drawCount) { + if (drawCount==1u) + stride = sizeof(hlsl::DrawMeshTasksIndirectCommand_t); + if (stride&0x3u || stride getOriginDevice()->getPhysicalDevice()->getLimits().maxDrawIndirectCount) { + NBL_LOG_ERROR("draw count (%d) exceeds maximum allowed amount (%d)!", drawCount, getOriginDevice()->getPhysicalDevice()->getLimits().maxDrawIndirectCount); + return false; + } + if (invalidBufferRange({ binding.offset,stride * (drawCount - 1u) + sizeof(hlsl::DrawMeshTasksIndirectCommand_t),binding.buffer }, alignof(uint32_t), IGPUBuffer::EUF_INDIRECT_BUFFER_BIT)) + return false; + } // i get the feeling the vk command shouldnt be called if drawCount is 0, but this is how drawindirect does it + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList,core::smart_refctd_ptr(binding.buffer))) + { + NBL_LOG_ERROR("out of host memory!"); + return false; + } + + m_noCommands = false; + return drawMeshTasksIndirect_impl(binding, drawCount, stride); +} bool IGPUCommandBuffer::beginRenderPass(SRenderpassBeginInfo info, const SUBPASS_CONTENTS contents) { diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 7c3f5dbb81..79f7f507d4 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -9,53 +9,53 @@ using namespace nbl::video; class SpirvTrimTask { - public: - using EntryPoints = core::set; - struct ShaderInfo - { - EntryPoints entryPoints; - const asset::IShader* trimmedShader; - }; +public: + using EntryPoints = core::set; + struct ShaderInfo + { + EntryPoints entryPoints; + const asset::IShader* trimmedShader; + }; - SpirvTrimTask(asset::ISPIRVEntryPointTrimmer* trimer, system::logger_opt_ptr logger) : m_trimmer(trimer), m_logger(logger) - { - - } + SpirvTrimTask(asset::ISPIRVEntryPointTrimmer* trimer, system::logger_opt_ptr logger) : m_trimmer(trimer), m_logger(logger) + { - void insertEntryPoint(const IGPUPipelineBase::SShaderSpecInfo& shaderSpec, const hlsl::ShaderStage stage) - { - const auto* shader = shaderSpec.shader; - auto it = m_shaderInfoMap.find(shader); - if (it == m_shaderInfoMap.end() || it->first != shader) - it = m_shaderInfoMap.emplace_hint(it, shader, ShaderInfo{ EntryPoints(), nullptr } ); - it->second.entryPoints.insert({ .name = shaderSpec.entryPoint, .stage = stage }); - } + } - IGPUPipelineBase::SShaderSpecInfo trim(const IGPUPipelineBase::SShaderSpecInfo& shaderSpec, core::vector>& outShaders) + void insertEntryPoint(const IGPUPipelineBase::SShaderSpecInfo& shaderSpec, const hlsl::ShaderStage stage) + { + const auto* shader = shaderSpec.shader; + auto it = m_shaderInfoMap.find(shader); + if (it == m_shaderInfoMap.end() || it->first != shader) + it = m_shaderInfoMap.emplace_hint(it, shader, ShaderInfo{ EntryPoints(), nullptr } ); + it->second.entryPoints.insert({ .name = shaderSpec.entryPoint, .stage = stage }); + } + + IGPUPipelineBase::SShaderSpecInfo trim(const IGPUPipelineBase::SShaderSpecInfo& shaderSpec, core::vector>& outShaders) + { + const auto* shader = shaderSpec.shader; + auto findResult = m_shaderInfoMap.find(shader); + assert(findResult != m_shaderInfoMap.end()); + const auto& entryPoints = findResult->second.entryPoints; + auto& trimmedShader = findResult->second.trimmedShader; + + auto trimmedShaderSpec = shaderSpec; + if (shader != nullptr) { - const auto* shader = shaderSpec.shader; - auto findResult = m_shaderInfoMap.find(shader); - assert(findResult != m_shaderInfoMap.end()); - const auto& entryPoints = findResult->second.entryPoints; - auto& trimmedShader = findResult->second.trimmedShader; - - auto trimmedShaderSpec = shaderSpec; - if (shader != nullptr) + if (trimmedShader == nullptr) { - if (trimmedShader == nullptr) - { - outShaders.push_back(m_trimmer->trim(shader, entryPoints, m_logger)); - trimmedShader = outShaders.back().get(); - } - trimmedShaderSpec.shader = trimmedShader; + outShaders.push_back(m_trimmer->trim(shader, entryPoints, m_logger)); + trimmedShader = outShaders.back().get(); } - return trimmedShaderSpec; + trimmedShaderSpec.shader = trimmedShader; } - - private: - core::map m_shaderInfoMap; - asset::ISPIRVEntryPointTrimmer* m_trimmer; - const system::logger_opt_ptr m_logger; + return trimmedShaderSpec; + } + +private: + core::map m_shaderInfoMap; + asset::ISPIRVEntryPointTrimmer* m_trimmer; + const system::logger_opt_ptr m_logger; }; ILogicalDevice::ILogicalDevice(core::smart_refctd_ptr&& api, const IPhysicalDevice* const physicalDevice, const SCreationParams& params, const bool runningInRenderdoc) @@ -511,23 +511,23 @@ bool ILogicalDevice::updateDescriptorSets(const std::spanvalidateWrite(write); switch (asset::IDescriptor::GetTypeCategory(*outCategory = writeValidationResults[i].type)) { - case asset::IDescriptor::EC_BUFFER: - params.bufferCount += writeCount; - break; - case asset::IDescriptor::EC_SAMPLER: - case asset::IDescriptor::EC_IMAGE: - params.imageCount += writeCount; - break; - case asset::IDescriptor::EC_BUFFER_VIEW: - params.bufferViewCount += writeCount; - break; - case asset::IDescriptor::EC_ACCELERATION_STRUCTURE: - params.accelerationStructureCount += writeCount; - params.accelerationStructureWriteCount++; - break; - default: // validation failed - NBL_LOG_ERROR("Invalid descriptor type (descriptorWrites[%u])", i); - return false; + case asset::IDescriptor::EC_BUFFER: + params.bufferCount += writeCount; + break; + case asset::IDescriptor::EC_SAMPLER: + case asset::IDescriptor::EC_IMAGE: + params.imageCount += writeCount; + break; + case asset::IDescriptor::EC_BUFFER_VIEW: + params.bufferViewCount += writeCount; + break; + case asset::IDescriptor::EC_ACCELERATION_STRUCTURE: + params.accelerationStructureCount += writeCount; + params.accelerationStructureWriteCount++; + break; + default: // validation failed + NBL_LOG_ERROR("Invalid descriptor type (descriptorWrites[%u])", i); + return false; } outCategory++; } @@ -591,23 +591,23 @@ bool ILogicalDevice::nullifyDescriptors(const std::span newParams(params.begin(), params.end()); const auto shaderCount = params.size(); - + core::vector> trimmedShaders; // vector to hold all the trimmed shaders, so the pointer from the new ShaderSpecInfo is not dangling trimmedShaders.reserve(shaderCount); @@ -818,7 +818,7 @@ bool ILogicalDevice::createComputePipelines(IGPUPipelineCache* const pipelineCac } createComputePipelines_impl(pipelineCache,newParams,output,specConstantValidation); - + bool retval = true; for (auto i=0u; igetCreationParameters(); + const auto& subpass = passParams.subpasses[subpassIndex]; + if (subpass.viewMask) + { + /* + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06047 + if (!limits.multiviewTessellationShader && .test(tesS_contrOL)) + return false; + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06048 + if (!limits.multiviewGeomtryShader && .test(GEOMETRY)) + return false; + */ + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06578 + //NOTE: index of MSB must be less than maxMultiviewViewCount; wrong negation here, should be >= + if (hlsl::findMSB(subpass.viewMask) > limits.maxMultiviewViewCount) + { + NBL_LOG_ERROR("Invalid viewMask (params[%u])", subpassIndex); + return false; + } + } + if (subpass.depthStencilAttachment.render.used()) + { + const auto& attachment = passParams.depthStencilAttachments[subpass.depthStencilAttachment.render.attachmentIndex]; + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-multisampledRenderToSingleSampled-06853 + bool sampleCountNeedsToMatch = !features.mixedAttachmentSamples /*&& !features.multisampledRenderToSingleSampled*/; + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01411 + if (/*detect NV version && */(rasterParams.depthTestEnable() || rasterParams.stencilTestEnable() || rasterParams.depthBoundsTestEnable)) + sampleCountNeedsToMatch = true; + if (sampleCountNeedsToMatch && attachment.samples != samples) + { + NBL_LOG_ERROR("Depth stencil and rasterization samples need to match (params[%u])", subpassIndex); + return false; + } + } + for (auto i = 0; i < IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments; i++) + { + const auto& render = subpass.colorAttachments[i].render; + if (render.used()) + { + const auto& attachment = passParams.colorAttachments[render.attachmentIndex]; + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06041 + if (blendParams.blendParams[i].blendEnabled() && !formatUsages[attachment.format].attachmentBlend) + { + NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", subpassIndex, i); + return false; + } + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-multisampledRenderToSingleSampled-06853 + if (!features.mixedAttachmentSamples /*&& !features.multisampledRenderToSingleSampled*/ && attachment.samples != samples) + { + NBL_LOG_ERROR("Color attachment and rasterization samples need to match (params[%u].colorAttachments[%u])", subpassIndex, i); + return false; + } + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01412 + if (/*detect NV version && */(attachment.samples > samples)) + { + NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", subpassIndex, i); + return false; + } + } + } + + return true; +} + +//this is a COPY of graphics pipeline, with MINOR adjustments. +//no changes should be made DIRECTLY here +//UNLESS it's DIRECTLY for mesh/task +//there SHOULD be a function that duplicates functionality between graphics and mesh pipeline that can be adjusted first +bool ILogicalDevice::createMeshPipelines( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output +) { + std::fill_n(output, params.size(), nullptr); + SSpecializationValidationResult specConstantValidation = commonCreatePipelines(pipelineCache, params); + if (!specConstantValidation) { + NBL_LOG_ERROR("Invalid parameters were given"); + return false; + } + + const auto& features = getEnabledFeatures(); + const auto& limits = getPhysicalDeviceLimits(); + + core::vector newParams(params.begin(), params.end()); + const auto shaderCount = std::accumulate(params.begin(), params.end(), 0, [](uint32_t sum, auto& param) + {return sum + param.getShaderCount(); } + ); + core::vector> trimmedShaders; // vector to hold all the trimmed shaders, so the pointer from the new ShaderSpecInfo is not dangling + trimmedShaders.reserve(shaderCount); + + for (auto ix = 0u; ix < params.size(); ix++) + { + const auto& ci = params[ix]; + + if (params[ix].taskShader.shader != nullptr) { + if (!features.taskShader) { + NBL_LOG_ERROR("Feature `mesh shader` is not enabled"); + return false; + } + } + + //check extensions here + //it SEEMS like createGraphicsPipeline does, but it does it in a weird way I don't understand? + //geo and tess are just flat disabled?? + if (!features.meshShader) { + NBL_LOG_ERROR("Feature `mesh shader` is not enabled"); + return false; + } + + auto renderpass = ci.renderpass; + if (!renderpass->wasCreatedBy(this)) { + NBL_LOG_ERROR("Invalid renderpass was given (params[%u])", ix); + return false; + } + + + MeshGraphicsCommonValidation(renderpass, ci.cached.subpassIx, limits, features, ci.cached.rasterization, ci.cached.blend, m_logger, getPhysicalDevice()->getImageFormatUsagesOptimalTiling()); + + SpirvTrimTask trimTask(m_spirvTrimmer.get(), m_logger); + trimTask.insertEntryPoint(ci.taskShader, hlsl::ShaderStage::ESS_TASK); + trimTask.insertEntryPoint(ci.meshShader, hlsl::ShaderStage::ESS_MESH); + trimTask.insertEntryPoint(ci.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); + + newParams[ix].taskShader = trimTask.trim(ci.taskShader, trimmedShaders); + newParams[ix].meshShader = trimTask.trim(ci.meshShader, trimmedShaders); + newParams[ix].fragmentShader = trimTask.trim(ci.fragmentShader, trimmedShaders); + } + createMeshPipelines_impl(pipelineCache, newParams, output, specConstantValidation); + + for (auto i = 0u; i < params.size(); i++) + { + if (!output[i]) + { + NBL_LOG_ERROR("MeshPipeline was not created (params[%u])", i); + return false; + } + else + { + m_logger.log("shader[%d] mesh debug name - %s\n", nbl::system::ILogger::ELL_DEBUG, i, params[i].meshShader.shader->getDebugName()); + // TODO: set pipeline debug name thats a concatenation of all active stages' shader file path hints + } + } + return true; +} + bool ILogicalDevice::createGraphicsPipelines( IGPUPipelineCache* const pipelineCache, const std::span params, @@ -851,9 +1017,9 @@ bool ILogicalDevice::createGraphicsPipelines( const auto& limits = getPhysicalDeviceLimits(); core::vector newParams(params.begin(), params.end()); const auto shaderCount = std::accumulate(params.begin(), params.end(), 0, [](uint32_t sum, auto& param) - { - return sum + param.getShaderCount(); - }); + { + return sum + param.getShaderCount(); + }); core::vector> trimmedShaders; // vector to hold all the trimmed shaders, so the pointer from the new ShaderSpecInfo is not dangling trimmedShaders.reserve(shaderCount); @@ -880,7 +1046,7 @@ bool ILogicalDevice::createGraphicsPipelines( NBL_LOG_ERROR("Cannot create IGPUShader for %p, Geometry Shader feature not enabled!", ci.geometryShader.shader); return false; } - + auto renderpass = ci.renderpass; if (!renderpass->wasCreatedBy(this)) { @@ -888,88 +1054,13 @@ bool ILogicalDevice::createGraphicsPipelines( return false; } - const auto& rasterParams = ci.cached.rasterization; - if (rasterParams.alphaToOneEnable && !features.alphaToOne) - { - NBL_LOG_ERROR("Feature `alpha to one` is not enabled"); - return false; - } - if (rasterParams.depthBoundsTestEnable && !features.depthBounds) - { - NBL_LOG_ERROR("Feature `depth bounds` is not enabled"); - return false; - } - - const auto samples = 0x1u << rasterParams.samplesLog2; - // TODO: loads more validation on extra parameters here! // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-lineRasterizationMode-02766 // TODO: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01505 // baiscally the AMD version must have the rasterization samples equal to the maximum of all attachment samples counts - const auto& passParams = renderpass->getCreationParameters(); - const auto& subpass = passParams.subpasses[ci.cached.subpassIx]; - if (subpass.viewMask) - { - /* - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06047 - if (!limits.multiviewTessellationShader && .test(tesS_contrOL)) - return false; - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06048 - if (!limits.multiviewGeomtryShader && .test(GEOMETRY)) - return false; - */ - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06578 - //NOTE: index of MSB must be less than maxMultiviewViewCount; wrong negation here, should be >= - if (hlsl::findMSB(subpass.viewMask) > limits.maxMultiviewViewCount) - { - NBL_LOG_ERROR("Invalid viewMask (params[%u])", ix); - return false; - } - } - if (subpass.depthStencilAttachment.render.used()) - { - const auto& attachment = passParams.depthStencilAttachments[subpass.depthStencilAttachment.render.attachmentIndex]; - - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-multisampledRenderToSingleSampled-06853 - bool sampleCountNeedsToMatch = !features.mixedAttachmentSamples /*&& !features.multisampledRenderToSingleSampled*/; - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01411 - if (/*detect NV version && */(rasterParams.depthTestEnable() || rasterParams.stencilTestEnable() || rasterParams.depthBoundsTestEnable)) - sampleCountNeedsToMatch = true; - if (sampleCountNeedsToMatch && attachment.samples != samples) - { - NBL_LOG_ERROR("Invalid depth stencil attachment (params[%u])", ix); - return false; - } - } - for (auto i = 0; i < IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments; i++) - { - const auto& render = subpass.colorAttachments[i].render; - if (render.used()) - { - const auto& attachment = passParams.colorAttachments[render.attachmentIndex]; - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06041 - if (ci.cached.blend.blendParams[i].blendEnabled() && !getPhysicalDevice()->getImageFormatUsagesOptimalTiling()[attachment.format].attachmentBlend) - { - NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", ix, i); - return false; - } - - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-multisampledRenderToSingleSampled-06853 - if (!features.mixedAttachmentSamples /*&& !features.multisampledRenderToSingleSampled*/ && attachment.samples != samples) - { - NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", ix, i); - return false; - } - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01412 - if (/*detect NV version && */(attachment.samples > samples)) - { - NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", ix, i); - return false; - } - } - } + MeshGraphicsCommonValidation(renderpass, ci.cached.subpassIx, limits, features, ci.cached.rasterization, ci.cached.blend, m_logger, getPhysicalDevice()->getImageFormatUsagesOptimalTiling()); SpirvTrimTask trimTask(m_spirvTrimmer.get(), m_logger); trimTask.insertEntryPoint(ci.vertexShader, hlsl::ShaderStage::ESS_VERTEX); @@ -977,7 +1068,7 @@ bool ILogicalDevice::createGraphicsPipelines( trimTask.insertEntryPoint(ci.tesselationEvaluationShader, hlsl::ShaderStage::ESS_TESSELLATION_EVALUATION); trimTask.insertEntryPoint(ci.geometryShader, hlsl::ShaderStage::ESS_GEOMETRY); trimTask.insertEntryPoint(ci.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); - + newParams[ix].vertexShader = trimTask.trim(ci.vertexShader, trimmedShaders); newParams[ix].tesselationControlShader = trimTask.trim(ci.tesselationControlShader, trimmedShaders); newParams[ix].tesselationEvaluationShader = trimTask.trim(ci.tesselationEvaluationShader, trimmedShaders); @@ -1002,9 +1093,9 @@ bool ILogicalDevice::createGraphicsPipelines( return true; } -bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipelineCache, - const std::span params, - core::smart_refctd_ptr* const output) +bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output) { std::fill_n(output,params.size(),nullptr); SSpecializationValidationResult specConstantValidation = commonCreatePipelines(pipelineCache,params); @@ -1044,15 +1135,15 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline // https://registry.khronos.org/vulkan/specs/latest/html/vkspec.html#VUID-VkRayTracingPipelineCreateInfoKHR-rayTraversalPrimitiveCulling-03596 if (skipAABBs && !features.rayTraversalPrimitiveCulling) { - NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_AABBS"); - return false; + NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_AABBS"); + return false; } // https://registry.khronos.org/vulkan/specs/latest/html/vkspec.html#VUID-VkRayTracingPipelineCreateInfoKHR-rayTraversalPrimitiveCulling-03597 if (skipBuiltin && !features.rayTraversalPrimitiveCulling) { - NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_BUILT_IN_PRIMITIVES"); - return false; + NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_BUILT_IN_PRIMITIVES"); + return false; } } @@ -1061,17 +1152,17 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline core::vector> trimmedShaders; // vector to hold all the trimmed shaders, so the pointer from the new ShaderSpecInfo is not dangling const auto missGroupCount = std::accumulate(params.begin(), params.end(), 0, [](uint32_t sum, auto& param) - { - return sum + static_cast(param.shaderGroups.misses.size()); - }); + { + return sum + static_cast(param.shaderGroups.misses.size()); + }); const auto hitGroupCount = std::accumulate(params.begin(), params.end(), 0, [](uint32_t sum, auto& param) - { - return sum + static_cast(param.shaderGroups.hits.size()); - }); + { + return sum + static_cast(param.shaderGroups.hits.size()); + }); const auto callableGroupCount = std::accumulate(params.begin(), params.end(), 0, [](uint32_t sum, auto& param) - { - return sum + static_cast(param.shaderGroups.callables.size()); - }); + { + return sum + static_cast(param.shaderGroups.callables.size()); + }); core::vector trimmedMissSpecs(missGroupCount); @@ -1084,7 +1175,7 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline const auto& limits = getPhysicalDeviceLimits(); for (auto ix = 0u; ix < params.size(); ix++) { - + const auto& param = params[ix]; // https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-VkRayTracingPipelineCreateInfoKHR-maxPipelineRayRecursionDepth-03589 @@ -1137,7 +1228,7 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline } createRayTracingPipelines_impl(pipelineCache, newParams,output,specConstantValidation); - + bool retval = true; for (auto i=0u; i