From d293823b4987062b8abb5988dadec6d1463500dd Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 7 Mar 2026 21:52:59 +0100 Subject: [PATCH 1/9] Created example 75 --- 75_CAD_3D/CMakeLists.txt | 89 ++ 75_CAD_3D/CTriangleMesh.cpp | 1 + 75_CAD_3D/CTriangleMesh.h | 54 + 75_CAD_3D/DrawResourcesFiller.cpp | 235 ++++ 75_CAD_3D/DrawResourcesFiller.h | 220 ++++ 75_CAD_3D/config.json.template | 28 + 75_CAD_3D/main.cpp | 1097 +++++++++++++++++ 75_CAD_3D/scripts/generate_mipmaps.py | 47 + 75_CAD_3D/scripts/tiled_grid.py | 266 ++++ 75_CAD_3D/shaders/globals.hlsl | 105 ++ 75_CAD_3D/shaders/main_pipeline/common.hlsl | 17 + .../main_pipeline/fragment_shader.hlsl | 7 + .../shaders/main_pipeline/vertex_shader.hlsl | 17 + CMakeLists.txt | 1 + 14 files changed, 2184 insertions(+) create mode 100644 75_CAD_3D/CMakeLists.txt create mode 100644 75_CAD_3D/CTriangleMesh.cpp create mode 100644 75_CAD_3D/CTriangleMesh.h create mode 100644 75_CAD_3D/DrawResourcesFiller.cpp create mode 100644 75_CAD_3D/DrawResourcesFiller.h create mode 100644 75_CAD_3D/config.json.template create mode 100644 75_CAD_3D/main.cpp create mode 100644 75_CAD_3D/scripts/generate_mipmaps.py create mode 100644 75_CAD_3D/scripts/tiled_grid.py create mode 100644 75_CAD_3D/shaders/globals.hlsl create mode 100644 75_CAD_3D/shaders/main_pipeline/common.hlsl create mode 100644 75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl create mode 100644 75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl diff --git a/75_CAD_3D/CMakeLists.txt b/75_CAD_3D/CMakeLists.txt new file mode 100644 index 000000000..794ba1c3c --- /dev/null +++ b/75_CAD_3D/CMakeLists.txt @@ -0,0 +1,89 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. 
Should be in {repo_root}/cmake directory") +endif() + +set(EXAMPLE_SOURCES + "${CMAKE_CURRENT_SOURCE_DIR}/DrawResourcesFiller.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/DrawResourcesFiller.h" +) +set(EXAMPLE_INCLUDES + "${CMAKE_CURRENT_SOURCE_DIR}/../../3rdparty/boost/superproject/libs/math/include") +nbl_create_executable_project("${EXAMPLE_SOURCES}" "" "${EXAMPLE_INCLUDES}" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") +target_link_libraries(${EXECUTABLE_NAME} PRIVATE Nabla::ext::FullScreenTriangle) + +# if enabled then try use Nabla "Text Rendering" extension +# with an implemented interface using the 3rdparty deps + +set(NBL_CAD_EX_USE_TEXT_RENDERING_EXT OFF) # do not enable, for future usage when the extension is written + +if(NBL_BUILD_TEXT_RENDERING AND NBL_CAD_EX_USE_TEXT_RENDERING_EXT) + add_dependencies(${EXECUTABLE_NAME} ${NBL_EXT_TEXT_RENDERING_TARGET}) + target_link_libraries(${EXECUTABLE_NAME} PRIVATE ${NBL_EXT_TEXT_RENDERING_TARGET}) + target_include_directories(${EXECUTABLE_NAME} PUBLIC $) +else() + # Freetype + add_dependencies(${EXECUTABLE_NAME} freetype) + target_link_libraries(${EXECUTABLE_NAME} PRIVATE freetype) + target_include_directories(${EXECUTABLE_NAME} PUBLIC $) + + # msdfgen + add_dependencies(${EXECUTABLE_NAME} ${NBL_MSDFGEN_TARGETS}) + target_link_libraries(${EXECUTABLE_NAME} PRIVATE ${NBL_MSDFGEN_TARGETS}) + foreach(NBL_TARGET IN LISTS NBL_MSDFGEN_TARGETS) + target_include_directories(${EXECUTABLE_NAME} PUBLIC $) + endforeach() +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + +set(SM 6_8) +set(REQUIRED_CAPS [=[ +{ + "kind": "features", + "name": "fragmentShaderPixelInterlock", + "type": "bool", + "values": [1] +} +]=]) + +set(JSON [=[ +[ + { + "INPUT": "shaders/main_pipeline/vertex_shader.hlsl", + "KEY": "main_pipeline_vertex_shader", + "CAPS": [] + }, + { + "INPUT": "shaders/main_pipeline/fragment_shader.hlsl", + "KEY": "main_pipeline_fragment_shader", + "CAPS": [] + } +] +]=]) +string(CONFIGURE "${JSON}" 
JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) diff --git a/75_CAD_3D/CTriangleMesh.cpp b/75_CAD_3D/CTriangleMesh.cpp new file mode 100644 index 000000000..5564c0a51 --- /dev/null +++ b/75_CAD_3D/CTriangleMesh.cpp @@ -0,0 +1 @@ +#include "CTriangleMesh.h" \ No newline at end of file diff --git a/75_CAD_3D/CTriangleMesh.h b/75_CAD_3D/CTriangleMesh.h new file mode 100644 index 000000000..8f941928a --- /dev/null +++ b/75_CAD_3D/CTriangleMesh.h @@ -0,0 +1,54 @@ +#pragma once + +#include +#include +#include "shaders/globals.hlsl" + +using namespace nbl; + +class CTriangleMesh final +{ +public: + using index_t = uint32_t; + using vertex_t = TriangleMeshVertex; + + inline void setVertices(core::vector&& vertices) + { + m_vertices = std::move(vertices); + } + inline void setIndices(core::vector&& indices) + { + m_indices = std::move(indices); + } + + inline const core::vector& getVertices() const + { + return m_vertices; + } + inline const core::vector& getIndices() const + { + return m_indices; + } + + inline size_t getVertexBuffByteSize() const + { + return sizeof(vertex_t) * m_vertices.size(); + } + inline size_t getIndexBuffByteSize() const + { + return sizeof(index_t) * m_indices.size(); + } + inline size_t getIndexCount() const + { + return m_indices.size(); + } + + inline void clear() + { + m_vertices.clear(); + m_indices.clear(); + } + + core::vector m_vertices; + core::vector 
m_indices; +}; \ No newline at end of file diff --git a/75_CAD_3D/DrawResourcesFiller.cpp b/75_CAD_3D/DrawResourcesFiller.cpp new file mode 100644 index 000000000..313c74358 --- /dev/null +++ b/75_CAD_3D/DrawResourcesFiller.cpp @@ -0,0 +1,235 @@ +#include "DrawResourcesFiller.h" + +using namespace nbl; + +DrawResourcesFiller::DrawResourcesFiller() +{} + +DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr&& device, smart_refctd_ptr&& bufferUploadUtils, IQueue* copyQueue, core::smart_refctd_ptr&& logger) : + m_device(std::move(device)), + m_bufferUploadUtils(std::move(bufferUploadUtils)), + m_copyQueue(copyQueue), + m_logger(std::move(logger)) +{ +} + +// function is called when buffer is filled and we should submit draws and clear the buffers and continue filling +void DrawResourcesFiller::setSubmitDrawsFunction(const SubmitFunc& func) +{ + submitDraws = func; +} + +// TODO: redo it completely +bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize, std::span memoryTypeIndexTryOrder) +{ + const size_t adjustedBuffersMemorySize = requiredBufferMemorySize; + const size_t totalResourcesSize = adjustedBuffersMemorySize; + + IGPUBuffer::SCreationParams resourcesBufferCreationParams = {}; + resourcesBufferCreationParams.size = adjustedBuffersMemorySize; + resourcesBufferCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; + resourcesGPUBuffer = logicalDevice->createBuffer(std::move(resourcesBufferCreationParams)); + + if (!resourcesGPUBuffer) + { + m_logger.log("Failed to create resourcesGPUBuffer.", nbl::system::ILogger::ELL_ERROR); + return false; + } + + resourcesGPUBuffer->setObjectDebugName("drawResourcesBuffer"); + + IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = resourcesGPUBuffer->getMemoryReqs(); + + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements 
gpuBufferMemoryReqs = resourcesGPUBuffer->getMemoryReqs(); + const bool memoryRequirementsMatch = + (logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits() & gpuBufferMemoryReqs.memoryTypeBits) != 0 && // should have device local memory compatible + (gpuBufferMemoryReqs.requiresDedicatedAllocation == false); // should not require dedicated allocation + + if (!memoryRequirementsMatch) + { + m_logger.log("Shouldn't happen: Buffer Memory Requires Dedicated Allocation or can't biind to device local memory.", nbl::system::ILogger::ELL_ERROR); + return false; + } + + const auto& memoryProperties = logicalDevice->getPhysicalDevice()->getMemoryProperties(); + + video::IDeviceMemoryAllocator::SAllocation allocation = {}; + for (const auto& memoryTypeIdx : memoryTypeIndexTryOrder) + { + IDeviceMemoryAllocator::SAllocateInfo allocationInfo = + { + .size = totalResourcesSize, + .flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT, // for the buffers + .memoryTypeIndex = memoryTypeIdx, + .dedication = nullptr, + }; + + allocation = logicalDevice->allocate(allocationInfo); + + if (allocation.isValid()) + break; + } + + if (!allocation.isValid()) + { + m_logger.log("Failed Allocation for draw resources!", nbl::system::ILogger::ELL_ERROR); + return false; + } + + buffersMemoryArena = { + .memory = allocation.memory, + .offset = core::alignUp(allocation.offset, GPUStructsMaxNaturalAlignment), // first natural alignment after images section of the memory allocation + }; + + video::ILogicalDevice::SBindBufferMemoryInfo bindBufferMemory = { + .buffer = resourcesGPUBuffer.get(), + .binding = { + .memory = buffersMemoryArena.memory.get(), + .offset = buffersMemoryArena.offset, + } + }; + + if (!logicalDevice->bindBufferMemory(1, &bindBufferMemory)) + { + m_logger.log("DrawResourcesFiller::allocateDrawResources, bindBufferMemory failed.", nbl::system::ILogger::ELL_ERROR); + return false; + } + + return true; +} + +bool 
DrawResourcesFiller::allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, std::span memoryTypeIndexTryOrder, uint32_t reductionPercent, uint32_t maxTries) +{ + const size_t minimumAcceptableSize = MinimumDrawResourcesMemorySize; + + size_t currentBufferSize = maxBufferMemorySize; + size_t currentImageSize = maxImageMemorySize; + const size_t totalInitialSize = currentBufferSize + currentImageSize; + + // If initial size is less than minimum acceptable then increase the buffer and image size to sum up to minimumAcceptableSize with image:buffer ratios preserved + if (totalInitialSize < minimumAcceptableSize) + { + // Preserve ratio: R = buffer / (buffer + image) + // scaleFactor = minimumAcceptableSize / totalInitialSize; + const double scaleFactor = static_cast(minimumAcceptableSize) / totalInitialSize; + currentBufferSize = static_cast(currentBufferSize * scaleFactor); + currentImageSize = minimumAcceptableSize - currentBufferSize; // ensures exact sum + } + + uint32_t numTries = 0u; + while ((currentBufferSize + currentImageSize) >= minimumAcceptableSize && numTries < maxTries) + { + if (allocateDrawResources(logicalDevice, currentImageSize, currentBufferSize, memoryTypeIndexTryOrder)) + { + m_logger.log("Successfully allocated memory for images (%zu) and buffers (%zu).", system::ILogger::ELL_INFO, currentImageSize, currentBufferSize); + return true; + } + + m_logger.log("Allocation of memory for images(%zu) and buffers(%zu) failed; Reducing allocation size by %u%% and retrying...", system::ILogger::ELL_WARNING, currentImageSize, currentBufferSize, reductionPercent); + currentBufferSize = (currentBufferSize * (100 - reductionPercent)) / 100; + currentImageSize = (currentImageSize * (100 - reductionPercent)) / 100; + numTries++; + } + + m_logger.log("All attempts to allocate memory for images(%zu) and buffers(%zu) failed.", system::ILogger::ELL_ERROR, currentImageSize, currentBufferSize); + 
return false; +} + +// Stages the mesh's vertex data followed by its index data contiguously in geometryInfo and records one draw call referencing both offsets +void DrawResourcesFiller::drawTriangleMesh( + const CTriangleMesh& mesh, + SIntendedSubmitInfo& intendedNextSubmit) +{ + // TODO: main objects + // beginMainObject(); + + // TODO: for now we add whole mesh at once, instead we should add triangle by triangle and check if we overflow memory + + const size_t vertexBuffByteSize = mesh.getVertexBuffByteSize(); + const size_t indexBuffByteSize = mesh.getIndexBuffByteSize(); + const auto& indexBuffer = mesh.getIndices(); + const auto& vertexBuffer = mesh.getVertices(); + assert(indexBuffer.size() == vertexBuffer.size()); // TODO: figure out why it was needed then decide if this constraint needs to be kept + + DrawCallData drawCallData = {}; + + // Copy VertexBuffer (the reservation must also cover the index data copied right after it, otherwise the second memcpy writes past the end of geometryInfo and the indices are never counted in its storage size) + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(vertexBuffByteSize + indexBuffByteSize, alignof(CTriangleMesh::vertex_t)); + drawCallData.triangleMeshVerticesBaseAddress = geometryBufferOffset; + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + memcpy(dst, vertexBuffer.data(), vertexBuffByteSize); + geometryBufferOffset += vertexBuffByteSize; + + // Copy IndexBuffer + dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + drawCallData.indexBufferOffset = geometryBufferOffset; + memcpy(dst, indexBuffer.data(), indexBuffByteSize); + + drawCallData.triangleMeshMainObjectIndex = 0u; // TODO: fix when implementing main objects + drawCallData.indexCount = mesh.getIndexCount(); + drawCalls.push_back(drawCallData); + + //endMainObject(); +} + +bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (!intendedNextSubmit.valid()) + { + // It is a caching submit without command buffer, just for the purpose of accumulation of staging resources + // In that case we don't push any uploads (i.e. 
we don't record any imageRecord commmand in active command buffer, because there is no active command buffer) + return false; + } + + bool success = true; + success &= pushBufferUploads(intendedNextSubmit, resourcesCollection); + + return success; +} + +bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resources) +{ + copiedResourcesSize = 0ull; + + if (resourcesCollection.calculateTotalConsumption() > resourcesGPUBuffer->getSize()) + { + m_logger.log("some bug has caused the resourcesCollection to consume more memory than available in resourcesGPUBuffer without overflow submit", nbl::system::ILogger::ELL_ERROR); + assert(false); + return false; + } + + auto copyCPUFilledDrawBuffer = [&](auto& drawBuffer) -> bool + { + // drawBuffer must be of type CPUGeneratedResource + SBufferRange copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer }; + + if (copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()) + { + m_logger.log("`copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()` is true in `copyCPUFilledDrawBuffer`, this shouldn't happen with correct auto-submission mechanism.", nbl::system::ILogger::ELL_ERROR); + assert(false); + return false; + } + + drawBuffer.bufferOffset = copyRange.offset; + if (copyRange.size > 0ull) + { + if (!m_bufferUploadUtils->updateBufferRangeViaStagingBuffer(intendedNextSubmit, copyRange, drawBuffer.vector.data())) + return false; + copiedResourcesSize += drawBuffer.getAlignedStorageSize(); + } + return true; + }; + + copyCPUFilledDrawBuffer(resources.drawObjects); + copyCPUFilledDrawBuffer(resources.indexBuffer); + copyCPUFilledDrawBuffer(resources.geometryInfo); + + return true; +} + +void DrawResourcesFiller::markFrameUsageComplete(uint64_t drawSubmitWaitValue) +{ + // m_logger.log(std::format("Finished Frame Idx = {}", currentFrameIndex).c_str(), nbl::system::ILogger::ELL_INFO); + currentFrameIndex++; + // TODO[LATER]: take 
into account that currentFrameIndex was submitted with drawSubmitWaitValue; Use that value when deallocating the resources marked with this frame index + // Currently, for evictions the worst case value will be waited for, as there is no way yet to know which semaphore value will signal the completion of the (to be evicted) resource's usage +} \ No newline at end of file diff --git a/75_CAD_3D/DrawResourcesFiller.h b/75_CAD_3D/DrawResourcesFiller.h new file mode 100644 index 000000000..ea2bca02e --- /dev/null +++ b/75_CAD_3D/DrawResourcesFiller.h @@ -0,0 +1,220 @@ +/******************************************************************************/ +/* DrawResourcesFiller: This class provides important functionality to manage resources needed for a draw. +/******************************************************************************/ +#pragma once + +#if __has_include("glm/glm/glm.hpp") // legacy +#include "glm/glm/glm.hpp" +#else +#include "glm/glm.hpp" // new build system +#endif +#include +#include +#include +#include +#include +#include +#include "CTriangleMesh.h" +#include "shaders/globals.hlsl" // the directory is lowercase on disk (same spelling as in CTriangleMesh.h); the path must match exactly on case-sensitive filesystems + +using namespace nbl; +using namespace nbl::video; +using namespace nbl::core; +using namespace nbl::asset; + +static_assert(sizeof(DrawObject) == 16u); +static_assert(sizeof(MainObject) == 20u); + +// ! DrawResourcesFiller +// ! This class provides important functionality to manage resources needed for a draw. +// ! Drawing new objects (polylines, hatches, etc.) should go through this class. +// ! Contains all the scene resources (buffers and images) +// ! In the case of overflow (i.e. 
not enough remaining v-ram) will auto-submit/render everything recorded so far, +// and additionally makes sure relevant data needed for those draw calls are present in memory +struct DrawResourcesFiller +{ + struct DrawCallData + { + uint64_t indexBufferOffset; + uint64_t indexCount; + uint64_t triangleMeshVerticesBaseAddress; + uint32_t triangleMeshMainObjectIndex; + }; + +public: + + // We pack multiple data types in a single buffer, we need to make sure each offset starts aligned to avoid mis-aligned accesses + static constexpr size_t GPUStructsMaxNaturalAlignment = 8u; + static constexpr size_t MinimumDrawResourcesMemorySize = 512u * (1u << 20u); // 512MB + + /// @brief general parent struct for 1.ReservedCompute and 2.CPUGenerated Resources + struct ResourceBase + { + static constexpr size_t InvalidBufferOffset = ~0u; // NOTE(review): ~0u is an `unsigned int` all-ones value widened to size_t, not the maximum size_t - confirm this is the intended sentinel + size_t bufferOffset = InvalidBufferOffset; // set when copy to gpu buffer is issued + virtual size_t getCount() const = 0; + virtual size_t getStorageSize() const = 0; + virtual size_t getAlignedStorageSize() const { return core::alignUp(getStorageSize(), GPUStructsMaxNaturalAlignment); } + }; + + /// @brief ResourceBase reserved for compute shader stages input/output + template + struct ReservedComputeResource : ResourceBase + { + size_t count = 0ull; + size_t getCount() const override { return count; } + size_t getStorageSize() const override { return count * sizeof(T); } + }; + + /// @brief ResourceBase which is filled by CPU, packed and sent to GPU + template + struct CPUGeneratedResource : ResourceBase + { + core::vector vector; + size_t getCount() const override { return vector.size(); } + size_t getStorageSize() const override { return vector.size() * sizeof(T); } + + /// @return pointer to start of the data to be filled, up to additionalCount + T* increaseCountAndGetPtr(size_t additionalCount) + { + size_t offset = vector.size(); + vector.resize(offset + additionalCount); + return vector.data() + offset; // not &vector[offset]: that indexes out of range when additionalCount is 0 + } + + /// @brief increases size of general-purpose 
resources that hold bytes + /// @param alignment: Alignment of the pointer returned to be filled, should be PoT and <= GPUStructsMaxNaturalAlignment, only use this if storing raw bytes in vector + /// @return pointer to start of the data to be filled, up to additional size + size_t increaseSizeAndGetOffset(size_t additionalSize, size_t alignment) + { + assert(core::isPoT(alignment) && alignment <= GPUStructsMaxNaturalAlignment); + size_t offset = core::alignUp(vector.size(), alignment); + vector.resize(offset + additionalSize); + return offset; + } + + uint32_t addAndGetOffset(const T& val) + { + vector.push_back(val); + return vector.size() - 1u; + } + + T* data() { return vector.data(); } + }; + + /// @brief struct to hold all resources + // TODO: rename to staged resources buffers or something like that + struct ResourcesCollection + { + // auto-submission level 1 buffers (mainObj that drawObjs references, if all drawObjs+idxBuffer+geometryInfo doesn't fit into mem this will be broken down into many) + //CPUGeneratedResource mainObjects; + + // auto-submission level 2 buffers + CPUGeneratedResource drawObjects; + CPUGeneratedResource indexBuffer; // TODO: this is going to change to ReservedComputeResource where index buffer gets filled by compute shaders + CPUGeneratedResource geometryInfo; // general purpose byte buffer for custom data for geometries (eg. 
line points, bezier definitions, aabbs) + + // Get Total memory consumption, If all ResourcesCollection get packed together with GPUStructsMaxNaturalAlignment + // used to decide the remaining memory and when to overflow + size_t calculateTotalConsumption() const + { + return + drawObjects.getAlignedStorageSize() + + indexBuffer.getAlignedStorageSize() + + geometryInfo.getAlignedStorageSize(); + } + }; + + DrawResourcesFiller(); + + DrawResourcesFiller(smart_refctd_ptr&& device, smart_refctd_ptr&& bufferUploadUtils, IQueue* copyQueue, core::smart_refctd_ptr&& logger); + + typedef std::function SubmitFunc; + void setSubmitDrawsFunction(const SubmitFunc& func); + + /** + * @brief Attempts to allocate a single contiguous device-local memory block for draw resources, divided into image and buffer sections. + * + * The function allocates a single memory block and splits it into image and buffer arenas. + * + * @param logicalDevice Pointer to the logical device used for memory allocation and resource creation. + * @param requiredImageMemorySize The size in bytes of the memory required for images. + * @param requiredBufferMemorySize The size in bytes of the memory required for buffers. + * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried. + * + * @return true if the memory allocation and resource setup succeeded; false otherwise. + */ + bool allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize, std::span memoryTypeIndexTryOrder); + + /** + * @brief Attempts to allocate draw resources within a given VRAM budget, retrying with progressively smaller sizes on failure. + * + * This function preserves the initial image-to-buffer memory ratio. If the initial sizes are too small, + * it scales them up to meet a minimum required threshold. 
On allocation failure, it reduces the memory + * sizes by a specified percentage and retries, until it either succeeds or the number of attempts exceeds `maxTries`. + * + * @param logicalDevice Pointer to the logical device used for allocation. + * @param maxImageMemorySize Initial image memory size (in bytes) to attempt allocation with. + * @param maxBufferMemorySize Initial buffer memory size (in bytes) to attempt allocation with. + * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried. + * @param reductionPercent The percentage by which to reduce the memory sizes after each failed attempt (e.g., 10 means reduce by 10%). + * @param maxTries Maximum number of attempts to try reducing and allocating memory. + * + * @return true if the allocation succeeded at any iteration; false if all attempts failed. + */ + bool allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, std::span memoryTypeIndexTryOrder, uint32_t reductionPercent = 10u, uint32_t maxTries = 32u); + + // Must be called at the end of each frame. + // right before submitting the main draw that uses the currently queued geometry, images, or other objects/resources. + // Registers the semaphore/value that will signal completion of this frame�s draw, + // This allows future frames to safely deallocate or evict resources used in the current frame by waiting on this signal before reuse or destruction. 
+ // `drawSubmitWaitValue` should reference the wait value of the draw submission finishing this frame using the `intendedNextSubmit`; + void markFrameUsageComplete(uint64_t drawSubmitWaitValue); + + void drawTriangleMesh( + const CTriangleMesh& mesh, + SIntendedSubmitInfo& intendedNextSubmit); + + /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU + /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. + bool pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit); + + /// @brief Records GPU copy commands for all staged buffer resourcesCollection into the active command buffer. + bool pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resourcesCollection); + + /// @brief resets staging buffers and images + void reset() + { + drawCalls.clear(); + } + + /// @brief collection of all the resources that will eventually be reserved or copied to in the resourcesGPUBuffer, will be accessed via individual BDA pointers in shaders + const ResourcesCollection& getResourcesCollection() const { return resourcesCollection; } + /// @brief buffer containing all non-texture type resources + nbl::core::smart_refctd_ptr getResourcesGPUBuffer() const { return resourcesGPUBuffer; } + /// @return how far resourcesGPUBuffer was copied to by `finalizeAllCopiesToGPU` in `resourcesCollection` + const size_t getCopiedResourcesSize() { return copiedResourcesSize; } + const core::vector& getDrawCalls() const { return drawCalls; } + +private: + nbl::system::logger_opt_smart_ptr m_logger = nullptr; + + smart_refctd_ptr m_device; + core::smart_refctd_ptr m_bufferUploadUtils; + + IQueue* m_copyQueue; + + // FrameIndex used as a criteria for resource/image eviction in case of limitations + uint32_t currentFrameIndex = 0u; + + // DrawCalls Data + core::vector drawCalls; + + // 
ResourcesCollection and packed into GPUBuffer + ResourcesCollection resourcesCollection; + IDeviceMemoryAllocator::SAllocation buffersMemoryArena; + nbl::core::smart_refctd_ptr resourcesGPUBuffer; + size_t copiedResourcesSize; + + SubmitFunc submitDraws; +}; \ No newline at end of file diff --git a/75_CAD_3D/config.json.template b/75_CAD_3D/config.json.template new file mode 100644 index 000000000..f961745c1 --- /dev/null +++ b/75_CAD_3D/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + "configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/75_CAD_3D/main.cpp b/75_CAD_3D/main.cpp new file mode 100644 index 000000000..ed7ddc039 --- /dev/null +++ b/75_CAD_3D/main.cpp @@ -0,0 +1,1097 @@ +// TODO: Copyright notice +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +#include "nbl/examples/examples.hpp" + +using namespace nbl::hlsl; +using namespace nbl; +using namespace core; +using namespace system; +using namespace asset; +using namespace ui; +using namespace video; + +#include +#include "DrawResourcesFiller.h" + +#include "nbl/builtin/hlsl/math/linalg/transform.hlsl" +#include "nbl/builtin/hlsl/math/thin_lens_projection.hlsl" + +class CEventCallback : public ISimpleManagedSurface::ICallback +{ +public: + CEventCallback(nbl::core::smart_refctd_ptr&& m_inputSystem, nbl::system::logger_opt_smart_ptr&& logger) : m_inputSystem(std::move(m_inputSystem)), m_logger(std::move(logger)) {} + CEventCallback() {} + + void setLogger(nbl::system::logger_opt_smart_ptr& logger) + { + m_logger = logger; + } + void 
setInputSystem(nbl::core::smart_refctd_ptr&& m_inputSystem) + { + // the parameter shadows the member of the same name, so the member must be qualified; an unqualified assignment only self-moves the parameter and leaves the member null + this->m_inputSystem = std::move(m_inputSystem); + } +private: + + void onMouseConnected_impl(nbl::core::smart_refctd_ptr&& mch) override + { + m_logger.log("A mouse %p has been connected", nbl::system::ILogger::ELL_INFO, mch.get()); + m_inputSystem.get()->add(m_inputSystem.get()->m_mouse, std::move(mch)); + } + void onMouseDisconnected_impl(nbl::ui::IMouseEventChannel* mch) override + { + m_logger.log("A mouse %p has been disconnected", nbl::system::ILogger::ELL_INFO, mch); + m_inputSystem.get()->remove(m_inputSystem.get()->m_mouse, mch); + } + void onKeyboardConnected_impl(nbl::core::smart_refctd_ptr&& kbch) override + { + m_logger.log("A keyboard %p has been connected", nbl::system::ILogger::ELL_INFO, kbch.get()); + m_inputSystem.get()->add(m_inputSystem.get()->m_keyboard, std::move(kbch)); + } + void onKeyboardDisconnected_impl(nbl::ui::IKeyboardEventChannel* kbch) override + { + m_logger.log("A keyboard %p has been disconnected", nbl::system::ILogger::ELL_INFO, kbch); + m_inputSystem.get()->remove(m_inputSystem.get()->m_keyboard, kbch); + } + +private: + nbl::core::smart_refctd_ptr m_inputSystem = nullptr; + nbl::system::logger_opt_smart_ptr m_logger = nullptr; +}; + +class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources +{ + public: + CSwapchainResources() = default; + + inline E_FORMAT deduceRenderpassFormat(ISurface* surface, IPhysicalDevice* physDev) + { + ISwapchain::SCreationParams swapchainParams = {.surface=smart_refctd_ptr(surface), }; + // Need to choose a surface format + if (!swapchainParams.deduceFormat(physDev, getPreferredFormats(), getPreferredEOTFs(), getPreferredColorPrimaries())) + return EF_UNKNOWN; + return swapchainParams.surfaceFormat.format; + } + + // When needing to recreate the framebuffer, We need to have access to a renderpass compatible to renderpass used to render to the framebuffer + inline void setCompatibleRenderpass(core::smart_refctd_ptr 
renderpass) + { + m_renderpass = renderpass; + } + + inline IGPUFramebuffer* getFramebuffer(const uint8_t imageIx) + { + if (imageIx(m_renderpass->getOriginDevice()); + + const auto swapchain = getSwapchain(); + const auto count = swapchain->getImageCount(); + const auto& sharedParams = swapchain->getCreationParameters().sharedParams; + for (uint8_t i=0u; icreateImageView({ + .flags = IGPUImageView::ECF_NONE, + .subUsages = IGPUImage::EUF_RENDER_ATTACHMENT_BIT, + .image = core::smart_refctd_ptr(getImage(i)), + .viewType = IGPUImageView::ET_2D, + .format = getImage(i)->getCreationParameters().format + }); + m_framebuffers[i] = device->createFramebuffer({{ + .renderpass = core::smart_refctd_ptr(m_renderpass), + .colorAttachments = &imageView.get(), + .width = sharedParams.width, + .height = sharedParams.height + }}); + if (!m_framebuffers[i]) + return false; + } + return true; + } + + // Per-swapchain + core::smart_refctd_ptr m_renderpass; + std::array,ISwapchain::MaxImages> m_framebuffers; +}; + +class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplication, public nbl::examples::BuiltinResourcesApplication +{ + using device_base_t = nbl::examples::SimpleWindowedApplication; + using asset_base_t = nbl::examples::BuiltinResourcesApplication; + using clock_t = std::chrono::steady_clock; + + constexpr static uint32_t WindowWidthRequest = 1600u; + constexpr static uint32_t WindowHeightRequest = 900u; + constexpr static uint32_t MaxFramesInFlight = 3u; + constexpr static uint32_t MaxSubmitsInFlight = 16u; +public: + + void allocateResources() + { + // TODO: currently using the same utils for buffers and images, make them separate staging buffers + drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_device), core::smart_refctd_ptr(m_utils), getGraphicsQueue(), core::smart_refctd_ptr(m_logger)); + + // Just wanting to try memory type indices with device local flag, TODO: later improve to prioritize pure device local + std::vector 
deviceLocalMemoryTypeIndices; + for (uint32_t i = 0u; i < m_physicalDevice->getMemoryProperties().memoryTypeCount; ++i) + { + const auto& memType = m_physicalDevice->getMemoryProperties().memoryTypes[i]; + if (memType.propertyFlags.hasFlags(IDeviceMemoryAllocation::EMPF_DEVICE_LOCAL_BIT)) + deviceLocalMemoryTypeIndices.push_back(i); + } + + size_t maxImagesMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB + size_t maxBufferMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB + + drawResourcesFiller.allocateDrawResourcesWithinAvailableVRAM(m_device.get(), maxImagesMemSize, maxBufferMemSize, deviceLocalMemoryTypeIndices); + + { + IGPUBuffer::SCreationParams globalsCreationParams = {}; + globalsCreationParams.size = sizeof(Globals); + globalsCreationParams.usage = IGPUBuffer::EUF_UNIFORM_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF; + m_globalsBuffer = m_device->createBuffer(std::move(globalsCreationParams)); + + IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = m_globalsBuffer->getMemoryReqs(); + memReq.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + auto globalsBufferMem = m_device->allocate(memReq, m_globalsBuffer.get()); + } + + // pseudoStencil + { + asset::E_FORMAT pseudoStencilFormat = asset::EF_R32_UINT; + { + IGPUImage::SCreationParams imgInfo; + imgInfo.format = pseudoStencilFormat; + imgInfo.type = IGPUImage::ET_2D; + imgInfo.extent.width = m_window->getWidth(); + imgInfo.extent.height = m_window->getHeight(); + imgInfo.extent.depth = 1u; + imgInfo.mipLevels = 1u; + imgInfo.arrayLayers = 1u; + imgInfo.samples = asset::ICPUImage::ESCF_1_BIT; + imgInfo.flags = asset::IImage::E_CREATE_FLAGS::ECF_NONE; + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT; + // [VKTODO] imgInfo.initialLayout = IGPUImage::EL_UNDEFINED; + imgInfo.tiling = IGPUImage::TILING::OPTIMAL; + + auto image = m_device->createImage(std::move(imgInfo)); + auto imageMemReqs = 
image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(imageMemReqs, image.get()); + + image->setObjectDebugName("pseudoStencil Image"); + + IGPUImageView::SCreationParams imgViewInfo; + imgViewInfo.image = std::move(image); + imgViewInfo.format = pseudoStencilFormat; + imgViewInfo.viewType = IGPUImageView::ET_2D; + imgViewInfo.flags = IGPUImageView::E_CREATE_FLAGS::ECF_NONE; + imgViewInfo.subresourceRange.aspectMask = asset::IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + imgViewInfo.subresourceRange.baseArrayLayer = 0u; + imgViewInfo.subresourceRange.baseMipLevel = 0u; + imgViewInfo.subresourceRange.layerCount = 1u; + imgViewInfo.subresourceRange.levelCount = 1u; + + pseudoStencilImageView = m_device->createImageView(std::move(imgViewInfo)); + } + } + + // colorStorage + { + asset::E_FORMAT colorStorageFormat = asset::EF_R32_UINT; + { + IGPUImage::SCreationParams imgInfo; + imgInfo.format = colorStorageFormat; + imgInfo.type = IGPUImage::ET_2D; + imgInfo.extent.width = m_window->getWidth(); + imgInfo.extent.height = m_window->getHeight(); + imgInfo.extent.depth = 1u; + imgInfo.mipLevels = 1u; + imgInfo.arrayLayers = 1u; + imgInfo.samples = asset::ICPUImage::ESCF_1_BIT; + imgInfo.flags = asset::IImage::E_CREATE_FLAGS::ECF_NONE; + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT; + // [VKTODO] imgInfo.initialLayout = IGPUImage::EL_UNDEFINED; + imgInfo.tiling = IGPUImage::TILING::OPTIMAL; + + auto image = m_device->createImage(std::move(imgInfo)); + auto imageMemReqs = image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(imageMemReqs, image.get()); + + image->setObjectDebugName("colorStorage Image"); + + IGPUImageView::SCreationParams imgViewInfo; + imgViewInfo.image = std::move(image); + imgViewInfo.format = colorStorageFormat; + imgViewInfo.viewType = 
IGPUImageView::ET_2D; + imgViewInfo.flags = IGPUImageView::E_CREATE_FLAGS::ECF_NONE; + imgViewInfo.subresourceRange.aspectMask = asset::IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + imgViewInfo.subresourceRange.baseArrayLayer = 0u; + imgViewInfo.subresourceRange.baseMipLevel = 0u; + imgViewInfo.subresourceRange.layerCount = 1u; + imgViewInfo.subresourceRange.levelCount = 1u; + + colorStorageImageView = m_device->createImageView(std::move(imgViewInfo)); + } + } + + // Initial Pipeline Transitions and Clearing of PseudoStencil and ColorStorage + // Recorded to Temporary CommandBuffer, Submitted to Graphics Queue, and Blocked on here + { + auto cmdPool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + smart_refctd_ptr tmpCmdBuffer; + cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &tmpCmdBuffer, 1 }); + auto tmpJobFinishedSema = m_device->createSemaphore(0ull); + + tmpCmdBuffer->begin(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + { + // Clear pseudoStencil + auto pseudoStencilImage = pseudoStencilImageView->getCreationParameters().image; + auto colorStorageImage = colorStorageImageView->getCreationParameters().image; + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t beforeClearImageBarrier[] = + { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::CLEAR_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, // could be ALL_TRANSFER but let's be specific we only want to CLEAR right now + } + // .ownershipOp. 
No queueFam ownership transfer + }, + .image = pseudoStencilImage.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u, + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + + tmpCmdBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeClearImageBarrier }); + + uint32_t pseudoStencilInvalidValue = core::bitfieldInsert(0u, 16777215, 8, 24); + IGPUCommandBuffer::SClearColorValue clear = {}; + clear.uint32[0] = pseudoStencilInvalidValue; + + asset::IImage::SSubresourceRange subresourceRange = {}; + subresourceRange.aspectMask = asset::IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + subresourceRange.baseArrayLayer = 0u; + subresourceRange.baseMipLevel = 0u; + subresourceRange.layerCount = 1u; + subresourceRange.levelCount = 1u; + + tmpCmdBuffer->clearColorImage(pseudoStencilImage.get(), asset::IImage::LAYOUT::GENERAL, &clear, 1u, &subresourceRange); + + // prepare pseudoStencilImage for usage in drawcall + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t beforeUsageImageBarriers[] = + { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::CLEAR_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS | ACCESS_FLAGS::MEMORY_WRITE_BITS, // could be ALL_TRANSFER but let's be specific we only want to CLEAR right now + } + // .ownershipOp. 
No queueFam ownership transfer + }, + .image = pseudoStencilImage.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u, + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::GENERAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS | ACCESS_FLAGS::MEMORY_WRITE_BITS, // could be ALL_TRANSFER but let's be specific we only want to CLEAR right now + } + // .ownershipOp. No queueFam ownership transfer + }, + .image = colorStorageImage.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u, + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + + tmpCmdBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeUsageImageBarriers }); + } + tmpCmdBuffer->end(); + + IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[1u] = { {.cmdbuf = tmpCmdBuffer.get() } }; + IQueue::SSubmitInfo::SSemaphoreInfo singalSemaphores[1] = {}; + singalSemaphores[0].semaphore = tmpJobFinishedSema.get(); + singalSemaphores[0].stageMask = asset::PIPELINE_STAGE_FLAGS::NONE; + singalSemaphores[0].value = 1u; + + IQueue::SSubmitInfo submitInfo = {}; + submitInfo.commandBuffers = cmdbufs; + submitInfo.waitSemaphores = {}; + submitInfo.signalSemaphores = singalSemaphores; + + getGraphicsQueue()->submit({ &submitInfo, 1u }); + + ISemaphore::SWaitInfo waitTmpJobFinish = { .semaphore = tmpJobFinishedSema.get(), .value = 1u}; + m_device->blockForSemaphores({ &waitTmpJobFinish, 1u }); + } + } + + smart_refctd_ptr createRenderpass( + E_FORMAT colorAttachmentFormat, + IGPURenderpass::LOAD_OP loadOp, + IImage::LAYOUT initialLayout, + IImage::LAYOUT 
finalLayout) + { + const IGPURenderpass::SCreationParams::SColorAttachmentDescription colorAttachments[] = { + {{ + { + .format = colorAttachmentFormat, + .samples = IGPUImage::ESCF_1_BIT, + .mayAlias = false + }, + /*.loadOp = */loadOp, + /*.storeOp = */IGPURenderpass::STORE_OP::STORE, + /*.initialLayout = */initialLayout, + /*.finalLayout = */finalLayout + }}, + IGPURenderpass::SCreationParams::ColorAttachmentsEnd + }; + + IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = { + {}, + IGPURenderpass::SCreationParams::SubpassesEnd + }; + subpasses[0].colorAttachments[0] = {.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}; + + // We actually need external dependencies to ensure ordering of the Implicit Layout Transitions relative to the semaphore signals + const IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // wipe-transition to ATTACHMENT_OPTIMAL + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // we can have NONE as Sources because ???? 
+ .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // ATTACHMENT_OPTIMAL to PRESENT_SRC + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // we can have NONE as the Destinations because the spec says so about presents + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + + smart_refctd_ptr renderpass; + IGPURenderpass::SCreationParams params = {}; + params.colorAttachments = colorAttachments; + params.subpasses = subpasses; + params.dependencies = dependencies; + renderpass = m_device->createRenderpass(params); + if (!renderpass) + logFail("Failed to Create a Renderpass!"); + return renderpass; + } + + + // Yay thanks to multiple inheritance we cannot forward ctors anymore + inline ComputerAidedDesign(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD,_localOutputCWD,_sharedInputCWD,_sharedOutputCWD) {} + + // Will get called mid-initialization, via `filterDevices` between when the API Connection is created and Physical Device is chosen + inline core::vector getSurfaces() const override + { + // So let's create our Window and Surface then! + if (!m_surface) + { + { + auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = windowCallback; + params.width = WindowWidthRequest; + params.height = WindowHeightRequest; + params.x = 32; + params.y = 32; + // Don't want to have a window lingering about before we're ready so create it hidden. 
+ // Only programmatic resize, not regular. + params.flags = IWindow::ECF_BORDERLESS|IWindow::ECF_RESIZABLE; + params.windowCaption = "CAD 3D Playground"; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api),smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = CSimpleResizeSurface::create(std::move(surface)); + } + if (m_surface) + return {{m_surface->getSurface()/*,EQF_NONE*/}}; + return {}; + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + + // Remember to call the base class initialization! + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + // Let's just use the same queue since there's no need for async present + if (!m_surface) + return logFail("Could not create Window & Surface!"); + + auto scResources = std::make_unique(); + const auto format = scResources->deduceRenderpassFormat(m_surface->getSurface(), m_physicalDevice); // TODO: DO I need to recreate render passes if swapchain gets recreated with different format? 
+ renderpassInitial = createRenderpass(format, IGPURenderpass::LOAD_OP::CLEAR, IImage::LAYOUT::UNDEFINED, IImage::LAYOUT::ATTACHMENT_OPTIMAL); + renderpassInBetween = createRenderpass(format, IGPURenderpass::LOAD_OP::LOAD, IImage::LAYOUT::ATTACHMENT_OPTIMAL, IImage::LAYOUT::ATTACHMENT_OPTIMAL); + renderpassFinal = createRenderpass(format, IGPURenderpass::LOAD_OP::LOAD, IImage::LAYOUT::ATTACHMENT_OPTIMAL, IImage::LAYOUT::PRESENT_SRC); + const auto compatibleRenderPass = renderpassInitial; // all 3 above are compatible + + scResources->setCompatibleRenderpass(compatibleRenderPass); + + if (!m_surface->init(getGraphicsQueue(),std::move(scResources),{})) + return logFail("Could not initialize the Surface!"); + + allocateResources(); + + const asset::SPushConstantRange range = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .offset = 0, + .size = sizeof(PushConstants) + }; + + m_pipelineLayout = m_device->createPipelineLayout({ &range,1 }, nullptr, nullptr, nullptr, nullptr); + + smart_refctd_ptr mainPipelineFragmentShaders = {}; + smart_refctd_ptr mainPipelineVertexShader = {}; + { + // Load Custom Shader + auto loadPrecompiledShader = [&]() -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Failed to load a precompiled shader of key \"%s\".", ILogger::ELL_ERROR, ShaderKey); + return nullptr; + } + + + auto shader = IAsset::castDown(assets[0]); + return shader; + }; + + mainPipelineFragmentShaders = loadPrecompiledShader.operator()<"main_pipeline_fragment_shader">(); // "../shaders/main_pipeline/fragment_shader.hlsl" + mainPipelineVertexShader = 
loadPrecompiledShader.operator()<"main_pipeline_vertex_shader">(); // "../shaders/main_pipeline/vertex_shader.hlsl" + } + + IGPUGraphicsPipeline::SCreationParams mainGraphicsPipelineParams = {}; + mainGraphicsPipelineParams.layout = m_pipelineLayout.get(); + mainGraphicsPipelineParams.cached = { + .vertexInput = {}, + .primitiveAssembly = { + .primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST, + }, + .rasterization = { + .polygonMode = EPM_FILL, + .faceCullingMode = EFCM_NONE, + .depthWriteEnable = false, + }, + .blend = {}, + }; + mainGraphicsPipelineParams.renderpass = compatibleRenderPass.get(); + + // Create Main Graphics Pipelines + { + video::IGPUPipelineBase::SShaderSpecInfo specInfo[2] = { + { .shader = mainPipelineVertexShader.get(), .entryPoint = "vtxMain" }, + { .shader = mainPipelineFragmentShaders.get(), .entryPoint = "fragMain" }, + }; + + IGPUGraphicsPipeline::SCreationParams params[1] = { mainGraphicsPipelineParams }; + params[0].vertexShader = specInfo[0]; + params[0].fragmentShader = specInfo[1]; + + if (!m_device->createGraphicsPipelines(nullptr,params,&m_graphicsPipeline)) + return logFail("Graphics Pipeline Creation Failed."); + } + + // Create the commandbuffers and pools, this time properly 1 pool per FIF + m_graphicsCommandPool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_graphicsCommandPool) + return logFail("Couldn't create Command Pool!"); + if (!m_graphicsCommandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{m_commandBuffersInFlight.data(),MaxSubmitsInFlight})) + return logFail("Couldn't create Command Buffers!"); + + // Create the Semaphores + m_renderSemaphore = m_device->createSemaphore(0ull); + m_renderSemaphore->setObjectDebugName("m_renderSemaphore"); + m_overflowSubmitScratchSemaphore = m_device->createSemaphore(0ull); + m_overflowSubmitScratchSemaphore->setObjectDebugName("m_overflowSubmitScratchSemaphore"); + if 
(!m_renderSemaphore || !m_overflowSubmitScratchSemaphore) + return logFail("Failed to Create Semaphores!"); + + // Set Queue and ScratchSemaInfo -> wait semaphores and command buffers will be modified by workLoop each frame + m_intendedNextSubmit.queue = getGraphicsQueue(); + m_intendedNextSubmit.scratchSemaphore = { + .semaphore = m_overflowSubmitScratchSemaphore.get(), + .value = 0ull, + }; + for (uint32_t i = 0; i < MaxSubmitsInFlight; ++i) + m_commandBufferInfos[i] = { .cmdbuf = m_commandBuffersInFlight[i].get() }; + m_intendedNextSubmit.scratchCommandBuffers = m_commandBufferInfos; + m_currentRecordingCommandBufferInfo = &m_commandBufferInfos[0]; + + return true; + } + + // We do a very simple thing, display an image and wait `DisplayImageMs` to show it + inline void workLoopBody() override + { + auto now = std::chrono::high_resolution_clock::now(); + double dt = std::chrono::duration_cast(now - lastTime).count(); + lastTime = now; + m_timeElapsed += dt; + + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + { + } + , m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++) + { + } + } + , m_logger.get()); + + if (!beginFrameRender()) + return; + + const IQueue::SSubmitInfo::SSemaphoreInfo acquired = { + .semaphore = m_currentImageAcquire.semaphore, + .value = m_currentImageAcquire.acquireCount, + .stageMask = asset::PIPELINE_STAGE_FLAGS::NONE // NONE for Acquire, right? Yes, the Spec Says so! 
+ }; + + // prev frame done using the scene data (is in post process stage) + const IQueue::SSubmitInfo::SSemaphoreInfo prevFrameRendered = { + .semaphore = m_renderSemaphore.get(), + .value = m_realFrameIx, + .stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + }; + + IQueue::SSubmitInfo::SSemaphoreInfo waitSems[2u] = { acquired, prevFrameRendered }; + m_intendedNextSubmit.waitSemaphores = waitSems; + + addObjects(m_intendedNextSubmit); + + endFrameRender(m_intendedNextSubmit); + } + + bool beginFrameRender() + { + // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. + const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); + // We block for semaphores for 2 reasons here: + // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] + // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. [MaxAcquiresInFlight] + if (m_realFrameIx>=framesInFlight) + { + const ISemaphore::SWaitInfo cmdbufDonePending[] = { + { + .semaphore = m_renderSemaphore.get(), + .value = m_realFrameIx+1-framesInFlight + } + }; + if (m_device->blockForSemaphores(cmdbufDonePending)!=ISemaphore::WAIT_RESULT::SUCCESS) + return false; + } + + // Acquire + m_currentImageAcquire = m_surface->acquireNextImage(); + if (!m_currentImageAcquire) + return false; + + const bool beganSuccess = m_intendedNextSubmit.beginNextCommandBuffer(m_currentRecordingCommandBufferInfo); + assert(beganSuccess); + auto* cb = m_currentRecordingCommandBufferInfo->cmdbuf; + + // safe to proceed + // no need to reset and begin new command buffers as SIntendedSubmitInfo already handled that. 
+ // cb->reset(video::IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + // cb->begin(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cb->beginDebugMarker("Frame"); + + nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo; + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.68f, 0.85f, 0.90f, 1.0f} }; + { + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + + beginInfo = { + .renderpass = renderpassInitial.get(), + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + } + + cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + cb->endRenderPass(); + + return true; + } + + void _submitDraws(SIntendedSubmitInfo& intendedSubmitInfo, bool inBetweenSubmit) + { + drawResourcesFiller.pushAllUploads(intendedSubmitInfo); + + m_currentRecordingCommandBufferInfo = intendedSubmitInfo.getCommandBufferForRecording(); // drawResourcesFiller.pushAllUploads might've overflow submitted and changed the current recording command buffer + + // Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state + auto* cb = m_currentRecordingCommandBufferInfo->cmdbuf; + + const auto& resourcesCollection = drawResourcesFiller.getResourcesCollection(); + const auto& resourcesGPUBuffer = drawResourcesFiller.getResourcesGPUBuffer(); + + float64_t4x4 viewProjection; + { + // TODO: create a proper camera + + auto view = hlsl::math::linalg::rhLookAt({ 300.0f, 300.0f, 300.0f }, { 0.0f, 0.0f, 0.0f }, { 0.0f, 0.0f, 1.0f }); + const float64_t aspectRatio = static_cast(m_window->getWidth()) / static_cast(m_window->getHeight()); + auto proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix(hlsl::radians(60.0f), 
aspectRatio, 0.1f, 2000.0f); + + viewProjection = hlsl::mul(proj, nbl::hlsl::math::linalg::promote_affine<4, 4>(view)); + } + + Globals globalData = {}; + uint64_t baseAddress = resourcesGPUBuffer->getDeviceAddress(); + globalData.pointers = { + .drawObjects = baseAddress + resourcesCollection.drawObjects.bufferOffset, + .geometryBuffer = baseAddress + resourcesCollection.geometryInfo.bufferOffset, + }; + SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer}; + bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData); + assert(updateSuccess); + + asset::SViewport vp = + { + .x = 0u, + .y = 0u, + .width = static_cast(m_window->getWidth()), + .height = static_cast(m_window->getHeight()), + .minDepth = 1.f, + .maxDepth = 0.f, + }; + cb->setViewport(0u, 1u, &vp); + + VkRect2D scissor = + { + .offset = { 0, 0 }, + .extent = { m_window->getWidth(), m_window->getHeight() }, + }; + cb->setScissor(0u, 1u, &scissor); + + // pipelineBarriersBeforeDraw + { + constexpr uint32_t MaxBufferBarriersCount = 2u; + uint32_t bufferBarriersCount = 0u; + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t bufferBarriers[MaxBufferBarriersCount]; + + const auto& resourcesCollection = drawResourcesFiller.getResourcesCollection(); + + if (m_globalsBuffer->getSize() > 0u) + { + auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; + bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; + bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; + bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; + bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::UNIFORM_READ_BIT; + bufferBarrier.range = + { + .offset = 0u, + .size = m_globalsBuffer->getSize(), + .buffer = m_globalsBuffer, + }; + } + if (drawResourcesFiller.getCopiedResourcesSize() > 0u) + { + auto& bufferBarrier = 
bufferBarriers[bufferBarriersCount++]; + bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; + bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; + bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_INPUT_BITS | PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; + bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::MEMORY_READ_BITS | ACCESS_FLAGS::MEMORY_WRITE_BITS; + bufferBarrier.range = + { + .offset = 0u, + .size = drawResourcesFiller.getCopiedResourcesSize(), + .buffer = drawResourcesFiller.getResourcesGPUBuffer(), + }; + } + cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .bufBarriers = {bufferBarriers, bufferBarriersCount}, .imgBarriers = {} }); + } + + nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo; + VkRect2D currentRenderArea; + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,0.f} }; + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + beginInfo = { + .renderpass = (inBetweenSubmit) ? 
renderpassInBetween.get():renderpassFinal.get(), + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + } + cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + cb->bindGraphicsPipeline(m_graphicsPipeline.get()); + + for (auto& drawCall : drawResourcesFiller.getDrawCalls()) + { + cb->bindIndexBuffer({ .offset = resourcesCollection.geometryInfo.bufferOffset + drawCall.indexBufferOffset, .buffer = drawResourcesFiller.getResourcesGPUBuffer()}, asset::EIT_32BIT); + + PushConstants pc = { + .triangleMeshVerticesBaseAddress = drawCall.triangleMeshVerticesBaseAddress + resourcesGPUBuffer->getDeviceAddress() + resourcesCollection.geometryInfo.bufferOffset, + .triangleMeshMainObjectIndex = drawCall.triangleMeshMainObjectIndex + }; + cb->pushConstants(m_graphicsPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); + + cb->drawIndexed(drawCall.indexCount, 1u, 0u, 0u, 0u); + } + + cb->endRenderPass(); + + if (!inBetweenSubmit) + cb->endDebugMarker(); + + drawResourcesFiller.markFrameUsageComplete(intendedSubmitInfo.getFutureScratchSemaphore().value); + + if (inBetweenSubmit) + { + if (intendedSubmitInfo.overflowSubmit(m_currentRecordingCommandBufferInfo) != IQueue::RESULT::SUCCESS) + { + m_logger->log("overflow submit failed.", ILogger::ELL_ERROR); + } + } + else + { + // cb->end(); + + const auto nextFrameIx = m_realFrameIx+1u; + const IQueue::SSubmitInfo::SSemaphoreInfo thisFrameRendered = { + .semaphore = m_renderSemaphore.get(), + .value = nextFrameIx, + .stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + }; + if (intendedSubmitInfo.submit(m_currentRecordingCommandBufferInfo, { &thisFrameRendered,1 }) == IQueue::RESULT::SUCCESS) + { + m_realFrameIx = nextFrameIx; + + IQueue::SSubmitInfo::SSemaphoreInfo presentWait = 
thisFrameRendered; + // the stages for a wait semaphore operation are about what stage you WAIT in, not what stage you wait for + presentWait.stageMask = PIPELINE_STAGE_FLAGS::NONE; // top of pipe, there's no explicit presentation engine stage + m_surface->present(m_currentImageAcquire.imageIndex,{&presentWait,1}); + } + else + { + m_logger->log("regular submit failed.", ILogger::ELL_ERROR); + } + } + } + + // TODO: remove + void submitDraws(SIntendedSubmitInfo& intendedSubmitInfo, bool inBetweenSubmit) + { + m_currentRecordingCommandBufferInfo = intendedSubmitInfo.getCommandBufferForRecording(); // drawResourcesFiller.pushAllUploads might've overflow submitted and changed the current recording command buffer + + // Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state + auto* cb = m_currentRecordingCommandBufferInfo->cmdbuf; + + asset::SViewport vp = + { + .x = 0u, + .y = 0u, + .width = static_cast(m_window->getWidth()), + .height = static_cast(m_window->getHeight()), + .minDepth = 1.f, + .maxDepth = 0.f, + }; + cb->setViewport(0u, 1u, &vp); + + VkRect2D scissor = + { + .offset = { 0, 0 }, + .extent = { m_window->getWidth(), m_window->getHeight() }, + }; + cb->setScissor(0u, 1u, &scissor); + + nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo; + VkRect2D currentRenderArea; + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,0.f} }; + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + beginInfo = { + .renderpass = (inBetweenSubmit) ? 
renderpassInBetween.get() : renderpassFinal.get(), + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + } + cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + cb->bindGraphicsPipeline(m_graphicsPipeline.get()); + + { + PushConstants pc = { + .triangleMeshVerticesBaseAddress = 1, + .triangleMeshMainObjectIndex = 2 + }; + cb->pushConstants(m_graphicsPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); + + cb->draw(3, 1, 0, 0); + } + + cb->endRenderPass(); + + if (!inBetweenSubmit) + cb->endDebugMarker(); + + drawResourcesFiller.markFrameUsageComplete(intendedSubmitInfo.getFutureScratchSemaphore().value); + + if (inBetweenSubmit) + { + if (intendedSubmitInfo.overflowSubmit(m_currentRecordingCommandBufferInfo) != IQueue::RESULT::SUCCESS) + { + m_logger->log("overflow submit failed.", ILogger::ELL_ERROR); + } + } + else + { + const auto nextFrameIx = m_realFrameIx + 1u; + const IQueue::SSubmitInfo::SSemaphoreInfo thisFrameRendered = { + .semaphore = m_renderSemaphore.get(), + .value = nextFrameIx, + .stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + }; + if (intendedSubmitInfo.submit(m_currentRecordingCommandBufferInfo, { &thisFrameRendered,1 }) == IQueue::RESULT::SUCCESS) + { + m_realFrameIx = nextFrameIx; + + IQueue::SSubmitInfo::SSemaphoreInfo presentWait = thisFrameRendered; + // the stages for a wait semaphore operation are about what stage you WAIT in, not what stage you wait for + presentWait.stageMask = PIPELINE_STAGE_FLAGS::NONE; // top of pipe, there's no explicit presentation engine stage + m_surface->present(m_currentImageAcquire.imageIndex, { &presentWait,1 }); + } + else + { + m_logger->log("regular submit failed.", ILogger::ELL_ERROR); + } + } + } + + void endFrameRender(SIntendedSubmitInfo& 
intendedSubmitInfo) + { + submitDraws(intendedSubmitInfo, false); + } + + inline bool keepRunning() override + { + if (duration_cast(clock_t::now()-start)>timeout) + return false; + + return m_surface && !m_surface->irrecoverable(); + } + + virtual bool onAppTerminated() override + { + m_currentRecordingCommandBufferInfo->cmdbuf->end(); + + // We actually want to wait for all the frames to finish rendering, otherwise our destructors will run out of order late + m_device->waitIdle(); + + // This is optional, but the window would close AFTER we return from this function + m_surface = nullptr; + + return device_base_t::onAppTerminated(); + } + + virtual video::IAPIConnection::SFeatures getAPIFeaturesToEnable() override + { + auto retval = base_t::getAPIFeaturesToEnable(); + // We only support one swapchain mode, surface, the other one is Display which we have not implemented yet. + retval.swapchainMode = video::E_SWAPCHAIN_MODE::ESM_SURFACE; + retval.validations = true; + retval.synchronizationValidation = false; + return retval; + } + +protected: + + void addObjects(SIntendedSubmitInfo& intendedNextSubmit) + { + drawResourcesFiller.setSubmitDrawsFunction( + [&](SIntendedSubmitInfo& intendedNextSubmit) + { + return submitDraws(intendedNextSubmit, true); + } + ); + drawResourcesFiller.reset(); + + core::vector vertices = { + //{ float64_t2(0.0, 0.0), 100.0 }, //0 + //{ float64_t2(-200.0, -200.0), 10.0 }, //1 + //{ float64_t2(200.0, -200.0), 10.0 }, //2 + //{ float64_t2(200.0, 200.0), -20.0 }, //3 + //{ float64_t2(-200.0, 200.0), 10.0 }, //4 + + { float64_t3(0.0, 0.0, 100.0) }, + { float64_t3(-200.0, -200.0, 10.0) }, + { float64_t3(200.0, -100.0, 10.0) }, + { float64_t3(0.0, 0.0, 100.0) }, + { float64_t3(200.0, -100.0, 10.0) }, + { float64_t3(200.0, 200.0, -20.0) }, + { float64_t3(0.0, 0.0, 100.0) }, + { float64_t3(200.0, 200.0, -20.0) }, + { float64_t3(-200.0, 200.0, 10.0) }, + { float64_t3(0.0, 0.0, 100.0) }, + { float64_t3(-200.0, 200.0, 10.0) }, + { 
float64_t3(-200.0, -200.0, 10.0) }, + }; + + core::vector indices = { + 0, 1, 2, + 3, 4, 5, + 6, 7, 8, + 9, 10, 11 + }; + + CTriangleMesh mesh; + mesh.setVertices(std::move(vertices)); + mesh.setIndices(std::move(indices)); + + drawResourcesFiller.drawTriangleMesh(mesh, intendedNextSubmit); + } + +protected: + clock_t::time_point start; + std::chrono::seconds timeout = std::chrono::seconds(0x7fffFFFFu); + + double m_timeElapsed = 0.0; + std::chrono::steady_clock::time_point lastTime; + + core::smart_refctd_ptr m_inputSystem; + nbl::examples::InputSystem::ChannelReader mouse; + nbl::examples::InputSystem::ChannelReader keyboard; + + smart_refctd_ptr renderpassInitial; // this renderpass will clear the attachment and transition it to COLOR_ATTACHMENT_OPTIMAL + smart_refctd_ptr renderpassInBetween; // this renderpass will load the attachment and transition it to COLOR_ATTACHMENT_OPTIMAL + smart_refctd_ptr renderpassFinal; // this renderpass will load the attachment and transition it to PRESENT + + smart_refctd_ptr m_graphicsCommandPool; + std::array, MaxSubmitsInFlight> m_commandBuffersInFlight; + // ref to above cmd buffers, these go into SIntendedSubmitInfo as command buffers available for recording. 
+ std::array m_commandBufferInfos; + // pointer to one of the command buffer infos from above, this is the only command buffer used to record current submit in current frame, it will be updated by SIntendedSubmitInfo + IQueue::SSubmitInfo::SCommandBufferInfo const * m_currentRecordingCommandBufferInfo; // pointer can change, value cannot + + smart_refctd_ptr m_globalsBuffer; + DrawResourcesFiller drawResourcesFiller; // you can think of this as the scene data needed to draw everything, we only have one instance so let's use a timeline semaphore to sync all renders + + smart_refctd_ptr m_renderSemaphore; // timeline semaphore to sync frames together + + // timeline semaphore used for overflows (they need to be on their own timeline to count overflows) + smart_refctd_ptr m_overflowSubmitScratchSemaphore; + SIntendedSubmitInfo m_intendedNextSubmit; + + ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + + uint64_t m_realFrameIx = 0u; + + smart_refctd_ptr descriptorSetLayout0; + smart_refctd_ptr descriptorSetLayout1; + smart_refctd_ptr m_pipelineLayout; + smart_refctd_ptr resolveAlphaGraphicsPipeline; + smart_refctd_ptr m_debugGraphicsPipeline; + smart_refctd_ptr m_graphicsPipeline; + smart_refctd_ptr m_streamedImagesGraphicsPipeline; + + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; + smart_refctd_ptr pseudoStencilImageView; + smart_refctd_ptr colorStorageImageView; +}; + +NBL_MAIN_FUNC(ComputerAidedDesign) + diff --git a/75_CAD_3D/scripts/generate_mipmaps.py b/75_CAD_3D/scripts/generate_mipmaps.py new file mode 100644 index 000000000..78420cda5 --- /dev/null +++ b/75_CAD_3D/scripts/generate_mipmaps.py @@ -0,0 +1,47 @@ +import OpenEXR +import Imath +import numpy as np + +def read_exr(path): + exr = OpenEXR.InputFile(path) + dw = exr.header()['dataWindow'] + size = (dw.max.x - dw.min.x + 1, dw.max.y - dw.min.y + 1) + + pt = Imath.PixelType(Imath.PixelType.FLOAT) + channels = ['R', 'G', 'B'] + data = [np.frombuffer(exr.channel(c, pt), 
dtype=np.float32).reshape(size[1], size[0]) for c in channels] + return np.stack(data, axis=-1) # shape: (H, W, 3) + +def write_exr(path, arr): + H, W, C = arr.shape + assert C == 3, "Only RGB supported" + header = OpenEXR.Header(W, H) + pt = Imath.PixelType(Imath.PixelType.FLOAT) + channels = { + 'R': arr[:, :, 0].astype(np.float32).tobytes(), + 'G': arr[:, :, 1].astype(np.float32).tobytes(), + 'B': arr[:, :, 2].astype(np.float32).tobytes() + } + exr = OpenEXR.OutputFile(path, header) + exr.writePixels(channels) + +def mipmap_exr(): + img = read_exr("../../media/tiled_grid_mip_0.exr") + h, w, _ = img.shape + base_path = "../../media/tiled_grid_mip_" + tile_size = 128 + mip_level = 1 + tile_length = h // (2 * tile_size) + + while tile_length > 0: + # Reshape and average 2x2 blocks + reshaped = img.reshape(h//2, 2, w//2, 2, 3) + mipmap = reshaped.mean(axis=(1, 3)) + write_exr(base_path + str(mip_level) + ".exr", mipmap) + img = mipmap + mip_level = mip_level + 1 + tile_length = tile_length // 2 + h = h // 2 + w = w // 2 + +mipmap_exr() \ No newline at end of file diff --git a/75_CAD_3D/scripts/tiled_grid.py b/75_CAD_3D/scripts/tiled_grid.py new file mode 100644 index 000000000..89c637338 --- /dev/null +++ b/75_CAD_3D/scripts/tiled_grid.py @@ -0,0 +1,266 @@ +from PIL import Image, ImageDraw, ImageFont +import numpy as np +import os +import OpenImageIO as oiio + + + +def create_single_tile(tile_size, color, x_coord, y_coord, font_path=None): + """ + Creates a single square tile image with a given color and two lines of centered text. + + Args: + tile_size (int): The sidelength of the square tile in pixels. + color (tuple): A tuple of three floats (R, G, B) representing the color (0.0-1.0). + x_coord (int): The X coordinate to display on the tile. + y_coord (int): The Y coordinate to display on the tile. + font_path (str, optional): The path to a TrueType font file (.ttf). + If None, a default PIL font will be used. 
+ Returns: + PIL.Image.Image: The created tile image with text. + """ + # Convert float color (0.0-1.0) to 8-bit integer color (0-255) + int_color = tuple(int(max(0, min(1, c)) * 255) for c in color) # Ensure color components are clamped + + img = Image.new('RGB', (tile_size, tile_size), int_color) + draw = ImageDraw.Draw(img) + + text_line1 = f"x = {x_coord}" + text_line2 = f"y = {y_coord}" + + text_fill_color = (255, 255, 255) + + # --- Dynamic Font Size Adjustment --- + # Start with a relatively large font size and shrink if needed + font_size = int(tile_size * 0.25) # Initial guess for font size + max_font_size = int(tile_size * 0.25) # Don't exceed this + + font = None + max_iterations = 100 # Prevent infinite loops in font size reduction + + for _ in range(max_iterations): + current_font_path = font_path + current_font_size = max(1, font_size) # Ensure font size is at least 1 + + try: + if current_font_path and os.path.exists(current_font_path): + font = ImageFont.truetype(current_font_path, current_font_size) + else: + # Fallback to default font (size argument might not always work perfectly) + font = ImageFont.load_default() + # For default font, try to scale if load_default(size=...) 
is supported and works + try: + scaled_font = ImageFont.load_default(size=current_font_size) + if draw.textbbox((0, 0), text_line1, font=scaled_font)[2] > 0: # Check if usable + font = scaled_font + except Exception: + pass # Stick with original default font + + if font is None: # Last resort if no font could be loaded + font = ImageFont.load_default() + + # Measure text dimensions + bbox1 = draw.textbbox((0, 0), text_line1, font=font) + text_width1 = bbox1[2] - bbox1[0] + text_height1 = bbox1[3] - bbox1[1] + + bbox2 = draw.textbbox((0, 0), text_line2, font=font) + text_width2 = bbox2[2] - bbox2[0] + text_height2 = bbox2[3] - bbox2[1] + + # Calculate total height needed for both lines plus some padding + # Let's assume a small gap between lines (e.g., 0.1 * text_height) + line_gap = int(text_height1 * 0.2) # 20% of line height + total_text_height = text_height1 + text_height2 + line_gap + + # Check if text fits vertically and horizontally + if (total_text_height < tile_size * 0.9) and \ + (text_width1 < tile_size * 0.9) and \ + (text_width2 < tile_size * 0.9): + break # Font size is good, break out of loop + else: + font_size -= 1 # Reduce font size + if font_size <= 0: # Prevent infinite loop if text can never fit + font_size = 1 # Smallest possible font size + break + + except Exception as e: + # Handle cases where font loading or textbbox fails + print(f"Error during font sizing: {e}. 
Reducing font size and retrying.") + font_size -= 1 + if font_size <= 0: + font_size = 1 + break # Cannot make font smaller, stop + + # Final check: if font_size became 0 or less, ensure it's at least 1 + if font_size <= 0: + font_size = 1 + # Reload font with minimum size if needed + if font_path and os.path.exists(font_path): + font = ImageFont.truetype(font_path, font_size) + else: + font = ImageFont.load_default() + try: + scaled_font = ImageFont.load_default(size=font_size) + if draw.textbbox((0, 0), text_line1, font=scaled_font)[2] > 0: + font = scaled_font + except Exception: + pass + + + # Re-measure with final font size to ensure accurate positioning + bbox1 = draw.textbbox((0, 0), text_line1, font=font) + text_width1 = bbox1[2] - bbox1[0] + text_height1 = bbox1[3] - bbox1[1] + + bbox2 = draw.textbbox((0, 0), text_line2, font=font) + text_width2 = bbox2[2] - bbox2[0] + text_height2 = bbox2[3] - bbox2[1] + + # Calculate positions for centering + # Line 1: centered horizontally, midpoint at 1/3 tile height + x1 = (tile_size - text_width1) / 2 + y1 = (tile_size / 3) - (text_height1 / 2) + + # Line 2: centered horizontally, midpoint at 2/3 tile height + x2 = (tile_size - text_width2) / 2 + y2 = (tile_size * 2 / 3) - (text_height2 / 2) + + # Draw the text + draw.text((x1, y1), text_line1, fill=text_fill_color, font=font) + draw.text((x2, y2), text_line2, fill=text_fill_color, font=font) + + return img + +def generate_interpolated_grid_image(tile_size, count, font_path=None): + """ + Generates a large image composed of 'count' x 'count' tiles, + with colors bilinearly interpolated from corners and text indicating tile index. + + Args: + tile_size (int): The sidelength of each individual square tile in pixels. + count (int): The number of tiles per side of the large grid (e.g., if count=3, + it's a 3x3 grid of tiles). + font_path (str, optional): Path to a TrueType font file for the tile text. + If None, a default PIL font will be used. 
+ + Returns: + PIL.Image.Image: The generated large grid image. + """ + if count <= 0: + raise ValueError("Count must be a positive integer.") + + total_image_size = count * tile_size + main_img = Image.new('RGB', (total_image_size, total_image_size)) + + # Corner colors (R, G, B) as floats (0.0-1.0) + corner_colors = { + "top_left": (1.0, 0.0, 0.0), # Red + "top_right": (1.0, 0.0, 1.0), # Purple + "bottom_left": (0.0, 1.0, 0.0), # Green + "bottom_right": (0.0, 0.0, 1.0) # Blue + } + + # Handle the edge case where count is 1 + if count == 1: + # If count is 1, there's only one tile, which is the top-left corner + tile_color = corner_colors["top_left"] + tile_image = create_single_tile(tile_size, tile_color, 0, 0, font_path=font_path) + main_img.paste(tile_image, (0, 0)) + return main_img + + for y_tile in range(count): + for x_tile in range(count): + # Calculate normalized coordinates (u, v) for interpolation + # We divide by (count - 1) to ensure 0 and 1 values at the edges + u = x_tile / (count - 1) + v = y_tile / (count - 1) + + # Apply the simplified bilinear interpolation formulas + r_component = 1 - v + g_component = v * (1 - u) + b_component = u + + # Clamp components to be within 0.0 and 1.0 (due to potential floating point inaccuracies) + current_color = ( + max(0.0, min(1.0, r_component)), + max(0.0, min(1.0, g_component)), + max(0.0, min(1.0, b_component)) + ) + + # Create the individual tile + tile_image = create_single_tile(tile_size, current_color, x_tile, y_tile, font_path=font_path) + + # Paste the tile onto the main image + paste_x = x_tile * tile_size + paste_y = y_tile * tile_size + main_img.paste(tile_image, (paste_x, paste_y)) + + return main_img + + + + +import argparse +parser = argparse.ArgumentParser(description="Process two optional named parameters.") +parser.add_argument('--ts', type=int, default=128, help='Tile Size') +parser.add_argument('--gs', type=int, default=128, help='Grid Size') + +# Parse the arguments +args = 
parser.parse_args() + + +# --- Configuration --- +tile_sidelength = args.ts # Size of each individual tile in pixels +grid_count = args.gs # Number of tiles per side (e.g., 15 means 15x15 grid) + +# Path to a font file (adjust this for your system) +# On Windows, you can typically use 'C:/Windows/Fonts/arial.ttf' or similar +# You might need to find a suitable font on your system. +# For testing, you can use None to let PIL use its default font. +# If a specific font path is provided and doesn't exist, it will fall back to default. +windows_font_path = "C:/Windows/Fonts/arial.ttf" # Example path for Windows +# If Arial is not found, try Times New Roman: +# windows_font_path = "C:/Windows/Fonts/times.ttf" + +font_to_use = None +if os.name == 'nt': # Check if OS is Windows + if os.path.exists(windows_font_path): + font_to_use = windows_font_path + print(f"Using font: {windows_font_path}") + else: + print(f"Warning: Windows font not found at '{windows_font_path}'. Using default PIL font.") +else: # Assume Linux/macOS for other OS types + # Common Linux/macOS font paths (adjust as needed) + linux_font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf" + mac_font_path = "/Library/Fonts/Arial.ttf" + if os.path.exists(linux_font_path): + font_to_use = linux_font_path + print(f"Using font: {linux_font_path}") + elif os.path.exists(mac_font_path): + font_to_use = mac_font_path + print(f"Using font: {mac_font_path}") + else: + print("Warning: No common Linux/macOS font found. 
Using default PIL font.") + + +# --- Generate and save the image --- +print(f"Generating a {grid_count}x{grid_count} grid of tiles, each {tile_sidelength}x{tile_sidelength} pixels.") +print(f"Total image size will be {grid_count * tile_sidelength}x{grid_count * tile_sidelength} pixels.") + +try: + final_image = generate_interpolated_grid_image(tile_sidelength, grid_count, font_path=font_to_use) + output_filename = "../../media/tiled_grid_mip_0.exr" + np_img = np.array(final_image).astype(np.float32) / 255.0 # Normalize for EXR + spec = oiio.ImageSpec(final_image.width, final_image.height, 3, oiio.TypeDesc("float")) + out = oiio.ImageOutput.create(output_filename) + out.open(output_filename, spec) + out.write_image(np_img.reshape(-1)) # Flatten for OIIO’s expected input + out.close() + + print(f"Successfully created '{output_filename}'") + +except ValueError as e: + print(f"Error: {e}") +except Exception as e: + print(f"An unexpected error occurred: {e}") \ No newline at end of file diff --git a/75_CAD_3D/shaders/globals.hlsl b/75_CAD_3D/shaders/globals.hlsl new file mode 100644 index 000000000..901b13958 --- /dev/null +++ b/75_CAD_3D/shaders/globals.hlsl @@ -0,0 +1,105 @@ +#ifndef _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ +#define _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ + +// TODO[Erfan]: Turn off in the future, but keep enabled to test +// #define NBL_FORCE_EMULATED_FLOAT_64 + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __HLSL_VERSION +#include +#endif + +using namespace nbl::hlsl; + +#ifdef __HLSL_VERSION +using pfloat64_t = portable_float64_t; +using pfloat64_t2 = portable_float64_t2; +using pfloat64_t3 = portable_float64_t3; +#else +using pfloat64_t = float64_t; +using pfloat64_t2 = nbl::hlsl::vector; +using pfloat64_t3 = nbl::hlsl::vector; +#endif + +using pfloat64_t3x3 = portable_matrix_t3x3; +using pfloat64_t4x4 = portable_matrix_t4x4; + +struct PushConstants +{ + uint64_t triangleMeshVerticesBaseAddress; + uint32_t 
triangleMeshMainObjectIndex; + pfloat64_t4x4 viewProjectionMatrix; +}; + +struct Pointers +{ + uint64_t mainObjects; + uint64_t drawObjects; + uint64_t geometryBuffer; +}; +#ifndef __HLSL_VERSION +static_assert(sizeof(Pointers) == 24u); +#endif + +struct Globals +{ + Pointers pointers; + pfloat64_t4x4 defaultProjectionToNDC; +}; +#ifndef __HLSL_VERSION +static_assert(sizeof(Globals) == 152u); +#endif + +enum class MainObjectType : uint32_t +{ + NONE = 0u, + POLYLINE, + HATCH, + TEXT, + STATIC_IMAGE, + DTM, + GRID_DTM, + STREAMED_IMAGE, +}; + +// Consists of multiple DrawObjects +// [IDEA]: In GPU-driven rendering, to save mem for MainObject data fetching: many of these can be shared amongst different main objects, we could find these styles, settings, etc indices with upper_bound +// [TODO]: pack indices and members of mainObject and DrawObject + enforce max size for autosubmit --> but do it only after the mainobject definition is finalized in gpu-driven rendering work +struct MainObject +{ + uint32_t styleIdx; + uint32_t dtmSettingsIdx; + uint32_t customProjectionIndex; + uint32_t customClipRectIndex; + uint32_t transformationType; // todo pack later, it's just 2 possible values atm +}; + +struct DrawObject +{ + uint32_t type_subsectionIdx; // packed two uint16 into uint32 + uint32_t mainObjIndex; + uint64_t geometryAddress; +}; + +struct TriangleMeshVertex +{ + pfloat64_t3 pos; +}; + +#ifdef __HLSL_VERSION +[[vk::binding(0, 0)]] ConstantBuffer globals : register(b0); +#else +static_assert(alignof(pfloat64_t3x3)==8u); +static_assert(alignof(MainObject)==4u); +static_assert(alignof(DrawObject)==8u); +#endif + + +#endif diff --git a/75_CAD_3D/shaders/main_pipeline/common.hlsl b/75_CAD_3D/shaders/main_pipeline/common.hlsl new file mode 100644 index 000000000..677bf0ec9 --- /dev/null +++ b/75_CAD_3D/shaders/main_pipeline/common.hlsl @@ -0,0 +1,17 @@ +#ifndef _CAD_EXAMPLE_MAIN_PIPELINE_COMMON_HLSL_INCLUDED_ +#define _CAD_EXAMPLE_MAIN_PIPELINE_COMMON_HLSL_INCLUDED_ + 
+#include "../globals.hlsl" + +struct PSInput +{ + [[vk::location(0)]] float4 position : SV_Position; +}; + +// Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated + +// [[vk::binding(0, 0)]] ConstantBuffer globals; ---> moved to globals.hlsl + +[[vk::push_constant]] PushConstants pc; + +#endif diff --git a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl new file mode 100644 index 000000000..f82fc9eab --- /dev/null +++ b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl @@ -0,0 +1,7 @@ +#include "common.hlsl" + +[shader("pixel")] +float4 fragMain(PSInput input) : SV_Target +{ + return float4(0.0f, 0.0f, 1.0f, 1.0f); +} diff --git a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl new file mode 100644 index 000000000..7e81c85f6 --- /dev/null +++ b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl @@ -0,0 +1,17 @@ +#pragma shader_stage(vertex) + +#include "common.hlsl" + +[shader("vertex")] +PSInput vtxMain(uint vertexID : SV_VertexID) +{ + PSInput outV; + TriangleMeshVertex vtx = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * vertexID, 8u); + + outV.position.x = _static_cast(vtx.pos.x); + outV.position.y = _static_cast(vtx.pos.y); + outV.position.z = _static_cast(vtx.pos.z); + outV.position.w = 1.0f; + + return outV; +} diff --git a/CMakeLists.txt b/CMakeLists.txt index d945c547a..7a24e8345 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,6 +111,7 @@ if(NBL_BUILD_EXAMPLES) endif() add_subdirectory(74_QuantizedSequenceTests) + add_subdirectory(75_CAD_3D) # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS) From 18e71101932066506a86512ebf97e93073c2aa0c Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 10 Mar 2026 19:26:39 +0100 Subject: [PATCH 2/9] Example 75 now 
draws meshes --- 75_CAD_3D/CMakeLists.txt | 9 +- 75_CAD_3D/CTriangleMesh.cpp | 1 - 75_CAD_3D/CTriangleMesh.h | 1 + 75_CAD_3D/DrawResourcesFiller.cpp | 3 +- 75_CAD_3D/DrawResourcesFiller.h | 80 ++++---- 75_CAD_3D/main.cpp | 187 ++++++------------ 75_CAD_3D/shaders/globals.hlsl | 32 +-- 75_CAD_3D/shaders/main_pipeline/common.hlsl | 1 + .../main_pipeline/fragment_shader.hlsl | 10 +- .../shaders/main_pipeline/vertex_shader.hlsl | 54 ++++- 10 files changed, 166 insertions(+), 212 deletions(-) delete mode 100644 75_CAD_3D/CTriangleMesh.cpp diff --git a/75_CAD_3D/CMakeLists.txt b/75_CAD_3D/CMakeLists.txt index 794ba1c3c..144fb4a33 100644 --- a/75_CAD_3D/CMakeLists.txt +++ b/75_CAD_3D/CMakeLists.txt @@ -6,6 +6,7 @@ endif() set(EXAMPLE_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/DrawResourcesFiller.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/DrawResourcesFiller.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CTriangleMesh.h" ) set(EXAMPLE_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/../../3rdparty/boost/superproject/libs/math/include") @@ -38,14 +39,6 @@ endif() set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") set(SM 6_8) -set(REQUIRED_CAPS [=[ -{ - "kind": "features", - "name": "fragmentShaderPixelInterlock", - "type": "bool", - "values": [1] -} -]=]) set(JSON [=[ [ diff --git a/75_CAD_3D/CTriangleMesh.cpp b/75_CAD_3D/CTriangleMesh.cpp deleted file mode 100644 index 5564c0a51..000000000 --- a/75_CAD_3D/CTriangleMesh.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "CTriangleMesh.h" \ No newline at end of file diff --git a/75_CAD_3D/CTriangleMesh.h b/75_CAD_3D/CTriangleMesh.h index 8f941928a..2100c801d 100644 --- a/75_CAD_3D/CTriangleMesh.h +++ b/75_CAD_3D/CTriangleMesh.h @@ -49,6 +49,7 @@ class CTriangleMesh final m_indices.clear(); } +private: core::vector m_vertices; core::vector m_indices; }; \ No newline at end of file diff --git a/75_CAD_3D/DrawResourcesFiller.cpp b/75_CAD_3D/DrawResourcesFiller.cpp index 313c74358..f2de0793d 100644 --- a/75_CAD_3D/DrawResourcesFiller.cpp +++ 
b/75_CAD_3D/DrawResourcesFiller.cpp @@ -146,6 +146,7 @@ void DrawResourcesFiller::drawTriangleMesh( const size_t vertexBuffByteSize = mesh.getVertexBuffByteSize(); const size_t indexBuffByteSize = mesh.getIndexBuffByteSize(); + const size_t triangleDataByteSize = vertexBuffByteSize + indexBuffByteSize; const auto& indexBuffer = mesh.getIndices(); const auto& vertexBuffer = mesh.getVertices(); assert(indexBuffer.size() == vertexBuffer.size()); // TODO: figure out why it was needed then decide if this constraint needs to be kept @@ -153,7 +154,7 @@ void DrawResourcesFiller::drawTriangleMesh( DrawCallData drawCallData = {}; // Copy VertexBuffer - size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(vertexBuffByteSize, alignof(CTriangleMesh::vertex_t)); + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(triangleDataByteSize, alignof(CTriangleMesh::vertex_t)); drawCallData.triangleMeshVerticesBaseAddress = geometryBufferOffset; void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; memcpy(dst, vertexBuffer.data(), vertexBuffByteSize); diff --git a/75_CAD_3D/DrawResourcesFiller.h b/75_CAD_3D/DrawResourcesFiller.h index ea2bca02e..aca81b409 100644 --- a/75_CAD_3D/DrawResourcesFiller.h +++ b/75_CAD_3D/DrawResourcesFiller.h @@ -23,7 +23,6 @@ using namespace nbl::core; using namespace nbl::asset; static_assert(sizeof(DrawObject) == 16u); -static_assert(sizeof(MainObject) == 20u); // ! DrawResourcesFiller // ! This class provides important functionality to manage resources needed for a draw. @@ -47,6 +46,28 @@ struct DrawResourcesFiller static constexpr size_t GPUStructsMaxNaturalAlignment = 8u; static constexpr size_t MinimumDrawResourcesMemorySize = 512u * 1 << 20u; // 512MB + /** + * @brief Attempts to allocate draw resources within a given VRAM budget, retrying with progressively smaller sizes on failure. + * + * This function preserves the initial image-to-buffer memory ratio. 
If the initial sizes are too small, + * it scales them up to meet a minimum required threshold. On allocation failure, it reduces the memory + * sizes by a specified percentage and retries, until it either succeeds or the number of attempts exceeds `maxTries`. + * + * @param logicalDevice Pointer to the logical device used for allocation. + * @param maxImageMemorySize Initial image memory size (in bytes) to attempt allocation with. + * @param maxBufferMemorySize Initial buffer memory size (in bytes) to attempt allocation with. + * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried. + * @param reductionPercent The percentage by which to reduce the memory sizes after each failed attempt (e.g., 10 means reduce by 10%). + * @param maxTries Maximum number of attempts to try reducing and allocating memory. + * + * @return true if the allocation succeeded at any iteration; false if all attempts failed. + */ + bool allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, std::span memoryTypeIndexTryOrder, uint32_t reductionPercent = 10u, uint32_t maxTries = 32u); + + /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU + /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. + bool pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit); + /// @brief general parent struct for 1.ReservedCompute and 2.CPUGenerated Resources struct ResourceBase { @@ -132,38 +153,6 @@ struct DrawResourcesFiller typedef std::function SubmitFunc; void setSubmitDrawsFunction(const SubmitFunc& func); - /** - * @brief Attempts to allocate a single contiguous device-local memory block for draw resources, divided into image and buffer sections. 
- * - * The function allocates a single memory block and splits it into image and buffer arenas. - * - * @param logicalDevice Pointer to the logical device used for memory allocation and resource creation. - * @param requiredImageMemorySize The size in bytes of the memory required for images. - * @param requiredBufferMemorySize The size in bytes of the memory required for buffers. - * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried. - * - * @return true if the memory allocation and resource setup succeeded; false otherwise. - */ - bool allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize, std::span memoryTypeIndexTryOrder); - - /** - * @brief Attempts to allocate draw resources within a given VRAM budget, retrying with progressively smaller sizes on failure. - * - * This function preserves the initial image-to-buffer memory ratio. If the initial sizes are too small, - * it scales them up to meet a minimum required threshold. On allocation failure, it reduces the memory - * sizes by a specified percentage and retries, until it either succeeds or the number of attempts exceeds `maxTries`. - * - * @param logicalDevice Pointer to the logical device used for allocation. - * @param maxImageMemorySize Initial image memory size (in bytes) to attempt allocation with. - * @param maxBufferMemorySize Initial buffer memory size (in bytes) to attempt allocation with. - * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried. - * @param reductionPercent The percentage by which to reduce the memory sizes after each failed attempt (e.g., 10 means reduce by 10%). - * @param maxTries Maximum number of attempts to try reducing and allocating memory. - * - * @return true if the allocation succeeded at any iteration; false if all attempts failed. 
- */ - bool allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, std::span memoryTypeIndexTryOrder, uint32_t reductionPercent = 10u, uint32_t maxTries = 32u); - // Must be called at the end of each frame. // right before submitting the main draw that uses the currently queued geometry, images, or other objects/resources. // Registers the semaphore/value that will signal completion of this frame�s draw, @@ -175,13 +164,6 @@ struct DrawResourcesFiller const CTriangleMesh& mesh, SIntendedSubmitInfo& intendedNextSubmit); - /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU - /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. - bool pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit); - - /// @brief Records GPU copy commands for all staged buffer resourcesCollection into the active command buffer. - bool pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resourcesCollection); - /// @brief resets staging buffers and images void reset() { @@ -196,6 +178,24 @@ struct DrawResourcesFiller const size_t getCopiedResourcesSize() { return copiedResourcesSize; } const core::vector& getDrawCalls() const { return drawCalls; } +private: + /** + * @brief Attempts to allocate a single contiguous device-local memory block for draw resources, divided into image and buffer sections. + * + * The function allocates a single memory block and splits it into image and buffer arenas. + * + * @param logicalDevice Pointer to the logical device used for memory allocation and resource creation. + * @param requiredImageMemorySize The size in bytes of the memory required for images. + * @param requiredBufferMemorySize The size in bytes of the memory required for buffers. 
+ * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried. + * + * @return true if the memory allocation and resource setup succeeded; false otherwise. + */ + bool allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize, std::span memoryTypeIndexTryOrder); + + /// @brief Records GPU copy commands for all staged buffer resourcesCollection into the active command buffer. + bool pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resourcesCollection); + private: nbl::system::logger_opt_smart_ptr m_logger = nullptr; diff --git a/75_CAD_3D/main.cpp b/75_CAD_3D/main.cpp index ed7ddc039..7188e8b00 100644 --- a/75_CAD_3D/main.cpp +++ b/75_CAD_3D/main.cpp @@ -109,9 +109,11 @@ class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources .viewType = IGPUImageView::ET_2D, .format = getImage(i)->getCreationParameters().format }); - m_framebuffers[i] = device->createFramebuffer({{ + m_framebuffers[i] = device->createFramebuffer({ { .renderpass = core::smart_refctd_ptr(m_renderpass), .colorAttachments = &imageView.get(), + // TODO: + //.depthStencilAttachments = &depthImageView.get(), .width = sharedParams.width, .height = sharedParams.height }}); @@ -392,12 +394,31 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio }}, IGPURenderpass::SCreationParams::ColorAttachmentsEnd }; - + + // TODO: + //IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = { + // {{ + // { + // .format = asset::EF_D32_SFLOAT, + // .samples = IGPUImage::ESCF_1_BIT, + // .mayAlias = false + // }, + // /*.loadOp = */{IGPURenderpass::LOAD_OP::CLEAR}, + // /*.storeOp = */{IGPURenderpass::STORE_OP::STORE}, + // /*.initialLayout = */{IGPUImage::LAYOUT::UNDEFINED}, + // /*.finalLayout = */{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL} + // }}, + // 
IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd + //}; + IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = { {}, IGPURenderpass::SCreationParams::SubpassesEnd }; + subpasses[0].colorAttachments[0] = {.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}; + // TODO: + //subpasses[0].depthStencilAttachment = {{.render = {.attachmentIndex=0,.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}}; // We actually need external dependencies to ensure ordering of the Implicit Layout Transitions relative to the semaphore signals const IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { @@ -429,6 +450,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio smart_refctd_ptr renderpass; IGPURenderpass::SCreationParams params = {}; params.colorAttachments = colorAttachments; + // TODO: + //params.depthStencilAttachments = depthAttachments; params.subpasses = subpasses; params.dependencies = dependencies; renderpass = m_device->createRenderpass(params); @@ -700,7 +723,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio return true; } - void _submitDraws(SIntendedSubmitInfo& intendedSubmitInfo, bool inBetweenSubmit) + void submitDraws(SIntendedSubmitInfo& intendedSubmitInfo, bool inBetweenSubmit) { drawResourcesFiller.pushAllUploads(intendedSubmitInfo); @@ -716,7 +739,11 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio { // TODO: create a proper camera - auto view = hlsl::math::linalg::rhLookAt({ 300.0f, 300.0f, 300.0f }, { 0.0f, 0.0f, 0.0f }, { 0.0f, 0.0f, 1.0f }); + // animated camera which rotates around and always looks at the center + const double animationFactor = m_timeElapsed * 0.0003; + const float32_t3 cameraPosition = { 300.0f * std::cos(animationFactor), 300.0f, 300.0f * std::sin(animationFactor) }; + + auto view = hlsl::math::linalg::rhLookAt(cameraPosition, { 0.0f, 0.0f, 0.0f }, { 0.0f, 1.0f, 0.0f }); 
const float64_t aspectRatio = static_cast(m_window->getWidth()) / static_cast(m_window->getHeight()); auto proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix(hlsl::radians(60.0f), aspectRatio, 0.1f, 2000.0f); @@ -818,7 +845,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio PushConstants pc = { .triangleMeshVerticesBaseAddress = drawCall.triangleMeshVerticesBaseAddress + resourcesGPUBuffer->getDeviceAddress() + resourcesCollection.geometryInfo.bufferOffset, - .triangleMeshMainObjectIndex = drawCall.triangleMeshMainObjectIndex + .triangleMeshMainObjectIndex = drawCall.triangleMeshMainObjectIndex, + .viewProjectionMatrix = viewProjection }; cb->pushConstants(m_graphicsPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); @@ -865,102 +893,6 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio } } - // TODO: remove - void submitDraws(SIntendedSubmitInfo& intendedSubmitInfo, bool inBetweenSubmit) - { - m_currentRecordingCommandBufferInfo = intendedSubmitInfo.getCommandBufferForRecording(); // drawResourcesFiller.pushAllUploads might've overflow submitted and changed the current recording command buffer - - // Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state - auto* cb = m_currentRecordingCommandBufferInfo->cmdbuf; - - asset::SViewport vp = - { - .x = 0u, - .y = 0u, - .width = static_cast(m_window->getWidth()), - .height = static_cast(m_window->getHeight()), - .minDepth = 1.f, - .maxDepth = 0.f, - }; - cb->setViewport(0u, 1u, &vp); - - VkRect2D scissor = - { - .offset = { 0, 0 }, - .extent = { m_window->getWidth(), m_window->getHeight() }, - }; - cb->setScissor(0u, 1u, &scissor); - - nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo; - VkRect2D currentRenderArea; - const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = 
{0.f,0.f,0.f,0.f} }; - { - auto scRes = static_cast(m_surface->getSwapchainResources()); - currentRenderArea = - { - .offset = {0,0}, - .extent = {m_window->getWidth(),m_window->getHeight()} - }; - beginInfo = { - .renderpass = (inBetweenSubmit) ? renderpassInBetween.get() : renderpassFinal.get(), - .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), - .colorClearValues = &clearValue, - .depthStencilClearValues = nullptr, - .renderArea = currentRenderArea - }; - } - cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - - cb->bindGraphicsPipeline(m_graphicsPipeline.get()); - - { - PushConstants pc = { - .triangleMeshVerticesBaseAddress = 1, - .triangleMeshMainObjectIndex = 2 - }; - cb->pushConstants(m_graphicsPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); - - cb->draw(3, 1, 0, 0); - } - - cb->endRenderPass(); - - if (!inBetweenSubmit) - cb->endDebugMarker(); - - drawResourcesFiller.markFrameUsageComplete(intendedSubmitInfo.getFutureScratchSemaphore().value); - - if (inBetweenSubmit) - { - if (intendedSubmitInfo.overflowSubmit(m_currentRecordingCommandBufferInfo) != IQueue::RESULT::SUCCESS) - { - m_logger->log("overflow submit failed.", ILogger::ELL_ERROR); - } - } - else - { - const auto nextFrameIx = m_realFrameIx + 1u; - const IQueue::SSubmitInfo::SSemaphoreInfo thisFrameRendered = { - .semaphore = m_renderSemaphore.get(), - .value = nextFrameIx, - .stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS - }; - if (intendedSubmitInfo.submit(m_currentRecordingCommandBufferInfo, { &thisFrameRendered,1 }) == IQueue::RESULT::SUCCESS) - { - m_realFrameIx = nextFrameIx; - - IQueue::SSubmitInfo::SSemaphoreInfo presentWait = thisFrameRendered; - // the stages for a wait semaphore operation are about what stage you WAIT in, not what stage you wait for - presentWait.stageMask = PIPELINE_STAGE_FLAGS::NONE; // top of pipe, there's no explicit 
presentation engine stage - m_surface->present(m_currentImageAcquire.imageIndex, { &presentWait,1 }); - } - else - { - m_logger->log("regular submit failed.", ILogger::ELL_ERROR); - } - } - } - void endFrameRender(SIntendedSubmitInfo& intendedSubmitInfo) { submitDraws(intendedSubmitInfo, false); @@ -1010,24 +942,18 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio drawResourcesFiller.reset(); core::vector vertices = { - //{ float64_t2(0.0, 0.0), 100.0 }, //0 - //{ float64_t2(-200.0, -200.0), 10.0 }, //1 - //{ float64_t2(200.0, -200.0), 10.0 }, //2 - //{ float64_t2(200.0, 200.0), -20.0 }, //3 - //{ float64_t2(-200.0, 200.0), 10.0 }, //4 - - { float64_t3(0.0, 0.0, 100.0) }, - { float64_t3(-200.0, -200.0, 10.0) }, - { float64_t3(200.0, -100.0, 10.0) }, - { float64_t3(0.0, 0.0, 100.0) }, - { float64_t3(200.0, -100.0, 10.0) }, - { float64_t3(200.0, 200.0, -20.0) }, - { float64_t3(0.0, 0.0, 100.0) }, - { float64_t3(200.0, 200.0, -20.0) }, - { float64_t3(-200.0, 200.0, 10.0) }, - { float64_t3(0.0, 0.0, 100.0) }, - { float64_t3(-200.0, 200.0, 10.0) }, - { float64_t3(-200.0, -200.0, 10.0) }, + { float64_t3(0.0, 100.0, 0.0) }, + { float64_t3(-200.0, 10.0, -200.0) }, + { float64_t3(200.0, 10.0, -100.0) }, + { float64_t3(0.0, 100.0, 0.0) }, + { float64_t3(200.0, 10.0, -100.0) }, + { float64_t3(200.0, -20.0, 200.0) }, + { float64_t3(0.0, 100.0, 0.0) }, + { float64_t3(200.0, -20.0, 200.0) }, + { float64_t3(-200.0, 10.0, 200.0) }, + { float64_t3(0.0, 100.0, 0.0) }, + { float64_t3(-200.0, 10.0, 200.0) }, + { float64_t3(-200.0, 10.0, -200.0) }, }; core::vector indices = { @@ -1038,14 +964,22 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio }; CTriangleMesh mesh; - mesh.setVertices(std::move(vertices)); + mesh.setVertices(core::vector(vertices)); mesh.setIndices(std::move(indices)); + // pyramid A + drawResourcesFiller.drawTriangleMesh(mesh, intendedNextSubmit); + + // pyramid B + float64_t3 offset = { 500.0f, 
0.0f, 0.0f }; + for (auto& vertex : vertices) + vertex.pos += offset; + mesh.setVertices(std::move(vertices)); drawResourcesFiller.drawTriangleMesh(mesh, intendedNextSubmit); } protected: - clock_t::time_point start; + clock_t::time_point start; // TODO: am i missing somehting? why is it never initialized std::chrono::seconds timeout = std::chrono::seconds(0x7fffFFFFu); double m_timeElapsed = 0.0; @@ -1066,7 +1000,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio // pointer to one of the command buffer infos from above, this is the only command buffer used to record current submit in current frame, it will be updated by SIntendedSubmitInfo IQueue::SSubmitInfo::SCommandBufferInfo const * m_currentRecordingCommandBufferInfo; // pointer can change, value cannot - smart_refctd_ptr m_globalsBuffer; + smart_refctd_ptr m_globalsBuffer; DrawResourcesFiller drawResourcesFiller; // you can think of this as the scene data needed to draw everything, we only have one instance so let's use a timeline semaphore to sync all renders smart_refctd_ptr m_renderSemaphore; // timeline semaphore to sync frames together @@ -1079,13 +1013,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio uint64_t m_realFrameIx = 0u; - smart_refctd_ptr descriptorSetLayout0; - smart_refctd_ptr descriptorSetLayout1; - smart_refctd_ptr m_pipelineLayout; - smart_refctd_ptr resolveAlphaGraphicsPipeline; - smart_refctd_ptr m_debugGraphicsPipeline; - smart_refctd_ptr m_graphicsPipeline; - smart_refctd_ptr m_streamedImagesGraphicsPipeline; + smart_refctd_ptr m_pipelineLayout; + smart_refctd_ptr m_graphicsPipeline; smart_refctd_ptr m_window; smart_refctd_ptr> m_surface; diff --git a/75_CAD_3D/shaders/globals.hlsl b/75_CAD_3D/shaders/globals.hlsl index 901b13958..c080c7c4a 100644 --- a/75_CAD_3D/shaders/globals.hlsl +++ b/75_CAD_3D/shaders/globals.hlsl @@ -22,10 +22,12 @@ using namespace nbl::hlsl; using pfloat64_t = portable_float64_t; using 
pfloat64_t2 = portable_float64_t2; using pfloat64_t3 = portable_float64_t3; +using pfloat64_t4 = portable_float64_t4; #else using pfloat64_t = float64_t; using pfloat64_t2 = nbl::hlsl::vector; using pfloat64_t3 = nbl::hlsl::vector; +using pfloat64_t4 = nbl::hlsl::vector; #endif using pfloat64_t3x3 = portable_matrix_t3x3; @@ -40,12 +42,11 @@ struct PushConstants struct Pointers { - uint64_t mainObjects; uint64_t drawObjects; uint64_t geometryBuffer; }; #ifndef __HLSL_VERSION -static_assert(sizeof(Pointers) == 24u); +static_assert(sizeof(Pointers) == 16u); #endif struct Globals @@ -54,33 +55,9 @@ struct Globals pfloat64_t4x4 defaultProjectionToNDC; }; #ifndef __HLSL_VERSION -static_assert(sizeof(Globals) == 152u); +static_assert(sizeof(Globals) == 144u); #endif -enum class MainObjectType : uint32_t -{ - NONE = 0u, - POLYLINE, - HATCH, - TEXT, - STATIC_IMAGE, - DTM, - GRID_DTM, - STREAMED_IMAGE, -}; - -// Consists of multiple DrawObjects -// [IDEA]: In GPU-driven rendering, to save mem for MainObject data fetching: many of these can be shared amongst different main objects, we could find these styles, settings, etc indices with upper_bound -// [TODO]: pack indices and members of mainObject and DrawObject + enforce max size for autosubmit --> but do it only after the mainobject definition is finalized in gpu-driven rendering work -struct MainObject -{ - uint32_t styleIdx; - uint32_t dtmSettingsIdx; - uint32_t customProjectionIndex; - uint32_t customClipRectIndex; - uint32_t transformationType; // todo pack later, it's just 2 possible values atm -}; - struct DrawObject { uint32_t type_subsectionIdx; // packed two uint16 into uint32 @@ -97,7 +74,6 @@ struct TriangleMeshVertex [[vk::binding(0, 0)]] ConstantBuffer globals : register(b0); #else static_assert(alignof(pfloat64_t3x3)==8u); -static_assert(alignof(MainObject)==4u); static_assert(alignof(DrawObject)==8u); #endif diff --git a/75_CAD_3D/shaders/main_pipeline/common.hlsl b/75_CAD_3D/shaders/main_pipeline/common.hlsl 
index 677bf0ec9..7866b5b8c 100644 --- a/75_CAD_3D/shaders/main_pipeline/common.hlsl +++ b/75_CAD_3D/shaders/main_pipeline/common.hlsl @@ -6,6 +6,7 @@ struct PSInput { [[vk::location(0)]] float4 position : SV_Position; + [[vk::location(1)]] float3 normal : COLOR1; }; // Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated diff --git a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl index f82fc9eab..d61b99275 100644 --- a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl +++ b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl @@ -1,7 +1,15 @@ #include "common.hlsl" +static const float32_t3 SunlightDirection = float32_t3(0.7071f, -0.7071f, 0.0f); +static const float32_t3 TerrainColor = float32_t3(1.0f, 1.0f, 1.0f); + [shader("pixel")] float4 fragMain(PSInput input) : SV_Target { - return float4(0.0f, 0.0f, 1.0f, 1.0f); + static const float AmbientLightIntensity = 0.1f; + const float diffuseLightIntensity = max(dot(-SunlightDirection, input.normal), 0.0f); + + const float32_t3 fragColor = (AmbientLightIntensity + diffuseLightIntensity) * TerrainColor; + + return float32_t4(fragColor, 1.0f); } diff --git a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl index 7e81c85f6..98996ba79 100644 --- a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl +++ b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl @@ -8,10 +8,56 @@ PSInput vtxMain(uint vertexID : SV_VertexID) PSInput outV; TriangleMeshVertex vtx = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * vertexID, 8u); - outV.position.x = _static_cast(vtx.pos.x); - outV.position.y = _static_cast(vtx.pos.y); - outV.position.z = _static_cast(vtx.pos.z); - outV.position.w = 1.0f; + // calculate object space normal, for now we can treat it as the world space normal + { + const uint32_t currentVertexWithinTriangleIndex = vertexID % 3; + const 
uint32_t firstVertexOfCurrentTriangleIndex = vertexID - currentVertexWithinTriangleIndex; + + TriangleMeshVertex triangleVertices[3]; + triangleVertices[0] = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * firstVertexOfCurrentTriangleIndex, 8u); + triangleVertices[1] = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * (firstVertexOfCurrentTriangleIndex + 1), 8u); + triangleVertices[2] = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * (firstVertexOfCurrentTriangleIndex + 2), 8u); + + // TODO: calculate on pfloat64_t + float32_t3 vertex0 = _static_cast(triangleVertices[0].pos); + float32_t3 vertex1 = _static_cast(triangleVertices[1].pos); + float32_t3 vertex2 = _static_cast(triangleVertices[2].pos); + + float32_t3 triangleEdge0 = vertex1 - vertex0; + float32_t3 triangleEdge1 = vertex2 - vertex0; + + outV.normal = (normalize(cross(triangleEdge1, triangleEdge0)) + 1.0f) * 0.5f; + } + + pfloat64_t4 pos; + pos.x = vtx.pos.x; + pos.y = vtx.pos.y; + pos.z = vtx.pos.z; + pos.w = _static_cast(1.0f); + + + outV.position = _static_cast(pos); + + //pos = mul(pc.viewProjectionMatrix, pos); + // TODO: use pc.viewProjectionMatrix and multiply it with pfloat64_t4 pos instead fix portable_matrix with portable_float multiplication + float4x4 viewProjMatrix; + for (int i = 0; i < 4; ++i) + { + viewProjMatrix[i][0] = _static_cast(pc.viewProjectionMatrix[i].x); + viewProjMatrix[i][1] = _static_cast(pc.viewProjectionMatrix[i].y); + viewProjMatrix[i][2] = _static_cast(pc.viewProjectionMatrix[i].z); + viewProjMatrix[i][3] = _static_cast(pc.viewProjectionMatrix[i].w); + } + + /*if (vertexID == 0) + { + printf("%f, %f, %f, %f", a[0][0], a[0][1], a[0][2], a[0][3]); + printf("%f, %f, %f, %f", a[1][0], a[1][1], a[1][2], a[1][3]); + printf("%f, %f, %f, %f", a[2][0], a[2][1], a[2][2], a[2][3]); + printf("%f, %f, %f, %f", a[3][0], a[3][1], a[3][2], a[3][3]); + }*/ + + outV.position = 
mul(viewProjMatrix, outV.position); return outV; } From ac6c8604adcc936ec9ad131c321d665651af5377 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 11 Mar 2026 22:53:38 +0100 Subject: [PATCH 3/9] Added camera --- 75_CAD_3D/main.cpp | 140 ++++++++++++++++++++++++++++----------------- 1 file changed, 89 insertions(+), 51 deletions(-) diff --git a/75_CAD_3D/main.cpp b/75_CAD_3D/main.cpp index 7188e8b00..37f9927b4 100644 --- a/75_CAD_3D/main.cpp +++ b/75_CAD_3D/main.cpp @@ -64,6 +64,32 @@ class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources public: CSwapchainResources() = default; + // TODO: this is a prototype, depth images should be probably not created in the initialize function + void initialize(const smart_refctd_ptr& window, const core::smart_refctd_ptr& device) + { + asset::E_FORMAT depthFormat = asset::EF_D32_SFLOAT; + + for (auto& depthImage : depthImages) + { + IGPUImage::SCreationParams imgInfo; + imgInfo.format = depthFormat; + imgInfo.type = IGPUImage::ET_2D; + imgInfo.extent.width = window->getWidth(); + imgInfo.extent.height = window->getHeight(); + imgInfo.extent.depth = 1u; + imgInfo.mipLevels = 1u; + imgInfo.arrayLayers = 1u; + imgInfo.samples = asset::ICPUImage::ESCF_1_BIT; + imgInfo.tiling = IGPUImage::TILING::OPTIMAL; + imgInfo.usage = asset::IImage::E_USAGE_FLAGS::EUF_RENDER_ATTACHMENT_BIT; + + depthImage = device->createImage(std::move(imgInfo)); + auto memReq = depthImage->getMemoryReqs(); + memReq.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + device->allocate(memReq, depthImage.get()); + } + } + inline E_FORMAT deduceRenderpassFormat(ISurface* surface, IPhysicalDevice* physDev) { ISwapchain::SCreationParams swapchainParams = {.surface=smart_refctd_ptr(surface), }; @@ -109,11 +135,24 @@ class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources .viewType = IGPUImageView::ET_2D, .format = getImage(i)->getCreationParameters().format }); + auto depthImageView = 
device->createImageView({ + .flags = IGPUImageView::ECF_NONE, + .subUsages = IGPUImage::EUF_RENDER_ATTACHMENT_BIT, + .image = core::smart_refctd_ptr(depthImages[i]), + .viewType = IGPUImageView::ET_2D, + .format = depthImages[i]->getCreationParameters().format, + .subresourceRange = { + .aspectMask = asset::IImage::EAF_DEPTH_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + } + }); m_framebuffers[i] = device->createFramebuffer({ { .renderpass = core::smart_refctd_ptr(m_renderpass), + .depthStencilAttachments = &depthImageView.get(), .colorAttachments = &imageView.get(), - // TODO: - //.depthStencilAttachments = &depthImageView.get(), .width = sharedParams.width, .height = sharedParams.height }}); @@ -126,6 +165,7 @@ class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources // Per-swapchain core::smart_refctd_ptr m_renderpass; std::array,ISwapchain::MaxImages> m_framebuffers; + std::array, ISwapchain::MaxImages> depthImages = {}; }; class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplication, public nbl::examples::BuiltinResourcesApplication @@ -395,21 +435,20 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio IGPURenderpass::SCreationParams::ColorAttachmentsEnd }; - // TODO: - //IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = { - // {{ - // { - // .format = asset::EF_D32_SFLOAT, - // .samples = IGPUImage::ESCF_1_BIT, - // .mayAlias = false - // }, - // /*.loadOp = */{IGPURenderpass::LOAD_OP::CLEAR}, - // /*.storeOp = */{IGPURenderpass::STORE_OP::STORE}, - // /*.initialLayout = */{IGPUImage::LAYOUT::UNDEFINED}, - // /*.finalLayout = */{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL} - // }}, - // IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd - //}; + IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = { + {{ + { + .format = asset::EF_D32_SFLOAT, + .samples = 
IGPUImage::ESCF_1_BIT, + .mayAlias = false + }, + /*.loadOp = */{IGPURenderpass::LOAD_OP::CLEAR}, + /*.storeOp = */{IGPURenderpass::STORE_OP::STORE}, + /*.initialLayout = */{IGPUImage::LAYOUT::UNDEFINED}, + /*.finalLayout = */{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL} + }}, + IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd + }; IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = { {}, @@ -417,8 +456,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio }; subpasses[0].colorAttachments[0] = {.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}; - // TODO: - //subpasses[0].depthStencilAttachment = {{.render = {.attachmentIndex=0,.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}}; + subpasses[0].depthStencilAttachment = {{.render = {.attachmentIndex=0,.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}}; // We actually need external dependencies to ensure ordering of the Implicit Layout Transitions relative to the semaphore signals const IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { @@ -450,8 +488,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio smart_refctd_ptr renderpass; IGPURenderpass::SCreationParams params = {}; params.colorAttachments = colorAttachments; - // TODO: - //params.depthStencilAttachments = depthAttachments; + params.depthStencilAttachments = depthAttachments; params.subpasses = subpasses; params.dependencies = dependencies; renderpass = m_device->createRenderpass(params); @@ -508,6 +545,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio return logFail("Could not create Window & Surface!"); auto scResources = std::make_unique(); + scResources->initialize(m_window, m_device); const auto format = scResources->deduceRenderpassFormat(m_surface->getSurface(), m_physicalDevice); // TODO: DO I need to recreate render passes if swapchain gets recreated with different format? 
renderpassInitial = createRenderpass(format, IGPURenderpass::LOAD_OP::CLEAR, IImage::LAYOUT::UNDEFINED, IImage::LAYOUT::ATTACHMENT_OPTIMAL); renderpassInBetween = createRenderpass(format, IGPURenderpass::LOAD_OP::LOAD, IImage::LAYOUT::ATTACHMENT_OPTIMAL, IImage::LAYOUT::ATTACHMENT_OPTIMAL); @@ -567,7 +605,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio .rasterization = { .polygonMode = EPM_FILL, .faceCullingMode = EFCM_NONE, - .depthWriteEnable = false, + .depthWriteEnable = true, + .depthCompareOp = asset::E_COMPARE_OP::ECO_LESS }, .blend = {}, }; @@ -614,6 +653,16 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio m_intendedNextSubmit.scratchCommandBuffers = m_commandBufferInfos; m_currentRecordingCommandBufferInfo = &m_commandBufferInfos[0]; + // camera + { + const core::vectorSIMDf cameraPosition(300.0f, 300.0f, 300.0f); + const core::vectorSIMDf cameraTarget(0.0f, 0.0f, 0.0f); + const float32_t aspectRatio = static_cast(m_window->getWidth()) / static_cast(m_window->getHeight()); + float32_t4x4 projectionMatrix = hlsl::math::thin_lens::lhPerspectiveFovMatrix(core::radians(60.0f), aspectRatio, 0.1f, 10000.0f); + camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); + camera.setMoveSpeed(30.0f); + } + return true; } @@ -621,24 +670,20 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio inline void workLoopBody() override { auto now = std::chrono::high_resolution_clock::now(); - double dt = std::chrono::duration_cast(now - lastTime).count(); + auto dtMilliseconds = std::chrono::duration_cast(now - lastTime); + double dt = dtMilliseconds.count(); lastTime = now; m_timeElapsed += dt; m_inputSystem->getDefaultMouse(&mouse); m_inputSystem->getDefaultKeyboard(&keyboard); - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void - { - } - , m_logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& 
events) -> void - { - for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++) - { - } - } - , m_logger.get()); + { + camera.beginInputProcessing(dtMilliseconds); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); }, m_logger.get()); + camera.endInputProcessing(dtMilliseconds); + } if (!beginFrameRender()) return; @@ -708,11 +753,17 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio .extent = {m_window->getWidth(),m_window->getHeight()} }; + IGPUCommandBuffer::SClearDepthStencilValue depthClear = + { + .depth = 1.0f, + .stencil = 0 + }; + beginInfo = { .renderpass = renderpassInitial.get(), .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), .colorClearValues = &clearValue, - .depthStencilClearValues = nullptr, + .depthStencilClearValues = &depthClear, .renderArea = currentRenderArea }; } @@ -735,21 +786,6 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio const auto& resourcesCollection = drawResourcesFiller.getResourcesCollection(); const auto& resourcesGPUBuffer = drawResourcesFiller.getResourcesGPUBuffer(); - float64_t4x4 viewProjection; - { - // TODO: create a proper camera - - // animated camera which rotates around and always looks at the center - const double animationFactor = m_timeElapsed * 0.0003; - const float32_t3 cameraPosition = { 300.0f * std::cos(animationFactor), 300.0f, 300.0f * std::sin(animationFactor) }; - - auto view = hlsl::math::linalg::rhLookAt(cameraPosition, { 0.0f, 0.0f, 0.0f }, { 0.0f, 1.0f, 0.0f }); - const float64_t aspectRatio = static_cast(m_window->getWidth()) / static_cast(m_window->getHeight()); - auto proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix(hlsl::radians(60.0f), aspectRatio, 0.1f, 2000.0f); - - viewProjection = 
hlsl::mul(proj, nbl::hlsl::math::linalg::promote_affine<4, 4>(view)); - } - Globals globalData = {}; uint64_t baseAddress = resourcesGPUBuffer->getDeviceAddress(); globalData.pointers = { @@ -846,7 +882,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio PushConstants pc = { .triangleMeshVerticesBaseAddress = drawCall.triangleMeshVerticesBaseAddress + resourcesGPUBuffer->getDeviceAddress() + resourcesCollection.geometryInfo.bufferOffset, .triangleMeshMainObjectIndex = drawCall.triangleMeshMainObjectIndex, - .viewProjectionMatrix = viewProjection + .viewProjectionMatrix = static_cast(camera.getConcatenatedMatrix()) }; cb->pushConstants(m_graphicsPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); @@ -1020,6 +1056,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio smart_refctd_ptr> m_surface; smart_refctd_ptr pseudoStencilImageView; smart_refctd_ptr colorStorageImageView; + + Camera camera; }; NBL_MAIN_FUNC(ComputerAidedDesign) From 0bc7c7d92aa9ece2e8408c57cea5113e1bada4e6 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 12 Mar 2026 13:01:35 +0100 Subject: [PATCH 4/9] Fixed depth buffer --- 75_CAD_3D/main.cpp | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/75_CAD_3D/main.cpp b/75_CAD_3D/main.cpp index 37f9927b4..4e5bea76b 100644 --- a/75_CAD_3D/main.cpp +++ b/75_CAD_3D/main.cpp @@ -442,10 +442,10 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio .samples = IGPUImage::ESCF_1_BIT, .mayAlias = false }, - /*.loadOp = */{IGPURenderpass::LOAD_OP::CLEAR}, + /*.loadOp = */{loadOp}, /*.storeOp = */{IGPURenderpass::STORE_OP::STORE}, - /*.initialLayout = */{IGPUImage::LAYOUT::UNDEFINED}, - /*.finalLayout = */{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL} + /*.initialLayout = */{initialLayout}, + /*.finalLayout = */{IImage::LAYOUT::ATTACHMENT_OPTIMAL} }}, 
IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd }; @@ -606,12 +606,14 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio .polygonMode = EPM_FILL, .faceCullingMode = EFCM_NONE, .depthWriteEnable = true, - .depthCompareOp = asset::E_COMPARE_OP::ECO_LESS + .depthCompareOp = asset::E_COMPARE_OP::ECO_GREATER }, .blend = {}, }; mainGraphicsPipelineParams.renderpass = compatibleRenderPass.get(); + assert(mainGraphicsPipelineParams.cached.rasterization.depthTestEnable()); + // Create Main Graphics Pipelines { video::IGPUPipelineBase::SShaderSpecInfo specInfo[2] = { @@ -660,7 +662,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio const float32_t aspectRatio = static_cast(m_window->getWidth()) / static_cast(m_window->getHeight()); float32_t4x4 projectionMatrix = hlsl::math::thin_lens::lhPerspectiveFovMatrix(core::radians(60.0f), aspectRatio, 0.1f, 10000.0f); camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); - camera.setMoveSpeed(30.0f); + camera.setMoveSpeed(50.0f); } return true; @@ -755,7 +757,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio IGPUCommandBuffer::SClearDepthStencilValue depthClear = { - .depth = 1.0f, + .depth = 0.0f, .stencil = 0 }; @@ -856,6 +858,10 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo; VkRect2D currentRenderArea; const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,0.f} }; + IGPUCommandBuffer::SClearDepthStencilValue depthClearValue = { + .depth = 1.0f, + .stencil = 0 + }; { auto scRes = static_cast(m_surface->getSwapchainResources()); currentRenderArea = @@ -863,11 +869,12 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio .offset = {0,0}, .extent = {m_window->getWidth(),m_window->getHeight()} }; + beginInfo = { .renderpass = (inBetweenSubmit) 
? renderpassInBetween.get():renderpassFinal.get(), .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), .colorClearValues = &clearValue, - .depthStencilClearValues = nullptr, + .depthStencilClearValues = &depthClearValue, .renderArea = currentRenderArea }; } From c0d264d472d6581534f9744d942b81452006022a Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 17 Mar 2026 21:31:57 +0100 Subject: [PATCH 5/9] Added DTM settings --- 75_CAD_3D/CTriangleMesh.h | 64 ++++++++ 75_CAD_3D/DrawResourcesFiller.cpp | 83 +++++++++- 75_CAD_3D/DrawResourcesFiller.h | 73 ++++++++- 75_CAD_3D/main.cpp | 142 +++++++++++++++++- 75_CAD_3D/shaders/globals.hlsl | 98 +++++++++++- 75_CAD_3D/shaders/main_pipeline/common.hlsl | 1 + .../main_pipeline/fragment_shader.hlsl | 10 +- .../shaders/main_pipeline/vertex_shader.hlsl | 3 + 8 files changed, 455 insertions(+), 19 deletions(-) diff --git a/75_CAD_3D/CTriangleMesh.h b/75_CAD_3D/CTriangleMesh.h index 2100c801d..f5b9b034a 100644 --- a/75_CAD_3D/CTriangleMesh.h +++ b/75_CAD_3D/CTriangleMesh.h @@ -6,6 +6,70 @@ using namespace nbl; +struct DTMHeightShadingSettingsInfo +{ + // Height Shading Mode + E_HEIGHT_SHADING_MODE heightShadingMode; + + // Used as fixed interval length for "DISCRETE_FIXED_LENGTH_INTERVALS" shading mode + float intervalLength; + + // Converts an interval index to its corresponding height value + // For example, if this value is 10.0, then an interval index of 2 corresponds to a height of 20.0. + // This computed height is later used to determine the interpolated color for shading. 
+ // It makes sense for this variable to be always equal to `intervalLength` but sometimes it's a different scaling so that last index corresponds to largestHeight + float intervalIndexToHeightMultiplier; + + // Used for "DISCRETE_FIXED_LENGTH_INTERVALS" shading mode + // If `isCenteredShading` is true, the intervals are centered around `minHeight`, meaning the + // first interval spans [minHeight - intervalLength / 2.0, minHeight + intervalLength / 2.0]. + // Otherwise, intervals are aligned from `minHeight` upward, so the first interval spans + // [minHeight, minHeight + intervalLength]. + bool isCenteredShading; + + void addHeightColorMapEntry(float height, float32_t4 color) + { + heightColorSet.emplace(height, color); + } + + bool fillShaderDTMSettingsHeightColorMap(DTMSettings& dtmSettings) const + { + const uint32_t mapSize = heightColorSet.size(); + if (mapSize > DTMHeightShadingSettings::HeightColorMapMaxEntries) + return false; + dtmSettings.heightShadingSettings.heightColorEntryCount = mapSize; + + int index = 0; + for (auto it = heightColorSet.begin(); it != heightColorSet.end(); ++it) + { + dtmSettings.heightShadingSettings.heightColorMapHeights[index] = it->height; + dtmSettings.heightShadingSettings.heightColorMapColors[index] = it->color; + ++index; + } + + return true; + } + +private: + struct HeightColor + { + float height; + float32_t4 color; + + bool operator<(const HeightColor& other) const + { + return height < other.height; + } + }; + + std::set heightColorSet; +}; + +struct DTMSettingsInfo +{ + DTMHeightShadingSettingsInfo heightShadingInfo; +}; + class CTriangleMesh final { public: diff --git a/75_CAD_3D/DrawResourcesFiller.cpp b/75_CAD_3D/DrawResourcesFiller.cpp index f2de0793d..81ab6b451 100644 --- a/75_CAD_3D/DrawResourcesFiller.cpp +++ b/75_CAD_3D/DrawResourcesFiller.cpp @@ -137,10 +137,19 @@ bool DrawResourcesFiller::allocateDrawResourcesWithinAvailableVRAM(ILogicalDevic void DrawResourcesFiller::drawTriangleMesh( const CTriangleMesh& 
mesh, + const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) { - // TODO: main objects - // beginMainObject(); + setActiveDTMSettings(dtmSettingsInfo); + beginMainObject(MainObjectType::DTM); + + uint32_t mainObjectIdx = acquireActiveMainObjectIndex(intendedNextSubmit); + if (mainObjectIdx == InvalidMainObjectIdx) + { + m_logger.log("drawTriangleMesh: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } // TODO: for now we add whole mesh at once, instead we should add triangle by triangle and see check if we overflow memory @@ -165,11 +174,11 @@ void DrawResourcesFiller::drawTriangleMesh( drawCallData.indexBufferOffset = geometryBufferOffset; memcpy(dst, indexBuffer.data(), indexBuffByteSize); - drawCallData.triangleMeshMainObjectIndex = 0u; // TODO: fix when implementing main objects + drawCallData.triangleMeshMainObjectIndex = mainObjectIdx; drawCallData.indexCount = mesh.getIndexCount(); drawCalls.push_back(drawCallData); - //endMainObject(); + endMainObject(); } bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit) @@ -220,6 +229,8 @@ bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSub return true; }; + copyCPUFilledDrawBuffer(resources.mainObjects); + copyCPUFilledDrawBuffer(resources.dtmSettings); copyCPUFilledDrawBuffer(resources.drawObjects); copyCPUFilledDrawBuffer(resources.indexBuffer); copyCPUFilledDrawBuffer(resources.geometryInfo); @@ -233,4 +244,68 @@ void DrawResourcesFiller::markFrameUsageComplete(uint64_t drawSubmitWaitValue) currentFrameIndex++; // TODO[LATER]: take into account that currentFrameIndex was submitted with drawSubmitWaitValue; Use that value when deallocating the resources marked with this frame index // Currently, for evictions the worst case value will be waited for, as there is no way yet to know which semaphoroe value will signal the completion of the (to be evicted) resource's 
usage +} + +uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (activeMainObjectIndex != InvalidMainObjectIdx) + return activeMainObjectIndex; + + if (activeMainObjectType == MainObjectType::NONE) + { + assert(false); // You're probably trying to acquire mainObjectIndex outside of startMainObject, endMainObject scope + return InvalidMainObjectIdx; + } + + const bool needsDTMSettings = activeMainObjectType == MainObjectType::DTM; + + MainObject mainObject = {}; + mainObject.dtmSettingsIdx = (needsDTMSettings) ? acquireActiveDTMSettingsIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidDTMSettingsIdx; + activeMainObjectIndex = resourcesCollection.mainObjects.addAndGetOffset(mainObject); + return activeMainObjectIndex; +} + +uint32_t DrawResourcesFiller::acquireActiveDTMSettingsIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (activeDTMSettingsIndex == InvalidDTMSettingsIdx) + activeDTMSettingsIndex = addDTMSettings_SubmitIfNeeded(activeDTMSettings, intendedNextSubmit); + + return activeDTMSettingsIndex; +} + +uint32_t DrawResourcesFiller::addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit) +{ + // before calling `addDTMSettings_Internal` we have made sute we have enough mem for + uint32_t outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit); + return outDTMSettingIdx; +} + +uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) +{ + DTMSettings dtmSettings; + + switch (dtmSettingsInfo.heightShadingInfo.heightShadingMode) + { + case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + dtmSettings.heightShadingSettings.intervalLength = std::numeric_limits::infinity(); + break; + case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + dtmSettings.heightShadingSettings.intervalLength = 
dtmSettingsInfo.heightShadingInfo.intervalLength; + break; + case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + dtmSettings.heightShadingSettings.intervalLength = 0.0f; + break; + } + dtmSettings.heightShadingSettings.intervalIndexToHeightMultiplier = dtmSettingsInfo.heightShadingInfo.intervalIndexToHeightMultiplier; + dtmSettings.heightShadingSettings.isCenteredShading = static_cast(dtmSettingsInfo.heightShadingInfo.isCenteredShading); + dtmSettingsInfo.heightShadingInfo.fillShaderDTMSettingsHeightColorMap(dtmSettings); + + for (uint32_t i = 0u; i < resourcesCollection.dtmSettings.vector.size(); ++i) + { + const DTMSettings& itr = resourcesCollection.dtmSettings.vector[i]; + if (itr == dtmSettings) + return i; + } + + return resourcesCollection.dtmSettings.addAndGetOffset(dtmSettings); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers } \ No newline at end of file diff --git a/75_CAD_3D/DrawResourcesFiller.h b/75_CAD_3D/DrawResourcesFiller.h index aca81b409..c037b634d 100644 --- a/75_CAD_3D/DrawResourcesFiller.h +++ b/75_CAD_3D/DrawResourcesFiller.h @@ -127,8 +127,11 @@ struct DrawResourcesFiller // TODO: rename to staged resources buffers or something like that struct ResourcesCollection { + // auto-submission level 0 resources (settings that mainObj references) + CPUGeneratedResource dtmSettings; + // auto-submission level 1 buffers (mainObj that drawObjs references, if all drawObjs+idxBuffer+geometryInfo doesn't fit into mem this will be broken down into many) - //CPUGeneratedResource mainObjects; + CPUGeneratedResource mainObjects; // auto-submission level 2 buffers CPUGeneratedResource drawObjects; @@ -140,6 +143,8 @@ struct DrawResourcesFiller size_t calculateTotalConsumption() const { return + dtmSettings.getAlignedStorageSize() + + mainObjects.getAlignedStorageSize() + drawObjects.getAlignedStorageSize() + indexBuffer.getAlignedStorageSize() + geometryInfo.getAlignedStorageSize(); @@ 
-162,11 +167,16 @@ struct DrawResourcesFiller void drawTriangleMesh( const CTriangleMesh& mesh, + const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit); /// @brief resets staging buffers and images void reset() { + resetDrawObjects(); + resetMainObjects(); + resetDTMSettings(); + drawCalls.clear(); } @@ -196,6 +206,60 @@ struct DrawResourcesFiller /// @brief Records GPU copy commands for all staged buffer resourcesCollection into the active command buffer. bool pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resourcesCollection); + // Gets resource index to the active main object data + // TODO: submit if overflow + uint32_t acquireActiveMainObjectIndex(SIntendedSubmitInfo& intendedNextSubmit); + + uint32_t acquireActiveDTMSettingsIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); + + uint32_t addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); + + uint32_t addDTMSettings_Internal(const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit); + + inline void beginMainObject(MainObjectType type) + { + activeMainObjectType = type; + activeMainObjectIndex = InvalidMainObjectIdx; + } + + inline void endMainObject() + { + activeMainObjectType = MainObjectType::NONE; + activeMainObjectIndex = InvalidMainObjectIdx; + } + + inline void setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo) + { + activeDTMSettings = dtmSettingsInfo; + activeDTMSettingsIndex = InvalidDTMSettingsIdx; + } + + inline const size_t calculateRemainingResourcesSize() const + { + assert(resourcesGPUBuffer->getSize() >= resourcesCollection.calculateTotalConsumption()); + return resourcesGPUBuffer->getSize() - resourcesCollection.calculateTotalConsumption(); + } + + void resetMainObjects() + { + resourcesCollection.mainObjects.vector.clear(); + activeMainObjectIndex = InvalidMainObjectIdx; + } + + // these resources are data related to chunks of a 
whole mainObject + void resetDrawObjects() + { + resourcesCollection.drawObjects.vector.clear(); + resourcesCollection.indexBuffer.vector.clear(); + resourcesCollection.geometryInfo.vector.clear(); + } + + void resetDTMSettings() + { + resourcesCollection.dtmSettings.vector.clear(); + activeDTMSettingsIndex = InvalidDTMSettingsIdx; + } + private: nbl::system::logger_opt_smart_ptr m_logger = nullptr; @@ -217,4 +281,11 @@ struct DrawResourcesFiller size_t copiedResourcesSize; SubmitFunc submitDraws; + + // Active Resources we need to keep track of and push to resources buffer if needed. + MainObjectType activeMainObjectType; + uint32_t activeMainObjectIndex = InvalidMainObjectIdx; + + DTMSettingsInfo activeDTMSettings; + uint32_t activeDTMSettingsIndex = InvalidDTMSettingsIdx; }; \ No newline at end of file diff --git a/75_CAD_3D/main.cpp b/75_CAD_3D/main.cpp index 4e5bea76b..12f13d345 100644 --- a/75_CAD_3D/main.cpp +++ b/75_CAD_3D/main.cpp @@ -559,13 +559,60 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio allocateResources(); - const asset::SPushConstantRange range = { + // Create DescriptorSetLayout, PipelineLayout and update DescriptorSets + { + video::IGPUDescriptorSetLayout::SBinding bindingsSet0[] = { + { + .binding = 0u, + .type = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1u, + } + }; + m_descriptorSetLayout0 = m_device->createDescriptorSetLayout(bindingsSet0); + if (!m_descriptorSetLayout0) + return logFail("Failed to Create Descriptor Layout 0"); + + const asset::SPushConstantRange range = { .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, .offset = 0, .size = sizeof(PushConstants) - }; + }; + + const video::IGPUDescriptorSetLayout* const layouts[1u] = { m_descriptorSetLayout0.get() 
}; + + smart_refctd_ptr descriptorPool = nullptr; + { + const uint32_t setCounts[2u] = { 1u, 1u }; + descriptorPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, layouts, setCounts); + if (!descriptorPool) + return logFail("Failed to Create Descriptor Pool"); + } - m_pipelineLayout = m_device->createPipelineLayout({ &range,1 }, nullptr, nullptr, nullptr, nullptr); + // Update descriptor sets + { + m_descriptorSet0 = descriptorPool->createDescriptorSet(smart_refctd_ptr(m_descriptorSetLayout0)); + + video::IGPUDescriptorSet::SDescriptorInfo descriptorInfosSet0[1] = {}; + descriptorInfosSet0[0u].info.buffer.offset = 0u; + descriptorInfosSet0[0u].info.buffer.size = m_globalsBuffer->getCreationParams().size; + descriptorInfosSet0[0u].desc = m_globalsBuffer; + + video::IGPUDescriptorSet::SWriteDescriptorSet descriptorUpdates[1] = {}; + + // globals + descriptorUpdates[0u].dstSet = m_descriptorSet0.get(); + descriptorUpdates[0u].binding = 0u; + descriptorUpdates[0u].arrayElement = 0u; + descriptorUpdates[0u].count = 1u; + descriptorUpdates[0u].info = &descriptorInfosSet0[0u]; + + m_device->updateDescriptorSets(1, descriptorUpdates, 0u, nullptr); + } + + m_pipelineLayout = m_device->createPipelineLayout({ &range,1 }, core::smart_refctd_ptr(m_descriptorSetLayout0), nullptr, nullptr, nullptr); + } smart_refctd_ptr mainPipelineFragmentShaders = {}; smart_refctd_ptr mainPipelineVertexShader = {}; @@ -683,7 +730,29 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio { camera.beginInputProcessing(dtMilliseconds); mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, m_logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + camera.keyboardProcess(events); 
+ + for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++) + { + auto ev = *eventIt; + + if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_1) + { + m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + } + if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_2) + { + m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + } + if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_3) + { + m_shadingModeExample = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + } + } + } + , m_logger.get()); camera.endInputProcessing(dtMilliseconds); } @@ -791,8 +860,10 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio Globals globalData = {}; uint64_t baseAddress = resourcesGPUBuffer->getDeviceAddress(); globalData.pointers = { - .drawObjects = baseAddress + resourcesCollection.drawObjects.bufferOffset, - .geometryBuffer = baseAddress + resourcesCollection.geometryInfo.bufferOffset, + .mainObjects = baseAddress + resourcesCollection.mainObjects.bufferOffset, + .drawObjects = baseAddress + resourcesCollection.drawObjects.bufferOffset, + .geometryBuffer = baseAddress + resourcesCollection.geometryInfo.bufferOffset, + .dtmSettings = baseAddress + resourcesCollection.dtmSettings.bufferOffset, }; SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer}; bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData); @@ -880,6 +951,9 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio } cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + IGPUDescriptorSet* descriptorSets[] = { m_descriptorSet0.get() }; + cb->bindDescriptorSets(asset::EPBP_GRAPHICS, m_pipelineLayout.get(), 0u, 1u, descriptorSets); 
+ cb->bindGraphicsPipeline(m_graphicsPipeline.get()); for (auto& drawCall : drawResourcesFiller.getDrawCalls()) @@ -1010,15 +1084,63 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio mesh.setVertices(core::vector(vertices)); mesh.setIndices(std::move(indices)); + DTMSettingsInfo dtmInfo{}; + + // PRESS 1, 2, 3 TO SWITCH HEIGHT SHADING MODE + // 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS + // 2 - DISCRETE_FIXED_LENGTH_INTERVALS + // 3 - CONTINOUS_INTERVALS + float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5; + switch (m_shadingModeExample) + { + case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + { + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + + dtmInfo.heightShadingInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + + break; + } + case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + { + dtmInfo.heightShadingInfo.intervalLength = 10.0f; + dtmInfo.heightShadingInfo.intervalIndexToHeightMultiplier = dtmInfo.heightShadingInfo.intervalLength; + dtmInfo.heightShadingInfo.isCenteredShading = false; + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + 
dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + + break; + } + case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + { + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + + break; + } + } + // pyramid A - drawResourcesFiller.drawTriangleMesh(mesh, intendedNextSubmit); + drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit); // pyramid B float64_t3 offset = { 500.0f, 0.0f, 0.0f }; for (auto& vertex : vertices) vertex.pos += offset; mesh.setVertices(std::move(vertices)); - drawResourcesFiller.drawTriangleMesh(mesh, intendedNextSubmit); + drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit); } protected: @@ -1056,6 +1178,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio uint64_t m_realFrameIx = 0u; + smart_refctd_ptr m_descriptorSetLayout0; + smart_refctd_ptr m_descriptorSet0; smart_refctd_ptr m_pipelineLayout; smart_refctd_ptr m_graphicsPipeline; @@ -1065,6 +1189,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio smart_refctd_ptr colorStorageImageView; Camera camera; + + E_HEIGHT_SHADING_MODE m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; }; NBL_MAIN_FUNC(ComputerAidedDesign) diff --git a/75_CAD_3D/shaders/globals.hlsl 
b/75_CAD_3D/shaders/globals.hlsl index c080c7c4a..5c5791983 100644 --- a/75_CAD_3D/shaders/globals.hlsl +++ b/75_CAD_3D/shaders/globals.hlsl @@ -33,6 +33,17 @@ using pfloat64_t4 = nbl::hlsl::vector; using pfloat64_t3x3 = portable_matrix_t3x3; using pfloat64_t4x4 = portable_matrix_t4x4; +enum class MainObjectType : uint32_t +{ + NONE = 0u, + DTM, +}; + +struct MainObject +{ + uint32_t dtmSettingsIdx; +}; + struct PushConstants { uint64_t triangleMeshVerticesBaseAddress; @@ -42,20 +53,21 @@ struct PushConstants struct Pointers { + uint64_t mainObjects; uint64_t drawObjects; uint64_t geometryBuffer; + uint64_t dtmSettings; }; #ifndef __HLSL_VERSION -static_assert(sizeof(Pointers) == 16u); +static_assert(sizeof(Pointers) == 32u); #endif struct Globals { Pointers pointers; - pfloat64_t4x4 defaultProjectionToNDC; }; #ifndef __HLSL_VERSION -static_assert(sizeof(Globals) == 144u); +static_assert(sizeof(Globals) == 32u); #endif struct DrawObject @@ -70,12 +82,90 @@ struct TriangleMeshVertex pfloat64_t3 pos; }; +enum class E_HEIGHT_SHADING_MODE : uint32_t +{ + DISCRETE_VARIABLE_LENGTH_INTERVALS, + DISCRETE_FIXED_LENGTH_INTERVALS, + CONTINOUS_INTERVALS +}; + +struct DTMHeightShadingSettings +{ + const static uint32_t HeightColorMapMaxEntries = 16u; + + // height-color map + float intervalLength; + float intervalIndexToHeightMultiplier; + int isCenteredShading; + + uint32_t heightColorEntryCount; + float heightColorMapHeights[HeightColorMapMaxEntries]; + float32_t4 heightColorMapColors[HeightColorMapMaxEntries]; + + E_HEIGHT_SHADING_MODE determineHeightShadingMode() + { + if (nbl::hlsl::isinf(intervalLength)) + return E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + if (intervalLength == 0.0f) + return E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + return E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + } +}; + +struct DTMSettings +{ + // height shading + DTMHeightShadingSettings heightShadingSettings; +}; + +#ifndef __HLSL_VERSION +inline bool 
operator==(const DTMSettings& lhs, const DTMSettings& rhs) +{ + + if(true) //if (lhs.drawHeightShadingEnabled()) + { + if (lhs.heightShadingSettings.intervalLength != rhs.heightShadingSettings.intervalLength) + return false; + if (lhs.heightShadingSettings.intervalIndexToHeightMultiplier != rhs.heightShadingSettings.intervalIndexToHeightMultiplier) + return false; + if (lhs.heightShadingSettings.isCenteredShading != rhs.heightShadingSettings.isCenteredShading) + return false; + if (lhs.heightShadingSettings.heightColorEntryCount != rhs.heightShadingSettings.heightColorEntryCount) + return false; + + + if(memcmp(lhs.heightShadingSettings.heightColorMapHeights, rhs.heightShadingSettings.heightColorMapHeights, lhs.heightShadingSettings.heightColorEntryCount * sizeof(float))) + return false; + if(memcmp(lhs.heightShadingSettings.heightColorMapColors, rhs.heightShadingSettings.heightColorMapColors, lhs.heightShadingSettings.heightColorEntryCount * sizeof(float32_t4))) + return false; + } + + return true; +} +#endif + +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MainObjectIdxBits = 24u; // It will be packed next to alpha in a texture +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MaxIndexableMainObjects = (1u << MainObjectIdxBits) - 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidDTMSettingsIdx = nbl::hlsl::numeric_limits::max; + #ifdef __HLSL_VERSION [[vk::binding(0, 0)]] ConstantBuffer globals : register(b0); + +MainObject loadMainObject(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.mainObjects + index * sizeof(MainObject), 4u); +} +DTMSettings loadDTMSettings(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.dtmSettings + index * sizeof(DTMSettings), 4u); +} + #else +static_assert(alignof(MainObject)==4u); +static_assert(alignof(DTMSettings)==4u); static_assert(alignof(pfloat64_t3x3)==8u); 
static_assert(alignof(DrawObject)==8u); #endif - #endif diff --git a/75_CAD_3D/shaders/main_pipeline/common.hlsl b/75_CAD_3D/shaders/main_pipeline/common.hlsl index 7866b5b8c..87dd5407a 100644 --- a/75_CAD_3D/shaders/main_pipeline/common.hlsl +++ b/75_CAD_3D/shaders/main_pipeline/common.hlsl @@ -7,6 +7,7 @@ struct PSInput { [[vk::location(0)]] float4 position : SV_Position; [[vk::location(1)]] float3 normal : COLOR1; + [[vk::location(2)]] float height : COLOR2; }; // Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated diff --git a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl index d61b99275..8017ed404 100644 --- a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl +++ b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl @@ -1,7 +1,8 @@ +#pragma shader_stage(fragment) + #include "common.hlsl" static const float32_t3 SunlightDirection = float32_t3(0.7071f, -0.7071f, 0.0f); -static const float32_t3 TerrainColor = float32_t3(1.0f, 1.0f, 1.0f); [shader("pixel")] float4 fragMain(PSInput input) : SV_Target @@ -9,7 +10,12 @@ float4 fragMain(PSInput input) : SV_Target static const float AmbientLightIntensity = 0.1f; const float diffuseLightIntensity = max(dot(-SunlightDirection, input.normal), 0.0f); - const float32_t3 fragColor = (AmbientLightIntensity + diffuseLightIntensity) * TerrainColor; + MainObject mainObj = loadMainObject(pc.triangleMeshMainObjectIndex); + DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx); + + const float32_t3 HeightColor = input.height < 50.0f ? float32_t3(0.0, 1.0, 0.0) : (input.height < 75.0f ? 
float32_t3(1.0, 1.0, 0.0) : float32_t3(1.0, 0.0, 0.0)); + + const float32_t3 fragColor = (AmbientLightIntensity + diffuseLightIntensity) * HeightColor; return float32_t4(fragColor, 1.0f); } diff --git a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl index 98996ba79..a0f256d60 100644 --- a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl +++ b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl @@ -37,6 +37,9 @@ PSInput vtxMain(uint vertexID : SV_VertexID) outV.position = _static_cast(pos); + + // TODO: we want to separate height from the Y coordinate I guess? + outV.height = _static_cast(pos.y); //pos = mul(pc.viewProjectionMatrix, pos); // TODO: use pc.viewProjectionMatrix and multiply it with pfloat64_t4 pos instead fix portable_matrix with portable_float multiplication From 28f1e340cbab2ca3c256ab3dcaa1c501ed4695c3 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 19 Mar 2026 20:23:00 +0100 Subject: [PATCH 6/9] Implemented DTM modes --- 75_CAD_3D/shaders/globals.hlsl | 4 +- 75_CAD_3D/shaders/main_pipeline/common.hlsl | 30 ++++- 75_CAD_3D/shaders/main_pipeline/dtm.hlsl | 106 ++++++++++++++++++ .../main_pipeline/fragment_shader.hlsl | 28 +++-- .../shaders/main_pipeline/vertex_shader.hlsl | 11 +- 5 files changed, 156 insertions(+), 23 deletions(-) create mode 100644 75_CAD_3D/shaders/main_pipeline/dtm.hlsl diff --git a/75_CAD_3D/shaders/globals.hlsl b/75_CAD_3D/shaders/globals.hlsl index 5c5791983..d83ffca7d 100644 --- a/75_CAD_3D/shaders/globals.hlsl +++ b/75_CAD_3D/shaders/globals.hlsl @@ -1,5 +1,5 @@ -#ifndef _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ -#define _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ +#ifndef _CAD_3D_EXAMPLE_GLOBALS_HLSL_INCLUDED_ +#define _CAD_3D_EXAMPLE_GLOBALS_HLSL_INCLUDED_ // TODO[Erfan]: Turn off in the future, but keep enabled to test // #define NBL_FORCE_EMULATED_FLOAT_64 diff --git a/75_CAD_3D/shaders/main_pipeline/common.hlsl b/75_CAD_3D/shaders/main_pipeline/common.hlsl index 
87dd5407a..8fc59e1ee 100644 --- a/75_CAD_3D/shaders/main_pipeline/common.hlsl +++ b/75_CAD_3D/shaders/main_pipeline/common.hlsl @@ -1,16 +1,34 @@ -#ifndef _CAD_EXAMPLE_MAIN_PIPELINE_COMMON_HLSL_INCLUDED_ -#define _CAD_EXAMPLE_MAIN_PIPELINE_COMMON_HLSL_INCLUDED_ +#ifndef _CAD_3D_EXAMPLE_MAIN_PIPELINE_COMMON_HLSL_INCLUDED_ +#define _CAD_3D_EXAMPLE_MAIN_PIPELINE_COMMON_HLSL_INCLUDED_ #include "../globals.hlsl" struct PSInput { [[vk::location(0)]] float4 position : SV_Position; - [[vk::location(1)]] float3 normal : COLOR1; - [[vk::location(2)]] float height : COLOR2; -}; -// Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated + [[vk::location(1)]] nointerpolation float4 data1 : COLOR1; + [[vk::location(2)]] float4 interpolatedData1 : COLOR2; + + // TODO: do we even need vertexScreenSpacePos? +#ifndef FRAGMENT_SHADER_INPUT // vertex shader + [[vk::location(3)]] float3 vertexScreenSpacePos : COLOR3; +#else + [[vk::location(3)]] [[vk::ext_decorate(/*spv::DecoratePerVertexKHR*/5285)]] float3 vertexScreenSpacePos[3] : COLOR3; +#endif + + void setNormal(NBL_CONST_REF_ARG(float3) normal) { data1.xyz = normal; } + float3 getNormal() { return data1.xyz; } + + void setHeight(float height) { interpolatedData1.x = height; } + float getHeight() { return interpolatedData1.x; } + +#ifndef FRAGMENT_SHADER_INPUT // vertex shader + void setScreenSpaceVertexAttribs(float3 pos) { vertexScreenSpacePos = pos; } +#else // fragment shader + float3 getScreenSpaceVertexAttribs(uint32_t vertexIndex) { return vertexScreenSpacePos[vertexIndex]; } +#endif +}; // [[vk::binding(0, 0)]] ConstantBuffer globals; ---> moved to globals.hlsl diff --git a/75_CAD_3D/shaders/main_pipeline/dtm.hlsl b/75_CAD_3D/shaders/main_pipeline/dtm.hlsl new file mode 100644 index 000000000..60320647f --- /dev/null +++ b/75_CAD_3D/shaders/main_pipeline/dtm.hlsl @@ -0,0 +1,106 @@ +#ifndef _CAD_3D_EXAMPLE_DTM_HLSL_INCLUDED_ +#define _CAD_3D_EXAMPLE_DTM_HLSL_INCLUDED_ + +#include 
"common.hlsl" + +namespace dtm +{ + +// for usage in upper_bound function +struct DTMSettingsHeightsAccessor +{ + DTMHeightShadingSettings settings; + using value_type = float; + + float operator[](const uint32_t ix) + { + return settings.heightColorMapHeights[ix]; + } +}; + +float getIntervalPosition(in float height, in float minHeight, in float intervalLength, in bool isCenteredShading) +{ + if (isCenteredShading) + return ((height - minHeight) / intervalLength + 0.5f); + else + return ((height - minHeight) / intervalLength); +} + +float32_t4 calcIntervalColor(in int intervalIndex, in DTMHeightShadingSettings settings) +{ + const float minShadingHeight = settings.heightColorMapHeights[0]; + float heightForColor = minShadingHeight + float(intervalIndex) * settings.intervalIndexToHeightMultiplier; + + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; + int32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, settings.heightColorEntryCount, heightForColor), settings.heightColorEntryCount - 1u); + int32_t lowerBoundHeightIndex = max(upperBoundHeightIndex - 1, 0); + + float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex]; + + float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex]; + + if (upperBoundHeight == lowerBoundHeight) + { + return upperBoundColor; + } + else + { + float interpolationVal = (heightForColor - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + return lerp(lowerBoundColor, upperBoundColor, interpolationVal); + } +} + +float32_t4 calculateDTMHeightColor(in DTMHeightShadingSettings settings, in float3 triangleVertices[3], in float2 fragPos, in float height) +{ + const uint32_t heightMapSize = settings.heightColorEntryCount; + if(heightMapSize == 0) + return float32_t4(0.0f, 0.0f, 0.0f, 0.0f); + + 
+ const E_HEIGHT_SHADING_MODE mode = settings.determineHeightShadingMode(); + if(mode == E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) + { + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; + const int upperBoundIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0u, heightMapSize, height), heightMapSize - 1u); + const int mapIndex = max(upperBoundIndex - 1, 0); + + return settings.heightColorMapColors[mapIndex]; + } + else if(mode == E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) + { + const float minShadingHeight = settings.heightColorMapHeights[0]; + const float intervalPosition = getIntervalPosition(height, minShadingHeight, settings.intervalLength, settings.isCenteredShading); + const float positionWithinInterval = frac(intervalPosition); + const int intervalIndex = nbl::hlsl::_static_cast(intervalPosition); + + return calcIntervalColor(intervalIndex, settings); + } + else if(mode == E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) + { + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; + uint32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0u, heightMapSize - 1u, height), heightMapSize - 1u); + uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? 
upperBoundHeightIndex : upperBoundHeightIndex - 1; + + float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex]; + + float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex]; + + float interpolationVal; + if (upperBoundHeightIndex == 0) + interpolationVal = 1.0f; + else + interpolationVal = (height - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + + return lerp(lowerBoundColor, upperBoundColor, interpolationVal); + } + + return float32_t4(0.0f, 0.0f, 0.0f, 0.0f); +} + +} + +#endif \ No newline at end of file diff --git a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl index 8017ed404..14f2e74c0 100644 --- a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl +++ b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl @@ -1,5 +1,7 @@ +#define FRAGMENT_SHADER_INPUT #pragma shader_stage(fragment) +#include "dtm.hlsl" #include "common.hlsl" static const float32_t3 SunlightDirection = float32_t3(0.7071f, -0.7071f, 0.0f); @@ -8,14 +10,22 @@ static const float32_t3 SunlightDirection = float32_t3(0.7071f, -0.7071f, 0.0f); float4 fragMain(PSInput input) : SV_Target { static const float AmbientLightIntensity = 0.1f; - const float diffuseLightIntensity = max(dot(-SunlightDirection, input.normal), 0.0f); + const float diffuseLightIntensity = max(dot(-SunlightDirection, input.getNormal()), 0.0f); - MainObject mainObj = loadMainObject(pc.triangleMeshMainObjectIndex); - DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx); - - const float32_t3 HeightColor = input.height < 50.0f ? float32_t3(0.0, 1.0, 0.0) : (input.height < 75.0f ? 
float32_t3(1.0, 1.0, 0.0) : float32_t3(1.0, 0.0, 0.0)); - - const float32_t3 fragColor = (AmbientLightIntensity + diffuseLightIntensity) * HeightColor; + const MainObject mainObj = loadMainObject(pc.triangleMeshMainObjectIndex); + const DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx); + + float32_t3 triangleVertices[3]; + triangleVertices[0] = input.getScreenSpaceVertexAttribs(0); + triangleVertices[1] = input.getScreenSpaceVertexAttribs(1); + triangleVertices[2] = input.getScreenSpaceVertexAttribs(2); - return float32_t4(fragColor, 1.0f); -} + const float height = input.getHeight(); + //const float32_t3 HeightColor = height < 0.0f ? float32_t3(0.0f, 0.0f, 1.0f) : height < 50.0f ? float32_t3(0.0, 1.0, 0.0) : (height < 75.0f ? float32_t3(1.0, 1.0, 0.0) : float32_t3(1.0, 0.0, 0.0)); + + const float32_t4 HeightColor = dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, triangleVertices, input.position.xy, height); + + const float32_t4 fragColor = (AmbientLightIntensity + diffuseLightIntensity) * HeightColor; + + return fragColor; +} \ No newline at end of file diff --git a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl index a0f256d60..c82ab696d 100644 --- a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl +++ b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl @@ -26,7 +26,7 @@ PSInput vtxMain(uint vertexID : SV_VertexID) float32_t3 triangleEdge0 = vertex1 - vertex0; float32_t3 triangleEdge1 = vertex2 - vertex0; - outV.normal = (normalize(cross(triangleEdge1, triangleEdge0)) + 1.0f) * 0.5f; + outV.setNormal((normalize(cross(triangleEdge1, triangleEdge0)) + 1.0f) * 0.5f); } pfloat64_t4 pos; @@ -36,10 +36,7 @@ PSInput vtxMain(uint vertexID : SV_VertexID) pos.w = _static_cast(1.0f); - outV.position = _static_cast(pos); - - // TODO: we want to separate height from the Y coordinate I guess? 
- outV.height = _static_cast(pos.y); + outV.setHeight(_static_cast(pos.y)); //pos = mul(pc.viewProjectionMatrix, pos); // TODO: use pc.viewProjectionMatrix and multiply it with pfloat64_t4 pos instead fix portable_matrix with portable_float multiplication @@ -52,6 +49,8 @@ PSInput vtxMain(uint vertexID : SV_VertexID) viewProjMatrix[i][3] = _static_cast(pc.viewProjectionMatrix[i].w); } + outV.setScreenSpaceVertexAttribs(_static_cast(pos).xyz); + /*if (vertexID == 0) { printf("%f, %f, %f, %f", a[0][0], a[0][1], a[0][2], a[0][3]); @@ -60,7 +59,7 @@ PSInput vtxMain(uint vertexID : SV_VertexID) printf("%f, %f, %f, %f", a[3][0], a[3][1], a[3][2], a[3][3]); }*/ - outV.position = mul(viewProjMatrix, outV.position); + outV.position = mul(viewProjMatrix, _static_cast(pos)); return outV; } From 29f1b9278bc1483d2edacba8a37faf175cabc189 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 21 Mar 2026 12:42:20 +0100 Subject: [PATCH 7/9] Implemented height shading anti aliasing --- 75_CAD_3D/main.cpp | 1 + 75_CAD_3D/shaders/globals.hlsl | 4 +- 75_CAD_3D/shaders/main_pipeline/dtm.hlsl | 97 +++++++++++++++++-- .../main_pipeline/fragment_shader.hlsl | 4 +- 4 files changed, 97 insertions(+), 9 deletions(-) diff --git a/75_CAD_3D/main.cpp b/75_CAD_3D/main.cpp index 12f13d345..f24d4d06a 100644 --- a/75_CAD_3D/main.cpp +++ b/75_CAD_3D/main.cpp @@ -865,6 +865,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio .geometryBuffer = baseAddress + resourcesCollection.geometryInfo.bufferOffset, .dtmSettings = baseAddress + resourcesCollection.dtmSettings.bufferOffset, }; + globalData.antiAliasingFactor = 1.0f; SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer}; bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData); assert(updateSuccess); diff --git a/75_CAD_3D/shaders/globals.hlsl b/75_CAD_3D/shaders/globals.hlsl index d83ffca7d..544f05516 100644 --- a/75_CAD_3D/shaders/globals.hlsl 
+++ b/75_CAD_3D/shaders/globals.hlsl @@ -65,9 +65,11 @@ static_assert(sizeof(Pointers) == 32u); struct Globals { Pointers pointers; + float32_t antiAliasingFactor; + float32_t __padding; }; #ifndef __HLSL_VERSION -static_assert(sizeof(Globals) == 32u); +static_assert(sizeof(Globals) == 40u); #endif struct DrawObject diff --git a/75_CAD_3D/shaders/main_pipeline/dtm.hlsl b/75_CAD_3D/shaders/main_pipeline/dtm.hlsl index 60320647f..cf85766dd 100644 --- a/75_CAD_3D/shaders/main_pipeline/dtm.hlsl +++ b/75_CAD_3D/shaders/main_pipeline/dtm.hlsl @@ -18,6 +18,56 @@ struct DTMSettingsHeightsAccessor } }; +struct HeightSegmentTransitionData +{ + float currentHeight; + float4 currentSegmentColor; + float boundaryHeight; + float4 otherSegmentColor; +}; + +void getIntervalHeightAndColor(in int intervalIndex, in DTMHeightShadingSettings settings, out float4 outIntervalColor, out float outIntervalHeight) +{ + float minShadingHeight = settings.heightColorMapHeights[0]; + float heightForColor = minShadingHeight + float(intervalIndex) * settings.intervalIndexToHeightMultiplier; + + if (settings.isCenteredShading) + outIntervalHeight = minShadingHeight + (float(intervalIndex) - 0.5) * settings.intervalLength; + else + outIntervalHeight = minShadingHeight + (float(intervalIndex)) * settings.intervalLength; + + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; + int32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, settings.heightColorEntryCount, heightForColor), settings.heightColorEntryCount - 1u); + int32_t lowerBoundHeightIndex = max(upperBoundHeightIndex - 1, 0); + + float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex]; + + float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex]; + + if (upperBoundHeight == lowerBoundHeight) + { + 
outIntervalColor = upperBoundColor; + } + else + { + float interpolationVal = (heightForColor - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + outIntervalColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); + } +} + +// This function interpolates between the current and nearest segment colors based on the +// screen-space distance to the segment boundary. The result is a smoothly blended color +// useful for visualizing discrete height levels without harsh edges. +float4 smoothHeightSegmentTransition(in HeightSegmentTransitionData transitionInfo, in float heightDeriv) +{ + float pxDistanceToNearestSegment = abs((transitionInfo.currentHeight - transitionInfo.boundaryHeight) / heightDeriv); + float nearestSegmentColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNearestSegment); + float4 localHeightColor = lerp(transitionInfo.otherSegmentColor, transitionInfo.currentSegmentColor, nearestSegmentColorCoverage); + return localHeightColor; +} + float getIntervalPosition(in float height, in float minHeight, in float intervalLength, in bool isCenteredShading) { if (isCenteredShading) @@ -52,21 +102,34 @@ float32_t4 calcIntervalColor(in int intervalIndex, in DTMHeightShadingSettings s } } -float32_t4 calculateDTMHeightColor(in DTMHeightShadingSettings settings, in float3 triangleVertices[3], in float2 fragPos, in float height) +float32_t4 calculateDTMHeightColor(in DTMHeightShadingSettings settings, in float heightDeriv, in float3 triangleVertices[3], in float2 fragPos, in float height) { const uint32_t heightMapSize = settings.heightColorEntryCount; if(heightMapSize == 0) return float32_t4(0.0f, 0.0f, 0.0f, 0.0f); - - + const E_HEIGHT_SHADING_MODE mode = settings.determineHeightShadingMode(); if(mode == E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) { DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; const int upperBoundIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0u, 
heightMapSize, height), heightMapSize - 1u); const int mapIndex = max(upperBoundIndex - 1, 0); - - return settings.heightColorMapColors[mapIndex]; + int mapIndexPrev = max(mapIndex - 1, 0); + int mapIndexNext = min(mapIndex + 1, heightMapSize - 1); + + // logic explainer: if colorIdx is 0.0 then it means blend with next + // if color idx is >= length of the colours array then it means it's also > 0.0 and this blend with prev is true + // if color idx is > 0 and < len - 1, then it depends on the current pixel's height value and two closest height values + bool blendWithPrev = (mapIndex > 0) + && (mapIndex >= heightMapSize - 1 || (height * 2.0 < settings.heightColorMapHeights[upperBoundIndex] + settings.heightColorMapHeights[mapIndex])); + + HeightSegmentTransitionData transitionInfo; + transitionInfo.currentHeight = height; + transitionInfo.currentSegmentColor = settings.heightColorMapColors[mapIndex]; + transitionInfo.boundaryHeight = blendWithPrev ? settings.heightColorMapHeights[mapIndex] : settings.heightColorMapHeights[mapIndexNext]; + transitionInfo.otherSegmentColor = blendWithPrev ? 
settings.heightColorMapColors[mapIndexPrev] : settings.heightColorMapColors[mapIndexNext]; + + return smoothHeightSegmentTransition(transitionInfo, heightDeriv); } else if(mode == E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) { @@ -75,7 +138,29 @@ float32_t4 calculateDTMHeightColor(in DTMHeightShadingSettings settings, in floa const float positionWithinInterval = frac(intervalPosition); const int intervalIndex = nbl::hlsl::_static_cast(intervalPosition); - return calcIntervalColor(intervalIndex, settings); + float4 currentIntervalColor; + float currentIntervalHeight; + getIntervalHeightAndColor(intervalIndex, settings, currentIntervalColor, currentIntervalHeight); + + bool blendWithPrev = (positionWithinInterval < 0.5f); + + HeightSegmentTransitionData transitionInfo; + transitionInfo.currentHeight = height; + transitionInfo.currentSegmentColor = currentIntervalColor; + if (blendWithPrev) + { + int prevIntervalIdx = max(intervalIndex - 1, 0); + float prevIntervalHeight; // unused, the currentIntervalHeight is the boundary height between current and prev + getIntervalHeightAndColor(prevIntervalIdx, settings, transitionInfo.otherSegmentColor, prevIntervalHeight); + transitionInfo.boundaryHeight = currentIntervalHeight; + } + else + { + int nextIntervalIdx = intervalIndex + 1; + getIntervalHeightAndColor(nextIntervalIdx, settings, transitionInfo.otherSegmentColor, transitionInfo.boundaryHeight); + } + + return smoothHeightSegmentTransition(transitionInfo, heightDeriv); } else if(mode == E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) { diff --git a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl index 14f2e74c0..33c5a3240 100644 --- a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl +++ b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl @@ -21,9 +21,9 @@ float4 fragMain(PSInput input) : SV_Target triangleVertices[2] = input.getScreenSpaceVertexAttribs(2); const float height = input.getHeight(); 
- //const float32_t3 HeightColor = height < 0.0f ? float32_t3(0.0f, 0.0f, 1.0f) : height < 50.0f ? float32_t3(0.0, 1.0, 0.0) : (height < 75.0f ? float32_t3(1.0, 1.0, 0.0) : float32_t3(1.0, 0.0, 0.0)); + const float heightDeriv = fwidth(height); - const float32_t4 HeightColor = dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, triangleVertices, input.position.xy, height); + const float32_t4 HeightColor = dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, heightDeriv, triangleVertices, input.position.xy, height); const float32_t4 fragColor = (AmbientLightIntensity + diffuseLightIntensity) * HeightColor; From 852af8af4828cae0f94d79f623d522065928bb2e Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 24 Mar 2026 15:04:23 +0100 Subject: [PATCH 8/9] Fixed normal calculation --- 75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl index c82ab696d..59482c557 100644 --- a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl +++ b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl @@ -26,7 +26,7 @@ PSInput vtxMain(uint vertexID : SV_VertexID) float32_t3 triangleEdge0 = vertex1 - vertex0; float32_t3 triangleEdge1 = vertex2 - vertex0; - outV.setNormal((normalize(cross(triangleEdge1, triangleEdge0)) + 1.0f) * 0.5f); + outV.setNormal(normalize(cross(triangleEdge1, triangleEdge0))); } pfloat64_t4 pos; From efbf6ce01369a4ee843b61fb3588fbe74c4004a9 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 7 Apr 2026 14:02:13 +0200 Subject: [PATCH 9/9] Fixes --- 75_CAD_3D/DTMMeshes.h | 26 ++++++++++++++ 75_CAD_3D/main.cpp | 35 ++----------------- .../main_pipeline/fragment_shader.hlsl | 2 +- .../shaders/main_pipeline/vertex_shader.hlsl | 1 + 4 files changed, 31 insertions(+), 33 deletions(-) create mode 100644 75_CAD_3D/DTMMeshes.h diff --git a/75_CAD_3D/DTMMeshes.h b/75_CAD_3D/DTMMeshes.h new file 
mode 100644 index 000000000..3ebefd5ad --- /dev/null +++ b/75_CAD_3D/DTMMeshes.h @@ -0,0 +1,26 @@ +#pragma once + +// pyramid +#if 1 +core::vector DTMMainMeshVertices = { + { float64_t3(0.0, 100.0, 0.0) }, + { float64_t3(-200.0, 10.0, -200.0) }, + { float64_t3(200.0, 10.0, -100.0) }, + { float64_t3(0.0, 100.0, 0.0) }, + { float64_t3(200.0, 10.0, -100.0) }, + { float64_t3(200.0, -20.0, 200.0) }, + { float64_t3(0.0, 100.0, 0.0) }, + { float64_t3(200.0, -20.0, 200.0) }, + { float64_t3(-200.0, 10.0, 200.0) }, + { float64_t3(0.0, 100.0, 0.0) }, + { float64_t3(-200.0, 10.0, 200.0) }, + { float64_t3(-200.0, 10.0, -200.0) }, +}; + +core::vector DTMMainMeshIndices = { + 0, 1, 2, + 3, 4, 5, + 6, 7, 8, + 9, 10, 11 +}; +#endif \ No newline at end of file diff --git a/75_CAD_3D/main.cpp b/75_CAD_3D/main.cpp index f24d4d06a..1b755eebd 100644 --- a/75_CAD_3D/main.cpp +++ b/75_CAD_3D/main.cpp @@ -16,6 +16,7 @@ using namespace video; #include "nbl/builtin/hlsl/math/linalg/transform.hlsl" #include "nbl/builtin/hlsl/math/thin_lens_projection.hlsl" +#include "DTMMeshes.h" class CEventCallback : public ISimpleManagedSurface::ICallback { @@ -1059,31 +1060,9 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio ); drawResourcesFiller.reset(); - core::vector vertices = { - { float64_t3(0.0, 100.0, 0.0) }, - { float64_t3(-200.0, 10.0, -200.0) }, - { float64_t3(200.0, 10.0, -100.0) }, - { float64_t3(0.0, 100.0, 0.0) }, - { float64_t3(200.0, 10.0, -100.0) }, - { float64_t3(200.0, -20.0, 200.0) }, - { float64_t3(0.0, 100.0, 0.0) }, - { float64_t3(200.0, -20.0, 200.0) }, - { float64_t3(-200.0, 10.0, 200.0) }, - { float64_t3(0.0, 100.0, 0.0) }, - { float64_t3(-200.0, 10.0, 200.0) }, - { float64_t3(-200.0, 10.0, -200.0) }, - }; - - core::vector indices = { - 0, 1, 2, - 3, 4, 5, - 6, 7, 8, - 9, 10, 11 - }; - CTriangleMesh mesh; - mesh.setVertices(core::vector(vertices)); - mesh.setIndices(std::move(indices)); + mesh.setVertices(core::vector(DTMMainMeshVertices)); + 
mesh.setIndices(core::vector(DTMMainMeshIndices)); DTMSettingsInfo dtmInfo{}; @@ -1133,14 +1112,6 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio } } - // pyramid A - drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit); - - // pyramid B - float64_t3 offset = { 500.0f, 0.0f, 0.0f }; - for (auto& vertex : vertices) - vertex.pos += offset; - mesh.setVertices(std::move(vertices)); drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit); } diff --git a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl index 33c5a3240..aad91afbd 100644 --- a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl +++ b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl @@ -10,7 +10,7 @@ static const float32_t3 SunlightDirection = float32_t3(0.7071f, -0.7071f, 0.0f); float4 fragMain(PSInput input) : SV_Target { static const float AmbientLightIntensity = 0.1f; - const float diffuseLightIntensity = max(dot(-SunlightDirection, input.getNormal()), 0.0f); + const float diffuseLightIntensity = max(dot(-SunlightDirection, normalize(input.getNormal())), 0.0f); const MainObject mainObj = loadMainObject(pc.triangleMeshMainObjectIndex); const DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx); diff --git a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl index 59482c557..5120c356d 100644 --- a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl +++ b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl @@ -26,6 +26,7 @@ PSInput vtxMain(uint vertexID : SV_VertexID) float32_t3 triangleEdge0 = vertex1 - vertex0; float32_t3 triangleEdge1 = vertex2 - vertex0; + // TODO: Whether to use cross(e0, e1) or cross(e1, e0) depends on the triangle winding (CCW vs CW). outV.setNormal(normalize(cross(triangleEdge1, triangleEdge0))); }