diff --git a/75_CAD_3D/CMakeLists.txt b/75_CAD_3D/CMakeLists.txt new file mode 100644 index 000000000..144fb4a33 --- /dev/null +++ b/75_CAD_3D/CMakeLists.txt @@ -0,0 +1,82 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") +endif() + +set(EXAMPLE_SOURCES + "${CMAKE_CURRENT_SOURCE_DIR}/DrawResourcesFiller.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/DrawResourcesFiller.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CTriangleMesh.h" +) +set(EXAMPLE_INCLUDES + "${CMAKE_CURRENT_SOURCE_DIR}/../../3rdparty/boost/superproject/libs/math/include") +nbl_create_executable_project("${EXAMPLE_SOURCES}" "" "${EXAMPLE_INCLUDES}" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") +target_link_libraries(${EXECUTABLE_NAME} PRIVATE Nabla::ext::FullScreenTriangle) + +# if enabled then try use Nabla "Text Rendering" extension +# with an implemented interface using the 3rdparty deps + +set(NBL_CAD_EX_USE_TEXT_RENDERING_EXT OFF) # do not enable, for future usage when the extension is written + +if(NBL_BUILD_TEXT_RENDERING AND NBL_CAD_EX_USE_TEXT_RENDERING_EXT) + add_dependencies(${EXECUTABLE_NAME} ${NBL_EXT_TEXT_RENDERING_TARGET}) + target_link_libraries(${EXECUTABLE_NAME} PRIVATE ${NBL_EXT_TEXT_RENDERING_TARGET}) + target_include_directories(${EXECUTABLE_NAME} PUBLIC $) +else() + # Freetype + add_dependencies(${EXECUTABLE_NAME} freetype) + target_link_libraries(${EXECUTABLE_NAME} PRIVATE freetype) + target_include_directories(${EXECUTABLE_NAME} PUBLIC $) + + # msdfgen + add_dependencies(${EXECUTABLE_NAME} ${NBL_MSDFGEN_TARGETS}) + target_link_libraries(${EXECUTABLE_NAME} PRIVATE ${NBL_MSDFGEN_TARGETS}) + foreach(NBL_TARGET IN LISTS NBL_MSDFGEN_TARGETS) + target_include_directories(${EXECUTABLE_NAME} PUBLIC $) + endforeach() +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + +set(SM 6_8) + +set(JSON [=[ +[ + { + "INPUT": "shaders/main_pipeline/vertex_shader.hlsl", + "KEY": 
"main_pipeline_vertex_shader", + "CAPS": [] + }, + { + "INPUT": "shaders/main_pipeline/fragment_shader.hlsl", + "KEY": "main_pipeline_fragment_shader", + "CAPS": [] + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) diff --git a/75_CAD_3D/CTriangleMesh.h b/75_CAD_3D/CTriangleMesh.h new file mode 100644 index 000000000..f5b9b034a --- /dev/null +++ b/75_CAD_3D/CTriangleMesh.h @@ -0,0 +1,119 @@ +#pragma once + +#include +#include +#include "shaders/globals.hlsl" + +using namespace nbl; + +struct DTMHeightShadingSettingsInfo +{ + // Height Shading Mode + E_HEIGHT_SHADING_MODE heightShadingMode; + + // Used as fixed interval length for "DISCRETE_FIXED_LENGTH_INTERVALS" shading mode + float intervalLength; + + // Converts an interval index to its corresponding height value + // For example, if this value is 10.0, then an interval index of 2 corresponds to a height of 20.0. + // This computed height is later used to determine the interpolated color for shading. 
+ // It makes sense for this variable to be always equal to `intervalLength` but sometimes it's a different scaling so that last index corresponds to largestHeight + float intervalIndexToHeightMultiplier; + + // Used for "DISCRETE_FIXED_LENGTH_INTERVALS" shading mode + // If `isCenteredShading` is true, the intervals are centered around `minHeight`, meaning the + // first interval spans [minHeight - intervalLength / 2.0, minHeight + intervalLength / 2.0]. + // Otherwise, intervals are aligned from `minHeight` upward, so the first interval spans + // [minHeight, minHeight + intervalLength]. + bool isCenteredShading; + + void addHeightColorMapEntry(float height, float32_t4 color) + { + heightColorSet.emplace(height, color); + } + + bool fillShaderDTMSettingsHeightColorMap(DTMSettings& dtmSettings) const + { + const uint32_t mapSize = heightColorSet.size(); + if (mapSize > DTMHeightShadingSettings::HeightColorMapMaxEntries) + return false; + dtmSettings.heightShadingSettings.heightColorEntryCount = mapSize; + + int index = 0; + for (auto it = heightColorSet.begin(); it != heightColorSet.end(); ++it) + { + dtmSettings.heightShadingSettings.heightColorMapHeights[index] = it->height; + dtmSettings.heightShadingSettings.heightColorMapColors[index] = it->color; + ++index; + } + + return true; + } + +private: + struct HeightColor + { + float height; + float32_t4 color; + + bool operator<(const HeightColor& other) const + { + return height < other.height; + } + }; + + std::set heightColorSet; +}; + +struct DTMSettingsInfo +{ + DTMHeightShadingSettingsInfo heightShadingInfo; +}; + +class CTriangleMesh final +{ +public: + using index_t = uint32_t; + using vertex_t = TriangleMeshVertex; + + inline void setVertices(core::vector&& vertices) + { + m_vertices = std::move(vertices); + } + inline void setIndices(core::vector&& indices) + { + m_indices = std::move(indices); + } + + inline const core::vector& getVertices() const + { + return m_vertices; + } + inline const core::vector& 
getIndices() const + { + return m_indices; + } + + inline size_t getVertexBuffByteSize() const + { + return sizeof(vertex_t) * m_vertices.size(); + } + inline size_t getIndexBuffByteSize() const + { + return sizeof(index_t) * m_indices.size(); + } + inline size_t getIndexCount() const + { + return m_indices.size(); + } + + inline void clear() + { + m_vertices.clear(); + m_indices.clear(); + } + +private: + core::vector m_vertices; + core::vector m_indices; +}; \ No newline at end of file diff --git a/75_CAD_3D/DTMMeshes.h b/75_CAD_3D/DTMMeshes.h new file mode 100644 index 000000000..3ebefd5ad --- /dev/null +++ b/75_CAD_3D/DTMMeshes.h @@ -0,0 +1,26 @@ +#pragma once + +// pyramid +#if 1 +core::vector DTMMainMeshVertices = { + { float64_t3(0.0, 100.0, 0.0) }, + { float64_t3(-200.0, 10.0, -200.0) }, + { float64_t3(200.0, 10.0, -100.0) }, + { float64_t3(0.0, 100.0, 0.0) }, + { float64_t3(200.0, 10.0, -100.0) }, + { float64_t3(200.0, -20.0, 200.0) }, + { float64_t3(0.0, 100.0, 0.0) }, + { float64_t3(200.0, -20.0, 200.0) }, + { float64_t3(-200.0, 10.0, 200.0) }, + { float64_t3(0.0, 100.0, 0.0) }, + { float64_t3(-200.0, 10.0, 200.0) }, + { float64_t3(-200.0, 10.0, -200.0) }, +}; + +core::vector DTMMainMeshIndices = { + 0, 1, 2, + 3, 4, 5, + 6, 7, 8, + 9, 10, 11 +}; +#endif \ No newline at end of file diff --git a/75_CAD_3D/DrawResourcesFiller.cpp b/75_CAD_3D/DrawResourcesFiller.cpp new file mode 100644 index 000000000..81ab6b451 --- /dev/null +++ b/75_CAD_3D/DrawResourcesFiller.cpp @@ -0,0 +1,311 @@ +#include "DrawResourcesFiller.h" + +using namespace nbl; + +DrawResourcesFiller::DrawResourcesFiller() +{} + +DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr&& device, smart_refctd_ptr&& bufferUploadUtils, IQueue* copyQueue, core::smart_refctd_ptr&& logger) : + m_device(std::move(device)), + m_bufferUploadUtils(std::move(bufferUploadUtils)), + m_copyQueue(copyQueue), + m_logger(std::move(logger)) +{ +} + +// function is called when buffer is filled and we should 
submit draws and clear the buffers and continue filling +void DrawResourcesFiller::setSubmitDrawsFunction(const SubmitFunc& func) +{ + submitDraws = func; +} + +// TODO: redo it completely +bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize, std::span memoryTypeIndexTryOrder) +{ + const size_t adjustedBuffersMemorySize = requiredBufferMemorySize; + const size_t totalResourcesSize = adjustedBuffersMemorySize; + + IGPUBuffer::SCreationParams resourcesBufferCreationParams = {}; + resourcesBufferCreationParams.size = adjustedBuffersMemorySize; + resourcesBufferCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; + resourcesGPUBuffer = logicalDevice->createBuffer(std::move(resourcesBufferCreationParams)); + + if (!resourcesGPUBuffer) + { + m_logger.log("Failed to create resourcesGPUBuffer.", nbl::system::ILogger::ELL_ERROR); + return false; + } + + resourcesGPUBuffer->setObjectDebugName("drawResourcesBuffer"); + + IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = resourcesGPUBuffer->getMemoryReqs(); + + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements gpuBufferMemoryReqs = resourcesGPUBuffer->getMemoryReqs(); + const bool memoryRequirementsMatch = + (logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits() & gpuBufferMemoryReqs.memoryTypeBits) != 0 && // should have device local memory compatible + (gpuBufferMemoryReqs.requiresDedicatedAllocation == false); // should not require dedicated allocation + + if (!memoryRequirementsMatch) + { + m_logger.log("Shouldn't happen: Buffer Memory Requires Dedicated Allocation or can't biind to device local memory.", nbl::system::ILogger::ELL_ERROR); + return false; + } + + const auto& memoryProperties = logicalDevice->getPhysicalDevice()->getMemoryProperties(); + + video::IDeviceMemoryAllocator::SAllocation allocation = {}; + 
for (const auto& memoryTypeIdx : memoryTypeIndexTryOrder) + { + IDeviceMemoryAllocator::SAllocateInfo allocationInfo = + { + .size = totalResourcesSize, + .flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT, // for the buffers + .memoryTypeIndex = memoryTypeIdx, + .dedication = nullptr, + }; + + allocation = logicalDevice->allocate(allocationInfo); + + if (allocation.isValid()) + break; + } + + if (!allocation.isValid()) + { + m_logger.log("Failed Allocation for draw resources!", nbl::system::ILogger::ELL_ERROR); + return false; + } + + buffersMemoryArena = { + .memory = allocation.memory, + .offset = core::alignUp(allocation.offset, GPUStructsMaxNaturalAlignment), // first natural alignment after images section of the memory allocation + }; + + video::ILogicalDevice::SBindBufferMemoryInfo bindBufferMemory = { + .buffer = resourcesGPUBuffer.get(), + .binding = { + .memory = buffersMemoryArena.memory.get(), + .offset = buffersMemoryArena.offset, + } + }; + + if (!logicalDevice->bindBufferMemory(1, &bindBufferMemory)) + { + m_logger.log("DrawResourcesFiller::allocateDrawResources, bindBufferMemory failed.", nbl::system::ILogger::ELL_ERROR); + return false; + } + + return true; +} + +bool DrawResourcesFiller::allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, std::span memoryTypeIndexTryOrder, uint32_t reductionPercent, uint32_t maxTries) +{ + const size_t minimumAcceptableSize = MinimumDrawResourcesMemorySize; + + size_t currentBufferSize = maxBufferMemorySize; + size_t currentImageSize = maxImageMemorySize; + const size_t totalInitialSize = currentBufferSize + currentImageSize; + + // If initial size is less than minimum acceptable then increase the buffer and image size to sum up to minimumAcceptableSize with image:buffer ratios preserved + if (totalInitialSize < minimumAcceptableSize) + { + // Preserve ratio: R = buffer / (buffer + image) + // scaleFactor = 
minimumAcceptableSize / totalInitialSize; + const double scaleFactor = static_cast(minimumAcceptableSize) / totalInitialSize; + currentBufferSize = static_cast(currentBufferSize * scaleFactor); + currentImageSize = minimumAcceptableSize - currentBufferSize; // ensures exact sum + } + + uint32_t numTries = 0u; + while ((currentBufferSize + currentImageSize) >= minimumAcceptableSize && numTries < maxTries) + { + if (allocateDrawResources(logicalDevice, currentImageSize, currentBufferSize, memoryTypeIndexTryOrder)) + { + m_logger.log("Successfully allocated memory for images (%zu) and buffers (%zu).", system::ILogger::ELL_INFO, currentImageSize, currentBufferSize); + return true; + } + + m_logger.log("Allocation of memory for images(%zu) and buffers(%zu) failed; Reducing allocation size by %u%% and retrying...", system::ILogger::ELL_WARNING, currentImageSize, currentBufferSize, reductionPercent); + currentBufferSize = (currentBufferSize * (100 - reductionPercent)) / 100; + currentImageSize = (currentImageSize * (100 - reductionPercent)) / 100; + numTries++; + } + + m_logger.log("All attempts to allocate memory for images(%zu) and buffers(%zu) failed.", system::ILogger::ELL_ERROR, currentImageSize, currentBufferSize); + return false; +} + +void DrawResourcesFiller::drawTriangleMesh( + const CTriangleMesh& mesh, + const DTMSettingsInfo& dtmSettingsInfo, + SIntendedSubmitInfo& intendedNextSubmit) +{ + setActiveDTMSettings(dtmSettingsInfo); + beginMainObject(MainObjectType::DTM); + + uint32_t mainObjectIdx = acquireActiveMainObjectIndex(intendedNextSubmit); + if (mainObjectIdx == InvalidMainObjectIdx) + { + m_logger.log("drawTriangleMesh: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } + + // TODO: for now we add whole mesh at once, instead we should add triangle by triangle and see check if we overflow memory + + const size_t vertexBuffByteSize = mesh.getVertexBuffByteSize(); + const size_t 
indexBuffByteSize = mesh.getIndexBuffByteSize(); + const size_t triangleDataByteSize = vertexBuffByteSize + indexBuffByteSize; + const auto& indexBuffer = mesh.getIndices(); + const auto& vertexBuffer = mesh.getVertices(); + assert(indexBuffer.size() == vertexBuffer.size()); // TODO: figure out why it was needed then decide if this constraint needs to be kept + + DrawCallData drawCallData = {}; + + // Copy VertexBuffer + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(triangleDataByteSize, alignof(CTriangleMesh::vertex_t)); + drawCallData.triangleMeshVerticesBaseAddress = geometryBufferOffset; + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + memcpy(dst, vertexBuffer.data(), vertexBuffByteSize); + geometryBufferOffset += vertexBuffByteSize; + + // Copy IndexBuffer + dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + drawCallData.indexBufferOffset = geometryBufferOffset; + memcpy(dst, indexBuffer.data(), indexBuffByteSize); + + drawCallData.triangleMeshMainObjectIndex = mainObjectIdx; + drawCallData.indexCount = mesh.getIndexCount(); + drawCalls.push_back(drawCallData); + + endMainObject(); +} + +bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (!intendedNextSubmit.valid()) + { + // It is a caching submit without command buffer, just for the purpose of accumulation of staging resources + // In that case we don't push any uploads (i.e. 
we don't record any imageRecord commmand in active command buffer, because there is no active command buffer) + return false; + } + + bool success = true; + success &= pushBufferUploads(intendedNextSubmit, resourcesCollection); + + return success; +} + +bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resources) +{ + copiedResourcesSize = 0ull; + + if (resourcesCollection.calculateTotalConsumption() > resourcesGPUBuffer->getSize()) + { + m_logger.log("some bug has caused the resourcesCollection to consume more memory than available in resourcesGPUBuffer without overflow submit", nbl::system::ILogger::ELL_ERROR); + assert(false); + return false; + } + + auto copyCPUFilledDrawBuffer = [&](auto& drawBuffer) -> bool + { + // drawBuffer must be of type CPUGeneratedResource + SBufferRange copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer }; + + if (copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()) + { + m_logger.log("`copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()` is true in `copyCPUFilledDrawBuffer`, this shouldn't happen with correct auto-submission mechanism.", nbl::system::ILogger::ELL_ERROR); + assert(false); + return false; + } + + drawBuffer.bufferOffset = copyRange.offset; + if (copyRange.size > 0ull) + { + if (!m_bufferUploadUtils->updateBufferRangeViaStagingBuffer(intendedNextSubmit, copyRange, drawBuffer.vector.data())) + return false; + copiedResourcesSize += drawBuffer.getAlignedStorageSize(); + } + return true; + }; + + copyCPUFilledDrawBuffer(resources.mainObjects); + copyCPUFilledDrawBuffer(resources.dtmSettings); + copyCPUFilledDrawBuffer(resources.drawObjects); + copyCPUFilledDrawBuffer(resources.indexBuffer); + copyCPUFilledDrawBuffer(resources.geometryInfo); + + return true; +} + +void DrawResourcesFiller::markFrameUsageComplete(uint64_t drawSubmitWaitValue) +{ + // m_logger.log(std::format("Finished Frame Idx = {}", 
currentFrameIndex).c_str(), nbl::system::ILogger::ELL_INFO); + currentFrameIndex++; + // TODO[LATER]: take into account that currentFrameIndex was submitted with drawSubmitWaitValue; Use that value when deallocating the resources marked with this frame index + // Currently, for evictions the worst case value will be waited for, as there is no way yet to know which semaphoroe value will signal the completion of the (to be evicted) resource's usage +} + +uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (activeMainObjectIndex != InvalidMainObjectIdx) + return activeMainObjectIndex; + + if (activeMainObjectType == MainObjectType::NONE) + { + assert(false); // You're probably trying to acquire mainObjectIndex outside of startMainObject, endMainObject scope + return InvalidMainObjectIdx; + } + + const bool needsDTMSettings = activeMainObjectType == MainObjectType::DTM; + + MainObject mainObject = {}; + mainObject.dtmSettingsIdx = (needsDTMSettings) ? 
acquireActiveDTMSettingsIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidDTMSettingsIdx; + activeMainObjectIndex = resourcesCollection.mainObjects.addAndGetOffset(mainObject); + return activeMainObjectIndex; +} + +uint32_t DrawResourcesFiller::acquireActiveDTMSettingsIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (activeDTMSettingsIndex == InvalidDTMSettingsIdx) + activeDTMSettingsIndex = addDTMSettings_SubmitIfNeeded(activeDTMSettings, intendedNextSubmit); + + return activeDTMSettingsIndex; +} + +uint32_t DrawResourcesFiller::addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit) +{ + // before calling `addDTMSettings_Internal` we have made sute we have enough mem for + uint32_t outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit); + return outDTMSettingIdx; +} + +uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) +{ + DTMSettings dtmSettings; + + switch (dtmSettingsInfo.heightShadingInfo.heightShadingMode) + { + case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + dtmSettings.heightShadingSettings.intervalLength = std::numeric_limits::infinity(); + break; + case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + dtmSettings.heightShadingSettings.intervalLength = dtmSettingsInfo.heightShadingInfo.intervalLength; + break; + case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + dtmSettings.heightShadingSettings.intervalLength = 0.0f; + break; + } + dtmSettings.heightShadingSettings.intervalIndexToHeightMultiplier = dtmSettingsInfo.heightShadingInfo.intervalIndexToHeightMultiplier; + dtmSettings.heightShadingSettings.isCenteredShading = static_cast(dtmSettingsInfo.heightShadingInfo.isCenteredShading); + dtmSettingsInfo.heightShadingInfo.fillShaderDTMSettingsHeightColorMap(dtmSettings); + + for (uint32_t i = 0u; i < resourcesCollection.dtmSettings.vector.size(); ++i) + { 
+ const DTMSettings& itr = resourcesCollection.dtmSettings.vector[i]; + if (itr == dtmSettings) + return i; + } + + return resourcesCollection.dtmSettings.addAndGetOffset(dtmSettings); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers +} \ No newline at end of file diff --git a/75_CAD_3D/DrawResourcesFiller.h b/75_CAD_3D/DrawResourcesFiller.h new file mode 100644 index 000000000..c037b634d --- /dev/null +++ b/75_CAD_3D/DrawResourcesFiller.h @@ -0,0 +1,291 @@ +/******************************************************************************/ +/* DrawResourcesFiller: This class provides important functionality to manage resources needed for a draw. +/******************************************************************************/ +#pragma once + +#if __has_include("glm/glm/glm.hpp") // legacy +#include "glm/glm/glm.hpp" +#else +#include "glm/glm.hpp" // new build system +#endif +#include +#include +#include +#include +#include +#include +#include "CTriangleMesh.h" +#include "Shaders/globals.hlsl" + +using namespace nbl; +using namespace nbl::video; +using namespace nbl::core; +using namespace nbl::asset; + +static_assert(sizeof(DrawObject) == 16u); + +// ! DrawResourcesFiller +// ! This class provides important functionality to manage resources needed for a draw. +// ! Drawing new objects (polylines, hatches, etc.) should go through this function. +// ! Contains all the scene resources (buffers and images) +// ! In the case of overflow (i.e. 
not enough remaining v-ram) will auto-submit/render everything recorded so far, +// and additionally makes sure relavant data needed for those draw calls are present in memory +struct DrawResourcesFiller +{ + struct DrawCallData + { + uint64_t indexBufferOffset; + uint64_t indexCount; + uint64_t triangleMeshVerticesBaseAddress; + uint32_t triangleMeshMainObjectIndex; + }; + +public: + + // We pack multiple data types in a single buffer, we need to makes sure each offset starts aligned to avoid mis-aligned accesses + static constexpr size_t GPUStructsMaxNaturalAlignment = 8u; + static constexpr size_t MinimumDrawResourcesMemorySize = 512u * 1 << 20u; // 512MB + + /** + * @brief Attempts to allocate draw resources within a given VRAM budget, retrying with progressively smaller sizes on failure. + * + * This function preserves the initial image-to-buffer memory ratio. If the initial sizes are too small, + * it scales them up to meet a minimum required threshold. On allocation failure, it reduces the memory + * sizes by a specified percentage and retries, until it either succeeds or the number of attempts exceeds `maxTries`. + * + * @param logicalDevice Pointer to the logical device used for allocation. + * @param maxImageMemorySize Initial image memory size (in bytes) to attempt allocation with. + * @param maxBufferMemorySize Initial buffer memory size (in bytes) to attempt allocation with. + * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried. + * @param reductionPercent The percentage by which to reduce the memory sizes after each failed attempt (e.g., 10 means reduce by 10%). + * @param maxTries Maximum number of attempts to try reducing and allocating memory. + * + * @return true if the allocation succeeded at any iteration; false if all attempts failed. 
+ */ + bool allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, std::span memoryTypeIndexTryOrder, uint32_t reductionPercent = 10u, uint32_t maxTries = 32u); + + /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU + /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. + bool pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit); + + /// @brief general parent struct for 1.ReservedCompute and 2.CPUGenerated Resources + struct ResourceBase + { + static constexpr size_t InvalidBufferOffset = ~0u; + size_t bufferOffset = InvalidBufferOffset; // set when copy to gpu buffer is issued + virtual size_t getCount() const = 0; + virtual size_t getStorageSize() const = 0; + virtual size_t getAlignedStorageSize() const { return core::alignUp(getStorageSize(), GPUStructsMaxNaturalAlignment); } + }; + + /// @brief ResourceBase reserved for compute shader stages input/output + template + struct ReservedComputeResource : ResourceBase + { + size_t count = 0ull; + size_t getCount() const override { return count; } + size_t getStorageSize() const override { return count * sizeof(T); } + }; + + /// @brief ResourceBase which is filled by CPU, packed and sent to GPU + template + struct CPUGeneratedResource : ResourceBase + { + core::vector vector; + size_t getCount() const { return vector.size(); } + size_t getStorageSize() const { return vector.size() * sizeof(T); } + + /// @return pointer to start of the data to be filled, up to additionalCount + T* increaseCountAndGetPtr(size_t additionalCount) + { + size_t offset = vector.size(); + vector.resize(offset + additionalCount); + return &vector[offset]; + } + + /// @brief increases size of general-purpose resources that hold bytes + /// @param alignment: Alignment of the pointer 
returned to be filled, should be PoT and <= GPUStructsMaxNaturalAlignment, only use this if storing raw bytes in vector + /// @return pointer to start of the data to be filled, up to additional size + size_t increaseSizeAndGetOffset(size_t additionalSize, size_t alignment) + { + assert(core::isPoT(alignment) && alignment <= GPUStructsMaxNaturalAlignment); + size_t offset = core::alignUp(vector.size(), alignment); + vector.resize(offset + additionalSize); + return offset; + } + + uint32_t addAndGetOffset(const T& val) + { + vector.push_back(val); + return vector.size() - 1u; + } + + T* data() { return vector.data(); } + }; + + /// @brief struct to hold all resources + // TODO: rename to staged resources buffers or something like that + struct ResourcesCollection + { + // auto-submission level 0 resources (settings that mainObj references) + CPUGeneratedResource dtmSettings; + + // auto-submission level 1 buffers (mainObj that drawObjs references, if all drawObjs+idxBuffer+geometryInfo doesn't fit into mem this will be broken down into many) + CPUGeneratedResource mainObjects; + + // auto-submission level 2 buffers + CPUGeneratedResource drawObjects; + CPUGeneratedResource indexBuffer; // TODO: this is going to change to ReservedComputeResource where index buffer gets filled by compute shaders + CPUGeneratedResource geometryInfo; // general purpose byte buffer for custom data for geometries (eg. 
line points, bezier definitions, aabbs) + + // Get Total memory consumption, If all ResourcesCollection get packed together with GPUStructsMaxNaturalAlignment + // used to decide the remaining memory and when to overflow + size_t calculateTotalConsumption() const + { + return + dtmSettings.getAlignedStorageSize() + + mainObjects.getAlignedStorageSize() + + drawObjects.getAlignedStorageSize() + + indexBuffer.getAlignedStorageSize() + + geometryInfo.getAlignedStorageSize(); + } + }; + + DrawResourcesFiller(); + + DrawResourcesFiller(smart_refctd_ptr&& device, smart_refctd_ptr&& bufferUploadUtils, IQueue* copyQueue, core::smart_refctd_ptr&& logger); + + typedef std::function SubmitFunc; + void setSubmitDrawsFunction(const SubmitFunc& func); + + // Must be called at the end of each frame. + // right before submitting the main draw that uses the currently queued geometry, images, or other objects/resources. + // Registers the semaphore/value that will signal completion of this frame's draw, + // This allows future frames to safely deallocate or evict resources used in the current frame by waiting on this signal before reuse or destruction. 
+ // `drawSubmitWaitValue` should reference the wait value of the draw submission finishing this frame using the `intendedNextSubmit`; + void markFrameUsageComplete(uint64_t drawSubmitWaitValue); + + void drawTriangleMesh( + const CTriangleMesh& mesh, + const DTMSettingsInfo& dtmSettingsInfo, + SIntendedSubmitInfo& intendedNextSubmit); + + /// @brief resets staging buffers and images + void reset() + { + resetDrawObjects(); + resetMainObjects(); + resetDTMSettings(); + + drawCalls.clear(); + } + + /// @brief collection of all the resources that will eventually be reserved or copied to in the resourcesGPUBuffer, will be accessed via individual BDA pointers in shaders + const ResourcesCollection& getResourcesCollection() const { return resourcesCollection; } + /// @brief buffer containing all non-texture type resources + nbl::core::smart_refctd_ptr getResourcesGPUBuffer() const { return resourcesGPUBuffer; } + /// @return how far resourcesGPUBuffer was copied to by `finalizeAllCopiesToGPU` in `resourcesCollection` + const size_t getCopiedResourcesSize() { return copiedResourcesSize; } + const core::vector& getDrawCalls() const { return drawCalls; } + +private: + /** + * @brief Attempts to allocate a single contiguous device-local memory block for draw resources, divided into image and buffer sections. + * + * The function allocates a single memory block and splits it into image and buffer arenas. + * + * @param logicalDevice Pointer to the logical device used for memory allocation and resource creation. + * @param requiredImageMemorySize The size in bytes of the memory required for images. + * @param requiredBufferMemorySize The size in bytes of the memory required for buffers. + * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried. + * + * @return true if the memory allocation and resource setup succeeded; false otherwise. 
+ */ + bool allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize, std::span memoryTypeIndexTryOrder); + + /// @brief Records GPU copy commands for all staged buffer resourcesCollection into the active command buffer. + bool pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resourcesCollection); + + // Gets resource index to the active main object data + // TODO: submit if overflow + uint32_t acquireActiveMainObjectIndex(SIntendedSubmitInfo& intendedNextSubmit); + + uint32_t acquireActiveDTMSettingsIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); + + uint32_t addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); + + uint32_t addDTMSettings_Internal(const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit); + + inline void beginMainObject(MainObjectType type) + { + activeMainObjectType = type; + activeMainObjectIndex = InvalidMainObjectIdx; + } + + inline void endMainObject() + { + activeMainObjectType = MainObjectType::NONE; + activeMainObjectIndex = InvalidMainObjectIdx; + } + + inline void setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo) + { + activeDTMSettings = dtmSettingsInfo; + activeDTMSettingsIndex = InvalidDTMSettingsIdx; + } + + inline const size_t calculateRemainingResourcesSize() const + { + assert(resourcesGPUBuffer->getSize() >= resourcesCollection.calculateTotalConsumption()); + return resourcesGPUBuffer->getSize() - resourcesCollection.calculateTotalConsumption(); + } + + void resetMainObjects() + { + resourcesCollection.mainObjects.vector.clear(); + activeMainObjectIndex = InvalidMainObjectIdx; + } + + // these resources are data related to chunks of a whole mainObject + void resetDrawObjects() + { + resourcesCollection.drawObjects.vector.clear(); + resourcesCollection.indexBuffer.vector.clear(); + resourcesCollection.geometryInfo.vector.clear(); + } + + 
void resetDTMSettings() + { + resourcesCollection.dtmSettings.vector.clear(); + activeDTMSettingsIndex = InvalidDTMSettingsIdx; + } + +private: + nbl::system::logger_opt_smart_ptr m_logger = nullptr; + + smart_refctd_ptr m_device; + core::smart_refctd_ptr m_bufferUploadUtils; + + IQueue* m_copyQueue; + + // FrameIndex used as a criteria for resource/image eviction in case of limitations + uint32_t currentFrameIndex = 0u; + + // DrawCalls Data + core::vector drawCalls; + + // ResourcesCollection and packed into GPUBuffer + ResourcesCollection resourcesCollection; + IDeviceMemoryAllocator::SAllocation buffersMemoryArena; + nbl::core::smart_refctd_ptr resourcesGPUBuffer; + size_t copiedResourcesSize; + + SubmitFunc submitDraws; + + // Active Resources we need to keep track of and push to resources buffer if needed. + MainObjectType activeMainObjectType; + uint32_t activeMainObjectIndex = InvalidMainObjectIdx; + + DTMSettingsInfo activeDTMSettings; + uint32_t activeDTMSettingsIndex = InvalidDTMSettingsIdx; +}; \ No newline at end of file diff --git a/75_CAD_3D/config.json.template b/75_CAD_3D/config.json.template new file mode 100644 index 000000000..f961745c1 --- /dev/null +++ b/75_CAD_3D/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + "configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/75_CAD_3D/main.cpp b/75_CAD_3D/main.cpp new file mode 100644 index 000000000..1b755eebd --- /dev/null +++ b/75_CAD_3D/main.cpp @@ -0,0 +1,1169 @@ +// TODO: Copyright notice +#include 
"nbl/this_example/builtin/build/spirv/keys.hpp" + +#include "nbl/examples/examples.hpp" + +using namespace nbl::hlsl; +using namespace nbl; +using namespace core; +using namespace system; +using namespace asset; +using namespace ui; +using namespace video; + +#include +#include "DrawResourcesFiller.h" + +#include "nbl/builtin/hlsl/math/linalg/transform.hlsl" +#include "nbl/builtin/hlsl/math/thin_lens_projection.hlsl" +#include "DTMMeshes.h" + +class CEventCallback : public ISimpleManagedSurface::ICallback +{ +public: + CEventCallback(nbl::core::smart_refctd_ptr&& m_inputSystem, nbl::system::logger_opt_smart_ptr&& logger) : m_inputSystem(std::move(m_inputSystem)), m_logger(std::move(logger)) {} + CEventCallback() {} + + void setLogger(nbl::system::logger_opt_smart_ptr& logger) + { + m_logger = logger; + } + void setInputSystem(nbl::core::smart_refctd_ptr&& m_inputSystem) + { + m_inputSystem = std::move(m_inputSystem); + } +private: + + void onMouseConnected_impl(nbl::core::smart_refctd_ptr&& mch) override + { + m_logger.log("A mouse %p has been connected", nbl::system::ILogger::ELL_INFO, mch.get()); + m_inputSystem.get()->add(m_inputSystem.get()->m_mouse, std::move(mch)); + } + void onMouseDisconnected_impl(nbl::ui::IMouseEventChannel* mch) override + { + m_logger.log("A mouse %p has been disconnected", nbl::system::ILogger::ELL_INFO, mch); + m_inputSystem.get()->remove(m_inputSystem.get()->m_mouse, mch); + } + void onKeyboardConnected_impl(nbl::core::smart_refctd_ptr&& kbch) override + { + m_logger.log("A keyboard %p has been connected", nbl::system::ILogger::ELL_INFO, kbch.get()); + m_inputSystem.get()->add(m_inputSystem.get()->m_keyboard, std::move(kbch)); + } + void onKeyboardDisconnected_impl(nbl::ui::IKeyboardEventChannel* kbch) override + { + m_logger.log("A keyboard %p has been disconnected", nbl::system::ILogger::ELL_INFO, kbch); + m_inputSystem.get()->remove(m_inputSystem.get()->m_keyboard, kbch); + } + +private: + nbl::core::smart_refctd_ptr m_inputSystem 
= nullptr; + nbl::system::logger_opt_smart_ptr m_logger = nullptr; +}; + +class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources +{ + public: + CSwapchainResources() = default; + + // TODO: this is a prototype, depth images should be probably not created in the initialize function + void initialize(const smart_refctd_ptr& window, const core::smart_refctd_ptr& device) + { + asset::E_FORMAT depthFormat = asset::EF_D32_SFLOAT; + + for (auto& depthImage : depthImages) + { + IGPUImage::SCreationParams imgInfo; + imgInfo.format = depthFormat; + imgInfo.type = IGPUImage::ET_2D; + imgInfo.extent.width = window->getWidth(); + imgInfo.extent.height = window->getHeight(); + imgInfo.extent.depth = 1u; + imgInfo.mipLevels = 1u; + imgInfo.arrayLayers = 1u; + imgInfo.samples = asset::ICPUImage::ESCF_1_BIT; + imgInfo.tiling = IGPUImage::TILING::OPTIMAL; + imgInfo.usage = asset::IImage::E_USAGE_FLAGS::EUF_RENDER_ATTACHMENT_BIT; + + depthImage = device->createImage(std::move(imgInfo)); + auto memReq = depthImage->getMemoryReqs(); + memReq.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + device->allocate(memReq, depthImage.get()); + } + } + + inline E_FORMAT deduceRenderpassFormat(ISurface* surface, IPhysicalDevice* physDev) + { + ISwapchain::SCreationParams swapchainParams = {.surface=smart_refctd_ptr(surface), }; + // Need to choose a surface format + if (!swapchainParams.deduceFormat(physDev, getPreferredFormats(), getPreferredEOTFs(), getPreferredColorPrimaries())) + return EF_UNKNOWN; + return swapchainParams.surfaceFormat.format; + } + + // When needing to recreate the framebuffer, We need to have access to a renderpass compatible to renderpass used to render to the framebuffer + inline void setCompatibleRenderpass(core::smart_refctd_ptr renderpass) + { + m_renderpass = renderpass; + } + + inline IGPUFramebuffer* getFramebuffer(const uint8_t imageIx) + { + if (imageIx(m_renderpass->getOriginDevice()); + + const auto 
swapchain = getSwapchain(); + const auto count = swapchain->getImageCount(); + const auto& sharedParams = swapchain->getCreationParameters().sharedParams; + for (uint8_t i=0u; icreateImageView({ + .flags = IGPUImageView::ECF_NONE, + .subUsages = IGPUImage::EUF_RENDER_ATTACHMENT_BIT, + .image = core::smart_refctd_ptr(getImage(i)), + .viewType = IGPUImageView::ET_2D, + .format = getImage(i)->getCreationParameters().format + }); + auto depthImageView = device->createImageView({ + .flags = IGPUImageView::ECF_NONE, + .subUsages = IGPUImage::EUF_RENDER_ATTACHMENT_BIT, + .image = core::smart_refctd_ptr(depthImages[i]), + .viewType = IGPUImageView::ET_2D, + .format = depthImages[i]->getCreationParameters().format, + .subresourceRange = { + .aspectMask = asset::IImage::EAF_DEPTH_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + } + }); + m_framebuffers[i] = device->createFramebuffer({ { + .renderpass = core::smart_refctd_ptr(m_renderpass), + .depthStencilAttachments = &depthImageView.get(), + .colorAttachments = &imageView.get(), + .width = sharedParams.width, + .height = sharedParams.height + }}); + if (!m_framebuffers[i]) + return false; + } + return true; + } + + // Per-swapchain + core::smart_refctd_ptr m_renderpass; + std::array,ISwapchain::MaxImages> m_framebuffers; + std::array, ISwapchain::MaxImages> depthImages = {}; +}; + +class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplication, public nbl::examples::BuiltinResourcesApplication +{ + using device_base_t = nbl::examples::SimpleWindowedApplication; + using asset_base_t = nbl::examples::BuiltinResourcesApplication; + using clock_t = std::chrono::steady_clock; + + constexpr static uint32_t WindowWidthRequest = 1600u; + constexpr static uint32_t WindowHeightRequest = 900u; + constexpr static uint32_t MaxFramesInFlight = 3u; + constexpr static uint32_t MaxSubmitsInFlight = 16u; +public: + + void allocateResources() + { + // TODO: currently using the same 
utils for buffers and images, make them separate staging buffers + drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_device), core::smart_refctd_ptr(m_utils), getGraphicsQueue(), core::smart_refctd_ptr(m_logger)); + + // Just wanting to try memory type indices with device local flag, TODO: later improve to prioritize pure device local + std::vector deviceLocalMemoryTypeIndices; + for (uint32_t i = 0u; i < m_physicalDevice->getMemoryProperties().memoryTypeCount; ++i) + { + const auto& memType = m_physicalDevice->getMemoryProperties().memoryTypes[i]; + if (memType.propertyFlags.hasFlags(IDeviceMemoryAllocation::EMPF_DEVICE_LOCAL_BIT)) + deviceLocalMemoryTypeIndices.push_back(i); + } + + size_t maxImagesMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB + size_t maxBufferMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB + + drawResourcesFiller.allocateDrawResourcesWithinAvailableVRAM(m_device.get(), maxImagesMemSize, maxBufferMemSize, deviceLocalMemoryTypeIndices); + + { + IGPUBuffer::SCreationParams globalsCreationParams = {}; + globalsCreationParams.size = sizeof(Globals); + globalsCreationParams.usage = IGPUBuffer::EUF_UNIFORM_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF; + m_globalsBuffer = m_device->createBuffer(std::move(globalsCreationParams)); + + IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = m_globalsBuffer->getMemoryReqs(); + memReq.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + auto globalsBufferMem = m_device->allocate(memReq, m_globalsBuffer.get()); + } + + // pseudoStencil + { + asset::E_FORMAT pseudoStencilFormat = asset::EF_R32_UINT; + { + IGPUImage::SCreationParams imgInfo; + imgInfo.format = pseudoStencilFormat; + imgInfo.type = IGPUImage::ET_2D; + imgInfo.extent.width = m_window->getWidth(); + imgInfo.extent.height = m_window->getHeight(); + imgInfo.extent.depth = 1u; + imgInfo.mipLevels = 1u; + imgInfo.arrayLayers = 1u; + imgInfo.samples = 
asset::ICPUImage::ESCF_1_BIT; + imgInfo.flags = asset::IImage::E_CREATE_FLAGS::ECF_NONE; + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT; + // [VKTODO] imgInfo.initialLayout = IGPUImage::EL_UNDEFINED; + imgInfo.tiling = IGPUImage::TILING::OPTIMAL; + + auto image = m_device->createImage(std::move(imgInfo)); + auto imageMemReqs = image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(imageMemReqs, image.get()); + + image->setObjectDebugName("pseudoStencil Image"); + + IGPUImageView::SCreationParams imgViewInfo; + imgViewInfo.image = std::move(image); + imgViewInfo.format = pseudoStencilFormat; + imgViewInfo.viewType = IGPUImageView::ET_2D; + imgViewInfo.flags = IGPUImageView::E_CREATE_FLAGS::ECF_NONE; + imgViewInfo.subresourceRange.aspectMask = asset::IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + imgViewInfo.subresourceRange.baseArrayLayer = 0u; + imgViewInfo.subresourceRange.baseMipLevel = 0u; + imgViewInfo.subresourceRange.layerCount = 1u; + imgViewInfo.subresourceRange.levelCount = 1u; + + pseudoStencilImageView = m_device->createImageView(std::move(imgViewInfo)); + } + } + + // colorStorage + { + asset::E_FORMAT colorStorageFormat = asset::EF_R32_UINT; + { + IGPUImage::SCreationParams imgInfo; + imgInfo.format = colorStorageFormat; + imgInfo.type = IGPUImage::ET_2D; + imgInfo.extent.width = m_window->getWidth(); + imgInfo.extent.height = m_window->getHeight(); + imgInfo.extent.depth = 1u; + imgInfo.mipLevels = 1u; + imgInfo.arrayLayers = 1u; + imgInfo.samples = asset::ICPUImage::ESCF_1_BIT; + imgInfo.flags = asset::IImage::E_CREATE_FLAGS::ECF_NONE; + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT; + // [VKTODO] imgInfo.initialLayout = IGPUImage::EL_UNDEFINED; + imgInfo.tiling = IGPUImage::TILING::OPTIMAL; + + auto image = m_device->createImage(std::move(imgInfo)); + auto imageMemReqs = 
image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(imageMemReqs, image.get()); + + image->setObjectDebugName("colorStorage Image"); + + IGPUImageView::SCreationParams imgViewInfo; + imgViewInfo.image = std::move(image); + imgViewInfo.format = colorStorageFormat; + imgViewInfo.viewType = IGPUImageView::ET_2D; + imgViewInfo.flags = IGPUImageView::E_CREATE_FLAGS::ECF_NONE; + imgViewInfo.subresourceRange.aspectMask = asset::IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + imgViewInfo.subresourceRange.baseArrayLayer = 0u; + imgViewInfo.subresourceRange.baseMipLevel = 0u; + imgViewInfo.subresourceRange.layerCount = 1u; + imgViewInfo.subresourceRange.levelCount = 1u; + + colorStorageImageView = m_device->createImageView(std::move(imgViewInfo)); + } + } + + // Initial Pipeline Transitions and Clearing of PseudoStencil and ColorStorage + // Recorded to Temporary CommandBuffer, Submitted to Graphics Queue, and Blocked on here + { + auto cmdPool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + smart_refctd_ptr tmpCmdBuffer; + cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &tmpCmdBuffer, 1 }); + auto tmpJobFinishedSema = m_device->createSemaphore(0ull); + + tmpCmdBuffer->begin(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + { + // Clear pseudoStencil + auto pseudoStencilImage = pseudoStencilImageView->getCreationParameters().image; + auto colorStorageImage = colorStorageImageView->getCreationParameters().image; + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t beforeClearImageBarrier[] = + { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::CLEAR_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, // could 
be ALL_TRANSFER but let's be specific we only want to CLEAR right now + } + // .ownershipOp. No queueFam ownership transfer + }, + .image = pseudoStencilImage.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u, + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + + tmpCmdBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeClearImageBarrier }); + + uint32_t pseudoStencilInvalidValue = core::bitfieldInsert(0u, 16777215, 8, 24); + IGPUCommandBuffer::SClearColorValue clear = {}; + clear.uint32[0] = pseudoStencilInvalidValue; + + asset::IImage::SSubresourceRange subresourceRange = {}; + subresourceRange.aspectMask = asset::IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + subresourceRange.baseArrayLayer = 0u; + subresourceRange.baseMipLevel = 0u; + subresourceRange.layerCount = 1u; + subresourceRange.levelCount = 1u; + + tmpCmdBuffer->clearColorImage(pseudoStencilImage.get(), asset::IImage::LAYOUT::GENERAL, &clear, 1u, &subresourceRange); + + // prepare pseudoStencilImage for usage in drawcall + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t beforeUsageImageBarriers[] = + { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::CLEAR_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS | ACCESS_FLAGS::MEMORY_WRITE_BITS, // could be ALL_TRANSFER but let's be specific we only want to CLEAR right now + } + // .ownershipOp. 
No queueFam ownership transfer + }, + .image = pseudoStencilImage.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u, + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::GENERAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS | ACCESS_FLAGS::MEMORY_WRITE_BITS, // could be ALL_TRANSFER but let's be specific we only want to CLEAR right now + } + // .ownershipOp. No queueFam ownership transfer + }, + .image = colorStorageImage.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u, + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + + tmpCmdBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeUsageImageBarriers }); + } + tmpCmdBuffer->end(); + + IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[1u] = { {.cmdbuf = tmpCmdBuffer.get() } }; + IQueue::SSubmitInfo::SSemaphoreInfo singalSemaphores[1] = {}; + singalSemaphores[0].semaphore = tmpJobFinishedSema.get(); + singalSemaphores[0].stageMask = asset::PIPELINE_STAGE_FLAGS::NONE; + singalSemaphores[0].value = 1u; + + IQueue::SSubmitInfo submitInfo = {}; + submitInfo.commandBuffers = cmdbufs; + submitInfo.waitSemaphores = {}; + submitInfo.signalSemaphores = singalSemaphores; + + getGraphicsQueue()->submit({ &submitInfo, 1u }); + + ISemaphore::SWaitInfo waitTmpJobFinish = { .semaphore = tmpJobFinishedSema.get(), .value = 1u}; + m_device->blockForSemaphores({ &waitTmpJobFinish, 1u }); + } + } + + smart_refctd_ptr createRenderpass( + E_FORMAT colorAttachmentFormat, + IGPURenderpass::LOAD_OP loadOp, + IImage::LAYOUT initialLayout, + IImage::LAYOUT 
finalLayout) + { + const IGPURenderpass::SCreationParams::SColorAttachmentDescription colorAttachments[] = { + {{ + { + .format = colorAttachmentFormat, + .samples = IGPUImage::ESCF_1_BIT, + .mayAlias = false + }, + /*.loadOp = */loadOp, + /*.storeOp = */IGPURenderpass::STORE_OP::STORE, + /*.initialLayout = */initialLayout, + /*.finalLayout = */finalLayout + }}, + IGPURenderpass::SCreationParams::ColorAttachmentsEnd + }; + + IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = { + {{ + { + .format = asset::EF_D32_SFLOAT, + .samples = IGPUImage::ESCF_1_BIT, + .mayAlias = false + }, + /*.loadOp = */{loadOp}, + /*.storeOp = */{IGPURenderpass::STORE_OP::STORE}, + /*.initialLayout = */{initialLayout}, + /*.finalLayout = */{IImage::LAYOUT::ATTACHMENT_OPTIMAL} + }}, + IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd + }; + + IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = { + {}, + IGPURenderpass::SCreationParams::SubpassesEnd + }; + + subpasses[0].colorAttachments[0] = {.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}; + subpasses[0].depthStencilAttachment = {{.render = {.attachmentIndex=0,.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}}; + + // We actually need external dependencies to ensure ordering of the Implicit Layout Transitions relative to the semaphore signals + const IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // wipe-transition to ATTACHMENT_OPTIMAL + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // we can have NONE as Sources because ???? 
+ .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // ATTACHMENT_OPTIMAL to PRESENT_SRC + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // we can have NONE as the Destinations because the spec says so about presents + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + + smart_refctd_ptr renderpass; + IGPURenderpass::SCreationParams params = {}; + params.colorAttachments = colorAttachments; + params.depthStencilAttachments = depthAttachments; + params.subpasses = subpasses; + params.dependencies = dependencies; + renderpass = m_device->createRenderpass(params); + if (!renderpass) + logFail("Failed to Create a Renderpass!"); + return renderpass; + } + + + // Yay thanks to multiple inheritance we cannot forward ctors anymore + inline ComputerAidedDesign(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD,_localOutputCWD,_sharedInputCWD,_sharedOutputCWD) {} + + // Will get called mid-initialization, via `filterDevices` between when the API Connection is created and Physical Device is chosen + inline core::vector getSurfaces() const override + { + // So let's create our Window and Surface then! 
+ if (!m_surface) + { + { + auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = windowCallback; + params.width = WindowWidthRequest; + params.height = WindowHeightRequest; + params.x = 32; + params.y = 32; + // Don't want to have a window lingering about before we're ready so create it hidden. + // Only programmatic resize, not regular. + params.flags = IWindow::ECF_BORDERLESS|IWindow::ECF_RESIZABLE; + params.windowCaption = "CAD 3D Playground"; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api),smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = CSimpleResizeSurface::create(std::move(surface)); + } + if (m_surface) + return {{m_surface->getSurface()/*,EQF_NONE*/}}; + return {}; + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + + // Remember to call the base class initialization! + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + // Let's just use the same queue since there's no need for async present + if (!m_surface) + return logFail("Could not create Window & Surface!"); + + auto scResources = std::make_unique(); + scResources->initialize(m_window, m_device); + const auto format = scResources->deduceRenderpassFormat(m_surface->getSurface(), m_physicalDevice); // TODO: DO I need to recreate render passes if swapchain gets recreated with different format? 
+ renderpassInitial = createRenderpass(format, IGPURenderpass::LOAD_OP::CLEAR, IImage::LAYOUT::UNDEFINED, IImage::LAYOUT::ATTACHMENT_OPTIMAL); + renderpassInBetween = createRenderpass(format, IGPURenderpass::LOAD_OP::LOAD, IImage::LAYOUT::ATTACHMENT_OPTIMAL, IImage::LAYOUT::ATTACHMENT_OPTIMAL); + renderpassFinal = createRenderpass(format, IGPURenderpass::LOAD_OP::LOAD, IImage::LAYOUT::ATTACHMENT_OPTIMAL, IImage::LAYOUT::PRESENT_SRC); + const auto compatibleRenderPass = renderpassInitial; // all 3 above are compatible + + scResources->setCompatibleRenderpass(compatibleRenderPass); + + if (!m_surface->init(getGraphicsQueue(),std::move(scResources),{})) + return logFail("Could not initialize the Surface!"); + + allocateResources(); + + // Create DescriptorSetLayout, PipelineLayout and update DescriptorSets + { + video::IGPUDescriptorSetLayout::SBinding bindingsSet0[] = { + { + .binding = 0u, + .type = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1u, + } + }; + m_descriptorSetLayout0 = m_device->createDescriptorSetLayout(bindingsSet0); + if (!m_descriptorSetLayout0) + return logFail("Failed to Create Descriptor Layout 0"); + + const asset::SPushConstantRange range = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .offset = 0, + .size = sizeof(PushConstants) + }; + + const video::IGPUDescriptorSetLayout* const layouts[1u] = { m_descriptorSetLayout0.get() }; + + smart_refctd_ptr descriptorPool = nullptr; + { + const uint32_t setCounts[2u] = { 1u, 1u }; + descriptorPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, layouts, setCounts); + if (!descriptorPool) + return logFail("Failed to Create Descriptor Pool"); + } + + // Update descriptor sets + { + m_descriptorSet0 = 
descriptorPool->createDescriptorSet(smart_refctd_ptr(m_descriptorSetLayout0)); + + video::IGPUDescriptorSet::SDescriptorInfo descriptorInfosSet0[1] = {}; + descriptorInfosSet0[0u].info.buffer.offset = 0u; + descriptorInfosSet0[0u].info.buffer.size = m_globalsBuffer->getCreationParams().size; + descriptorInfosSet0[0u].desc = m_globalsBuffer; + + video::IGPUDescriptorSet::SWriteDescriptorSet descriptorUpdates[1] = {}; + + // globals + descriptorUpdates[0u].dstSet = m_descriptorSet0.get(); + descriptorUpdates[0u].binding = 0u; + descriptorUpdates[0u].arrayElement = 0u; + descriptorUpdates[0u].count = 1u; + descriptorUpdates[0u].info = &descriptorInfosSet0[0u]; + + m_device->updateDescriptorSets(1, descriptorUpdates, 0u, nullptr); + } + + m_pipelineLayout = m_device->createPipelineLayout({ &range,1 }, core::smart_refctd_ptr(m_descriptorSetLayout0), nullptr, nullptr, nullptr); + } + + smart_refctd_ptr mainPipelineFragmentShaders = {}; + smart_refctd_ptr mainPipelineVertexShader = {}; + { + // Load Custom Shader + auto loadPrecompiledShader = [&]() -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Failed to load a precompiled shader of key \"%s\".", ILogger::ELL_ERROR, ShaderKey); + return nullptr; + } + + + auto shader = IAsset::castDown(assets[0]); + return shader; + }; + + mainPipelineFragmentShaders = loadPrecompiledShader.operator()<"main_pipeline_fragment_shader">(); // "../shaders/main_pipeline/fragment_shader.hlsl" + mainPipelineVertexShader = loadPrecompiledShader.operator()<"main_pipeline_vertex_shader">(); // "../shaders/main_pipeline/vertex_shader.hlsl" + } + + IGPUGraphicsPipeline::SCreationParams mainGraphicsPipelineParams = {}; + 
mainGraphicsPipelineParams.layout = m_pipelineLayout.get(); + mainGraphicsPipelineParams.cached = { + .vertexInput = {}, + .primitiveAssembly = { + .primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST, + }, + .rasterization = { + .polygonMode = EPM_FILL, + .faceCullingMode = EFCM_NONE, + .depthWriteEnable = true, + .depthCompareOp = asset::E_COMPARE_OP::ECO_GREATER + }, + .blend = {}, + }; + mainGraphicsPipelineParams.renderpass = compatibleRenderPass.get(); + + assert(mainGraphicsPipelineParams.cached.rasterization.depthTestEnable()); + + // Create Main Graphics Pipelines + { + video::IGPUPipelineBase::SShaderSpecInfo specInfo[2] = { + { .shader = mainPipelineVertexShader.get(), .entryPoint = "vtxMain" }, + { .shader = mainPipelineFragmentShaders.get(), .entryPoint = "fragMain" }, + }; + + IGPUGraphicsPipeline::SCreationParams params[1] = { mainGraphicsPipelineParams }; + params[0].vertexShader = specInfo[0]; + params[0].fragmentShader = specInfo[1]; + + if (!m_device->createGraphicsPipelines(nullptr,params,&m_graphicsPipeline)) + return logFail("Graphics Pipeline Creation Failed."); + } + + // Create the commandbuffers and pools, this time properly 1 pool per FIF + m_graphicsCommandPool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_graphicsCommandPool) + return logFail("Couldn't create Command Pool!"); + if (!m_graphicsCommandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{m_commandBuffersInFlight.data(),MaxSubmitsInFlight})) + return logFail("Couldn't create Command Buffers!"); + + // Create the Semaphores + m_renderSemaphore = m_device->createSemaphore(0ull); + m_renderSemaphore->setObjectDebugName("m_renderSemaphore"); + m_overflowSubmitScratchSemaphore = m_device->createSemaphore(0ull); + m_overflowSubmitScratchSemaphore->setObjectDebugName("m_overflowSubmitScratchSemaphore"); + if (!m_renderSemaphore || !m_overflowSubmitScratchSemaphore) + return 
logFail("Failed to Create Semaphores!"); + + // Set Queue and ScratchSemaInfo -> wait semaphores and command buffers will be modified by workLoop each frame + m_intendedNextSubmit.queue = getGraphicsQueue(); + m_intendedNextSubmit.scratchSemaphore = { + .semaphore = m_overflowSubmitScratchSemaphore.get(), + .value = 0ull, + }; + for (uint32_t i = 0; i < MaxSubmitsInFlight; ++i) + m_commandBufferInfos[i] = { .cmdbuf = m_commandBuffersInFlight[i].get() }; + m_intendedNextSubmit.scratchCommandBuffers = m_commandBufferInfos; + m_currentRecordingCommandBufferInfo = &m_commandBufferInfos[0]; + + // camera + { + const core::vectorSIMDf cameraPosition(300.0f, 300.0f, 300.0f); + const core::vectorSIMDf cameraTarget(0.0f, 0.0f, 0.0f); + const float32_t aspectRatio = static_cast(m_window->getWidth()) / static_cast(m_window->getHeight()); + float32_t4x4 projectionMatrix = hlsl::math::thin_lens::lhPerspectiveFovMatrix(core::radians(60.0f), aspectRatio, 0.1f, 10000.0f); + camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); + camera.setMoveSpeed(50.0f); + } + + return true; + } + + // We do a very simple thing, display an image and wait `DisplayImageMs` to show it + inline void workLoopBody() override + { + auto now = std::chrono::high_resolution_clock::now(); + auto dtMilliseconds = std::chrono::duration_cast(now - lastTime); + double dt = dtMilliseconds.count(); + lastTime = now; + m_timeElapsed += dt; + + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + + { + camera.beginInputProcessing(dtMilliseconds); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + camera.keyboardProcess(events); + + for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++) + { + auto ev = *eventIt; + + if (ev.action == 
nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_1) + { + m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + } + if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_2) + { + m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + } + if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_3) + { + m_shadingModeExample = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + } + } + } + , m_logger.get()); + camera.endInputProcessing(dtMilliseconds); + } + + if (!beginFrameRender()) + return; + + const IQueue::SSubmitInfo::SSemaphoreInfo acquired = { + .semaphore = m_currentImageAcquire.semaphore, + .value = m_currentImageAcquire.acquireCount, + .stageMask = asset::PIPELINE_STAGE_FLAGS::NONE // NONE for Acquire, right? Yes, the Spec Says so! + }; + + // prev frame done using the scene data (is in post process stage) + const IQueue::SSubmitInfo::SSemaphoreInfo prevFrameRendered = { + .semaphore = m_renderSemaphore.get(), + .value = m_realFrameIx, + .stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + }; + + IQueue::SSubmitInfo::SSemaphoreInfo waitSems[2u] = { acquired, prevFrameRendered }; + m_intendedNextSubmit.waitSemaphores = waitSems; + + addObjects(m_intendedNextSubmit); + + endFrameRender(m_intendedNextSubmit); + } + + bool beginFrameRender() + { + // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. + const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); + // We block for semaphores for 2 reasons here: + // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! 
[MaxFramesInFlight] + // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. [MaxAcquiresInFlight] + if (m_realFrameIx>=framesInFlight) + { + const ISemaphore::SWaitInfo cmdbufDonePending[] = { + { + .semaphore = m_renderSemaphore.get(), + .value = m_realFrameIx+1-framesInFlight + } + }; + if (m_device->blockForSemaphores(cmdbufDonePending)!=ISemaphore::WAIT_RESULT::SUCCESS) + return false; + } + + // Acquire + m_currentImageAcquire = m_surface->acquireNextImage(); + if (!m_currentImageAcquire) + return false; + + const bool beganSuccess = m_intendedNextSubmit.beginNextCommandBuffer(m_currentRecordingCommandBufferInfo); + assert(beganSuccess); + auto* cb = m_currentRecordingCommandBufferInfo->cmdbuf; + + // safe to proceed + // no need to reset and begin new command buffers as SIntendedSubmitInfo already handled that. + // cb->reset(video::IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + // cb->begin(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cb->beginDebugMarker("Frame"); + + nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo; + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.68f, 0.85f, 0.90f, 1.0f} }; + { + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + + IGPUCommandBuffer::SClearDepthStencilValue depthClear = + { + .depth = 0.0f, + .stencil = 0 + }; + + beginInfo = { + .renderpass = renderpassInitial.get(), + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = &depthClear, + .renderArea = currentRenderArea + }; + } + + cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + cb->endRenderPass(); + + return true; + } + + void submitDraws(SIntendedSubmitInfo& intendedSubmitInfo, bool 
inBetweenSubmit) + { + drawResourcesFiller.pushAllUploads(intendedSubmitInfo); + + m_currentRecordingCommandBufferInfo = intendedSubmitInfo.getCommandBufferForRecording(); // drawResourcesFiller.pushAllUploads might've overflow submitted and changed the current recording command buffer + + // Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state + auto* cb = m_currentRecordingCommandBufferInfo->cmdbuf; + + const auto& resourcesCollection = drawResourcesFiller.getResourcesCollection(); + const auto& resourcesGPUBuffer = drawResourcesFiller.getResourcesGPUBuffer(); + + Globals globalData = {}; + uint64_t baseAddress = resourcesGPUBuffer->getDeviceAddress(); + globalData.pointers = { + .mainObjects = baseAddress + resourcesCollection.mainObjects.bufferOffset, + .drawObjects = baseAddress + resourcesCollection.drawObjects.bufferOffset, + .geometryBuffer = baseAddress + resourcesCollection.geometryInfo.bufferOffset, + .dtmSettings = baseAddress + resourcesCollection.dtmSettings.bufferOffset, + }; + globalData.antiAliasingFactor = 1.0f; + SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer}; + bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData); + assert(updateSuccess); + + asset::SViewport vp = + { + .x = 0u, + .y = 0u, + .width = static_cast(m_window->getWidth()), + .height = static_cast(m_window->getHeight()), + .minDepth = 1.f, + .maxDepth = 0.f, + }; + cb->setViewport(0u, 1u, &vp); + + VkRect2D scissor = + { + .offset = { 0, 0 }, + .extent = { m_window->getWidth(), m_window->getHeight() }, + }; + cb->setScissor(0u, 1u, &scissor); + + // pipelineBarriersBeforeDraw + { + constexpr uint32_t MaxBufferBarriersCount = 2u; + uint32_t bufferBarriersCount = 0u; + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t bufferBarriers[MaxBufferBarriersCount]; + + const auto& resourcesCollection = 
drawResourcesFiller.getResourcesCollection(); + + if (m_globalsBuffer->getSize() > 0u) + { + auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; + bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; + bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; + bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; + bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::UNIFORM_READ_BIT; + bufferBarrier.range = + { + .offset = 0u, + .size = m_globalsBuffer->getSize(), + .buffer = m_globalsBuffer, + }; + } + if (drawResourcesFiller.getCopiedResourcesSize() > 0u) + { + auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; + bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; + bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; + bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_INPUT_BITS | PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; + bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::MEMORY_READ_BITS | ACCESS_FLAGS::MEMORY_WRITE_BITS; + bufferBarrier.range = + { + .offset = 0u, + .size = drawResourcesFiller.getCopiedResourcesSize(), + .buffer = drawResourcesFiller.getResourcesGPUBuffer(), + }; + } + cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .bufBarriers = {bufferBarriers, bufferBarriersCount}, .imgBarriers = {} }); + } + + nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo; + VkRect2D currentRenderArea; + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,0.f} }; + IGPUCommandBuffer::SClearDepthStencilValue depthClearValue = { + .depth = 1.0f, + .stencil = 0 + }; + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + + beginInfo = { + .renderpass = 
(inBetweenSubmit) ? renderpassInBetween.get():renderpassFinal.get(), + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = &depthClearValue, + .renderArea = currentRenderArea + }; + } + cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + IGPUDescriptorSet* descriptorSets[] = { m_descriptorSet0.get() }; + cb->bindDescriptorSets(asset::EPBP_GRAPHICS, m_pipelineLayout.get(), 0u, 1u, descriptorSets); + + cb->bindGraphicsPipeline(m_graphicsPipeline.get()); + + for (auto& drawCall : drawResourcesFiller.getDrawCalls()) + { + cb->bindIndexBuffer({ .offset = resourcesCollection.geometryInfo.bufferOffset + drawCall.indexBufferOffset, .buffer = drawResourcesFiller.getResourcesGPUBuffer()}, asset::EIT_32BIT); + + PushConstants pc = { + .triangleMeshVerticesBaseAddress = drawCall.triangleMeshVerticesBaseAddress + resourcesGPUBuffer->getDeviceAddress() + resourcesCollection.geometryInfo.bufferOffset, + .triangleMeshMainObjectIndex = drawCall.triangleMeshMainObjectIndex, + .viewProjectionMatrix = static_cast(camera.getConcatenatedMatrix()) + }; + cb->pushConstants(m_graphicsPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); + + cb->drawIndexed(drawCall.indexCount, 1u, 0u, 0u, 0u); + } + + cb->endRenderPass(); + + if (!inBetweenSubmit) + cb->endDebugMarker(); + + drawResourcesFiller.markFrameUsageComplete(intendedSubmitInfo.getFutureScratchSemaphore().value); + + if (inBetweenSubmit) + { + if (intendedSubmitInfo.overflowSubmit(m_currentRecordingCommandBufferInfo) != IQueue::RESULT::SUCCESS) + { + m_logger->log("overflow submit failed.", ILogger::ELL_ERROR); + } + } + else + { + // cb->end(); + + const auto nextFrameIx = m_realFrameIx+1u; + const IQueue::SSubmitInfo::SSemaphoreInfo thisFrameRendered = { + .semaphore = m_renderSemaphore.get(), + .value = nextFrameIx, + .stageMask = 
asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + }; + if (intendedSubmitInfo.submit(m_currentRecordingCommandBufferInfo, { &thisFrameRendered,1 }) == IQueue::RESULT::SUCCESS) + { + m_realFrameIx = nextFrameIx; + + IQueue::SSubmitInfo::SSemaphoreInfo presentWait = thisFrameRendered; + // the stages for a wait semaphore operation are about what stage you WAIT in, not what stage you wait for + presentWait.stageMask = PIPELINE_STAGE_FLAGS::NONE; // top of pipe, there's no explicit presentation engine stage + m_surface->present(m_currentImageAcquire.imageIndex,{&presentWait,1}); + } + else + { + m_logger->log("regular submit failed.", ILogger::ELL_ERROR); + } + } + } + + void endFrameRender(SIntendedSubmitInfo& intendedSubmitInfo) + { + submitDraws(intendedSubmitInfo, false); + } + + inline bool keepRunning() override + { + if (duration_cast(clock_t::now()-start)>timeout) + return false; + + return m_surface && !m_surface->irrecoverable(); + } + + virtual bool onAppTerminated() override + { + m_currentRecordingCommandBufferInfo->cmdbuf->end(); + + // We actually want to wait for all the frames to finish rendering, otherwise our destructors will run out of order late + m_device->waitIdle(); + + // This is optional, but the window would close AFTER we return from this function + m_surface = nullptr; + + return device_base_t::onAppTerminated(); + } + + virtual video::IAPIConnection::SFeatures getAPIFeaturesToEnable() override + { + auto retval = base_t::getAPIFeaturesToEnable(); + // We only support one swapchain mode, surface, the other one is Display which we have not implemented yet. 
+ retval.swapchainMode = video::E_SWAPCHAIN_MODE::ESM_SURFACE; + retval.validations = true; + retval.synchronizationValidation = false; + return retval; + } + +protected: + + void addObjects(SIntendedSubmitInfo& intendedNextSubmit) + { + drawResourcesFiller.setSubmitDrawsFunction( + [&](SIntendedSubmitInfo& intendedNextSubmit) + { + return submitDraws(intendedNextSubmit, true); + } + ); + drawResourcesFiller.reset(); + + CTriangleMesh mesh; + mesh.setVertices(core::vector(DTMMainMeshVertices)); + mesh.setIndices(core::vector(DTMMainMeshIndices)); + + DTMSettingsInfo dtmInfo{}; + + // PRESS 1, 2, 3 TO SWITCH HEIGHT SHADING MODE + // 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS + // 2 - DISCRETE_FIXED_LENGTH_INTERVALS + // 3 - CONTINOUS_INTERVALS + float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5; + switch (m_shadingModeExample) + { + case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + { + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + + dtmInfo.heightShadingInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + + break; + } + case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + { + dtmInfo.heightShadingInfo.intervalLength = 10.0f; + dtmInfo.heightShadingInfo.intervalIndexToHeightMultiplier = dtmInfo.heightShadingInfo.intervalLength; + dtmInfo.heightShadingInfo.isCenteredShading = false; + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 
1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + + break; + } + case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + { + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + + break; + } + } + + drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit); + } + +protected: + clock_t::time_point start; // TODO: am i missing somehting? 
why is it never initialized + std::chrono::seconds timeout = std::chrono::seconds(0x7fffFFFFu); + + double m_timeElapsed = 0.0; + std::chrono::steady_clock::time_point lastTime; + + core::smart_refctd_ptr m_inputSystem; + nbl::examples::InputSystem::ChannelReader mouse; + nbl::examples::InputSystem::ChannelReader keyboard; + + smart_refctd_ptr renderpassInitial; // this renderpass will clear the attachment and transition it to COLOR_ATTACHMENT_OPTIMAL + smart_refctd_ptr renderpassInBetween; // this renderpass will load the attachment and transition it to COLOR_ATTACHMENT_OPTIMAL + smart_refctd_ptr renderpassFinal; // this renderpass will load the attachment and transition it to PRESENT + + smart_refctd_ptr m_graphicsCommandPool; + std::array, MaxSubmitsInFlight> m_commandBuffersInFlight; + // ref to above cmd buffers, these go into SIntendedSubmitInfo as command buffers available for recording. + std::array m_commandBufferInfos; + // pointer to one of the command buffer infos from above, this is the only command buffer used to record current submit in current frame, it will be updated by SIntendedSubmitInfo + IQueue::SSubmitInfo::SCommandBufferInfo const * m_currentRecordingCommandBufferInfo; // pointer can change, value cannot + + smart_refctd_ptr m_globalsBuffer; + DrawResourcesFiller drawResourcesFiller; // you can think of this as the scene data needed to draw everything, we only have one instance so let's use a timeline semaphore to sync all renders + + smart_refctd_ptr m_renderSemaphore; // timeline semaphore to sync frames together + + // timeline semaphore used for overflows (they need to be on their own timeline to count overflows) + smart_refctd_ptr m_overflowSubmitScratchSemaphore; + SIntendedSubmitInfo m_intendedNextSubmit; + + ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + + uint64_t m_realFrameIx = 0u; + + smart_refctd_ptr m_descriptorSetLayout0; + smart_refctd_ptr m_descriptorSet0; + smart_refctd_ptr m_pipelineLayout; + 
smart_refctd_ptr m_graphicsPipeline; + + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; + smart_refctd_ptr pseudoStencilImageView; + smart_refctd_ptr colorStorageImageView; + + Camera camera; + + E_HEIGHT_SHADING_MODE m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; +}; + +NBL_MAIN_FUNC(ComputerAidedDesign) + diff --git a/75_CAD_3D/scripts/generate_mipmaps.py b/75_CAD_3D/scripts/generate_mipmaps.py new file mode 100644 index 000000000..78420cda5 --- /dev/null +++ b/75_CAD_3D/scripts/generate_mipmaps.py @@ -0,0 +1,47 @@ +import OpenEXR +import Imath +import numpy as np + +def read_exr(path): + exr = OpenEXR.InputFile(path) + dw = exr.header()['dataWindow'] + size = (dw.max.x - dw.min.x + 1, dw.max.y - dw.min.y + 1) + + pt = Imath.PixelType(Imath.PixelType.FLOAT) + channels = ['R', 'G', 'B'] + data = [np.frombuffer(exr.channel(c, pt), dtype=np.float32).reshape(size[1], size[0]) for c in channels] + return np.stack(data, axis=-1) # shape: (H, W, 3) + +def write_exr(path, arr): + H, W, C = arr.shape + assert C == 3, "Only RGB supported" + header = OpenEXR.Header(W, H) + pt = Imath.PixelType(Imath.PixelType.FLOAT) + channels = { + 'R': arr[:, :, 0].astype(np.float32).tobytes(), + 'G': arr[:, :, 1].astype(np.float32).tobytes(), + 'B': arr[:, :, 2].astype(np.float32).tobytes() + } + exr = OpenEXR.OutputFile(path, header) + exr.writePixels(channels) + +def mipmap_exr(): + img = read_exr("../../media/tiled_grid_mip_0.exr") + h, w, _ = img.shape + base_path = "../../media/tiled_grid_mip_" + tile_size = 128 + mip_level = 1 + tile_length = h // (2 * tile_size) + + while tile_length > 0: + # Reshape and average 2x2 blocks + reshaped = img.reshape(h//2, 2, w//2, 2, 3) + mipmap = reshaped.mean(axis=(1, 3)) + write_exr(base_path + str(mip_level) + ".exr", mipmap) + img = mipmap + mip_level = mip_level + 1 + tile_length = tile_length // 2 + h = h // 2 + w = w // 2 + +mipmap_exr() \ No newline at end of file diff --git 
a/75_CAD_3D/scripts/tiled_grid.py b/75_CAD_3D/scripts/tiled_grid.py new file mode 100644 index 000000000..89c637338 --- /dev/null +++ b/75_CAD_3D/scripts/tiled_grid.py @@ -0,0 +1,266 @@ +from PIL import Image, ImageDraw, ImageFont +import numpy as np +import os +import OpenImageIO as oiio + + + +def create_single_tile(tile_size, color, x_coord, y_coord, font_path=None): + """ + Creates a single square tile image with a given color and two lines of centered text. + + Args: + tile_size (int): The sidelength of the square tile in pixels. + color (tuple): A tuple of three floats (R, G, B) representing the color (0.0-1.0). + x_coord (int): The X coordinate to display on the tile. + y_coord (int): The Y coordinate to display on the tile. + font_path (str, optional): The path to a TrueType font file (.ttf). + If None, a default PIL font will be used. + Returns: + PIL.Image.Image: The created tile image with text. + """ + # Convert float color (0.0-1.0) to 8-bit integer color (0-255) + int_color = tuple(int(max(0, min(1, c)) * 255) for c in color) # Ensure color components are clamped + + img = Image.new('RGB', (tile_size, tile_size), int_color) + draw = ImageDraw.Draw(img) + + text_line1 = f"x = {x_coord}" + text_line2 = f"y = {y_coord}" + + text_fill_color = (255, 255, 255) + + # --- Dynamic Font Size Adjustment --- + # Start with a relatively large font size and shrink if needed + font_size = int(tile_size * 0.25) # Initial guess for font size + max_font_size = int(tile_size * 0.25) # Don't exceed this + + font = None + max_iterations = 100 # Prevent infinite loops in font size reduction + + for _ in range(max_iterations): + current_font_path = font_path + current_font_size = max(1, font_size) # Ensure font size is at least 1 + + try: + if current_font_path and os.path.exists(current_font_path): + font = ImageFont.truetype(current_font_path, current_font_size) + else: + # Fallback to default font (size argument might not always work perfectly) + font = 
ImageFont.load_default() + # For default font, try to scale if load_default(size=...) is supported and works + try: + scaled_font = ImageFont.load_default(size=current_font_size) + if draw.textbbox((0, 0), text_line1, font=scaled_font)[2] > 0: # Check if usable + font = scaled_font + except Exception: + pass # Stick with original default font + + if font is None: # Last resort if no font could be loaded + font = ImageFont.load_default() + + # Measure text dimensions + bbox1 = draw.textbbox((0, 0), text_line1, font=font) + text_width1 = bbox1[2] - bbox1[0] + text_height1 = bbox1[3] - bbox1[1] + + bbox2 = draw.textbbox((0, 0), text_line2, font=font) + text_width2 = bbox2[2] - bbox2[0] + text_height2 = bbox2[3] - bbox2[1] + + # Calculate total height needed for both lines plus some padding + # Let's assume a small gap between lines (e.g., 0.1 * text_height) + line_gap = int(text_height1 * 0.2) # 20% of line height + total_text_height = text_height1 + text_height2 + line_gap + + # Check if text fits vertically and horizontally + if (total_text_height < tile_size * 0.9) and \ + (text_width1 < tile_size * 0.9) and \ + (text_width2 < tile_size * 0.9): + break # Font size is good, break out of loop + else: + font_size -= 1 # Reduce font size + if font_size <= 0: # Prevent infinite loop if text can never fit + font_size = 1 # Smallest possible font size + break + + except Exception as e: + # Handle cases where font loading or textbbox fails + print(f"Error during font sizing: {e}. 
Reducing font size and retrying.") + font_size -= 1 + if font_size <= 0: + font_size = 1 + break # Cannot make font smaller, stop + + # Final check: if font_size became 0 or less, ensure it's at least 1 + if font_size <= 0: + font_size = 1 + # Reload font with minimum size if needed + if font_path and os.path.exists(font_path): + font = ImageFont.truetype(font_path, font_size) + else: + font = ImageFont.load_default() + try: + scaled_font = ImageFont.load_default(size=font_size) + if draw.textbbox((0, 0), text_line1, font=scaled_font)[2] > 0: + font = scaled_font + except Exception: + pass + + + # Re-measure with final font size to ensure accurate positioning + bbox1 = draw.textbbox((0, 0), text_line1, font=font) + text_width1 = bbox1[2] - bbox1[0] + text_height1 = bbox1[3] - bbox1[1] + + bbox2 = draw.textbbox((0, 0), text_line2, font=font) + text_width2 = bbox2[2] - bbox2[0] + text_height2 = bbox2[3] - bbox2[1] + + # Calculate positions for centering + # Line 1: centered horizontally, midpoint at 1/3 tile height + x1 = (tile_size - text_width1) / 2 + y1 = (tile_size / 3) - (text_height1 / 2) + + # Line 2: centered horizontally, midpoint at 2/3 tile height + x2 = (tile_size - text_width2) / 2 + y2 = (tile_size * 2 / 3) - (text_height2 / 2) + + # Draw the text + draw.text((x1, y1), text_line1, fill=text_fill_color, font=font) + draw.text((x2, y2), text_line2, fill=text_fill_color, font=font) + + return img + +def generate_interpolated_grid_image(tile_size, count, font_path=None): + """ + Generates a large image composed of 'count' x 'count' tiles, + with colors bilinearly interpolated from corners and text indicating tile index. + + Args: + tile_size (int): The sidelength of each individual square tile in pixels. + count (int): The number of tiles per side of the large grid (e.g., if count=3, + it's a 3x3 grid of tiles). + font_path (str, optional): Path to a TrueType font file for the tile text. + If None, a default PIL font will be used. 
+ + Returns: + PIL.Image.Image: The generated large grid image. + """ + if count <= 0: + raise ValueError("Count must be a positive integer.") + + total_image_size = count * tile_size + main_img = Image.new('RGB', (total_image_size, total_image_size)) + + # Corner colors (R, G, B) as floats (0.0-1.0) + corner_colors = { + "top_left": (1.0, 0.0, 0.0), # Red + "top_right": (1.0, 0.0, 1.0), # Purple + "bottom_left": (0.0, 1.0, 0.0), # Green + "bottom_right": (0.0, 0.0, 1.0) # Blue + } + + # Handle the edge case where count is 1 + if count == 1: + # If count is 1, there's only one tile, which is the top-left corner + tile_color = corner_colors["top_left"] + tile_image = create_single_tile(tile_size, tile_color, 0, 0, font_path=font_path) + main_img.paste(tile_image, (0, 0)) + return main_img + + for y_tile in range(count): + for x_tile in range(count): + # Calculate normalized coordinates (u, v) for interpolation + # We divide by (count - 1) to ensure 0 and 1 values at the edges + u = x_tile / (count - 1) + v = y_tile / (count - 1) + + # Apply the simplified bilinear interpolation formulas + r_component = 1 - v + g_component = v * (1 - u) + b_component = u + + # Clamp components to be within 0.0 and 1.0 (due to potential floating point inaccuracies) + current_color = ( + max(0.0, min(1.0, r_component)), + max(0.0, min(1.0, g_component)), + max(0.0, min(1.0, b_component)) + ) + + # Create the individual tile + tile_image = create_single_tile(tile_size, current_color, x_tile, y_tile, font_path=font_path) + + # Paste the tile onto the main image + paste_x = x_tile * tile_size + paste_y = y_tile * tile_size + main_img.paste(tile_image, (paste_x, paste_y)) + + return main_img + + + + +import argparse +parser = argparse.ArgumentParser(description="Process two optional named parameters.") +parser.add_argument('--ts', type=int, default=128, help='Tile Size') +parser.add_argument('--gs', type=int, default=128, help='Grid Size') + +# Parse the arguments +args = 
parser.parse_args() + + +# --- Configuration --- +tile_sidelength = args.ts # Size of each individual tile in pixels +grid_count = args.gs # Number of tiles per side (e.g., 15 means 15x15 grid) + +# Path to a font file (adjust this for your system) +# On Windows, you can typically use 'C:/Windows/Fonts/arial.ttf' or similar +# You might need to find a suitable font on your system. +# For testing, you can use None to let PIL use its default font. +# If a specific font path is provided and doesn't exist, it will fall back to default. +windows_font_path = "C:/Windows/Fonts/arial.ttf" # Example path for Windows +# If Arial is not found, try Times New Roman: +# windows_font_path = "C:/Windows/Fonts/times.ttf" + +font_to_use = None +if os.name == 'nt': # Check if OS is Windows + if os.path.exists(windows_font_path): + font_to_use = windows_font_path + print(f"Using font: {windows_font_path}") + else: + print(f"Warning: Windows font not found at '{windows_font_path}'. Using default PIL font.") +else: # Assume Linux/macOS for other OS types + # Common Linux/macOS font paths (adjust as needed) + linux_font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf" + mac_font_path = "/Library/Fonts/Arial.ttf" + if os.path.exists(linux_font_path): + font_to_use = linux_font_path + print(f"Using font: {linux_font_path}") + elif os.path.exists(mac_font_path): + font_to_use = mac_font_path + print(f"Using font: {mac_font_path}") + else: + print("Warning: No common Linux/macOS font found. 
Using default PIL font.") + + +# --- Generate and save the image --- +print(f"Generating a {grid_count}x{grid_count} grid of tiles, each {tile_sidelength}x{tile_sidelength} pixels.") +print(f"Total image size will be {grid_count * tile_sidelength}x{grid_count * tile_sidelength} pixels.") + +try: + final_image = generate_interpolated_grid_image(tile_sidelength, grid_count, font_path=font_to_use) + output_filename = "../../media/tiled_grid_mip_0.exr" + np_img = np.array(final_image).astype(np.float32) / 255.0 # Normalize for EXR + spec = oiio.ImageSpec(final_image.width, final_image.height, 3, oiio.TypeDesc("float")) + out = oiio.ImageOutput.create(output_filename) + out.open(output_filename, spec) + out.write_image(np_img.reshape(-1)) # Flatten for OIIO’s expected input + out.close() + + print(f"Successfully created '{output_filename}'") + +except ValueError as e: + print(f"Error: {e}") +except Exception as e: + print(f"An unexpected error occurred: {e}") \ No newline at end of file diff --git a/75_CAD_3D/shaders/globals.hlsl b/75_CAD_3D/shaders/globals.hlsl new file mode 100644 index 000000000..544f05516 --- /dev/null +++ b/75_CAD_3D/shaders/globals.hlsl @@ -0,0 +1,173 @@ +#ifndef _CAD_3D_EXAMPLE_GLOBALS_HLSL_INCLUDED_ +#define _CAD_3D_EXAMPLE_GLOBALS_HLSL_INCLUDED_ + +// TODO[Erfan]: Turn off in the future, but keep enabled to test +// #define NBL_FORCE_EMULATED_FLOAT_64 + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __HLSL_VERSION +#include +#endif + +using namespace nbl::hlsl; + +#ifdef __HLSL_VERSION +using pfloat64_t = portable_float64_t; +using pfloat64_t2 = portable_float64_t2; +using pfloat64_t3 = portable_float64_t3; +using pfloat64_t4 = portable_float64_t4; +#else +using pfloat64_t = float64_t; +using pfloat64_t2 = nbl::hlsl::vector; +using pfloat64_t3 = nbl::hlsl::vector; +using pfloat64_t4 = nbl::hlsl::vector; +#endif + +using pfloat64_t3x3 = portable_matrix_t3x3; +using pfloat64_t4x4 = portable_matrix_t4x4; + +enum 
class MainObjectType : uint32_t
{
    NONE = 0u,
    DTM,
};

// Per-object record fetched by the shaders through loadMainObject().
struct MainObject
{
    uint32_t dtmSettingsIdx; // index handed to loadDTMSettings()
};

// Push-constant payload; the vertex stage reads the vertex base address and the matrix,
// the fragment stage reads the main-object index.
struct PushConstants
{
    uint64_t triangleMeshVerticesBaseAddress;
    uint32_t triangleMeshMainObjectIndex;
    pfloat64_t4x4 viewProjectionMatrix;
};

// 64-bit buffer addresses. `mainObjects` and `dtmSettings` are dereferenced by the loaders
// further down; the size is pinned by the static_assert that follows.
struct Pointers
{
    uint64_t mainObjects;
    uint64_t drawObjects;
    uint64_t geometryBuffer;
    uint64_t dtmSettings;
};
#ifndef __HLSL_VERSION
static_assert(sizeof(Pointers) == 32u);
#endif

// Contents of the constant buffer bound as `globals`.
struct Globals
{
    Pointers pointers;
    float32_t antiAliasingFactor;
    float32_t __padding; // keeps sizeof(Globals) at the asserted 40 bytes
};
#ifndef __HLSL_VERSION
static_assert(sizeof(Globals) == 40u);
#endif

struct DrawObject
{
    uint32_t type_subsectionIdx; // packed two uint16 into uint32
    uint32_t mainObjIndex;
    uint64_t geometryAddress;
};

struct TriangleMeshVertex
{
    pfloat64_t3 pos;
};

enum class E_HEIGHT_SHADING_MODE : uint32_t
{
    DISCRETE_VARIABLE_LENGTH_INTERVALS,
    DISCRETE_FIXED_LENGTH_INTERVALS,
    CONTINOUS_INTERVALS
};

struct DTMHeightShadingSettings
{
    const static uint32_t HeightColorMapMaxEntries = 16u;

    // height-color map
    float intervalLength;
    float intervalIndexToHeightMultiplier;
    int isCenteredShading;

    uint32_t heightColorEntryCount;
    float heightColorMapHeights[HeightColorMapMaxEntries];
    float32_t4 heightColorMapColors[HeightColorMapMaxEntries];

    // The shading mode is not stored; it is derived from `intervalLength`:
    //   infinite -> DISCRETE_VARIABLE_LENGTH_INTERVALS
    //   zero     -> CONTINOUS_INTERVALS
    //   other    -> DISCRETE_FIXED_LENGTH_INTERVALS
    E_HEIGHT_SHADING_MODE determineHeightShadingMode()
    {
        if (nbl::hlsl::isinf(intervalLength))
            return E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS;
        if (intervalLength == 0.0f)
            return E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS;
        return E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS;
    }
};

struct DTMSettings
{
    // height shading
    DTMHeightShadingSettings heightShadingSettings;
};

#ifndef __HLSL_VERSION
// Host-side equality of two DTMSettings.
// Scalars are compared field by field; of the height/color arrays only the first
// `heightColorEntryCount` entries are compared bytewise, so stale data past the count is ignored.
inline bool operator==(const DTMSettings& lhs, const DTMSettings& rhs)
{
    const DTMHeightShadingSettings& a = lhs.heightShadingSettings;
    const DTMHeightShadingSettings& b = rhs.heightShadingSettings;

    if (a.intervalLength != b.intervalLength)
        return false;
    if (a.intervalIndexToHeightMultiplier != b.intervalIndexToHeightMultiplier)
        return false;
    if (a.isCenteredShading != b.isCenteredShading)
        return false;
    if (a.heightColorEntryCount != b.heightColorEntryCount)
        return false;

    // Counts are equal at this point, so either side's count can size the comparison.
    if (memcmp(a.heightColorMapHeights, b.heightColorMapHeights, a.heightColorEntryCount * sizeof(float)))
        return false;
    if (memcmp(a.heightColorMapColors, b.heightColorMapColors, a.heightColorEntryCount * sizeof(float32_t4)))
        return false;

    return true;
}
#endif

NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MainObjectIdxBits = 24u; // It will be packed next to alpha in a texture
NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MaxIndexableMainObjects = (1u << MainObjectIdxBits) - 1u;
NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects;
NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidDTMSettingsIdx = nbl::hlsl::numeric_limits<uint32_t>::max;

#ifdef __HLSL_VERSION
[[vk::binding(0, 0)]] ConstantBuffer<Globals> globals : register(b0);

// Loads the `index`-th MainObject from the buffer at globals.pointers.mainObjects (4-byte aligned load).
MainObject loadMainObject(const uint32_t index)
{
    return vk::RawBufferLoad<MainObject>(globals.pointers.mainObjects + index * sizeof(MainObject), 4u);
}
// Loads the `index`-th DTMSettings from the buffer at globals.pointers.dtmSettings (4-byte aligned load).
DTMSettings loadDTMSettings(const uint32_t index)
{
    return vk::RawBufferLoad<DTMSettings>(globals.pointers.dtmSettings + index * sizeof(DTMSettings), 4u);
}

#else
// The alignments passed to RawBufferLoad above must match what the host compiler produces.
static_assert(alignof(MainObject)==4u);
static_assert(alignof(DTMSettings)==4u);
static_assert(alignof(pfloat64_t3x3)==8u);
static_assert(alignof(DrawObject)==8u);
#endif

#endif
diff --git a/75_CAD_3D/shaders/main_pipeline/common.hlsl b/75_CAD_3D/shaders/main_pipeline/common.hlsl new file mode 100644 index 
000000000..8fc59e1ee --- /dev/null +++ b/75_CAD_3D/shaders/main_pipeline/common.hlsl @@ -0,0 +1,37 @@
#ifndef _CAD_3D_EXAMPLE_MAIN_PIPELINE_COMMON_HLSL_INCLUDED_
#define _CAD_3D_EXAMPLE_MAIN_PIPELINE_COMMON_HLSL_INCLUDED_

#include "../globals.hlsl"

// Vertex -> fragment interface. The same struct is compiled for both stages; defining
// FRAGMENT_SHADER_INPUT before including this header selects the fragment-side view,
// in which location 3 becomes a 3-element per-vertex array.
struct PSInput
{
    [[vk::location(0)]] float4 position : SV_Position;

    [[vk::location(1)]] nointerpolation float4 data1 : COLOR1;
    [[vk::location(2)]] float4 interpolatedData1 : COLOR2;

    // TODO: do we even need vertexScreenSpacePos?
#ifndef FRAGMENT_SHADER_INPUT // vertex shader
    [[vk::location(3)]] float3 vertexScreenSpacePos : COLOR3;
#else
    [[vk::location(3)]] [[vk::ext_decorate(/*spv::DecoratePerVertexKHR*/5285)]] float3 vertexScreenSpacePos[3] : COLOR3;
#endif

    // Normal lives in data1.xyz (not interpolated).
    void setNormal(NBL_CONST_REF_ARG(float3) normal)
    {
        data1.xyz = normal;
    }
    float3 getNormal()
    {
        return data1.xyz;
    }

    // Height lives in interpolatedData1.x.
    void setHeight(float height)
    {
        interpolatedData1.x = height;
    }
    float getHeight()
    {
        return interpolatedData1.x;
    }

#ifndef FRAGMENT_SHADER_INPUT // vertex shader
    void setScreenSpaceVertexAttribs(float3 pos)
    {
        vertexScreenSpacePos = pos;
    }
#else // fragment shader
    float3 getScreenSpaceVertexAttribs(uint32_t vertexIndex)
    {
        return vertexScreenSpacePos[vertexIndex];
    }
#endif
};

// [[vk::binding(0, 0)]] ConstantBuffer globals; ---> moved to globals.hlsl

[[vk::push_constant]] PushConstants pc;

#endif
diff --git a/75_CAD_3D/shaders/main_pipeline/dtm.hlsl b/75_CAD_3D/shaders/main_pipeline/dtm.hlsl new file mode 100644 index 000000000..cf85766dd --- /dev/null +++ b/75_CAD_3D/shaders/main_pipeline/dtm.hlsl @@ -0,0 +1,191 @@
#ifndef _CAD_3D_EXAMPLE_DTM_HLSL_INCLUDED_
#define _CAD_3D_EXAMPLE_DTM_HLSL_INCLUDED_

#include "common.hlsl"

namespace dtm
{

// Adapter handed to nbl::hlsl::upper_bound: indexing it yields the ix-th height of the color map.
struct DTMSettingsHeightsAccessor
{
    DTMHeightShadingSettings settings;
    using value_type = float;

    float operator[](const uint32_t ix)
    {
        const float entryHeight = settings.heightColorMapHeights[ix];
        return entryHeight;
    }
};

struct
HeightSegmentTransitionData
{
    float currentHeight;        // height of the fragment being shaded
    float4 currentSegmentColor; // color of the segment the fragment falls into
    float boundaryHeight;       // height of the boundary towards the neighbouring segment
    float4 otherSegmentColor;   // color of that neighbouring segment
};

// Computes, for a fixed-length interval, both its boundary height and its color.
//   intervalIndex     - index of the interval, counted from heightColorMapHeights[0]
//   settings          - height shading settings (color map, interval length, centering flag)
//   outIntervalColor  - color-map color sampled at
//                       heightColorMapHeights[0] + intervalIndex * intervalIndexToHeightMultiplier
//   outIntervalHeight - heightColorMapHeights[0] + intervalIndex * intervalLength,
//                       shifted down by half an interval when isCenteredShading is set
// Expects settings.heightColorEntryCount > 0 (the caller returns early on an empty map).
void getIntervalHeightAndColor(in int intervalIndex, in DTMHeightShadingSettings settings, out float4 outIntervalColor, out float outIntervalHeight)
{
    const float minShadingHeight = settings.heightColorMapHeights[0];
    const float heightForColor = minShadingHeight + float(intervalIndex) * settings.intervalIndexToHeightMultiplier;

    if (settings.isCenteredShading)
        outIntervalHeight = minShadingHeight + (float(intervalIndex) - 0.5f) * settings.intervalLength;
    else
        outIntervalHeight = minShadingHeight + (float(intervalIndex)) * settings.intervalLength;

    // Bracket heightForColor between two neighbouring color-map entries.
    DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings };
    int32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, settings.heightColorEntryCount, heightForColor), settings.heightColorEntryCount - 1u);
    int32_t lowerBoundHeightIndex = max(upperBoundHeightIndex - 1, 0);

    float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex];
    float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex];

    float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex];
    float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex];

    if (upperBoundHeight == lowerBoundHeight)
    {
        // Single entry, duplicate heights, or heightForColor below the first entry: nothing to interpolate.
        outIntervalColor = upperBoundColor;
    }
    else
    {
        // saturate() keeps heights above the last map entry on the last color instead of
        // extrapolating past it (heights below the first entry already resolve to the first color).
        float interpolationVal = saturate((heightForColor - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight));
        outIntervalColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal);
    }
}

// This function interpolates between the current and nearest segment colors based on the
// screen-space distance to the segment boundary. The result is a smoothly blended color
// useful for visualizing discrete height levels without harsh edges.
//   transitionInfo - fragment height, boundary height and the two colors to blend
//   heightDeriv    - screen-space rate of change of height; the caller passes fwidth(height)
// Returns the current segment color blended towards the other segment color; the blend is
// 50/50 exactly on the boundary and fully the current color once the fragment is
// globals.antiAliasingFactor pixels away from it.
float4 smoothHeightSegmentTransition(in HeightSegmentTransitionData transitionInfo, in float heightDeriv)
{
    // Height does not change across neighbouring pixels, so no boundary is crossed nearby.
    // Dividing would produce inf, or NaN (0/0) when the fragment lies exactly on the boundary
    // height, and that NaN would end up in the output color.
    if (heightDeriv == 0.0f)
        return transitionInfo.currentSegmentColor;

    const float heightDistance = transitionInfo.currentHeight - transitionInfo.boundaryHeight;
    const float pxDistanceToNearestSegment = abs(heightDistance / heightDeriv);
    const float nearestSegmentColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNearestSegment);
    return lerp(transitionInfo.otherSegmentColor, transitionInfo.currentSegmentColor, nearestSegmentColorCoverage);
}

// Converts a height into a fractional interval coordinate: the integer part is the interval
// index, the fractional part the position inside that interval. With centered shading the
// coordinate is shifted by half an interval.
// intervalLength must be non-zero; the caller only uses this in the fixed-length mode.
float getIntervalPosition(in float height, in float minHeight, in float intervalLength, in bool isCenteredShading)
{
    const float intervalsAboveMin = (height - minHeight) / intervalLength;
    if (isCenteredShading)
        return (intervalsAboveMin + 0.5f);
    else
        return intervalsAboveMin;
}

// Returns the color-map color for the given interval index, sampled at
// heightColorMapHeights[0] + intervalIndex * intervalIndexToHeightMultiplier.
// Expects settings.heightColorEntryCount > 0.
float32_t4 calcIntervalColor(in int intervalIndex, in DTMHeightShadingSettings settings)
{
    const float minShadingHeight = settings.heightColorMapHeights[0];
    const float heightForColor = minShadingHeight + float(intervalIndex) * settings.intervalIndexToHeightMultiplier;

    // Bracket heightForColor between two neighbouring color-map entries.
    DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings };
    int32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, settings.heightColorEntryCount, heightForColor), settings.heightColorEntryCount - 1u);
    int32_t lowerBoundHeightIndex = max(upperBoundHeightIndex - 1, 0);

    float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex];
    float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex];

    float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex];
    float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex];

    // Single entry, duplicate heights, or heightForColor below the first entry: nothing to interpolate.
    if (upperBoundHeight == lowerBoundHeight)
        return upperBoundColor;

    // saturate() keeps heights above the last map entry on the last color instead of
    // extrapolating past it.
    float interpolationVal = saturate((heightForColor - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight));
    return lerp(lowerBoundColor, upperBoundColor, interpolationVal);
}

float32_t4
calculateDTMHeightColor(in DTMHeightShadingSettings settings, in float heightDeriv, in float3 triangleVertices[3], in float2 fragPos, in float height)
{
    // Returns the height-shading color for a fragment at `height`.
    //   settings         - color map and interval parameters; the shading mode is derived from them
    //   heightDeriv      - screen-space derivative of height, used to anti-alias segment boundaries
    //   triangleVertices - not read by this function
    //   fragPos          - not read by this function
    //   height           - height of the fragment
    // An empty color map yields transparent black.
    const uint32_t heightMapSize = settings.heightColorEntryCount;
    if (heightMapSize == 0)
        return float32_t4(0.0f, 0.0f, 0.0f, 0.0f);

    const E_HEIGHT_SHADING_MODE mode = settings.determineHeightShadingMode();
    if (mode == E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS)
    {
        // Each color-map entry starts a segment with a flat color; blend only near segment boundaries.
        DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings };
        const int upperBoundIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0u, heightMapSize, height), heightMapSize - 1u);
        const int mapIndex = max(upperBoundIndex - 1, 0);
        int mapIndexPrev = max(mapIndex - 1, 0);
        int mapIndexNext = min(mapIndex + 1, heightMapSize - 1);

        // logic explainer: if colorIdx is 0.0 then it means blend with next
        // if color idx is >= length of the colours array then it means it's also > 0.0 and this blend with prev is true
        // if color idx is > 0 and < len - 1, then it depends on the current pixel's height value and two closest height values
        bool blendWithPrev = (mapIndex > 0)
            && (mapIndex >= heightMapSize - 1 || (height * 2.0 < settings.heightColorMapHeights[upperBoundIndex] + settings.heightColorMapHeights[mapIndex]));

        HeightSegmentTransitionData transitionInfo;
        transitionInfo.currentHeight = height;
        transitionInfo.currentSegmentColor = settings.heightColorMapColors[mapIndex];
        transitionInfo.boundaryHeight = blendWithPrev ? settings.heightColorMapHeights[mapIndex] : settings.heightColorMapHeights[mapIndexNext];
        transitionInfo.otherSegmentColor = blendWithPrev ? settings.heightColorMapColors[mapIndexPrev] : settings.heightColorMapColors[mapIndexNext];

        return smoothHeightSegmentTransition(transitionInfo, heightDeriv);
    }
    else if (mode == E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS)
    {
        // Equal-length intervals counted from the first map height; each interval has one flat color.
        const float minShadingHeight = settings.heightColorMapHeights[0];
        const float intervalPosition = getIntervalPosition(height, minShadingHeight, settings.intervalLength, settings.isCenteredShading);
        const float positionWithinInterval = frac(intervalPosition);
        const int intervalIndex = nbl::hlsl::_static_cast<int>(intervalPosition);

        float4 currentIntervalColor;
        float currentIntervalHeight;
        getIntervalHeightAndColor(intervalIndex, settings, currentIntervalColor, currentIntervalHeight);

        // The lower half of an interval blends with the previous interval, the upper half with the next.
        bool blendWithPrev = (positionWithinInterval < 0.5f);

        HeightSegmentTransitionData transitionInfo;
        transitionInfo.currentHeight = height;
        transitionInfo.currentSegmentColor = currentIntervalColor;
        if (blendWithPrev)
        {
            int prevIntervalIdx = max(intervalIndex - 1, 0);
            float prevIntervalHeight; // unused, the currentIntervalHeight is the boundary height between current and prev
            getIntervalHeightAndColor(prevIntervalIdx, settings, transitionInfo.otherSegmentColor, prevIntervalHeight);
            transitionInfo.boundaryHeight = currentIntervalHeight;
        }
        else
        {
            int nextIntervalIdx = intervalIndex + 1;
            getIntervalHeightAndColor(nextIntervalIdx, settings, transitionInfo.otherSegmentColor, transitionInfo.boundaryHeight);
        }

        return smoothHeightSegmentTransition(transitionInfo, heightDeriv);
    }
    else if (mode == E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS)
    {
        // Linear interpolation between the two color-map entries that bracket `height`.
        DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings };
        uint32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0u, heightMapSize - 1u, height), heightMapSize - 1u);
        uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? upperBoundHeightIndex : upperBoundHeightIndex - 1;

        float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex];
        float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex];

        float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex];
        float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex];

        float interpolationVal;
        if (upperBoundHeightIndex == 0 || upperBoundHeight == lowerBoundHeight)
        {
            // Below the first entry, or two entries sharing a height: the division would be by
            // zero, so take the upper color directly.
            interpolationVal = 1.0f;
        }
        else
        {
            // saturate() keeps heights above the last entry on the last color instead of
            // extrapolating the gradient past it.
            interpolationVal = saturate((height - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight));
        }

        return lerp(lowerBoundColor, upperBoundColor, interpolationVal);
    }

    return float32_t4(0.0f, 0.0f, 0.0f, 0.0f);
}
+} + +#endif \ No newline at end of file diff --git a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl new file mode 100644 index 000000000..aad91afbd --- /dev/null +++ b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl @@ -0,0 +1,31 @@ +#define FRAGMENT_SHADER_INPUT +#pragma shader_stage(fragment) + +#include "dtm.hlsl" +#include "common.hlsl" + +static const float32_t3 SunlightDirection = float32_t3(0.7071f, -0.7071f, 0.0f); + +[shader("pixel")] +float4 fragMain(PSInput input) : SV_Target +{ + static const float AmbientLightIntensity = 0.1f; + const float diffuseLightIntensity = max(dot(-SunlightDirection, normalize(input.getNormal())), 0.0f); + + const MainObject mainObj = loadMainObject(pc.triangleMeshMainObjectIndex); + const DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx); + + float32_t3 triangleVertices[3]; + triangleVertices[0] = input.getScreenSpaceVertexAttribs(0); + triangleVertices[1] = input.getScreenSpaceVertexAttribs(1); + triangleVertices[2] = input.getScreenSpaceVertexAttribs(2); + + const float height = input.getHeight(); + const float heightDeriv = fwidth(height); + + const float32_t4 HeightColor = dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, heightDeriv, 
triangleVertices, input.position.xy, height); + + const float32_t4 fragColor = (AmbientLightIntensity + diffuseLightIntensity) * HeightColor; + + return fragColor; +} \ No newline at end of file diff --git a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl new file mode 100644 index 000000000..5120c356d --- /dev/null +++ b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl @@ -0,0 +1,66 @@ +#pragma shader_stage(vertex) + +#include "common.hlsl" +
// Vertex entry point. Vertices are pulled from the buffer at
// pc.triangleMeshVerticesBaseAddress using SV_VertexID; every three consecutive vertices
// form one triangle (the face normal is derived from vertexID's triangle).
// Outputs: clip-space position, face normal, height (the vertex's y coordinate) and the
// untransformed vertex position.
[shader("vertex")]
PSInput vtxMain(uint vertexID : SV_VertexID)
{
    PSInput outV;
    TriangleMeshVertex vtx = vk::RawBufferLoad<TriangleMeshVertex>(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * vertexID, 8u);

    // calculate object space normal, for now we can treat it as the world space normal
    {
        const uint32_t currentVertexWithinTriangleIndex = vertexID % 3;
        const uint32_t firstVertexOfCurrentTriangleIndex = vertexID - currentVertexWithinTriangleIndex;

        TriangleMeshVertex triangleVertices[3];
        triangleVertices[0] = vk::RawBufferLoad<TriangleMeshVertex>(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * firstVertexOfCurrentTriangleIndex, 8u);
        triangleVertices[1] = vk::RawBufferLoad<TriangleMeshVertex>(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * (firstVertexOfCurrentTriangleIndex + 1), 8u);
        triangleVertices[2] = vk::RawBufferLoad<TriangleMeshVertex>(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * (firstVertexOfCurrentTriangleIndex + 2), 8u);

        // TODO: calculate on pfloat64_t
        float32_t3 vertex0 = _static_cast<float32_t3>(triangleVertices[0].pos);
        float32_t3 vertex1 = _static_cast<float32_t3>(triangleVertices[1].pos);
        float32_t3 vertex2 = _static_cast<float32_t3>(triangleVertices[2].pos);

        float32_t3 triangleEdge0 = vertex1 - vertex0;
        float32_t3 triangleEdge1 = vertex2 - vertex0;

        // TODO: Whether to use cross(e0, e1) or cross(e1, e0) depends on the triangle winding (CCW vs CW).
        outV.setNormal(normalize(cross(triangleEdge1, triangleEdge0)));
    }

    pfloat64_t4 pos;
    pos.x = vtx.pos.x;
    pos.y = vtx.pos.y;
    pos.z = vtx.pos.z;
    pos.w = _static_cast<pfloat64_t>(1.0f);

    // Height shading is driven by the y coordinate of the untransformed vertex.
    outV.setHeight(_static_cast<float>(pos.y));

    //pos = mul(pc.viewProjectionMatrix, pos);
    // TODO: use pc.viewProjectionMatrix and multiply it with pfloat64_t4 pos instead fix portable_matrix with portable_float multiplication
    // Until then the 64-bit matrix is narrowed element by element to a float4x4.
    float4x4 viewProjMatrix;
    for (int i = 0; i < 4; ++i)
    {
        viewProjMatrix[i][0] = _static_cast<float>(pc.viewProjectionMatrix[i].x);
        viewProjMatrix[i][1] = _static_cast<float>(pc.viewProjectionMatrix[i].y);
        viewProjMatrix[i][2] = _static_cast<float>(pc.viewProjectionMatrix[i].z);
        viewProjMatrix[i][3] = _static_cast<float>(pc.viewProjectionMatrix[i].w);
    }

    // NOTE(review): despite the setter's name, `pos` has not been multiplied by the
    // view-projection matrix at this point - confirm whether that is intended.
    outV.setScreenSpaceVertexAttribs(_static_cast<float32_t4>(pos).xyz);

    /*if (vertexID == 0)
    {
        printf("%f, %f, %f, %f", a[0][0], a[0][1], a[0][2], a[0][3]);
        printf("%f, %f, %f, %f", a[1][0], a[1][1], a[1][2], a[1][3]);
        printf("%f, %f, %f, %f", a[2][0], a[2][1], a[2][2], a[2][3]);
        printf("%f, %f, %f, %f", a[3][0], a[3][1], a[3][2], a[3][3]);
    }*/

    outV.position = mul(viewProjMatrix, _static_cast<float32_t4>(pos));

    return outV;
}
diff --git a/CMakeLists.txt b/CMakeLists.txt index a93a86a4f..334ac0731 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,6 +112,7 @@ if(NBL_BUILD_EXAMPLES) endif() add_subdirectory(74_QuantizedSequenceTests) + add_subdirectory(75_CAD_3D) # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS)