From d293823b4987062b8abb5988dadec6d1463500dd Mon Sep 17 00:00:00 2001
From: Przemog1 <minikers21@gmail.com>
Date: Sat, 7 Mar 2026 21:52:59 +0100
Subject: [PATCH 1/9] Created example 75

---
 75_CAD_3D/CMakeLists.txt                      |   89 ++
 75_CAD_3D/CTriangleMesh.cpp                   |    1 +
 75_CAD_3D/CTriangleMesh.h                     |   54 +
 75_CAD_3D/DrawResourcesFiller.cpp             |  235 ++++
 75_CAD_3D/DrawResourcesFiller.h               |  220 ++++
 75_CAD_3D/config.json.template                |   28 +
 75_CAD_3D/main.cpp                            | 1097 +++++++++++++++++
 75_CAD_3D/scripts/generate_mipmaps.py         |   47 +
 75_CAD_3D/scripts/tiled_grid.py               |  266 ++++
 75_CAD_3D/shaders/globals.hlsl                |  105 ++
 75_CAD_3D/shaders/main_pipeline/common.hlsl   |   17 +
 .../main_pipeline/fragment_shader.hlsl        |    7 +
 .../shaders/main_pipeline/vertex_shader.hlsl  |   17 +
 CMakeLists.txt                                |    1 +
 14 files changed, 2184 insertions(+)
 create mode 100644 75_CAD_3D/CMakeLists.txt
 create mode 100644 75_CAD_3D/CTriangleMesh.cpp
 create mode 100644 75_CAD_3D/CTriangleMesh.h
 create mode 100644 75_CAD_3D/DrawResourcesFiller.cpp
 create mode 100644 75_CAD_3D/DrawResourcesFiller.h
 create mode 100644 75_CAD_3D/config.json.template
 create mode 100644 75_CAD_3D/main.cpp
 create mode 100644 75_CAD_3D/scripts/generate_mipmaps.py
 create mode 100644 75_CAD_3D/scripts/tiled_grid.py
 create mode 100644 75_CAD_3D/shaders/globals.hlsl
 create mode 100644 75_CAD_3D/shaders/main_pipeline/common.hlsl
 create mode 100644 75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
 create mode 100644 75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl

diff --git a/75_CAD_3D/CMakeLists.txt b/75_CAD_3D/CMakeLists.txt
new file mode 100644
index 000000000..794ba1c3c
--- /dev/null
+++ b/75_CAD_3D/CMakeLists.txt
@@ -0,0 +1,89 @@
+include(common RESULT_VARIABLE RES) # RES receives the path of the included module, or NOTFOUND when it cannot be located
+if(NOT RES)
+	message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
+endif()
+
+set(EXAMPLE_SOURCES # extra sources, forwarded as the first argument of nbl_create_executable_project() below
+  "${CMAKE_CURRENT_SOURCE_DIR}/DrawResourcesFiller.cpp"
+  "${CMAKE_CURRENT_SOURCE_DIR}/DrawResourcesFiller.h"
+)
+set(EXAMPLE_INCLUDES # extra include directories, forwarded as the third argument below
+  "${CMAKE_CURRENT_SOURCE_DIR}/../../3rdparty/boost/superproject/libs/math/include")
+nbl_create_executable_project("${EXAMPLE_SOURCES}" "" "${EXAMPLE_INCLUDES}" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}")
+target_link_libraries(${EXECUTABLE_NAME} PRIVATE Nabla::ext::FullScreenTriangle) # NOTE(review): EXECUTABLE_NAME is presumably set by the call above - confirm in common.cmake
+
+# When enabled (and NBL_BUILD_TEXT_RENDERING is set), link the Nabla "Text Rendering" extension;
+# otherwise link freetype and msdfgen directly, i.e. the 3rdparty deps such an interface would be implemented with.
+# The switch is hard-coded OFF right below, so the else() branch is the one that is always taken.
+set(NBL_CAD_EX_USE_TEXT_RENDERING_EXT OFF) # do not enable, for future usage when the extension is written
+
+if(NBL_BUILD_TEXT_RENDERING AND NBL_CAD_EX_USE_TEXT_RENDERING_EXT)
+	add_dependencies(${EXECUTABLE_NAME} ${NBL_EXT_TEXT_RENDERING_TARGET})
+	target_link_libraries(${EXECUTABLE_NAME} PRIVATE ${NBL_EXT_TEXT_RENDERING_TARGET})
+	target_include_directories(${EXECUTABLE_NAME} PUBLIC $<TARGET_PROPERTY:${NBL_EXT_TEXT_RENDERING_TARGET},INCLUDE_DIRECTORIES>)
+else()
+	# Freetype: build ordering, linkage, and the target's own include directories
+	add_dependencies(${EXECUTABLE_NAME} freetype)
+	target_link_libraries(${EXECUTABLE_NAME} PRIVATE freetype)
+	target_include_directories(${EXECUTABLE_NAME} PUBLIC $<TARGET_PROPERTY:freetype,INCLUDE_DIRECTORIES>)
+
+	# msdfgen: NBL_MSDFGEN_TARGETS is a list, so the include directories are added once per listed target
+	add_dependencies(${EXECUTABLE_NAME} ${NBL_MSDFGEN_TARGETS})
+	target_link_libraries(${EXECUTABLE_NAME} PRIVATE ${NBL_MSDFGEN_TARGETS})
+	foreach(NBL_TARGET IN LISTS NBL_MSDFGEN_TARGETS)
+		target_include_directories(${EXECUTABLE_NAME} PUBLIC $<TARGET_PROPERTY:${NBL_TARGET},INCLUDE_DIRECTORIES>)
+	endforeach()
+endif()
+
+set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") # used below as BINARY_DIR of the compile rules and as BIND of the archive
+
+set(SM 6_8) # suffix of the `-T lib_${SM}` option in COMPILE_OPTIONS
+set(REQUIRED_CAPS [=[
+{
+  "kind": "features",
+  "name": "fragmentShaderPixelInterlock",
+  "type": "bool",
+  "values": [1]
+}
+]=])
+# NOTE(review): REQUIRED_CAPS is not referenced again in this file and both JSON entries below use an empty "CAPS" list - confirm whether it is still needed.
+set(JSON [=[
+[
+	{
+		"INPUT": "shaders/main_pipeline/vertex_shader.hlsl",
+		"KEY": "main_pipeline_vertex_shader",
+		"CAPS": []
+    },
+    {
+		"INPUT": "shaders/main_pipeline/fragment_shader.hlsl",
+		"KEY": "main_pipeline_fragment_shader",
+		"CAPS": []
+    }
+]
+]=])
+string(CONFIGURE "${JSON}" JSON) # expands ${VAR} / @VAR@ references inside the bracket-quoted text (it contains none at present)
+
+set(COMPILE_OPTIONS # forwarded as COMMON_OPTIONS of the compile rules below
+    -I "${CMAKE_CURRENT_SOURCE_DIR}"
+    -T lib_${SM}
+)
+
+NBL_CREATE_NSC_COMPILE_RULES(
+    TARGET ${EXECUTABLE_NAME}SPIRV
+    LINK_TO ${EXECUTABLE_NAME}
+    BINARY_DIR ${OUTPUT_DIRECTORY}
+    MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT
+    COMMON_OPTIONS ${COMPILE_OPTIONS}
+    OUTPUT_VAR KEYS # name of the variable that is passed on as BUILTINS to the archive below
+    INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp
+    NAMESPACE nbl::this_example::builtin::build
+    INPUTS ${JSON}
+)
+# Same NAMESPACE and the same directory (BIND == BINARY_DIR above) as the compile rules; KEYS lists the entries to archive.
+NBL_CREATE_RESOURCE_ARCHIVE(
+    NAMESPACE nbl::this_example::builtin::build
+    TARGET ${EXECUTABLE_NAME}_builtinsBuild
+    LINK_TO ${EXECUTABLE_NAME}
+    BIND ${OUTPUT_DIRECTORY}
+    BUILTINS ${KEYS}
+)
diff --git a/75_CAD_3D/CTriangleMesh.cpp b/75_CAD_3D/CTriangleMesh.cpp
new file mode 100644
index 000000000..5564c0a51
--- /dev/null
+++ b/75_CAD_3D/CTriangleMesh.cpp
@@ -0,0 +1 @@
+#include "CTriangleMesh.h"
\ No newline at end of file
diff --git a/75_CAD_3D/CTriangleMesh.h b/75_CAD_3D/CTriangleMesh.h
new file mode 100644
index 000000000..8f941928a
--- /dev/null
+++ b/75_CAD_3D/CTriangleMesh.h
@@ -0,0 +1,54 @@
+#pragma once
+
+#include <nabla.h>
+#include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include "shaders/globals.hlsl"
+
+using namespace nbl;
+
+// CPU-side triangle mesh container: owns one vertex array and one array of 32-bit indices.
+// Member functions defined inside the class body are implicitly inline, so the keyword is not spelled out.
+class CTriangleMesh final
+{
+public:
+	using index_t = uint32_t;
+	using vertex_t = TriangleMeshVertex;
+
+	// ---- mutation ----
+
+	// Replaces the stored vertices by moving from `vertices`.
+	void setVertices(core::vector<vertex_t>&& vertices) { m_vertices = std::move(vertices); }
+
+	// Replaces the stored indices by moving from `indices`.
+	void setIndices(core::vector<index_t>&& indices) { m_indices = std::move(indices); }
+
+	// Empties both arrays.
+	void clear()
+	{
+		m_indices.clear();
+		m_vertices.clear();
+	}
+
+	// ---- read access ----
+
+	// Read-only views of the two arrays.
+	const core::vector<vertex_t>& getVertices() const { return m_vertices; }
+	const core::vector<index_t>& getIndices() const { return m_indices; }
+
+	// ---- sizes ----
+
+	// Number of stored indices.
+	size_t getIndexCount() const { return m_indices.size(); }
+
+	// Byte size of the vertex array: element count times sizeof(vertex_t).
+	size_t getVertexBuffByteSize() const { return m_vertices.size() * sizeof(vertex_t); }
+
+	// Byte size of the index array: element count times sizeof(index_t).
+	size_t getIndexBuffByteSize() const { return m_indices.size() * sizeof(index_t); }
+
+	// ---- storage ----
+	// Declared in the public section, so the arrays are also reachable without the accessors.
+
+	core::vector<vertex_t> m_vertices;
+	core::vector<index_t> m_indices;
+};
\ No newline at end of file
diff --git a/75_CAD_3D/DrawResourcesFiller.cpp b/75_CAD_3D/DrawResourcesFiller.cpp
new file mode 100644
index 000000000..313c74358
--- /dev/null
+++ b/75_CAD_3D/DrawResourcesFiller.cpp
@@ -0,0 +1,235 @@
+#include "DrawResourcesFiller.h"
+
+using namespace nbl;
+
+// Creates an unbound filler; the queue pointer and the copied-size counter are zeroed so they never hold indeterminate values.
+DrawResourcesFiller::DrawResourcesFiller() : m_copyQueue(nullptr), copiedResourcesSize(0ull) {}
+
+// Binds the filler to `device`, the upload utilities, the copy queue and the logger. The initializers follow the member declaration
+// order (m_logger is declared first) and copiedResourcesSize is zeroed instead of being left indeterminate.
+DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr<video::ILogicalDevice>&& device, smart_refctd_ptr<IUtilities>&& bufferUploadUtils, IQueue* copyQueue, core::smart_refctd_ptr<system::ILogger>&& logger) :
+	m_logger(std::move(logger)),
+	m_device(std::move(device)),
+	m_bufferUploadUtils(std::move(bufferUploadUtils)),
+	m_copyQueue(copyQueue), copiedResourcesSize(0ull) {}
+
+// Stores `func` in `submitDraws`. Per the original note the callback is meant to be called when the buffer is filled,
+// so that the recorded draws get submitted, the buffers cleared and filling continues.
+// Nothing in this translation unit invokes `submitDraws` yet.
+void DrawResourcesFiller::setSubmitDrawsFunction(const SubmitFunc& func)
+{ submitDraws = func; }
+
+// TODO: redo it completely
+// Creates `resourcesGPUBuffer` with `requiredBufferMemorySize` bytes and binds it to a new device memory allocation, trying the memory
+// types of `memoryTypeIndexTryOrder` in order. `requiredImageMemorySize` is not used here. Logs and returns false on any failure.
+bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize, std::span<uint32_t> memoryTypeIndexTryOrder)
+{
+	const size_t adjustedBuffersMemorySize = requiredBufferMemorySize;
+
+	IGPUBuffer::SCreationParams resourcesBufferCreationParams = {};
+	resourcesBufferCreationParams.size = adjustedBuffersMemorySize;
+	resourcesBufferCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT;
+	resourcesGPUBuffer = logicalDevice->createBuffer(std::move(resourcesBufferCreationParams));
+
+	if (!resourcesGPUBuffer)
+	{
+		m_logger.log("Failed to create resourcesGPUBuffer.", nbl::system::ILogger::ELL_ERROR);
+		return false;
+	}
+
+	resourcesGPUBuffer->setObjectDebugName("drawResourcesBuffer");
+
+	const nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements gpuBufferMemoryReqs = resourcesGPUBuffer->getMemoryReqs();
+	const bool memoryRequirementsMatch =
+		(logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits() & gpuBufferMemoryReqs.memoryTypeBits) != 0 && // should have device local memory compatible
+		(gpuBufferMemoryReqs.requiresDedicatedAllocation == false); // should not require dedicated allocation
+
+	if (!memoryRequirementsMatch)
+	{
+		m_logger.log("Shouldn't happen: Buffer Memory Requires Dedicated Allocation or can't biind to device local memory.", nbl::system::ILogger::ELL_ERROR);
+		return false;
+	}
+
+	// The reported requirement may exceed the creation size (alignment padding) and the bound memory has to cover it, so allocate the larger of the two.
+	const size_t totalResourcesSize = gpuBufferMemoryReqs.size > adjustedBuffersMemorySize ? gpuBufferMemoryReqs.size : adjustedBuffersMemorySize;
+
+	video::IDeviceMemoryAllocator::SAllocation allocation = {};
+	for (const auto& memoryTypeIdx : memoryTypeIndexTryOrder)
+	{
+		IDeviceMemoryAllocator::SAllocateInfo allocationInfo =
+		{
+			.size = totalResourcesSize,
+			.flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT, // for the buffers
+			.memoryTypeIndex = memoryTypeIdx,
+			.dedication = nullptr,
+		};
+
+		allocation = logicalDevice->allocate(allocationInfo);
+
+		if (allocation.isValid())
+			break;
+	}
+
+	if (!allocation.isValid())
+	{
+		m_logger.log("Failed Allocation for draw resources!", nbl::system::ILogger::ELL_ERROR);
+		return false;
+	}
+
+	buffersMemoryArena = {
+		.memory = allocation.memory,
+		.offset = core::alignUp(allocation.offset, GPUStructsMaxNaturalAlignment), // keep the arena start aligned for the packed GPU structs
+	};
+
+	video::ILogicalDevice::SBindBufferMemoryInfo bindBufferMemory = {
+		.buffer = resourcesGPUBuffer.get(),
+		.binding = {
+			.memory = buffersMemoryArena.memory.get(),
+			.offset  = buffersMemoryArena.offset,
+		}
+	};
+
+	if (!logicalDevice->bindBufferMemory(1, &bindBufferMemory))
+	{
+		m_logger.log("DrawResourcesFiller::allocateDrawResources, bindBufferMemory failed.", nbl::system::ILogger::ELL_ERROR);
+		return false;
+	}
+
+	return true;
+}
+
+// Tries allocateDrawResources() with shrinking sizes: totals below MinimumDrawResourcesMemorySize are first scaled up to it, then every
+// failed attempt keeps (100 - reductionPercent)% of both sizes until the total drops below the minimum or `maxTries` attempts were made.
+bool DrawResourcesFiller::allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, std::span<uint32_t> memoryTypeIndexTryOrder, uint32_t reductionPercent, uint32_t maxTries)
+{
+	const size_t minimumAcceptableSize = MinimumDrawResourcesMemorySize;
+	// Percentages above 100 would wrap the unsigned subtraction and grow the sizes instead of shrinking them, so clamp first.
+	const uint32_t keptPercent = 100u - (reductionPercent < 100u ? reductionPercent : 100u);
+
+	size_t currentBufferSize = maxBufferMemorySize;
+	size_t currentImageSize = maxImageMemorySize;
+	const size_t totalInitialSize = currentBufferSize + currentImageSize;
+	if (totalInitialSize == 0ull)
+		currentBufferSize = minimumAcceptableSize; // no ratio to preserve and scaling would divide by zero: give the buffers the whole minimum
+	else if (totalInitialSize < minimumAcceptableSize)
+	{
+		// Scale both sizes up so they sum to the minimum while the buffer:image ratio is preserved.
+		const double scaleFactor = static_cast<double>(minimumAcceptableSize) / totalInitialSize;
+		currentBufferSize = static_cast<size_t>(currentBufferSize * scaleFactor);
+		currentImageSize = minimumAcceptableSize - currentBufferSize; // ensures exact sum
+	}
+
+	for (uint32_t numTries = 0u; (currentBufferSize + currentImageSize) >= minimumAcceptableSize && numTries < maxTries; numTries++)
+	{
+		if (allocateDrawResources(logicalDevice, currentImageSize, currentBufferSize, memoryTypeIndexTryOrder))
+		{
+			m_logger.log("Successfully allocated memory for images (%zu) and buffers (%zu).", system::ILogger::ELL_INFO, currentImageSize, currentBufferSize);
+			return true;
+		}
+		m_logger.log("Allocation of memory for images(%zu) and buffers(%zu) failed; Reducing allocation size by %u%% and retrying...", system::ILogger::ELL_WARNING, currentImageSize, currentBufferSize, reductionPercent);
+		currentBufferSize = (currentBufferSize * keptPercent) / 100;
+		currentImageSize = (currentImageSize * keptPercent) / 100;
+	}
+
+	m_logger.log("All attempts to allocate memory for images(%zu) and buffers(%zu) failed.", system::ILogger::ELL_ERROR, currentImageSize, currentBufferSize);
+	return false;
+}
+
+// Stages `mesh` in the geometry byte array (vertices first, indices directly after them) and records one DrawCallData whose two
+// offsets are byte offsets into that array. `intendedNextSubmit` is not used yet.
+void DrawResourcesFiller::drawTriangleMesh(
+	const CTriangleMesh& mesh,
+	SIntendedSubmitInfo& intendedNextSubmit)
+{
+	// TODO: main objects
+	// beginMainObject();
+
+	// TODO: for now we add whole mesh at once, instead we should add triangle by triangle and see check if we overflow memory
+
+	const size_t vertexBuffByteSize = mesh.getVertexBuffByteSize();
+	const size_t indexBuffByteSize = mesh.getIndexBuffByteSize();
+	const auto& indexBuffer = mesh.getIndices();
+	const auto& vertexBuffer = mesh.getVertices();
+	assert(indexBuffer.size() == vertexBuffer.size()); // TODO: figure out why it was needed then decide if this constraint needs to be kept
+
+	// Reserve room for the vertices AND the indices in one go: reserving only the vertex bytes made the index memcpy below
+	// write past the end of the vector and left the indices outside of what getStorageSize() reports for the upload.
+	const size_t vertexOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(vertexBuffByteSize + indexBuffByteSize, alignof(CTriangleMesh::vertex_t));
+	const size_t indexOffset = vertexOffset + vertexBuffByteSize;
+
+	uint8_t* const geometryBytes = resourcesCollection.geometryInfo.data(); // fetched after the resize, which may reallocate
+	memcpy(geometryBytes + vertexOffset, vertexBuffer.data(), vertexBuffByteSize);
+	memcpy(geometryBytes + indexOffset, indexBuffer.data(), indexBuffByteSize);
+
+	DrawCallData drawCallData = {};
+	drawCallData.triangleMeshVerticesBaseAddress = vertexOffset;
+	drawCallData.indexBufferOffset = indexOffset;
+	drawCallData.triangleMeshMainObjectIndex = 0u; // TODO: fix when implementing main objects
+	drawCallData.indexCount = mesh.getIndexCount();
+	drawCalls.push_back(drawCallData);
+
+	//endMainObject();
+}
+
+// Pushes every staged upload into `intendedNextSubmit`; right now that is only the buffer uploads.
+// Returns false without recording anything when `intendedNextSubmit` is not valid, otherwise the result of pushBufferUploads().
+bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit)
+{
+	const bool hasActiveSubmit = intendedNextSubmit.valid();
+	if (hasActiveSubmit)
+	{
+		const bool buffersPushed = pushBufferUploads(intendedNextSubmit, resourcesCollection);
+		return buffersPushed;
+	}
+	// It is a caching submit without command buffer, just for the purpose of accumulation of staging resources.
+	// In that case nothing is pushed, because there is no active command buffer to record into.
+	return false;
+}
+
+// Copies the CPU-filled arrays of `resources` back to back into `resourcesGPUBuffer` through the staging-buffer utilities and stores each
+// array's destination in its `bufferOffset`. Returns false when there is no GPU buffer, the data does not fit, or a copy fails.
+bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resources)
+{
+	copiedResourcesSize = 0ull;
+	if (!resourcesGPUBuffer) // allocateDrawResources() has not succeeded yet, there is nothing to copy into
+		return false;
+
+	if (resources.calculateTotalConsumption() > resourcesGPUBuffer->getSize())
+	{
+		m_logger.log("some bug has caused the resourcesCollection to consume more memory than available in resourcesGPUBuffer without overflow submit", nbl::system::ILogger::ELL_ERROR);
+		assert(false);
+		return false;
+	}
+
+	auto copyCPUFilledDrawBuffer = [&](auto& drawBuffer) -> bool
+		{
+			// drawBuffer must be of type CPUGeneratedResource<T>
+			SBufferRange<IGPUBuffer> copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer };
+			if (copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize())
+			{
+				m_logger.log("`copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()` is true in `copyCPUFilledDrawBuffer`, this shouldn't happen with correct auto-submission mechanism.", nbl::system::ILogger::ELL_ERROR);
+				assert(false);
+				return false;
+			}
+
+			drawBuffer.bufferOffset = copyRange.offset;
+			if (copyRange.size > 0ull)
+			{
+				if (!m_bufferUploadUtils->updateBufferRangeViaStagingBuffer(intendedNextSubmit, copyRange, drawBuffer.vector.data()))
+					return false;
+				copiedResourcesSize += drawBuffer.getAlignedStorageSize(); // the next array starts at the aligned end of this one
+			}
+			return true;
+		};
+
+	// The three results used to be discarded, so a failed copy still reported success. Stop at the first failure instead.
+	return copyCPUFilledDrawBuffer(resources.drawObjects) && copyCPUFilledDrawBuffer(resources.indexBuffer) && copyCPUFilledDrawBuffer(resources.geometryInfo);
+}
+
+// Closes the current frame by advancing `currentFrameIndex`; `drawSubmitWaitValue` is accepted but not consumed yet.
+void DrawResourcesFiller::markFrameUsageComplete(uint64_t drawSubmitWaitValue)
+{
+	// TODO[LATER]: take into account that currentFrameIndex was submitted with drawSubmitWaitValue; use that value when deallocating the resources marked with this frame index.
+	//				Currently, for evictions the worst case value will be waited for, as there is no way yet to know which semaphore value will signal the completion of the (to be evicted) resource's usage.
+	++currentFrameIndex;
+}
\ No newline at end of file
diff --git a/75_CAD_3D/DrawResourcesFiller.h b/75_CAD_3D/DrawResourcesFiller.h
new file mode 100644
index 000000000..ea2bca02e
--- /dev/null
+++ b/75_CAD_3D/DrawResourcesFiller.h
@@ -0,0 +1,220 @@
+/******************************************************************************/
+/* DrawResourcesFiller: This class provides important functionality to manage resources needed for a draw.
+/******************************************************************************/
+#pragma once
+
+#if __has_include("glm/glm/glm.hpp") // legacy
+#include "glm/glm/glm.hpp"
+#else
+#include "glm/glm.hpp" // new build system
+#endif
+#include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include <nbl/builtin/hlsl/cpp_compat/matrix.hlsl>
+#include <nbl/builtin/hlsl/cpp_compat/vector.hlsl>
+#include <nbl/builtin/hlsl/limits.hlsl>
+#include <nbl/core/containers/LRUCache.h>
+#include <nbl/video/utilities/SIntendedSubmitInfo.h>
+#include "CTriangleMesh.h"
+#include "shaders/globals.hlsl" // the directory is lowercase on disk; a capitalised spelling only resolves on case-insensitive filesystems
+
+using namespace nbl;
+using namespace nbl::video;
+using namespace nbl::core;
+using namespace nbl::asset;
+
+static_assert(sizeof(DrawObject) == 16u);
+static_assert(sizeof(MainObject) == 20u);
+
+// ! DrawResourcesFiller: manages the resources needed for a draw; drawing new objects should go through this class.
+// ! Holds the CPU-staged scene data and the GPU buffer that the data is uploaded into.
+// ! Intended (see `submitDraws`) to auto-submit everything recorded so far when memory overflows; nothing in this header invokes that callback yet.
+struct DrawResourcesFiller
+{
+	struct DrawCallData // one entry is recorded per drawTriangleMesh() call
+	{
+		uint64_t indexBufferOffset; // byte offset of the indices inside the geometryInfo byte array
+		uint64_t indexCount;
+		uint64_t triangleMeshVerticesBaseAddress; // byte offset of the vertices inside the geometryInfo byte array
+		uint32_t triangleMeshMainObjectIndex;
+	};
+
+public:
+
+	// We pack multiple data types in a single buffer, we need to make sure each offset starts aligned to avoid mis-aligned accesses
+	static constexpr size_t GPUStructsMaxNaturalAlignment = 8u;
+	static constexpr size_t MinimumDrawResourcesMemorySize = size_t(512u) << 20u; // 512MB
+
+	/// @brief general parent struct for 1.ReservedCompute and 2.CPUGenerated Resources
+	struct ResourceBase
+	{
+		static constexpr size_t InvalidBufferOffset = ~size_t(0); // all bits set; `~0u` would only set the low 32 bits of a 64-bit size_t
+		size_t bufferOffset = InvalidBufferOffset; // set when copy to gpu buffer is issued
+		virtual size_t getCount() const = 0;
+		virtual size_t getStorageSize() const = 0;
+		virtual size_t getAlignedStorageSize() const { return core::alignUp(getStorageSize(), GPUStructsMaxNaturalAlignment); }
+	};
+
+	/// @brief ResourceBase reserved for compute shader stages input/output
+	template <typename T>
+	struct ReservedComputeResource : ResourceBase
+	{
+		size_t count = 0ull;
+		size_t getCount() const override { return count; }
+		size_t getStorageSize() const override  { return count * sizeof(T); }
+	};
+
+	/// @brief ResourceBase which is filled by CPU, packed and sent to GPU
+	template <typename T>
+	struct CPUGeneratedResource : ResourceBase
+	{
+		core::vector<T> vector;
+		size_t getCount() const override { return vector.size(); }
+		size_t getStorageSize() const override { return vector.size() * sizeof(T); }
+
+		/// @return pointer to start of the data to be filled, up to additionalCount
+		T* increaseCountAndGetPtr(size_t additionalCount)
+		{
+			size_t offset = vector.size();
+			vector.resize(offset + additionalCount);
+			return vector.data() + offset; // pointer arithmetic instead of vector[offset], which is out of range when additionalCount is 0
+		}
+
+		/// @brief increases size of general-purpose resources that hold bytes
+		/// @param alignment: Alignment of the offset returned to be filled, should be PoT and <= GPUStructsMaxNaturalAlignment, only use this if storing raw bytes in vector
+		/// @return offset (in elements of the vector) of the start of the data to be filled, up to additional size
+		size_t increaseSizeAndGetOffset(size_t additionalSize, size_t alignment)
+		{
+			assert(core::isPoT(alignment) && alignment <= GPUStructsMaxNaturalAlignment);
+			size_t offset = core::alignUp(vector.size(), alignment);
+			vector.resize(offset + additionalSize);
+			return offset;
+		}
+
+		uint32_t addAndGetOffset(const T& val)
+		{
+			vector.push_back(val);
+			return static_cast<uint32_t>(vector.size() - 1u); // index of the element that was just appended
+		}
+
+		T* data() { return vector.data(); }
+	};
+
+	/// @brief struct to hold all resources
+	// TODO: rename to staged resources buffers or something like that
+	struct ResourcesCollection
+	{
+		// auto-submission level 1 buffers (mainObj that drawObjs references, if all drawObjs+idxBuffer+geometryInfo doesn't fit into mem this will be broken down into many)
+		//CPUGeneratedResource<MainObject> mainObjects;
+
+		// auto-submission level 2 buffers
+		CPUGeneratedResource<DrawObject> drawObjects;
+		CPUGeneratedResource<uint32_t> indexBuffer; // TODO: this is going to change to ReservedComputeResource where index buffer gets filled by compute shaders
+		CPUGeneratedResource<uint8_t> geometryInfo; // general purpose byte buffer for custom data for geometries (eg. line points, bezier definitions, aabbs)
+
+		// Get Total memory consumption, If all ResourcesCollection get packed together with GPUStructsMaxNaturalAlignment
+		// used to decide the remaining memory and when to overflow
+		size_t calculateTotalConsumption() const
+		{
+			return
+				drawObjects.getAlignedStorageSize() +
+				indexBuffer.getAlignedStorageSize() +
+				geometryInfo.getAlignedStorageSize();
+		}
+	};
+
+	DrawResourcesFiller();
+
+	DrawResourcesFiller(smart_refctd_ptr<video::ILogicalDevice>&& device, smart_refctd_ptr<IUtilities>&& bufferUploadUtils, IQueue* copyQueue, core::smart_refctd_ptr<system::ILogger>&& logger);
+
+	typedef std::function<void(SIntendedSubmitInfo&)> SubmitFunc;
+	void setSubmitDrawsFunction(const SubmitFunc& func);
+
+	/**
+	 * @brief Creates the resources GPU buffer and binds it to a newly allocated block of device memory.
+	 * 
+	 * NOTE: the current implementation only creates the buffer; `requiredImageMemorySize` is accepted but no image arena is allocated.
+	 * 
+	 * @param logicalDevice Pointer to the logical device used for memory allocation and resource creation.
+	 * @param requiredImageMemorySize The size in bytes of the memory required for images (currently unused).
+	 * @param requiredBufferMemorySize The size in bytes of the memory required for buffers.
+	 * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried.
+	 * 
+	 * @return true if the memory allocation and resource setup succeeded; false otherwise.
+	 */
+	bool allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize, std::span<uint32_t> memoryTypeIndexTryOrder);
+
+	/**
+	 * @brief Attempts to allocate draw resources within a given VRAM budget, retrying with progressively smaller sizes on failure.
+	 * 
+	 * This function preserves the initial image-to-buffer memory ratio. If the initial sizes are too small,
+	 * it scales them up to meet a minimum required threshold. On allocation failure, it reduces the memory
+	 * sizes by a specified percentage and retries, until it either succeeds or the number of attempts exceeds `maxTries`.
+	 * 
+	 * @param logicalDevice Pointer to the logical device used for allocation.
+	 * @param maxImageMemorySize Initial image memory size (in bytes) to attempt allocation with.
+	 * @param maxBufferMemorySize Initial buffer memory size (in bytes) to attempt allocation with.
+	 * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried.
+	 * @param reductionPercent The percentage by which to reduce the memory sizes after each failed attempt (e.g., 10 means reduce by 10%).
+	 * @param maxTries Maximum number of attempts to try reducing and allocating memory.
+	 * 
+	 * @return true if the allocation succeeded at any iteration; false if all attempts failed.
+	 */
+	bool allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, std::span<uint32_t> memoryTypeIndexTryOrder, uint32_t reductionPercent = 10u, uint32_t maxTries = 32u);
+
+	// Must be called at the end of each frame.
+	// right before submitting the main draw that uses the currently queued geometry, images, or other objects/resources.
+	// Registers the semaphore/value that will signal completion of this frame's draw,
+	// This allows future frames to safely deallocate or evict resources used in the current frame by waiting on this signal before reuse or destruction.
+	// `drawSubmitWaitValue` should reference the wait value of the draw submission finishing this frame using the `intendedNextSubmit`; 
+	void markFrameUsageComplete(uint64_t drawSubmitWaitValue);
+
+	void drawTriangleMesh(
+		const CTriangleMesh& mesh,
+		SIntendedSubmitInfo& intendedNextSubmit);
+
+	/// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU
+	/// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory.
+	bool pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit);
+
+	/// @brief Records GPU copy commands for all staged buffer resourcesCollection into the active command buffer.
+	bool pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resourcesCollection);
+
+	/// @brief resets the recorded draw calls and the CPU-staged arrays, so that the next frame packs its data from offset 0 again
+	void reset()
+	{
+		drawCalls.clear();
+		resourcesCollection.drawObjects.vector.clear();
+		resourcesCollection.indexBuffer.vector.clear();
+		resourcesCollection.geometryInfo.vector.clear();
+	}
+
+	/// @brief collection of all the resources that will eventually be reserved or copied to in the resourcesGPUBuffer, will be accessed via individual BDA pointers in shaders
+	const ResourcesCollection& getResourcesCollection() const { return resourcesCollection; }
+	/// @brief buffer containing all non-texture type resources
+	nbl::core::smart_refctd_ptr<IGPUBuffer> getResourcesGPUBuffer() const { return resourcesGPUBuffer; }
+	/// @return how far resourcesGPUBuffer was copied to by the last `pushBufferUploads` call
+	size_t getCopiedResourcesSize() const { return copiedResourcesSize; }
+	const core::vector<DrawCallData>& getDrawCalls() const { return drawCalls; }
+
+private:
+	nbl::system::logger_opt_smart_ptr m_logger = nullptr;
+
+	smart_refctd_ptr<video::ILogicalDevice> m_device;
+	core::smart_refctd_ptr<video::IUtilities> m_bufferUploadUtils;
+
+	IQueue* m_copyQueue = nullptr;
+
+	// FrameIndex used as a criteria for resource/image eviction in case of limitations
+	uint32_t currentFrameIndex = 0u;
+
+	// DrawCalls Data
+	core::vector<DrawCallData> drawCalls;
+
+	// ResourcesCollection and packed into GPUBuffer
+	ResourcesCollection resourcesCollection;
+	IDeviceMemoryAllocator::SAllocation buffersMemoryArena;
+	nbl::core::smart_refctd_ptr<IGPUBuffer> resourcesGPUBuffer;
+	size_t copiedResourcesSize = 0ull; // default-initialized so the getter never returns an indeterminate value
+
+	SubmitFunc submitDraws;
+};
\ No newline at end of file
diff --git a/75_CAD_3D/config.json.template b/75_CAD_3D/config.json.template
new file mode 100644
index 000000000..f961745c1
--- /dev/null
+++ b/75_CAD_3D/config.json.template
@@ -0,0 +1,28 @@
+{
+  "enableParallelBuild": true,
+  "threadsPerBuildProcess" : 2,
+  "isExecuted": false,
+  "scriptPath": "",
+  "cmake": {
+    "configurations": [ "Release", "Debug", "RelWithDebInfo" ],
+    "buildModes": [],
+    "requiredOptions": []
+  }, 
+  "profiles": [
+    {
+      "backend": "vulkan",
+      "platform": "windows",
+      "buildModes": [],
+      "runConfiguration": "Release",
+      "gpuArchitectures": []
+    }
+  ],
+  "dependencies": [],
+  "data": [
+    {
+      "dependencies": [],
+      "command": [""],
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file
diff --git a/75_CAD_3D/main.cpp b/75_CAD_3D/main.cpp
new file mode 100644
index 000000000..ed7ddc039
--- /dev/null
+++ b/75_CAD_3D/main.cpp
@@ -0,0 +1,1097 @@
+﻿// TODO: Copyright notice
+#include "nbl/this_example/builtin/build/spirv/keys.hpp"
+
+#include "nbl/examples/examples.hpp"
+
+using namespace nbl::hlsl;
+using namespace nbl;
+using namespace core;
+using namespace system;
+using namespace asset;
+using namespace ui;
+using namespace video;
+
+#include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include "DrawResourcesFiller.h"
+
+#include "nbl/builtin/hlsl/math/linalg/transform.hlsl"
+#include "nbl/builtin/hlsl/math/thin_lens_projection.hlsl"
+
+// Event callback (installed as the window's callback in getSurfaces): logs mouse/keyboard channel (dis)connections and forwards them to the InputSystem.
+class CEventCallback : public ISimpleManagedSurface::ICallback
+{
+public:
+	CEventCallback(nbl::core::smart_refctd_ptr<nbl::examples::InputSystem>&& inputSystem, nbl::system::logger_opt_smart_ptr&& logger) : m_inputSystem(std::move(inputSystem)), m_logger(std::move(logger)) {}
+	CEventCallback() {}
+
+	void setLogger(nbl::system::logger_opt_smart_ptr& logger)
+	{
+		m_logger = logger;
+	}
+	void setInputSystem(nbl::core::smart_refctd_ptr<nbl::examples::InputSystem>&& inputSystem)
+	{
+		m_inputSystem = std::move(inputSystem); // the parameter must not share the member's name: it would hide the member and turn this into a self-move
+	}
+private:
+	// Connection hooks: each one logs the channel pointer, then adds/removes the channel on the InputSystem's m_mouse / m_keyboard.
+	void onMouseConnected_impl(nbl::core::smart_refctd_ptr<nbl::ui::IMouseEventChannel>&& mch) override
+	{
+		m_logger.log("A mouse %p has been connected", nbl::system::ILogger::ELL_INFO, mch.get());
+		m_inputSystem.get()->add(m_inputSystem.get()->m_mouse, std::move(mch));
+	}
+	void onMouseDisconnected_impl(nbl::ui::IMouseEventChannel* mch) override
+	{
+		m_logger.log("A mouse %p has been disconnected", nbl::system::ILogger::ELL_INFO, mch);
+		m_inputSystem.get()->remove(m_inputSystem.get()->m_mouse, mch);
+	}
+	void onKeyboardConnected_impl(nbl::core::smart_refctd_ptr<nbl::ui::IKeyboardEventChannel>&& kbch) override
+	{
+		m_logger.log("A keyboard %p has been connected", nbl::system::ILogger::ELL_INFO, kbch.get());
+		m_inputSystem.get()->add(m_inputSystem.get()->m_keyboard, std::move(kbch));
+	}
+	void onKeyboardDisconnected_impl(nbl::ui::IKeyboardEventChannel* kbch) override
+	{
+		m_logger.log("A keyboard %p has been disconnected", nbl::system::ILogger::ELL_INFO, kbch);
+		m_inputSystem.get()->remove(m_inputSystem.get()->m_keyboard, kbch);
+	}
+	// Both default to null; the hooks above dereference m_inputSystem, so it must be set (ctor or setInputSystem) before any channel event arrives.
+	nbl::core::smart_refctd_ptr<nbl::examples::InputSystem> m_inputSystem = nullptr;
+	nbl::system::logger_opt_smart_ptr m_logger = nullptr;
+};
+
+class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources // keeps one framebuffer per swapchain image
+{
+	public:
+		CSwapchainResources() = default;
+		// Deduces a surface format for `surface` on `physDev`; returns EF_UNKNOWN when `deduceFormat` fails.
+		inline E_FORMAT deduceRenderpassFormat(ISurface* surface, IPhysicalDevice* physDev)
+		{
+			ISwapchain::SCreationParams swapchainParams = {.surface=smart_refctd_ptr<ISurface>(surface), };
+			// Need to choose a surface format
+			if (!swapchainParams.deduceFormat(physDev, getPreferredFormats(), getPreferredEOTFs(), getPreferredColorPrimaries()))
+				return EF_UNKNOWN;
+			return swapchainParams.surfaceFormat.format;
+		}
+
+		// When (re)creating the framebuffers we need a renderpass compatible with the one used to render into them
+		inline void setCompatibleRenderpass(core::smart_refctd_ptr<IGPURenderpass> renderpass)
+		{
+			m_renderpass = renderpass;
+		}
+		// Returns the framebuffer stored for `imageIx`, or nullptr when imageIx >= m_framebuffers.size().
+		inline IGPUFramebuffer* getFramebuffer(const uint8_t imageIx)
+		{
+			if (imageIx<m_framebuffers.size())
+				return m_framebuffers[imageIx].get();
+			return nullptr;
+		}
+
+	protected:
+		virtual inline void invalidate_impl()
+		{
+			std::fill(m_framebuffers.begin(),m_framebuffers.end(),nullptr); // drop every framebuffer reference
+		}
+
+		// Creates one image view + framebuffer per swapchain image; returns false as soon as one framebuffer is null
+		virtual inline bool onCreateSwapchain_impl(const uint8_t qFam)
+		{
+			auto device = const_cast<ILogicalDevice*>(m_renderpass->getOriginDevice()); // dereferences m_renderpass, so setCompatibleRenderpass must have been called first
+
+			const auto swapchain = getSwapchain();
+			const auto count = swapchain->getImageCount();
+			const auto& sharedParams = swapchain->getCreationParameters().sharedParams;
+			for (uint8_t i=0u; i<count; i++)
+			{
+				auto imageView = device->createImageView({
+					.flags = IGPUImageView::ECF_NONE,
+					.subUsages = IGPUImage::EUF_RENDER_ATTACHMENT_BIT,
+					.image = core::smart_refctd_ptr<IGPUImage>(getImage(i)),
+					.viewType = IGPUImageView::ET_2D,
+					.format = getImage(i)->getCreationParameters().format
+				});
+				m_framebuffers[i] = device->createFramebuffer({{
+					.renderpass = core::smart_refctd_ptr(m_renderpass),
+					.colorAttachments = &imageView.get(),
+					.width = sharedParams.width,
+					.height = sharedParams.height
+				}});
+				if (!m_framebuffers[i])
+					return false;
+			}
+			return true;
+		}
+
+		// Per-swapchain state: the compatible renderpass and the framebuffers built against it
+		core::smart_refctd_ptr<IGPURenderpass> m_renderpass;
+		std::array<core::smart_refctd_ptr<IGPUFramebuffer>,ISwapchain::MaxImages> m_framebuffers;
+};
+
+class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplication, public nbl::examples::BuiltinResourcesApplication
+{
+	using device_base_t = nbl::examples::SimpleWindowedApplication;
+	using asset_base_t = nbl::examples::BuiltinResourcesApplication;
+	using clock_t = std::chrono::steady_clock;
+	
+	constexpr static uint32_t WindowWidthRequest = 1600u;
+	constexpr static uint32_t WindowHeightRequest = 900u;
+	constexpr static uint32_t MaxFramesInFlight = 3u;
+	constexpr static uint32_t MaxSubmitsInFlight = 16u;
+public:
+
+	void allocateResources() // Sets up the DrawResourcesFiller, the globals uniform buffer and two window-sized storage images, then clears/transitions the images with a blocking one-off submit.
+	{
+		// TODO: currently using the same utils for buffers and images, make them separate staging buffers
+		drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_device), core::smart_refctd_ptr(m_utils), getGraphicsQueue(), core::smart_refctd_ptr(m_logger));
+
+		// Just wanting to try memory type indices with device local flag, TODO: later improve to prioritize pure device local
+		std::vector<uint32_t> deviceLocalMemoryTypeIndices;
+		for (uint32_t i = 0u; i < m_physicalDevice->getMemoryProperties().memoryTypeCount; ++i)
+		{
+			const auto& memType = m_physicalDevice->getMemoryProperties().memoryTypes[i];
+			if (memType.propertyFlags.hasFlags(IDeviceMemoryAllocation::EMPF_DEVICE_LOCAL_BIT))
+				deviceLocalMemoryTypeIndices.push_back(i);
+		}
+
+		size_t maxImagesMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB
+		size_t maxBufferMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB
+
+		drawResourcesFiller.allocateDrawResourcesWithinAvailableVRAM(m_device.get(), maxImagesMemSize, maxBufferMemSize, deviceLocalMemoryTypeIndices);
+
+		{
+			IGPUBuffer::SCreationParams globalsCreationParams = {};
+			globalsCreationParams.size = sizeof(Globals);
+			globalsCreationParams.usage = IGPUBuffer::EUF_UNIFORM_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF;
+			m_globalsBuffer = m_device->createBuffer(std::move(globalsCreationParams));
+
+			IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = m_globalsBuffer->getMemoryReqs();
+			memReq.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits();
+			auto globalsBufferMem = m_device->allocate(memReq, m_globalsBuffer.get()); // NOTE(review): neither the buffer nor the allocation result is checked
+		}
+		
+		// pseudoStencil: window-sized R32_UINT storage image, device-local memory
+		{
+			asset::E_FORMAT pseudoStencilFormat = asset::EF_R32_UINT;
+			{
+				IGPUImage::SCreationParams imgInfo;
+				imgInfo.format = pseudoStencilFormat;
+				imgInfo.type = IGPUImage::ET_2D;
+				imgInfo.extent.width = m_window->getWidth();
+				imgInfo.extent.height = m_window->getHeight();
+				imgInfo.extent.depth = 1u;
+				imgInfo.mipLevels = 1u;
+				imgInfo.arrayLayers = 1u;
+				imgInfo.samples = asset::ICPUImage::ESCF_1_BIT;
+				imgInfo.flags = asset::IImage::E_CREATE_FLAGS::ECF_NONE;
+				imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT;
+				// [VKTODO] imgInfo.initialLayout = IGPUImage::EL_UNDEFINED;
+				imgInfo.tiling = IGPUImage::TILING::OPTIMAL;
+
+				auto image = m_device->createImage(std::move(imgInfo));
+				auto imageMemReqs = image->getMemoryReqs();
+				imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits();
+				m_device->allocate(imageMemReqs, image.get()); // NOTE(review): allocation result is not checked
+
+				image->setObjectDebugName("pseudoStencil Image");
+
+				IGPUImageView::SCreationParams imgViewInfo;
+				imgViewInfo.image = std::move(image);
+				imgViewInfo.format = pseudoStencilFormat;
+				imgViewInfo.viewType = IGPUImageView::ET_2D;
+				imgViewInfo.flags = IGPUImageView::E_CREATE_FLAGS::ECF_NONE;
+				imgViewInfo.subresourceRange.aspectMask = asset::IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT;
+				imgViewInfo.subresourceRange.baseArrayLayer = 0u;
+				imgViewInfo.subresourceRange.baseMipLevel = 0u;
+				imgViewInfo.subresourceRange.layerCount = 1u;
+				imgViewInfo.subresourceRange.levelCount = 1u;
+
+				pseudoStencilImageView = m_device->createImageView(std::move(imgViewInfo));
+			}
+		}
+		
+		// colorStorage: same creation parameters as pseudoStencil (window-sized R32_UINT storage image)
+		{
+			asset::E_FORMAT colorStorageFormat = asset::EF_R32_UINT;
+			{
+				IGPUImage::SCreationParams imgInfo;
+				imgInfo.format = colorStorageFormat;
+				imgInfo.type = IGPUImage::ET_2D;
+				imgInfo.extent.width = m_window->getWidth();
+				imgInfo.extent.height = m_window->getHeight();
+				imgInfo.extent.depth = 1u;
+				imgInfo.mipLevels = 1u;
+				imgInfo.arrayLayers = 1u;
+				imgInfo.samples = asset::ICPUImage::ESCF_1_BIT;
+				imgInfo.flags = asset::IImage::E_CREATE_FLAGS::ECF_NONE;
+				imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT;
+				// [VKTODO] imgInfo.initialLayout = IGPUImage::EL_UNDEFINED;
+				imgInfo.tiling = IGPUImage::TILING::OPTIMAL;
+
+				auto image = m_device->createImage(std::move(imgInfo));
+				auto imageMemReqs = image->getMemoryReqs();
+				imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits();
+				m_device->allocate(imageMemReqs, image.get()); // NOTE(review): allocation result is not checked
+
+				image->setObjectDebugName("colorStorage Image");
+
+				IGPUImageView::SCreationParams imgViewInfo;
+				imgViewInfo.image = std::move(image);
+				imgViewInfo.format = colorStorageFormat;
+				imgViewInfo.viewType = IGPUImageView::ET_2D;
+				imgViewInfo.flags = IGPUImageView::E_CREATE_FLAGS::ECF_NONE;
+				imgViewInfo.subresourceRange.aspectMask = asset::IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT;
+				imgViewInfo.subresourceRange.baseArrayLayer = 0u;
+				imgViewInfo.subresourceRange.baseMipLevel = 0u;
+				imgViewInfo.subresourceRange.layerCount = 1u;
+				imgViewInfo.subresourceRange.levelCount = 1u;
+
+				colorStorageImageView = m_device->createImageView(std::move(imgViewInfo));
+			}
+		}
+
+		// Initial Pipeline Transitions and Clearing of PseudoStencil and ColorStorage
+		// Recorded to Temporary CommandBuffer, Submitted to Graphics Queue, and Blocked on here
+		{
+			auto cmdPool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT);
+			smart_refctd_ptr<IGPUCommandBuffer> tmpCmdBuffer;
+			cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &tmpCmdBuffer, 1 });
+			auto tmpJobFinishedSema = m_device->createSemaphore(0ull);
+
+			tmpCmdBuffer->begin(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
+			{
+				// Clear pseudoStencil (colorStorage is only transitioned below, it is not cleared here)
+				auto pseudoStencilImage = pseudoStencilImageView->getCreationParameters().image;
+				auto colorStorageImage = colorStorageImageView->getCreationParameters().image;
+
+				IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t beforeClearImageBarrier[] =
+				{
+					{
+						.barrier = {
+							.dep = {
+								.srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none
+								.srcAccessMask = ACCESS_FLAGS::NONE,
+								.dstStageMask = PIPELINE_STAGE_FLAGS::CLEAR_BIT,
+								.dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, // could be ALL_TRANSFER but let's be specific we only want to CLEAR right now
+							}
+							// .ownershipOp. No queueFam ownership transfer
+						},
+						.image = pseudoStencilImage.get(),
+						.subresourceRange = {
+							.aspectMask = IImage::EAF_COLOR_BIT,
+							.baseMipLevel = 0u,
+							.levelCount = 1u,
+							.baseArrayLayer = 0u,
+							.layerCount = 1u,
+						},
+						.oldLayout = IImage::LAYOUT::UNDEFINED,
+						.newLayout = IImage::LAYOUT::GENERAL,
+					}
+				};
+
+				tmpCmdBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeClearImageBarrier });
+
+				uint32_t pseudoStencilInvalidValue = core::bitfieldInsert<uint32_t>(0u, 16777215, 8, 24); // 16777215 == 2^24-1; presumably args are (base, insert, offset, bits) giving 0xFFFFFF00 — TODO confirm
+				IGPUCommandBuffer::SClearColorValue clear = {};
+				clear.uint32[0] = pseudoStencilInvalidValue;
+
+				asset::IImage::SSubresourceRange subresourceRange = {};
+				subresourceRange.aspectMask = asset::IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT;
+				subresourceRange.baseArrayLayer = 0u;
+				subresourceRange.baseMipLevel = 0u;
+				subresourceRange.layerCount = 1u;
+				subresourceRange.levelCount = 1u;
+
+				tmpCmdBuffer->clearColorImage(pseudoStencilImage.get(), asset::IImage::LAYOUT::GENERAL, &clear, 1u, &subresourceRange);
+
+				// prepare pseudoStencilImage (after its clear) and colorStorageImage (layout transition only) for usage in drawcall
+
+				IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t beforeUsageImageBarriers[] =
+				{
+					{
+						.barrier = {
+							.dep = {
+								.srcStageMask = PIPELINE_STAGE_FLAGS::CLEAR_BIT,
+								.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT,
+								.dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,
+								.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS | ACCESS_FLAGS::MEMORY_WRITE_BITS, // make the clear visible to fragment-shader reads and writes
+							}
+							// .ownershipOp. No queueFam ownership transfer
+						},
+						.image = pseudoStencilImage.get(),
+						.subresourceRange = {
+							.aspectMask = IImage::EAF_COLOR_BIT,
+							.baseMipLevel = 0u,
+							.levelCount = 1u,
+							.baseArrayLayer = 0u,
+							.layerCount = 1u,
+						},
+						.oldLayout = IImage::LAYOUT::GENERAL,
+						.newLayout = IImage::LAYOUT::GENERAL,
+					}, 
+					{
+						.barrier = {
+							.dep = {
+								.srcStageMask = PIPELINE_STAGE_FLAGS::NONE,
+								.srcAccessMask = ACCESS_FLAGS::NONE,
+								.dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,
+								.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS | ACCESS_FLAGS::MEMORY_WRITE_BITS, // no prior writes to wait on (src is NONE); only the UNDEFINED -> GENERAL transition must precede fragment-shader access
+							}
+							// .ownershipOp. No queueFam ownership transfer
+						},
+						.image = colorStorageImage.get(),
+						.subresourceRange = {
+							.aspectMask = IImage::EAF_COLOR_BIT,
+							.baseMipLevel = 0u,
+							.levelCount = 1u,
+							.baseArrayLayer = 0u,
+							.layerCount = 1u,
+						},
+						.oldLayout = IImage::LAYOUT::UNDEFINED,
+						.newLayout = IImage::LAYOUT::GENERAL,
+					}
+				};
+
+				tmpCmdBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeUsageImageBarriers });
+			}
+			tmpCmdBuffer->end();
+
+			IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[1u] = { {.cmdbuf = tmpCmdBuffer.get() } };
+			IQueue::SSubmitInfo::SSemaphoreInfo singalSemaphores[1] = {};
+			singalSemaphores[0].semaphore = tmpJobFinishedSema.get();
+			singalSemaphores[0].stageMask = asset::PIPELINE_STAGE_FLAGS::NONE;
+			singalSemaphores[0].value = 1u;
+
+			IQueue::SSubmitInfo submitInfo = {};
+			submitInfo.commandBuffers = cmdbufs;
+			submitInfo.waitSemaphores = {};
+			submitInfo.signalSemaphores = singalSemaphores;
+
+			getGraphicsQueue()->submit({ &submitInfo, 1u });
+
+			ISemaphore::SWaitInfo waitTmpJobFinish = { .semaphore = tmpJobFinishedSema.get(), .value = 1u};
+			m_device->blockForSemaphores({ &waitTmpJobFinish, 1u });
+		}
+	}
+	
+	smart_refctd_ptr<IGPURenderpass> createRenderpass(
+		E_FORMAT colorAttachmentFormat,
+		IGPURenderpass::LOAD_OP loadOp,
+		IImage::LAYOUT initialLayout,
+		IImage::LAYOUT finalLayout)
+	{ // Builds a renderpass with one color attachment and one subpass; on failure it logs and still returns the (null) result of createRenderpass
+		const IGPURenderpass::SCreationParams::SColorAttachmentDescription colorAttachments[] = {
+			{{
+				{
+					.format = colorAttachmentFormat,
+					.samples = IGPUImage::ESCF_1_BIT,
+					.mayAlias = false
+				},
+				/*.loadOp = */loadOp,
+				/*.storeOp = */IGPURenderpass::STORE_OP::STORE,
+				/*.initialLayout = */initialLayout,
+				/*.finalLayout = */finalLayout
+			}},
+			IGPURenderpass::SCreationParams::ColorAttachmentsEnd
+		};
+		
+		IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = {
+			{},
+			IGPURenderpass::SCreationParams::SubpassesEnd
+		};
+		subpasses[0].colorAttachments[0] = {.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}};
+		
+		// We actually need external dependencies to ensure ordering of the Implicit Layout Transitions relative to the semaphore signals
+		const IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = {
+			// External -> subpass 0 (comment originally read: wipe-transition to ATTACHMENT_OPTIMAL)
+			{
+				.srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
+				.dstSubpass = 0,
+				.memoryBarrier = {
+					// src stage/access masks are left at their defaults — NOTE(review): the original justification was never filled in ("because ????"), confirm this is valid for the LOAD variants too
+					.dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
+					.dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
+				}
+				// leave view offsets and flags default
+			},
+			// subpass 0 -> External (comment originally read: ATTACHMENT_OPTIMAL to PRESENT_SRC; the actual final layout is the `finalLayout` argument)
+			{
+				.srcSubpass = 0,
+				.dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
+				.memoryBarrier = {
+					.srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
+					.srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
+					// we can have NONE as the Destinations because the spec says so about presents
+				}
+				// leave view offsets and flags default
+			},
+			IGPURenderpass::SCreationParams::DependenciesEnd
+		};
+		
+		smart_refctd_ptr<IGPURenderpass> renderpass;
+		IGPURenderpass::SCreationParams params = {};
+		params.colorAttachments = colorAttachments;
+		params.subpasses = subpasses;
+		params.dependencies = dependencies;
+		renderpass = m_device->createRenderpass(params);
+		if (!renderpass)
+			logFail("Failed to Create a Renderpass!"); // only logs: the null renderpass is still returned to the caller
+		return renderpass;
+	}
+
+
+	// Yay thanks to multiple inheritance we cannot forward ctors anymore
+	inline ComputerAidedDesign(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) :
+		IApplicationFramework(_localInputCWD,_localOutputCWD,_sharedInputCWD,_sharedOutputCWD) {}
+	
+	// Will get called mid-initialization, via `filterDevices` between when the API Connection is created and Physical Device is chosen
+	inline core::vector<video::SPhysicalDeviceFilter::SurfaceCompatibility> getSurfaces() const override
+	{
+		// So let's create our Window and Surface then! (only on the first call, later calls reuse m_surface)
+		if (!m_surface)
+		{
+			{
+				auto windowCallback = core::make_smart_refctd_ptr<CEventCallback>(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger));
+				IWindow::SCreationParams params = {};
+				params.callback = windowCallback;
+				params.width = WindowWidthRequest;
+				params.height = WindowHeightRequest;
+				params.x = 32;
+				params.y = 32;
+				// NOTE(review): an earlier comment said the window is created hidden, but no hidden flag is passed below.
+				// Only programmatic resize, not regular.
+				params.flags = IWindow::ECF_BORDERLESS|IWindow::ECF_RESIZABLE;
+				params.windowCaption = "CAD 3D Playground";
+				const_cast<std::remove_const_t<decltype(m_window)>&>(m_window) = m_winMgr->createWindow(std::move(params));
+			}
+			auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api),smart_refctd_ptr_static_cast<IWindowWin32>(m_window));
+			const_cast<std::remove_const_t<decltype(m_surface)>&>(m_surface) = CSimpleResizeSurface<CSwapchainResources>::create(std::move(surface));
+		}
+		if (m_surface)
+			return {{m_surface->getSurface()/*,EQF_NONE*/}};
+		return {};
+	}
+	
+	inline bool onAppInitialized(smart_refctd_ptr<ISystem>&& system) override // Creates renderpasses, surface, GPU resources, pipeline, command buffers and semaphores; returns false (after logFail) on the checked failures
+	{
+		m_inputSystem = make_smart_refctd_ptr<nbl::examples::InputSystem>(logger_opt_smart_ptr(smart_refctd_ptr(m_logger)));
+
+		// Remember to call the base class initialization!
+		if (!device_base_t::onAppInitialized(smart_refctd_ptr(system)))
+			return false;
+		if (!asset_base_t::onAppInitialized(std::move(system)))
+			return false;
+		
+		// Let's just use the same queue since there's no need for async present
+		if (!m_surface)
+			return logFail("Could not create Window & Surface!");
+		
+		auto scResources = std::make_unique<CSwapchainResources>();
+		const auto format = scResources->deduceRenderpassFormat(m_surface->getSurface(), m_physicalDevice); // TODO: DO I need to recreate render passes if swapchain gets recreated with different format?
+		renderpassInitial = createRenderpass(format, IGPURenderpass::LOAD_OP::CLEAR, IImage::LAYOUT::UNDEFINED, IImage::LAYOUT::ATTACHMENT_OPTIMAL);
+		renderpassInBetween = createRenderpass(format, IGPURenderpass::LOAD_OP::LOAD, IImage::LAYOUT::ATTACHMENT_OPTIMAL, IImage::LAYOUT::ATTACHMENT_OPTIMAL);
+		renderpassFinal = createRenderpass(format, IGPURenderpass::LOAD_OP::LOAD, IImage::LAYOUT::ATTACHMENT_OPTIMAL, IImage::LAYOUT::PRESENT_SRC);
+		const auto compatibleRenderPass = renderpassInitial; // all 3 above are compatible
+
+		scResources->setCompatibleRenderpass(compatibleRenderPass);
+
+		if (!m_surface->init(getGraphicsQueue(),std::move(scResources),{}))
+			return logFail("Could not initialize the Surface!");
+
+		allocateResources();
+
+		const asset::SPushConstantRange range = {
+			.stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT,
+			.offset = 0,
+			.size = sizeof(PushConstants)
+		};
+
+		m_pipelineLayout = m_device->createPipelineLayout({ &range,1 }, nullptr, nullptr, nullptr, nullptr);
+
+		smart_refctd_ptr<IShader> mainPipelineFragmentShaders = {};
+		smart_refctd_ptr<IShader> mainPipelineVertexShader = {};
+		{
+			// Load Custom Shader: looks up the precompiled SPIR-V by key through the asset manager, returns nullptr (after logging) when nothing was loaded
+			auto loadPrecompiledShader = [&]<core::StringLiteral ShaderKey>() -> smart_refctd_ptr<IShader>
+			{
+				IAssetLoader::SAssetLoadParams lp = {};
+				lp.logger = m_logger.get();
+				lp.workingDirectory = "app_resources";
+
+				auto key = nbl::this_example::builtin::build::get_spirv_key<ShaderKey>(m_device.get());
+				auto assetBundle = m_assetMgr->getAsset(key.data(), lp);
+				const auto assets = assetBundle.getContents();
+				if (assets.empty())
+				{
+					m_logger->log("Failed to load a precompiled shader of key \"%s\".", ILogger::ELL_ERROR, ShaderKey);
+					return nullptr;
+				}
+					
+
+				auto shader = IAsset::castDown<IShader>(assets[0]);
+				return shader;
+			};
+
+			mainPipelineFragmentShaders = loadPrecompiledShader.operator()<"main_pipeline_fragment_shader">(); // "../shaders/main_pipeline/fragment_shader.hlsl"
+			mainPipelineVertexShader = loadPrecompiledShader.operator()<"main_pipeline_vertex_shader">(); // "../shaders/main_pipeline/vertex_shader.hlsl"
+		}
+
+		IGPUGraphicsPipeline::SCreationParams mainGraphicsPipelineParams = {};
+		mainGraphicsPipelineParams.layout = m_pipelineLayout.get();
+		mainGraphicsPipelineParams.cached = {
+			.vertexInput = {},
+			.primitiveAssembly = {
+				.primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST,
+			},
+			.rasterization = {
+				.polygonMode = EPM_FILL,
+				.faceCullingMode = EFCM_NONE,
+				.depthWriteEnable = false,
+			},
+			.blend = {},
+		};
+		mainGraphicsPipelineParams.renderpass = compatibleRenderPass.get();
+
+		// Create Main Graphics Pipelines 
+		{
+			video::IGPUPipelineBase::SShaderSpecInfo specInfo[2] = {
+				{ .shader = mainPipelineVertexShader.get(), .entryPoint = "vtxMain" },
+				{ .shader = mainPipelineFragmentShaders.get(), .entryPoint = "fragMain" },
+			};
+			
+			IGPUGraphicsPipeline::SCreationParams params[1] = { mainGraphicsPipelineParams };
+			params[0].vertexShader = specInfo[0];
+			params[0].fragmentShader = specInfo[1];
+
+			if (!m_device->createGraphicsPipelines(nullptr,params,&m_graphicsPipeline))
+				return logFail("Graphics Pipeline Creation Failed.");
+		}
+		
+		// Create one resettable command pool and allocate MaxSubmitsInFlight primary command buffers from it
+		m_graphicsCommandPool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT);
+		if (!m_graphicsCommandPool)
+			return logFail("Couldn't create Command Pool!");
+		if (!m_graphicsCommandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{m_commandBuffersInFlight.data(),MaxSubmitsInFlight}))
+			return logFail("Couldn't create Command Buffers!");
+		
+		// Create the Semaphores; validate both BEFORE naming them, setObjectDebugName on a null pointer would crash instead of reporting the failure
+		m_renderSemaphore = m_device->createSemaphore(0ull);
+		m_overflowSubmitScratchSemaphore = m_device->createSemaphore(0ull);
+		if (!m_renderSemaphore || !m_overflowSubmitScratchSemaphore)
+			return logFail("Failed to Create Semaphores!");
+		m_renderSemaphore->setObjectDebugName("m_renderSemaphore");
+		m_overflowSubmitScratchSemaphore->setObjectDebugName("m_overflowSubmitScratchSemaphore");
+
+		// Set Queue and ScratchSemaInfo -> wait semaphores and command buffers will be modified by workLoop each frame
+		m_intendedNextSubmit.queue = getGraphicsQueue();
+		m_intendedNextSubmit.scratchSemaphore = {
+				.semaphore = m_overflowSubmitScratchSemaphore.get(),
+				.value = 0ull,
+		};
+		for (uint32_t i = 0; i < MaxSubmitsInFlight; ++i)
+			m_commandBufferInfos[i] = { .cmdbuf = m_commandBuffersInFlight[i].get() };
+		m_intendedNextSubmit.scratchCommandBuffers = m_commandBufferInfos;
+		m_currentRecordingCommandBufferInfo = &m_commandBufferInfos[0];
+		
+		return true;
+	}
+
+	// Per-frame body: updates the elapsed time, drains input events, begins the frame, sets the wait semaphores, then calls addObjects and endFrameRender
+	inline void workLoopBody() override
+	{
+		auto now = std::chrono::high_resolution_clock::now();
+		double dt = std::chrono::duration_cast<std::chrono::milliseconds>(now - lastTime).count(); // whole milliseconds since the previous call (duration_cast truncates)
+		lastTime = now;
+		m_timeElapsed += dt;
+
+		m_inputSystem->getDefaultMouse(&mouse);
+		m_inputSystem->getDefaultKeyboard(&keyboard);
+
+		mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void
+			{
+			}
+		, m_logger.get());
+		keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void
+			{
+				for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++)
+				{
+				}
+			}
+		, m_logger.get());
+
+		if (!beginFrameRender())
+			return;
+
+		const IQueue::SSubmitInfo::SSemaphoreInfo acquired = {
+			.semaphore = m_currentImageAcquire.semaphore,
+			.value = m_currentImageAcquire.acquireCount,
+			.stageMask = asset::PIPELINE_STAGE_FLAGS::NONE // NONE for Acquire, right? Yes, the Spec Says so!
+		};
+
+		// prev frame done using the scene data (is in post process stage)
+		const IQueue::SSubmitInfo::SSemaphoreInfo prevFrameRendered = {
+			.semaphore = m_renderSemaphore.get(),
+			.value = m_realFrameIx,
+			.stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS
+		};
+
+		IQueue::SSubmitInfo::SSemaphoreInfo waitSems[2u] = { acquired, prevFrameRendered }; // local array: m_intendedNextSubmit.waitSemaphores refers to it only for the calls below
+		m_intendedNextSubmit.waitSemaphores = waitSems;
+
+		addObjects(m_intendedNextSubmit);
+
+		endFrameRender(m_intendedNextSubmit);
+	}
+	
+	bool beginFrameRender() // Returns false when the semaphore wait or the image acquire fails; otherwise begins the next command buffer and records the initial (clearing) render pass
+	{
+		// framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation.
+		const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight());
+		// We block for semaphores for 2 reasons here:
+			// A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight]
+			// B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. [MaxAcquiresInFlight]
+		if (m_realFrameIx>=framesInFlight)
+		{
+			const ISemaphore::SWaitInfo cmdbufDonePending[] = {
+				{ 
+					.semaphore = m_renderSemaphore.get(),
+					.value = m_realFrameIx+1-framesInFlight
+				}
+			};
+			if (m_device->blockForSemaphores(cmdbufDonePending)!=ISemaphore::WAIT_RESULT::SUCCESS)
+				return false;
+		}
+
+		// Acquire
+		m_currentImageAcquire = m_surface->acquireNextImage();
+		if (!m_currentImageAcquire)
+			return false;
+		
+		const bool beganSuccess = m_intendedNextSubmit.beginNextCommandBuffer(m_currentRecordingCommandBufferInfo);
+		assert(beganSuccess); // NOTE(review): only checked in builds where assert is active
+		auto* cb = m_currentRecordingCommandBufferInfo->cmdbuf;
+
+		// safe to proceed
+		// no need to reset and begin new command buffers as SIntendedSubmitInfo already handled that.
+		// cb->reset(video::IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT);
+		// cb->begin(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
+		cb->beginDebugMarker("Frame");
+		
+		nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo;
+		auto scRes = static_cast<CSwapchainResources*>(m_surface->getSwapchainResources());
+		const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.68f, 0.85f, 0.90f, 1.0f} };
+		{
+			const VkRect2D currentRenderArea =
+			{
+				.offset = {0,0},
+				.extent = {m_window->getWidth(),m_window->getHeight()}
+			};
+
+			beginInfo = {
+				.renderpass = renderpassInitial.get(),
+				.framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex),
+				.colorClearValues = &clearValue,
+				.depthStencilClearValues = nullptr,
+				.renderArea = currentRenderArea
+			};
+		}
+
+		cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); // renderpassInitial is created with LOAD_OP::CLEAR, so this empty pass only clears the acquired image to clearValue
+		cb->endRenderPass();
+
+		return true;
+	}
+	
+	void _submitDraws(SIntendedSubmitInfo& intendedSubmitInfo, bool inBetweenSubmit)
+	{
+		drawResourcesFiller.pushAllUploads(intendedSubmitInfo);
+
+		m_currentRecordingCommandBufferInfo = intendedSubmitInfo.getCommandBufferForRecording(); // drawResourcesFiller.pushAllUploads might've overflow submitted and changed the current recording command buffer
+
+		// Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state
+		auto* cb = m_currentRecordingCommandBufferInfo->cmdbuf;
+		
+		const auto& resourcesCollection = drawResourcesFiller.getResourcesCollection();
+		const auto& resourcesGPUBuffer = drawResourcesFiller.getResourcesGPUBuffer();
+
+		float64_t4x4 viewProjection;
+		{
+			// TODO: create a proper camera
+
+			auto view = hlsl::math::linalg::rhLookAt<float64_t>({ 300.0f, 300.0f, 300.0f }, { 0.0f, 0.0f, 0.0f }, { 0.0f, 0.0f, 1.0f });
+			const float64_t aspectRatio = static_cast<float64_t>(m_window->getWidth()) / static_cast<float64_t>(m_window->getHeight());
+			auto proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix<float64_t>(hlsl::radians(60.0f), aspectRatio, 0.1f, 2000.0f);
+
+			viewProjection = hlsl::mul(proj, nbl::hlsl::math::linalg::promote_affine<4, 4>(view));
+		}
+
+		Globals globalData = {};
+		uint64_t baseAddress = resourcesGPUBuffer->getDeviceAddress();
+		globalData.pointers = {
+			.drawObjects			= baseAddress + resourcesCollection.drawObjects.bufferOffset,
+			.geometryBuffer			= baseAddress + resourcesCollection.geometryInfo.bufferOffset,
+		};
+		SBufferRange<IGPUBuffer> globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer};
+		bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData);
+		assert(updateSuccess);
+
+		asset::SViewport vp =
+		{
+			.x = 0u,
+			.y = 0u,
+			.width = static_cast<float>(m_window->getWidth()),
+			.height = static_cast<float>(m_window->getHeight()),
+			.minDepth = 1.f,
+			.maxDepth = 0.f,
+		};
+		cb->setViewport(0u, 1u, &vp);
+
+		VkRect2D scissor =
+		{
+			.offset = { 0, 0 },
+			.extent = { m_window->getWidth(), m_window->getHeight() },
+		};
+		cb->setScissor(0u, 1u, &scissor);
+
+		// pipelineBarriersBeforeDraw
+		{	
+			constexpr uint32_t MaxBufferBarriersCount = 2u;
+			uint32_t bufferBarriersCount = 0u;
+			IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t bufferBarriers[MaxBufferBarriersCount];
+			
+			const auto& resourcesCollection = drawResourcesFiller.getResourcesCollection();
+
+			if (m_globalsBuffer->getSize() > 0u)
+			{
+				auto& bufferBarrier = bufferBarriers[bufferBarriersCount++];
+				bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT;
+				bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT;
+				bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT;
+				bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::UNIFORM_READ_BIT;
+				bufferBarrier.range =
+				{
+					.offset = 0u,
+					.size = m_globalsBuffer->getSize(),
+					.buffer = m_globalsBuffer,
+				};
+			}
+			if (drawResourcesFiller.getCopiedResourcesSize() > 0u)
+			{
+				auto& bufferBarrier = bufferBarriers[bufferBarriersCount++];
+				bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT;
+				bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT;
+				bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_INPUT_BITS | PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT;
+				bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::MEMORY_READ_BITS | ACCESS_FLAGS::MEMORY_WRITE_BITS;
+				bufferBarrier.range =
+				{
+					.offset = 0u,
+					.size = drawResourcesFiller.getCopiedResourcesSize(),
+					.buffer = drawResourcesFiller.getResourcesGPUBuffer(),
+				};
+			}
+			cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .bufBarriers = {bufferBarriers, bufferBarriersCount}, .imgBarriers = {} });
+		}
+
+		nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo;
+		VkRect2D currentRenderArea;
+		const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,0.f} };
+		{
+			auto scRes = static_cast<CSwapchainResources*>(m_surface->getSwapchainResources());
+			currentRenderArea =
+			{
+				.offset = {0,0},
+				.extent = {m_window->getWidth(),m_window->getHeight()}
+			};
+			beginInfo = {
+				.renderpass = (inBetweenSubmit) ? renderpassInBetween.get():renderpassFinal.get(),
+				.framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex),
+				.colorClearValues = &clearValue,
+				.depthStencilClearValues = nullptr,
+				.renderArea = currentRenderArea
+			};
+		}
+		cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE);
+		
+		cb->bindGraphicsPipeline(m_graphicsPipeline.get());
+
+		for (auto& drawCall : drawResourcesFiller.getDrawCalls())
+		{
+			cb->bindIndexBuffer({ .offset = resourcesCollection.geometryInfo.bufferOffset + drawCall.indexBufferOffset, .buffer = drawResourcesFiller.getResourcesGPUBuffer()}, asset::EIT_32BIT);
+
+			PushConstants pc = {
+				.triangleMeshVerticesBaseAddress = drawCall.triangleMeshVerticesBaseAddress + resourcesGPUBuffer->getDeviceAddress() + resourcesCollection.geometryInfo.bufferOffset,
+				.triangleMeshMainObjectIndex = drawCall.triangleMeshMainObjectIndex
+			};
+			cb->pushConstants(m_graphicsPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc);
+
+			cb->drawIndexed(drawCall.indexCount, 1u, 0u, 0u, 0u);
+		}
+
+		cb->endRenderPass();
+
+		if (!inBetweenSubmit)
+			cb->endDebugMarker();
+		
+		drawResourcesFiller.markFrameUsageComplete(intendedSubmitInfo.getFutureScratchSemaphore().value);
+
+		if (inBetweenSubmit)
+		{
+			if (intendedSubmitInfo.overflowSubmit(m_currentRecordingCommandBufferInfo) != IQueue::RESULT::SUCCESS)
+			{
+				m_logger->log("overflow submit failed.", ILogger::ELL_ERROR);
+			}
+		}
+		else
+		{
+			// cb->end();
+			
+			const auto nextFrameIx = m_realFrameIx+1u;
+			const IQueue::SSubmitInfo::SSemaphoreInfo thisFrameRendered = {
+				.semaphore = m_renderSemaphore.get(),
+				.value = nextFrameIx,
+				.stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS
+			};
+			if (intendedSubmitInfo.submit(m_currentRecordingCommandBufferInfo, { &thisFrameRendered,1 }) == IQueue::RESULT::SUCCESS)
+			{
+				m_realFrameIx = nextFrameIx;
+				
+				IQueue::SSubmitInfo::SSemaphoreInfo presentWait = thisFrameRendered;
+				// the stages for a wait semaphore operation are about what stage you WAIT in, not what stage you wait for
+				presentWait.stageMask = PIPELINE_STAGE_FLAGS::NONE; // top of pipe, there's no explicit presentation engine stage
+				m_surface->present(m_currentImageAcquire.imageIndex,{&presentWait,1});
+			}
+			else
+			{
+				m_logger->log("regular submit failed.", ILogger::ELL_ERROR);
+			}
+		}
+	}
+
+	// TODO: remove
+	// Temporary draw path: records one fixed 3-vertex draw with placeholder push constants into the
+	// current scratch command buffer and submits it.
+	//   inBetweenSubmit == true  -> renders with `renderpassInBetween` and performs an overflow submit.
+	//   inBetweenSubmit == false -> renders with `renderpassFinal`, ends the debug marker, submits while
+	//                               signalling `m_renderSemaphore` and, on success, presents the acquired image.
+	void submitDraws(SIntendedSubmitInfo& intendedSubmitInfo, bool inBetweenSubmit)
+	{
+		// drawResourcesFiller.pushAllUploads might've overflow submitted and changed the current recording command buffer
+		m_currentRecordingCommandBufferInfo = intendedSubmitInfo.getCommandBufferForRecording();
+
+		// Record into the scratch command buffer fetched above, it should be in recording state
+		auto* const cmdbuf = m_currentRecordingCommandBufferInfo->cmdbuf;
+
+		// Dynamic state: viewport and scissor both cover the whole window, depth range is passed as [1,0]
+		asset::SViewport viewport =
+		{
+			.x = 0u,
+			.y = 0u,
+			.width = static_cast<float>(m_window->getWidth()),
+			.height = static_cast<float>(m_window->getHeight()),
+			.minDepth = 1.f,
+			.maxDepth = 0.f,
+		};
+		cmdbuf->setViewport(0u, 1u, &viewport);
+
+		VkRect2D scissorRect =
+		{
+			.offset = { 0, 0 },
+			.extent = { m_window->getWidth(), m_window->getHeight() },
+		};
+		cmdbuf->setScissor(0u, 1u, &scissorRect);
+
+		// Render pass over the framebuffer of the currently acquired swapchain image
+		const IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,0.f} };
+		auto* const swapchainResources = static_cast<CSwapchainResources*>(m_surface->getSwapchainResources());
+		VkRect2D renderArea =
+		{
+			.offset = {0,0},
+			.extent = {m_window->getWidth(),m_window->getHeight()}
+		};
+		nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo renderpassBegin = {
+			.renderpass = inBetweenSubmit ? renderpassInBetween.get() : renderpassFinal.get(),
+			.framebuffer = swapchainResources->getFramebuffer(m_currentImageAcquire.imageIndex),
+			.colorClearValues = &clearColor,
+			.depthStencilClearValues = nullptr,
+			.renderArea = renderArea
+		};
+		cmdbuf->beginRenderPass(renderpassBegin, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE);
+
+		cmdbuf->bindGraphicsPipeline(m_graphicsPipeline.get());
+
+		// Placeholder push constants followed by a single non-indexed triangle
+		PushConstants placeholderPC = {
+			.triangleMeshVerticesBaseAddress = 1,
+			.triangleMeshMainObjectIndex = 2
+		};
+		const auto pushConstantStages = IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT;
+		cmdbuf->pushConstants(m_graphicsPipeline->getLayout(), pushConstantStages, 0, sizeof(PushConstants), &placeholderPC);
+		cmdbuf->draw(3, 1, 0, 0);
+
+		cmdbuf->endRenderPass();
+
+		// The debug marker is only closed on the final submit of the frame
+		if (!inBetweenSubmit)
+			cmdbuf->endDebugMarker();
+
+		drawResourcesFiller.markFrameUsageComplete(intendedSubmitInfo.getFutureScratchSemaphore().value);
+
+		if (inBetweenSubmit)
+		{
+			const auto overflowResult = intendedSubmitInfo.overflowSubmit(m_currentRecordingCommandBufferInfo);
+			if (overflowResult != IQueue::RESULT::SUCCESS)
+				m_logger->log("overflow submit failed.", ILogger::ELL_ERROR);
+			return;
+		}
+
+		// Final submit of the frame: signal the render timeline semaphore with the next frame index
+		const auto nextFrameIx = m_realFrameIx + 1u;
+		const IQueue::SSubmitInfo::SSemaphoreInfo frameRenderedSignal = {
+			.semaphore = m_renderSemaphore.get(),
+			.value = nextFrameIx,
+			.stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS
+		};
+		const auto submitResult = intendedSubmitInfo.submit(m_currentRecordingCommandBufferInfo, { &frameRenderedSignal,1 });
+		if (submitResult != IQueue::RESULT::SUCCESS)
+		{
+			m_logger->log("regular submit failed.", ILogger::ELL_ERROR);
+			return;
+		}
+
+		m_realFrameIx = nextFrameIx;
+
+		IQueue::SSubmitInfo::SSemaphoreInfo presentWait = frameRenderedSignal;
+		// the stages for a wait semaphore operation are about what stage you WAIT in, not what stage you wait for
+		presentWait.stageMask = PIPELINE_STAGE_FLAGS::NONE; // top of pipe, there's no explicit presentation engine stage
+		m_surface->present(m_currentImageAcquire.imageIndex, { &presentWait,1 });
+	}
+
+	// Closes the frame by recording and submitting the final (non in-between) draws through submitDraws.
+	void endFrameRender(SIntendedSubmitInfo& intendedSubmitInfo)
+	{
+		constexpr bool IsInBetweenSubmit = false;
+		submitDraws(intendedSubmitInfo, IsInBetweenSubmit);
+	}
+
+	// The main loop continues while the timeout (measured from `start`) has not elapsed
+	// and there is a surface that has not become irrecoverable.
+	inline bool keepRunning() override
+	{
+		const auto elapsed = duration_cast<decltype(timeout)>(clock_t::now()-start);
+		const bool timedOut = elapsed>timeout;
+
+		// `timedOut` is tested first so the surface is not queried once the timeout has passed
+		return !timedOut && m_surface && !m_surface->irrecoverable();
+	}
+
+	// Shutdown hook: ends the command buffer that is currently being recorded, waits for the device to go idle,
+	// drops the surface and then forwards to device_base_t::onAppTerminated(), whose result is returned.
+	// NOTE(review): dereferences m_currentRecordingCommandBufferInfo unconditionally; assumes it was set before termination - confirm
+	virtual bool onAppTerminated() override
+	{
+		m_currentRecordingCommandBufferInfo->cmdbuf->end();
+
+		// Wait for all in-flight frames to finish rendering before any destructors run, otherwise resources could be destroyed while still in use
+		m_device->waitIdle();
+
+		// This is optional, but the window would close AFTER we return from this function
+		m_surface = nullptr;
+		
+		return device_base_t::onAppTerminated();
+	}
+		
+	// Starts from the features requested by base_t, then enables validation (without synchronization validation)
+	// and selects the surface swapchain mode.
+	virtual video::IAPIConnection::SFeatures getAPIFeaturesToEnable() override
+	{
+		auto features = base_t::getAPIFeaturesToEnable();
+
+		features.validations = true;
+		features.synchronizationValidation = false;
+		// We only support one swapchain mode, surface, the other one is Display which we have not implemented yet.
+		features.swapchainMode = video::E_SWAPCHAIN_MODE::ESM_SURFACE;
+
+		return features;
+	}
+
+protected:
+
+	// Fills drawResourcesFiller with the test scene: a single triangle mesh made of four triangles
+	// that all share the apex (0, 0, 100). Vertices are not shared between triangles (12 vertices, 12 indices).
+	// Before that, it installs submitDraws(..., true) as the filler's submit-draws function and resets the filler.
+	void addObjects(SIntendedSubmitInfo& intendedNextSubmit)
+	{
+		drawResourcesFiller.setSubmitDrawsFunction(
+			[this](SIntendedSubmitInfo& submitInfoForOverflow)
+			{
+				submitDraws(submitInfoForOverflow, true);
+			}
+		);
+		drawResourcesFiller.reset();
+
+		// apex and the four base corners used by the triangles below
+		const float64_t3 apex(0.0, 0.0, 100.0);
+		const float64_t3 cornerA(-200.0, -200.0, 10.0);
+		const float64_t3 cornerB(200.0, -100.0, 10.0);
+		const float64_t3 cornerC(200.0, 200.0, -20.0);
+		const float64_t3 cornerD(-200.0, 200.0, 10.0);
+
+		core::vector<TriangleMeshVertex> meshVertices = {
+			{ apex }, { cornerA }, { cornerB },
+			{ apex }, { cornerB }, { cornerC },
+			{ apex }, { cornerC }, { cornerD },
+			{ apex }, { cornerD }, { cornerA },
+		};
+
+		// one index per vertex, in order
+		core::vector<uint32_t> meshIndices = {
+			0, 1, 2,
+			3, 4, 5,
+			6, 7, 8,
+			9, 10, 11
+		};
+
+		CTriangleMesh triangleMesh;
+		triangleMesh.setVertices(std::move(meshVertices));
+		triangleMesh.setIndices(std::move(meshIndices));
+
+		drawResourcesFiller.drawTriangleMesh(triangleMesh, intendedNextSubmit);
+	}
+
+protected:
+	clock_t::time_point start;
+	std::chrono::seconds timeout = std::chrono::seconds(0x7fffFFFFu);
+
+	double m_timeElapsed = 0.0;
+	std::chrono::steady_clock::time_point lastTime;
+
+	core::smart_refctd_ptr<nbl::examples::InputSystem> m_inputSystem;
+	nbl::examples::InputSystem::ChannelReader<IMouseEventChannel> mouse;
+	nbl::examples::InputSystem::ChannelReader<IKeyboardEventChannel> keyboard;
+
+	smart_refctd_ptr<IGPURenderpass> renderpassInitial; // this renderpass will clear the attachment and transition it to COLOR_ATTACHMENT_OPTIMAL
+	smart_refctd_ptr<IGPURenderpass> renderpassInBetween; // this renderpass will load the attachment and transition it to COLOR_ATTACHMENT_OPTIMAL
+	smart_refctd_ptr<IGPURenderpass> renderpassFinal; // this renderpass will load the attachment and transition it to PRESENT
+	
+	smart_refctd_ptr<IGPUCommandPool> m_graphicsCommandPool;
+	std::array<smart_refctd_ptr<IGPUCommandBuffer>,	MaxSubmitsInFlight>	m_commandBuffersInFlight; 
+	// ref to above cmd buffers, these go into SIntendedSubmitInfo as command buffers available for recording.
+	std::array<IQueue::SSubmitInfo::SCommandBufferInfo,	MaxSubmitsInFlight>	m_commandBufferInfos;
+	// pointer to one of the command buffer infos from above, this is the only command buffer used to record current submit in current frame, it will be updated by SIntendedSubmitInfo; starts out null so an early read is detectable instead of reading an indeterminate pointer
+	IQueue::SSubmitInfo::SCommandBufferInfo const * m_currentRecordingCommandBufferInfo = nullptr; // pointer can change, value cannot
+
+	smart_refctd_ptr<IGPUBuffer>		m_globalsBuffer;
+	DrawResourcesFiller drawResourcesFiller; // you can think of this as the scene data needed to draw everything, we only have one instance so let's use a timeline semaphore to sync all renders
+
+	smart_refctd_ptr<ISemaphore> m_renderSemaphore; // timeline semaphore to sync frames together
+	
+	// timeline semaphore used for overflows (they need to be on their own timeline to count overflows)
+	smart_refctd_ptr<ISemaphore> m_overflowSubmitScratchSemaphore; 
+	SIntendedSubmitInfo m_intendedNextSubmit;
+	
+	ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {};
+
+	uint64_t m_realFrameIx = 0u;
+
+	smart_refctd_ptr<IGPUDescriptorSetLayout>	descriptorSetLayout0;
+	smart_refctd_ptr<IGPUDescriptorSetLayout>	descriptorSetLayout1;
+	smart_refctd_ptr<IGPUPipelineLayout>		m_pipelineLayout;
+	smart_refctd_ptr<IGPUGraphicsPipeline>		resolveAlphaGraphicsPipeline;
+	smart_refctd_ptr<IGPUGraphicsPipeline>		m_debugGraphicsPipeline;
+	smart_refctd_ptr<IGPUGraphicsPipeline>		m_graphicsPipeline;
+	smart_refctd_ptr<IGPUGraphicsPipeline>		m_streamedImagesGraphicsPipeline;
+
+	smart_refctd_ptr<IWindow> m_window;
+	smart_refctd_ptr<CSimpleResizeSurface<CSwapchainResources>> m_surface;
+	smart_refctd_ptr<IGPUImageView> pseudoStencilImageView;
+	smart_refctd_ptr<IGPUImageView> colorStorageImageView;
+};
+
+NBL_MAIN_FUNC(ComputerAidedDesign)
+
diff --git a/75_CAD_3D/scripts/generate_mipmaps.py b/75_CAD_3D/scripts/generate_mipmaps.py
new file mode 100644
index 000000000..78420cda5
--- /dev/null
+++ b/75_CAD_3D/scripts/generate_mipmaps.py
@@ -0,0 +1,47 @@
+import OpenEXR
+import Imath
+import numpy as np
+
+def read_exr(path):
+    exr = OpenEXR.InputFile(path)
+    dw = exr.header()['dataWindow']
+    size = (dw.max.x - dw.min.x + 1, dw.max.y - dw.min.y + 1)
+
+    pt = Imath.PixelType(Imath.PixelType.FLOAT)
+    channels = ['R', 'G', 'B']
+    data = [np.frombuffer(exr.channel(c, pt), dtype=np.float32).reshape(size[1], size[0]) for c in channels]
+    return np.stack(data, axis=-1)  # shape: (H, W, 3)
+
+def write_exr(path, arr):
+    H, W, C = arr.shape
+    assert C == 3, "Only RGB supported"
+    header = OpenEXR.Header(W, H)
+    pt = Imath.PixelType(Imath.PixelType.FLOAT)
+    channels = {
+        'R': arr[:, :, 0].astype(np.float32).tobytes(),
+        'G': arr[:, :, 1].astype(np.float32).tobytes(),
+        'B': arr[:, :, 2].astype(np.float32).tobytes()
+    }
+    exr = OpenEXR.OutputFile(path, header)
+    exr.writePixels(channels)
+
+def mipmap_exr():
+    img = read_exr("../../media/tiled_grid_mip_0.exr")
+    h, w, _ = img.shape
+    base_path = "../../media/tiled_grid_mip_"
+    tile_size = 128
+    mip_level = 1
+    tile_length = h // (2 * tile_size)
+    
+    while tile_length > 0:
+        # Reshape and average 2x2 blocks
+        reshaped = img.reshape(h//2, 2, w//2, 2, 3)
+        mipmap = reshaped.mean(axis=(1, 3))
+        write_exr(base_path + str(mip_level) + ".exr", mipmap)
+        img = mipmap
+        mip_level = mip_level + 1
+        tile_length = tile_length // 2
+        h = h // 2
+        w = w // 2
+
+mipmap_exr()
\ No newline at end of file
diff --git a/75_CAD_3D/scripts/tiled_grid.py b/75_CAD_3D/scripts/tiled_grid.py
new file mode 100644
index 000000000..89c637338
--- /dev/null
+++ b/75_CAD_3D/scripts/tiled_grid.py
@@ -0,0 +1,266 @@
+from PIL import Image, ImageDraw, ImageFont
+import numpy as np
+import os
+import OpenImageIO as oiio
+
+
+
+def create_single_tile(tile_size, color, x_coord, y_coord, font_path=None):
+    """
+    Creates a single square tile image with a given color and two lines of centered text
+    ("x = <x_coord>" and "y = <y_coord>").
+
+    The font size starts at 25% of the tile size and is reduced one step at a time (for at
+    most 100 attempts) until both lines are narrower than 90% of the tile and their combined
+    height is below 90% of the tile.
+
+    Args:
+        tile_size (int): The sidelength of the square tile in pixels.
+        color (tuple): A tuple of three floats (R, G, B) representing the color (0.0-1.0).
+                       Components outside that range are clamped.
+        x_coord (int): The X coordinate to display on the tile.
+        y_coord (int): The Y coordinate to display on the tile.
+        font_path (str, optional): The path to a TrueType font file (.ttf).
+                                   If None, or if the path does not exist, a default PIL font will be used.
+    Returns:
+        PIL.Image.Image: The created tile image with text.
+    """
+    # Convert float color (0.0-1.0) to 8-bit integer color (0-255)
+    int_color = tuple(int(max(0, min(1, c)) * 255) for c in color) # Ensure color components are clamped
+
+    img = Image.new('RGB', (tile_size, tile_size), int_color)
+    draw = ImageDraw.Draw(img)
+
+    text_line1 = f"x = {x_coord}"
+    text_line2 = f"y = {y_coord}"
+
+    text_fill_color = (255, 255, 255)
+
+    # --- Dynamic Font Size Adjustment ---
+    # Start with a relatively large font size and shrink if needed
+    font_size = int(tile_size * 0.25) # Initial guess for font size
+    max_font_size = int(tile_size * 0.25) # NOTE(review): never read below; the size only ever shrinks from the initial guess
+
+    font = None
+    max_iterations = 100 # Upper bound on the number of font sizes tried
+
+    for _ in range(max_iterations):
+        current_font_path = font_path
+        current_font_size = max(1, font_size) # Ensure font size is at least 1
+
+        try:
+            if current_font_path and os.path.exists(current_font_path):
+                font = ImageFont.truetype(current_font_path, current_font_size)
+            else:
+                # Fallback to default font (size argument might not always work perfectly)
+                font = ImageFont.load_default()
+                # For default font, try to scale if load_default(size=...) is supported and works
+                try:
+                    scaled_font = ImageFont.load_default(size=current_font_size)
+                    if draw.textbbox((0, 0), text_line1, font=scaled_font)[2] > 0: # Check if usable
+                        font = scaled_font
+                except Exception:
+                    pass # Stick with original default font
+
+            if font is None: # Last resort if no font could be loaded
+                font = ImageFont.load_default()
+
+            # Measure text dimensions
+            bbox1 = draw.textbbox((0, 0), text_line1, font=font)
+            text_width1 = bbox1[2] - bbox1[0]
+            text_height1 = bbox1[3] - bbox1[1]
+
+            bbox2 = draw.textbbox((0, 0), text_line2, font=font)
+            text_width2 = bbox2[2] - bbox2[0]
+            text_height2 = bbox2[3] - bbox2[1]
+
+            # Calculate total height needed for both lines plus some padding
+            # The gap is only used for this fit check; the final placement below does not use it
+            line_gap = int(text_height1 * 0.2) # 20% of line height
+            total_text_height = text_height1 + text_height2 + line_gap
+
+            # Check if text fits vertically and horizontally
+            if (total_text_height < tile_size * 0.9) and \
+               (text_width1 < tile_size * 0.9) and \
+               (text_width2 < tile_size * 0.9):
+                break # Font size is good, break out of loop
+            else:
+                font_size -= 1 # Reduce font size
+                if font_size <= 0: # Prevent infinite loop if text can never fit
+                    font_size = 1 # Smallest possible font size
+                    break
+
+        except Exception as e:
+            # Handle cases where font loading or textbbox fails
+            print(f"Error during font sizing: {e}. Reducing font size and retrying.")
+            font_size -= 1
+            if font_size <= 0:
+                font_size = 1
+                break # Cannot make font smaller, stop
+
+    # Final check: font_size can still be 0 here when the initial guess was 0 and the text fit on the first try
+    if font_size <= 0:
+        font_size = 1
+        # Reload font with minimum size if needed
+        if font_path and os.path.exists(font_path):
+            font = ImageFont.truetype(font_path, font_size)
+        else:
+            font = ImageFont.load_default()
+            try:
+                scaled_font = ImageFont.load_default(size=font_size)
+                if draw.textbbox((0, 0), text_line1, font=scaled_font)[2] > 0:
+                    font = scaled_font
+            except Exception:
+                pass
+
+
+    # Re-measure with final font size to ensure accurate positioning
+    bbox1 = draw.textbbox((0, 0), text_line1, font=font)
+    text_width1 = bbox1[2] - bbox1[0]
+    text_height1 = bbox1[3] - bbox1[1]
+
+    bbox2 = draw.textbbox((0, 0), text_line2, font=font)
+    text_width2 = bbox2[2] - bbox2[0]
+    text_height2 = bbox2[3] - bbox2[1]
+
+    # Calculate positions for centering
+    # Line 1: centered horizontally, midpoint at 1/3 tile height
+    x1 = (tile_size - text_width1) / 2
+    y1 = (tile_size / 3) - (text_height1 / 2)
+
+    # Line 2: centered horizontally, midpoint at 2/3 tile height
+    x2 = (tile_size - text_width2) / 2
+    y2 = (tile_size * 2 / 3) - (text_height2 / 2)
+
+    # Draw the text
+    draw.text((x1, y1), text_line1, fill=text_fill_color, font=font)
+    draw.text((x2, y2), text_line2, fill=text_fill_color, font=font)
+
+    return img
+
+def generate_interpolated_grid_image(tile_size, count, font_path=None):
+    """
+    Generates a large image composed of 'count' x 'count' tiles,
+    with colors bilinearly interpolated from corners and text indicating tile index.
+
+    Args:
+        tile_size (int): The sidelength of each individual square tile in pixels.
+        count (int): The number of tiles per side of the large grid (e.g., if count=3,
+                     it's a 3x3 grid of tiles).
+        font_path (str, optional): Path to a TrueType font file for the tile text.
+                                   If None, a default PIL font will be used.
+
+    Returns:
+        PIL.Image.Image: The generated large grid image.
+    """
+    if count <= 0:
+        raise ValueError("Count must be a positive integer.")
+
+    total_image_size = count * tile_size
+    main_img = Image.new('RGB', (total_image_size, total_image_size))
+
+    # Corner colors (R, G, B) as floats (0.0-1.0)
+    corner_colors = {
+        "top_left": (1.0, 0.0, 0.0),    # Red
+        "top_right": (1.0, 0.0, 1.0),   # Purple
+        "bottom_left": (0.0, 1.0, 0.0), # Green
+        "bottom_right": (0.0, 0.0, 1.0) # Blue
+    }
+
+    # Handle the edge case where count is 1
+    if count == 1:
+        # If count is 1, there's only one tile, which is the top-left corner
+        tile_color = corner_colors["top_left"]
+        tile_image = create_single_tile(tile_size, tile_color, 0, 0, font_path=font_path)
+        main_img.paste(tile_image, (0, 0))
+        return main_img
+
+    for y_tile in range(count):
+        for x_tile in range(count):
+            # Calculate normalized coordinates (u, v) for interpolation
+            # We divide by (count - 1) to ensure 0 and 1 values at the edges
+            u = x_tile / (count - 1)
+            v = y_tile / (count - 1)
+
+            # Apply the simplified bilinear interpolation formulas
+            r_component = 1 - v
+            g_component = v * (1 - u)
+            b_component = u
+
+            # Clamp components to be within 0.0 and 1.0 (due to potential floating point inaccuracies)
+            current_color = (
+                max(0.0, min(1.0, r_component)),
+                max(0.0, min(1.0, g_component)),
+                max(0.0, min(1.0, b_component))
+            )
+
+            # Create the individual tile
+            tile_image = create_single_tile(tile_size, current_color, x_tile, y_tile, font_path=font_path)
+
+            # Paste the tile onto the main image
+            paste_x = x_tile * tile_size
+            paste_y = y_tile * tile_size
+            main_img.paste(tile_image, (paste_x, paste_y))
+
+    return main_img
+
+
+
+
+import argparse
+parser = argparse.ArgumentParser(description="Process two optional named parameters.")
+parser.add_argument('--ts', type=int, default=128, help='Tile Size')
+parser.add_argument('--gs', type=int, default=128, help='Grid Size')
+
+# Parse the arguments
+args = parser.parse_args()
+
+
+# --- Configuration ---
+tile_sidelength = args.ts  # Size of each individual tile in pixels
+grid_count = args.gs      # Number of tiles per side (e.g., 15 means 15x15 grid)
+
+# Path to a font file (adjust this for your system)
+# On Windows, you can typically use 'C:/Windows/Fonts/arial.ttf' or similar
+# You might need to find a suitable font on your system.
+# For testing, you can use None to let PIL use its default font.
+# If a specific font path is provided and doesn't exist, it will fall back to default.
+windows_font_path = "C:/Windows/Fonts/arial.ttf" # Example path for Windows
+# If Arial is not found, try Times New Roman:
+# windows_font_path = "C:/Windows/Fonts/times.ttf"
+
+font_to_use = None
+if os.name == 'nt': # Check if OS is Windows
+    if os.path.exists(windows_font_path):
+        font_to_use = windows_font_path
+        print(f"Using font: {windows_font_path}")
+    else:
+        print(f"Warning: Windows font not found at '{windows_font_path}'. Using default PIL font.")
+else: # Assume Linux/macOS for other OS types
+    # Common Linux/macOS font paths (adjust as needed)
+    linux_font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
+    mac_font_path = "/Library/Fonts/Arial.ttf"
+    if os.path.exists(linux_font_path):
+        font_to_use = linux_font_path
+        print(f"Using font: {linux_font_path}")
+    elif os.path.exists(mac_font_path):
+        font_to_use = mac_font_path
+        print(f"Using font: {mac_font_path}")
+    else:
+        print("Warning: No common Linux/macOS font found. Using default PIL font.")
+
+
+# --- Generate and save the image ---
+print(f"Generating a {grid_count}x{grid_count} grid of tiles, each {tile_sidelength}x{tile_sidelength} pixels.")
+print(f"Total image size will be {grid_count * tile_sidelength}x{grid_count * tile_sidelength} pixels.")
+
+try:
+    final_image = generate_interpolated_grid_image(tile_sidelength, grid_count, font_path=font_to_use)
+    output_filename = "../../media/tiled_grid_mip_0.exr"
+    np_img = np.array(final_image).astype(np.float32) / 255.0  # Normalize for EXR
+    spec = oiio.ImageSpec(final_image.width, final_image.height, 3, oiio.TypeDesc("float"))
+    out = oiio.ImageOutput.create(output_filename)
+    out.open(output_filename, spec)
+    out.write_image(np_img.reshape(-1))  # Flatten for OIIO’s expected input
+    out.close()
+
+    print(f"Successfully created '{output_filename}'")
+
+except ValueError as e:
+    print(f"Error: {e}")
+except Exception as e:
+    print(f"An unexpected error occurred: {e}")
\ No newline at end of file
diff --git a/75_CAD_3D/shaders/globals.hlsl b/75_CAD_3D/shaders/globals.hlsl
new file mode 100644
index 000000000..901b13958
--- /dev/null
+++ b/75_CAD_3D/shaders/globals.hlsl
@@ -0,0 +1,105 @@
+#ifndef _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_
+#define _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_
+
+// TODO[Erfan]: Turn off in the future, but keep enabled to test
+// #define NBL_FORCE_EMULATED_FLOAT_64
+
+#include <nbl/builtin/hlsl/portable/float64_t.hlsl>
+#include <nbl/builtin/hlsl/portable/vector_t.hlsl>
+#include <nbl/builtin/hlsl/portable/matrix_t.hlsl>
+#include <nbl/builtin/hlsl/cpp_compat/basic.h>
+#include <nbl/builtin/hlsl/cpp_compat/matrix.hlsl>
+#include <nbl/builtin/hlsl/shapes/beziers.hlsl>
+#include <nbl/builtin/hlsl/tgmath.hlsl>
+
+#ifdef __HLSL_VERSION
+#include <nbl/builtin/hlsl/math/equations/quadratic.hlsl>
+#endif
+
+using namespace nbl::hlsl;
+
+#ifdef __HLSL_VERSION
+using pfloat64_t = portable_float64_t<DeviceConfigCaps>;
+using pfloat64_t2 = portable_float64_t2<DeviceConfigCaps>;
+using pfloat64_t3 = portable_float64_t3<DeviceConfigCaps>;
+#else
+using pfloat64_t = float64_t;
+using pfloat64_t2 = nbl::hlsl::vector<float64_t, 2>;
+using pfloat64_t3 = nbl::hlsl::vector<float64_t, 3>;
+#endif
+
+using pfloat64_t3x3 = portable_matrix_t3x3<pfloat64_t>;
+using pfloat64_t4x4 = portable_matrix_t4x4<pfloat64_t>;
+
+// Per-draw data passed through push constants (main_pipeline/common.hlsl declares it as `pc`).
+struct PushConstants
+{
+    uint64_t triangleMeshVerticesBaseAddress; // address the vertex shader loads TriangleMeshVertex elements from, indexed by SV_VertexID
+    uint32_t triangleMeshMainObjectIndex; // NOTE(review): not read by the shaders of this example yet; presumably an index of a MainObject - confirm
+    pfloat64_t4x4 viewProjectionMatrix; // NOTE(review): not read by the shaders of this example yet
+};
+
+// Three 64-bit addresses stored at the start of Globals.
+// NOTE(review): presumably buffer device addresses of the corresponding GPU arrays - confirm against the host code that fills them
+struct Pointers
+{
+    uint64_t mainObjects;
+    uint64_t drawObjects;
+    uint64_t geometryBuffer;
+};
+#ifndef __HLSL_VERSION
+// host-side layout check: 3 * 8 bytes, no padding
+static_assert(sizeof(Pointers) == 24u);
+#endif
+
+// Contents of the constant buffer declared as `globals` at the end of this file.
+struct Globals
+{
+    Pointers pointers;
+    pfloat64_t4x4 defaultProjectionToNDC; // NOTE(review): presumably the default transform into NDC, going by the name - confirm
+};
+#ifndef __HLSL_VERSION
+// host-side layout check: 24 bytes of Pointers followed by a 4x4 matrix of 64-bit floats (128 bytes)
+static_assert(sizeof(Globals) == 152u);
+#endif
+
+// Kinds of main object, stored as a 32-bit unsigned value; NONE is 0 and the remaining enumerators count up from 1 in declaration order.
+// NOTE(review): no code visible in this example reads these values yet
+enum class MainObjectType : uint32_t
+{
+    NONE = 0u,
+    POLYLINE,
+    HATCH,
+    TEXT,
+    STATIC_IMAGE,
+    DTM,
+    GRID_DTM,
+    STREAMED_IMAGE,
+};
+
+// Consists of multiple DrawObjects
+// Holds five 32-bit fields, four of them named as indices (style, DTM settings, custom projection, custom clip rect); the host side asserts 4-byte alignment at the end of this file.
+// [IDEA]: In GPU-driven rendering, to save mem for MainObject data fetching: many of these can be shared amongst different main objects, we could find these styles, settings, etc indices with upper_bound
+// [TODO]: pack indices and members of mainObject and DrawObject + enforce max size for autosubmit --> but do it only after the mainobject definition is finalized in gpu-driven rendering work
+struct MainObject
+{
+    uint32_t styleIdx;
+    uint32_t dtmSettingsIdx;
+    uint32_t customProjectionIndex;
+    uint32_t customClipRectIndex;
+    uint32_t transformationType; // todo pack later, it's just 2 possible values atm
+};
+
+// One drawable piece of a MainObject: which main object it belongs to and the address of its geometry.
+struct DrawObject
+{
+    uint32_t type_subsectionIdx; // packed two uint16 into uint32
+    uint32_t mainObjIndex; // NOTE(review): presumably the index of the owning MainObject - confirm
+    uint64_t geometryAddress; // 64-bit, which gives the struct the 8-byte alignment asserted on the host side at the end of this file
+};
+
+// Vertex of a triangle mesh as stored in GPU memory; the vertex shader loads it by address rather than through vertex attributes.
+struct TriangleMeshVertex
+{
+    pfloat64_t3 pos; // position with three 64-bit float components (portable/emulated type when compiled as HLSL)
+};
+
+#ifdef __HLSL_VERSION
+// Shader side: the Globals constant buffer is bound at binding 0 of descriptor set 0
+[[vk::binding(0, 0)]] ConstantBuffer<Globals> globals : register(b0);
+#else
+// Host (C++) side: alignment checks for the types declared above
+static_assert(alignof(pfloat64_t3x3)==8u);
+static_assert(alignof(MainObject)==4u);
+static_assert(alignof(DrawObject)==8u);
+#endif
+
+
+#endif
diff --git a/75_CAD_3D/shaders/main_pipeline/common.hlsl b/75_CAD_3D/shaders/main_pipeline/common.hlsl
new file mode 100644
index 000000000..677bf0ec9
--- /dev/null
+++ b/75_CAD_3D/shaders/main_pipeline/common.hlsl
@@ -0,0 +1,17 @@
+#ifndef _CAD_EXAMPLE_MAIN_PIPELINE_COMMON_HLSL_INCLUDED_
+#define _CAD_EXAMPLE_MAIN_PIPELINE_COMMON_HLSL_INCLUDED_
+
+#include "../globals.hlsl"
+
+// Data passed from the vertex shader (its return value) to the fragment shader (its input).
+struct PSInput
+{
+    [[vk::location(0)]] float4 position : SV_Position; // the vertex shader writes the vertex position here with w = 1
+};
+
+// Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated
+
+// [[vk::binding(0, 0)]] ConstantBuffer<Globals> globals; ---> moved to globals.hlsl
+
+[[vk::push_constant]] PushConstants pc;
+
+#endif
diff --git a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
new file mode 100644
index 000000000..f82fc9eab
--- /dev/null
+++ b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
@@ -0,0 +1,7 @@
+#include "common.hlsl"
+
+// Fragment stage: shades every fragment with the same opaque blue, the input is not used.
+[shader("pixel")]
+float4 fragMain(PSInput input) : SV_Target
+{
+	const float4 opaqueBlue = float4(0.0f, 0.0f, 1.0f, 1.0f);
+	return opaqueBlue;
+}
diff --git a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
new file mode 100644
index 000000000..7e81c85f6
--- /dev/null
+++ b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
@@ -0,0 +1,17 @@
+#pragma shader_stage(vertex)
+
+#include "common.hlsl"
+
+// Vertex stage: pulls vertex `vertexID` from the address given in the push constants and
+// outputs its position converted to 32-bit floats, with w = 1. No transformation is applied.
+[shader("vertex")]
+PSInput vtxMain(uint vertexID : SV_VertexID)
+{
+    // Vertices are tightly packed TriangleMeshVertex structs starting at the base address; loaded with 8-byte alignment
+    const uint64_t vertexAddress = pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * vertexID;
+    TriangleMeshVertex meshVertex = vk::RawBufferLoad<TriangleMeshVertex>(vertexAddress, 8u);
+
+    PSInput result;
+    result.position = float4(
+        _static_cast<float>(meshVertex.pos.x),
+        _static_cast<float>(meshVertex.pos.y),
+        _static_cast<float>(meshVertex.pos.z),
+        1.0f);
+
+    return result;
+}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d945c547a..7a24e8345 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -111,6 +111,7 @@ if(NBL_BUILD_EXAMPLES)
 	endif()
 
 	add_subdirectory(74_QuantizedSequenceTests)
+	add_subdirectory(75_CAD_3D)
 
 	# add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory
 	NBL_GET_ALL_TARGETS(TARGETS)

From 18e71101932066506a86512ebf97e93073c2aa0c Mon Sep 17 00:00:00 2001
From: Przemog1 <minikers21@gmail.com>
Date: Tue, 10 Mar 2026 19:26:39 +0100
Subject: [PATCH 2/9] Example 75 now draws meshes

---
 75_CAD_3D/CMakeLists.txt                      |   9 +-
 75_CAD_3D/CTriangleMesh.cpp                   |   1 -
 75_CAD_3D/CTriangleMesh.h                     |   1 +
 75_CAD_3D/DrawResourcesFiller.cpp             |   3 +-
 75_CAD_3D/DrawResourcesFiller.h               |  80 ++++----
 75_CAD_3D/main.cpp                            | 187 ++++++------------
 75_CAD_3D/shaders/globals.hlsl                |  32 +--
 75_CAD_3D/shaders/main_pipeline/common.hlsl   |   1 +
 .../main_pipeline/fragment_shader.hlsl        |  10 +-
 .../shaders/main_pipeline/vertex_shader.hlsl  |  54 ++++-
 10 files changed, 166 insertions(+), 212 deletions(-)
 delete mode 100644 75_CAD_3D/CTriangleMesh.cpp

diff --git a/75_CAD_3D/CMakeLists.txt b/75_CAD_3D/CMakeLists.txt
index 794ba1c3c..144fb4a33 100644
--- a/75_CAD_3D/CMakeLists.txt
+++ b/75_CAD_3D/CMakeLists.txt
@@ -6,6 +6,7 @@ endif()
 set(EXAMPLE_SOURCES
   "${CMAKE_CURRENT_SOURCE_DIR}/DrawResourcesFiller.cpp"
   "${CMAKE_CURRENT_SOURCE_DIR}/DrawResourcesFiller.h"
+  "${CMAKE_CURRENT_SOURCE_DIR}/CTriangleMesh.h"
 )
 set(EXAMPLE_INCLUDES
   "${CMAKE_CURRENT_SOURCE_DIR}/../../3rdparty/boost/superproject/libs/math/include")
@@ -38,14 +39,6 @@ endif()
 set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen")
 
 set(SM 6_8)
-set(REQUIRED_CAPS [=[
-{
-  "kind": "features",
-  "name": "fragmentShaderPixelInterlock",
-  "type": "bool",
-  "values": [1]
-}
-]=])
 
 set(JSON [=[
 [
diff --git a/75_CAD_3D/CTriangleMesh.cpp b/75_CAD_3D/CTriangleMesh.cpp
deleted file mode 100644
index 5564c0a51..000000000
--- a/75_CAD_3D/CTriangleMesh.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "CTriangleMesh.h"
\ No newline at end of file
diff --git a/75_CAD_3D/CTriangleMesh.h b/75_CAD_3D/CTriangleMesh.h
index 8f941928a..2100c801d 100644
--- a/75_CAD_3D/CTriangleMesh.h
+++ b/75_CAD_3D/CTriangleMesh.h
@@ -49,6 +49,7 @@ class CTriangleMesh final
 		m_indices.clear();
 	}
 
+private:
 	core::vector<vertex_t> m_vertices;
 	core::vector<index_t> m_indices;
 };
\ No newline at end of file
diff --git a/75_CAD_3D/DrawResourcesFiller.cpp b/75_CAD_3D/DrawResourcesFiller.cpp
index 313c74358..f2de0793d 100644
--- a/75_CAD_3D/DrawResourcesFiller.cpp
+++ b/75_CAD_3D/DrawResourcesFiller.cpp
@@ -146,6 +146,7 @@ void DrawResourcesFiller::drawTriangleMesh(
 
 	const size_t vertexBuffByteSize = mesh.getVertexBuffByteSize();
 	const size_t indexBuffByteSize = mesh.getIndexBuffByteSize();
+	const size_t triangleDataByteSize = vertexBuffByteSize + indexBuffByteSize;
 	const auto& indexBuffer = mesh.getIndices();
 	const auto& vertexBuffer = mesh.getVertices();
 	assert(indexBuffer.size() == vertexBuffer.size()); // TODO: figure out why it was needed then decide if this constraint needs to be kept
@@ -153,7 +154,7 @@ void DrawResourcesFiller::drawTriangleMesh(
 	DrawCallData drawCallData = {};
 
 	// Copy VertexBuffer
-	size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(vertexBuffByteSize, alignof(CTriangleMesh::vertex_t));
+	size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(triangleDataByteSize, alignof(CTriangleMesh::vertex_t));
 	drawCallData.triangleMeshVerticesBaseAddress = geometryBufferOffset;
 	void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset;
 	memcpy(dst, vertexBuffer.data(), vertexBuffByteSize);
diff --git a/75_CAD_3D/DrawResourcesFiller.h b/75_CAD_3D/DrawResourcesFiller.h
index ea2bca02e..aca81b409 100644
--- a/75_CAD_3D/DrawResourcesFiller.h
+++ b/75_CAD_3D/DrawResourcesFiller.h
@@ -23,7 +23,6 @@ using namespace nbl::core;
 using namespace nbl::asset;
 
 static_assert(sizeof(DrawObject) == 16u);
-static_assert(sizeof(MainObject) == 20u);
 
 // ! DrawResourcesFiller
 // ! This class provides important functionality to manage resources needed for a draw.
@@ -47,6 +46,28 @@ struct DrawResourcesFiller
 	static constexpr size_t GPUStructsMaxNaturalAlignment = 8u;
 	static constexpr size_t MinimumDrawResourcesMemorySize = 512u * 1 << 20u; // 512MB
 
+	/**
+	 * @brief Attempts to allocate draw resources within a given VRAM budget, retrying with progressively smaller sizes on failure.
+	 *
+	 * This function preserves the initial image-to-buffer memory ratio. If the initial sizes are too small,
+	 * it scales them up to meet a minimum required threshold. On allocation failure, it reduces the memory
+	 * sizes by a specified percentage and retries, until it either succeeds or the number of attempts exceeds `maxTries`.
+	 *
+	 * @param logicalDevice Pointer to the logical device used for allocation.
+	 * @param maxImageMemorySize Initial image memory size (in bytes) to attempt allocation with.
+	 * @param maxBufferMemorySize Initial buffer memory size (in bytes) to attempt allocation with.
+	 * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried.
+	 * @param reductionPercent The percentage by which to reduce the memory sizes after each failed attempt (e.g., 10 means reduce by 10%).
+	 * @param maxTries Maximum number of attempts to try reducing and allocating memory.
+	 *
+	 * @return true if the allocation succeeded at any iteration; false if all attempts failed.
+	 */
+	bool allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, std::span<uint32_t> memoryTypeIndexTryOrder, uint32_t reductionPercent = 10u, uint32_t maxTries = 32u);
+
+	/// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU
+	/// Records copy commands into intendedNextSubmit's active command buffer and may submit if allocation of staging upload memory fails.
+	bool pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit);
+
 	/// @brief general parent struct for 1.ReservedCompute and 2.CPUGenerated Resources
 	struct ResourceBase
 	{
@@ -132,38 +153,6 @@ struct DrawResourcesFiller
 	typedef std::function<void(SIntendedSubmitInfo&)> SubmitFunc;
 	void setSubmitDrawsFunction(const SubmitFunc& func);
 
-	/**
-	 * @brief Attempts to allocate a single contiguous device-local memory block for draw resources, divided into image and buffer sections.
-	 * 
-	 * The function allocates a single memory block and splits it into image and buffer arenas.
-	 * 
-	 * @param logicalDevice Pointer to the logical device used for memory allocation and resource creation.
-	 * @param requiredImageMemorySize The size in bytes of the memory required for images.
-	 * @param requiredBufferMemorySize The size in bytes of the memory required for buffers.
-	 * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried.
-	 * 
-	 * @return true if the memory allocation and resource setup succeeded; false otherwise.
-	 */
-	bool allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize, std::span<uint32_t> memoryTypeIndexTryOrder);
-	
-	/**
-	 * @brief Attempts to allocate draw resources within a given VRAM budget, retrying with progressively smaller sizes on failure.
-	 * 
-	 * This function preserves the initial image-to-buffer memory ratio. If the initial sizes are too small,
-	 * it scales them up to meet a minimum required threshold. On allocation failure, it reduces the memory
-	 * sizes by a specified percentage and retries, until it either succeeds or the number of attempts exceeds `maxTries`.
-	 * 
-	 * @param logicalDevice Pointer to the logical device used for allocation.
-	 * @param maxImageMemorySize Initial image memory size (in bytes) to attempt allocation with.
-	 * @param maxBufferMemorySize Initial buffer memory size (in bytes) to attempt allocation with.
-	 * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried.
-	 * @param reductionPercent The percentage by which to reduce the memory sizes after each failed attempt (e.g., 10 means reduce by 10%).
-	 * @param maxTries Maximum number of attempts to try reducing and allocating memory.
-	 * 
-	 * @return true if the allocation succeeded at any iteration; false if all attempts failed.
-	 */
-	bool allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, std::span<uint32_t> memoryTypeIndexTryOrder, uint32_t reductionPercent = 10u, uint32_t maxTries = 32u);
-
 	// Must be called at the end of each frame.
 	// right before submitting the main draw that uses the currently queued geometry, images, or other objects/resources.
 	// Registers the semaphore/value that will signal completion of this frame�s draw,
@@ -175,13 +164,6 @@ struct DrawResourcesFiller
 		const CTriangleMesh& mesh,
 		SIntendedSubmitInfo& intendedNextSubmit);
 
-	/// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU
-	/// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory.
-	bool pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit);
-
-	/// @brief Records GPU copy commands for all staged buffer resourcesCollection into the active command buffer.
-	bool pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resourcesCollection);
-
 	/// @brief  resets staging buffers and images
 	void reset()
 	{
@@ -196,6 +178,24 @@ struct DrawResourcesFiller
 	const size_t getCopiedResourcesSize() { return copiedResourcesSize; }
 	const core::vector<DrawCallData>& getDrawCalls() const { return drawCalls; }
 
+private:
+	/**
+	 * @brief Attempts to allocate a single contiguous device-local memory block for draw resources, divided into image and buffer sections.
+	 *
+	 * The function allocates a single memory block and splits it into image and buffer arenas.
+	 *
+	 * @param logicalDevice Pointer to the logical device used for memory allocation and resource creation.
+	 * @param requiredImageMemorySize The size in bytes of the memory required for images.
+	 * @param requiredBufferMemorySize The size in bytes of the memory required for buffers.
+	 * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried.
+	 *
+	 * @return true if the memory allocation and resource setup succeeded; false otherwise.
+	 */
+	bool allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize, std::span<uint32_t> memoryTypeIndexTryOrder);
+
+	/// @brief Records GPU copy commands for all staged buffer resourcesCollection into the active command buffer.
+	bool pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resourcesCollection);
+
 private:
 	nbl::system::logger_opt_smart_ptr m_logger = nullptr;
 
diff --git a/75_CAD_3D/main.cpp b/75_CAD_3D/main.cpp
index ed7ddc039..7188e8b00 100644
--- a/75_CAD_3D/main.cpp
+++ b/75_CAD_3D/main.cpp
@@ -109,9 +109,11 @@ class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources
 					.viewType = IGPUImageView::ET_2D,
 					.format = getImage(i)->getCreationParameters().format
 				});
-				m_framebuffers[i] = device->createFramebuffer({{
+				m_framebuffers[i] = device->createFramebuffer({ {
 					.renderpass = core::smart_refctd_ptr(m_renderpass),
 					.colorAttachments = &imageView.get(),
+					// TODO:
+					//.depthStencilAttachments = &depthImageView.get(),
 					.width = sharedParams.width,
 					.height = sharedParams.height
 				}});
@@ -392,12 +394,31 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 			}},
 			IGPURenderpass::SCreationParams::ColorAttachmentsEnd
 		};
-		
+
+		// TODO:
+		//IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = {
+		//	{{
+		//		{
+		//			.format = asset::EF_D32_SFLOAT,
+		//			.samples = IGPUImage::ESCF_1_BIT,
+		//			.mayAlias = false
+		//		},
+		//		/*.loadOp = */{IGPURenderpass::LOAD_OP::CLEAR},
+		//		/*.storeOp = */{IGPURenderpass::STORE_OP::STORE},
+		//		/*.initialLayout = */{IGPUImage::LAYOUT::UNDEFINED},
+		//		/*.finalLayout = */{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}
+		//	}},
+		//	IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd
+		//};
+
 		IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = {
 			{},
 			IGPURenderpass::SCreationParams::SubpassesEnd
 		};
+
 		subpasses[0].colorAttachments[0] = {.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}};
+		// TODO:
+		//subpasses[0].depthStencilAttachment = {{.render = {.attachmentIndex=0,.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}};
 		
 		// We actually need external dependencies to ensure ordering of the Implicit Layout Transitions relative to the semaphore signals
 		const IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = {
@@ -429,6 +450,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		smart_refctd_ptr<IGPURenderpass> renderpass;
 		IGPURenderpass::SCreationParams params = {};
 		params.colorAttachments = colorAttachments;
+		// TODO:
+		//params.depthStencilAttachments = depthAttachments;
 		params.subpasses = subpasses;
 		params.dependencies = dependencies;
 		renderpass = m_device->createRenderpass(params);
@@ -700,7 +723,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		return true;
 	}
 	
-	void _submitDraws(SIntendedSubmitInfo& intendedSubmitInfo, bool inBetweenSubmit)
+	void submitDraws(SIntendedSubmitInfo& intendedSubmitInfo, bool inBetweenSubmit)
 	{
 		drawResourcesFiller.pushAllUploads(intendedSubmitInfo);
 
@@ -716,7 +739,11 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		{
 			// TODO: create a proper camera
 
-			auto view = hlsl::math::linalg::rhLookAt<float64_t>({ 300.0f, 300.0f, 300.0f }, { 0.0f, 0.0f, 0.0f }, { 0.0f, 0.0f, 1.0f });
+			// animated camera which rotates around and always looks at the center
+			const double animationFactor = m_timeElapsed * 0.0003;
+			const float32_t3 cameraPosition = { 300.0f * std::cos(animationFactor), 300.0f, 300.0f * std::sin(animationFactor) };
+
+			auto view = hlsl::math::linalg::rhLookAt<float64_t>(cameraPosition, { 0.0f, 0.0f, 0.0f }, { 0.0f, 1.0f, 0.0f });
 			const float64_t aspectRatio = static_cast<float64_t>(m_window->getWidth()) / static_cast<float64_t>(m_window->getHeight());
 			auto proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix<float64_t>(hlsl::radians(60.0f), aspectRatio, 0.1f, 2000.0f);
 
@@ -818,7 +845,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 
 			PushConstants pc = {
 				.triangleMeshVerticesBaseAddress = drawCall.triangleMeshVerticesBaseAddress + resourcesGPUBuffer->getDeviceAddress() + resourcesCollection.geometryInfo.bufferOffset,
-				.triangleMeshMainObjectIndex = drawCall.triangleMeshMainObjectIndex
+				.triangleMeshMainObjectIndex = drawCall.triangleMeshMainObjectIndex,
+				.viewProjectionMatrix = viewProjection
 			};
 			cb->pushConstants(m_graphicsPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc);
 
@@ -865,102 +893,6 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		}
 	}
 
-	// TODO: remove
-	void submitDraws(SIntendedSubmitInfo& intendedSubmitInfo, bool inBetweenSubmit)
-	{
-		m_currentRecordingCommandBufferInfo = intendedSubmitInfo.getCommandBufferForRecording(); // drawResourcesFiller.pushAllUploads might've overflow submitted and changed the current recording command buffer
-
-		// Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state
-		auto* cb = m_currentRecordingCommandBufferInfo->cmdbuf;
-
-		asset::SViewport vp =
-		{
-			.x = 0u,
-			.y = 0u,
-			.width = static_cast<float>(m_window->getWidth()),
-			.height = static_cast<float>(m_window->getHeight()),
-			.minDepth = 1.f,
-			.maxDepth = 0.f,
-		};
-		cb->setViewport(0u, 1u, &vp);
-
-		VkRect2D scissor =
-		{
-			.offset = { 0, 0 },
-			.extent = { m_window->getWidth(), m_window->getHeight() },
-		};
-		cb->setScissor(0u, 1u, &scissor);
-
-		nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo;
-		VkRect2D currentRenderArea;
-		const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,0.f} };
-		{
-			auto scRes = static_cast<CSwapchainResources*>(m_surface->getSwapchainResources());
-			currentRenderArea =
-			{
-				.offset = {0,0},
-				.extent = {m_window->getWidth(),m_window->getHeight()}
-			};
-			beginInfo = {
-				.renderpass = (inBetweenSubmit) ? renderpassInBetween.get() : renderpassFinal.get(),
-				.framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex),
-				.colorClearValues = &clearValue,
-				.depthStencilClearValues = nullptr,
-				.renderArea = currentRenderArea
-			};
-		}
-		cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE);
-
-		cb->bindGraphicsPipeline(m_graphicsPipeline.get());
-
-		{
-			PushConstants pc = {
-				.triangleMeshVerticesBaseAddress = 1,
-				.triangleMeshMainObjectIndex = 2
-			};
-			cb->pushConstants(m_graphicsPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc);
-
-			cb->draw(3, 1, 0, 0);
-		}
-
-		cb->endRenderPass();
-
-		if (!inBetweenSubmit)
-			cb->endDebugMarker();
-
-		drawResourcesFiller.markFrameUsageComplete(intendedSubmitInfo.getFutureScratchSemaphore().value);
-
-		if (inBetweenSubmit)
-		{
-			if (intendedSubmitInfo.overflowSubmit(m_currentRecordingCommandBufferInfo) != IQueue::RESULT::SUCCESS)
-			{
-				m_logger->log("overflow submit failed.", ILogger::ELL_ERROR);
-			}
-		}
-		else
-		{
-			const auto nextFrameIx = m_realFrameIx + 1u;
-			const IQueue::SSubmitInfo::SSemaphoreInfo thisFrameRendered = {
-				.semaphore = m_renderSemaphore.get(),
-				.value = nextFrameIx,
-				.stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS
-			};
-			if (intendedSubmitInfo.submit(m_currentRecordingCommandBufferInfo, { &thisFrameRendered,1 }) == IQueue::RESULT::SUCCESS)
-			{
-				m_realFrameIx = nextFrameIx;
-
-				IQueue::SSubmitInfo::SSemaphoreInfo presentWait = thisFrameRendered;
-				// the stages for a wait semaphore operation are about what stage you WAIT in, not what stage you wait for
-				presentWait.stageMask = PIPELINE_STAGE_FLAGS::NONE; // top of pipe, there's no explicit presentation engine stage
-				m_surface->present(m_currentImageAcquire.imageIndex, { &presentWait,1 });
-			}
-			else
-			{
-				m_logger->log("regular submit failed.", ILogger::ELL_ERROR);
-			}
-		}
-	}
-
 	void endFrameRender(SIntendedSubmitInfo& intendedSubmitInfo)
 	{
 		submitDraws(intendedSubmitInfo, false);
@@ -1010,24 +942,18 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		drawResourcesFiller.reset();
 
 		core::vector<TriangleMeshVertex> vertices = {
-			//{ float64_t2(0.0, 0.0), 100.0 }, //0
-			//{ float64_t2(-200.0, -200.0), 10.0 }, //1
-			//{ float64_t2(200.0, -200.0), 10.0 }, //2
-			//{ float64_t2(200.0, 200.0), -20.0 }, //3
-			//{ float64_t2(-200.0, 200.0), 10.0 }, //4
-
-			{ float64_t3(0.0, 0.0, 100.0) },
-			{ float64_t3(-200.0, -200.0, 10.0) },
-			{ float64_t3(200.0, -100.0, 10.0) },
-			{ float64_t3(0.0, 0.0, 100.0) },
-			{ float64_t3(200.0, -100.0, 10.0) },
-			{ float64_t3(200.0, 200.0, -20.0) },
-			{ float64_t3(0.0, 0.0, 100.0) },
-			{ float64_t3(200.0, 200.0, -20.0) },
-			{ float64_t3(-200.0, 200.0, 10.0) },
-			{ float64_t3(0.0, 0.0, 100.0) },
-			{ float64_t3(-200.0, 200.0, 10.0) },
-			{ float64_t3(-200.0, -200.0, 10.0) },
+			{ float64_t3(0.0, 100.0, 0.0) },
+			{ float64_t3(-200.0, 10.0, -200.0) },
+			{ float64_t3(200.0, 10.0, -100.0) },
+			{ float64_t3(0.0, 100.0, 0.0) },
+			{ float64_t3(200.0, 10.0, -100.0) },
+			{ float64_t3(200.0, -20.0, 200.0) },
+			{ float64_t3(0.0, 100.0, 0.0) },
+			{ float64_t3(200.0, -20.0, 200.0) },
+			{ float64_t3(-200.0, 10.0, 200.0) },
+			{ float64_t3(0.0, 100.0, 0.0) },
+			{ float64_t3(-200.0, 10.0, 200.0) },
+			{ float64_t3(-200.0, 10.0, -200.0) },
 		};
 
 		core::vector<uint32_t> indices = {
@@ -1038,14 +964,22 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		};
 
 		CTriangleMesh mesh;
-		mesh.setVertices(std::move(vertices));
+		mesh.setVertices(core::vector<TriangleMeshVertex>(vertices));
 		mesh.setIndices(std::move(indices));
 
+		// pyramid A
+		drawResourcesFiller.drawTriangleMesh(mesh, intendedNextSubmit);
+
+		// pyramid B
+		float64_t3 offset = { 500.0f, 0.0f, 0.0f };
+		for (auto& vertex : vertices)
+			vertex.pos += offset;
+		mesh.setVertices(std::move(vertices));
 		drawResourcesFiller.drawTriangleMesh(mesh, intendedNextSubmit);
 	}
 
 protected:
-	clock_t::time_point start;
+	clock_t::time_point start; // TODO: am I missing something? Why is it never initialized?
 	std::chrono::seconds timeout = std::chrono::seconds(0x7fffFFFFu);
 
 	double m_timeElapsed = 0.0;
@@ -1066,7 +1000,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 	// pointer to one of the command buffer infos from above, this is the only command buffer used to record current submit in current frame, it will be updated by SIntendedSubmitInfo
 	IQueue::SSubmitInfo::SCommandBufferInfo const * m_currentRecordingCommandBufferInfo; // pointer can change, value cannot
 
-	smart_refctd_ptr<IGPUBuffer>		m_globalsBuffer;
+	smart_refctd_ptr<IGPUBuffer> m_globalsBuffer;
 	DrawResourcesFiller drawResourcesFiller; // you can think of this as the scene data needed to draw everything, we only have one instance so let's use a timeline semaphore to sync all renders
 
 	smart_refctd_ptr<ISemaphore> m_renderSemaphore; // timeline semaphore to sync frames together
@@ -1079,13 +1013,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 
 	uint64_t m_realFrameIx = 0u;
 
-	smart_refctd_ptr<IGPUDescriptorSetLayout>	descriptorSetLayout0;
-	smart_refctd_ptr<IGPUDescriptorSetLayout>	descriptorSetLayout1;
-	smart_refctd_ptr<IGPUPipelineLayout>		m_pipelineLayout;
-	smart_refctd_ptr<IGPUGraphicsPipeline>		resolveAlphaGraphicsPipeline;
-	smart_refctd_ptr<IGPUGraphicsPipeline>		m_debugGraphicsPipeline;
-	smart_refctd_ptr<IGPUGraphicsPipeline>		m_graphicsPipeline;
-	smart_refctd_ptr<IGPUGraphicsPipeline>		m_streamedImagesGraphicsPipeline;
+	smart_refctd_ptr<IGPUPipelineLayout> m_pipelineLayout;
+	smart_refctd_ptr<IGPUGraphicsPipeline> m_graphicsPipeline;
 
 	smart_refctd_ptr<IWindow> m_window;
 	smart_refctd_ptr<CSimpleResizeSurface<CSwapchainResources>> m_surface;
diff --git a/75_CAD_3D/shaders/globals.hlsl b/75_CAD_3D/shaders/globals.hlsl
index 901b13958..c080c7c4a 100644
--- a/75_CAD_3D/shaders/globals.hlsl
+++ b/75_CAD_3D/shaders/globals.hlsl
@@ -22,10 +22,12 @@ using namespace nbl::hlsl;
 using pfloat64_t = portable_float64_t<DeviceConfigCaps>;
 using pfloat64_t2 = portable_float64_t2<DeviceConfigCaps>;
 using pfloat64_t3 = portable_float64_t3<DeviceConfigCaps>;
+using pfloat64_t4 = portable_float64_t4<DeviceConfigCaps>;
 #else
 using pfloat64_t = float64_t;
 using pfloat64_t2 = nbl::hlsl::vector<float64_t, 2>;
 using pfloat64_t3 = nbl::hlsl::vector<float64_t, 3>;
+using pfloat64_t4 = nbl::hlsl::vector<float64_t, 4>;
 #endif
 
 using pfloat64_t3x3 = portable_matrix_t3x3<pfloat64_t>;
@@ -40,12 +42,11 @@ struct PushConstants
 
 struct Pointers
 {
-    uint64_t mainObjects;
     uint64_t drawObjects;
     uint64_t geometryBuffer;
 };
 #ifndef __HLSL_VERSION
-static_assert(sizeof(Pointers) == 24u);
+static_assert(sizeof(Pointers) == 16u);
 #endif
 
 struct Globals
@@ -54,33 +55,9 @@ struct Globals
     pfloat64_t4x4 defaultProjectionToNDC;
 };
 #ifndef __HLSL_VERSION
-static_assert(sizeof(Globals) == 152u);
+static_assert(sizeof(Globals) == 144u);
 #endif
 
-enum class MainObjectType : uint32_t
-{
-    NONE = 0u,
-    POLYLINE,
-    HATCH,
-    TEXT,
-    STATIC_IMAGE,
-    DTM,
-    GRID_DTM,
-    STREAMED_IMAGE,
-};
-
-// Consists of multiple DrawObjects
-// [IDEA]: In GPU-driven rendering, to save mem for MainObject data fetching: many of these can be shared amongst different main objects, we could find these styles, settings, etc indices with upper_bound
-// [TODO]: pack indices and members of mainObject and DrawObject + enforce max size for autosubmit --> but do it only after the mainobject definition is finalized in gpu-driven rendering work
-struct MainObject
-{
-    uint32_t styleIdx;
-    uint32_t dtmSettingsIdx;
-    uint32_t customProjectionIndex;
-    uint32_t customClipRectIndex;
-    uint32_t transformationType; // todo pack later, it's just 2 possible values atm
-};
-
 struct DrawObject
 {
     uint32_t type_subsectionIdx; // packed two uint16 into uint32
@@ -97,7 +74,6 @@ struct TriangleMeshVertex
 [[vk::binding(0, 0)]] ConstantBuffer<Globals> globals : register(b0);
 #else
 static_assert(alignof(pfloat64_t3x3)==8u);
-static_assert(alignof(MainObject)==4u);
 static_assert(alignof(DrawObject)==8u);
 #endif
 
diff --git a/75_CAD_3D/shaders/main_pipeline/common.hlsl b/75_CAD_3D/shaders/main_pipeline/common.hlsl
index 677bf0ec9..7866b5b8c 100644
--- a/75_CAD_3D/shaders/main_pipeline/common.hlsl
+++ b/75_CAD_3D/shaders/main_pipeline/common.hlsl
@@ -6,6 +6,7 @@
 struct PSInput
 {
     [[vk::location(0)]] float4 position : SV_Position;
+    [[vk::location(1)]] float3 normal : COLOR1;
 };
 
 // Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated
diff --git a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
index f82fc9eab..d61b99275 100644
--- a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
+++ b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
@@ -1,7 +1,15 @@
 #include "common.hlsl"
 
+static const float32_t3 SunlightDirection = float32_t3(0.7071f, -0.7071f, 0.0f);
+static const float32_t3 TerrainColor = float32_t3(1.0f, 1.0f, 1.0f);
+
 [shader("pixel")]
 float4 fragMain(PSInput input) : SV_Target
 {
-	return float4(0.0f, 0.0f, 1.0f, 1.0f);
+	static const float AmbientLightIntensity = 0.1f;
+	const float diffuseLightIntensity = max(dot(-SunlightDirection, input.normal), 0.0f);
+
+	const float32_t3 fragColor = (AmbientLightIntensity + diffuseLightIntensity) * TerrainColor;
+
+	return float32_t4(fragColor, 1.0f);
 }
diff --git a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
index 7e81c85f6..98996ba79 100644
--- a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
+++ b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
@@ -8,10 +8,56 @@ PSInput vtxMain(uint vertexID : SV_VertexID)
     PSInput outV;
     TriangleMeshVertex vtx = vk::RawBufferLoad<TriangleMeshVertex>(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * vertexID, 8u);
 
-    outV.position.x = _static_cast<float>(vtx.pos.x);
-    outV.position.y = _static_cast<float>(vtx.pos.y);
-    outV.position.z = _static_cast<float>(vtx.pos.z);
-    outV.position.w = 1.0f;
+    // Flat per-triangle normal in object space (treated as world space for now); vertex IDs are grouped in threes to find the triangle
+    {
+        const uint32_t currentVertexWithinTriangleIndex = vertexID % 3;
+        const uint32_t firstVertexOfCurrentTriangleIndex = vertexID - currentVertexWithinTriangleIndex;
+
+        TriangleMeshVertex triangleVertices[3];
+        triangleVertices[0] = vk::RawBufferLoad<TriangleMeshVertex>(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * firstVertexOfCurrentTriangleIndex, 8u);
+        triangleVertices[1] = vk::RawBufferLoad<TriangleMeshVertex>(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * (firstVertexOfCurrentTriangleIndex + 1), 8u);
+        triangleVertices[2] = vk::RawBufferLoad<TriangleMeshVertex>(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * (firstVertexOfCurrentTriangleIndex + 2), 8u);
+
+        // TODO: calculate on pfloat64_t
+        float32_t3 vertex0 = _static_cast<float32_t3>(triangleVertices[0].pos);
+        float32_t3 vertex1 = _static_cast<float32_t3>(triangleVertices[1].pos);
+        float32_t3 vertex2 = _static_cast<float32_t3>(triangleVertices[2].pos);
+
+        float32_t3 triangleEdge0 = vertex1 - vertex0;
+        float32_t3 triangleEdge1 = vertex2 - vertex0;
+
+        outV.normal = normalize(cross(triangleEdge1, triangleEdge0)); // keep it signed and unit length: fragMain uses it directly in dot() for lighting, a [0,1] colour encoding would skew the result
+    }
+
+    pfloat64_t4 pos;
+    pos.x = vtx.pos.x;
+    pos.y = vtx.pos.y;
+    pos.z = vtx.pos.z;
+    pos.w = _static_cast<pfloat64_t>(1.0f);
+
+
+    outV.position = _static_cast<float4>(pos);
+
+    //pos = mul(pc.viewProjectionMatrix, pos);
+    // TODO: use pc.viewProjectionMatrix and multiply it with pfloat64_t4 pos instead fix portable_matrix with portable_float multiplication
+    float4x4 viewProjMatrix;
+    for (int i = 0; i < 4; ++i)
+    {
+        viewProjMatrix[i][0] = _static_cast<float>(pc.viewProjectionMatrix[i].x);
+        viewProjMatrix[i][1] = _static_cast<float>(pc.viewProjectionMatrix[i].y);
+        viewProjMatrix[i][2] = _static_cast<float>(pc.viewProjectionMatrix[i].z);
+        viewProjMatrix[i][3] = _static_cast<float>(pc.viewProjectionMatrix[i].w);
+    }
+
+    /*if (vertexID == 0)
+    {
+        printf("%f, %f, %f, %f", a[0][0], a[0][1], a[0][2], a[0][3]);
+        printf("%f, %f, %f, %f", a[1][0], a[1][1], a[1][2], a[1][3]);
+        printf("%f, %f, %f, %f", a[2][0], a[2][1], a[2][2], a[2][3]);
+        printf("%f, %f, %f, %f", a[3][0], a[3][1], a[3][2], a[3][3]);
+    }*/
+
+    outV.position = mul(viewProjMatrix, outV.position);
 
     return outV;
 }

From ac6c8604adcc936ec9ad131c321d665651af5377 Mon Sep 17 00:00:00 2001
From: Przemog1 <minikers21@gmail.com>
Date: Wed, 11 Mar 2026 22:53:38 +0100
Subject: [PATCH 3/9] Added camera

---
 75_CAD_3D/main.cpp | 140 ++++++++++++++++++++++++++++-----------------
 1 file changed, 89 insertions(+), 51 deletions(-)

diff --git a/75_CAD_3D/main.cpp b/75_CAD_3D/main.cpp
index 7188e8b00..37f9927b4 100644
--- a/75_CAD_3D/main.cpp
+++ b/75_CAD_3D/main.cpp
@@ -64,6 +64,32 @@ class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources
 	public:
 		CSwapchainResources() = default;
 
+		// TODO: this is a prototype; depth images should probably not be created in the initialize function
+		void initialize(const smart_refctd_ptr<IWindow>& window, const core::smart_refctd_ptr<video::ILogicalDevice>& device)	
+		{
+			asset::E_FORMAT depthFormat = asset::EF_D32_SFLOAT;
+
+			for (auto& depthImage : depthImages)
+			{
+				IGPUImage::SCreationParams imgInfo;
+				imgInfo.format = depthFormat;
+				imgInfo.type = IGPUImage::ET_2D;
+				imgInfo.extent.width = window->getWidth();
+				imgInfo.extent.height = window->getHeight();
+				imgInfo.extent.depth = 1u;
+				imgInfo.mipLevels = 1u;
+				imgInfo.arrayLayers = 1u;
+				imgInfo.samples = asset::ICPUImage::ESCF_1_BIT;
+				imgInfo.tiling = IGPUImage::TILING::OPTIMAL;
+				imgInfo.usage = asset::IImage::E_USAGE_FLAGS::EUF_RENDER_ATTACHMENT_BIT;
+
+				depthImage = device->createImage(std::move(imgInfo));
+				auto memReq = depthImage->getMemoryReqs();
+				memReq.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits();
+				device->allocate(memReq, depthImage.get());
+			}
+		}
+
 		inline E_FORMAT deduceRenderpassFormat(ISurface* surface, IPhysicalDevice* physDev)
 		{
 			ISwapchain::SCreationParams swapchainParams = {.surface=smart_refctd_ptr<ISurface>(surface), };
@@ -109,11 +135,24 @@ class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources
 					.viewType = IGPUImageView::ET_2D,
 					.format = getImage(i)->getCreationParameters().format
 				});
+				auto depthImageView = device->createImageView({
+					.flags = IGPUImageView::ECF_NONE,
+					.subUsages = IGPUImage::EUF_RENDER_ATTACHMENT_BIT,
+					.image = core::smart_refctd_ptr<IGPUImage>(depthImages[i]),
+					.viewType = IGPUImageView::ET_2D,
+					.format = depthImages[i]->getCreationParameters().format,
+					.subresourceRange = {
+							.aspectMask = asset::IImage::EAF_DEPTH_BIT,
+							.baseMipLevel = 0,
+							.levelCount = 1,
+							.baseArrayLayer = 0,
+							.layerCount = 1
+						}
+					});
 				m_framebuffers[i] = device->createFramebuffer({ {
 					.renderpass = core::smart_refctd_ptr(m_renderpass),
+					.depthStencilAttachments = &depthImageView.get(),
 					.colorAttachments = &imageView.get(),
-					// TODO:
-					//.depthStencilAttachments = &depthImageView.get(),
 					.width = sharedParams.width,
 					.height = sharedParams.height
 				}});
@@ -126,6 +165,7 @@ class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources
 		// Per-swapchain
 		core::smart_refctd_ptr<IGPURenderpass> m_renderpass;
 		std::array<core::smart_refctd_ptr<IGPUFramebuffer>,ISwapchain::MaxImages> m_framebuffers;
+		std::array<core::smart_refctd_ptr<IGPUImage>, ISwapchain::MaxImages> depthImages = {};
 };
 
 class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplication, public nbl::examples::BuiltinResourcesApplication
@@ -395,21 +435,20 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 			IGPURenderpass::SCreationParams::ColorAttachmentsEnd
 		};
 
-		// TODO:
-		//IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = {
-		//	{{
-		//		{
-		//			.format = asset::EF_D32_SFLOAT,
-		//			.samples = IGPUImage::ESCF_1_BIT,
-		//			.mayAlias = false
-		//		},
-		//		/*.loadOp = */{IGPURenderpass::LOAD_OP::CLEAR},
-		//		/*.storeOp = */{IGPURenderpass::STORE_OP::STORE},
-		//		/*.initialLayout = */{IGPUImage::LAYOUT::UNDEFINED},
-		//		/*.finalLayout = */{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}
-		//	}},
-		//	IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd
-		//};
+		IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = {
+			{{
+				{
+					.format = asset::EF_D32_SFLOAT,
+					.samples = IGPUImage::ESCF_1_BIT,
+					.mayAlias = false
+				},
+				/*.loadOp = */{IGPURenderpass::LOAD_OP::CLEAR},
+				/*.storeOp = */{IGPURenderpass::STORE_OP::STORE},
+				/*.initialLayout = */{IGPUImage::LAYOUT::UNDEFINED},
+				/*.finalLayout = */{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}
+			}},
+			IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd
+		};
 
 		IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = {
 			{},
@@ -417,8 +456,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		};
 
 		subpasses[0].colorAttachments[0] = {.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}};
-		// TODO:
-		//subpasses[0].depthStencilAttachment = {{.render = {.attachmentIndex=0,.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}};
+		subpasses[0].depthStencilAttachment = {{.render = {.attachmentIndex=0,.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}};
 		
 		// We actually need external dependencies to ensure ordering of the Implicit Layout Transitions relative to the semaphore signals
 		const IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = {
@@ -450,8 +488,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		smart_refctd_ptr<IGPURenderpass> renderpass;
 		IGPURenderpass::SCreationParams params = {};
 		params.colorAttachments = colorAttachments;
-		// TODO:
-		//params.depthStencilAttachments = depthAttachments;
+		params.depthStencilAttachments = depthAttachments;
 		params.subpasses = subpasses;
 		params.dependencies = dependencies;
 		renderpass = m_device->createRenderpass(params);
@@ -508,6 +545,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 			return logFail("Could not create Window & Surface!");
 		
 		auto scResources = std::make_unique<CSwapchainResources>();
+		scResources->initialize(m_window, m_device);
 		const auto format = scResources->deduceRenderpassFormat(m_surface->getSurface(), m_physicalDevice); // TODO: DO I need to recreate render passes if swapchain gets recreated with different format?
 		renderpassInitial = createRenderpass(format, IGPURenderpass::LOAD_OP::CLEAR, IImage::LAYOUT::UNDEFINED, IImage::LAYOUT::ATTACHMENT_OPTIMAL);
 		renderpassInBetween = createRenderpass(format, IGPURenderpass::LOAD_OP::LOAD, IImage::LAYOUT::ATTACHMENT_OPTIMAL, IImage::LAYOUT::ATTACHMENT_OPTIMAL);
@@ -567,7 +605,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 			.rasterization = {
 				.polygonMode = EPM_FILL,
 				.faceCullingMode = EFCM_NONE,
-				.depthWriteEnable = false,
+				.depthWriteEnable = true,
+				.depthCompareOp = asset::E_COMPARE_OP::ECO_LESS
 			},
 			.blend = {},
 		};
@@ -614,6 +653,16 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		m_intendedNextSubmit.scratchCommandBuffers = m_commandBufferInfos;
 		m_currentRecordingCommandBufferInfo = &m_commandBufferInfos[0];
 		
+		// camera
+		{
+			const core::vectorSIMDf cameraPosition(300.0f, 300.0f, 300.0f);
+			const core::vectorSIMDf cameraTarget(0.0f, 0.0f, 0.0f);
+			const float32_t aspectRatio = static_cast<float32_t>(m_window->getWidth()) / static_cast<float32_t>(m_window->getHeight());
+			float32_t4x4 projectionMatrix = hlsl::math::thin_lens::lhPerspectiveFovMatrix<float>(core::radians(60.0f), aspectRatio, 0.1f, 10000.0f);
+			camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f);
+			camera.setMoveSpeed(30.0f);
+		}
+
 		return true;
 	}
 
@@ -621,24 +670,20 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 	inline void workLoopBody() override
 	{
 		auto now = std::chrono::high_resolution_clock::now();
-		double dt = std::chrono::duration_cast<std::chrono::milliseconds>(now - lastTime).count();
+		auto dtMilliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(now - lastTime);
+		double dt = dtMilliseconds.count();
 		lastTime = now;
 		m_timeElapsed += dt;
 
 		m_inputSystem->getDefaultMouse(&mouse);
 		m_inputSystem->getDefaultKeyboard(&keyboard);
 
-		mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void
-			{
-			}
-		, m_logger.get());
-		keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void
-			{
-				for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++)
-				{
-				}
-			}
-		, m_logger.get());
+		{
+			camera.beginInputProcessing(dtMilliseconds);
+			mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, m_logger.get());
+			keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); }, m_logger.get());
+			camera.endInputProcessing(dtMilliseconds);
+		}
 
 		if (!beginFrameRender())
 			return;
@@ -708,11 +753,17 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 				.extent = {m_window->getWidth(),m_window->getHeight()}
 			};
 
+			IGPUCommandBuffer::SClearDepthStencilValue depthClear =
+			{
+				.depth = 1.0f,
+				.stencil = 0
+			};
+
 			beginInfo = {
 				.renderpass = renderpassInitial.get(),
 				.framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex),
 				.colorClearValues = &clearValue,
-				.depthStencilClearValues = nullptr,
+				.depthStencilClearValues = &depthClear,
 				.renderArea = currentRenderArea
 			};
 		}
@@ -735,21 +786,6 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		const auto& resourcesCollection = drawResourcesFiller.getResourcesCollection();
 		const auto& resourcesGPUBuffer = drawResourcesFiller.getResourcesGPUBuffer();
 
-		float64_t4x4 viewProjection;
-		{
-			// TODO: create a proper camera
-
-			// animated camera which rotates around and always looks at the center
-			const double animationFactor = m_timeElapsed * 0.0003;
-			const float32_t3 cameraPosition = { 300.0f * std::cos(animationFactor), 300.0f, 300.0f * std::sin(animationFactor) };
-
-			auto view = hlsl::math::linalg::rhLookAt<float64_t>(cameraPosition, { 0.0f, 0.0f, 0.0f }, { 0.0f, 1.0f, 0.0f });
-			const float64_t aspectRatio = static_cast<float64_t>(m_window->getWidth()) / static_cast<float64_t>(m_window->getHeight());
-			auto proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix<float64_t>(hlsl::radians(60.0f), aspectRatio, 0.1f, 2000.0f);
-
-			viewProjection = hlsl::mul(proj, nbl::hlsl::math::linalg::promote_affine<4, 4>(view));
-		}
-
 		Globals globalData = {};
 		uint64_t baseAddress = resourcesGPUBuffer->getDeviceAddress();
 		globalData.pointers = {
@@ -846,7 +882,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 			PushConstants pc = {
 				.triangleMeshVerticesBaseAddress = drawCall.triangleMeshVerticesBaseAddress + resourcesGPUBuffer->getDeviceAddress() + resourcesCollection.geometryInfo.bufferOffset,
 				.triangleMeshMainObjectIndex = drawCall.triangleMeshMainObjectIndex,
-				.viewProjectionMatrix = viewProjection
+				.viewProjectionMatrix = static_cast<float64_t4x4>(camera.getConcatenatedMatrix())
 			};
 			cb->pushConstants(m_graphicsPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc);
 
@@ -1020,6 +1056,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 	smart_refctd_ptr<CSimpleResizeSurface<CSwapchainResources>> m_surface;
 	smart_refctd_ptr<IGPUImageView> pseudoStencilImageView;
 	smart_refctd_ptr<IGPUImageView> colorStorageImageView;
+
+	Camera camera;
 };
 
 NBL_MAIN_FUNC(ComputerAidedDesign)

From 0bc7c7d92aa9ece2e8408c57cea5113e1bada4e6 Mon Sep 17 00:00:00 2001
From: Przemog1 <minikers21@gmail.com>
Date: Thu, 12 Mar 2026 13:01:35 +0100
Subject: [PATCH 4/9] Fixed depth buffer

---
 75_CAD_3D/main.cpp | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/75_CAD_3D/main.cpp b/75_CAD_3D/main.cpp
index 37f9927b4..4e5bea76b 100644
--- a/75_CAD_3D/main.cpp
+++ b/75_CAD_3D/main.cpp
@@ -442,10 +442,10 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 					.samples = IGPUImage::ESCF_1_BIT,
 					.mayAlias = false
 				},
-				/*.loadOp = */{IGPURenderpass::LOAD_OP::CLEAR},
+				/*.loadOp = */{loadOp},
 				/*.storeOp = */{IGPURenderpass::STORE_OP::STORE},
-				/*.initialLayout = */{IGPUImage::LAYOUT::UNDEFINED},
-				/*.finalLayout = */{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}
+				/*.initialLayout = */{initialLayout},
+				/*.finalLayout = */{IImage::LAYOUT::ATTACHMENT_OPTIMAL}
 			}},
 			IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd
 		};
@@ -606,12 +606,14 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 				.polygonMode = EPM_FILL,
 				.faceCullingMode = EFCM_NONE,
 				.depthWriteEnable = true,
-				.depthCompareOp = asset::E_COMPARE_OP::ECO_LESS
+				.depthCompareOp = asset::E_COMPARE_OP::ECO_GREATER
 			},
 			.blend = {},
 		};
 		mainGraphicsPipelineParams.renderpass = compatibleRenderPass.get();
 
+		assert(mainGraphicsPipelineParams.cached.rasterization.depthTestEnable());
+
 		// Create Main Graphics Pipelines 
 		{
 			video::IGPUPipelineBase::SShaderSpecInfo specInfo[2] = {
@@ -660,7 +662,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 			const float32_t aspectRatio = static_cast<float32_t>(m_window->getWidth()) / static_cast<float32_t>(m_window->getHeight());
 			float32_t4x4 projectionMatrix = hlsl::math::thin_lens::lhPerspectiveFovMatrix<float>(core::radians(60.0f), aspectRatio, 0.1f, 10000.0f);
 			camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f);
-			camera.setMoveSpeed(30.0f);
+			camera.setMoveSpeed(50.0f);
 		}
 
 		return true;
@@ -755,7 +757,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 
 			IGPUCommandBuffer::SClearDepthStencilValue depthClear =
 			{
-				.depth = 1.0f,
+				.depth = 0.0f,
 				.stencil = 0
 			};
 
@@ -856,6 +858,10 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo;
 		VkRect2D currentRenderArea;
 		const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,0.f} };
+		IGPUCommandBuffer::SClearDepthStencilValue depthClearValue = {
+				.depth = 0.0f, // same clear depth as the initial pass; with the ECO_GREATER depth test a 1.0f clear would reject every fragment
+				.stencil = 0
+		};
 		{
 			auto scRes = static_cast<CSwapchainResources*>(m_surface->getSwapchainResources());
 			currentRenderArea =
@@ -863,11 +869,12 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 				.offset = {0,0},
 				.extent = {m_window->getWidth(),m_window->getHeight()}
 			};
+
 			beginInfo = {
 				.renderpass = (inBetweenSubmit) ? renderpassInBetween.get():renderpassFinal.get(),
 				.framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex),
 				.colorClearValues = &clearValue,
-				.depthStencilClearValues = nullptr,
+				.depthStencilClearValues = &depthClearValue,
 				.renderArea = currentRenderArea
 			};
 		}

From c0d264d472d6581534f9744d942b81452006022a Mon Sep 17 00:00:00 2001
From: Przemog1 <minikers21@gmail.com>
Date: Tue, 17 Mar 2026 21:31:57 +0100
Subject: [PATCH 5/9] Added DTM settings

---
 75_CAD_3D/CTriangleMesh.h                     |  64 ++++++++
 75_CAD_3D/DrawResourcesFiller.cpp             |  83 +++++++++-
 75_CAD_3D/DrawResourcesFiller.h               |  73 ++++++++-
 75_CAD_3D/main.cpp                            | 142 +++++++++++++++++-
 75_CAD_3D/shaders/globals.hlsl                |  98 +++++++++++-
 75_CAD_3D/shaders/main_pipeline/common.hlsl   |   1 +
 .../main_pipeline/fragment_shader.hlsl        |  10 +-
 .../shaders/main_pipeline/vertex_shader.hlsl  |   3 +
 8 files changed, 455 insertions(+), 19 deletions(-)

diff --git a/75_CAD_3D/CTriangleMesh.h b/75_CAD_3D/CTriangleMesh.h
index 2100c801d..f5b9b034a 100644
--- a/75_CAD_3D/CTriangleMesh.h
+++ b/75_CAD_3D/CTriangleMesh.h
@@ -6,6 +6,70 @@
 
 using namespace nbl;
 
+struct DTMHeightShadingSettingsInfo // CPU-side height-shading parameters plus a height-sorted color map that can be copied into a DTMSettings
+{
+	// Which height shading mode to use (see E_HEIGHT_SHADING_MODE)
+	E_HEIGHT_SHADING_MODE heightShadingMode;
+
+	// Used as fixed interval length for "DISCRETE_FIXED_LENGTH_INTERVALS" shading mode
+	float intervalLength;
+
+	// Converts an interval index to its corresponding height value
+	// For example, if this value is 10.0, then an interval index of 2 corresponds to a height of 20.0.
+	// This computed height is later used to determine the interpolated color for shading.
+	// Usually this equals `intervalLength`, but it can be a different scale so that the last index corresponds to the largest height
+	float intervalIndexToHeightMultiplier;
+
+	// Used for "DISCRETE_FIXED_LENGTH_INTERVALS" shading mode
+	// If `isCenteredShading` is true, the intervals are centered around `minHeight`, meaning the
+	// first interval spans [minHeight - intervalLength / 2.0, minHeight + intervalLength / 2.0].
+	// Otherwise, intervals are aligned from `minHeight` upward, so the first interval spans
+	// [minHeight, minHeight + intervalLength].
+	bool isCenteredShading;
+
+	void addHeightColorMapEntry(float height, float32_t4 color)
+	{
+		heightColorSet.emplace(height, color); // the set is keyed on height only: an entry whose height is already present is not inserted
+	}
+
+	bool fillShaderDTMSettingsHeightColorMap(DTMSettings& dtmSettings) const // returns false, without writing anything, when there are too many entries
+	{
+		const uint32_t mapSize = heightColorSet.size();
+		if (mapSize > DTMHeightShadingSettings::HeightColorMapMaxEntries)
+			return false;
+		dtmSettings.heightShadingSettings.heightColorEntryCount = mapSize;
+
+		int index = 0;
+		for (auto it = heightColorSet.begin(); it != heightColorSet.end(); ++it) // std::set iterates in ascending height order
+		{
+			dtmSettings.heightShadingSettings.heightColorMapHeights[index] = it->height;
+			dtmSettings.heightShadingSettings.heightColorMapColors[index] = it->color;
+			++index;
+		}
+
+		return true;
+	}
+
+private:
+	struct HeightColor // one (height, color) stop of the color map
+	{
+		float height;
+		float32_t4 color;
+
+		bool operator<(const HeightColor& other) const // ordering (and uniqueness inside the set) depends on height alone
+		{
+			return height < other.height;
+		}
+	};
+
+	std::set<HeightColor> heightColorSet;
+};
+
+struct DTMSettingsInfo // CPU-side DTM settings handed to DrawResourcesFiller::drawTriangleMesh; currently holds only height shading
+{
+	DTMHeightShadingSettingsInfo heightShadingInfo;
+};
+
 class CTriangleMesh final
 {
 public:
diff --git a/75_CAD_3D/DrawResourcesFiller.cpp b/75_CAD_3D/DrawResourcesFiller.cpp
index f2de0793d..81ab6b451 100644
--- a/75_CAD_3D/DrawResourcesFiller.cpp
+++ b/75_CAD_3D/DrawResourcesFiller.cpp
@@ -137,10 +137,19 @@ bool DrawResourcesFiller::allocateDrawResourcesWithinAvailableVRAM(ILogicalDevic
 
 void DrawResourcesFiller::drawTriangleMesh(
 	const CTriangleMesh& mesh,
+	const DTMSettingsInfo& dtmSettingsInfo,
 	SIntendedSubmitInfo& intendedNextSubmit)
 {
-	// TODO: main objects
-	// beginMainObject();
+	setActiveDTMSettings(dtmSettingsInfo);
+	beginMainObject(MainObjectType::DTM);
+
+	uint32_t mainObjectIdx = acquireActiveMainObjectIndex(intendedNextSubmit);
+	if (mainObjectIdx == InvalidMainObjectIdx)
+	{
+		m_logger.log("drawTriangleMesh: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR);
+		assert(false);
+		return;
+	}
 
 	// TODO: for now we add whole mesh at once, instead we should add triangle by triangle and see check if we overflow memory
 
@@ -165,11 +174,11 @@ void DrawResourcesFiller::drawTriangleMesh(
 	drawCallData.indexBufferOffset = geometryBufferOffset;
 	memcpy(dst, indexBuffer.data(), indexBuffByteSize);
 
-	drawCallData.triangleMeshMainObjectIndex = 0u; // TODO: fix when implementing main objects
+	drawCallData.triangleMeshMainObjectIndex = mainObjectIdx;
 	drawCallData.indexCount = mesh.getIndexCount();
 	drawCalls.push_back(drawCallData);
 
-	//endMainObject();
+	endMainObject();
 }
 
 bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit)
@@ -220,6 +229,8 @@ bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSub
 			return true;
 		};
 
+	copyCPUFilledDrawBuffer(resources.mainObjects);
+	copyCPUFilledDrawBuffer(resources.dtmSettings);
 	copyCPUFilledDrawBuffer(resources.drawObjects);
 	copyCPUFilledDrawBuffer(resources.indexBuffer);
 	copyCPUFilledDrawBuffer(resources.geometryInfo);
@@ -233,4 +244,68 @@ void DrawResourcesFiller::markFrameUsageComplete(uint64_t drawSubmitWaitValue)
 	currentFrameIndex++;
 	// TODO[LATER]: take into account that currentFrameIndex was submitted with drawSubmitWaitValue; Use that value when deallocating the resources marked with this frame index
 	//				Currently, for evictions the worst case value will be waited for, as there is no way yet to know which semaphoroe value will signal the completion of the (to be evicted) resource's usage
+}
+
+uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex(SIntendedSubmitInfo& intendedNextSubmit) // lazily adds the MainObject of the current scope and caches its index
+{
+	if (activeMainObjectIndex != InvalidMainObjectIdx) // already acquired for this scope
+		return activeMainObjectIndex;
+
+	if (activeMainObjectType == MainObjectType::NONE)
+	{
+		assert(false); // You're probably trying to acquire mainObjectIndex outside of a beginMainObject/endMainObject scope
+		return InvalidMainObjectIdx;
+	}
+
+	const bool needsDTMSettings = activeMainObjectType == MainObjectType::DTM;
+
+	MainObject mainObject = {};
+	mainObject.dtmSettingsIdx = (needsDTMSettings) ? acquireActiveDTMSettingsIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidDTMSettingsIdx;
+	activeMainObjectIndex = resourcesCollection.mainObjects.addAndGetOffset(mainObject); // cached until the next beginMainObject/endMainObject/resetMainObjects
+	return activeMainObjectIndex;
+}
+
+uint32_t DrawResourcesFiller::acquireActiveDTMSettingsIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) // returns the index of `activeDTMSettings`, adding it on first use
+{
+	if (activeDTMSettingsIndex == InvalidDTMSettingsIdx) // nothing cached yet for the currently active settings
+		activeDTMSettingsIndex = addDTMSettings_SubmitIfNeeded(activeDTMSettings, intendedNextSubmit);
+
+	return activeDTMSettingsIndex;
+}
+
+uint32_t DrawResourcesFiller::addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit)
+{
+	// TODO: despite the name, no memory check or submit happens here yet; this only forwards to `addDTMSettings_Internal`
+	uint32_t outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit);
+	return outDTMSettingIdx;
+}
+
+// Converts `dtmSettingsInfo` into a DTMSettings record and returns its index in `resourcesCollection.dtmSettings`,
+// reusing an already stored record that compares equal instead of appending a duplicate.
+uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit)
+{
+	DTMSettings dtmSettings = {}; // value-initialize: fields not written below must not be indeterminate when compared and stored
+	switch (dtmSettingsInfo.heightShadingInfo.heightShadingMode)
+	{
+	case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS:
+		dtmSettings.heightShadingSettings.intervalLength = std::numeric_limits<float>::infinity();
+		break;
+	case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS:
+		dtmSettings.heightShadingSettings.intervalLength = dtmSettingsInfo.heightShadingInfo.intervalLength;
+		break;
+	case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS:
+		dtmSettings.heightShadingSettings.intervalLength = 0.0f;
+		break;
+	}
+	dtmSettings.heightShadingSettings.intervalIndexToHeightMultiplier = dtmSettingsInfo.heightShadingInfo.intervalIndexToHeightMultiplier;
+	dtmSettings.heightShadingSettings.isCenteredShading = static_cast<int>(dtmSettingsInfo.heightShadingInfo.isCenteredShading);
+	if (!dtmSettingsInfo.heightShadingInfo.fillShaderDTMSettingsHeightColorMap(dtmSettings)) // fails only when the map has too many entries
+		m_logger.log("addDTMSettings_Internal: height color map exceeds HeightColorMapMaxEntries, color map left empty", nbl::system::ILogger::ELL_ERROR);
+
+	for (uint32_t i = 0u; i < resourcesCollection.dtmSettings.vector.size(); ++i)
+	{
+		if (resourcesCollection.dtmSettings.vector[i] == dtmSettings)
+			return i;
+	}
+	return resourcesCollection.dtmSettings.addAndGetOffset(dtmSettings); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers
+}
\ No newline at end of file
diff --git a/75_CAD_3D/DrawResourcesFiller.h b/75_CAD_3D/DrawResourcesFiller.h
index aca81b409..c037b634d 100644
--- a/75_CAD_3D/DrawResourcesFiller.h
+++ b/75_CAD_3D/DrawResourcesFiller.h
@@ -127,8 +127,11 @@ struct DrawResourcesFiller
 	// TODO: rename to staged resources buffers or something like that
 	struct ResourcesCollection
 	{
+		// auto-submission level 0 resources (settings that mainObj references)
+		CPUGeneratedResource<DTMSettings> dtmSettings;
+
 		// auto-submission level 1 buffers (mainObj that drawObjs references, if all drawObjs+idxBuffer+geometryInfo doesn't fit into mem this will be broken down into many)
-		//CPUGeneratedResource<MainObject> mainObjects;
+		CPUGeneratedResource<MainObject> mainObjects;
 
 		// auto-submission level 2 buffers
 		CPUGeneratedResource<DrawObject> drawObjects;
@@ -140,6 +143,8 @@ struct DrawResourcesFiller
 		size_t calculateTotalConsumption() const
 		{
 			return
+				dtmSettings.getAlignedStorageSize() +
+				mainObjects.getAlignedStorageSize() +
 				drawObjects.getAlignedStorageSize() +
 				indexBuffer.getAlignedStorageSize() +
 				geometryInfo.getAlignedStorageSize();
@@ -162,11 +167,16 @@ struct DrawResourcesFiller
 	
 	void drawTriangleMesh(
 		const CTriangleMesh& mesh,
+		const DTMSettingsInfo& dtmSettingsInfo,
 		SIntendedSubmitInfo& intendedNextSubmit);
 
 	/// @brief  resets staging buffers and images
 	void reset()
 	{
+		resetDrawObjects();
+		resetMainObjects();
+		resetDTMSettings();
+
 		drawCalls.clear();
 	}
 
@@ -196,6 +206,60 @@ struct DrawResourcesFiller
 	/// @brief Records GPU copy commands for all staged buffer resourcesCollection into the active command buffer.
 	bool pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resourcesCollection);
 
+	// Gets resource index to the active main object data
+	// TODO: submit if overflow
+	uint32_t acquireActiveMainObjectIndex(SIntendedSubmitInfo& intendedNextSubmit);
+
+	uint32_t acquireActiveDTMSettingsIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit);
+
+	uint32_t addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit);
+
+	uint32_t addDTMSettings_Internal(const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit);
+
+	inline void beginMainObject(MainObjectType type) // opens a main-object scope of the given type
+	{
+		activeMainObjectType = type;
+		activeMainObjectIndex = InvalidMainObjectIdx; // drop any cached index so acquireActiveMainObjectIndex adds a fresh MainObject
+	}
+
+	inline void endMainObject() // closes the scope opened by beginMainObject
+	{
+		activeMainObjectType = MainObjectType::NONE; // acquireActiveMainObjectIndex asserts and returns InvalidMainObjectIdx while the type is NONE
+		activeMainObjectIndex = InvalidMainObjectIdx;
+	}
+
+	inline void setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo) // stores a copy of the settings to use for subsequent DTM main objects
+	{
+		activeDTMSettings = dtmSettingsInfo;
+		activeDTMSettingsIndex = InvalidDTMSettingsIdx; // invalidate the cached index so the new settings are looked up/added on the next acquire
+	}
+
+	inline const size_t calculateRemainingResourcesSize() const // size of resourcesGPUBuffer minus the total consumption of the staged resources
+	{
+		assert(resourcesGPUBuffer->getSize() >= resourcesCollection.calculateTotalConsumption());
+		return resourcesGPUBuffer->getSize() - resourcesCollection.calculateTotalConsumption(); // NOTE(review): unsigned subtraction wraps if the assert above is compiled out and consumption exceeds the size
+	}
+
+	void resetMainObjects() // discards all staged main objects
+	{
+		resourcesCollection.mainObjects.vector.clear();
+		activeMainObjectIndex = InvalidMainObjectIdx; // the cached index pointed into the vector that was just cleared
+	}
+
+	// Clears the staged draw objects, index buffer and geometry data; these resources are data related to chunks of a whole mainObject
+	void resetDrawObjects()
+	{
+		resourcesCollection.drawObjects.vector.clear();
+		resourcesCollection.indexBuffer.vector.clear();
+		resourcesCollection.geometryInfo.vector.clear();
+	}
+
+	void resetDTMSettings() // discards all staged DTM settings records
+	{
+		resourcesCollection.dtmSettings.vector.clear();
+		activeDTMSettingsIndex = InvalidDTMSettingsIdx; // the cached index pointed into the vector that was just cleared
+	}
+
 private:
 	nbl::system::logger_opt_smart_ptr m_logger = nullptr;
 
@@ -217,4 +281,11 @@ struct DrawResourcesFiller
 	size_t copiedResourcesSize;
 
 	SubmitFunc submitDraws;
+
+	// Active Resources we need to keep track of and push to resources buffer if needed.
+	MainObjectType activeMainObjectType = MainObjectType::NONE; // NONE == outside any beginMainObject/endMainObject scope; initialized so the check in acquireActiveMainObjectIndex never reads an indeterminate value
+	uint32_t activeMainObjectIndex = InvalidMainObjectIdx;
+
+	DTMSettingsInfo activeDTMSettings;
+	uint32_t activeDTMSettingsIndex = InvalidDTMSettingsIdx;
 };
\ No newline at end of file
diff --git a/75_CAD_3D/main.cpp b/75_CAD_3D/main.cpp
index 4e5bea76b..12f13d345 100644
--- a/75_CAD_3D/main.cpp
+++ b/75_CAD_3D/main.cpp
@@ -559,13 +559,60 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 
 		allocateResources();
 
-		const asset::SPushConstantRange range = {
+		// Create DescriptorSetLayout, PipelineLayout and update DescriptorSets
+		{
+			video::IGPUDescriptorSetLayout::SBinding bindingsSet0[] = {
+				{
+					.binding = 0u,
+					.type = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER,
+					.createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE,
+					.stageFlags = asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT,
+					.count = 1u,
+				}
+			};
+			m_descriptorSetLayout0 = m_device->createDescriptorSetLayout(bindingsSet0);
+			if (!m_descriptorSetLayout0)
+				return logFail("Failed to Create Descriptor Layout 0");
+
+			const asset::SPushConstantRange range = {
 			.stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT,
 			.offset = 0,
 			.size = sizeof(PushConstants)
-		};
+			};
+
+			const video::IGPUDescriptorSetLayout* const layouts[1u] = { m_descriptorSetLayout0.get() };
+
+			smart_refctd_ptr<IDescriptorPool> descriptorPool = nullptr;
+			{
+				const uint32_t setCounts[2u] = { 1u, 1u };
+				descriptorPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, layouts, setCounts);
+				if (!descriptorPool)
+					return logFail("Failed to Create Descriptor Pool");
+			}
 
-		m_pipelineLayout = m_device->createPipelineLayout({ &range,1 }, nullptr, nullptr, nullptr, nullptr);
+			// Update descriptor sets
+			{
+				m_descriptorSet0 = descriptorPool->createDescriptorSet(smart_refctd_ptr(m_descriptorSetLayout0));
+
+				video::IGPUDescriptorSet::SDescriptorInfo descriptorInfosSet0[1] = {};
+				descriptorInfosSet0[0u].info.buffer.offset = 0u;
+				descriptorInfosSet0[0u].info.buffer.size = m_globalsBuffer->getCreationParams().size;
+				descriptorInfosSet0[0u].desc = m_globalsBuffer;
+
+				video::IGPUDescriptorSet::SWriteDescriptorSet descriptorUpdates[1] = {};
+
+				// globals
+				descriptorUpdates[0u].dstSet = m_descriptorSet0.get();
+				descriptorUpdates[0u].binding = 0u;
+				descriptorUpdates[0u].arrayElement = 0u;
+				descriptorUpdates[0u].count = 1u;
+				descriptorUpdates[0u].info = &descriptorInfosSet0[0u];
+
+				m_device->updateDescriptorSets(1, descriptorUpdates, 0u, nullptr);
+			}
+
+			m_pipelineLayout = m_device->createPipelineLayout({ &range,1 }, core::smart_refctd_ptr(m_descriptorSetLayout0), nullptr, nullptr, nullptr);
+		}
 
 		smart_refctd_ptr<IShader> mainPipelineFragmentShaders = {};
 		smart_refctd_ptr<IShader> mainPipelineVertexShader = {};
@@ -683,7 +730,29 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		{
 			camera.beginInputProcessing(dtMilliseconds);
 			mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, m_logger.get());
-			keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); }, m_logger.get());
+			keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void
+			{
+					camera.keyboardProcess(events);
+
+				for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++)
+				{
+					auto ev = *eventIt;
+
+					if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_1)
+					{
+						m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS;
+					}
+					if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_2)
+					{
+						m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS;
+					}
+					if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_3)
+					{
+						m_shadingModeExample = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS;
+					}
+				}
+			}
+		, m_logger.get());
 			camera.endInputProcessing(dtMilliseconds);
 		}
 
@@ -791,8 +860,10 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		Globals globalData = {};
 		uint64_t baseAddress = resourcesGPUBuffer->getDeviceAddress();
 		globalData.pointers = {
-			.drawObjects			= baseAddress + resourcesCollection.drawObjects.bufferOffset,
-			.geometryBuffer			= baseAddress + resourcesCollection.geometryInfo.bufferOffset,
+			.mainObjects = baseAddress + resourcesCollection.mainObjects.bufferOffset,
+			.drawObjects = baseAddress + resourcesCollection.drawObjects.bufferOffset,
+			.geometryBuffer = baseAddress + resourcesCollection.geometryInfo.bufferOffset,
+			.dtmSettings = baseAddress + resourcesCollection.dtmSettings.bufferOffset,
 		};
 		SBufferRange<IGPUBuffer> globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer};
 		bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData);
@@ -880,6 +951,9 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		}
 		cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE);
 		
+		IGPUDescriptorSet* descriptorSets[] = { m_descriptorSet0.get() };
+		cb->bindDescriptorSets(asset::EPBP_GRAPHICS, m_pipelineLayout.get(), 0u, 1u, descriptorSets);
+
 		cb->bindGraphicsPipeline(m_graphicsPipeline.get());
 
 		for (auto& drawCall : drawResourcesFiller.getDrawCalls())
@@ -1010,15 +1084,63 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		mesh.setVertices(core::vector<TriangleMeshVertex>(vertices));
 		mesh.setIndices(std::move(indices));
 
+		DTMSettingsInfo dtmInfo{};
+
+		// PRESS 1, 2, 3 TO SWITCH HEIGHT SHADING MODE
+		// 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS
+		// 2 - DISCRETE_FIXED_LENGTH_INTERVALS
+		// 3 - CONTINOUS_INTERVALS
+		float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5;
+		switch (m_shadingModeExample)
+		{
+			case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS:
+			{
+				dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS;
+
+				dtmInfo.heightShadingInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f));
+				dtmInfo.heightShadingInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f));
+				dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha));
+				dtmInfo.heightShadingInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f));
+				dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f));
+
+				break;
+			}
+			case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS:
+			{
+				dtmInfo.heightShadingInfo.intervalLength = 10.0f;
+				dtmInfo.heightShadingInfo.intervalIndexToHeightMultiplier = dtmInfo.heightShadingInfo.intervalLength;
+				dtmInfo.heightShadingInfo.isCenteredShading = false;
+				dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS;
+				dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha));
+				dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha));
+				dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha));
+				dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha));
+				dtmInfo.heightShadingInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha));
+
+				break;
+			}
+			case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS:
+			{
+				dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS;
+				dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha));
+				dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha));
+				dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha));
+				dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha));
+				dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha));
+
+				break;
+			}
+		}
+
 		// pyramid A
-		drawResourcesFiller.drawTriangleMesh(mesh, intendedNextSubmit);
+		drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit);
 
 		// pyramid B
 		float64_t3 offset = { 500.0f, 0.0f, 0.0f };
 		for (auto& vertex : vertices)
 			vertex.pos += offset;
 		mesh.setVertices(std::move(vertices));
-		drawResourcesFiller.drawTriangleMesh(mesh, intendedNextSubmit);
+		drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit);
 	}
 
 protected:
@@ -1056,6 +1178,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 
 	uint64_t m_realFrameIx = 0u;
 
+	smart_refctd_ptr<IGPUDescriptorSetLayout> m_descriptorSetLayout0;
+	smart_refctd_ptr<IGPUDescriptorSet>	m_descriptorSet0;
 	smart_refctd_ptr<IGPUPipelineLayout> m_pipelineLayout;
 	smart_refctd_ptr<IGPUGraphicsPipeline> m_graphicsPipeline;
 
@@ -1065,6 +1189,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 	smart_refctd_ptr<IGPUImageView> colorStorageImageView;
 
 	Camera camera;
+
+	E_HEIGHT_SHADING_MODE m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS;
 };
 
 NBL_MAIN_FUNC(ComputerAidedDesign)
diff --git a/75_CAD_3D/shaders/globals.hlsl b/75_CAD_3D/shaders/globals.hlsl
index c080c7c4a..5c5791983 100644
--- a/75_CAD_3D/shaders/globals.hlsl
+++ b/75_CAD_3D/shaders/globals.hlsl
@@ -33,6 +33,17 @@ using pfloat64_t4 = nbl::hlsl::vector<float64_t, 4>;
 using pfloat64_t3x3 = portable_matrix_t3x3<pfloat64_t>;
 using pfloat64_t4x4 = portable_matrix_t4x4<pfloat64_t>;
 
+enum class MainObjectType : uint32_t
+{
+    NONE = 0u,
+    DTM,
+};
+
+struct MainObject
+{
+    uint32_t dtmSettingsIdx;
+};
+
 struct PushConstants
 {
     uint64_t triangleMeshVerticesBaseAddress;
@@ -42,20 +53,21 @@ struct PushConstants
 
 struct Pointers
 {
+    uint64_t mainObjects;
     uint64_t drawObjects;
     uint64_t geometryBuffer;
+    uint64_t dtmSettings;
 };
 #ifndef __HLSL_VERSION
-static_assert(sizeof(Pointers) == 16u);
+static_assert(sizeof(Pointers) == 32u);
 #endif
 
 struct Globals
 {
     Pointers pointers;
-    pfloat64_t4x4 defaultProjectionToNDC;
 };
 #ifndef __HLSL_VERSION
-static_assert(sizeof(Globals) == 144u);
+static_assert(sizeof(Globals) == 32u);
 #endif
 
 struct DrawObject
@@ -70,12 +82,90 @@ struct TriangleMeshVertex
     pfloat64_t3 pos;
 };
 
+enum class E_HEIGHT_SHADING_MODE : uint32_t
+{
+    DISCRETE_VARIABLE_LENGTH_INTERVALS,
+    DISCRETE_FIXED_LENGTH_INTERVALS,
+    CONTINOUS_INTERVALS
+};
+    
+struct DTMHeightShadingSettings
+{
+    const static uint32_t HeightColorMapMaxEntries = 16u;
+    
+    // height-color map
+    float intervalLength;
+    float intervalIndexToHeightMultiplier;
+    int isCenteredShading;
+    
+    uint32_t heightColorEntryCount;
+    float heightColorMapHeights[HeightColorMapMaxEntries];
+    float32_t4 heightColorMapColors[HeightColorMapMaxEntries];
+    
+    E_HEIGHT_SHADING_MODE determineHeightShadingMode()
+    {
+        if (nbl::hlsl::isinf(intervalLength))
+            return E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS;
+        if (intervalLength == 0.0f)
+            return E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS;
+        return E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS;
+    }
+};
+
+struct DTMSettings
+{
+    // height shading
+    DTMHeightShadingSettings heightShadingSettings;
+};
+    
+#ifndef __HLSL_VERSION
+// Field-wise equality of two DTMSettings; the height-shading settings are the only member compared.
+// Scalar fields are checked first, then the first heightColorEntryCount entries of each colour-map array.
+inline bool operator==(const DTMSettings& lhs, const DTMSettings& rhs)
+{
+    const DTMHeightShadingSettings& a = lhs.heightShadingSettings;
+    const DTMHeightShadingSettings& b = rhs.heightShadingSettings;
+
+    // NOTE: the comparison is unconditional; the original carried a commented-out lhs.drawHeightShadingEnabled() gate.
+    const bool scalarsMatch =
+        a.intervalLength == b.intervalLength &&
+        a.intervalIndexToHeightMultiplier == b.intervalIndexToHeightMultiplier &&
+        a.isCenteredShading == b.isCenteredShading &&
+        a.heightColorEntryCount == b.heightColorEntryCount;
+    if (!scalarsMatch)
+        return false;
+
+    // Entry counts are equal here, so either side's count can size the bytewise array comparisons.
+    const uint32_t entryCount = a.heightColorEntryCount;
+    if (memcmp(a.heightColorMapHeights, b.heightColorMapHeights, entryCount * sizeof(float)) != 0)
+        return false;
+
+    return memcmp(a.heightColorMapColors, b.heightColorMapColors, entryCount * sizeof(float32_t4)) == 0;
+}
+#endif
+
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MainObjectIdxBits = 24u; // It will be packed next to alpha in a texture
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MaxIndexableMainObjects = (1u << MainObjectIdxBits) - 1u;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidDTMSettingsIdx = nbl::hlsl::numeric_limits<uint32_t>::max;
+
 #ifdef __HLSL_VERSION
 [[vk::binding(0, 0)]] ConstantBuffer<Globals> globals : register(b0);
+    
+MainObject loadMainObject(const uint32_t index)
+{
+    return vk::RawBufferLoad<MainObject>(globals.pointers.mainObjects + index * sizeof(MainObject), 4u);
+}
+DTMSettings loadDTMSettings(const uint32_t index)
+{
+    return vk::RawBufferLoad<DTMSettings>(globals.pointers.dtmSettings + index * sizeof(DTMSettings), 4u);
+}
+    
 #else
+static_assert(alignof(MainObject)==4u);
+static_assert(alignof(DTMSettings)==4u);
 static_assert(alignof(pfloat64_t3x3)==8u);
 static_assert(alignof(DrawObject)==8u);
 #endif
 
-
 #endif
diff --git a/75_CAD_3D/shaders/main_pipeline/common.hlsl b/75_CAD_3D/shaders/main_pipeline/common.hlsl
index 7866b5b8c..87dd5407a 100644
--- a/75_CAD_3D/shaders/main_pipeline/common.hlsl
+++ b/75_CAD_3D/shaders/main_pipeline/common.hlsl
@@ -7,6 +7,7 @@ struct PSInput
 {
     [[vk::location(0)]] float4 position : SV_Position;
     [[vk::location(1)]] float3 normal : COLOR1;
+    [[vk::location(2)]] float height : COLOR2;
 };
 
 // Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated
diff --git a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
index d61b99275..8017ed404 100644
--- a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
+++ b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
@@ -1,7 +1,8 @@
+#pragma shader_stage(fragment)
+
 #include "common.hlsl"
 
 static const float32_t3 SunlightDirection = float32_t3(0.7071f, -0.7071f, 0.0f);
-static const float32_t3 TerrainColor = float32_t3(1.0f, 1.0f, 1.0f);
 
 [shader("pixel")]
 float4 fragMain(PSInput input) : SV_Target
@@ -9,7 +10,12 @@ float4 fragMain(PSInput input) : SV_Target
 	static const float AmbientLightIntensity = 0.1f;
 	const float diffuseLightIntensity = max(dot(-SunlightDirection, input.normal), 0.0f);
 
-	const float32_t3 fragColor = (AmbientLightIntensity + diffuseLightIntensity) * TerrainColor;
+	MainObject mainObj = loadMainObject(pc.triangleMeshMainObjectIndex);
+	DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx);
+		
+	const float32_t3 HeightColor = input.height < 50.0f ? float32_t3(0.0, 1.0, 0.0) : (input.height < 75.0f ? float32_t3(1.0, 1.0, 0.0) : float32_t3(1.0, 0.0, 0.0));
+		
+	const float32_t3 fragColor = (AmbientLightIntensity + diffuseLightIntensity) * HeightColor;
 
 	return float32_t4(fragColor, 1.0f);
 }
diff --git a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
index 98996ba79..a0f256d60 100644
--- a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
+++ b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
@@ -37,6 +37,9 @@ PSInput vtxMain(uint vertexID : SV_VertexID)
 
 
     outV.position = _static_cast<float4>(pos);
+    
+    // TODO: we want to separate height from the Y coordinate I guess?
+    outV.height = _static_cast<float>(pos.y);
 
     //pos = mul(pc.viewProjectionMatrix, pos);
     // TODO: use pc.viewProjectionMatrix and multiply it with pfloat64_t4 pos instead fix portable_matrix with portable_float multiplication

From 28f1e340cbab2ca3c256ab3dcaa1c501ed4695c3 Mon Sep 17 00:00:00 2001
From: Przemog1 <minikers21@gmail.com>
Date: Thu, 19 Mar 2026 20:23:00 +0100
Subject: [PATCH 6/9] Implemented DTM modes

---
 75_CAD_3D/shaders/globals.hlsl                |   4 +-
 75_CAD_3D/shaders/main_pipeline/common.hlsl   |  30 ++++-
 75_CAD_3D/shaders/main_pipeline/dtm.hlsl      | 106 ++++++++++++++++++
 .../main_pipeline/fragment_shader.hlsl        |  28 +++--
 .../shaders/main_pipeline/vertex_shader.hlsl  |  11 +-
 5 files changed, 156 insertions(+), 23 deletions(-)
 create mode 100644 75_CAD_3D/shaders/main_pipeline/dtm.hlsl

diff --git a/75_CAD_3D/shaders/globals.hlsl b/75_CAD_3D/shaders/globals.hlsl
index 5c5791983..d83ffca7d 100644
--- a/75_CAD_3D/shaders/globals.hlsl
+++ b/75_CAD_3D/shaders/globals.hlsl
@@ -1,5 +1,5 @@
-#ifndef _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_
-#define _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_
+#ifndef _CAD_3D_EXAMPLE_GLOBALS_HLSL_INCLUDED_
+#define _CAD_3D_EXAMPLE_GLOBALS_HLSL_INCLUDED_
 
 // TODO[Erfan]: Turn off in the future, but keep enabled to test
 // #define NBL_FORCE_EMULATED_FLOAT_64
diff --git a/75_CAD_3D/shaders/main_pipeline/common.hlsl b/75_CAD_3D/shaders/main_pipeline/common.hlsl
index 87dd5407a..8fc59e1ee 100644
--- a/75_CAD_3D/shaders/main_pipeline/common.hlsl
+++ b/75_CAD_3D/shaders/main_pipeline/common.hlsl
@@ -1,16 +1,34 @@
-#ifndef _CAD_EXAMPLE_MAIN_PIPELINE_COMMON_HLSL_INCLUDED_
-#define _CAD_EXAMPLE_MAIN_PIPELINE_COMMON_HLSL_INCLUDED_
+#ifndef _CAD_3D_EXAMPLE_MAIN_PIPELINE_COMMON_HLSL_INCLUDED_
+#define _CAD_3D_EXAMPLE_MAIN_PIPELINE_COMMON_HLSL_INCLUDED_
 
 #include "../globals.hlsl"
 
 struct PSInput
 {
     [[vk::location(0)]] float4 position : SV_Position;
-    [[vk::location(1)]] float3 normal : COLOR1;
-    [[vk::location(2)]] float height : COLOR2;
-};
 
-// Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated
+    [[vk::location(1)]] nointerpolation float4 data1 : COLOR1;
+    [[vk::location(2)]] float4 interpolatedData1 : COLOR2;
+
+    // TODO: do we even need vertexScreenSpacePos?
+#ifndef FRAGMENT_SHADER_INPUT // vertex shader
+    [[vk::location(3)]] float3 vertexScreenSpacePos : COLOR3;
+#else
+    [[vk::location(3)]] [[vk::ext_decorate(/*spv::DecoratePerVertexKHR*/5285)]] float3 vertexScreenSpacePos[3] : COLOR3;
+#endif
+
+    void setNormal(NBL_CONST_REF_ARG(float3) normal) { data1.xyz = normal; }
+    float3 getNormal() { return data1.xyz; }
+
+    void setHeight(float height) { interpolatedData1.x = height; }
+    float getHeight() { return interpolatedData1.x; }
+
+#ifndef FRAGMENT_SHADER_INPUT // vertex shader
+    void setScreenSpaceVertexAttribs(float3 pos) { vertexScreenSpacePos = pos; }
+#else // fragment shader
+    float3 getScreenSpaceVertexAttribs(uint32_t vertexIndex) { return vertexScreenSpacePos[vertexIndex]; }
+#endif
+};
 
 // [[vk::binding(0, 0)]] ConstantBuffer<Globals> globals; ---> moved to globals.hlsl
 
diff --git a/75_CAD_3D/shaders/main_pipeline/dtm.hlsl b/75_CAD_3D/shaders/main_pipeline/dtm.hlsl
new file mode 100644
index 000000000..60320647f
--- /dev/null
+++ b/75_CAD_3D/shaders/main_pipeline/dtm.hlsl
@@ -0,0 +1,106 @@
+#ifndef _CAD_3D_EXAMPLE_DTM_HLSL_INCLUDED_
+#define _CAD_3D_EXAMPLE_DTM_HLSL_INCLUDED_
+
+#include "common.hlsl"
+
+namespace dtm
+{
+
+// for usage in upper_bound function
+struct DTMSettingsHeightsAccessor
+{
+    DTMHeightShadingSettings settings;
+    using value_type = float;
+
+    float operator[](const uint32_t ix)
+    {
+        return settings.heightColorMapHeights[ix];
+    }
+};
+
+float getIntervalPosition(in float height, in float minHeight, in float intervalLength, in bool isCenteredShading)
+{
+    if (isCenteredShading)
+        return ((height - minHeight) / intervalLength + 0.5f);
+    else
+        return ((height - minHeight) / intervalLength);
+}
+
+float32_t4 calcIntervalColor(in int intervalIndex, in DTMHeightShadingSettings settings)
+{
+    const float minShadingHeight = settings.heightColorMapHeights[0];
+    float heightForColor = minShadingHeight + float(intervalIndex) * settings.intervalIndexToHeightMultiplier;
+
+    DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings };
+    int32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, settings.heightColorEntryCount, heightForColor), settings.heightColorEntryCount - 1u);
+    int32_t lowerBoundHeightIndex = max(upperBoundHeightIndex - 1, 0);
+
+    float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex];
+    float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex];
+
+    float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex];
+    float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex];
+
+    if (upperBoundHeight == lowerBoundHeight)
+    {
+        return upperBoundColor;
+    }
+    else
+    {
+        float interpolationVal = (heightForColor - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight);
+        return lerp(lowerBoundColor, upperBoundColor, interpolationVal);
+    }
+}
+
+float32_t4 calculateDTMHeightColor(in DTMHeightShadingSettings settings, in float3 triangleVertices[3], in float2 fragPos, in float height)
+{
+    const uint32_t heightMapSize = settings.heightColorEntryCount;
+    if(heightMapSize == 0)
+        return float32_t4(0.0f, 0.0f, 0.0f, 0.0f);
+
+
+    const E_HEIGHT_SHADING_MODE mode = settings.determineHeightShadingMode();
+    if(mode == E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS)
+    {
+        DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings };
+        const int upperBoundIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0u, heightMapSize, height), heightMapSize - 1u);
+        const int mapIndex = max(upperBoundIndex - 1, 0);
+
+        return settings.heightColorMapColors[mapIndex];
+    }
+    else if(mode == E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS)
+    {
+        const float minShadingHeight = settings.heightColorMapHeights[0];
+        const float intervalPosition = getIntervalPosition(height, minShadingHeight, settings.intervalLength, settings.isCenteredShading);
+        const float positionWithinInterval = frac(intervalPosition);
+        const int intervalIndex = nbl::hlsl::_static_cast<int>(intervalPosition);
+
+        return calcIntervalColor(intervalIndex, settings);
+    }
+    else if(mode == E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS)
+    {
+        DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings };
+        uint32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0u, heightMapSize - 1u, height), heightMapSize - 1u);
+        uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? upperBoundHeightIndex : upperBoundHeightIndex - 1;
+
+        float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex];
+        float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex];
+
+        float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex];
+        float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex];
+
+        // Clamp so heights above the last entry take its colour instead of extrapolating past it;
+        // a zero-width span (index 0, or equal bounding heights) would divide by zero, so it resolves to the upper colour.
+        float interpolationVal = 1.0f;
+        if (upperBoundHeight != lowerBoundHeight)
+            interpolationVal = saturate((height - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight));
+
+        return lerp(lowerBoundColor, upperBoundColor, interpolationVal);
+    }
+
+    return float32_t4(0.0f, 0.0f, 0.0f, 0.0f);
+}
+
+}
+
+#endif
\ No newline at end of file
diff --git a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
index 8017ed404..14f2e74c0 100644
--- a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
+++ b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
@@ -1,5 +1,7 @@
+#define FRAGMENT_SHADER_INPUT
 #pragma shader_stage(fragment)
 
+#include "dtm.hlsl"
 #include "common.hlsl"
 
 static const float32_t3 SunlightDirection = float32_t3(0.7071f, -0.7071f, 0.0f);
@@ -8,14 +10,22 @@ static const float32_t3 SunlightDirection = float32_t3(0.7071f, -0.7071f, 0.0f);
 float4 fragMain(PSInput input) : SV_Target
 {
 	static const float AmbientLightIntensity = 0.1f;
-	const float diffuseLightIntensity = max(dot(-SunlightDirection, input.normal), 0.0f);
+	const float diffuseLightIntensity = max(dot(-SunlightDirection, input.getNormal()), 0.0f);
 
-	MainObject mainObj = loadMainObject(pc.triangleMeshMainObjectIndex);
-	DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx);
-		
-	const float32_t3 HeightColor = input.height < 50.0f ? float32_t3(0.0, 1.0, 0.0) : (input.height < 75.0f ? float32_t3(1.0, 1.0, 0.0) : float32_t3(1.0, 0.0, 0.0));
-		
-	const float32_t3 fragColor = (AmbientLightIntensity + diffuseLightIntensity) * HeightColor;
+	const MainObject mainObj = loadMainObject(pc.triangleMeshMainObjectIndex);
+	const DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx);
+	
+	float32_t3 triangleVertices[3];
+    triangleVertices[0] = input.getScreenSpaceVertexAttribs(0);
+    triangleVertices[1] = input.getScreenSpaceVertexAttribs(1);
+    triangleVertices[2] = input.getScreenSpaceVertexAttribs(2);
 
-	return float32_t4(fragColor, 1.0f);
-}
+	const float height = input.getHeight();
+	//const float32_t3 HeightColor = height < 0.0f ? float32_t3(0.0f, 0.0f, 1.0f) : height < 50.0f ? float32_t3(0.0, 1.0, 0.0) : (height < 75.0f ? float32_t3(1.0, 1.0, 0.0) : float32_t3(1.0, 0.0, 0.0));
+
+	const float32_t4 HeightColor = dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, triangleVertices, input.position.xy, height);
+
+	const float32_t4 fragColor = (AmbientLightIntensity + diffuseLightIntensity) * HeightColor;
+
+	return float32_t4(fragColor.rgb, HeightColor.a); // lighting must only scale the colour; alpha stays as authored in the height-colour map
+}
\ No newline at end of file
diff --git a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
index a0f256d60..c82ab696d 100644
--- a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
+++ b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
@@ -26,7 +26,7 @@ PSInput vtxMain(uint vertexID : SV_VertexID)
         float32_t3 triangleEdge0 = vertex1 - vertex0;
         float32_t3 triangleEdge1 = vertex2 - vertex0;
 
-        outV.normal = (normalize(cross(triangleEdge1, triangleEdge0)) + 1.0f) * 0.5f;
+        outV.setNormal((normalize(cross(triangleEdge1, triangleEdge0)) + 1.0f) * 0.5f);
     }
 
     pfloat64_t4 pos;
@@ -36,10 +36,7 @@ PSInput vtxMain(uint vertexID : SV_VertexID)
     pos.w = _static_cast<pfloat64_t>(1.0f);
 
 
-    outV.position = _static_cast<float4>(pos);
-    
-    // TODO: we want to separate height from the Y coordinate I guess?
-    outV.height = _static_cast<float>(pos.y);
+    outV.setHeight(_static_cast<float>(pos.y));
 
     //pos = mul(pc.viewProjectionMatrix, pos);
     // TODO: use pc.viewProjectionMatrix and multiply it with pfloat64_t4 pos instead fix portable_matrix with portable_float multiplication
@@ -52,6 +49,8 @@ PSInput vtxMain(uint vertexID : SV_VertexID)
         viewProjMatrix[i][3] = _static_cast<float>(pc.viewProjectionMatrix[i].w);
     }
 
+    outV.setScreenSpaceVertexAttribs(_static_cast<float4>(pos).xyz);
+
     /*if (vertexID == 0)
     {
         printf("%f, %f, %f, %f", a[0][0], a[0][1], a[0][2], a[0][3]);
@@ -60,7 +59,7 @@ PSInput vtxMain(uint vertexID : SV_VertexID)
         printf("%f, %f, %f, %f", a[3][0], a[3][1], a[3][2], a[3][3]);
     }*/
 
-    outV.position = mul(viewProjMatrix, outV.position);
+    outV.position = mul(viewProjMatrix, _static_cast<float4>(pos));
 
     return outV;
 }

From 29f1b9278bc1483d2edacba8a37faf175cabc189 Mon Sep 17 00:00:00 2001
From: Przemog1 <minikers21@gmail.com>
Date: Sat, 21 Mar 2026 12:42:20 +0100
Subject: [PATCH 7/9] Implemented height shading anti aliasing

---
 75_CAD_3D/main.cpp                            |  1 +
 75_CAD_3D/shaders/globals.hlsl                |  4 +-
 75_CAD_3D/shaders/main_pipeline/dtm.hlsl      | 97 +++++++++++++++++--
 .../main_pipeline/fragment_shader.hlsl        |  4 +-
 4 files changed, 97 insertions(+), 9 deletions(-)

diff --git a/75_CAD_3D/main.cpp b/75_CAD_3D/main.cpp
index 12f13d345..f24d4d06a 100644
--- a/75_CAD_3D/main.cpp
+++ b/75_CAD_3D/main.cpp
@@ -865,6 +865,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 			.geometryBuffer = baseAddress + resourcesCollection.geometryInfo.bufferOffset,
 			.dtmSettings = baseAddress + resourcesCollection.dtmSettings.bufferOffset,
 		};
+		globalData.antiAliasingFactor = 1.0f;
 		SBufferRange<IGPUBuffer> globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer};
 		bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData);
 		assert(updateSuccess);
diff --git a/75_CAD_3D/shaders/globals.hlsl b/75_CAD_3D/shaders/globals.hlsl
index d83ffca7d..544f05516 100644
--- a/75_CAD_3D/shaders/globals.hlsl
+++ b/75_CAD_3D/shaders/globals.hlsl
@@ -65,9 +65,11 @@ static_assert(sizeof(Pointers) == 32u);
 struct Globals
 {
     Pointers pointers;
+    float32_t antiAliasingFactor;
+    float32_t __padding;
 };
 #ifndef __HLSL_VERSION
-static_assert(sizeof(Globals) == 32u);
+static_assert(sizeof(Globals) == 40u);
 #endif
 
 struct DrawObject
diff --git a/75_CAD_3D/shaders/main_pipeline/dtm.hlsl b/75_CAD_3D/shaders/main_pipeline/dtm.hlsl
index 60320647f..cf85766dd 100644
--- a/75_CAD_3D/shaders/main_pipeline/dtm.hlsl
+++ b/75_CAD_3D/shaders/main_pipeline/dtm.hlsl
@@ -18,6 +18,56 @@ struct DTMSettingsHeightsAccessor
     }
 };
 
+struct HeightSegmentTransitionData
+{
+    float currentHeight;
+    float4 currentSegmentColor;
+    float boundaryHeight;
+    float4 otherSegmentColor;
+};
+
+void getIntervalHeightAndColor(in int intervalIndex, in DTMHeightShadingSettings settings, out float4 outIntervalColor, out float outIntervalHeight)
+{
+    float minShadingHeight = settings.heightColorMapHeights[0];
+    float heightForColor = minShadingHeight + float(intervalIndex) * settings.intervalIndexToHeightMultiplier;
+
+    if (settings.isCenteredShading)
+        outIntervalHeight = minShadingHeight + (float(intervalIndex) - 0.5) * settings.intervalLength;
+    else
+        outIntervalHeight = minShadingHeight + (float(intervalIndex)) * settings.intervalLength;
+
+    DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings };
+    int32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, settings.heightColorEntryCount, heightForColor), settings.heightColorEntryCount - 1u);
+    int32_t lowerBoundHeightIndex = max(upperBoundHeightIndex - 1, 0);
+
+    float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex];
+    float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex];
+
+    float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex];
+    float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex];
+
+    if (upperBoundHeight == lowerBoundHeight)
+    {
+        outIntervalColor = upperBoundColor;
+    }
+    else
+    {
+        float interpolationVal = (heightForColor - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight);
+        outIntervalColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal);
+    }
+}
+
+// Blends the current segment colour with the neighbouring segment colour according to the screen-space
+// distance (in pixels) to their shared boundary height: an even mix on the boundary, fading to the pure current
+// colour once that distance reaches globals.antiAliasingFactor. heightDeriv is the per-pixel change of height.
+float4 smoothHeightSegmentTransition(in HeightSegmentTransitionData transitionInfo, in float heightDeriv)
+{
+    const float safeHeightDeriv = max(abs(heightDeriv), 1.175494351e-38f); // FLT_MIN floor: heightDeriv is 0 on flat triangles and 0/0 would yield NaN
+    const float pxDistanceToNearestSegment = abs(transitionInfo.currentHeight - transitionInfo.boundaryHeight) / safeHeightDeriv;
+    const float currentSegmentColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNearestSegment);
+    return lerp(transitionInfo.otherSegmentColor, transitionInfo.currentSegmentColor, currentSegmentColorCoverage);
+}
+
 float getIntervalPosition(in float height, in float minHeight, in float intervalLength, in bool isCenteredShading)
 {
     if (isCenteredShading)
@@ -52,21 +102,34 @@ float32_t4 calcIntervalColor(in int intervalIndex, in DTMHeightShadingSettings s
     }
 }
 
-float32_t4 calculateDTMHeightColor(in DTMHeightShadingSettings settings, in float3 triangleVertices[3], in float2 fragPos, in float height)
+float32_t4 calculateDTMHeightColor(in DTMHeightShadingSettings settings, in float heightDeriv, in float3 triangleVertices[3], in float2 fragPos, in float height)
 {
     const uint32_t heightMapSize = settings.heightColorEntryCount;
     if(heightMapSize == 0)
         return float32_t4(0.0f, 0.0f, 0.0f, 0.0f);
-
-
+    
     const E_HEIGHT_SHADING_MODE mode = settings.determineHeightShadingMode();
     if(mode == E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS)
     {
         DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings };
         const int upperBoundIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0u, heightMapSize, height), heightMapSize - 1u);
         const int mapIndex = max(upperBoundIndex - 1, 0);
-
-        return settings.heightColorMapColors[mapIndex];
+        int mapIndexPrev = max(mapIndex - 1, 0);
+        int mapIndexNext = min(mapIndex + 1, heightMapSize - 1);
+
+        // Pick the neighbour to blend with: the first entry (mapIndex == 0) has no previous segment, so it blends with the next;
+        // the last entry (mapIndex >= heightMapSize - 1) always blends with the previous;
+        // any entry in between blends with the previous only when height lies below the midpoint of its two bounding heights.
+        bool blendWithPrev = (mapIndex > 0)
+            && (mapIndex >= heightMapSize - 1 || (height * 2.0 < settings.heightColorMapHeights[upperBoundIndex] + settings.heightColorMapHeights[mapIndex]));
+
+        HeightSegmentTransitionData transitionInfo;
+        transitionInfo.currentHeight = height;
+        transitionInfo.currentSegmentColor = settings.heightColorMapColors[mapIndex];
+        transitionInfo.boundaryHeight = blendWithPrev ? settings.heightColorMapHeights[mapIndex] : settings.heightColorMapHeights[mapIndexNext];
+        transitionInfo.otherSegmentColor = blendWithPrev ? settings.heightColorMapColors[mapIndexPrev] : settings.heightColorMapColors[mapIndexNext];
+
+        return smoothHeightSegmentTransition(transitionInfo, heightDeriv);
     }
     else if(mode == E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS)
     {
@@ -75,7 +138,29 @@ float32_t4 calculateDTMHeightColor(in DTMHeightShadingSettings settings, in floa
         const float positionWithinInterval = frac(intervalPosition);
         const int intervalIndex = nbl::hlsl::_static_cast<int>(intervalPosition);
 
-        return calcIntervalColor(intervalIndex, settings);
+        float4 currentIntervalColor;
+        float currentIntervalHeight;
+        getIntervalHeightAndColor(intervalIndex, settings, currentIntervalColor, currentIntervalHeight);
+
+        bool blendWithPrev = (positionWithinInterval < 0.5f);
+
+        HeightSegmentTransitionData transitionInfo;
+        transitionInfo.currentHeight = height;
+        transitionInfo.currentSegmentColor = currentIntervalColor;
+        if (blendWithPrev)
+        {
+            int prevIntervalIdx = max(intervalIndex - 1, 0);
+            float prevIntervalHeight; // unused, the currentIntervalHeight is the boundary height between current and prev
+            getIntervalHeightAndColor(prevIntervalIdx, settings, transitionInfo.otherSegmentColor, prevIntervalHeight);
+            transitionInfo.boundaryHeight = currentIntervalHeight;
+        }
+        else
+        {
+            int nextIntervalIdx = intervalIndex + 1;
+            getIntervalHeightAndColor(nextIntervalIdx, settings, transitionInfo.otherSegmentColor, transitionInfo.boundaryHeight);
+        }
+
+        return smoothHeightSegmentTransition(transitionInfo, heightDeriv);
     }
     else if(mode == E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS)
     {
diff --git a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
index 14f2e74c0..33c5a3240 100644
--- a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
+++ b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
@@ -21,9 +21,9 @@ float4 fragMain(PSInput input) : SV_Target
     triangleVertices[2] = input.getScreenSpaceVertexAttribs(2);
 
 	const float height = input.getHeight();
-	//const float32_t3 HeightColor = height < 0.0f ? float32_t3(0.0f, 0.0f, 1.0f) : height < 50.0f ? float32_t3(0.0, 1.0, 0.0) : (height < 75.0f ? float32_t3(1.0, 1.0, 0.0) : float32_t3(1.0, 0.0, 0.0));
+	const float heightDeriv = fwidth(height);
 
-	const float32_t4 HeightColor = dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, triangleVertices, input.position.xy, height);
+	const float32_t4 HeightColor = dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, heightDeriv, triangleVertices, input.position.xy, height);
 
 	const float32_t4 fragColor = (AmbientLightIntensity + diffuseLightIntensity) * HeightColor;
 

From 852af8af4828cae0f94d79f623d522065928bb2e Mon Sep 17 00:00:00 2001
From: Przemog1 <minikers21@gmail.com>
Date: Tue, 24 Mar 2026 15:04:23 +0100
Subject: [PATCH 8/9] Fixed normal calculation

---
 75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
index c82ab696d..59482c557 100644
--- a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
+++ b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
@@ -26,7 +26,7 @@ PSInput vtxMain(uint vertexID : SV_VertexID)
         float32_t3 triangleEdge0 = vertex1 - vertex0;
         float32_t3 triangleEdge1 = vertex2 - vertex0;
 
-        outV.setNormal((normalize(cross(triangleEdge1, triangleEdge0)) + 1.0f) * 0.5f);
+        outV.setNormal(normalize(cross(triangleEdge1, triangleEdge0)));
     }
 
     pfloat64_t4 pos;

From efbf6ce01369a4ee843b61fb3588fbe74c4004a9 Mon Sep 17 00:00:00 2001
From: Przemog1 <minikers21@gmail.com>
Date: Tue, 7 Apr 2026 14:02:13 +0200
Subject: [PATCH 9/9] Fixes

---
 75_CAD_3D/DTMMeshes.h                         | 26 ++++++++++++++
 75_CAD_3D/main.cpp                            | 35 ++-----------------
 .../main_pipeline/fragment_shader.hlsl        |  2 +-
 .../shaders/main_pipeline/vertex_shader.hlsl  |  1 +
 4 files changed, 31 insertions(+), 33 deletions(-)
 create mode 100644 75_CAD_3D/DTMMeshes.h

diff --git a/75_CAD_3D/DTMMeshes.h b/75_CAD_3D/DTMMeshes.h
new file mode 100644
index 000000000..3ebefd5ad
--- /dev/null
+++ b/75_CAD_3D/DTMMeshes.h
@@ -0,0 +1,26 @@
+#pragma once
+
+// pyramid
+#if 1
+inline core::vector<TriangleMeshVertex> DTMMainMeshVertices = { // `inline`: header-defined global, must stay a single definition if more than one .cpp includes this file
+	{ float64_t3(0.0, 100.0, 0.0) }, // triangle 0 (vertices 0-2); the same apex position opens every triangle below
+	{ float64_t3(-200.0, 10.0, -200.0) },
+	{ float64_t3(200.0, 10.0, -100.0) },
+	{ float64_t3(0.0, 100.0, 0.0) }, // triangle 1 (vertices 3-5)
+	{ float64_t3(200.0, 10.0, -100.0) },
+	{ float64_t3(200.0, -20.0, 200.0) },
+	{ float64_t3(0.0, 100.0, 0.0) }, // triangle 2 (vertices 6-8)
+	{ float64_t3(200.0, -20.0, 200.0) },
+	{ float64_t3(-200.0, 10.0, 200.0) },
+	{ float64_t3(0.0, 100.0, 0.0) }, // triangle 3 (vertices 9-11)
+	{ float64_t3(-200.0, 10.0, 200.0) },
+	{ float64_t3(-200.0, 10.0, -200.0) },
+}; // vertices are duplicated per triangle rather than shared between faces
+
+inline core::vector<uint32_t> DTMMainMeshIndices = { // `inline`: header-defined global, must stay a single definition if more than one .cpp includes this file
+	0, 1, 2, // one triangle per row; indices are sequential because DTMMainMeshVertices stores three unique entries per triangle
+	3, 4, 5,
+	6, 7, 8,
+	9, 10, 11
+}; // 12 indices -> 4 triangles
+#endif
\ No newline at end of file
diff --git a/75_CAD_3D/main.cpp b/75_CAD_3D/main.cpp
index f24d4d06a..1b755eebd 100644
--- a/75_CAD_3D/main.cpp
+++ b/75_CAD_3D/main.cpp
@@ -16,6 +16,7 @@ using namespace video;
 
 #include "nbl/builtin/hlsl/math/linalg/transform.hlsl"
 #include "nbl/builtin/hlsl/math/thin_lens_projection.hlsl"
+#include "DTMMeshes.h"
 
 class CEventCallback : public ISimpleManagedSurface::ICallback
 {
@@ -1059,31 +1060,9 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 		);
 		drawResourcesFiller.reset();
 
-		core::vector<TriangleMeshVertex> vertices = {
-			{ float64_t3(0.0, 100.0, 0.0) },
-			{ float64_t3(-200.0, 10.0, -200.0) },
-			{ float64_t3(200.0, 10.0, -100.0) },
-			{ float64_t3(0.0, 100.0, 0.0) },
-			{ float64_t3(200.0, 10.0, -100.0) },
-			{ float64_t3(200.0, -20.0, 200.0) },
-			{ float64_t3(0.0, 100.0, 0.0) },
-			{ float64_t3(200.0, -20.0, 200.0) },
-			{ float64_t3(-200.0, 10.0, 200.0) },
-			{ float64_t3(0.0, 100.0, 0.0) },
-			{ float64_t3(-200.0, 10.0, 200.0) },
-			{ float64_t3(-200.0, 10.0, -200.0) },
-		};
-
-		core::vector<uint32_t> indices = {
-			0, 1, 2,
-			3, 4, 5,
-			6, 7, 8,
-			9, 10, 11
-		};
-
 		CTriangleMesh mesh;
-		mesh.setVertices(core::vector<TriangleMeshVertex>(vertices));
-		mesh.setIndices(std::move(indices));
+		mesh.setVertices(core::vector<TriangleMeshVertex>(DTMMainMeshVertices));
+		mesh.setIndices(core::vector<uint32_t>(DTMMainMeshIndices));
 
 		DTMSettingsInfo dtmInfo{};
 
@@ -1133,14 +1112,6 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio
 			}
 		}
 
-		// pyramid A
-		drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit);
-
-		// pyramid B
-		float64_t3 offset = { 500.0f, 0.0f, 0.0f };
-		for (auto& vertex : vertices)
-			vertex.pos += offset;
-		mesh.setVertices(std::move(vertices));
 		drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit);
 	}
 
diff --git a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
index 33c5a3240..aad91afbd 100644
--- a/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
+++ b/75_CAD_3D/shaders/main_pipeline/fragment_shader.hlsl
@@ -10,7 +10,7 @@ static const float32_t3 SunlightDirection = float32_t3(0.7071f, -0.7071f, 0.0f);
 float4 fragMain(PSInput input) : SV_Target
 {
 	static const float AmbientLightIntensity = 0.1f;
-	const float diffuseLightIntensity = max(dot(-SunlightDirection, input.getNormal()), 0.0f);
+	const float diffuseLightIntensity = max(dot(-SunlightDirection, normalize(input.getNormal())), 0.0f);
 
 	const MainObject mainObj = loadMainObject(pc.triangleMeshMainObjectIndex);
 	const DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx);
diff --git a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
index 59482c557..5120c356d 100644
--- a/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
+++ b/75_CAD_3D/shaders/main_pipeline/vertex_shader.hlsl
@@ -26,6 +26,7 @@ PSInput vtxMain(uint vertexID : SV_VertexID)
         float32_t3 triangleEdge0 = vertex1 - vertex0;
         float32_t3 triangleEdge1 = vertex2 - vertex0;
 
+        // TODO: choose the cross-product operand order from the mesh's triangle winding (CCW vs CW); cross(e1, e0) is hard-coded here, so the normal points the opposite way for meshes wound the other way.
         outV.setNormal(normalize(cross(triangleEdge1, triangleEdge0)));
     }