// Sample by Sascha Willems // Contact: webmaster@saschawillems.de #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __INTELLISENSE__ #include #else import vulkan_hpp; #endif #include #define GLFW_INCLUDE_VULKAN // REQUIRED only for GLFW CreateWindowSurface. #include #define GLM_FORCE_RADIANS #include #include constexpr uint32_t WIDTH = 800; constexpr uint32_t HEIGHT = 600; constexpr uint64_t FenceTimeout = 100000000; constexpr uint32_t PARTICLE_COUNT = 8192; constexpr int MAX_FRAMES_IN_FLIGHT = 2; const std::vector validationLayers = { "VK_LAYER_KHRONOS_validation" }; #ifdef NDEBUG constexpr bool enableValidationLayers = false; #else constexpr bool enableValidationLayers = true; #endif struct UniformBufferObject { float deltaTime = 1.0f; }; struct Particle { glm::vec2 position; glm::vec2 velocity; glm::vec4 color; static vk::VertexInputBindingDescription getBindingDescription() { return { 0, sizeof(Particle), vk::VertexInputRate::eVertex }; } static std::array getAttributeDescriptions() { return { vk::VertexInputAttributeDescription( 0, 0, vk::Format::eR32G32Sfloat, offsetof(Particle, position) ), vk::VertexInputAttributeDescription( 1, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(Particle, color) ), }; } }; class ComputeShaderApplication { public: void run() { initWindow(); initVulkan(); mainLoop(); cleanup(); } private: GLFWwindow * window = nullptr; vk::raii::Context context; vk::raii::Instance instance = nullptr; vk::raii::DebugUtilsMessengerEXT debugMessenger = nullptr; vk::raii::SurfaceKHR surface = nullptr; vk::raii::PhysicalDevice physicalDevice = nullptr; vk::raii::Device device = nullptr; uint32_t queueIndex = ~0; vk::raii::Queue queue = nullptr; vk::raii::SwapchainKHR swapChain = nullptr; std::vector swapChainImages; vk::SurfaceFormatKHR swapChainImageFormat; vk::Extent2D swapChainExtent; std::vector swapChainImageViews; vk::raii::PipelineLayout pipelineLayout = nullptr; vk::raii::Pipeline graphicsPipeline = nullptr; vk::raii::DescriptorSetLayout computeDescriptorSetLayout = nullptr; vk::raii::PipelineLayout computePipelineLayout = nullptr; vk::raii::Pipeline computePipeline = nullptr; std::vector shaderStorageBuffers; std::vector shaderStorageBuffersMemory; std::vector uniformBuffers; std::vector uniformBuffersMemory; std::vector uniformBuffersMapped; vk::raii::DescriptorPool descriptorPool = nullptr; std::vector computeDescriptorSets; vk::raii::CommandPool commandPool = nullptr; std::vector commandBuffers; std::vector computeCommandBuffers; vk::raii::Semaphore semaphore = nullptr; uint64_t timelineValue = 0; std::vector inFlightFences; uint32_t currentFrame = 0; double lastFrameTime = 0.0; bool framebufferResized = false; double lastTime = 0.0f; std::vector requiredDeviceExtension = { vk::KHRSwapchainExtensionName, vk::KHRSpirv14ExtensionName, vk::KHRSynchronization2ExtensionName, vk::KHRCreateRenderpass2ExtensionName }; void initWindow() { glfwInit(); glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API); window = glfwCreateWindow(WIDTH, HEIGHT, "Vulkan", nullptr, nullptr); glfwSetWindowUserPointer(window, this); glfwSetFramebufferSizeCallback(window, framebufferResizeCallback); lastTime = glfwGetTime(); } static void framebufferResizeCallback(GLFWwindow* window, int width, int height) { auto app = static_cast(glfwGetWindowUserPointer(window)); app->framebufferResized = true; } void initVulkan() { createInstance(); setupDebugMessenger(); createSurface(); pickPhysicalDevice(); createLogicalDevice(); createSwapChain(); createImageViews(); createComputeDescriptorSetLayout(); createGraphicsPipeline(); createComputePipeline(); createCommandPool(); createShaderStorageBuffers(); createUniformBuffers(); createDescriptorPool(); createComputeDescriptorSets(); createCommandBuffers(); createComputeCommandBuffers(); createSyncObjects(); } void mainLoop() { while (!glfwWindowShouldClose(window)) { glfwPollEvents(); drawFrame(); // We want to animate the particle system using the last frames time to get smooth, frame-rate independent animation double currentTime = glfwGetTime(); lastFrameTime = (currentTime - lastTime) * 1000.0; lastTime = currentTime; } device.waitIdle(); } void cleanupSwapChain() { swapChainImageViews.clear(); swapChain = nullptr; } void cleanup() const { glfwDestroyWindow(window); glfwTerminate(); } void recreateSwapChain() { int width = 0, height = 0; glfwGetFramebufferSize(window, &width, &height); while (width == 0 || height == 0) { glfwGetFramebufferSize(window, &width, &height); glfwWaitEvents(); } device.waitIdle(); cleanupSwapChain(); createSwapChain(); createImageViews(); } void createInstance() { constexpr vk::ApplicationInfo appInfo{ .pApplicationName = "Hello Triangle", .applicationVersion = VK_MAKE_VERSION( 1, 0, 0 ), .pEngineName = "No Engine", .engineVersion = VK_MAKE_VERSION( 1, 0, 0 ), .apiVersion = vk::ApiVersion14 }; // Get the required layers std::vector requiredLayers; if (enableValidationLayers) { requiredLayers.assign(validationLayers.begin(), validationLayers.end()); } // Check if the required layers are supported by the Vulkan implementation. auto layerProperties = context.enumerateInstanceLayerProperties(); for (auto const& requiredLayer : requiredLayers) { if (std::ranges::none_of(layerProperties, [requiredLayer](auto const& layerProperty) { return strcmp(layerProperty.layerName, requiredLayer) == 0; })) { throw std::runtime_error("Required layer not supported: " + std::string(requiredLayer)); } } // Get the required extensions. auto requiredExtensions = getRequiredExtensions(); // Check if the required extensions are supported by the Vulkan implementation. auto extensionProperties = context.enumerateInstanceExtensionProperties(); for (auto const& requiredExtension : requiredExtensions) { if (std::ranges::none_of(extensionProperties, [requiredExtension](auto const& extensionProperty) { return strcmp(extensionProperty.extensionName, requiredExtension) == 0; })) { throw std::runtime_error("Required extension not supported: " + std::string(requiredExtension)); } } vk::InstanceCreateInfo createInfo{ .pApplicationInfo = &appInfo, .enabledLayerCount = static_cast(requiredLayers.size()), .ppEnabledLayerNames = requiredLayers.data(), .enabledExtensionCount = static_cast(requiredExtensions.size()), .ppEnabledExtensionNames = requiredExtensions.data() }; instance = vk::raii::Instance(context, createInfo); } void setupDebugMessenger() { if (!enableValidationLayers) return; vk::DebugUtilsMessageSeverityFlagsEXT severityFlags( vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose | vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning | vk::DebugUtilsMessageSeverityFlagBitsEXT::eError ); vk::DebugUtilsMessageTypeFlagsEXT messageTypeFlags( vk::DebugUtilsMessageTypeFlagBitsEXT::eGeneral | vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance | vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation ); vk::DebugUtilsMessengerCreateInfoEXT debugUtilsMessengerCreateInfoEXT{ .messageSeverity = severityFlags, .messageType = messageTypeFlags, .pfnUserCallback = &debugCallback }; debugMessenger = instance.createDebugUtilsMessengerEXT(debugUtilsMessengerCreateInfoEXT); } void createSurface() { VkSurfaceKHR _surface; if (glfwCreateWindowSurface(*instance, window, nullptr, &_surface) != 0) { throw std::runtime_error("failed to create window surface!"); } surface = vk::raii::SurfaceKHR(instance, _surface); } void pickPhysicalDevice() { std::vector devices = instance.enumeratePhysicalDevices(); const auto devIter = std::ranges::find_if( devices, [&]( auto const & device ) { // Check if the device supports the Vulkan 1.3 API version bool supportsVulkan1_3 = device.getProperties().apiVersion >= VK_API_VERSION_1_3; // Check if any of the queue families support graphics operations auto queueFamilies = device.getQueueFamilyProperties(); bool supportsGraphics = std::ranges::any_of( queueFamilies, []( auto const & qfp ) { return !!( qfp.queueFlags & vk::QueueFlagBits::eGraphics ); } ); // Check if all required device extensions are available auto availableDeviceExtensions = device.enumerateDeviceExtensionProperties(); bool supportsAllRequiredExtensions = std::ranges::all_of( requiredDeviceExtension, [&availableDeviceExtensions]( auto const & requiredDeviceExtension ) { return std::ranges::any_of( availableDeviceExtensions, [requiredDeviceExtension]( auto const & availableDeviceExtension ) { return strcmp( availableDeviceExtension.extensionName, requiredDeviceExtension ) == 0; } ); } ); auto features = device.template getFeatures2(); bool supportsRequiredFeatures = features.template get().features.samplerAnisotropy && features.template get().dynamicRendering && features.template get().extendedDynamicState && features.template get().timelineSemaphore; return supportsVulkan1_3 && supportsGraphics && supportsAllRequiredExtensions && supportsRequiredFeatures; }); if ( devIter != devices.end() ) { physicalDevice = *devIter; } else { throw std::runtime_error("failed to find a suitable GPU!"); } } void createLogicalDevice() { std::vector queueFamilyProperties = physicalDevice.getQueueFamilyProperties(); // get the first index into queueFamilyProperties which supports both graphics and present for (uint32_t qfpIndex = 0; qfpIndex < queueFamilyProperties.size(); qfpIndex++) { if ((queueFamilyProperties[qfpIndex].queueFlags & vk::QueueFlagBits::eGraphics) && (queueFamilyProperties[qfpIndex].queueFlags & vk::QueueFlagBits::eCompute) && physicalDevice.getSurfaceSupportKHR(qfpIndex, *surface)) { // found a queue family that supports both graphics and present queueIndex = qfpIndex; break; } } if (queueIndex == ~0) { throw std::runtime_error("Could not find a queue for graphics and present -> terminating"); } // query for Vulkan 1.3 features vk::StructureChain featureChain = { {.features = {.samplerAnisotropy = true } }, // vk::PhysicalDeviceFeatures2 {.synchronization2 = true, .dynamicRendering = true }, // vk::PhysicalDeviceVulkan13Features {.extendedDynamicState = true }, // vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT {.timelineSemaphore = true } // vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR }; // create a Device float queuePriority = 0.0f; vk::DeviceQueueCreateInfo deviceQueueCreateInfo{ .queueFamilyIndex = queueIndex, .queueCount = 1, .pQueuePriorities = &queuePriority }; vk::DeviceCreateInfo deviceCreateInfo{ .pNext = &featureChain.get(), .queueCreateInfoCount = 1, .pQueueCreateInfos = &deviceQueueCreateInfo, .enabledExtensionCount = static_cast(requiredDeviceExtension.size()), .ppEnabledExtensionNames = requiredDeviceExtension.data() }; device = vk::raii::Device( physicalDevice, deviceCreateInfo ); queue = vk::raii::Queue( device, queueIndex, 0 ); } void createSwapChain() { auto surfaceCapabilities = physicalDevice.getSurfaceCapabilitiesKHR( surface ); swapChainImageFormat = chooseSwapSurfaceFormat(physicalDevice.getSurfaceFormatsKHR( surface )); swapChainExtent = chooseSwapExtent(surfaceCapabilities); auto minImageCount = std::max( 3u, surfaceCapabilities.minImageCount ); minImageCount = ( surfaceCapabilities.maxImageCount > 0 && minImageCount > surfaceCapabilities.maxImageCount ) ? surfaceCapabilities.maxImageCount : minImageCount; vk::SwapchainCreateInfoKHR swapChainCreateInfo{ .flags = vk::SwapchainCreateFlagsKHR(), .surface = surface, .minImageCount = minImageCount, .imageFormat = swapChainImageFormat.format, .imageColorSpace = swapChainImageFormat.colorSpace, .imageExtent = swapChainExtent, .imageArrayLayers =1, .imageUsage = vk::ImageUsageFlagBits::eColorAttachment, .imageSharingMode = vk::SharingMode::eExclusive, .preTransform = surfaceCapabilities.currentTransform, .compositeAlpha = vk::CompositeAlphaFlagBitsKHR::eOpaque, .presentMode = chooseSwapPresentMode(physicalDevice.getSurfacePresentModesKHR(surface)), .clipped = true, .oldSwapchain = nullptr }; swapChain = vk::raii::SwapchainKHR( device, swapChainCreateInfo ); swapChainImages = swapChain.getImages(); } void createImageViews() { vk::ImageViewCreateInfo imageViewCreateInfo{ .viewType = vk::ImageViewType::e2D, .format = swapChainImageFormat.format, .components = {vk::ComponentSwizzle::eIdentity, vk::ComponentSwizzle::eIdentity, vk::ComponentSwizzle::eIdentity, vk::ComponentSwizzle::eIdentity}, .subresourceRange = { vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1 } }; for ( auto image : swapChainImages ) { imageViewCreateInfo.image = image; swapChainImageViews.emplace_back( device, imageViewCreateInfo ); } } void createComputeDescriptorSetLayout() { std::array layoutBindings{ vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eCompute, nullptr), vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute, nullptr), vk::DescriptorSetLayoutBinding(2, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute, nullptr) }; vk::DescriptorSetLayoutCreateInfo layoutInfo{ .bindingCount = static_cast(layoutBindings.size()), .pBindings = layoutBindings.data() }; computeDescriptorSetLayout = vk::raii::DescriptorSetLayout( device, layoutInfo ); } void createGraphicsPipeline() { vk::raii::ShaderModule shaderModule = createShaderModule(readFile("shaders/slang.spv")); vk::PipelineShaderStageCreateInfo vertShaderStageInfo{ .stage = vk::ShaderStageFlagBits::eVertex, .module = shaderModule, .pName = "vertMain" }; vk::PipelineShaderStageCreateInfo fragShaderStageInfo{ .stage = vk::ShaderStageFlagBits::eFragment, .module = shaderModule, .pName = "fragMain" }; vk::PipelineShaderStageCreateInfo shaderStages[] = {vertShaderStageInfo, fragShaderStageInfo}; auto bindingDescription = Particle::getBindingDescription(); auto attributeDescriptions = Particle::getAttributeDescriptions(); vk::PipelineVertexInputStateCreateInfo vertexInputInfo{ .vertexBindingDescriptionCount = 1, .pVertexBindingDescriptions = &bindingDescription, .vertexAttributeDescriptionCount = static_cast(attributeDescriptions.size()), .pVertexAttributeDescriptions = attributeDescriptions.data() }; vk::PipelineInputAssemblyStateCreateInfo inputAssembly{ .topology = vk::PrimitiveTopology::ePointList, .primitiveRestartEnable = vk::False }; vk::PipelineViewportStateCreateInfo viewportState{ .viewportCount = 1, .scissorCount = 1 }; vk::PipelineRasterizationStateCreateInfo rasterizer{ .depthClampEnable = vk::False, .rasterizerDiscardEnable = vk::False, .polygonMode = vk::PolygonMode::eFill, .cullMode = vk::CullModeFlagBits::eBack, .frontFace = vk::FrontFace::eCounterClockwise, .depthBiasEnable = vk::False, .lineWidth = 1.0f }; vk::PipelineMultisampleStateCreateInfo multisampling{ .rasterizationSamples = vk::SampleCountFlagBits::e1, .sampleShadingEnable = vk::False }; vk::PipelineColorBlendAttachmentState colorBlendAttachment{ .blendEnable = vk::True, .srcColorBlendFactor = vk::BlendFactor::eSrcAlpha, .dstColorBlendFactor = vk::BlendFactor::eOneMinusSrcAlpha, .colorBlendOp = vk::BlendOp::eAdd, .srcAlphaBlendFactor = vk::BlendFactor::eOneMinusSrcAlpha, .dstAlphaBlendFactor = vk::BlendFactor::eZero, .alphaBlendOp = vk::BlendOp::eAdd, .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA, }; vk::PipelineColorBlendStateCreateInfo colorBlending{ .logicOpEnable = vk::False, .logicOp = vk::LogicOp::eCopy, .attachmentCount = 1, .pAttachments = &colorBlendAttachment }; std::vector dynamicStates = { vk::DynamicState::eViewport, vk::DynamicState::eScissor }; vk::PipelineDynamicStateCreateInfo dynamicState{ .dynamicStateCount = static_cast(dynamicStates.size()), .pDynamicStates = dynamicStates.data() }; vk::PipelineLayoutCreateInfo pipelineLayoutInfo; pipelineLayout = vk::raii::PipelineLayout( device, pipelineLayoutInfo ); vk::PipelineRenderingCreateInfo pipelineRenderingCreateInfo{ .colorAttachmentCount = 1, .pColorAttachmentFormats = &swapChainImageFormat.format }; vk::GraphicsPipelineCreateInfo pipelineInfo{ .pNext = &pipelineRenderingCreateInfo, .stageCount = 2, .pStages = shaderStages, .pVertexInputState = &vertexInputInfo, .pInputAssemblyState = &inputAssembly, .pViewportState = &viewportState, .pRasterizationState = &rasterizer, .pMultisampleState = &multisampling, .pColorBlendState = &colorBlending, .pDynamicState = &dynamicState, .layout = *pipelineLayout, .subpass = 0 }; graphicsPipeline = vk::raii::Pipeline(device, nullptr, pipelineInfo); } void createComputePipeline() { vk::raii::ShaderModule shaderModule = createShaderModule(readFile("shaders/slang.spv")); vk::PipelineShaderStageCreateInfo computeShaderStageInfo{ .stage = vk::ShaderStageFlagBits::eCompute, .module = shaderModule, .pName = "compMain" }; vk::PipelineLayoutCreateInfo pipelineLayoutInfo{ .setLayoutCount = 1, .pSetLayouts = &*computeDescriptorSetLayout }; computePipelineLayout = vk::raii::PipelineLayout( device, pipelineLayoutInfo ); vk::ComputePipelineCreateInfo pipelineInfo{ .stage = computeShaderStageInfo, .layout = *computePipelineLayout }; computePipeline = vk::raii::Pipeline(device, nullptr, pipelineInfo); } void createCommandPool() { vk::CommandPoolCreateInfo poolInfo{}; poolInfo.flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer; poolInfo.queueFamilyIndex = queueIndex; commandPool = vk::raii::CommandPool(device, poolInfo); } void createShaderStorageBuffers() { // Initialize particles std::default_random_engine rndEngine(static_cast(time(nullptr))); std::uniform_real_distribution rndDist(0.0f, 1.0f); // Initial particle positions on a circle std::vector particles(PARTICLE_COUNT); for (auto& particle : particles) { float r = 0.25f * sqrtf(rndDist(rndEngine)); float theta = rndDist(rndEngine) * 2.0f * 3.14159265358979323846f; float x = r * cosf(theta) * HEIGHT / WIDTH; float y = r * sinf(theta); particle.position = glm::vec2(x, y); particle.velocity = normalize(glm::vec2(x,y)) * 0.00025f; particle.color = glm::vec4(rndDist(rndEngine), rndDist(rndEngine), rndDist(rndEngine), 1.0f); } vk::DeviceSize bufferSize = sizeof(Particle) * PARTICLE_COUNT; // Create a staging buffer used to upload data to the gpu vk::raii::Buffer stagingBuffer({}); vk::raii::DeviceMemory stagingBufferMemory({}); createBuffer(bufferSize, vk::BufferUsageFlagBits::eTransferSrc, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, stagingBuffer, stagingBufferMemory); void* dataStaging = stagingBufferMemory.mapMemory(0, bufferSize); memcpy(dataStaging, particles.data(), (size_t)bufferSize); stagingBufferMemory.unmapMemory(); shaderStorageBuffers.clear(); shaderStorageBuffersMemory.clear(); // Copy initial particle data to all storage buffers for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { vk::raii::Buffer shaderStorageBufferTemp({}); vk::raii::DeviceMemory shaderStorageBufferTempMemory({}); createBuffer(bufferSize, vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eTransferDst, vk::MemoryPropertyFlagBits::eDeviceLocal, shaderStorageBufferTemp, shaderStorageBufferTempMemory); copyBuffer(stagingBuffer, shaderStorageBufferTemp, bufferSize); shaderStorageBuffers.emplace_back(std::move(shaderStorageBufferTemp)); shaderStorageBuffersMemory.emplace_back(std::move(shaderStorageBufferTempMemory)); } } void createUniformBuffers() { uniformBuffers.clear(); uniformBuffersMemory.clear(); uniformBuffersMapped.clear(); for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { vk::DeviceSize bufferSize = sizeof(UniformBufferObject); vk::raii::Buffer buffer({}); vk::raii::DeviceMemory bufferMem({}); createBuffer(bufferSize, vk::BufferUsageFlagBits::eUniformBuffer, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, buffer, bufferMem); uniformBuffers.emplace_back(std::move(buffer)); uniformBuffersMemory.emplace_back(std::move(bufferMem)); uniformBuffersMapped.emplace_back( uniformBuffersMemory[i].mapMemory(0, bufferSize)); } } void createDescriptorPool() { std::array poolSize { vk::DescriptorPoolSize( vk::DescriptorType::eUniformBuffer, MAX_FRAMES_IN_FLIGHT), vk::DescriptorPoolSize( vk::DescriptorType::eStorageBuffer, MAX_FRAMES_IN_FLIGHT * 2) }; vk::DescriptorPoolCreateInfo poolInfo{}; poolInfo.flags = vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet; poolInfo.maxSets = MAX_FRAMES_IN_FLIGHT; poolInfo.poolSizeCount = poolSize.size(); poolInfo.pPoolSizes = poolSize.data(); descriptorPool = vk::raii::DescriptorPool(device, poolInfo); } void createComputeDescriptorSets() { std::vector layouts(MAX_FRAMES_IN_FLIGHT, computeDescriptorSetLayout); vk::DescriptorSetAllocateInfo allocInfo{}; allocInfo.descriptorPool = *descriptorPool; allocInfo.descriptorSetCount = MAX_FRAMES_IN_FLIGHT; allocInfo.pSetLayouts = layouts.data(); computeDescriptorSets.clear(); computeDescriptorSets = device.allocateDescriptorSets(allocInfo); for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { vk::DescriptorBufferInfo bufferInfo(uniformBuffers[i], 0, sizeof(UniformBufferObject)); vk::DescriptorBufferInfo storageBufferInfoLastFrame(shaderStorageBuffers[(i - 1) % MAX_FRAMES_IN_FLIGHT], 0, sizeof(Particle) * PARTICLE_COUNT); vk::DescriptorBufferInfo storageBufferInfoCurrentFrame(shaderStorageBuffers[i], 0, sizeof(Particle) * PARTICLE_COUNT); std::array descriptorWrites{ vk::WriteDescriptorSet{ .dstSet = *computeDescriptorSets[i], .dstBinding = 0, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eUniformBuffer, .pImageInfo = nullptr, .pBufferInfo = &bufferInfo, .pTexelBufferView = nullptr }, vk::WriteDescriptorSet{ .dstSet = *computeDescriptorSets[i], .dstBinding = 1, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eStorageBuffer, .pImageInfo = nullptr, .pBufferInfo = &storageBufferInfoLastFrame, .pTexelBufferView = nullptr }, vk::WriteDescriptorSet{ .dstSet = *computeDescriptorSets[i], .dstBinding = 2, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eStorageBuffer, .pImageInfo = nullptr, .pBufferInfo = &storageBufferInfoCurrentFrame, .pTexelBufferView = nullptr }, }; device.updateDescriptorSets(descriptorWrites, {}); } } void createBuffer(vk::DeviceSize size, vk::BufferUsageFlags usage, vk::MemoryPropertyFlags properties, vk::raii::Buffer& buffer, vk::raii::DeviceMemory& bufferMemory) const { vk::BufferCreateInfo bufferInfo{}; bufferInfo.size = size; bufferInfo.usage = usage; bufferInfo.sharingMode = vk::SharingMode::eExclusive; buffer = vk::raii::Buffer(device, bufferInfo); vk::MemoryRequirements memRequirements = buffer.getMemoryRequirements(); vk::MemoryAllocateInfo allocInfo{}; allocInfo.allocationSize = memRequirements.size; allocInfo.memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, properties); bufferMemory = vk::raii::DeviceMemory(device, allocInfo); buffer.bindMemory(bufferMemory, 0); } [[nodiscard]] vk::raii::CommandBuffer beginSingleTimeCommands() const { vk::CommandBufferAllocateInfo allocInfo{}; allocInfo.commandPool = *commandPool; allocInfo.level = vk::CommandBufferLevel::ePrimary; allocInfo.commandBufferCount = 1; vk::raii::CommandBuffer commandBuffer = std::move(vk::raii::CommandBuffers( device, allocInfo ).front()); vk::CommandBufferBeginInfo beginInfo{ .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit }; commandBuffer.begin(beginInfo); return commandBuffer; } void endSingleTimeCommands(const vk::raii::CommandBuffer& commandBuffer) const { commandBuffer.end(); vk::SubmitInfo submitInfo{}; submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &*commandBuffer; queue.submit(submitInfo, nullptr); queue.waitIdle(); } void copyBuffer(const vk::raii::Buffer & srcBuffer, const vk::raii::Buffer & dstBuffer, vk::DeviceSize size) const { vk::raii::CommandBuffer commandCopyBuffer = beginSingleTimeCommands(); commandCopyBuffer.copyBuffer(srcBuffer, dstBuffer, vk::BufferCopy(0, 0, size)); endSingleTimeCommands(commandCopyBuffer); } [[nodiscard]] uint32_t findMemoryType(uint32_t typeFilter, vk::MemoryPropertyFlags properties) const { vk::PhysicalDeviceMemoryProperties memProperties = physicalDevice.getMemoryProperties(); for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) { if ((typeFilter & (1 << i)) && (memProperties.memoryTypes[i].propertyFlags & properties) == properties) { return i; } } throw std::runtime_error("failed to find suitable memory type!"); } void createCommandBuffers() { commandBuffers.clear(); vk::CommandBufferAllocateInfo allocInfo{}; allocInfo.commandPool = *commandPool; allocInfo.level = vk::CommandBufferLevel::ePrimary; allocInfo.commandBufferCount = MAX_FRAMES_IN_FLIGHT; commandBuffers = vk::raii::CommandBuffers(device, allocInfo); } void createComputeCommandBuffers() { computeCommandBuffers.clear(); vk::CommandBufferAllocateInfo allocInfo{}; allocInfo.commandPool = *commandPool; allocInfo.level = vk::CommandBufferLevel::ePrimary; allocInfo.commandBufferCount = MAX_FRAMES_IN_FLIGHT; computeCommandBuffers = vk::raii::CommandBuffers(device, allocInfo); } void recordCommandBuffer( uint32_t imageIndex) { commandBuffers[currentFrame].reset(); commandBuffers[currentFrame].begin( {} ); // Before starting rendering, transition the swapchain image to COLOR_ATTACHMENT_OPTIMAL transition_image_layout( imageIndex, vk::ImageLayout::eUndefined, vk::ImageLayout::eColorAttachmentOptimal, {}, // srcAccessMask (no need to wait for previous operations) vk::AccessFlagBits2::eColorAttachmentWrite, // dstAccessMask vk::PipelineStageFlagBits2::eTopOfPipe, // srcStage vk::PipelineStageFlagBits2::eColorAttachmentOutput // dstStage ); vk::ClearValue clearColor = vk::ClearColorValue(0.0f, 0.0f, 0.0f, 1.0f); vk::RenderingAttachmentInfo attachmentInfo = { .imageView = swapChainImageViews[imageIndex], .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, .loadOp = vk::AttachmentLoadOp::eClear, .storeOp = vk::AttachmentStoreOp::eStore, .clearValue = clearColor }; vk::RenderingInfo renderingInfo = { .renderArea = { .offset = { 0, 0 }, .extent = swapChainExtent }, .layerCount = 1, .colorAttachmentCount = 1, .pColorAttachments = &attachmentInfo }; commandBuffers[currentFrame].beginRendering(renderingInfo); commandBuffers[currentFrame].bindPipeline(vk::PipelineBindPoint::eGraphics, *graphicsPipeline); commandBuffers[currentFrame].setViewport(0, vk::Viewport(0.0f, 0.0f, static_cast(swapChainExtent.width), static_cast(swapChainExtent.height), 0.0f, 1.0f)); commandBuffers[currentFrame].setScissor( 0, vk::Rect2D( vk::Offset2D( 0, 0 ), swapChainExtent ) ); commandBuffers[currentFrame].bindVertexBuffers(0, { shaderStorageBuffers[currentFrame] }, {0}); commandBuffers[currentFrame].draw( PARTICLE_COUNT, 1, 0, 0 ); commandBuffers[currentFrame].endRendering(); // After rendering, transition the swapchain image to PRESENT_SRC transition_image_layout( imageIndex, vk::ImageLayout::eColorAttachmentOptimal, vk::ImageLayout::ePresentSrcKHR, vk::AccessFlagBits2::eColorAttachmentWrite, // srcAccessMask {}, // dstAccessMask vk::PipelineStageFlagBits2::eColorAttachmentOutput, // srcStage vk::PipelineStageFlagBits2::eBottomOfPipe // dstStage ); commandBuffers[currentFrame].end(); } void transition_image_layout( uint32_t imageIndex, vk::ImageLayout old_layout, vk::ImageLayout new_layout, vk::AccessFlags2 src_access_mask, vk::AccessFlags2 dst_access_mask, vk::PipelineStageFlags2 src_stage_mask, vk::PipelineStageFlags2 dst_stage_mask ) { vk::ImageMemoryBarrier2 barrier = { .srcStageMask = src_stage_mask, .srcAccessMask = src_access_mask, .dstStageMask = dst_stage_mask, .dstAccessMask = dst_access_mask, .oldLayout = old_layout, .newLayout = new_layout, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = swapChainImages[imageIndex], .subresourceRange = { .aspectMask = vk::ImageAspectFlagBits::eColor, .baseMipLevel = 0, .levelCount = 1, .baseArrayLayer = 0, .layerCount = 1 } }; vk::DependencyInfo dependency_info = { .dependencyFlags = {}, .imageMemoryBarrierCount = 1, .pImageMemoryBarriers = &barrier }; commandBuffers[currentFrame].pipelineBarrier2(dependency_info); } void recordComputeCommandBuffer() { computeCommandBuffers[currentFrame].reset(); computeCommandBuffers[currentFrame].begin({}); computeCommandBuffers[currentFrame].bindPipeline(vk::PipelineBindPoint::eCompute, computePipeline); computeCommandBuffers[currentFrame].bindDescriptorSets(vk::PipelineBindPoint::eCompute, computePipelineLayout, 0, {computeDescriptorSets[currentFrame]}, {}); computeCommandBuffers[currentFrame].dispatch( PARTICLE_COUNT / 256, 1, 1 ); computeCommandBuffers[currentFrame].end(); } void createSyncObjects() { inFlightFences.clear(); vk::SemaphoreTypeCreateInfo semaphoreType{ .semaphoreType = vk::SemaphoreType::eTimeline, .initialValue = 0 }; semaphore = vk::raii::Semaphore(device, {.pNext = &semaphoreType}); timelineValue = 0; for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { vk::FenceCreateInfo fenceInfo{}; inFlightFences.emplace_back(device, fenceInfo); } } void updateUniformBuffer(uint32_t currentImage) { UniformBufferObject ubo{}; ubo.deltaTime = static_cast(lastFrameTime) * 2.0f; memcpy(uniformBuffersMapped[currentImage], &ubo, sizeof(ubo)); } void drawFrame() { auto [result, imageIndex] = swapChain.acquireNextImage(UINT64_MAX, nullptr, *inFlightFences[currentFrame]); while ( vk::Result::eTimeout == device.waitForFences( *inFlightFences[currentFrame], vk::True, UINT64_MAX ) ) ; device.resetFences(*inFlightFences[currentFrame]); // Update timeline value for this frame uint64_t computeWaitValue = timelineValue; uint64_t computeSignalValue = ++timelineValue; uint64_t graphicsWaitValue = computeSignalValue; uint64_t graphicsSignalValue = ++timelineValue; updateUniformBuffer(currentFrame); { recordComputeCommandBuffer(); // Submit compute work vk::TimelineSemaphoreSubmitInfo computeTimelineInfo{ .waitSemaphoreValueCount = 1, .pWaitSemaphoreValues = &computeWaitValue, .signalSemaphoreValueCount = 1, .pSignalSemaphoreValues = &computeSignalValue }; vk::PipelineStageFlags waitStages[] = {vk::PipelineStageFlagBits::eComputeShader}; vk::SubmitInfo computeSubmitInfo{ .pNext = &computeTimelineInfo, .waitSemaphoreCount = 1, .pWaitSemaphores = &*semaphore, .pWaitDstStageMask = waitStages, .commandBufferCount = 1, .pCommandBuffers = &*computeCommandBuffers[currentFrame], .signalSemaphoreCount = 1, .pSignalSemaphores = &*semaphore }; queue.submit(computeSubmitInfo, nullptr); } { // Record graphics command buffer recordCommandBuffer(imageIndex); // Submit graphics work (waits for compute to finish) vk::PipelineStageFlags waitStage = vk::PipelineStageFlagBits::eVertexInput; vk::TimelineSemaphoreSubmitInfo graphicsTimelineInfo{ .waitSemaphoreValueCount = 1, .pWaitSemaphoreValues = &graphicsWaitValue, .signalSemaphoreValueCount = 1, .pSignalSemaphoreValues = &graphicsSignalValue }; vk::SubmitInfo graphicsSubmitInfo{ .pNext = &graphicsTimelineInfo, .waitSemaphoreCount = 1, .pWaitSemaphores = &*semaphore, .pWaitDstStageMask = &waitStage, .commandBufferCount = 1, .pCommandBuffers = &*commandBuffers[currentFrame], .signalSemaphoreCount = 1, .pSignalSemaphores = &*semaphore }; queue.submit(graphicsSubmitInfo, nullptr); // Present the image (wait for graphics to finish) vk::SemaphoreWaitInfo waitInfo{ .semaphoreCount = 1, .pSemaphores = &*semaphore, .pValues = &graphicsSignalValue }; // Wait for graphics to complete before presenting while ( vk::Result::eTimeout ==device.waitSemaphores(waitInfo, UINT64_MAX) ) ; vk::PresentInfoKHR presentInfo{ .waitSemaphoreCount = 0, // No binary semaphores needed .pWaitSemaphores = nullptr, .swapchainCount = 1, .pSwapchains = &*swapChain, .pImageIndices = &imageIndex }; result = queue.presentKHR(presentInfo); if (result == vk::Result::eErrorOutOfDateKHR || result == vk::Result::eSuboptimalKHR || framebufferResized) { framebufferResized = false; recreateSwapChain(); } else if (result != vk::Result::eSuccess) { throw std::runtime_error("failed to present swap chain image!"); } } currentFrame = (currentFrame + 1) % MAX_FRAMES_IN_FLIGHT; } [[nodiscard]] vk::raii::ShaderModule createShaderModule(const std::vector& code) const { vk::ShaderModuleCreateInfo createInfo{ .codeSize = code.size(), .pCode = reinterpret_cast(code.data()) }; vk::raii::ShaderModule shaderModule{ device, createInfo }; return shaderModule; } static vk::SurfaceFormatKHR chooseSwapSurfaceFormat(const std::vector& availableFormats) { for (const auto& availableFormat : availableFormats) { if (availableFormat.format == vk::Format::eB8G8R8A8Srgb && availableFormat.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear) { return availableFormat; } } return availableFormats[0]; } static vk::PresentModeKHR chooseSwapPresentMode(const std::vector& availablePresentModes) { for (const auto& availablePresentMode : availablePresentModes) { if (availablePresentMode == vk::PresentModeKHR::eMailbox) { return availablePresentMode; } } return vk::PresentModeKHR::eFifo; } [[nodiscard]] vk::Extent2D chooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities) const { if (capabilities.currentExtent.width != std::numeric_limits::max()) { return capabilities.currentExtent; } int width, height; glfwGetFramebufferSize(window, &width, &height); return { std::clamp(width, capabilities.minImageExtent.width, capabilities.maxImageExtent.width), std::clamp(height, capabilities.minImageExtent.height, capabilities.maxImageExtent.height) }; } [[nodiscard]] std::vector getRequiredExtensions() const { uint32_t glfwExtensionCount = 0; auto glfwExtensions = glfwGetRequiredInstanceExtensions(&glfwExtensionCount); std::vector extensions(glfwExtensions, glfwExtensions + glfwExtensionCount); if (enableValidationLayers) { extensions.push_back(vk::EXTDebugUtilsExtensionName ); } return extensions; } static VKAPI_ATTR vk::Bool32 VKAPI_CALL debugCallback(vk::DebugUtilsMessageSeverityFlagBitsEXT severity, vk::DebugUtilsMessageTypeFlagsEXT type, const vk::DebugUtilsMessengerCallbackDataEXT* pCallbackData, void*) { if (severity == vk::DebugUtilsMessageSeverityFlagBitsEXT::eError || severity == vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning) { std::cerr << "validation layer: type " << to_string(type) << " msg: " << pCallbackData->pMessage << std::endl; } return vk::False; } static std::vector readFile(const std::string& filename) { std::ifstream file(filename, std::ios::ate | std::ios::binary); if (!file.is_open()) { throw std::runtime_error("failed to open file!"); } std::vector buffer(file.tellg()); file.seekg(0, std::ios::beg); file.read(buffer.data(), static_cast(buffer.size())); file.close(); return buffer; } }; int main() { try { ComputeShaderApplication app; app.run(); } catch (const std::exception& e) { std::cerr << e.what() << std::endl; return EXIT_FAILURE; } return EXIT_SUCCESS; }