繁体   English   中英

Vanilla Vulkan Compute Shader 未写入 output 缓冲区

[英]Vanilla Vulkan Compute Shader not writing to output buffer

编辑:修复双重取消映射(但不解决问题)
EDIT2:修复 API 版本并从代码中删除验证层。 相反,使用VK_INSTANCE_LAYERS=VK_LAYER_KHRONOS_validation运行。 问题仍然存在
EDIT3:忘记了描述符集,它允许将缓冲区绑定到着色器输入。 但仍然没有解决问题:'(

<TL;DR> 我用基本的计算着色器编写了一个简单的 Vulkan 仅计算示例代码。 没有 Vulkan 也没有着色器错误,但 output 缓冲区不是由计算着色器写入的:(

为了学习 Vulkan API,我开始编写一个带有基本计算着色器的简单计算示例。 它将一个 int 缓冲区上传到 GPU,运行一个计算着色器,递增每个 int 并将结果写入第二个缓冲区。

我的问题是一切运行良好,但在我的 output 缓冲区中没有得到预期的结果,我不知道为什么。 看起来计算着色器已调度,但从未写入 output 缓冲区。

为了观察这一点,我首先将随机数上传到我的输入缓冲区,并用值 2 填充我的 output 缓冲区。然后调度应该从输入中读取每个值 X 的计算着色器,并将 X+1 写入 output 缓冲区。
等待完成后,我 map 我的 output 缓冲区并显示其数据。 结果里全是 2 :'(

注意:绑定到缓冲区的 memory 是使用 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT 创建的。

因此,Vulkan 中肯定有一个概念我错了,或者标志/设置中有一个我看不到的微妙之处......

计算着色器代码:

#version 450 core

// Each workgroup covers 8 consecutive invocations along X.
layout (local_size_x = 8, local_size_y = 1, local_size_z = 1) in;

// Storage buffer read by the shader (set 0, binding 0).
layout (set = 0, binding = 0) buffer InputBuffer {
    uvec4 inputData[25];
};

// Storage buffer written by the shader (set 0, binding 1).
layout (set = 0, binding = 1) buffer OutputBuffer {
    uvec4 outputData[25];
};

// Writes inputData[i] + 1 (component-wise) to outputData[i] for every i below 25.
void main()
{
    uint index = gl_GlobalInvocationID.x;

    // Invocations past the end of the arrays have nothing to do.
    if (index >= 25)
        return;

    outputData[index] = inputData[index] + uvec4(1u);
}

以及整个示例代码(因为我不知道我可能错在哪里,我已经粘贴了整个内容,抱歉):

#include <vulkan/vulkan.h>
#include <iostream>
#include <vector>
#include <assert.h>
#include <fstream>

// Short aliases for the fixed-width unsigned integer types used throughout the sample.
using u32 = uint32_t;
using u64 = uint64_t;

// Vulkan two-step enumeration with one leading argument.
//
// Expands to a braced statement block (no trailing semicolon required) that:
//   1. clears `vec`,
//   2. calls `func(arg1, &count, nullptr)` to query the element count,
//   3. if the count is non-zero, resizes `vec` and calls `func(arg1, &count, vec.data())`,
//   4. trims `vec` to the count reported by the second call.
// The return value of `func` is ignored, so this works for void and VkResult functions alike.
// `vec` must be a std::vector whose element type matches the last parameter of `func`.
// Arguments are substituted directly: `##` is only valid between two tokens that paste into
// one valid token, so it must not be used here.
#define COUNT_AND_GET1(func, vec, arg1) {\
    uint32_t countAndGetSize_ = 0; \
    (vec).clear(); \
    func(arg1, &countAndGetSize_, nullptr); \
    if (countAndGetSize_ > 0) { \
        (vec).resize(countAndGetSize_); \
        func(arg1, &countAndGetSize_, (vec).data()); \
        (vec).resize(countAndGetSize_); } \
}

// Vulkan two-step enumeration with two leading arguments.
//
// Expands to a braced statement block (no trailing semicolon required) that clears `vec`,
// calls `func(arg1, arg2, &count, nullptr)` to query the element count and, when the count is
// non-zero, resizes `vec`, calls `func(arg1, arg2, &count, vec.data())` and trims `vec` to the
// count reported by that second call. The return value of `func` is ignored.
// Arguments are substituted directly; no `##` token pasting is involved.
#define COUNT_AND_GET2(func, vec, arg1, arg2) {\
    uint32_t countAndGetSize_ = 0; \
    (vec).clear(); \
    func(arg1, arg2, &countAndGetSize_, nullptr); \
    if (countAndGetSize_ > 0) { \
        (vec).resize(countAndGetSize_); \
        func(arg1, arg2, &countAndGetSize_, (vec).data()); \
        (vec).resize(countAndGetSize_); } \
}

// Four unsigned 32-bit components; the sample uses it as the element type of the
// mapped input and output buffers.
struct vec4
{
    u32 x;
    u32 y;
    u32 z;
    u32 w;
};

// Everything the sample queries about one VkPhysicalDevice, gathered in one place so the
// device-selection code can compare candidates and later steps can reuse the data.
struct PhysicalDeviceProps
{
    VkPhysicalDeviceProperties              m_Properties;               // filled by vkGetPhysicalDeviceProperties
    VkPhysicalDeviceFeatures                m_Features;                 // NOTE(review): no visible code in this file fills this member
    VkPhysicalDeviceMemoryProperties        m_MemoryProperties;         // filled by vkGetPhysicalDeviceMemoryProperties
    std::vector<VkQueueFamilyProperties>    m_QueueFamilyProperties;    // filled by vkGetPhysicalDeviceQueueFamilyProperties
    std::vector<VkLayerProperties>          m_LayerProperties;          // filled by vkEnumerateDeviceLayerProperties
    std::vector<VkExtensionProperties>      m_ExtensionProperties;      // filled by vkEnumerateDeviceExtensionProperties
};

// Returns the index of a device memory *type* matching the requested properties, or u32(-1)
// when none matches. Despite the "Heap" in the name, the callers in this file pass the
// result on as VkMemoryAllocateInfo::memoryTypeIndex.
//
//   memoryTypeBits       bit i set means memory type i is acceptable for the resource
//                        (VkMemoryRequirements::memoryTypeBits).
//   memoryProperties     memory types reported by the physical device.
//   preferredProperties  tried first; the lowest-indexed acceptable type containing all of
//                        these flags wins.
//   requiredProperties   fallback when no type satisfies the preferred flags.
u32 SelectMemoryHeapFrom(u32 memoryTypeBits, const VkPhysicalDeviceMemoryProperties& memoryProperties, VkMemoryPropertyFlags preferredProperties, VkMemoryPropertyFlags requiredProperties)
{
    assert((preferredProperties & requiredProperties) > 0);

    // Only the first memoryTypeCount entries of memoryTypes[] are meaningful; scanning all
    // VK_MAX_MEMORY_TYPES slots could match on unspecified data.
    const u32 maxTypes = static_cast<u32>(VK_MAX_MEMORY_TYPES);
    const u32 typeCount = memoryProperties.memoryTypeCount < maxTypes ? memoryProperties.memoryTypeCount : maxTypes;

    // Two passes over the same search: preferred flags first, then the required ones.
    const VkMemoryPropertyFlags passes[2] = { preferredProperties, requiredProperties };
    for (const VkMemoryPropertyFlags wantedFlags : passes)
    {
        for (u32 memIndex = 0; memIndex < typeCount; ++memIndex)
        {
            // Unsigned shift: index 31 would otherwise shift into the sign bit of an int.
            const bool isAllowed = (memoryTypeBits & (u32(1) << memIndex)) != 0;
            const bool hasFlags = (memoryProperties.memoryTypes[memIndex].propertyFlags & wantedFlags) == wantedFlags;
            if (isAllowed && hasFlags)
                return memIndex;
        }
    }
    return u32(-1);
}

// **** MAIN FUNCTION ****
void SampleCompute()
{
    // -------------------------------------
    // 1. Create Instance
    // -------------------------------------
    VkApplicationInfo appInfo = { VK_STRUCTURE_TYPE_APPLICATION_INFO, nullptr, "SampleCompute", 0, "MyEngine", 0, VK_API_VERSION_1_2 };
    VkInstanceCreateInfo instCreateInfo = { VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, nullptr, 0, &appInfo, 0, nullptr, 0, nullptr };
    VkInstance instance = VK_NULL_HANDLE;
    if (VK_SUCCESS != vkCreateInstance(&instCreateInfo, nullptr, &instance))
        std::cout << "Instance creation failed!\n";


    // ---------------------------------------------------
    // 2. Enumerate physical devices and select 'best' one 
    // ---------------------------------------------------
    VkPhysicalDevice bestDevice = VK_NULL_HANDLE;
    PhysicalDeviceProps bestDeviceProps;
    {
        std::vector<VkPhysicalDevice> physicalDevices;
        COUNT_AND_GET1(vkEnumeratePhysicalDevices, physicalDevices, instance)
        assert(!physicalDevices.empty());

        std::vector< PhysicalDeviceProps> physicalDeviceProps(physicalDevices.size());
        for (u64 i = 0; i < physicalDevices.size(); ++i)
        {
            vkGetPhysicalDeviceProperties(physicalDevices[i], &physicalDeviceProps[i].m_Properties);
            vkGetPhysicalDeviceMemoryProperties(physicalDevices[i], &physicalDeviceProps[i].m_MemoryProperties);
            COUNT_AND_GET1(vkGetPhysicalDeviceQueueFamilyProperties, physicalDeviceProps[i].m_QueueFamilyProperties, physicalDevices[i])
            COUNT_AND_GET1(vkEnumerateDeviceLayerProperties, physicalDeviceProps[i].m_LayerProperties, physicalDevices[i])
            COUNT_AND_GET2(vkEnumerateDeviceExtensionProperties, physicalDeviceProps[i].m_ExtensionProperties, physicalDevices[i], nullptr)
        }

        u64 bestDeviceIndex = 0;
        for (u64 i = 1; i < physicalDevices.size(); ++i)
        {
            const bool isDiscrete = physicalDeviceProps[bestDeviceIndex].m_Properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
            const bool otherIsDiscrete = physicalDeviceProps[i].m_Properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
            if (isDiscrete && !otherIsDiscrete)
                continue;
            else if ((!isDiscrete && otherIsDiscrete)
                || (physicalDeviceProps[bestDeviceIndex].m_Properties.limits.maxFramebufferWidth < physicalDeviceProps[i].m_Properties.limits.maxFramebufferWidth))
                bestDeviceIndex = i;
        }

        bestDevice = physicalDevices[bestDeviceIndex];
        bestDeviceProps = physicalDeviceProps[bestDeviceIndex];
    }


    // ---------------------------------------------------
    // 3. Find queue family which support compute pipeline
    // ---------------------------------------------------
    u32 computeQueue = 0;
    while (computeQueue < bestDeviceProps.m_QueueFamilyProperties.size()
        && ((bestDeviceProps.m_QueueFamilyProperties[computeQueue].queueFlags & VK_QUEUE_COMPUTE_BIT) != VK_QUEUE_COMPUTE_BIT))
    {
        ++computeQueue;
    }
    assert(computeQueue < bestDeviceProps.m_QueueFamilyProperties.size());


    // -------------------------------
    // 4. Create logical device
    // -------------------------------
    VkDeviceQueueCreateInfo queueInfo = { VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, nullptr, 0, computeQueue, 1, nullptr };
    VkPhysicalDeviceFeatures features = {};
    VkDeviceCreateInfo createInfo = {
        VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, nullptr, 0,
        1, &queueInfo,
        0, nullptr,
        0, nullptr,
        &features
    };
    VkDevice device = VK_NULL_HANDLE;
    if (VK_SUCCESS != vkCreateDevice(bestDevice, &createInfo, nullptr, &device))
        std::cout << "Logical Device creation failed\n";


    // -------------------------------
    // 5. Create data buffers
    // -------------------------------
    constexpr u64 elemCount = 25;
    constexpr u64 bufferSize = elemCount * sizeof(vec4);
    VkBufferCreateInfo bufferCreateInfo = {
            VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0,
            bufferSize,
            VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
            VK_SHARING_MODE_EXCLUSIVE, 0, nullptr
    };

    VkBuffer inputBuffer = VK_NULL_HANDLE;
    if (VK_SUCCESS != vkCreateBuffer(device, &bufferCreateInfo, nullptr, &inputBuffer))
        std::cout << "Creating input buffer failed!\n";
    VkMemoryRequirements inputBufferMemory;
    vkGetBufferMemoryRequirements(device, inputBuffer, &inputBufferMemory);

    VkBuffer outputBuffer = VK_NULL_HANDLE;
    if (VK_SUCCESS != vkCreateBuffer(device, &bufferCreateInfo, nullptr, &outputBuffer))
        std::cout << "Creating output buffer failed!\n";
    VkMemoryRequirements outputBufferMemory;
    vkGetBufferMemoryRequirements(device, outputBuffer, &outputBufferMemory);


    // -------------------------------
    // 6. Allocate memory for buffers
    // -------------------------------
    u32 inputMemoryIndex = SelectMemoryHeapFrom(inputBufferMemory.memoryTypeBits, bestDeviceProps.m_MemoryProperties, 
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, 
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
    VkMemoryAllocateInfo inputAllocationInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, inputBufferMemory.size, inputMemoryIndex };
    VkDeviceMemory inputMemory = VK_NULL_HANDLE;
    if (VK_SUCCESS != vkAllocateMemory(device, &inputAllocationInfo, nullptr, &inputMemory))
        std::cout << "Memory allocation of " << inputBufferMemory.size << " failed!\n";

    u32 outputMemoryIndex = SelectMemoryHeapFrom(outputBufferMemory.memoryTypeBits, bestDeviceProps.m_MemoryProperties, 
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, 
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
    VkMemoryAllocateInfo outputAllocationInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, outputBufferMemory.size, outputMemoryIndex };
    VkDeviceMemory outputMemory = VK_NULL_HANDLE;
    if (VK_SUCCESS != vkAllocateMemory(device, &outputAllocationInfo, nullptr, &outputMemory))
        std::cout << "Memory allocation of " << outputBufferMemory.size << " failed!\n";


    // -------------------------------
    // 7. Bind buffers to memory
    // -------------------------------
    if (vkBindBufferMemory(device, inputBuffer, inputMemory, 0) != VK_SUCCESS)
        std::cout << "Input buffer binding failed!\n";

    if (vkBindBufferMemory(device, outputBuffer, outputMemory, 0) != VK_SUCCESS)
        std::cout << "Output buffer binding failed!\n";


    // ----------------------------------
    // 8. Map buffers and upload data
    // ----------------------------------
    vec4* inputData = nullptr;
    if (VK_SUCCESS != vkMapMemory(device, inputMemory, 0, VK_WHOLE_SIZE, 0, (void**)(&inputData)))
        std::cout << "Input memory mapping failed!\n";
    
    for (u32 i = 0; i < elemCount; ++i)
    {
        inputData[i].x = static_cast<u32>(rand() / (float)RAND_MAX * 100);
        inputData[i].y = static_cast<u32>(rand() / (float)RAND_MAX * 100);
        inputData[i].z = static_cast<u32>(rand() / (float)RAND_MAX * 100);
        inputData[i].w = static_cast<u32>(rand() / (float)RAND_MAX * 100);
        std::cout << inputData[i].x << ", " << inputData[i].y << ", " << inputData[i].z << ", " << inputData[i].w << ", ";
    }
    std::cout << "\n\n\n";
    vkUnmapMemory(device, inputMemory);

    vec4* initialOutputData = nullptr;
    if (VK_SUCCESS != vkMapMemory(device, outputMemory, 0, VK_WHOLE_SIZE, 0, (void**)(&initialOutputData)))
        std::cout << "Output memory mapping failed!\n";
    for (u32 i = 0; i < elemCount; ++i)
    {
        initialOutputData[i].x = 2; initialOutputData[i].z = 2; initialOutputData[i].y = 2; initialOutputData[i].w = 2;
    }
    vkUnmapMemory(device, outputMemory);


    // ----------------------------------
    // 9. Create shader/pipeline layout
    // ----------------------------------
    std::vector<VkDescriptorSetLayoutBinding> bindings = {
            { 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr },
            { 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr }
    };
    VkDescriptorSetLayoutCreateInfo layoutInfo = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, 2, bindings.data() };
    VkDescriptorSetLayout descriptorLayout = VK_NULL_HANDLE;
    if (VK_SUCCESS != vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr, &descriptorLayout))
        std::cout << "Descriptor Layout creation failed!\n";

    // Create pipeline layout
    VkPipelineLayoutCreateInfo pipelineCreateInfo = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, 1, &descriptorLayout, 0, nullptr };
    VkPipelineLayout layout = VK_NULL_HANDLE;
    if (VK_SUCCESS != vkCreatePipelineLayout(device, &pipelineCreateInfo, nullptr, &layout))
        std::cout << "Pipeline Layout creation failed\n";


    // --------------------------------------------------
    // 10. Load shader source and create shader module
    // --------------------------------------------------
    std::ifstream file("ComputeShader.spv", std::ifstream::binary);
    u64 size = 0;
    if (!file.is_open())
        std::cout << "Can't open shader!\n";
    
    file.seekg(0, file.end);
    size = file.tellg();
    file.seekg(0);
    char* shaderSrc = new char[size];
    file.read(shaderSrc, size);

    VkShaderModuleCreateInfo shaderCreateInfo = { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, nullptr, 0, size, reinterpret_cast<u32*>(shaderSrc) };
    VkShaderModule shader = VK_NULL_HANDLE;
    if (VK_SUCCESS != vkCreateShaderModule(device, &shaderCreateInfo, nullptr, &shader))
        std::cout << "Shader Module creation failed\n";
    delete[] shaderSrc;
    

    // ----------------------------------
    // 10.5. Create descriptor sets
    // ----------------------------------
    VkDescriptorPoolSize descriptorPoolSize = { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2 };
    VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {
          VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr, 0,
          1, 1, &descriptorPoolSize };
    VkDescriptorPool descriptorPool = VK_NULL_HANDLE;
    vkCreateDescriptorPool(device, &descriptorPoolCreateInfo, 0, &descriptorPool);

    VkDescriptorSetAllocateInfo descriptorSetAllocateInfo = {
          VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, 0,
          descriptorPool, 1, &descriptorLayout
    };
    VkDescriptorSet descriptorSet;
    vkAllocateDescriptorSets(device, &descriptorSetAllocateInfo, &descriptorSet);

    VkDescriptorBufferInfo inputBufferDescriptorInfo = { inputBuffer, 0, VK_WHOLE_SIZE };
    VkDescriptorBufferInfo outputBufferDescriptorInfo = { outputBuffer, 0, VK_WHOLE_SIZE };
    VkWriteDescriptorSet writeDescriptorSet[2] = {
          {
            VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 0, descriptorSet,
            0, 0, 1,
            VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            0, &inputBufferDescriptorInfo, 0
          },
          {
            VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 0, descriptorSet, 
            1, 0, 1,
            VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            0, &outputBufferDescriptorInfo, 0
          }
    };

    vkUpdateDescriptorSets(device, 2, writeDescriptorSet, 0, nullptr);
    
    // -------------------------------
    // 11. Create compute pipeline
    // -------------------------------
    const char* entryPointName = "main";
    VkComputePipelineCreateInfo computeCreateInfo = {
            VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, nullptr, 0,
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, nullptr, 0,
                VK_SHADER_STAGE_COMPUTE_BIT, shader,
                entryPointName, nullptr
            },
            layout, VK_NULL_HANDLE, 0
    };

    VkPipeline pipeline = VK_NULL_HANDLE;
    if (VK_SUCCESS != vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &computeCreateInfo, nullptr, &pipeline))
        std::cout << "Compute Pipeline creation failed!\n";


    // ------------------------------------------------
    // 12. Create Command Pool and Command Buffer
    // --------------------------------------------------
    VkCommandPoolCreateInfo poolInfo = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, 0, computeQueue };
    VkCommandPool cmdPool = VK_NULL_HANDLE;
    if (VK_SUCCESS != vkCreateCommandPool(device, &poolInfo, nullptr, &cmdPool))
        std::cout << "Command Pool creation failed!\n";

    VkCommandBufferAllocateInfo cmdBufferInfo = {
            VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr,
            cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1
    };
    VkCommandBuffer cmdBuffer = VK_NULL_HANDLE;
    if (VK_SUCCESS != vkAllocateCommandBuffers(device, &cmdBufferInfo, &cmdBuffer))
        std::cout << "Command buffer allocation failed!\n";

    // ---------------------------
    // 13. Run compute shader
    // ---------------------------
    VkCommandBufferUsageFlags flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
    VkCommandBufferBeginInfo beginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, nullptr };
    vkBeginCommandBuffer(cmdBuffer, &beginInfo);
    vkCmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, 1, &descriptorSet, 0, 0);
    vkCmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
    vkCmdDispatch(cmdBuffer, 8, 1, 1);
    
    vkEndCommandBuffer(cmdBuffer);

    // -----------------------------------------
    // 14. Submit command buffer (with fence)
    // -----------------------------------------
    VkFenceCreateInfo fenceCreateInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, (VkFenceCreateFlags)0 };
    VkFence fence = VK_NULL_HANDLE;
    if (VK_SUCCESS != vkCreateFence(device, &fenceCreateInfo, nullptr, &fence))
        std::cout << "Fence creation failed!\n";

    VkQueue queue = VK_NULL_HANDLE;
    vkGetDeviceQueue(device, computeQueue, 0, &queue);

    VkSubmitInfo submitInfo = { 
        VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr, 0, nullptr, 0,
        1, &cmdBuffer, 0, nullptr
    };
    VkResult result = vkQueueSubmit(queue, 1, &submitInfo, fence);

    // Wait for everything finished
    if (result == VK_SUCCESS)
    {
        result = vkQueueWaitIdle(queue);
    }
    vkWaitForFences(device, 1, &fence, VK_TRUE, u64(-1));

    // ---------------------------------
    // 15. Grab and display results
    // ---------------------------------
    vec4* resultData = nullptr;
    if (VK_SUCCESS != vkMapMemory(device, outputMemory, 0, VK_WHOLE_SIZE, 0, (void**)(&resultData)))
        std::cout << "Output memory mapping failed!\n";
    for (u32 i = 0; i < elemCount; ++i)
    {
        std::cout << resultData[i].x << ", " << resultData[i].y << ", " << resultData[i].z << ", " << resultData[i].w << ", ";
    }
    std::cout << "\n\n\n";
    vkUnmapMemory(device, outputMemory);

    // ------------------------
    // 16. Resources Cleanup
    // ------------------------
    vkFreeCommandBuffers(device, cmdPool, 1, &cmdBuffer);
    vkDestroyCommandPool(device, cmdPool, nullptr);
    vkDestroyFence(device, fence, nullptr);
    vkDestroyPipeline(device, pipeline, nullptr);
    vkDestroyPipelineLayout(device, layout, nullptr);
    vkDestroyShaderModule(device, shader, nullptr);
    vkDestroyDescriptorSetLayout(device, descriptorLayout, nullptr);

    vkDestroyBuffer(device, inputBuffer, nullptr);
    vkDestroyBuffer(device, outputBuffer, nullptr);
    vkFreeMemory(device, inputMemory, nullptr);
    vkFreeMemory(device, outputMemory, nullptr);

    if (VK_SUCCESS != vkDeviceWaitIdle(device))
        std::cout << "Can't wait for device to idle\n";
    vkDestroyDevice(device, nullptr);
    vkDestroyInstance(instance, nullptr);
}

我认为问题可能是同步不正确,特别是缺少 memory 域操作。 有些平台可能不喜欢这样……

在命令缓冲区的末尾,您需要这个特殊的管道屏障,它将写入从设备域转换到主机域:

VkBufferMemoryBarrier outbuffDependency = {};
outbuffDependency.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
outbuffDependency.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
outbuffDependency.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
outbuffDependency.buffer = outputBuffer;
outbuffDependency.size = VK_WHOLE_SIZE;

vkCmdPipelineBarrier(
    cmdBuffer,
    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
    (VkDependencyFlags)0,
    0, nullptr,
    1, &outbuffDependency,
    0, nullptr
);
    

Vulkan 具有 memory 域的独特概念。 有一个主机域,还有一个设备域。 相同的 memory 在每个域中可以有不同的 state。 例如,memory 写入在设备域中可见并不意味着它在主机域中也可见

围栏(或vk*WaitIdle )不包括规范中警告的 memory 域操作:

笔记

在主机上发出围栏信号并等待它,并不能保证 memory 访问的结果对主机可见,因为围栏所定义的 memory 依赖的访问 scope 仅包括设备访问。 必须使用 memory 屏障或其他 memory 依赖项来保证这一点。 有关详细信息,请参阅主机访问类型的描述。

唯一包含域操作的是带有 VK_PIPELINE_STAGE_HOST_BIT 的 memory 依赖关系,以及 vkQueueSubmit(您确实通过它把 inputBuffer 从主机域传输到了设备域)。

验证层无法合理地捕获此错误,因为它们无法知道(没有一些侵入式操作系统调试功能)您是否真的通过映射指针从缓冲区中读取。

所以,它终于工作了:)
在尝试某些东西时,我做了很多更改,以至于在某些时候我的输入缓冲区被绑定为统一缓冲区......
现在它作为存储缓冲区回来了,并且描述符集已正确创建和更新,我得到了预期的 output。
memory 屏障不是强制性的,但我想这是一个好习惯,当我有一个更复杂的示例时,多个通道具有不同的缓冲区使用情况。
感谢大家的帮助,它确实帮助我弄清楚了所有可能使 Vulkan 实现工作或失败的小细节。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM