為什么我必須每幀生成 mipmap？

Question

我正在使用計算着色器實現綻放效果。

如果我在創建紋理后立即生成 mipmap，bloom 將不起作用。 如果我在計算 bloom 的循環中生成 mipmap，它就可以工作。

我不希望每一幀都生成 mipmap。

我的問題是，如果我僅在創建紋理時生成 mipmap，為什么光暈不起作用。 為什么我必須每幀生成 mips？

為了簡化事情，我的視口大小始終相同：1600x900。 C++/glsl 代碼可能有點長（但無論如何它就在偽代碼之后），也許你可以只使用偽代碼來發現問題：

colorImage = Create(screenW, screenH);
colorImage.GenerateMipmaps();

prefilteredImage = Create(bloomW, bloomH);
prefilteredImage.GenerateMipmaps();

downsampledStagingImage = Create(bloomW, bloomH);
downsampledStagingImage.GenerateMipmaps();

downsampledImage = Create(bloomW, bloomH);
downsampledImage.GenerateMipmaps();

upsampledgImage = Create(bloomW, bloomH);
upsampledImage.GenerateMipmaps();

while(true) {
    colorImage = RenderScene();

    PrefilterBloomPass();
    DownsamplingBloomPass();
    UpsamplingBloomPass();

    CompositeColorImageWithBloom();
}

結果 - 沒有綻放：

mips 生成每幀偽代碼：

//textures created same as before

while(true) {
    colorImage = RenderScene();

    prefilteredImage.GenerateMipmaps();
    PrefilterBloomPass();

    downsampledStagingImage.GenerateMipmaps();
    downsampledImage.GenerateMipmaps();
    DownsamplingBloomPass();

    upsampledImage.GenerateMipmaps();
    UpsamplingBloomPass();

    CompositeColorImageWithBloom();
}

結果 - 開花效果：

這是我有一個處理它的 class 的 c++/glsl 實現：

我不認為錯誤與這個 class 有關，但也許你會發現一些東西

struct OpenGLTexture
{
    struct CreateInfo
    {
        uint32_t w = 0;
        uint32_t h = 0;
        //TextureParameters is not related to this question so I'll just skip the implementation
        TextureParameters parameters;
    };

    OpenGLTexture(const CreateInfo& info)
    {
        const uint32_t format = ConvertFormatToOpenGL(info.parameters.format);
        const uint32_t internalFormat = ConvertInternalFormatToOpenGL(info.parameters.format);
        const uint32_t type = ConvertTextureFormatToOpenGLDataType(info.parameters.format);
        //... miplevel, format, wrap variables

        glCreateTextures(target, 1, &glHandle);

        glTextureStorage2D(glHandle, miplevel, internalFormat, w, h);

        if (wrap != 0) {
            glTextureParameteri(glHandle, GL_TEXTURE_WRAP_S, wrap);
            glTextureParameteri(glHandle, GL_TEXTURE_WRAP_T, wrap);
        }

        if (filterMag != 0) {
            glTextureParameteri(glHandle, GL_TEXTURE_MAG_FILTER, filterMag);
        }

        if (filterMin != 0) {
            glTextureParameteri(glHandle, GL_TEXTURE_MIN_FILTER, filterMin);
        }

        if (miplevel) {
            glTextureParameteri(glHandle, GL_TEXTURE_BASE_LEVEL, 0);
            glTextureParameteri(glHandle, GL_TEXTURE_MAX_LEVEL, miplevel);
            glGenerateTextureMipmap(glHandle);
        }
    }

    void GenerateMipmap() const
    {
        glGenerateTextureMipmap(texture);
    }

    void Bind(uint32_t bindingPoint) 
    {
        glBindTextureUnit(bindingPoint, texture);
    }

    void BindImage(uint32_t bindingPoint, uint32_t level, uint32_t access, uint32_t format)
    {
        glBindImageTexture(bindingPoint, texture, level, GL_FALSE, 0, access, format);
    }

    GLuint glHandle = 0;
};

我猜我的 bloom shader 使用了 COD Advanced Warfare 文章中描述的采樣方法？ 看起來像這樣

#version 460 core

layout(binding = 0, rgba32f) restrict writeonly uniform image2D o_image;
layout(binding = 1) uniform sampler2D u_colorTexture;
layout(binding = 2) uniform sampler2D u_bloomTexture;

const float Epsilon = 1.0e-4;

layout(location = 0) uniform float threshold;
layout(location = 1) uniform float knee;
layout(location = 2) uniform float lod;
layout(location = 3) uniform int mode;

#define MODE_PREFILTER      0
#define MODE_DOWNSAMPLE     1
#define MODE_UPSAMPLE_FIRST 2
#define MODE_UPSAMPLE       3

vec4 QuadraticThreshold(vec4 color, float threshold, vec3 curve)
{
    // Maximum pixel brightness
    float brightness = max(max(color.r, color.g), color.b);
    // Quadratic curve
    float rq = clamp(brightness - curve.x, 0.0, curve.y);
    rq = (rq * rq) * curve.z;
    color *= max(rq, brightness - threshold) / max(brightness, Epsilon);
    return color;
}

vec4 Prefilter(vec4 color, vec2 uv)
{
    vec4 params = { threshold, threshold - knee, knee * 2.0f, 0.25f / knee };

    float clampValue = 20.0f;
    color = min(vec4(clampValue), color);
    color = QuadraticThreshold(color, params.x, params.yzw);
    return color;
}

vec3 DownsampleBox13(sampler2D tex, float lod, vec2 uv, vec2 texelSize)
{
    // Center
    vec3 A = textureLod(tex, uv, lod).rgb;

    texelSize *= 0.5f; // Sample from center of texels

    // Inner box
    vec3 B = textureLod(tex, uv + texelSize * vec2(-1.0f, -1.0f), lod).rgb;
    vec3 C = textureLod(tex, uv + texelSize * vec2(-1.0f, 1.0f), lod).rgb;
    vec3 D = textureLod(tex, uv + texelSize * vec2(1.0f, 1.0f), lod).rgb;
    vec3 E = textureLod(tex, uv + texelSize * vec2(1.0f, -1.0f), lod).rgb;

    // Outer box
    vec3 F = textureLod(tex, uv + texelSize * vec2(-2.0f, -2.0f), lod).rgb;
    vec3 G = textureLod(tex, uv + texelSize * vec2(-2.0f, 0.0f), lod).rgb;
    vec3 H = textureLod(tex, uv + texelSize * vec2(0.0f, 2.0f), lod).rgb;
    vec3 I = textureLod(tex, uv + texelSize * vec2(2.0f, 2.0f), lod).rgb;
    vec3 J = textureLod(tex, uv + texelSize * vec2(2.0f, 2.0f), lod).rgb;
    vec3 K = textureLod(tex, uv + texelSize * vec2(2.0f, 0.0f), lod).rgb;
    vec3 L = textureLod(tex, uv + texelSize * vec2(-2.0f, -2.0f), lod).rgb;
    vec3 M = textureLod(tex, uv + texelSize * vec2(0.0f, -2.0f), lod).rgb;

    // Weights
    vec3 result = vec3(0.0);
    // Inner box
    result += (B + C + D + E) * 0.5f;
    // Bottom-left box
    result += (F + G + A + M) * 0.125f;
    // Top-left box
    result += (G + H + I + A) * 0.125f;
    // Top-right box
    result += (A + I + J + K) * 0.125f;
    // Bottom-right box
    result += (M + A + K + L) * 0.125f;

    // 4 samples each
    result *= 0.25f;

    return result;
} 

vec3 UpsampleTent9(sampler2D tex, float lod, vec2 uv, vec2 texelSize, float radius)
{
    vec4 offset = texelSize.xyxy * vec4(1.0f, 1.0f, -1.0f, 0.0f) * radius;

    // Center
    vec3 result = textureLod(tex, uv, lod).rgb * 4.0f;

    result += textureLod(tex, uv - offset.xy, lod).rgb;
    result += textureLod(tex, uv - offset.wy, lod).rgb * 2.0;
    result += textureLod(tex, uv - offset.zy, lod).rgb;

    result += textureLod(tex, uv + offset.zw, lod).rgb * 2.0;
    result += textureLod(tex, uv + offset.xw, lod).rgb * 2.0;

    result += textureLod(tex, uv + offset.zy, lod).rgb;
    result += textureLod(tex, uv + offset.wy, lod).rgb * 2.0;
    result += textureLod(tex, uv + offset.xy, lod).rgb;

    return result * (1.0f / 16.0f);
}

layout(local_size_x = 4, local_size_y = 4) in;
void main()
{
    vec2 imgSize = vec2(imageSize(o_image));
    vec2 uv = gl_GlobalInvocationID.xy / imgSize;
    uv += (1.0f / imgSize) * 0.5f;

    vec2 texSize = vec2(textureSize(u_colorTexture, int(lod))); 
    vec4 color = vec4(1, 0, 1, 1);
    if(mode == MODE_PREFILTER) {
        color.rgb = DownsampleBox13(u_colorTexture, 0, uv, 1.0f / texSize);
        color = Prefilter(color, uv);
        color.a = 1.0f;
    } else if (mode == MODE_UPSAMPLE_FIRST) {
        vec2 bloomTexSize = vec2(textureSize(u_colorTexture, int(lod + 1.0f)));
        float sampleScale = 1.0f;
        vec3 upsampledTexture = UpsampleTent9(u_colorTexture, lod + 1.0f, uv, 1.0f / bloomTexSize, sampleScale);

        vec3 existing = textureLod(u_colorTexture, uv, lod).rgb;
        color.rgb = existing + upsampledTexture;
    } else if (mode == MODE_UPSAMPLE) {
        vec2 bloomTexSize = vec2(textureSize(u_bloomTexture, int(lod + 1.0f)));
        float sampleScale = 1.0f;
        vec3 upsampledTexture = UpsampleTent9(u_bloomTexture, lod + 1.0f, uv, 1.0f / bloomTexSize, sampleScale);

        vec3 existing = textureLod(u_colorTexture, uv, lod).rgb;
        color.rgb = existing + upsampledTexture;
    } else if (mode == MODE_DOWNSAMPLE) {
        color.rgb = DownsampleBox13(u_colorTexture, lod, uv, 1.0f / texSize);
    }

    imageStore(o_image, ivec2(gl_GlobalInvocationID), color);
}

因此，正如我之前所說，此 bloom pass 中使用的紋理是在應用程序啟動時創建的。 為了簡單起見，假設它們是全球性的

OpenGLTexture2D* filteredImage;
OpenGLTexture2D* downsampledStagingImage;
OpenGLTexture2D* downsampledImage;
OpenGLTexture2D* upsampledImage;

static constexpr uint32_t bloomComputeWorkgroupSize = 4;

Vector2<uint32_t> bloomViewport;

void Init(const Vector2<uint32_t>& viewport)
{
    bloomViewport = sceneViewport / 2U;
    bloomViewport += bloomComputeWorkgroupSize - (bloomViewport % bloomComputeWorkgroupSize);

    OpenGLTexture2D::CreateInfo bloomImageInfo;
    bloomImageInfo.w = bloomViewport.x;
    bloomImageInfo.h = bloomViewport.y;
    bloomImageInfo.parameters = TextureParameters().
        Format(TextureFormat::RGBA32_FLOAT).
        Wrap(TextureWrap::Clamp);

    filteredImage = new OpenGLTexture(bloomImageInfo);
    downsampledStagingImage = new OpenGLTexture(bloomImageInfo);
    downsampledImage = new OpenGLTexture(bloomImageInfo);
    upsampledImage = new OpenGLTexture(bloomImageInfo);
}

然后每一幀我執行 3 次傳遞：過濾傳遞、縮小傳遞和放大傳遞

float threshold = 1.0f;
float knee = 0.1f;

while(true) {
    OpenGLTexture* colorImage = RenderScene();

    auto shader = ShaderCache::getShader("Bloom");

    float lod = 0.0f;

    shader->setUniform("mode", 0); // 0 means prefiltering
    shader->setUniform("threshold", threshold);
    shader->setUniform("knee", knee);
    shader->setUniform("lod", lod);
    shader->Bind();

    filteredImage->BindImage(0, 0, GL_WRITE_ONLY, GL_RGBA32F);
    colorImage->Bind(1);
    //bind whatever, prefilter mode does not use the second slot
    colorImage->Bind(2);     
    
    auto mipSize = filteredImage->getMipSize(0);
    uint32_t workGroupsX = (uint32_t)glm::ceil((float)mipSize.x / (float)bloomComputeWorkgroupSize);
    uint32_t workGroupsY = (uint32_t)glm::ceil((float)mipSize.y / (float)bloomComputeWorkgroupSize);

    glDispatchCompute(workGroupsX, workGroupsY, 1);
    glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);

    //downsampling
    shader->setUniform("mode", 1); //1 means downsampling
    uint32_t mips = filteredImage->getMipCount() - 2;
    for (uint32_t i = 1; i < mips; i++) {
        mipSize = filteredImage->getMipSize(i);
        workGroupsX = (uint32_t)glm::ceil((float)mipSize.x / (float)bloomComputeWorkgroupSize);
        workGroupsY = (uint32_t)glm::ceil((float)mipSize.y / (float)bloomComputeWorkgroupSize);

        {
            downsampledStagingImage->BindImage(0, i, GL_WRITE_ONLY, GL_RGBA32F);

            filteredImage->Bind(1);

            lod = (float)i - 1.0f;
            shader->setUniform("lod", lod);
            shader->Bind();
            glDispatchCompute(workGroupsX, workGroupsY, 1);
        }

        glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);

        {
            downsampledImage->BindImage(0, i, GL_WRITE_ONLY, GL_RGBA32F);

            downsampledStagingImage->Bind(1);

            lod = (float)i;
            shader->setUniform("lod", lod);
            shader->Bind();
            glDispatchCompute(workGroupsX, workGroupsY, 1);
        }

        glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
    }

    //upsampling
    mips -= 2;
    lod--;
    shader->setUniform("mode", 2); //upsample first
   
    upsampledImage->BindImage(0, i, GL_WRITE_ONLY, GL_RGBA32F);
    downsampledImage->Bind(1);
    colorImage->Bind(2);
    
    shader->setUniform("lod", lod);
    shader->Bind();
   
    mipSize = upsampledImage->getMipSize(mips);
    workGroupsX = (uint32_t)glm::ceil((float)mipSize.x / (float)bloomComputeWorkgroupSize);
    workGroupsY = (uint32_t)glm::ceil((float)mipSize.y / (float)bloomComputeWorkgroupSize);

    glDispatchCompute(workGroupsX, workGroupsY, 1);
    glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);

    shader->setUniform("mode", 2); //upsample in a loop
    for (int32_t mip = mips - 3; mip >= 0; mip--) {
        mipSize = upsampledImage->getMipSize(mip);
        workGroupsX = (uint32_t)glm::ceil((float)mipSize.x / (float)bloomComputeWorkgroupSize);
        workGroupsY = (uint32_t)glm::ceil((float)mipSize.y / (float)bloomComputeWorkgroupSize);

        upsampledImage->BindImage(0, mip, GL_WRITE_ONLY, GL_RGBA32F);

        filteredImage->Bind(1);
        upsampledImage->Bind(2);

        shader->setUniform("lod", (float)mip);
        shader->Bind();

        glDispatchCompute(workGroupsX, workGroupsY, 1);          
        glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
    }
}

Answer 1

整個問題似乎基於一個錯誤的前提： glGenerateMipmaps不僅負責為 mipmaps 分配 memory，而且還計算它們的下采樣內容。

由於您的代碼在每一幀中將新數據寫入紋理，並且您從 mipmap 中讀取，因此還必須在每一幀中計算新的下采樣表示。

為什么我必須每幀生成 mipmap？

問題描述

1 個解決方案

解決方案1
2 已采納 2022-09-30 12:18:10

為什么我必須每幀生成 mipmap？

問題描述

1 個解決方案

解決方案1 2 已采納 2022-09-30 12:18:10

解決方案1
2 已采納 2022-09-30 12:18:10