[英]Why I have to generate mipmap every frame?
我正在使用計算着色器(compute shader)實現泛光(bloom)效果。
如果我在創建紋理后立即生成 mipmap,bloom 將不起作用。 如果我在計算 bloom 的循環中生成 mipmap,它就可以工作。
我不希望每一幀都生成 mipmap。
我的問題是:如果我僅在創建紋理時生成 mipmap,為什么 bloom 不起作用? 為什么我必須每幀都生成 mipmap?
為了簡化事情,我的視口大小始終相同:1600x900。 C++/glsl 代碼可能有點長(但無論如何它就在偽代碼之后),也許你可以只使用偽代碼來發現問題:
colorImage = Create(screenW, screenH);
colorImage.GenerateMipmaps();
prefilteredImage = Create(bloomW, bloomH);
prefilteredImage.GenerateMipmaps();
downsampledStagingImage = Create(bloomW, bloomH);
downsampledStagingImage.GenerateMipmaps();
downsampledImage = Create(bloomW, bloomH);
downsampledImage.GenerateMipmaps();
upsampledImage = Create(bloomW, bloomH);
upsampledImage.GenerateMipmaps();
while(true) {
colorImage = RenderScene();
PrefilterBloomPass();
DownsamplingBloomPass();
UpsamplingBloomPass();
CompositeColorImageWithBloom();
}
每幀都生成 mipmap 的偽代碼:
//textures created same as before
while(true) {
colorImage = RenderScene();
prefilteredImage.GenerateMipmaps();
PrefilterBloomPass();
downsampledStagingImage.GenerateMipmaps();
downsampledImage.GenerateMipmaps();
DownsamplingBloomPass();
upsampledImage.GenerateMipmaps();
UpsamplingBloomPass();
CompositeColorImageWithBloom();
}
結果——bloom 效果:
下面是我的 C++/GLSL 實現,其中有一個用來封裝紋理的 class:
我不認為錯誤與這個 class 有關,但也許你會發現一些東西
/// Wrapper around an OpenGL texture object created through the direct-state-access
/// entry points (glCreateTextures / glTextureStorage2D / glTextureParameteri).
///
/// NOTE(review): there is no destructor, so the texture name stored in `glHandle` is never
/// released with glDeleteTextures. Confirm whether ownership is handled elsewhere before
/// adding one (doing so also requires deleting or defining the copy operations).
struct OpenGLTexture
{
    /// Parameters needed to allocate the texture.
    struct CreateInfo
    {
        uint32_t w = 0; ///< Width of mip level 0.
        uint32_t h = 0; ///< Height of mip level 0.
        //TextureParameters is not related to this question so I'll just skip the implementation
        TextureParameters parameters;
    };

    /// Creates the texture, allocates immutable storage for `miplevel` levels and applies
    /// the wrap / filter parameters that are non-zero.
    ///
    /// No mipmaps are generated here: the storage was only just allocated and level 0 has
    /// no defined contents yet, so down-sampling it would be wasted work. Call
    /// GenerateMipmap() after level 0 has actually been written.
    OpenGLTexture(const CreateInfo& info)
    {
        const uint32_t format = ConvertFormatToOpenGL(info.parameters.format);
        const uint32_t internalFormat = ConvertInternalFormatToOpenGL(info.parameters.format);
        const uint32_t type = ConvertTextureFormatToOpenGLDataType(info.parameters.format);
        //... miplevel, format, wrap variables
        glCreateTextures(target, 1, &glHandle);
        // Size comes from the create info; glTextureStorage2D allocates levels 0 .. miplevel - 1.
        glTextureStorage2D(glHandle, miplevel, internalFormat, info.w, info.h);
        if (wrap != 0) {
            glTextureParameteri(glHandle, GL_TEXTURE_WRAP_S, wrap);
            glTextureParameteri(glHandle, GL_TEXTURE_WRAP_T, wrap);
        }
        if (filterMag != 0) {
            glTextureParameteri(glHandle, GL_TEXTURE_MAG_FILTER, filterMag);
        }
        if (filterMin != 0) {
            glTextureParameteri(glHandle, GL_TEXTURE_MIN_FILTER, filterMin);
        }
        if (miplevel) {
            glTextureParameteri(glHandle, GL_TEXTURE_BASE_LEVEL, 0);
            // The last allocated level is miplevel - 1, not miplevel.
            glTextureParameteri(glHandle, GL_TEXTURE_MAX_LEVEL, miplevel - 1);
        }
    }

    /// Recomputes every mip level above the base level from the current base-level contents.
    /// The result is a snapshot: it goes stale as soon as the base level is rewritten.
    void GenerateMipmap() const
    {
        glGenerateTextureMipmap(glHandle);
    }

    /// Binds the whole texture to texture unit `bindingPoint` for sampler access.
    void Bind(uint32_t bindingPoint) const
    {
        glBindTextureUnit(bindingPoint, glHandle);
    }

    /// Binds a single mip `level` (non-layered, layer 0) to image unit `bindingPoint`
    /// with the given `access` qualifier and image `format`.
    void BindImage(uint32_t bindingPoint, uint32_t level, uint32_t access, uint32_t format) const
    {
        glBindImageTexture(bindingPoint, glHandle, level, GL_FALSE, 0, access, format);
    }

    GLuint glHandle = 0; ///< OpenGL texture name; 0 until the constructor has run.
};
我的 bloom shader 應該是使用了 COD Advanced Warfare 那篇文章中描述的采樣方法,看起來像這樣:
#version 460 core
// Output of the current pass; main() stores exactly one texel per invocation into it.
layout(binding = 0, rgba32f) restrict writeonly uniform image2D o_image;
// Primary input, sampled with textureLod() in every mode.
layout(binding = 1) uniform sampler2D u_colorTexture;
// Secondary input; only sampled in MODE_UPSAMPLE.
layout(binding = 2) uniform sampler2D u_bloomTexture;
// Lower bound for the brightness divisor in QuadraticThreshold().
const float Epsilon = 1.0e-4;
// Brightness threshold and soft-knee width used by Prefilter().
layout(location = 0) uniform float threshold;
layout(location = 1) uniform float knee;
// Mip level of the input that the current pass samples.
layout(location = 2) uniform float lod;
// Selects which pass main() executes; one of the MODE_* values below.
layout(location = 3) uniform int mode;
#define MODE_PREFILTER 0
#define MODE_DOWNSAMPLE 1
#define MODE_UPSAMPLE_FIRST 2
#define MODE_UPSAMPLE 3
// Scales `color` by a soft-knee threshold response.
//   threshold - brightness that is subtracted on the linear part of the response
//   curve     - x: start of the knee, y: upper clamp of the knee ramp, z: quadratic scale
// Brightness is the largest of the r, g and b channels.
vec4 QuadraticThreshold(vec4 color, float threshold, vec3 curve)
{
    float peak = max(max(color.r, color.g), color.b);

    // Quadratic ramp inside the knee region.
    float soft = clamp(peak - curve.x, 0.0, curve.y);
    soft = (soft * soft) * curve.z;

    // Take the stronger of the quadratic and linear responses; Epsilon keeps the
    // divisor away from zero for black pixels.
    float gain = max(soft, peak - threshold) / max(peak, Epsilon);
    color *= gain;
    return color;
}
// Clamps very bright texels and applies the soft-knee threshold driven by the
// `threshold` and `knee` uniforms. `uv` is accepted for interface compatibility
// but is not used.
vec4 Prefilter(vec4 color, vec2 uv)
{
    // Upper bound applied to every channel before thresholding.
    const float clampValue = 20.0f;
    color = min(vec4(clampValue), color);

    // Guard the reciprocal: knee == 0 would otherwise divide by zero and poison the
    // quadratic term of the threshold curve.
    float safeKnee = max(knee, Epsilon);
    vec3 curve = vec3(threshold - knee, knee * 2.0f, 0.25f / safeKnee);

    return QuadraticThreshold(color, threshold, curve);
}
// 13-tap down-sample filter: one centre tap, four inner taps and eight outer taps,
// combined as five overlapping 2x2 boxes (the inner box weighted 0.5, each corner
// box weighted 0.125).
//   tex       - texture to sample
//   lod       - mip level to sample from
//   uv        - sample position in normalized texture coordinates
//   texelSize - reciprocal of the sampled level's size
//
// Tap layout (offsets in units of the halved texelSize):
//   I . H . J      (+2 row)
//   . C . D .      (+1 row)
//   G . A . K      ( 0 row)
//   . B . E .      (-1 row)
//   F . M . L      (-2 row)
vec3 DownsampleBox13(sampler2D tex, float lod, vec2 uv, vec2 texelSize)
{
    // Center
    vec3 A = textureLod(tex, uv, lod).rgb;

    texelSize *= 0.5f; // Sample from center of texels

    // Inner box
    vec3 B = textureLod(tex, uv + texelSize * vec2(-1.0f, -1.0f), lod).rgb;
    vec3 C = textureLod(tex, uv + texelSize * vec2(-1.0f, 1.0f), lod).rgb;
    vec3 D = textureLod(tex, uv + texelSize * vec2(1.0f, 1.0f), lod).rgb;
    vec3 E = textureLod(tex, uv + texelSize * vec2(1.0f, -1.0f), lod).rgb;

    // Outer box. I and L previously duplicated J and F, so the top-left and
    // bottom-right corners were never sampled.
    vec3 F = textureLod(tex, uv + texelSize * vec2(-2.0f, -2.0f), lod).rgb;
    vec3 G = textureLod(tex, uv + texelSize * vec2(-2.0f, 0.0f), lod).rgb;
    vec3 H = textureLod(tex, uv + texelSize * vec2(0.0f, 2.0f), lod).rgb;
    vec3 I = textureLod(tex, uv + texelSize * vec2(-2.0f, 2.0f), lod).rgb;
    vec3 J = textureLod(tex, uv + texelSize * vec2(2.0f, 2.0f), lod).rgb;
    vec3 K = textureLod(tex, uv + texelSize * vec2(2.0f, 0.0f), lod).rgb;
    vec3 L = textureLod(tex, uv + texelSize * vec2(2.0f, -2.0f), lod).rgb;
    vec3 M = textureLod(tex, uv + texelSize * vec2(0.0f, -2.0f), lod).rgb;

    // Weights
    vec3 result = vec3(0.0);
    // Inner box
    result += (B + C + D + E) * 0.5f;
    // Bottom-left box
    result += (F + G + A + M) * 0.125f;
    // Top-left box
    result += (G + H + I + A) * 0.125f;
    // Top-right box (its top-left corner is H, not I)
    result += (A + H + J + K) * 0.125f;
    // Bottom-right box
    result += (M + A + K + L) * 0.125f;

    // 4 samples each
    result *= 0.25f;
    return result;
}
// 3x3 tent filter: centre weight 4, edge neighbours weight 2, corner neighbours
// weight 1, normalized by 16.
//   tex       - texture to sample
//   lod       - mip level to sample from
//   uv        - sample position in normalized texture coordinates
//   texelSize - reciprocal of the sampled level's size
//   radius    - multiplier applied to the tap spacing
vec3 UpsampleTent9(sampler2D tex, float lod, vec2 uv, vec2 texelSize, float radius)
{
    // Tap spacing along each axis.
    vec2 step = texelSize * radius;

    // Accumulate in the same order as the tap list: centre, lower row, middle row,
    // upper row.
    vec3 sum = textureLod(tex, uv, lod).rgb * 4.0f;

    sum += textureLod(tex, uv + vec2(-step.x, -step.y), lod).rgb;
    sum += textureLod(tex, uv + vec2(0.0f, -step.y), lod).rgb * 2.0;
    sum += textureLod(tex, uv + vec2(step.x, -step.y), lod).rgb;

    sum += textureLod(tex, uv + vec2(-step.x, 0.0f), lod).rgb * 2.0;
    sum += textureLod(tex, uv + vec2(step.x, 0.0f), lod).rgb * 2.0;

    sum += textureLod(tex, uv + vec2(-step.x, step.y), lod).rgb;
    sum += textureLod(tex, uv + vec2(0.0f, step.y), lod).rgb * 2.0;
    sum += textureLod(tex, uv + vec2(step.x, step.y), lod).rgb;

    return sum * (1.0f / 16.0f);
}
layout(local_size_x = 4, local_size_y = 4) in;
// One invocation per output texel: runs the pass selected by `mode` and stores the
// result in o_image. If `mode` matches none of the MODE_* values the texel keeps the
// magenta default.
void main()
{
    vec2 outSize = vec2(imageSize(o_image));

    // Texel-centre coordinate of this invocation in normalized [0, 1] space.
    vec2 uv = gl_GlobalInvocationID.xy / outSize;
    uv += (1.0f / outSize) * 0.5f;

    vec4 color = vec4(1, 0, 1, 1);

    switch (mode) {
    case MODE_PREFILTER: {
        vec2 srcSize = vec2(textureSize(u_colorTexture, int(lod)));
        color.rgb = DownsampleBox13(u_colorTexture, 0, uv, 1.0f / srcSize);
        color = Prefilter(color, uv);
        color.a = 1.0f;
        break;
    }
    case MODE_DOWNSAMPLE: {
        vec2 srcSize = vec2(textureSize(u_colorTexture, int(lod)));
        color.rgb = DownsampleBox13(u_colorTexture, lod, uv, 1.0f / srcSize);
        break;
    }
    case MODE_UPSAMPLE_FIRST: {
        // Both terms come from u_colorTexture: level lod + 1 is tent-filtered and added
        // to level lod.
        vec2 coarseSize = vec2(textureSize(u_colorTexture, int(lod + 1.0f)));
        float sampleScale = 1.0f;
        vec3 upsampled = UpsampleTent9(u_colorTexture, lod + 1.0f, uv, 1.0f / coarseSize, sampleScale);
        vec3 existing = textureLod(u_colorTexture, uv, lod).rgb;
        color.rgb = existing + upsampled;
        break;
    }
    case MODE_UPSAMPLE: {
        // Level lod + 1 of u_bloomTexture is tent-filtered and added to level lod of
        // u_colorTexture.
        vec2 coarseSize = vec2(textureSize(u_bloomTexture, int(lod + 1.0f)));
        float sampleScale = 1.0f;
        vec3 upsampled = UpsampleTent9(u_bloomTexture, lod + 1.0f, uv, 1.0f / coarseSize, sampleScale);
        vec3 existing = textureLod(u_colorTexture, uv, lod).rgb;
        color.rgb = existing + upsampled;
        break;
    }
    default:
        break;
    }

    imageStore(o_image, ivec2(gl_GlobalInvocationID), color);
}
因此,正如我之前所說,此 bloom pass 中使用的紋理是在應用程序啟動時創建的。 為了簡單起見,假設它們是全局變量:
OpenGLTexture2D* filteredImage;
OpenGLTexture2D* downsampledStagingImage;
OpenGLTexture2D* downsampledImage;
OpenGLTexture2D* upsampledImage;
static constexpr uint32_t bloomComputeWorkgroupSize = 4;
Vector2<uint32_t> bloomViewport;
void Init(const Vector2<uint32_t>& viewport)
{
bloomViewport = sceneViewport / 2U;
bloomViewport += bloomComputeWorkgroupSize - (bloomViewport % bloomComputeWorkgroupSize);
OpenGLTexture2D::CreateInfo bloomImageInfo;
bloomImageInfo.w = bloomViewport.x;
bloomImageInfo.h = bloomViewport.y;
bloomImageInfo.parameters = TextureParameters().
Format(TextureFormat::RGBA32_FLOAT).
Wrap(TextureWrap::Clamp);
filteredImage = new OpenGLTexture(bloomImageInfo);
downsampledStagingImage = new OpenGLTexture(bloomImageInfo);
downsampledImage = new OpenGLTexture(bloomImageInfo);
upsampledImage = new OpenGLTexture(bloomImageInfo);
}
然后每一幀我執行 3 個 pass:預過濾(prefilter)pass、降采樣(downsample)pass 和升采樣(upsample)pass:
float threshold = 1.0f;
float knee = 0.1f;
// Per-frame bloom: prefilter the scene colour, build a down-sampled chain by
// ping-ponging between two textures, then walk back up the chain accumulating the
// blurred levels.
//
// NOTE(review): levels >= 1 of filteredImage are sampled below (by the first
// down-sample step for i >= 2 and by the up-sample loop for mip >= 1), but no pass in
// this loop writes them. They only hold current data if filteredImage->GenerateMipmap()
// is called after the prefilter pass every frame, or if those reads are redirected to
// downsampledImage. Generating mipmaps once at creation time cannot work, because
// level 0 is rewritten every frame.
while(true) {
    OpenGLTexture* colorImage = RenderScene();
    auto shader = ShaderCache::getShader("Bloom");

    // Dispatches enough work groups to cover a width x height mip level, then makes the
    // image stores visible to the next pass. The next pass reads through samplers
    // (textureLod), which requires GL_TEXTURE_FETCH_BARRIER_BIT; the image-access bit
    // alone only orders later image load/store/atomic accesses.
    const auto runPass = [](uint32_t width, uint32_t height) {
        const uint32_t groupsX = (width + bloomComputeWorkgroupSize - 1U) / bloomComputeWorkgroupSize;
        const uint32_t groupsY = (height + bloomComputeWorkgroupSize - 1U) / bloomComputeWorkgroupSize;
        glDispatchCompute(groupsX, groupsY, 1);
        glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
    };

    // ---- Prefilter: scene colour -> filteredImage level 0 ----
    float lod = 0.0f;
    shader->setUniform("mode", 0); // 0 means prefiltering
    shader->setUniform("threshold", threshold);
    shader->setUniform("knee", knee);
    shader->setUniform("lod", lod);
    shader->Bind();
    filteredImage->BindImage(0, 0, GL_WRITE_ONLY, GL_RGBA32F);
    colorImage->Bind(1);
    //bind whatever, prefilter mode does not use the second slot
    colorImage->Bind(2);
    auto mipSize = filteredImage->getMipSize(0);
    runPass(mipSize.x, mipSize.y);

    // ---- Down-sampling: two steps per level ----
    shader->setUniform("mode", 1); //1 means downsampling
    uint32_t mips = filteredImage->getMipCount() - 2;
    for (uint32_t i = 1; i < mips; i++) {
        mipSize = filteredImage->getMipSize(i);

        // Step 1: filteredImage level i - 1 -> downsampledStagingImage level i.
        downsampledStagingImage->BindImage(0, i, GL_WRITE_ONLY, GL_RGBA32F);
        filteredImage->Bind(1);
        lod = (float)i - 1.0f;
        shader->setUniform("lod", lod);
        shader->Bind();
        runPass(mipSize.x, mipSize.y);

        // Step 2: downsampledStagingImage level i -> downsampledImage level i.
        downsampledImage->BindImage(0, i, GL_WRITE_ONLY, GL_RGBA32F);
        downsampledStagingImage->Bind(1);
        lod = (float)i;
        shader->setUniform("lod", lod);
        shader->Bind();
        runPass(mipSize.x, mipSize.y);
    }

    // ---- First up-sample: combines the two smallest down-sampled levels ----
    mips -= 2;
    lod--; // lod now equals mips: the level written by this pass
    shader->setUniform("mode", 2); //upsample first
    // The down-sample loop index no longer exists here; the target level is `mips`,
    // matching the mip size queried below.
    upsampledImage->BindImage(0, mips, GL_WRITE_ONLY, GL_RGBA32F);
    downsampledImage->Bind(1);
    colorImage->Bind(2); // second slot is not sampled in this mode
    shader->setUniform("lod", lod);
    shader->Bind();
    mipSize = upsampledImage->getMipSize(mips);
    runPass(mipSize.x, mipSize.y);

    // ---- Remaining up-samples ----
    // Mode 3 (MODE_UPSAMPLE) adds level mip + 1 of the bloom texture (slot 2) to level mip
    // of the colour texture (slot 1); mode 2 would ignore slot 2 entirely.
    shader->setUniform("mode", 3); //upsample in a loop
    // Continue with the level directly above the one just written, so that every
    // iteration reads a level produced by the previous pass.
    for (int32_t mip = static_cast<int32_t>(mips) - 1; mip >= 0; mip--) {
        mipSize = upsampledImage->getMipSize(mip);
        upsampledImage->BindImage(0, mip, GL_WRITE_ONLY, GL_RGBA32F);
        filteredImage->Bind(1);
        upsampledImage->Bind(2);
        shader->setUniform("lod", (float)mip);
        shader->Bind();
        runPass(mipSize.x, mipSize.y);
    }
}
整個問題似乎基於一個錯誤的前提: glGenerateMipmap(以及 DSA 版本的 glGenerateTextureMipmap)不僅負責為 mipmap 分配 memory,而且還計算它們的下采樣內容。
由於您的代碼在每一幀中將新數據寫入紋理,並且您從 mipmap 中讀取,因此還必須在每一幀中計算新的下采樣表示。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.