简体   繁体   English

glsl(GPU)矩阵/矢量计算产生的结果与CPU不同

[英]glsl (GPU) matrix/vector calculations yielding different results than CPU

I can't find any documentation of different behavior, so this is just a sanity check that I'm not doing anything wrong... 我找不到任何有关不同行为的文档,因此这只是一项健全性检查,表明我没有做错任何事情...

I've created some helper functions in GLSL to output float/vec/mat comparisons as a color: 我已经在GLSL中创建了一些辅助函数,以将float / vec / mat比较输出为颜色:

note: pretty sure there aren't any errors here, just including it so you know exactly what I'm doing... 注意:可以肯定这里没有任何错误,只包含它,这样您就知道我在做什么...

// Approximate float equality.
// Returns true when (a - b) lies strictly inside (-0.05, 0.05), the fixed
// epsilon used by all the comparison helpers; false otherwise.
bool feq(float a, float b)
{
  float delta = a - b;
  // Written as a negated '>' so the outcome matches "delta > -0.05 && delta < 0.05" exactly.
  if (!(delta > -0.05)) return false;
  return delta < 0.05;
}

// Approximate vec4 equality.
// Returns true when all four components (x, y, z, w) of a and b are equal
// according to feq(); false as soon as one component differs.
bool veq(vec4 a, vec4 b)
{
  return
  (
    feq(a.x, b.x) &&
    feq(a.y, b.y) &&
    feq(a.z, b.z) &&
    feq(a.w, b.w)
  );
}

// Encodes the first component at which a and b differ (per feq) as a color:
//   x -> red, y -> green, z -> blue, w -> yellow.
// Returns white when no component differs.
vec4 cveq(vec4 a, vec4 b)
{
  if (!feq(a.x, b.x)) return vec4(1., 0., 0., 1.);
  if (!feq(a.y, b.y)) return vec4(0., 1., 0., 1.);
  if (!feq(a.z, b.z)) return vec4(0., 0., 1., 1.);
  if (!feq(a.w, b.w)) return vec4(1., 1., 0., 1.);
  return vec4(1., 1., 1., 1.);
}

// Approximate mat4 equality.
// Compares a[0]..a[3] against b[0]..b[3] with veq(), in that order, and
// returns false at the first mismatch; true when all four match.
bool meq(mat4 a, mat4 b)
{
  if (!veq(a[0], b[0])) return false;
  if (!veq(a[1], b[1])) return false;
  if (!veq(a[2], b[2])) return false;
  return veq(a[3], b[3]);
}

// Encodes the first of a[0]..a[3] that differs from its counterpart in b
// (per veq) as a color:
//   [0] -> red, [1] -> green, [2] -> blue, [3] -> yellow.
// Returns white when the matrices match everywhere.
vec4 cmeq(mat4 a, mat4 b)
{
  if (!veq(a[0], b[0])) return vec4(1., 0., 0., 1.);
  if (!veq(a[1], b[1])) return vec4(0., 1., 0., 1.);
  if (!veq(a[2], b[2])) return vec4(0., 0., 1., 1.);
  if (!veq(a[3], b[3])) return vec4(1., 1., 0., 1.);
  return vec4(1., 1., 1., 1.);
}

So I have a model matrix, a view matrix, and a projection matrix. 所以我有一个模型矩阵,一个视图矩阵和一个投影矩阵。 I'm rendering a rectangle on screen (that is correctly projected/transformed...), and setting its color based on how well each step of the calculations matches my CPU-calculated equivalents. 我正在屏幕上渲染一个矩形(它已被正确投影/变换...),并根据计算的每个步骤与我在CPU上计算的等效项匹配的程度来设置其颜色。

// Transform matrices uploaded by the CPU side; main() multiplies them as
// proj_mat * view_mat * (model_mat * position).
uniform mat4 model_mat;
uniform mat4 view_mat;
uniform mat4 proj_mat;

// Per-vertex input position.
attribute vec4 position;

// Diagnostic output: main() stores the color returned by cmeq() here.
varying vec4 var_color;

// Vertex shader entry point.
// First transforms the vertex normally, then runs a series of diagnostics that
// compare GPU-side matrices with values printf'd by the CPU code. Every
// diagnostic writes var_color, so only the final assignment is what leaves the
// shader; the PASSES/FAILS notes are the author's recorded observation for each
// comparison.
void main()
{
  // regular transform - works (at least visually): the rect ends up where expected
  gl_Position = proj_mat * view_mat * (model_mat * position);

  // --- uniform 'proj_mat' vs. the CPU's printout of proj_mat ---
  mat4 cpu_proj = mat4(
    vec4(1.542351,0.000000,0.000000,0.000000),
    vec4(0.000000,1.542351,0.000000,0.000000),
    vec4(0.000000,0.000000,-1.020202,-1.000000),
    vec4(0.000000,0.000000,-2.020202,0.000000)
  );
  var_color = cmeq(proj_mat, cpu_proj); //THIS PASSES (the rect is white)

  // --- uniform 'view_mat' vs. the CPU's printout of view_mat ---
  mat4 cpu_view = mat4(
    vec4(1.000000,0.000000,-0.000000,0.000000),
    vec4(-0.000000,0.894427,0.447214,0.000000),
    vec4(0.000000,-0.447214,0.894427,0.000000),
    vec4(-0.000000,-0.000000,-22.360680,1.000000)
  );
  var_color = cmeq(view_mat, cpu_view); //THIS PASSES (the rect is white)

  // GPU-computed product that the remaining three checks are measured against
  mat4 pv = proj_mat*view_mat;

  // --- GPU proj*view vs. the CPU's printout of proj*view (same order) ---
  mat4 cpu_proj_view = mat4(
    vec4(1.542351,0.000000,0.000000,0.000000),
    vec4(0.000000,1.379521,-0.689760,0.000000),
    vec4(0.000000,-0.456248,-0.912496,20.792208),
    vec4(0.000000,-0.447214,-0.894427,22.360680)
  );
  var_color = cmeq(pv, cpu_proj_view); //THIS FAILS (the rect is green)

  // --- GPU proj*view vs. the CPU's printout of view*proj (reversed order) ---
  mat4 cpu_view_proj = mat4(
    vec4(1.542351,0.000000,0.000000,0.000000),
    vec4(0.000000,1.379521,0.456248,0.903462),
    vec4(0.000000,0.689760,21.448183,-1.806924),
    vec4(0.000000,0.000000,-1.000000,0.000000)
  );
  var_color = cmeq(pv, cpu_view_proj); //THIS FAILS (the rect is green)

  // --- GPU proj*view vs. the CPU's printout of view_t*proj_t
  //     ('_t' = transpose, reversed order) ---
  mat4 cpu_view_t_proj_t = mat4(
    vec4(1.542351,0.000000,0.000000,0.000000),
    vec4(0.000000,1.379521,-0.456248,-0.447214),
    vec4(0.000000,-0.689760,-0.912496,-0.894427),
    vec4(0.000000,0.000000,20.792208,22.360680)
  );
  var_color = cmeq(pv, cpu_view_t_proj_t); //THIS PASSES (the rect is white)
}

And here are my CPU vector/matrix calcs (matrices are column-major [m.x is the first column, not the first row]): 这是我的CPU向量/矩阵计算(矩阵按列主序排列[m.x是第一列,而不是第一行]):

// Multiplies matrix m by vector v and returns the resulting vector.
// Output component i is m.x[i]*v.x + m.y[i]*v.y + m.z[i]*v.z + m.w[i]*v.w,
// i.e. m.x/m.y/m.z/m.w are combined as columns weighted by v's components.
fv4 matmulfv4(fm4 m, fv4 v)
{
  const auto& c0 = m.x;
  const auto& c1 = m.y;
  const auto& c2 = m.z;
  const auto& c3 = m.w;

  return fv4
    { c0[0]*v.x + c1[0]*v.y + c2[0]*v.z + c3[0]*v.w,
      c0[1]*v.x + c1[1]*v.y + c2[1]*v.z + c3[1]*v.w,
      c0[2]*v.x + c1[2]*v.y + c2[2]*v.z + c3[2]*v.w,
      c0[3]*v.x + c1[3]*v.y + c2[3]*v.z + c3[3]*v.w };
}

// Multiplies two 4x4 matrices -- this is the question's ORIGINAL version, kept
// as posted.
// NOTE(review): every inner brace below holds a fixed index on 'a' (a.?[i])
// combined with b.x, b.y, b.z, b.w in turn, i.e. it lists one ROW of a*b.
// If fm4's members x/y/z/w are columns, as the surrounding text states, the
// returned matrix is therefore transpose(a*b) rather than a*b. That is
// consistent with the shader test where only the CPU's view_t*proj_t matched
// the GPU's proj*view. Assumes the aggregate initializes x, y, z, w in that
// order -- TODO confirm against the fm4 definition.
// The corrected column-major version appears further down in this file.
fm4 mulfm4(fm4 a, fm4 b)
{
  return fm4
    { { a.x[0]*b.x[0]+a.y[0]*b.x[1]+a.z[0]*b.x[2]+a.w[0]*b.x[3], a.x[0]*b.y[0]+a.y[0]*b.y[1]+a.z[0]*b.y[2]+a.w[0]*b.y[3], a.x[0]*b.z[0]+a.y[0]*b.z[1]+a.z[0]*b.z[2]+a.w[0]*b.z[3], a.x[0]*b.w[0]+a.y[0]*b.w[1]+a.z[0]*b.w[2]+a.w[0]*b.w[3] },
      { a.x[1]*b.x[0]+a.y[1]*b.x[1]+a.z[1]*b.x[2]+a.w[1]*b.x[3], a.x[1]*b.y[0]+a.y[1]*b.y[1]+a.z[1]*b.y[2]+a.w[1]*b.y[3], a.x[1]*b.z[0]+a.y[1]*b.z[1]+a.z[1]*b.z[2]+a.w[1]*b.z[3], a.x[1]*b.w[0]+a.y[1]*b.w[1]+a.z[1]*b.w[2]+a.w[1]*b.w[3] },
      { a.x[2]*b.x[0]+a.y[2]*b.x[1]+a.z[2]*b.x[2]+a.w[2]*b.x[3], a.x[2]*b.y[0]+a.y[2]*b.y[1]+a.z[2]*b.y[2]+a.w[2]*b.y[3], a.x[2]*b.z[0]+a.y[2]*b.z[1]+a.z[2]*b.z[2]+a.w[2]*b.z[3], a.x[2]*b.w[0]+a.y[2]*b.w[1]+a.z[2]*b.w[2]+a.w[2]*b.w[3] },
      { a.x[3]*b.x[0]+a.y[3]*b.x[1]+a.z[3]*b.x[2]+a.w[3]*b.x[3], a.x[3]*b.y[0]+a.y[3]*b.y[1]+a.z[3]*b.y[2]+a.w[3]*b.y[3], a.x[3]*b.z[0]+a.y[3]*b.z[1]+a.z[3]*b.z[2]+a.w[3]*b.z[3], a.x[3]*b.w[0]+a.y[3]*b.w[1]+a.z[3]*b.w[2]+a.w[3]*b.w[3] } };
}

A key thing to notice is that the view_mat_t * proj_mat_t on the CPU matched the proj_mat * view_mat on the GPU. 需要注意的关键是CPU上的view_mat_t * proj_mat_t与GPU上的proj_mat * view_mat 相匹配 Does anyone know why? 有人知道为什么吗? I've done tests on matrices on the CPU and compared them to results of online matrix multipliers, and they seem correct... 我已经对CPU上的矩阵进行了测试,并将它们与在线矩阵乘法器的结果进行了比较,它们似乎是正确的...

I know that the GPU does things between vert shader and frag shader (I think it like, divides gl_Position by gl_Position.w or something?)... is there something else I'm not taking into account going on here in just the vert shader? 我知道GPU会在vert着色器和frag着色器之间做一些事情(我想是这样,将gl_Position除以gl_Position.w之类的东西?)...还有其他一些我没有考虑到的问题吗?着色器? Is something being auto-transposed at some point? 某些东西是否在某些时候自动转置?

You may wish to consider GLM for CPU-side Matrix instantiation and calculations. 您可能希望考虑将GLM用于CPU端矩阵的实例化和计算。 It'll help reduce possible sources of errors. 这将有助于减少可能的错误源。

Secondly, GPUs and CPUs do not perform identical calculations. 其次,GPU和CPU不能执行相同的计算。 The IEEE 754 standard for computing Floating Point Numbers has relatively rigorous standards for how these calculations have to be performed and to what degree they have to be accurate, but: 用于计算浮点数的IEEE 754标准对于如何执行这些计算以及在多大程度上必须准确的标准具有相对严格的标准,但是:

  1. It's still possible for numbers to come up different in the least significant bit (and more than that depending on the specific operation/function being used) 数字仍然有可能在最低有效位上有所不同(并且更多取决于所使用的特定操作/功能)
  2. Some GPU vendors opt out of ensuring strict IEEE compliance in the first place (Nvidia has been known in the past to prioritize Speed over strict IEEE compliance) 一些GPU供应商选择不首先确保严格的IEEE遵从性(过去已知Nvidia优先于Speed而不是严格的IEEE遵从性)

I would finally note that your CPU-side computations leave a lot of room for rounding errors, which can add up. 我最后会注意到,您的CPU端计算为舍入错误留出了很大空间,舍入错误可能会累积。 The usual advice for these kinds of questions, then, is to include tolerance in your code for small amounts of deviation. 因此,针对此类问题的通常建议是在代码中包括对少量偏差的容忍度。 Usually code to check for 'equality' of two floating point numbers presumes that abs(x - y) < 0.000001 means x and y are essentially equal. 通常,用于检查两个浮点数的“相等性”的代码假定abs(x - y) < 0.000001表示x和y本质上相等。 Naturally, the specific number will have to be calibrated for your personal use. 自然,必须根据您自己的用途校准具体的数值。

And of course, you'll want to check to make sure that all your matrices/uniforms are being passed in correctly. 当然,您需要检查以确保正确传递了所有矩阵/制服。

Ok. 好。 I've found an answer. 我找到了答案。 There is nothing special about matrix operations from within a single shader. 在单个着色器中进行矩阵运算没有什么特别之处。 There are, however, a couple of things you should be aware of: 但是,有几件事情您应该知道:

:1: OpenGL (GLSL) uses column-major matrices. :1:OpenGL(GLSL)使用列主矩阵。 So to construct the matrix that would be visually represented in a mathematical context as this: 因此,要构建在数学上下文中可视化表示的矩阵,如下所示:

 1  2  3  4
 5  6  7  8
 9 10 11 12
13 14 15 16

you would, from within GLSL use: 您将在GLSL内部使用:

// Each vec4 argument is one COLUMN of the matrix written out above, so the
// rows of the mathematical layout appear here as the n-th entry of every vec4.
mat4 m = mat4(
  vec4( 1, 5, 9,13),  // column 0
  vec4( 2, 6,10,14),  // column 1
  vec4( 3, 7,11,15),  // column 2
  vec4( 4, 8,12,16)   // column 3 (no trailing comma: it is a syntax error in GLSL)
);

:2: If you instead use row-major matrices on the CPU, make sure to set the "transpose" flag to true when uploading the matrix uniforms to the shader, and make sure to set it to false if you're using col-major matrices. :2:如果您改为在CPU上使用行主矩阵,请确保在将矩阵uniform上传到着色器时将“transpose”标志设置为true;如果使用列主矩阵,请确保将其设置为false。

So long as you are aware of these two things, you should be good to go. 只要您了解这两件事,您就应该很好。

My particular problem above was that I was in the middle of switching from row-major to col-major in my CPU implementation and wasn't thorough in ensuring that the change was taken into account across all my CPU matrix operations. 上面我的具体问题是,我的CPU实现正处于从行主序切换到列主序的过程中,并且没有彻底确保所有CPU矩阵操作都考虑到了这一变化。

Specifically, here is my now-correct mat4 multiplication implementation, assuming col-major matrices : 具体来说,这是我现在正确的mat4乘法实现, 假设使用col-major矩阵

// Multiplies two 4x4 matrices whose members x/y/z/w are columns (column-major,
// as stated in the surrounding text) and returns a*b in the same layout.
// Each inner brace is one column of the result: entry i of result column j is
//   a.x[i]*bj[0] + a.y[i]*bj[1] + a.z[i]*bj[2] + a.w[i]*bj[3]
// where bj is column j of b.
fm4 mulfm4(fm4 a, fm4 b)
{
  const auto& a0 = a.x;
  const auto& a1 = a.y;
  const auto& a2 = a.z;
  const auto& a3 = a.w;

  const auto& b0 = b.x;
  const auto& b1 = b.y;
  const auto& b2 = b.z;
  const auto& b3 = b.w;

  return fm4
    { { a0[0]*b0[0] + a1[0]*b0[1] + a2[0]*b0[2] + a3[0]*b0[3],
        a0[1]*b0[0] + a1[1]*b0[1] + a2[1]*b0[2] + a3[1]*b0[3],
        a0[2]*b0[0] + a1[2]*b0[1] + a2[2]*b0[2] + a3[2]*b0[3],
        a0[3]*b0[0] + a1[3]*b0[1] + a2[3]*b0[2] + a3[3]*b0[3] },
      { a0[0]*b1[0] + a1[0]*b1[1] + a2[0]*b1[2] + a3[0]*b1[3],
        a0[1]*b1[0] + a1[1]*b1[1] + a2[1]*b1[2] + a3[1]*b1[3],
        a0[2]*b1[0] + a1[2]*b1[1] + a2[2]*b1[2] + a3[2]*b1[3],
        a0[3]*b1[0] + a1[3]*b1[1] + a2[3]*b1[2] + a3[3]*b1[3] },
      { a0[0]*b2[0] + a1[0]*b2[1] + a2[0]*b2[2] + a3[0]*b2[3],
        a0[1]*b2[0] + a1[1]*b2[1] + a2[1]*b2[2] + a3[1]*b2[3],
        a0[2]*b2[0] + a1[2]*b2[1] + a2[2]*b2[2] + a3[2]*b2[3],
        a0[3]*b2[0] + a1[3]*b2[1] + a2[3]*b2[2] + a3[3]*b2[3] },
      { a0[0]*b3[0] + a1[0]*b3[1] + a2[0]*b3[2] + a3[0]*b3[3],
        a0[1]*b3[0] + a1[1]*b3[1] + a2[1]*b3[2] + a3[1]*b3[3],
        a0[2]*b3[0] + a1[2]*b3[1] + a2[2]*b3[2] + a3[2]*b3[3],
        a0[3]*b3[0] + a1[3]*b3[1] + a2[3]*b3[2] + a3[3]*b3[3] } };
}

Again, the above implementation is for column-major matrices. 同样,以上实现是针对列主矩阵的。 That means that a.x is the first column of the matrix, not the first row. 这意味着a.x是矩阵的第一列,而不是第一行。

A key thing to notice is that the view_mat_t * proj_mat_t on the CPU matched the proj_mat * view_mat on the GPU. 需要注意的关键是CPU上的view_mat_t * proj_mat_t与GPU上的proj_mat * view_mat相匹配。 Does anyone know why? 有人知道为什么吗?

The reason for this is that for two matrices A, B: A * B = (B' * A')' , where ' indicates the transpose operation. 原因是对于两个矩阵A,B: A * B =(B'* A')' ,其中'表示转置运算。 As already pointed out by yourself, your math code (as well as popular math libraries such as GLM) uses a row-major representation of matrices, while OpenGL (by default) uses a column-major representation. 正如您自己已经指出的那样,您的数学代码(以及流行的数学库,例如GLM)使用矩阵的行主表示,而OpenGL(默认情况下)使用列主表示。 What this means is that the matrix A, 这意味着矩阵A

    (a b c)
A = (d e f)
    (g h i)

in your CPU math library is stored in memory as [a, b, c, d, e, f, g, h, i], whereas defined in a GLSL shader, it would be stored as [a, d, g, b, e, h, c, f, i]. 在您的CPU数学库中,其存储为[a,b,c,d,e,f,g,h,i],而在GLSL着色器中定义,则将其存储为[a,d,g,b ,e,h,c,f,i]。 So if you upload the data [a, b, c, d, e, f, g, h, i] of the GLM matrix with glUniformMatrix3fv with the transpose parameter set to GL_FALSE, then the matrix you will see in GLSL is 因此,如果您使用glUniformMatrix3fv并将转置参数设置为GL_FALSE上载GLM矩阵的数据[a,b,c,d,e,f,g,h,i],那么在GLSL中看到的矩阵是

     (a d g)
A' = (b e h)
     (c f i)

which is the transposed original matrix. 这是转置的原始矩阵。 Having realized that changing the interpretation of the matrix data between row-major and column-major leads to a transposed version of the original matrix, you can now explain why suddenly the matrix multiplication works the other way around. 认识到在行主列和列主列之间更改矩阵数据的解释会导致原始矩阵的转置版本,现在您可以解释为什么矩阵乘法突然相反地起作用了。 Your view_mat_t and proj_mat_t on the CPU are interpreted as view_mat_t' and proj_mat_t' in your GLSL shader, so uploading the pre-calculated view_mat_t * proj_mat_t to the shader will lead to the same result as uploading both matrices separately and then calculating proj_mat_t * view_mat_t. 在GLSL着色器中,CPU上的view_mat_t和proj_mat_t被解释为view_mat_t'和proj_mat_t',因此将预先计算的view_mat_t * proj_mat_t上载到着色器将产生与分别上载两个矩阵然后计算proj_mat_t * view_mat_t相同的结果。

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM