[英]the Floating-point error
#include <stdio.h>
/*
 * Reads integers n from standard input and, for each one, prints the partial
 * harmonic sum 1/1 + 1/2 + ... + 1/n with 12 digits after the decimal point.
 *
 * Processing stops at end of input, at the first token that cannot be read
 * as an integer, or at the first n outside the range 1..5000000.
 *
 * Returns 0.
 */
int main(void)
{
    int n;

    // scanf returns the number of items it converted. Testing only against
    // EOF would accept a return value of 0 (non-numeric token), leaving n
    // unset and the offending token still pending in the input, so require
    // exactly one successful conversion.
    while (scanf("%d", &n) == 1) {
        double sum = 0;

        // Range check: stop on the first out-of-range value.
        if (n > 5000000 || n <= 0) {
            break;
        }

        // Forward summation: largest term first.
        for (int i = 1; i <= n; i++) {
            double term = 1.0 / i;
            sum += term;
        }

        // Alternative order, smallest term first. Floating-point addition is
        // not associative, so this variant may round differently in the last
        // printed digits:
        /*
        for ( int i = n; i > 0; i-- ) {
            sum += 1 / (double)i;
        }
        */

        printf("%.12lf\n", sum);
    }
    return 0;
}
為什麼用不同的循環會得到不同的答案?這是浮點誤差嗎?當我輸入5000000時,總和是16.002164235299;但是當我改用另一個for循環(被注釋掉的那部分)時,得到的總和是16.002164235300。
因為浮點運算不滿足結合律:
即 (a + b) + c
不一定等於 a + (b + c)
我也碰到了 a + b + c 的問題。 完全同意ArjunShankar。
// Here A != B in the general case: each addition rounds its result, so the
// order in which the operands are combined can change the final value.
float A = ((a + b) + c);
float B = ((a + c) + b);
大多數浮點運算在執行時都會丟失尾數(mantissa)的部分位,即使各個操作數本身都能被精確表示(例如0.5或0.25)。 事實上,我很高興終於找出了我的應用程序中這個bug的原因。 我寫了一篇簡短的備忘文章,詳細解釋見:
http://stepan.dyatkovskiy.com/2018/04/machine-fp-partial-invariance-issue.html
以下是C示例。 祝好運!
example.c
#include <stdio.h>
// Helper declarations; the definitions follow main().

// Returns, as a float, the unsigned integer whose low `bits` bits are all 1.
float getAllOnes(unsigned bits);
// Returns the float mantissa width in bits, as measured at run time.
// Declared with (void) so that the declaration is a real prototype and
// calls with arguments are rejected by the compiler.
unsigned getMantissaBits(void);
/*
 * Demonstrates that float addition depends on the order of the operands:
 * prints a, b, c, the partial sums a+b and a+c, the two totals (a+b)+c and
 * (a+c)+b, and the difference between those totals.
 *
 * Returns 0.
 */
int main(void) {
    // Determine mantissa size in bits
    unsigned mantissaBits = getMantissaBits();

    // Worked example for a hypothetical 3-bit mantissa (values in binary),
    // assuming round-to-nearest with ties to even:
    //   a = 0b10
    //   b = 0b110                (all ones minus 1)
    //   c = 0b111                (b + 1, every mantissa bit set)
    //   a + b     = 0b1000       exact
    //   a + c     = 0b1001    -> rounded to 0b1000 (needs 4 bits)
    //   (a+b) + c = 0b1000 + 0b111 = 0b1111 -> rounded to 0b10000
    //   (a+c) + b = 0b1000 + 0b110 = 0b1110    exact
    // so the two summation orders give different totals.
    float a = 2;
    float b = getAllOnes(mantissaBits) - 1;
    float c = b + 1;

    float ab = a + b;
    float ac = a + c;
    // Build the totals from the already-rounded float partial sums so the
    // intermediate rounding happens even where the compiler evaluates float
    // expressions in a wider format.
    float abc = ab + c;
    float acb = ac + b;

    printf("\n"
           "FP partial invariance issue demo:\n"
           "\n"
           "Mantissa size = %u bits\n"
           "\n"
           "a = %.1f\n"
           "b = %.1f\n"
           "c = %.1f\n"
           "(a+b) result: %.1f\n"
           "(a+c) result: %.1f\n"
           "(a + b + c) result: %.1f\n"
           "(a + c + b) result: %.1f\n"
           "---------------------------------\n"
           "diff(a + b + c, a + c + b) = %.1f\n\n",
           mantissaBits,
           a, b, c,
           ab, ac,
           abc, acb,
           abc - acb);

    // A completed demo run is a success; a non-zero exit status would signal
    // failure to the calling environment.
    return 0;
}
// Helpers

/*
 * Returns, converted to float, the unsigned integer whose lowest `bits` bits
 * are all set (2^bits - 1). When `bits` is at least the width of `unsigned`,
 * the all-ones unsigned value is used instead.
 *
 * The unsigned-to-float conversion rounds when the value has more
 * significant bits than a float can hold.
 */
float getAllOnes(unsigned bits) {
    unsigned ones = 0;

    // Shift in one bit at a time. Shifting an unsigned value by 1 is always
    // defined, unlike `1 << bits`, which is undefined once `bits` reaches the
    // sign bit or the width of int. Stop early once every bit is set.
    for (unsigned i = 0; i < bits && ones != ~0u; i++) {
        ones = (ones << 1) | 1u;
    }
    return (float)ones;
}
/*
 * Measures the float mantissa width at run time.
 *
 * Starting from a 1-bit all-ones value, it widens the value one bit at a
 * time (via getAllOnes) until adding 1 no longer changes the float, then
 * returns one less than the bit count reached. The search is capped at 1024
 * bits so that it always terminates.
 *
 * NOTE(review): the result is expected to be 24 for IEEE-754 single
 * precision -- confirm on the target platform.
 */
unsigned getMantissaBits(void) {
    const unsigned maxBits = 1024;  // safety cap on the search
    unsigned sz = 1;
    float allOnes = 1;

    while (sz != maxBits) {
        // Force the sum through a float object before comparing. Comparing
        // `allOnes + 1 != allOnes` directly may be evaluated in a wider
        // format, in which case the sum never equals allOnes and the loop
        // would run all the way to the cap.
        volatile float next = allOnes + 1;
        if (next == allOnes) {
            break;
        }
        allOnes = getAllOnes(++sz);
    }
    return sz - 1;
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.