为什么一种方法比另一种慢

Question

我的代码有两种变体，其中一种的运行速度比另一种慢得多，而我认为不应该如此。有人能解释一下为什么它需要这么长时间才能运行吗？代码是一个基本的泛洪填充（flood fill）算法，pos 变量已在前面声明为 unsigned long。谢谢。

这段代码的运行速度比第二段慢得多。

/*
 * Flood fill, question variant 1 (the slow one).
 *
 * Starting from (x, y), replaces pixels equal to UCHAR_MAX with `value`,
 * visiting the four direct neighbours through a work queue.
 *
 * Only the flat index `pos` and the row ret[1] are range-checked; the
 * column ret[0] is not.  Because the coordinates are unsigned long, stepping
 * past the right edge (ret[0] == pgmWidth) yields the index of the first
 * pixel of the next row, and stepping left from column 0 wraps around to
 * the index of the last pixel of the previous row.
 *
 * NOTE(review): pos, pgmWidth, pgmHeight and pgmSize are declared outside
 * this snippet; the question states pos is an unsigned long, and pgmSize is
 * presumably pgmWidth * pgmHeight -- confirm.
 */
void
flood(byte * array, unsigned long x, unsigned long y, byte value)
{
    node_t *head = NULL;

    /* The seed is queued unchecked; it is validated when dequeued. */
    queue(&head, x, y);
    while (head != NULL) {
        /* ret[0] is the x coordinate, ret[1] the y coordinate; the buffer
         * returned by dequeue() is released with free() below. */
        unsigned long *ret = dequeue(&head);

        pos = ret[0] + ret[1] * pgmWidth;
        /* `pos >= 0` can never be false if pos is unsigned, as the question states. */
        if (pos >= 0 && pos < pgmSize && ret[1] < pgmHeight &&
            array[pos] == UCHAR_MAX) {
            array[pos] = value;

            /* All four neighbours are queued unconditionally, so the same
             * pixel can be queued more than once before it is filled. */
            queue(&head, ret[0] + 1, ret[1]);
            queue(&head, ret[0], ret[1] + 1);
            queue(&head, ret[0], ret[1] - 1);   /* wraps when ret[1] == 0 */
            queue(&head, ret[0] - 1, ret[1]);   /* wraps when ret[0] == 0 */
        }
        free(ret);
    }
}

这段代码运行得更快，尽管它的判断条件比第一段更多。

/*
 * Flood fill, question variant 2 (the fast one).
 *
 * Starting from (x, y), replaces pixels equal to UCHAR_MAX with `value`,
 * visiting the four direct neighbours through a work queue.
 *
 * Both coordinates are range-checked against pgmWidth / pgmHeight, so the
 * fill stops at the left and right edges of the image as well as at the top
 * and bottom.
 *
 * NOTE(review): pos, pgmWidth and pgmHeight are declared outside this
 * snippet; the question states pos is an unsigned long.
 */
void
flood(byte * array, unsigned long x, unsigned long y, byte value)
{
    node_t *head = NULL;

    /* The seed is queued unchecked; it is validated when dequeued. */
    queue(&head, x, y);
    while (head != NULL) {
        /* ret[0] is the x coordinate, ret[1] the y coordinate; the buffer
         * returned by dequeue() is released with free() below. */
        unsigned long *ret = dequeue(&head);

        /* pos is computed before the checks, but array[pos] is only read
         * once the coordinate tests have passed (short-circuit &&). */
        pos = ret[0] + ret[1] * pgmWidth;
        /* The two `>= 0` tests are always true: ret[] holds unsigned long. */
        if (ret[0] >= 0 && ret[0] < pgmWidth && ret[1] >= 0 &&
            ret[1] < pgmHeight && array[pos] == UCHAR_MAX) {
            array[pos] = value;

            /* All four neighbours are queued unconditionally, so the same
             * pixel can be queued more than once before it is filled. */
            queue(&head, ret[0] + 1, ret[1]);
            queue(&head, ret[0], ret[1] + 1);
            queue(&head, ret[0], ret[1] - 1);   /* wraps when ret[1] == 0; rejected above */
            queue(&head, ret[0] - 1, ret[1]);   /* wraps when ret[0] == 0; rejected above */
        }
        free(ret);
    }
}

Answer 1

这两个函数之间有一个显著的区别：第二段代码在二维数组的左右边缘停止泛洪，而第一段代码不会。

两者探索的像素集合不同：例如，假设数组中除了一条从上到下的垂直线之外，全部填充为 UCHAR_MAX。从任何不在该垂直线上的像素出发，第一段代码都会填充垂直线两侧的区域（因为它会越过左右边缘绕到相邻行），而第二段代码只会填充起点所在的那一侧。

这可能解释了运行时间的差异，具体取决于调用时数组的实际内容。

这两种方法都可能有意义，但语义不同：

第一段代码把数组当作圆柱体的表面来填充，并且在接缝处错开了一个像素（越过右边缘会到达下一行，越过左边缘会到达上一行）。
第二段代码把数组当作平面矩形来填充，这看起来更合理。

有一些方法可以进一步改进代码：

应该删除 2 个冗余的 `>= 0` 判断：坐标是无符号类型，按定义具有回绕（wraparound）语义，这些判断恒为真；
应该只把实际可以到达的像素加入队列；
应该在把像素坐标加入队列之前先修改像素值，以避免同一个像素被多次加入队列。

/*
 * Flood-fill the 4-connected region of UCHAR_MAX pixels that contains
 * (x, y) with `value`, treating `array` as a flat rectangle indexed as
 * x + y * pgmWidth.
 *
 * A pixel is overwritten before its coordinates are queued, so it is
 * queued at most once (this relies on `value` differing from UCHAR_MAX).
 * Nothing happens when the seed is outside the image or is not UCHAR_MAX.
 *
 * The file-level variable `pos` is used as scratch storage for the flat
 * index of the pixel currently being processed.
 */
void flood(byte *array, unsigned long x, unsigned long y, byte value)
{
    node_t *head = NULL;

    /* Validate the seed; array[pos] is only read for in-range coordinates. */
    pos = x + y * pgmWidth;
    if (x >= pgmWidth || y >= pgmHeight || array[pos] != UCHAR_MAX) {
        return;
    }
    array[pos] = value;
    queue(&head, x, y);

    while (head) {
        unsigned long *cell = dequeue(&head);
        unsigned long next;

        /* Copy the coordinates out so the buffer can be released at once. */
        x = cell[0];
        y = cell[1];
        free(cell);

        pos = x + y * pgmWidth;

        /* Right neighbour. */
        next = pos + 1;
        if (x + 1 < pgmWidth && array[next] == UCHAR_MAX) {
            array[next] = value;
            queue(&head, x + 1, y);
        }

        /* Neighbour one row below. */
        next = pos + pgmWidth;
        if (y + 1 < pgmHeight && array[next] == UCHAR_MAX) {
            array[next] = value;
            queue(&head, x, y + 1);
        }

        /* Neighbour one row above; only indexed when y > 0. */
        next = pos - pgmWidth;
        if (y > 0 && array[next] == UCHAR_MAX) {
            array[next] = value;
            queue(&head, x, y - 1);
        }

        /* Left neighbour; only indexed when x > 0. */
        next = pos - 1;
        if (x > 0 && array[next] == UCHAR_MAX) {
            array[next] = value;
            queue(&head, x - 1, y);
        }
    }
}

最后，对 pos、pgmWidth 和 pgmHeight 使用全局变量可能会使代码变慢。无论如何，把 pos 声明为全局变量并没有实际意义。可以试试下面这个替代方案：

/*
 * Flood-fill the 4-connected region of UCHAR_MAX pixels that contains
 * (x, y) with `value`, treating `array` as a flat rectangle indexed as
 * x + y * width.
 *
 *   array  pixel buffer to modify in place
 *   x, y   seed coordinates; nothing happens when they lie outside the
 *          image or the seed pixel is not UCHAR_MAX
 *   value  replacement pixel value
 *
 * The image dimensions are copied from the globals pgmWidth / pgmHeight
 * into locals once, and the flat index `pos` is a local as well.
 *
 * A pixel is overwritten before its coordinates are queued, so it is
 * queued at most once.  NOTE: this relies on `value` differing from
 * UCHAR_MAX; otherwise filled pixels would look unfilled and be queued
 * again.
 */
void flood(byte *array, unsigned long x, unsigned long y, byte value)
{
    node_t *head = NULL;
    unsigned long width = pgmWidth;
    unsigned long height = pgmHeight;
    unsigned long pos;

    /* Seed: mark and queue it only if it is inside the image and fillable. */
    pos = x + y * width;
    if (x < width && y < height && array[pos] == UCHAR_MAX) {
        array[pos] = value;
        queue(&head, x, y);
    }

    while (head != NULL) {
        /* ret[0] is x, ret[1] is y; copy them out and release the buffer. */
        unsigned long *ret = dequeue(&head);
        x = ret[0];
        y = ret[1];
        free(ret);

        pos = x + y * width;

        /* Each neighbour is bounds-checked before array[] is indexed. */
        if (x + 1 < width && array[pos + 1] == UCHAR_MAX) {
            array[pos + 1] = value;
            queue(&head, x + 1, y);
        }
        if (y + 1 < height && array[pos + width] == UCHAR_MAX) {
            array[pos + width] = value;
            queue(&head, x, y + 1);
        }
        if (y > 0 && array[pos - width] == UCHAR_MAX) {
            array[pos - width] = value;
            queue(&head, x, y - 1);
        }
        if (x > 0 && array[pos - 1] == UCHAR_MAX) {
            array[pos - 1] = value;
            queue(&head, x - 1, y);
        }
    }
}

为什么一种方法比另一种慢

问题描述

1 个解决方案

解决方案1
0 2022-01-04 22:25:10

为什么一种方法比另一种慢

问题描述

1 个解决方案

解决方案1 0 2022-01-04 22:25:10

解决方案1
0 2022-01-04 22:25:10