简体   繁体   中英

Quick-sort implementation in C

Goal: I'm trying to implement quick-sort in C.
Problem: This quick-sort implementation in C goes into an infinite loop. I think the partition function is okay because, in my test cases, the pivot (which is set to index 0) always moves to the correct location. I don't understand why the quicksort function does not eventually reach the base case.

What might be the problem with this implementation?

# include <stdio.h>

// Swapping algorithm: exchanges the two integers that `a` and `b` point to.
void swap(int *a, int *b) {
    int saved_b = *b;   // remember the second value before it is overwritten
    *b = *a;
    *a = saved_b;
}

// Partitioning algorithm
//
// Meant to rearrange L[left..right] (inclusive bounds) around a pivot value and
// to return the index at which the two scans stop; quicksort() uses that index
// as its split point.
//
// NOTE(review): as written, this routine has several defects:
//   - the pivot is always read from L[0] instead of L[left], so it is not tied
//     to the sub-range being partitioned;
//   - if L[left] and L[right] both equal the pivot while right > left, neither
//     inner scan advances and the swap exchanges two equal values, so the outer
//     loop never terminates;
//   - neither inner scan checks the range bounds;
//   - the final swap exchanges L[left] with the local variable `pivot`, not
//     with an element of the array.
int partition(int *L, int left, int right){
    // Pivot value comes from index 0 regardless of `left`.
    int pivot = L[0];

    while (right > left) {
            // Advance `left` past elements strictly below the pivot.
            while (L[left] < pivot) {
                    left = left + 1;
            }
            // Retreat `right` past elements strictly above the pivot.
            while (L[right] > pivot) {
                    right = right - 1;
            }
            // Exchange the elements both scans stopped on (done even if the
            // scans have already met or crossed).
            swap(&L[left], &L[right]);
    }
    // Swaps L[left] with the local copy of the pivot value, not an array slot.
    swap(&pivot, &L[left]);
    return left;
}

// Quicksort recursion: meant to sort L[start..end] (both bounds inclusive) by
// splitting the range with partition() and recursing on each side.
void quicksort(int *L, int start, int end) {
    // Base case: a range with fewer than two elements needs no work.
    if (end <= start) {
        return;
    }

    // Recurse on the parts to the left and right of the index partition() returns.
    int mid = partition(L, start, end);
    quicksort(L, start, mid - 1);
    quicksort(L, mid + 1, end);
}

// Test driver: prints the list, sorts it with quicksort(), and prints it again.
int main(void) {
    int myList[] = {12, 43, -16, 0, 2, 5, 1, 13, 2, 2, -1};
    // Derive the element count from the array itself so the loops and the sort
    // cover every element (the initializer holds 11 values, not 10).
    int count = (int)(sizeof myList / sizeof myList[0]);

    printf("UNSORTED LIST\n");
    for (int i = 0; i < count; i++) {
        printf("%d ", myList[i]);
    }

    // quicksort() takes inclusive bounds, hence count - 1.
    quicksort(myList, 0, count - 1);

    printf("\nSORTED LIST\n");
    for (int i = 0; i < count; i++) {
        printf("%d ", myList[i]);
    }
    printf("\n");
    return 0;
}

The initial pivot choice should be L[left], not L[0], shouldn't it? However, that's not the only problem in the partition function.

This code works:

#include <stdio.h>

// Swapping algorithm: exchanges the integers stored at `a` and `b`.
static inline
void swap(int *a, int *b)
{
    // Read both values first, then write each one to the other location.
    const int old_a = *a;
    const int old_b = *b;
    *a = old_b;
    *b = old_a;
}

// Prints one line: `tag` right-aligned in a 15-column field, the inclusive
// index range [left..right], and then the elements ptr[left..right].
static void dump_list(const char *tag, int *ptr, int left, int right)
{
    printf("%15s [%d..%d]: ", tag, left, right);

    int idx = left;
    while (idx <= right)
    {
        printf(" %3d", ptr[idx]);
        ++idx;
    }

    putchar('\n');
}

// Partitioning algorithm
//
// Partitions L[left..right] (inclusive bounds, left <= right) around the value
// of its first element. On return, the pivot value sits at the returned index,
// every element of the range before that index is <= the pivot, and every
// element of the range after it is > the pivot.
static
int partition(int *L, int left, int right)
{
    int pivot = left;
    int p_val = L[pivot];

    while (left < right)
    {
        // Stop at `right` at the latest: without this bound the scan runs past
        // the end of the range (and possibly of the array) whenever no element
        // greater than the pivot remains, e.g. for the input {2, 1}.
        while (left < right && L[left] <= p_val)
            left++;
        // No bound needed here: L[pivot] == p_val ends this scan at the latest.
        while (L[right] > p_val)
            right--;
        if (left < right)
            swap(&L[left], &L[right]);
    }
    // L[right] <= p_val here, so it may take the pivot's place at the front
    // while the pivot moves to its final position.
    swap(&L[pivot], &L[right]);
    return right;
}

// Quicksort recursion
//
// Sorts L[start..end] (inclusive bounds) in place: partition() moves one pivot
// to its final index, then the ranges on either side of it are sorted
// recursively. Uncomment the dump_list()/printf() lines to trace every step.
static
void quicksort(int *L, int start, int end)
{
    // Only ranges holding at least two elements need any work.
    if (start < end)
    {
        //dump_list("PRE-PARTITION", L, start, end);
        int pivot_index = partition(L, start, end);
        //dump_list("POST-PARTITION", L, start, end);
        //printf("Split point: %d\n", pivot_index);
        quicksort(L, start, pivot_index - 1);
        quicksort(L, pivot_index + 1, end);
    }
}

// Demo driver: dumps indices 0..9 of myList, sorts that range, and dumps it
// again.
// NOTE(review): the initializer holds 11 values, so the trailing -1 (index 10)
// is neither sorted nor printed.
int main(void)
{
    int myList[] = {12, 43, -16, 0, 2, 5, 1, 13, 2, 2, -1};
    const int first = 0;
    const int last = 9;

    dump_list("UNSORTED LIST", myList, first, last);
    quicksort(myList, first, last);
    dump_list("SORTED LIST", myList, first, last);
    return 0;
}

It produces the output:

  UNSORTED LIST [0..9]:   12  43 -16   0   2   5   1  13   2   2
    SORTED LIST [0..9]:  -16   0   1   2   2   2   5  12  13  43

With the debugging prints enabled, the output is:

  UNSORTED LIST [0..9]:   12  43 -16   0   2   5   1  13   2   2
  PRE-PARTITION [0..9]:   12  43 -16   0   2   5   1  13   2   2
 POST-PARTITION [0..9]:    2   2 -16   0   2   5   1  12  13  43
Split point: 7
  PRE-PARTITION [0..6]:    2   2 -16   0   2   5   1
 POST-PARTITION [0..6]:    1   2 -16   0   2   2   5
Split point: 5
  PRE-PARTITION [0..4]:    1   2 -16   0   2
 POST-PARTITION [0..4]:  -16   0   1   2   2
Split point: 2
  PRE-PARTITION [0..1]:  -16   0
 POST-PARTITION [0..1]:  -16   0
Split point: 0
  PRE-PARTITION [3..4]:    2   2
 POST-PARTITION [3..4]:    2   2
Split point: 4
  PRE-PARTITION [8..9]:   13  43
 POST-PARTITION [8..9]:   13  43
Split point: 8
    SORTED LIST [0..9]:  -16   0   1   2   2   2   5  12  13  43

I was annoyed that none of the code posted here previously is correct so I wrote a quicksort and proved it correct. It was much harder than I thought. The code below works as far as I can tell and I think my proof of correctness is valid:

#include <stdio.h>

// Exchanges the integers that `x` and `y` point to.
void swap(int* x, int* y)
{
    int held = *y;   // keep the second value while it is being overwritten
    *y = *x;
    *x = held;
}

/**
 * Assuming precondition (P) that `end - begin >= 2`, this function reorders the elements
 * of range [begin, end) and returns a pointer `ret` such that the following
 * postconditions hold:
 *   - (Q1): `ret > begin`
 *   - (Q2): `ret < end`
 * and, for some value `p` in [begin, end):
 *   - (Q3): all values in [begin, ret) are lower than or equal to `p`
 *   - (Q4): all values in [ret, end) are greater than or equal to `p`
 */
int* partition(int* begin, int* end)
{
    // These aliases are unnecessary but make the proof easier to understand.
    int* low  = begin;
    int* high = end;

    // The pivot is the value of the middle element of the range.
    int pivot = *(low + (high - low)/2);

    // Loop invariants, all trivially verified at the start of the loop:
    //   - (A): values strictly to the left of `low` are lower than or equal to `pivot`
    //   - (B): there is at least one value at or to the right of `low` that is greater
    //     than or equal to `pivot`
    //   - (C): values at or to the right of `high` are greater than or equal to `pivot`
    //   - (D): there is at least one value strictly to the left of `high` that is lower
    //     than or equal to `pivot`
    //   - (E): `low <= high`
    //
    // The loop terminates because `high - low` decreases strictly at each execution of
    // the body (obvious).
    //
    // Written as `for (;;)` rather than `while (true)` so that the code compiles as C
    // without <stdbool.h> (`true` is not a keyword before C23).
    for (;;)
    {
        // This loop terminates because of (B).
        while (*low < pivot)
            ++low;

        // Here, we have
        //   - (1): `*low >= pivot`
        //   - (2): `low <= high` because of (E) and (C)
        //   - properties (A) and (B) still hold because `low` has only moved
        //     past values strictly less than `pivot`

        // This loop terminates because of (D).
        do {
            --high;
        } while (pivot < *high);

        // Here, we have
        //   - (3): `*high <= pivot`
        //   - (4): by (C) which held before this loop, elements strictly to the
        //     right of `high` are known to be greater than or equal to `pivot`
        //     (but now (C) may not hold anymore)

        if (low >= high)
        {
            // Due to (1), (A) and (4), (Q3) and (Q4) are established with `pivot`
            // as `p`.
            // Clearly, (B) proves Q2.
            // See the rest of the answer below for a proof of (Q1).
            // This correctly finishes the partition.
            return low;
        }

        // We have `low < high` and we swap...
        swap(low, high);

        // ...and now,
        //   - by (1) and (4), invariant (C) is re-established
        //   - by (1), invariant (D) is re-established
        //   - (5): by (3), `*low <= pivot`

        ++low;
        // (A) already held before this increment. Thus, because of (5), (A)
        // still holds. Additionally, by (1), after the swap, (B) is
        // re-established. Finally, (E) is obvious.
    }
}

// Sorts the half-open range [begin, end) in place.
//
// Declared `static` on purpose: `qsort` is also the name of a standard library
// function, and library function names are reserved for use as identifiers
// with external linkage. Internal linkage keeps this definition from clashing
// with (or replacing) the library routine. Do not include <stdlib.h> in this
// file, because its declaration of `qsort` would conflict with this one.
static void qsort(int* begin, int* end)
{
    // Trivial base case...
    if (end - begin < 2)
        return;

    // ...therefore pre-condition (P) of `partition` is satisfied.
    int* p = partition(begin, end);

    // Thanks to postconditions (Q1) and (Q2) of `partition`, the ranges
    // [begin, p) and [p, end) are non-empty, therefore the size of the ranges
    // passed to the recursive calls below is strictly lower than the size of
    // [begin, end) in this call. Therefore the base case is eventually reached
    // and the algorithm terminates.

    // Thanks to postconditions (Q3) and (Q4) of `partition`, and by induction
    // on the size of [begin, end), the recursive calls below sort their
    // respective argument ranges and [begin, end) is sorted as a result.
    qsort(begin, p);
    qsort(p, end);
}

// Demo driver: sorts a small fixed array and prints every element followed by
// ", " (no trailing newline is written).
int main()
{
    int l[] = { 3, 1, 9, 6, 0, 7, 1, 7, 2, 2, 8 };

    // Element count taken from the array itself.
    size_t n = sizeof l / sizeof l[0];

    // The sort takes a half-open pointer range [l, l + n).
    qsort(l, l + n);

    for (const int *it = l; it != l + n; ++it)
        printf("%d, ", *it);
}

To prove (Q1), we must prove that low has been incremented at least once before the return statement is reached.

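If the return statement is reached during the first execution of the loop body, then low >= high implies that the --high; and ++low; statements in the nested loops must have been executed at least end - begin times between them. By precondition (P), end - begin >= 2. By (D), the loop decrementing high must end with high >= begin. More precisely, no swap has happened yet, so the element from which pivot was read is still at index (end - begin)/2 >= 1, and the loop decrementing high stops at that element at the latest; hence it ends with high > begin. Since low >= high, low must have been incremented at least once, proving (Q1).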

Otherwise, if the return statement is reached during a subsequent execution of the loop body, then the unconditional ++low; statement proves (Q1).

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM