简体   繁体   中英

Why my insertion sort can only sort 90% of the data?

I have to read a csv file and then sort its data basen on the dates in ascending order. The plot twist is that the 90% of the data is sorted correctly but the other 10% is not. The dates in the csv file are written in this form 01/17/2015(month/day/year). I tested the code a bit and i noticed that if i run insertion sort twice then the 10% of the data that were not sorted, are perfectly sorted. My question is why is this happening, is it a problem with my code or with this algorithm. The fact that it runs properly for the most part and if i run it aggain it sorts the file perfectly makes me think that the only problem is with the algorithm and not my code.

#include <stdio.h>

typedef struct
{
    int month;
    int day;
    int year;
    float T_degC;
    float PO4uM;
    float SiO3uM;
    float NO2uM;
    float NO3uM;
    float Salnty;
    float O2ml_L;
} hello;

int insertionSort(hello hellos[], int records);

int main()
{
    FILE* file;

    file = fopen("ocean1.csv", "r");

    if (file == NULL)
    {
        printf("Error opening file. \n");
        return 1;
    }

    hello hellos[1405];

    int read = 0;
    int records = 0; // dhladh struct

    do
    {
        read = fscanf(file,
                      "%d/%d/%d, %f, %f, %f, %f, %f, %f, %f",
                      &hellos[records].month,
                      &hellos[records].day,
                      &hellos[records].year,
                      &hellos[records].T_degC,
                      &hellos[records].PO4uM,
                      &hellos[records].SiO3uM,
                      &hellos[records].NO2uM,
                      &hellos[records].NO3uM,
                      &hellos[records].Salnty,
                      &hellos[records].O2ml_L);

        if (read == 10)
        {
            records++;
        }
        if (read != 10 && feof(file) == 0)
        {
            printf("File format incorrect.\n");
            return 1;
        }
        if (ferror(file))
        {

            printf("Error reading file.\n");
            return 1;
        }

    } while (!feof(file));

    fclose(file);

    printf("\n%d records read.\n\n", records);

    insertionSort(hellos, records);
    // insertionSort(hellos, records);

    //Εκτυπωση των αποτελεσματων.

    for (int i = 0; i < records; i++)
    {
        printf("\n%d, %d, %d, %f, %f, %f, %f, %f, %f, %f",
               hellos[i].month,
               hellos[i].day,
               hellos[i].year,
               hellos[i].T_degC,
               hellos[i].PO4uM,
               hellos[i].SiO3uM,
               hellos[i].NO2uM,
               hellos[i].NO3uM,
               hellos[i].Salnty,
               hellos[i].O2ml_L);

        printf("\n");
    }
    return 0;
}

int insertionSort(hello hellos[], int records)
{
    int i;
    hello key;

    for (int j = 1; j < records; j++)
    {
        key = hellos[j];
        i = j - 1;

        printf("\n\n%d", i);
        // printf("\n%d", key.month);
        // printf("\n%d", hellos[i].month);

        if (key.year < hellos[i].year)
        {
            // printf("\nmpla");
            while (i >= 0 && key.year < hellos[i].year)
            {
                hellos[i + 1] = hellos[i];
                i = i - 1;
            }
            hellos[i + 1] = key;
        }
        else if (key.year == hellos[i].year)
        {
            // printf("\nmpla2");
            if (key.month < hellos[i].month)
            {
                // printf("\nmpla3");
                while (i >= 0 && key.month < hellos[i].month && key.year == hellos[i].year)
                {
                    hellos[i + 1] = hellos[i];
                    i = i - 1;
                }
                hellos[i + 1] = key;
            }
            else if (key.month == hellos[i].month)
            {
                // printf("\nmpla4");
                if (key.day < hellos[i].day)
                {
                    // printf("\nmpla5");
                    while (i >= 0 && key.day < hellos[i].day && key.month == hellos[i].month)
                    {
                        hellos[i + 1] = hellos[i];
                        i = i - 1;
                    }
                    hellos[i + 1] = key;
                }
                else if (key.day == hellos[i].day)
                {
                    // printf("\nmpla6");
                    if (key.T_degC < hellos[i].T_degC)
                    {
                        // printf("\nmpla7");
                        while (i >= 0 && key.day == hellos[i].day && key.month == hellos[i].month &&
                               key.year == hellos[i].year)
                        {
                            hellos[i + 1] = hellos[i];
                            i = i - 1;
                        }
                        hellos[i + 1] = key;
                    }
                }
            }
        }
    }
    return 0;
}

Here are some examples of data from my csv:

02/02/2015,20.37,0.22,0,0,0,33.685,5.5
02/01/2015,17.71,0.28,0.5,0,0,33.676,5.93
01/30/2015,10.85,1.32,16.5,0.05,14.8,33.752,4.19
01/31/2015,10.54,1.85,27.4,0.02,21.5,33.881,2.75
01/29/2015,10.49,1.98,30,0.02,22.6,33.946,2.41
01/28/2015,10.39,2.03,30.7,0.02,23,33.96,2.37
01/27/2015,10.22,2.1,31.8,0.02,23.6,33.982,2.31
01/26/2015,9.75,2.19,34.7,0.01,24.8,34.029,2.39
01/25/2015,18.43,0.11,1,0,0,33.464,5.83
01/24/2015,18.25,0.04,2,0,0,33.452,5.95
01/22/2015,15.6,0.19,3.8,0.04,0.7,33.423,5.91
01/23/2015,12.7,0.41,6,0.1,1.4,33.393,5.88
01/21/2015,10.98,1.09,16.6,0.07,14.1,33.481,4.04
01/20/2015,10.93,1.55,23.8,0.04,19.1,33.531,2.82
01/19/2015,10.74,1.67,26.7,0.04,20.7,33.583,2.55
01/16/2015,10.27,1.66,27.8,0.01,21.2,33.636,2.71
01/15/2015,10.02,1.76,34.4,0.03,24.3,33.747,2.11
01/17/2015,20.22,0.15,1,0,0,33.654,5.34
01/18/2015,20.22,0.15,1,0,0,33.654,5.34

First of all I would recommend that you think a bit and figure out a way of writing this without repeating the sort algorithm multiple times. HINT : you only need one function to compare two hellos and determine if the first one is smaller than the second, equal to it, or greater than it.

To the question at hand: you have a logical error under // printf("\nmpla5"); section you should be checking for month AND year equal, currently you search only checks for same month which will obviously sort the list incorrectly. EDIT : Actually, that only fixes a bug in your intended code, the code still won't sort properly unless you do the full compare between two entries (year, then month, then day, then temp). Try writing it all again, this time design your implementation with a compare function in mind (which compares two entries).

For starters it is not a C++ code. It is a C code.

The logical error is hidden in the if-else statements.

Consider for example this code snippet

        if (key.month < hellos[i].month)
        {
            // printf("\nmpla3");
            while (i >= 0 && key.month < hellos[i].month && key.year == hellos[i].year)
            {
                hellos[i + 1] = hellos[i];
                i = i - 1;
            }
            hellos[i + 1] = key;
        }
        else if (key.month == hellos[i].month)
        {
            //...
        }

If key.month is less than hellos[i].month then the else if statement

else if (key.month == hellos[i].month)

will not get the control though there can be objects (after the preceding if statement) with key.month equal to hellos[i].month but with key.day less than hellos[i].day .

I advice to write a separate comparison function similarly to the function used by qsort and call it to compare two objects of the structure type. For example

int cmp( const void *a, const void *b )
{
    const hello *left  = a;
    const hello *right = b;

    int result = ( right->year < left->year ) - ( left->year < right->year );

    if ( result == 0 )
    {
        result = ( right->month < left->month ) - ( left->month < right->month );
        if ( result == 0 )
        {
            result = ( right->day < left->day ) - ( left->day < right->day );
        }
    }

    return result;
}        

Here is a demonstration program of using this function based on calling qsort .

#include <stdio.h>
#include <stdlib.h>

typedef struct
{
    int month;
    int day;
    int year;
} hello;

int cmp( const void *a, const void *b )
{
    const hello *left  = a;
    const hello *right = b;

    int result = ( right->year < left->year ) - ( left->year < right->year );

    if ( result == 0 )
    {
        result = ( right->month < left->month ) - ( left->month < right->month );
        if ( result == 0 )
        {
            result = ( right->day < left->day ) - ( left->day < right->day );
        }
    }

    return result;
}

int main( void )
{
    hello hellos[] =
    {
        { 02, 02, 2015 },
        { 02, 01, 2015 },
        { 01, 30, 2015 },
        { 01, 31, 2015 },
        { 01, 29, 2015 },
        { 01, 28, 2015 },
    };
    size_t N = sizeof( hellos ) / sizeof( *hellos );

    qsort( hellos, N, sizeof( hello ), cmp );

    for ( size_t i = 0; i < N; i++ )
    {
        printf( "%02d/%02d/%d\n", hellos[i].month, hellos[i].day, hellos[i].year );
    }
}

The program output is

01/28/2015
01/29/2015
01/30/2015
01/31/2015
02/01/2015
02/02/2015

You can insert a call of the comparison function in only one if statement within your function and the following while loop as for example

if ( cmp( &key, &hellos[i] ) < 0 )
{
    //...
}

Taking into account your comments to my answer it seems you are unable to understand what should be done. So I will just include a demonstration program.

#include <stdio.h>

typedef struct
{
    int month;
    int day;
    int year;
} hello;

int cmp( const void *a, const void *b )
{
    const hello *left  = a;
    const hello *right = b;

    int result = ( right->year < left->year ) - ( left->year < right->year );

    if ( result == 0 )
    {
        result = ( right->month < left->month ) - ( left->month < right->month );
        if ( result == 0 )
        {
            result = ( right->day < left->day ) - ( left->day < right->day );
        }
    }

    return result;
}

void insertionSort( hello hellos[], size_t records )
{
    for ( size_t i = 1; i < records; i++ )
    {
        if ( cmp( &hellos[i], &hellos[i-1] ) < 0 )
        {
            hello tmp = hellos[i];
            size_t j = i;

            while ( j != 0 && cmp( &tmp, &hellos[j -1] ) < 0 )  
            {   --j;
                hellos[j + 1] = hellos[j];               
            } 

            hellos[j] = tmp;
        }
    }
}

int main( void )
{
    hello hellos[] =
    {
        { 02, 02, 2015 },
        { 02, 01, 2015 },
        { 01, 30, 2015 },
        { 01, 31, 2015 },
        { 01, 29, 2015 },
        { 01, 28, 2015 },
    };
    size_t N = sizeof( hellos ) / sizeof( *hellos );

    insertionSort( hellos, N );

    for ( size_t i = 0; i < N; i++ )
    {
        printf( "%02d/%02d/%d\n", hellos[i].month, hellos[i].day, hellos[i].year );
    }
}

The program output is the same as shown above

01/28/2015
01/29/2015
01/30/2015
01/31/2015
02/01/2015
02/02/2015

Or in C++ this done very easy using the standard C++ function std::tie declared in the header <tuple> .

Something like

if ( std::tie( key.year, key.month, key.day ) < 
     std::tie( hellos[i].year, hellos[i].month, hellos[i].day ) ) 
{
    //...
} 

So guys i finally came up with an idea. So my idea is, instead of having if statements inside of if statements, i thought it might be a better solution to make multiple for statements. One for year, one for month and one for day so i can first sort the code based on year after that also based on month and finally for day too. Here is the code:

#include <stdio.h>

typedef struct 
{
int month;
int day;
int year;
float T_degC;
float PO4uM;
float SiO3uM;
float NO2uM;
float NO3uM;
float Salnty;
float O2ml_L;
}
hello;

int insertionSort(hello hellos[], int records);

int main()
{
FILE *file;

file = fopen("ocean.csv", "r");

if(file == NULL)
{
    printf("Error opening file. \n");
    return 1;
}

hello hellos[1405];

int read = 0;
int records = 0;   //dhladh struct

do
{
    read = fscanf(file,
                        "%d/%d/%d, %f, %f, %f, %f, %f, %f, %f",
                        &hellos[records].month,
                        &hellos[records].day,
                        &hellos[records].year,
                        &hellos[records].T_degC,
                        &hellos[records].PO4uM,
                        &hellos[records].SiO3uM,
                        &hellos[records].NO2uM,
                        &hellos[records].NO3uM,
                        &hellos[records].Salnty,
                        &hellos[records].O2ml_L);

    if(read == 10)
    {
        records++;  

    }
    if(read != 10 && feof(file) ==0)
    {
        printf("File format incorrect.\n");
        return 1;
    }
    if(ferror(file))
    {

        printf("Error reading file.\n");
        return 1;
    }

} while (!feof(file));

fclose(file);


printf("\n%d records read.\n\n", records);

insertionSort(hellos, records);
//insertionSort(hellos, records);

//Εκτυπωση των αποτελεσματων.

for(int i=0; i<records; i++)
{
    printf("\n%d, %d, %d, %f, %f, %f, %f, %f, %f, %f",
                        hellos[i].month,
                        hellos[i].day,
                        hellos[i].year,
                        hellos[i].T_degC,
                        hellos[i].PO4uM,
                        hellos[i].SiO3uM,
                        hellos[i].NO2uM,
                        hellos[i].NO3uM,
                        hellos[i].Salnty,
                        hellos[i].O2ml_L);

    printf("\n");
}
return 0;
    }

    int insertionSort(hello hellos[], int records)
    {
int i;
hello key;

for(int j=1; j<records; j++)
{
    key = hellos[j];
    i = j-1;
    if(key.year < hellos[i].year)
    {
        while(i >= 0 && (key.year < hellos[i].year))
        {
            hellos[i+1] = hellos[i];
            i = i-1;  
        }
        hellos[i+1] = key;
    }
}

for(int k=1; k<records; k++)
{
    key = hellos[k];
    i = k-1;
    if((key.month < hellos[i].month) && (key.year == hellos[i].year))
    {
        while(i >= 0 && (key.month < hellos[i].month) && (key.year == hellos[i].year))
        {
            hellos[i+1] = hellos[i];
            i = i-1;  
        }
        hellos[i+1] = key;
    }
}

for(int l=1; l<records; l++)
{
    key = hellos[l];
    i = l-1;
    if((key.day < hellos[i].day) && (key.month == hellos[i].month) && (key.year == hellos[i].year))
    {
        while(i >= 0 && (key.day < hellos[i].day) && (key.month == hellos[i].month) && (key.year == hellos[i].year))
        {
            hellos[i+1] = hellos[i];
            i = i-1;  
        }
        hellos[i+1] = key;
    }
}
return 0;
   }

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM