简体   繁体   中英

Case-insensitive sort

I wrote a sorting program in which, when the optional argument -i is present, the sort ignores the difference between lowercase and uppercase letters.

The program uses the standard library qsort() algorithm; case-insensitive option is handled by getopt() if the -i flag is supplied to the program.

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>

/* case-sensitive comparator for qsort
   qsort hands over the addresses of two array elements; every element is
   itself a char pointer, so each argument is really a pointer to a string
   pointer and has to be dereferenced once before calling strcmp
   returns the strcmp result: negative, zero or positive
*/
int simple_cmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}

/* case-insensitive comparator for qsort
   same calling convention as simple_cmp: both arguments point at string
   pointers; the strings are compared with strcasecmp so that letter case
   does not influence the order
*/
int insensitive_cmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcasecmp(*lhs, *rhs);
}

/* compare two runs of decimal digits by their numeric value
   *pa and *pb must each point at a digit; on return both are advanced past
   their whole digit run
   the runs are compared as text (leading zeros skipped, then longer run is
   bigger, then digit by digit) instead of being converted with strtoll, so
   runs that do not fit in a long long are still ordered correctly
   returns -1, 0 or 1
*/
static int digit_run_cmp(const char **pa, const char **pb)
{
    const char *a = *pa;
    const char *b = *pb;
    const char *a_end;
    const char *b_end;
    size_t len_a;
    size_t len_b;
    int order;

    /* leading zeros do not change the value */
    while (*a == '0')
        a++;
    while (*b == '0')
        b++;

    a_end = a;
    while (isdigit((unsigned char)*a_end))
        a_end++;
    b_end = b;
    while (isdigit((unsigned char)*b_end))
        b_end++;

    *pa = a_end;
    *pb = b_end;

    len_a = (size_t)(a_end - a);
    len_b = (size_t)(b_end - b);

    /* without leading zeros, more digits always means a bigger number */
    if (len_a != len_b)
        return (len_a < len_b) ? -1 : 1;

    /* same length: the first differing digit decides */
    order = memcmp(a, b, len_a);
    return (order > 0) - (order < 0);
}

/* natural comparison function for qsort
   the standard library has no general-purpose function for this
   it acts as a plain character-by-character comparison as long as at least
   one of the two current characters is not a digit
   when both strings have a digit as the next character, all the consecutive
   digits are taken as one number and the two numbers are compared as if
   each was a single character
   characters are compared as unsigned char, the same way strcmp does
   a and b point at string pointers (elements of the array being sorted)
   returns -1, 0 or 1; a string that is a proper prefix of the other one
   sorts first
*/
int natural_cmp(const void *a, const void *b)
{
    const char *stra = *(const char *const *)a;
    const char *strb = *(const char *const *)b;

    while (*stra && *strb)
    {
        if (isdigit((unsigned char)*stra) && isdigit((unsigned char)*strb))
        {
            int order = digit_run_cmp(&stra, &strb);

            if (order != 0)
                return order;
        }
        else
        {
            unsigned char ca = (unsigned char)*stra;
            unsigned char cb = (unsigned char)*strb;

            if (ca != cb)
                return (ca < cb) ? -1 : 1;
            stra++;
            strb++;
        }
    }

    /* at least one string is exhausted: the one with text left is bigger */
    if (*stra != 0)
        return 1;
    if (*strb != 0)
        return -1;
    return 0;
}


/* entry point: sort the positional command line arguments and print them,
   one per line
   options:
     -n  natural sort (digit runs compared as numbers)
     -i  case-insensitive sort
     -r  print the sorted list in reverse order
   -n and -i each select a whole comparison function, so they exclude each
   other; -r only affects the printing and can be combined with either
   returns 0 on success, EXIT_FAILURE on a usage error
*/
int main(int argc,char *argv[])
{
    int i, iFlag = 0, nFlag = 0, rFlag = 0;
    int opt;
    int (*sort_func)(const void *, const void *) = simple_cmp;

    /* getopt will cycle through the parameters searching for optional arguments
       if the specified argument is found store the required configuration
       one cannot parse normal arguments until getopt finished cycling through
       all the arguments
       getopt will rearrange the parameter list, so all positional arguments
       are moved to the end, hence normal processing can be done only after
       all optional arguments are consumed
       if unknown options are detected exit the program with an error status
       as it is most likely a user error or typo and should be corrected
       to achieve expected results
    */
    while ((opt = getopt(argc, argv, "nri")) != -1)
    {
        switch (opt)
        {
            case 'n':
                nFlag = 1;
                sort_func = natural_cmp;
                break;
            case 'r':
                rFlag = 1;
                break;
            case 'i':
                iFlag = 1;
                sort_func = insensitive_cmp;
                break;
            default: /* '?' */
                exit(EXIT_FAILURE);
        }
    }

    /* after rearranging the arguments, optind is updated to reflect the
       starting position of the first non-optional argument
       all parameter processing after getopt finished should start from optind
       instead of 1
    */
    if (argc == optind)
    {
        fprintf(stderr, "Only optional parameters have been entered\n");
        return EXIT_FAILURE;
    }

    /* only the two comparator-selecting options conflict with each other;
       -r is not a sorting method and must not be counted here
    */
    if (nFlag && iFlag)
    {
        fprintf(stderr, "Only one sorting option can be selected\n");
        return EXIT_FAILURE;
    }

    /* quick-sort as implemented in the standard library
       it sorts in-place a given vector of unspecified type elements
       the size of a single element, and the length of vector must be supplied
       along the vector itself
       additionally a comparison function must be specified which can compare
       individual elements of the vector
    */
    qsort(&argv[optind], (size_t)(argc - optind), sizeof *argv, sort_func);

    if (rFlag)
    {
        /* displaying sorted parameters from last to first */
        for (i = argc - 1; i >= optind; i--)
            printf("%s\n", argv[i]);
    }
    else
    {
        /* displaying sorted parameters one by one */
        for (i = optind; i < argc; i++)
            printf("%s\n", argv[i]);
    }
    return 0;
}

How can I modify the program so that it can be run with -n -r -i together, to do a natural sort that does not differentiate between lowercase and uppercase letters and displays the result in reverse order? Does strcasecmp also need to be adapted?

Remove the restriction on command line options that is imposed by nrFlaguri. As written, the program does not even allow a reversed case-insensitive sort (two flags), let alone all three flags together.

A quick solution is to make the case-insensitivity flag into a file scope variable, so that the behaviour of the comparison functions can be easily modified.

With this, for simplicity, simple_cmp and insensitive_cmp can be combined into a single function, and natural_cmp can use a helper function to compare characters.

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>

/* Non-zero when comparisons should ignore letter case; main sets it to 1
   when the -i option is given. */
static int is_insensitive = 0;

/* qsort comparator for an array of strings.
   a and b point at string pointers. The strings are compared with
   strcasecmp when is_insensitive is set and with strcmp otherwise, and the
   result of that call is returned unchanged. */
int simple_cmp(const void *a, const void *b)
{
    const char *lhs = *(const char *const *)a;
    const char *rhs = *(const char *const *)b;

    if (is_insensitive)
        return strcasecmp(lhs, rhs);

    return strcmp(lhs, rhs);
}

/* Return non-zero when character a orders before character b.
   Both characters are first converted to unsigned char so that bytes with
   the high bit set order after ASCII regardless of whether plain char is
   signed, which matches how strcmp orders them. When is_insensitive is set
   the characters are additionally folded with tolower before comparing. */
int less_than(char a, char b)
{
    unsigned char ua = (unsigned char) a;
    unsigned char ub = (unsigned char) b;

    if (is_insensitive)
        return tolower(ua) < tolower(ub);

    return ua < ub;
}

/* Compare two runs of decimal digits by numeric value.
   *pa and *pb must each point at a digit; both are advanced past their
   digit run before returning. The runs are compared as text (leading zeros
   skipped, then by length, then digit by digit) so the result does not
   depend on the width of any integer type and cannot saturate the way a
   strtoll conversion does. Returns -1, 0 or 1. */
static int compare_digit_runs(const char **pa, const char **pb)
{
    const char *a = *pa;
    const char *b = *pb;

    /* Leading zeros do not change the value. */
    while (*a == '0')
        a++;
    while (*b == '0')
        b++;

    const char *a_end = a;
    while (isdigit((unsigned char) *a_end))
        a_end++;

    const char *b_end = b;
    while (isdigit((unsigned char) *b_end))
        b_end++;

    *pa = a_end;
    *pb = b_end;

    size_t len_a = (size_t) (a_end - a);
    size_t len_b = (size_t) (b_end - b);

    /* Without leading zeros, more digits always means a bigger number. */
    if (len_a != len_b)
        return len_a < len_b ? -1 : 1;

    int order = memcmp(a, b, len_a);
    return (order > 0) - (order < 0);
}

/* qsort comparator implementing a natural sort.
   a and b point at string pointers. The strings are walked in parallel:
   when both current characters are digits, the two whole digit runs are
   compared as numbers; otherwise the two characters are compared with
   less_than, which is where the case-insensitivity setting is applied.
   A string that is a proper prefix of the other sorts first.
   Returns -1, 0 or 1. */
int natural_cmp(const void *a, const void *b)
{
    const char *stra = *(const char *const *)a;
    const char *strb = *(const char *const *)b;

    while (*stra && *strb)
    {
        if (isdigit((unsigned char) *stra) && isdigit((unsigned char) *strb))
        {
            int order = compare_digit_runs(&stra, &strb);

            if (order != 0)
                return order;
        }
        else
        {
            if (less_than(*stra, *strb))
                return -1;
            else if (less_than(*strb, *stra))
                return 1;

            stra++;
            strb++;
        }
    }

    /* At least one string is exhausted: the one with text left is bigger. */
    if (*stra != 0)
        return 1;
    if (*strb != 0)
        return -1;
    return 0;
}

/* Sort the positional command line arguments and print them one per line.
   Options: -n selects natural_cmp instead of simple_cmp, -i sets
   is_insensitive, -r prints the sorted list from last to first. An unknown
   option or a missing list of arguments ends the program with
   EXIT_FAILURE. */
int main(int argc,char **argv)
{
    int (*compare)(const void *, const void *) = simple_cmp;
    int reversed = 0;

    for (;;)
    {
        int flag = getopt(argc, argv, "nri");

        if (flag == -1)
            break;

        if (flag == 'n')
            compare = natural_cmp;
        else if (flag == 'r')
            reversed = 1;
        else if (flag == 'i')
            is_insensitive = 1;
        else /* '?' */
            exit(EXIT_FAILURE);
    }

    /* getopt leaves optind at the first non-option argument. */
    if (optind == argc)
    {
        fprintf(stderr, "Missing arguments to sort.\n");
        return EXIT_FAILURE;
    }

    char **first = argv + optind;
    size_t count = (size_t) (argc - optind);

    qsort(first, count, sizeof *first, compare);

    /* One loop serves both directions: only the index is mirrored. */
    for (size_t k = 0; k < count; k++)
    {
        size_t pos = reversed ? count - 1 - k : k;

        printf("%s\n", first[pos]);
    }

    return 0;
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM