如何在 C 語言中找出兩個字符串所含單詞的對稱差？

Question

例如，我有兩個字符串：

lihuayu zhangxuehui sunyunlei guolei fuwenxia
lihuayu lixin fuwenxia zhangxuehui

我希望得到

sunyunlei guolei lixin

我寫了以下代碼

#include<stdio.h>
#include<string.h>

#define STRINGSIZE 64
/* Asker's attempt: print each word of the first input line that is not found in the second line. */
void main() /* NOTE(review): standard C expects int main(void) */
{
    char *line1 = NULL;
    char *line2 = NULL;

    /* NOTE(review): size1 and size2 are handed to getline() without being initialized */
    size_t size1;
    size_t size2;

    /* Read the two input lines; the return values are not checked */
    getline(&line1, &size1, stdin);
    getline(&line2, &size2, stdin);

    /* Tokenize line1 on spaces only; a trailing newline, if present, stays attached to the last token */
    char*  spilted1 = strtok(line1, " ");

    while (spilted1 != NULL){
        /* strstr() is a substring search, so a token that merely occurs inside a longer word of line2 counts as found */
        if (strstr(line2, spilted1) == NULL){
            printf("%s", spilted1); /* no separator is written between printed words */
        }
        spilted1 = strtok(NULL, " ");
    }
    /* line2 is never tokenized, so words that occur only in line2 are never printed; neither buffer is freed */

}

但這顯然是錯誤的，因為這樣我無法得到只出現在 string2 中的那些單詞。

我知道如何在Python中進行操作，但不知道如何在C中進行操作。

Answer 1

像這樣：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char **split(const char *str, const char *delimiter, size_t *len);
int cmp(const void *a, const void *b);
void find_diff(char **a1, char **a2);
void drop(char **a);

/*
 * Read two lines from stdin and print, one per line, every word that
 * appears in exactly one of them (the symmetric difference of the two
 * word sets).
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE when either line cannot
 * be read or a word list cannot be built.
 */
int main(void){
    char *line1 = NULL, *line2 = NULL;
    char **array1 = NULL, **array2 = NULL;
    size_t size1 = 0, size2 = 0;
    size_t len1 = 0, len2 = 0;
    int status = EXIT_FAILURE;

    /* getline() returns -1 on EOF/error; the buffer must not be read in that case */
    if(getline(&line1, &size1, stdin) < 0 || getline(&line2, &size2, stdin) < 0){
        fputs("expected two lines of input\n", stderr);
        goto cleanup;
    }

    //(1)split
    array1 = split(line1, " \t\n", &len1);
    array2 = split(line2, " \t\n", &len2);
    if(array1 == NULL || array2 == NULL){
        fputs("could not build the word lists\n", stderr);
        goto cleanup;
    }

    //(2)sort
    qsort(array1, len1, sizeof(*array1), cmp);
    qsort(array2, len2, sizeof(*array2), cmp);

    //(3)compare
    find_diff(array1, array2);
    status = EXIT_SUCCESS;

cleanup:
    /* only hand real arrays to drop(); a failed split() yields NULL */
    if(array1 != NULL) drop(array1);
    if(array2 != NULL) drop(array2);
    free(line1);
    free(line2);

    return status;
}

/*
 * Split str into its tokens, where a token is a maximal run of characters
 * not contained in delimiter.
 *
 * str       - text to split; it is not modified
 * delimiter - set of separator characters
 * len       - receives the number of tokens (0 on failure)
 *
 * Returns a malloc'ed, NULL-terminated array of malloc'ed copies of the
 * tokens; the caller owns the array and every string in it. Returns NULL
 * if str or delimiter is NULL or an allocation fails, in which case
 * nothing is leaked.
 */
char **split(const char *str, const char *delimiter, size_t *len){
    const char *p;
    char **ret;
    size_t count = 0, filled = 0;

    *len = 0;
    if(str == NULL || delimiter == NULL) return NULL;

    /* first pass: count tokens without touching the input */
    for(p = str + strspn(str, delimiter); *p != '\0'; p += strspn(p, delimiter)){
        p += strcspn(p, delimiter);
        ++count;
    }

    ret = malloc(sizeof(*ret) * (count + 1));//+1 for NULL
    if(ret == NULL) return NULL;

    /* second pass: copy each token into its own buffer */
    for(p = str + strspn(str, delimiter); *p != '\0'; p += strspn(p, delimiter)){
        size_t n = strcspn(p, delimiter);
        char *word = malloc(n + 1);

        if(word == NULL){
            /* undo the partial work so the caller never sees a short array */
            while(filled > 0)
                free(ret[--filled]);
            free(ret);
            return NULL;
        }
        memcpy(word, p, n);
        word[n] = '\0';
        ret[filled++] = word;
        p += n;
    }
    ret[filled] = NULL;
    *len = count;

    return ret;
}

/*
 * qsort() comparator for an array of C strings: a and b each point to
 * one char* element, and the pointed-to strings are ordered by strcmp().
 */
int cmp(const void *a, const void *b){
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}

/*
 * Print (one per line, via puts) every string that occurs in exactly one
 * of the two arrays. Both arrays must be sorted with strcmp() ordering
 * and terminated by a NULL pointer. Repeated entries inside one array
 * are collapsed, so each differing word is printed once.
 */
void find_diff(char **a1, char **a2){
    for(;;){
        char *left = *a1;
        char *right = *a2;
        int order;

        if(left == NULL && right == NULL)
            break;

        /* skip over a run of equal neighbours, keeping only the last copy */
        if(left != NULL && a1[1] != NULL && strcmp(left, a1[1]) == 0){
            ++a1;
            continue;
        }
        if(right != NULL && a2[1] != NULL && strcmp(right, a2[1]) == 0){
            ++a2;
            continue;
        }

        /* one side exhausted: everything left on the other side is unique */
        if(left == NULL){
            puts(right);
            ++a2;
            continue;
        }
        if(right == NULL){
            puts(left);
            ++a1;
            continue;
        }

        /* merge step: the smaller head cannot appear in the other array */
        order = strcmp(left, right);
        if(order == 0){
            ++a1;
            ++a2;
        } else if(order < 0){
            puts(left);
            ++a1;
        } else {
            puts(right);
            ++a2;
        }
    }
}

/*
 * Free a NULL-terminated array of malloc'ed strings together with the
 * array itself. Passing NULL is allowed and does nothing, mirroring
 * free(NULL), so the result of a failed split() can be dropped safely.
 */
void drop(char **a){
    if(a == NULL)
        return;

    for(char **p = a; *p != NULL; ++p)
        free(*p);
    free(a);
}

Answer 2

這是一種方法：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum { MAX_WORDS = 64 };

/*
 * Break buffer into words separated by spaces or newlines and store a
 * pointer to each word in words[], up to max_words entries.
 *
 * buffer is modified in place by strtok(); the stored pointers refer
 * into buffer, so they stay valid only as long as buffer does.
 * Returns the number of words stored.
 */
static int split_words(char *buffer, char **words, int max_words)
{
    int count = 0;

    for (char *word = strtok(buffer, " \n");
         word != NULL && count < max_words;
         word = strtok(NULL, " \n"))
    {
        words[count++] = word;
    }
    return count;
}

/*
 * Linear search: returns 1 if word compares equal (strcmp) to any of the
 * first list_size entries of list, otherwise 0.
 */
static int word_in_list(char *word, char **list, int list_size)
{
    int found = 0;

    for (int k = 0; k < list_size && !found; k++)
        found = (strcmp(word, list[k]) == 0);

    return found;
}

/*
 * Write every entry of w1 (n1 entries) that has no equal entry in
 * w2 (n2 entries) to stdout, one per line, wrapped in square brackets.
 * Entries repeated within w1 are reported once per occurrence.
 */
static void print_unique(char **w1, int n1, char **w2, int n2)
{
    int idx = 0;

    while (idx < n1)
    {
        char *candidate = w1[idx++];

        if (word_in_list(candidate, w2, n2))
            continue;
        printf("[%s]\n", candidate);
    }
}

/*
 * Read two lines from stdin, split each into at most MAX_WORDS words and
 * print the words found in only one of the two lines: first those unique
 * to line one, then those unique to line two. Nothing is printed unless
 * both lines are read successfully. Always returns 0.
 */
int main(void)
{
    char  *first = NULL;
    char  *second = NULL;
    size_t cap1 = 0;
    size_t cap2 = 0;

    /* && short-circuits: the second line is only read if the first one was */
    int have_both = getline(&first, &cap1, stdin) > 0 &&
                    getline(&second, &cap2, stdin) > 0;

    if (have_both)
    {
        char *words1[MAX_WORDS];
        char *words2[MAX_WORDS];
        int   count1 = split_words(first, words1, MAX_WORDS);
        int   count2 = split_words(second, words2, MAX_WORDS);

        print_unique(words1, count1, words2, count2);
        print_unique(words2, count2, words1, count1);
    }

    /* the word arrays point into these buffers, so release them last */
    free(first);
    free(second);
    return 0;
}

/*
   You'll need two
   arrays of char pointers, one for each line of input.  You'll split the
   first line into the first array, and the second line into the second
   array.  Then you'll go through the two arrays of pointers, comparing
   strings and counting only those that do not match any of the entries in
   the other array.  (What do you do if one input line itself contains
   repeats — The Lion, the Witch, and the Wardrobe for example?  Also, do
   you need to treat The as the same as the in that example?)

   You can use strtok_r() or strtok_s() if you have them available; at a
   pinch, you could use strtok(), but it is dangerous to use that in
   library code.  And you'll need to use strcmp() to compare the strings
   — plus macros/functions from <ctype.h> to handle case-conversion if
   that's appropriate.

   Also note that strtok() is destructive.  If you've split string 1 with
   it, you can't then search in string 1 when you split string 2.  Also
   note that strstr("then came a deluge", "the") matches, even though most
   people would not regard the haystack string as containing the needle
   word the.
 */

所使用的算法的運行時間與單詞數成二次方關係（即以 O(N²) 時間運行）；它將一個列表中的每個唯一單詞與另一個列表中的每個單詞進行比較。您也可以先對兩個列表排序並消除重複項（O(N log N) 時間），然後同步遍歷兩個列表，在線性時間內找到唯一的單詞。對於數十個、甚至數百個單詞，二次方的開銷無關緊要，但單詞再多時就可能變得重要。

編譯：

$ gcc -O3 -g -std=c11 -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes \
>     -Wold-style-definition -Werror uniq_words.c -o uniq_words
$

示例運行：

$ cat data
lihuayu zhangxuehui sunyunlei guolei fuwenxia
lihuayu lixin fuwenxia zhangxuehui
$ uniq_words < data
[sunyunlei]
[guolei]
[lixin]
$

數據周圍的方括號使我確信字符串包含我認為它們應包含的內容。

如何在C中的兩個字符串中的單詞中找到對稱差異？

問題描述

2 個解決方案

解決方案1
2 2016-05-10 08:04:22

解決方案2
2 已采納 2016-05-10 15:54:56

如何在C中的兩個字符串中的單詞中找到對稱差異？

問題描述

2 個解決方案

解決方案1 2 2016-05-10 08:04:22

解決方案2 2 已采納 2016-05-10 15:54:56

解決方案1
2 2016-05-10 08:04:22

解決方案2
2 已采納 2016-05-10 15:54:56