[英]How can I find symmetric difference in the words in two strings in C?
例如,我有兩個字符串:
lihuayu zhangxuehui sunyunlei guolei fuwenxia
lihuayu lixin fuwenxia zhangxuehui
我會得到
sunyunlei guolei lixin
我寫了以下代碼
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define STRINGSIZE 64
/*
 * Read two lines from stdin and print every space-separated token of the
 * first line for which strstr() finds no occurrence in the second line.
 *
 * Limitations kept on purpose (this is the approach under discussion):
 *   - only line 1 is checked against line 2, never the other way round;
 *   - strstr() matches substrings, not whole words;
 *   - tokens are printed back to back, with no separator added.
 */
int main(void)
{
    char *line1 = NULL;
    char *line2 = NULL;
    size_t size1 = 0;
    size_t size2 = 0;

    /* getline() returns -1 on EOF or error; do not touch the buffers then. */
    if (getline(&line1, &size1, stdin) != -1 &&
        getline(&line2, &size2, stdin) != -1) {
        char *spilted1 = strtok(line1, " ");
        while (spilted1 != NULL) {
            if (strstr(line2, spilted1) == NULL) {
                printf("%s", spilted1);
            }
            spilted1 = strtok(NULL, " ");
        }
    }

    /* getline() may allocate even when it fails, so always release. */
    free(line1);
    free(line2);
    return 0;
}
但這顯然是錯誤的,因為我無法得到只出現在 string2 中的那些不同的詞。
我知道如何在Python中進行操作,但不知道如何在C中進行操作。
像這樣:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char **split(const char *str, const char *delimiter, size_t *len);
int cmp(const void *a, const void *b);
void find_diff(char **a1, char **a2);
void drop(char **a);
/*
 * Read two lines from stdin, split each into words, sort both word lists
 * and print the words that appear in exactly one of the two lines.
 *
 * Returns EXIT_SUCCESS when both lines were read and processed, and
 * EXIT_FAILURE when input is missing or an allocation failed.
 */
int main(void){
    char *line1 = NULL, *line2 = NULL;
    size_t size1 = 0, size2 = 0;
    char **array1 = NULL, **array2 = NULL;
    size_t len1 = 0, len2 = 0;
    int status = EXIT_FAILURE;

    /* getline() returns -1 on EOF/error; the buffers are not usable then. */
    if (getline(&line1, &size1, stdin) == -1 ||
        getline(&line2, &size2, stdin) == -1) {
        goto cleanup;
    }

    //(1)split
    array1 = split(line1, " \t\n", &len1);
    array2 = split(line2, " \t\n", &len2);
    if (array1 == NULL || array2 == NULL) {
        goto cleanup;
    }

    //(2)sort
    qsort(array1, len1, sizeof(*array1), cmp);
    qsort(array2, len2, sizeof(*array2), cmp);

    //(3)compare
    find_diff(array1, array2);
    status = EXIT_SUCCESS;

cleanup:
    /* Only hand real arrays to drop(); split() may have returned NULL. */
    if (array1 != NULL) {
        drop(array1);
    }
    if (array2 != NULL) {
        drop(array2);
    }
    free(line1);
    free(line2);
    return status;
}
/*
 * Split str into words separated by any of the characters in delimiter.
 *
 * str        string to split; it is not modified.
 * delimiter  set of separator characters; runs of separators are treated
 *            as a single separator and never produce empty words.
 * len        receives the number of words stored (0 on failure).
 *
 * Returns a newly allocated, NULL-terminated array of newly allocated
 * strings. The caller owns the array and every string in it. Returns NULL
 * if str or delimiter is NULL or if any allocation fails; in that case
 * nothing is leaked.
 */
char **split(const char *str, const char *delimiter, size_t *len){
    *len = 0;
    if (str == NULL || delimiter == NULL) {
        return NULL;
    }

    /* First pass: count the words so the array can be sized exactly. */
    size_t count = 0;
    const char *p = str + strspn(str, delimiter);
    while (*p != '\0') {
        ++count;
        p += strcspn(p, delimiter);
        p += strspn(p, delimiter);
    }

    char **ret = malloc(sizeof *ret * (count + 1)); /* +1 for NULL */
    if (ret == NULL) {
        return NULL;
    }

    /* Second pass: copy each word into its own allocation. */
    size_t stored = 0;
    p = str + strspn(str, delimiter);
    while (*p != '\0') {
        size_t word_len = strcspn(p, delimiter);
        char *word = malloc(word_len + 1);
        if (word == NULL) {
            /* Undo the partial result so the caller sees all-or-nothing. */
            while (stored > 0) {
                free(ret[--stored]);
            }
            free(ret);
            return NULL;
        }
        memcpy(word, p, word_len);
        word[word_len] = '\0';
        ret[stored++] = word;
        p += word_len;
        p += strspn(p, delimiter);
    }

    ret[stored] = NULL;
    *len = count;
    return ret;
}
/*
 * qsort() comparator for an array of C strings: a and b each point to an
 * array element (a char pointer); the strings are ordered with strcmp().
 */
int cmp(const void *a, const void *b){
    const char *lhs = *(const char *const *)a;
    const char *rhs = *(const char *const *)b;

    return strcmp(lhs, rhs);
}
/*
 * Print, one per line, every word that occurs in exactly one of the two
 * NULL-terminated string arrays. Both arrays must already be sorted in
 * strcmp() order; repeated words inside one array are counted once.
 */
void find_diff(char **a1, char **a2){
    for (;;) {
        char *left = *a1;
        char *right = *a2;

        if (left == NULL && right == NULL) {
            break;
        }

        /* Skip an entry when its successor is the same word (dedupe). */
        if (left != NULL && a1[1] != NULL && strcmp(left, a1[1]) == 0) {
            a1++;
            continue;
        }
        if (right != NULL && a2[1] != NULL && strcmp(right, a2[1]) == 0) {
            a2++;
            continue;
        }

        /* One side exhausted: everything left on the other side is unique. */
        if (left == NULL) {
            puts(right);
            a2++;
            continue;
        }
        if (right == NULL) {
            puts(left);
            a1++;
            continue;
        }

        /* Merge step: the smaller word cannot appear in the other list. */
        int order = strcmp(left, right);
        if (order < 0) {
            puts(left);
            a1++;
        } else if (order > 0) {
            puts(right);
            a2++;
        } else {
            a1++;
            a2++;
        }
    }
}
/*
 * Free a NULL-terminated array of heap-allocated strings: every string up
 * to the terminating NULL entry, then the array itself.
 * Passing NULL is allowed and does nothing.
 */
void drop(char **a){
    if (a == NULL) {
        return;
    }
    for (char **entry = a; *entry != NULL; entry++) {
        free(*entry);
    }
    free(a);
}
這是一種方法:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { MAX_WORDS = 64 };
/*
 * Tokenise buffer in place on spaces and newlines using strtok(), storing
 * a pointer to each token in words[]. At most max_words pointers are
 * stored; the pointers refer into buffer, which is modified.
 * Returns the number of pointers stored.
 */
static int split_words(char *buffer, char **words, int max_words)
{
    int count = 0;
    char *cursor = buffer;

    for (;;) {
        /* The token is fetched before the capacity test, as a while-loop
         * condition of the form "token && room" would do. */
        char *word = strtok(cursor, " \n");
        if (word == NULL || count >= max_words) {
            break;
        }
        words[count] = word;
        count++;
        cursor = NULL;   /* subsequent calls continue in the same buffer */
    }
    return count;
}
/*
 * Linear search: returns 1 if word compares equal (strcmp) to any of the
 * first list_size entries of list, otherwise 0.
 */
static int word_in_list(char *word, char **list, int list_size)
{
    int idx = 0;

    /* Advance until a match is found or the list is exhausted. */
    while (idx < list_size && strcmp(word, list[idx]) != 0) {
        idx++;
    }
    return idx < list_size;
}
/*
 * Print each of the n1 words in w1 that does not occur among the n2 words
 * in w2, one per line and wrapped in square brackets. A word repeated in
 * w1 is printed once per occurrence.
 */
static void print_unique(char **w1, int n1, char **w2, int n2)
{
    int idx = 0;

    while (idx < n1) {
        char *candidate = w1[idx++];

        if (word_in_list(candidate, w2, n2)) {
            continue;   /* present in the other list: not unique */
        }
        printf("[%s]\n", candidate);
    }
}
/*
 * Read two lines from stdin and print the words found in only one of
 * them: first those unique to line 1, then those unique to line 2.
 * Nothing is printed unless both lines are read with a positive length.
 * At most MAX_WORDS words per line are considered.
 */
int main(void)
{
    char *line1 = NULL;
    char *line2 = NULL;
    size_t size1 = 0;
    size_t size2 = 0;
    char *w1[MAX_WORDS];
    char *w2[MAX_WORDS];

    /* Short-circuit: the second line is only read if the first succeeded. */
    int have_both = getline(&line1, &size1, stdin) > 0 &&
                    getline(&line2, &size2, stdin) > 0;

    if (have_both) {
        /* The word arrays point into line1/line2, so print before freeing. */
        int n1 = split_words(line1, w1, MAX_WORDS);
        int n2 = split_words(line2, w2, MAX_WORDS);

        print_unique(w1, n1, w2, n2);
        print_unique(w2, n2, w1, n1);
    }

    free(line1);
    free(line2);
    return 0;
}
/*
You'll need two
arrays of char pointers, one for each line of input. You'll split the
first line into the first array, and the second line into the second
array. Then you'll go through the two arrays of pointers, comparing
strings and counting only those that do not match any of the entries in
the other array. (What do you do if one input line itself contains
repeats — The Lion, the Witch, and the Wardrobe for example? Also, do
you need to treat The as the same as the in that example?)
You can use strtok_r() or strtok_s() if you have them available; at a
pinch, you could use strtok(), but it is dangerous to use that in
library code. And you'll need to use strcmp() to compare the strings
— plus macros/functions from <ctype.h> to handle case-conversion if
that's appropriate.
Also note that strtok() is destructive. If you've split string 1 with
it, you can't then search in string 1 when you split string 2. Also
note that strstr("then came a deluge", "the") matches, even though most
people would not regard the haystack string as containing the needle
word the.
*/
所使用的算法在單詞數上是二次的(以 $O(N^2)$ 時間運行);它將一個列表中的每個唯一單詞與另一個列表中的每個單詞進行比較。您可以改為先對兩個列表進行排序並消除重復項(以 $O(N \log N)$ 時間),然后同步遍歷兩個列表,在線性時間內找出唯一的單詞。二次復雜度對數十個單詞沒有影響,對數百個單詞可能也沒有影響,但單詞再多就可能變得重要。
編譯:
$ gcc -O3 -g -std=c11 -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes \
> -Wold-style-definition -Werror uniq_words.c -o uniq_words
$
示例運行:
$ cat data
lihuayu zhangxuehui sunyunlei guolei fuwenxia
lihuayu lixin fuwenxia zhangxuehui
$ uniq_words < data
[sunyunlei]
[guolei]
[lixin]
$
數據周圍的方括號使我確信字符串包含我認為它們應包含的內容。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.