为什么我的程序无法识别字符串中的相似词？

Question

我想编写一个接受输入 T 的程序。在接下来的 T 行中，每行输入一个字符串。输出应为该字符串中的单词可以重新排列的方式数。

#include <stdio.h>
#include <stdlib.h>

/*
 * Reads a count T, then T lines of space-separated words. For each line it
 * starts from fact(number of words) and divides by fact(count) each time the
 * nested scan below decides that two words are equal, then prints the result.
 *
 * NOTE(review): strlen() and fact() are used here without a visible
 * declaration: only <stdio.h> and <stdlib.h> are included above, and fact()
 * is defined after main().
 */
int main() {
    /* count, test and word are initialised once here, not once per line. */
    int T, i, l, count = 1, test = 0, word = 0, ans;
    char line[200];

    scanf("%d", &T);

    for (i = 0; i < T; i++) {
        /* Skips leading whitespace, then reads up to the newline. No field
         * width is given, so more than 199 characters would overrun line[]. */
        scanf(" %[^\n]", line);
        l = strlen(line);
        /* Count the spaces; word + 1 is then used as the number of words. */
        for (int q = 0; q < l; q++) {
            if (line[q] == ' ') {
                word++;
            }
        }
        ans = fact(word + 1);
        word = 0;
        /* j visits every character index (not only word starts); k looks for
         * a later space that is followed by the same character as line[j]. */
        for (int j = 0; j < l; j++) {
            for (int k = j + 1; k < l; k++) {
                if (line[k] == ' ' && line[k + 1] == line[j]) {
                    /* Compare the text at j with the word after the space at
                     * k, one character at a time. */
                    int m = j;
                    int n = k + 1;
                    for (;;) {
                        if (line[m] != line[n]) {
                            break;
                        } else
                        /* A match is recorded only when both words end on a
                         * space. A word that ends at the terminating '\0'
                         * (the last word of the line) never sets test. */
                        if (line[m] == ' ' && line[n] == ' ') {
                            test = 1;
                            break;
                        } else {
                            m++;
                            n++;
                        }
                    }
                    if (test == 1) {
                        /* count starts at 1 but is reset to 0 below, so only
                         * the first match ever divides by fact(2). Every later
                         * match (including those on later lines) increments
                         * 0 to 1 and divides by fact(1) == 1, which leaves
                         * ans unchanged. */
                        count++;
                        ans = ans / fact(count);
                        count = 0;
                        test = 0;
                    }
                }
            }
        }
        printf("%d\n", ans);
    }
}

/*
 * Returns n! as an int.
 *
 * Any n <= 1 (including 0 and negative values) yields 1, so the function
 * always terminates; the previous recursive form only stopped at n == 1 and
 * recursed without bound for n <= 0.
 *
 * The result does not fit in a 32-bit int for n >= 13, so callers must keep
 * n small.
 */
int fact(int n) {
    int result = 1;

    for (int k = 2; k <= n; k++) {
        result *= k;
    }
    return result;
}

现在，在我的程序中，

我的 output 是这样的：

2
no way no good
12
yes no yes yes no
120

如果 T = 2 并且第 1 个字符串是 no way no good，它给出了正确的输出，即 12（4!/2!）。也就是说，它识别出了两个相同的词。

但是在第二个输入中，字符串是 yes no yes yes no。这意味着有 3 个 yes 和 2 个 no。所以答案应该是 5!/(3!2!) = 10。但为什么答案是 120？为什么它不能识别相同的词？

Answer 1

重复检测器中的主要问题是您使用if (line[m] == ' ' && line[n] == ' ')测试单词的结尾，但此测试无法识别最后一个单词出现的重复，因为line[n]是'\0' ，而不是' ' 。

请注意这些进一步的问题：

你没有正确处理出现两次以上的单词：你应该仅在外循环完成后才执行 ans = ans / fact(count);。例如，如果一个词出现 3 次，它将被检测为 3 对重复项，实际上导致 ans 被除以 2³ = 8，而不是 3! = 6。
您应该防止缓冲区溢出并检测无效输入：
```
 if (scanf(" %199[^\n]", line) != 1) break;
```
ans 的 int 类型范围对于中等数量的单词来说太小：13! 是 6227020800，在大多数系统上大于 INT_MAX。

代码很难遵循。 您应该考虑将该行解析为一个单词数组，并使用更传统的方法来计算重复项。

这是使用这种方法的修改版本：

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * qsort() comparator for an array of char * elements.
 *
 * p1 and p2 each point at one array element (a pointer to a string); the
 * strings they refer to are ordered with strcmp().
 */
static int cmpstr(const void *p1, const void *p2) {
    const char *lhs = *(char * const *)p1;
    const char *rhs = *(char * const *)p2;

    return strcmp(lhs, rhs);
}

/*
 * Returns n! as an unsigned long long.
 *
 * Any n <= 1 yields 1. The multiplication is unsigned, so results beyond the
 * range of unsigned long long wrap around instead of being reported.
 */
unsigned long long factorial(int n) {
    unsigned long long product = 1;

    for (; n > 1; n--) {
        product *= (unsigned long long)n;
    }
    return product;
}

/*
 * Reads a test-case count T from the first input line, then for each of the
 * next T lines prints the number of distinct orderings of its
 * whitespace-separated words: n! / (c1! * c2! * ...), where n is the word
 * count and each c is the multiplicity of one distinct word.
 *
 * The value is built up with exact multiply/divide steps instead of computing
 * n! first, because n! no longer fits in unsigned long long once a line has
 * more than 20 words even when the final result does. A result that itself
 * exceeds unsigned long long still wraps.
 *
 * Returns 1 if the first line cannot be read or parsed, 0 otherwise.
 */
int main() {
    int T;
    char line[200];
    char *words[100];

    if (!fgets(line, sizeof line, stdin) || sscanf(line, "%d", &T) != 1)
        return 1;

    while (T-- > 0) {
        if (!fgets(line, sizeof line, stdin))
            break;

        /* Split the line in place: every whitespace character becomes a
         * terminator and the first character of each run is recorded. */
        int n = 0;
        int begin = 1;
        for (char *p = line; *p; p++) {
            if (isspace((unsigned char)*p)) {
                *p = '\0';
                begin = 1;
            } else if (begin) {
                /* Never write past the end of words[]. */
                if ((size_t)n < sizeof words / sizeof words[0])
                    words[n++] = p;
                begin = 0;
            }
        }

        /* Sorting makes equal words adjacent so each run can be counted. */
        qsort(words, n, sizeof(*words), cmpstr);

        unsigned long long ans = 1;
        int count;
        for (int i = 0; i < n; i += count) {
            for (count = 1; i + count < n && strcmp(words[i], words[i + count]) == 0; count++)
                continue;
            /* Multiply by C(i + count, count). After step k the value equals
             * (previous ans) * C(i + k, k), so every division is exact. */
            for (int k = 1; k <= count; k++)
                ans = ans * (unsigned long long)(i + k) / (unsigned long long)k;
        }
        printf("%llu\n", ans);
    }
    return 0;
}

为什么我的程序无法识别字符串中的相似词？

问题描述

1 个解决方案

解决方案1
0 2022-10-03 21:37:46

为什么我的程序无法识别字符串中的相似词？

问题描述

1 个解决方案

解决方案1 0 2022-10-03 21:37:46

解决方案1
0 2022-10-03 21:37:46