繁体   English   中英

如何扫描文本文件中的单词和单词数?

[英]How to scan text file for word and word count?

如何扫描文本文件中的单词并在每次扫描时增加计数?

我的程序主程序使用两个关键字和多个文件作为命令行参数。 在命令行中作为参数传递的每个文件将包含最多160个字符的消息列表。 例如:

$ ./main Hello Bye data1.txt data2.txt

将输出

3 messages containing Hello and 1 messages containing Bye
2 messages containing Hello and 2 messages containing Bye

在main.c中的do_file方法中,当调用stats_add_data()时不起作用,我也不知道为什么。 我检查了指针,它们似乎是正确的。

void do_file(char *filename, char *key1, char *key2) {

    stats *newstats;
    newstats = stats_create(key1, key2);

    FILE *f = fopen(filename, "r");
    char *keyword1 = newstats->key1;
    char *keyword2 = newstats->key2;

    int line_num = 1;
    int find_result1 = 0;
    int find_result2 = 0;
    char temp[160];

    const char delimter[1] = " ";
    char onewordtoken[160];

    // Get a line, up to 160 chars from file
    while(fgets(temp, 160, f) != NULL) {
        if((strstr(temp, key1)) != NULL) {
            printf("A match found on line: %d\n", line_num);
            printf("\n%s\n", temp);
            find_result1++;

            stats_add_data(newstats, temp);

            stats_print(newstats);
            printf("\n\n");
        }
        if((strstr(temp, key2)) != NULL) {
            printf("A match found on line: %d\n", line_num);
            printf("\n%s\n", temp);
            find_result2++;

            stats_add_data(newstats, temp);

            stats_print(newstats);
            printf("\n\n");
        }
        line_num++;
    }

    if((find_result1 == 0) && (find_result2 == 0)) {
        printf("\nSorry, couldn't find a match.\n");
    }
}

参考代码:

stats.c

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "stats.h"

// The stats_create function should allocate and return a new stats structure, storing key1 and key2 in the structâs string fields and setting the two integers to 0.
stats *stats_create(char *key1, char *key2) {
    stats *new = malloc(sizeof *new);
    new->key1 = key1;
    new->key2 = key2;
    new->int1 = 0;
    new->int2 = 0;
    return new;
}

// The stats_add_data function should update the structure pointed to by s against the value val. That is, if val contains the first keyword, the integer containing the count of the number of messages containing that keyword should be updated. Similarly for the second keyword.
void stats_add_data(struct stats *s, char *val) {
    int count1 = 0;
    int count2 = 0;
    char* key1 = s->key1;
    char* key2 = s->key2;

    if (strcmp(s->key1, val) == 0) {
        count1++;
    }
    if (strcmp(s->key2, val) == 0) {
        count2++;
    }

    count1 = s->int1;
    count2 = s->int2;
}

// The stats_print function should print on one line the number of messages containing the first keyword, and the number of keywords containing the second keyword. E.g. 2 messages containing Homeland and 2 messages containing Elementary.

void stats_print(struct stats *s) {
    int count1 = s->int1;
    int count2 = s->int2;
    char* key1 = s->key1;
    char* key2 = s->key2;

    printf("%d", count1);
    printf(" messages containing ");
    printf("%s", key1);
    printf(" and ");
    printf("%d", count2);
    printf(" messages containing ");
    printf("%s", key2);
}

// The stats_free function should free the given stats structure and any memory that might have been allocated for it.
void stats_free(struct stats *s) {
    if(s) {
        free(s);
    }
}

main.c中

#include <stdio.h>
#include <string.h>
#include "stats.h"

// A function void do_file(char *filename, char *key1, char*key2) that takes a file name and two keywords as its arguments. It should create a statistics struct that will be used to accumulate the statistics about the data in the file.

//Open the given filename, and iterate over each line of the file, reading in the data value and updating the statistics struct. Once the file has been read, close it and display the results.
void do_file(char *filename, char *key1, char *key2) {

    stats *newstats;
    newstats = stats_create(key1, key2);

    FILE *f = fopen(filename, "r");
    char *keyword1 = newstats->key1;
    char *keyword2 = newstats->key2;
    //char keyword1 = *key1;
    //char keyword2 = *key2;

    int line_num = 1;
    int find_result1 = 0;
    int find_result2 = 0;
    char temp[160];

    if((f = fopen(filename, "r")) == NULL) {
        printf("File does not exist!\n");
    }

    const char delimter[1] = " ";
    char onewordtoken[160];

    // Get a line, up to 160 chars from file
    while(fgets(temp, 160, f) != NULL) {
        if((strstr(temp, key1)) != NULL) {
            printf("A match found on line: %d\n", line_num);
            printf("\n%s\n", temp);
            find_result1++;

            stats_add_data(newstats, temp);

            stats_print(newstats);
            printf("\n\n");
        }
        if((strstr(temp, key2)) != NULL) {
            printf("A match found on line: %d\n", line_num);
            printf("\n%s\n", temp);
            find_result2++;

            stats_add_data(newstats, temp);

            stats_print(newstats);
            printf("\n\n");
        }
        line_num++;
    }

    stats_free(newstats);

    if((find_result1 == 0) && (find_result2 == 0)) {
        printf("\nSorry, couldn't find a match.\n");
    }

    // Close the file if still open
    if(f) {
        fclose(f);
    }
}

// A main function that iterates over each file argument, except the first, and runs do_file on it. After the 0th argument, the first two command line arguments will be the keywords to process. The remaining arguments will be the file arguments.

int main(int argc, char **argv) {

    if(argc > 2) {
        //printf("The argument supplied is %s\n", argv[1]);
        //stats_add_data();
        do_file("data.txt", argv[1], argv[2]);
        printf("\n\n");
        //stats_free(s);
    } else {
        printf("Too few arguments supplied.\n");
        //stats_free(s);
    }
}

编译时,始终启用所有警告,然后修复这些警告。

以下代码干净地编译并消除了子功能(它们在其余的代码中变成一个衬里)。

函数: free()正确处理NULL指针,因此在调用free()之前无需测试指针是否为NULL

发布的代码缺少stats.h的内容,因此我插入了原型和struct stats定义以及typedef语句

(使结构标记名和类型定义名相同是非常不好的主意)

检查最小数量的命令行参数应将usage消息输出到stderr,而不是stdout,并应指示正确的命令行参数格式。

修改了很长的注释以适合页面,包括较小的内容更改。

这是建议的代码:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
//#include <stdio.h>
//#include <string.h>
//#include "stats.h"
#include <errno.h>

#define MAX_LINE_LEN (160)

struct stats
{
    char *key1;
    char *key2;
    int int1;
    int int2;
};

typedef struct stats stats;


// prototypes
stats *stats_create(char *key1, char *key2);
#if 0
    void stats_add_data(struct stats *s, char *val);
#endif
void stats_print(struct stats *s);
#if 0
    void stats_free(struct stats *s);
#endif
void do_file(char *filename, char *key1, char *key2);


// the following comment is not correct:

/*
 * A main function that iterates over each file argument, 
 * except the first, and runs do_file on it. 
 * After the 0th argument, 
 * the first two command line arguments will be the 
 * keywords to process. 
 *The remaining arguments will be the file arguments.
 */

int main(int argc, char **argv)
{
    //if(argc > 2)
    if( argc > 3 )
    {
        //printf("The argument supplied is %s\n", argv[1]);
        //stats_add_data();
        for( int i=3; i<argc; i++ )
        {
            do_file( argv[i], argv[1], argv[2] );
        }
        //do_file("data.txt", argv[1], argv[2]);
        printf("\n\n");
        //stats_free(s);
    }

    else
    {
        printf("Too few arguments supplied.\n");
        //stats_free(s);
    }
} // end function: main



/*
 * The stats_create function should allocate 
 * and return a new stats structure, 
 * storing key1 and key2 in the struct 
 * as pointers to string fields and setting the two integers to 0.
 */

stats *stats_create(char *key1, char *key2)
{
    stats *new = malloc(sizeof *new);
    new->key1 = key1;
    new->key2 = key2;
    new->int1 = 0;
    new->int2 = 0;
    return new;
} // end function: stats_create


#if 0
    // The stats_add_data function should update the structure pointed to by s against the value val. That is, if val contains the first keyword, the integer containing the count of the number of messages containing that keyword should be updated. Similarly for the second keyword.
    void stats_add_data(struct stats *s, char *val)
    {
        //int count1 = 0;
        //int count2 = 0;
        // char* key1 = s->key1;
        // char* key2 = s->key2;

        if (strcmp(s->key1, val) == 0)
        {
            //count1++;
            s->int1++;
        }

        if (strcmp(s->key2, val) == 0)
        {
            //count2++;
            s->int2++;
        }

        //count1 = s->int1;
        //count2 = s->int2;
    } // end function: stats_add_data
#endif

/*
 * The stats_print function should print on one line
 * the number of messages containing the first keyword, 
 * and the number of messages containing the second keyword. 
 * E.g. 2 messages containing Homeland and 2 messages containing Elementary.
 */

void stats_print(struct stats *s)
{
    //int count1 = s->int1;
    //int count2 = s->int2;
    //char* key1 = s->key1;
    //char* key2 = s->key2;

    //printf("%d", count1);
    //printf(" messages containing ");
    //printf("%s", key1);
    //printf(" and ");
    //printf("%d", count2);
    //printf(" messages containing ");
    //printf("%s", key2);
    printf( "%d messages containing <%s> and %d messages containing <%s>\n",
            s->int1, s->key1, s->int2, s->key2 );
} // end function: stats_print


#if 0
    // The stats_free function should free the given stats structure and any memory that might have been allocated for it.
    void stats_free(struct stats *s)
    {
        if(s) {
            free(s);
        }
    } // end function: stats_free
#endif

/*
 * A function void do_file(char *filename, char *key1, char*key2)
 * that takes a file name and two keywords as its arguments. 
 * It should create a statistics struct 
 * that will be used to accumulate the statistics
 * about the data in the individual file.
 */

/*
 * Open the given filename, 
 * iterate over each line of the file, 
 * searching for the two key words in the line
 * updating the statistics struct for each line read. 
 * after the whole file has been read, 
 * close it and display the results.
 */

void do_file(char *filename, char *key1, char *key2)
{

    stats *newstats = NULL;
    newstats = stats_create(key1, key2);

    // FILE *f = fopen(filename, "r");
    // char *keyword1 = newstats->key1;
    // char *keyword2 = newstats->key2;
    // char keyword1 = *key1;
    // char keyword2 = *key2;

    int line_num = 1;
    // int find_result1 = 0;
    // int find_result2 = 0;
    char temp[ MAX_LINE_LEN+1]; //+1 to allow for NUL terminator byte

    FILE *f = NULL;
    if((f = fopen(filename, "r")) == NULL)
    {
        fprintf( stderr, "fopen for %s failed due to: %s\n", filename, strerror( errno ) );
        // exit( EXIT_FAILURE );
        return;
        //printf("File does not exist!\n");
    }

    // implied else, fopen successful

    // const char delimter[1] = " ";
    // char onewordtoken[160];

    // Get a line, up to 160 chars from file
    while(fgets(temp, sizeof temp, f) != NULL)
    {
        if((strstr(temp, key1)) != NULL)
        {
            printf("A match found on line: %d\n", line_num);
            printf("\n%s\n", temp);
            //find_result1++;

            //stats_add_data(newstats, temp);
            newstats->int1++;

            stats_print(newstats);
            printf("\n\n");
        }

        if((strstr(temp, key2)) != NULL)
        {
            printf("A match found on line: %d\n", line_num);
            printf("\n%s\n", temp);
            //find_result2++;

            //stats_add_data(newstats, temp);
            newstats->int2++;

            //stats_print(newstats);
            //printf("\n\n");
        }

        line_num++;
    } // end while

    stats_print( newstats );
    printf("\n\n");

    // stats_free(newstats);

    //if((find_result1 == 0) && (find_result2 == 0)) {
    if( 0 == newstats->int1 && 0 == newstats->int2 )
    {
        printf("\nSorry, couldn't find a match.\n");
    }

    free( newstats );

    // Close the file if still open
    //if(f) {
    //    fclose(f);
    //}
    // would not get here if file did not open
    fclose( f );
} // end function: do_file

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM