简体   繁体   English

从C中的映射文件中读取多个数据文件

[英]Reading in multiple data files from a mapping file in C

First off I am creating a program that will read lines of characters and find words (they don't have to have meaning, ie 'ab' could be word ) and storing them in the appropriate data structure. 首先,我正在创建一个程序,它将读取字符行并查找单词(它们不必具有含义,即'ab'可以是单词)并将它们存储在适当的数据结构中。 I used trie structure to store the words. 我使用trie结构来存储单词。 I am given a mapping file as a command line argument yet inside the mapping file I have two data files I need to gain information from. 我给了一个映射文件作为命令行参数但在映射文件中我有两个数据文件,我需要从中获取信息。 The usage interface is as follows: first(program name) <mappingfile> . 用法界面如下: first(program name) <mappingfile>

Inside the mapping file, there exist two data files: <dictFile> and <dataFile> . 在映射文件中,存在两个数据文件: <dictFile> 和 <dataFile> 。 I'm not sure how to read and store the information contained in the two data files. 我不知道如何读取和存储这两个数据文件中的信息。 So far I have the following: 到目前为止,我有以下内容:

#include <stdio.h>
#include<stdlib.h>

/* Stub that takes the dictionary stream; its body is empty in this snippet. */
void readDict(FILE *dict_file){

}

/*
 * Entry point of the asker's attempt: validates the argument count and
 * opens the mapping file named by argv[1]. Nothing is read from it yet.
 */
int main(int argc, char *argv[]){
  FILE* file;


  if(argc != 2){ // expect exactly one argument after the program name
    printf("error\n");
    return 0;
  }

  file = fopen(argv[1],"r" ); // open the mapping file for reading

  // NOTE(review): `input` is not declared anywhere in this snippet, so the
  // next statement does not compile as shown.
  input;
  if(file == NULL){ // fopen failed: the mapping file could not be opened
    printf("file does not exist\n");
    return 0;
  }
}

My goal is to have pointers point to respective data files in the mapping file which I can use for reading their contents. 我的目标是让指针指向映射文件中的各个数据文件,我可以使用它们来读取它们的内容。 I will be given the following input in the command line: first(program name) <mappingfile> . 我将在命令行中输入以下内容: first(program name) <mappingfile>

Inside the mapping file are lines naming two plain .txt files, in the form <dictFile> <dataFile> . 映射文件中的每一行包含两个普通.txt文件的名称,格式为<dictFile> <dataFile>

I wish to access both contents of <dictFile> and <dataFile> .. with pointers to the respective file. 我希望通过指向相应文件的指针访问<dictFile><dataFile> ..的内容。

If I understand you correctly this should do it. 如果我理解正确的话,应该这样做。 Note that it assumes your filenames don't have any spaces. 请注意,它假定您的文件名没有任何空格。 And if you want to use the "non secure" api's you need to add _CRT_SECURE_NO_WARNINGS to the project properties under Configuration Properties -> C/C++ -> Preprocessor -> Preprocessor Definitions. 如果你想使用“非安全”api,你需要在配置属性 - > C / C ++ - >预处理器 - >预处理器定义下的项目属性中添加_CRT_SECURE_NO_WARNINGS。

#include <stdio.h>
#include<stdlib.h>

/* Stub that takes the open dictionary stream; its body is empty in this snippet. */
void readDict(FILE *dict_file){

}

/*
 * Entry point. Expects exactly one argument: the path of a mapping file
 * whose first two whitespace-separated tokens name a dictionary file and a
 * data file (names must not contain spaces and are limited to 255 chars).
 * Both files are opened for reading and the dictionary stream is handed to
 * readDict().
 *
 * Returns 0 on success, 1 if the arguments are wrong, a file cannot be
 * opened, or the mapping file does not supply two file names.
 */
int main(int argc, char *argv[]){
  FILE *file;

  if(argc != 2){ // expect exactly one argument after the program name
    printf("error\n");
    return 1;
  }

  file = fopen(argv[1],"r" ); // open the mapping file for reading
  if(file == NULL){ // fopen failed: the mapping file could not be opened
    printf("file does not exist\n");
    return 1;
  }

  char dictFileString[256], dataFileString[256];
  // fscanf returns the number of fields it converted; with fewer than two
  // the name buffers would be left uninitialized, so stop here instead of
  // passing garbage to fopen/printf.
  if (fscanf( file, "%255s %255s", dictFileString, dataFileString ) != 2) {
      printf( "%s does not contain two file names\n", argv[1] );
      fclose(file);
      return 1;
  }

  FILE *dictFile, *dataFile;
  dictFile = fopen( dictFileString, "r" );
  if (dictFile == NULL) {
      printf( "%s does not exist\n", dictFileString );
      fclose(file);
      return 1;
  }
  dataFile = fopen( dataFileString, "r" );
  if (dataFile == NULL) {
      printf( "%s does not exist\n", dataFileString );
      fclose(file);
      fclose(dictFile);
      return 1;
  }

  readDict(dictFile);

  //  The additional logic would be placed here.

  fclose( dictFile );
  fclose( dataFile );

  //  If you need to read additional file names then loop
  //  back up to read the next line of 'file'

  fclose( file );
  return 0;
}

If I understand your question correctly you want to parse a file where each line contains the filenames of two other files and then read from these. 如果我正确理解你的问题,你想要解析一个文件,其中每行包含两个其他文件的文件名 ,然后从这些文件中读取。 What you can do is use fgets to read your mapping file line by line. 你可以做的是使用fgets逐行读取你的映射文件。 What you can do next is use the function strtok to split your string on a whitespace. 接下来你要做的是使用函数strtok将字符串拆分为空格。 I'll break it down for you step by step. 我会一步一步地为你分解它。

Firstly we want to open the mapping file for reading 首先,我们要打开映射文件进行读取

/* Open the mapping file named on the command line; perror appends the reason taken from errno. */
if((file = fopen(argv[1],"r")) == NULL) {
  perror("error opening file");
  return 1;
}

This will try to open the mapping file specified by the command line arguments of your program and if it fails it will print a corresponding error message. 这将尝试打开程序的命令行参数指定的映射文件,如果失败则会打印相应的错误消息。

while(fgets(buf, sizeof(buf), file) != NULL) {

After we've opened the file we want to iterate through all the lines until we reach the end of the file and fgets will return NULL. 在我们打开文件之后,我们想要遍历所有行,直到我们到达文件的末尾, fgets将返回NULL。 fgets will put the current line into buf . fgets会将当前行放入buf

/*
 * Split the line into its two file names.
 * NOTE: strtok returns NULL when no further token exists, so a blank line
 * or a line with a single name leaves datafilename NULL; check both
 * pointers before passing them on.
 */
dictfilename = strtok(buf, " ");
datafilename = strtok(NULL, " ");
strtok(dictfilename, "\n"); /* Remove any trailing newlines */
strtok(datafilename, "\n");

We need to split the line read by fgets by a delimiter (a whitespace) so we know which part corresponds to the dictfile and the datafile. 我们需要按分隔符(空格)拆分由fgets读取的行,以便我们知道哪个部分对应于dictfile和数据文件。 This is done by using the strtok function which returns a pointer to the substring before the whitespace and when passing in NULL it will return a pointer to the substring after the whitespace. 这是通过使用strtok函数完成的,该函数返回指向空格之前的子字符串的指针,当传入NULL时,它将返回指向空格之后的子字符串的指针。 A slightly weird way of removing any trailing newlines is to use strtok and the newline as a delimiter. 删除任何尾随换行符的一种有点奇怪的方法是使用strtok和换行符作为分隔符。

/*
 * Open both files named on the current mapping line for reading.
 * NOTE: each early return leaves the streams opened so far unclosed.
 */
if((dictfile = fopen(dictfilename,"r")) == NULL) {
  fprintf(stderr, "error opening file %s: %s\n", dictfilename, strerror(errno));
  return 1;
}

if((datafile = fopen(datafilename,"r")) == NULL) {
  fprintf(stderr, "error opening file %s: %s\n", datafilename, strerror(errno));
  return 1;
}

Very similarly to how we opened the mapping file, we now open the two files named on the current line read by fgets, using "r" mode, which opens a file for reading. 与打开映射文件的方式非常相似,我们现在以“r”模式(只读)打开fgets读取的当前行上指定的两个文件。 If the file does not exist or cannot be found, the fopen call fails. 如果文件不存在或找不到,则fopen调用失败。

/*
 * Echo each file to stdout one character at a time.
 * NOTE: getc returns an int; declare c as int (not char) so that EOF stays
 * distinguishable from every valid byte value.
 */
printf("Content of %s:\n", dictfilename);
while ((c = getc(dictfile)) != EOF)
  putchar(c);

printf("\nContent of %s:\n", datafilename);
while ((c = getc(datafile)) != EOF)
  putchar(c);

This is a very simple method of "dumping" the content of the files. 这是一种“转储”文件内容的非常简单的方法。 It uses getc to read the next char from the file and prints it until it reads EOF. 它使用getc从文件中读取下一个char并打印它直到它读取EOF。 This is where you should do your own function. 这是你应该做自己的功能的地方。

/* Release both streams opened for the current mapping line. */
fclose(dictfile);
fclose(datafile);

And don't forget to close the files afterwards or you will leak resources. 并且不要忘记之后关闭文件或者您将泄漏资源。

Finally here is the code on what I just described 最后这里是我刚才描述的代码

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>

#define MAX_LENGTH 100 // change this to the actual maximum length of your lines.

/*
 * Reads the mapping file named by argv[1] line by line. Every non-blank
 * line must hold two whitespace-separated file names
 * (<dictfile> <datafile>); both files are opened and their contents are
 * echoed to stdout. Lines are read into a MAX_LENGTH-byte buffer, so a
 * longer line is seen by fgets as several pieces.
 *
 * Returns 0 on success and 1 on a usage error, a line with only one file
 * name, or a file that cannot be opened.
 */
int main(int argc, char **argv){
  FILE *file, *dictfile, *datafile;
  int c; /* int, not char: getc returns an int so EOF differs from every byte value */
  char buf[MAX_LENGTH];
  char *dictfilename, *datafilename;

  if(argc != 2) {
    fprintf(stderr, "Usage: %s <mapping file>\n", argv[0]);
    return 1;
  }

  if((file = fopen(argv[1],"r")) == NULL) {
    perror("error opening file");
    return 1;
  }

  while(fgets(buf, sizeof(buf), file) != NULL) {
    /* Treating '\r' and '\n' as delimiters strips the line ending from the
       last name, so no separate newline-removal step is needed. */
    dictfilename = strtok(buf, " \t\r\n");
    if(dictfilename == NULL)
      continue; /* blank line: nothing to open */

    datafilename = strtok(NULL, " \t\r\n");
    if(datafilename == NULL) {
      fprintf(stderr, "malformed line in %s: expected <dictfile> <datafile>\n", argv[1]);
      fclose(file);
      return 1;
    }

    /* Report the failure before closing anything: fclose may overwrite errno. */
    if((dictfile = fopen(dictfilename,"r")) == NULL) {
      fprintf(stderr, "error opening file %s: %s\n", dictfilename, strerror(errno));
      fclose(file);
      return 1;
    }

    if((datafile = fopen(datafilename,"r")) == NULL) {
      fprintf(stderr, "error opening file %s: %s\n", datafilename, strerror(errno));
      fclose(dictfile);
      fclose(file);
      return 1;
    }

    // do something with the files (e.g read all the content)
    printf("Content of %s:\n", dictfilename);
    while ((c = getc(dictfile)) != EOF)
      putchar(c);

    printf("\nContent of %s:\n", datafilename);
    while ((c = getc(datafile)) != EOF)
      putchar(c);
    printf("\n");

    // don't forget to close the files when you're done with them.
    fclose(dictfile);
    fclose(datafile);
  }
  fclose(file);
  return 0;
}

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM