繁体   English   中英

在c中的缓冲区块中读取未知长度文件

[英]reading unknown length file in buffer chunks in c

我试图将未知长度的二进制文件读入缓冲区块,而不使用像lseek()fseek这样的函数。

  1. 我一次使用了1024字节的struct buffer。 当读取大于1012字节的文件时,它将分配几个缓冲区。 但是,当遇到最后一个块时,它肯定会少于或等于1024个字节。 因此,我尝试计算最后一个块的长度,以便我可以读取最后一个块直到eof但我有点混淆如何实现它。

提前致谢。

#include <stdio.h>
#include <stdlib.h>

// One chunk of file data; chunks are chained through `next` into a singly linked list.
typedef struct Buffer{
  unsigned char data[1012]; // raw bytes read from the file
  struct Buffer *next; // author's assumption: "12 bytes". NOTE(review): pointer size is platform-dependent, so sizeof(Buffer) == 1024 is not guaranteed
}Buffer;

// Copies the first `length` bytes of src's data array into dest's data array.
// Both pointers are reinterpreted as Buffer*; only data[] is copied, `next` is untouched.
// NOTE(review): there is no bounds check — a length above 1012 indexes past data[].
void mymemcpy(void *dest, void *src, size_t length){
  Buffer *buffer_toFill = (Buffer *)dest;
  Buffer *buffer_toAdd = (Buffer *)src;
  int a = 0; // never used
  for(int i = 0; i < length; i++){ // signed int index compared against a size_t bound
    buffer_toFill->data[i] = buffer_toAdd->data[i];
  }
}

// Allocates a new node, copies `size` bytes of read_buffer's data into it, and
// returns it as the new head of the list. Nodes are prepended, so the list ends
// up in the reverse of the order in which chunks were read.
// NOTE(review): when head is NULL, new_buffer->next is never assigned and keeps
// whatever malloc left there; the malloc result is not checked; read_buffer is
// not freed here.
Buffer* add_buffer_front(Buffer *head, Buffer *read_buffer, int size){
  Buffer *new_buffer = malloc(sizeof(Buffer));
  mymemcpy(new_buffer, read_buffer, size);
  if(head != NULL){
    new_buffer->next = head;
  }
  return new_buffer;
}

// Walks the list from head, printing `length` bytes of every node as two-digit
// uppercase hex, and frees each node after printing it.
// NOTE(review): the same `length` is applied to every node; the caller passes the
// total file length, so i can run past the 1012-byte data array.
void display_List(Buffer *head, size_t length){
  Buffer *current = head;
  while(current != NULL){
    for(int i = 0; i < length; i++){
      printf("%02X",(unsigned)current->data[i]); // author's remark: output differs from `xxd <filename>`
      //printf("%c", current->data[i]);  
    }
    Buffer *prev = current;
    free(prev);
    current = current->next; // NOTE(review): reads through the node that was just freed (prev == current)
  }
}

// Reads the file named by argv[1] chunk by chunk, prepends each chunk to a linked
// list, then hex-dumps the list.
// NOTE(review): as posted this does not compile — `length`, used in the
// eof_int == 0 branch, is never declared; it is the value the question asks how
// to compute.
int main(int argc, char **argv){
  FILE *fd;
  Buffer *head_buffer = NULL;
  int file_length = 0; // running total of the sizes passed to add_buffer_front
  int eof_int = 1; // holds fread's return value (number of items read), despite the name
  if(argc != 2){
    printf("Usage: readFile <filename>\n");
    return 1; 
  }

  fd = fopen(argv[1], "rb"); // NOTE(review): result is not checked for NULL

  while(eof_int != 0){ 
    Buffer *new_buffer = malloc(sizeof(Buffer)); // never freed: add_buffer_front copies it into a second allocation
    // A single item of sizeof(Buffer)-12 bytes is requested, so fread returns 1
    // for a full chunk and 0 otherwise; a shorter final chunk also yields 0.
    eof_int = fread(new_buffer, sizeof(Buffer)-12, 1, fd);
    if(eof_int == 0){ 
      // Placeholder left by the author: the size of the last chunk is not known here.
      //size_t length
      //
      //
      head_buffer = add_buffer_front(head_buffer, new_buffer, length);
      file_length += length;
    }else{
      head_buffer = add_buffer_front(head_buffer, new_buffer, (sizeof(new_buffer->data)));
      file_length += (sizeof(new_buffer->data));
    }
  }
  display_List(head_buffer, file_length); // passes the total length; display_List applies it to every node
  fclose(fd);
  return 0;
}

你有几个问题。

(1) fread返回读取的项目数,但不会返回eof指示。 您需要调用feof(stream*)来确定您是否已到达文件末尾。

(2) 你说你的next指针占12个字节,这是一个非常危险的假设(指针的大小取决于平台)。 更好的做法是直接把数据读入结构体中已分配的那1012个字节(即data数组)。 否则,您打印出来的很可能不是读入的内容,而只是未初始化的内存。

(3)使用fread的返回值来决定要复制多少内存。

请参阅下面代码中的注释 - 还要考虑更改1012以使用#define。

#include <stdio.h>
#include <stdlib.h>

// One chunk of file data; chunks are chained through `next` into a singly linked list.
typedef struct Buffer{
  unsigned char data[1012]; // payload bytes; read directly into this array
  struct Buffer *next; // following chunk, or NULL for the last one. The pointer's size is platform-dependent — do not assume 12 bytes
}Buffer;

// Create a structure to store stuff about a file

// Bookkeeping for a file held in memory as a linked list of Buffer chunks.
typedef struct {
   Buffer *head;   // first chunk of the list; NULL while the list is empty
   Buffer *tail;   // last chunk of the list; NULL while the list is empty
   size_t length;  // running total that add_buffer accumulates from its extra_length argument
} MyFile;

/*
void mymemcpy(void *dest, void *src, size_t length){
  Buffer *buffer_toFill = (Buffer *)dest;
  Buffer *buffer_toAdd = (Buffer *)src;
  int a = 0; 
  for(int i = 0; i < length; i++){
    buffer_toFill->data[i] = buffer_toAdd->data[i];
  }
}

Buffer* add_buffer_front(Buffer *head, Buffer *read_buffer, int size){
  Buffer *new_buffer = malloc(sizeof(Buffer));
  mymemcpy(new_buffer, read_buffer, size);
  if(head != NULL){
    new_buffer->next = head;
  }
  return new_buffer;
}

*/

// Lets make this easier - The buffer has already been "malloced" once - why do it again

// And why are you reversing the file

// Perhaps 

/*
 * Append an already-allocated chunk to the end of the file's chunk list.
 *
 * to_be_added  - chunk to link in; the list takes ownership of it.
 * file         - list bookkeeping (head, tail, running length) to update.
 * extra_length - number of valid bytes in to_be_added->data, added to
 *                file->length.
 */
void add_buffer(Buffer *to_be_added, MyFile *file, size_t extra_length) {
   to_be_added->next = NULL;  // The appended chunk is always the new last one
   if (file->tail) { // List is non-empty: link after the current last chunk
     file->tail->next = to_be_added;
   } else { // First buffer!
     file->head = to_be_added;
   }
   // Always advance the tail; otherwise every later append would overwrite
   // the first chunk's next pointer and drop the chunks in between.
   file->tail = to_be_added;
   file->length += extra_length;
}

/*
void display_List(Buffer *head, size_t length){
  Buffer *current = head;
  while(current != NULL){
    for(int i = 0; i < length; i++){
      printf("%02X",(unsigned)current->data[i]); //this shows different value compare with  xxd <filename>
      //printf("%c", current->data[i]);  
    }
    Buffer *prev = current;
    free(prev);
    current = current->next;
  }
}

*/

// Instead pass in the new structure

/*
 * Print the whole file as two-digit uppercase hex, chunk by chunk.
 *
 * file->length is the total number of valid bytes. Every chunk is taken to be
 * full except possibly the last, so each chunk contributes at most
 * sizeof(data) bytes and the remainder is carried over to the next chunk.
 */
void display_list(MyFile *file) {
   size_t contents_left = file->length;
   for (Buffer *current = file->head; current != NULL; current = current->next) {
      // A chunk never holds more than its data array; the last one may hold less
      size_t capacity = sizeof current->data;
      size_t chunk_length = contents_left > capacity ? capacity : contents_left;
      for (size_t i = 0; i < chunk_length; i++) {
         printf("%02X", (unsigned)current->data[i]);
      }
      // Consume what was printed so the final chunk stops at the real end of data
      contents_left -= chunk_length;
   }
}


}
int main(int argc, char **argv){
  FILE *fd;
  MyFile read_file;
  read_file.head = NULL;
  read_file.tail = NULL;
  read_file.length = 0;

  Buffer *head_buffer = NULL;
  int file_length = 0;
  int eof_int = 1;
  if(argc != 2){
    printf("Usage: readFile <filename>\n");
    return 1; 
  }

  fd = fopen(argv[1], "rb");

  // Check fd
  if (fd == NULL) {
    // error stuff
    return EXIT_FAILURE; // Look up the include for this
 }
  while(eof_int != 0){ 
    Buffer *new_buffer = malloc(sizeof(Buffer));
    eof_int = fread(new_buffer->data, 1012, 1, fd); // Do not make assumptions on the size of a pointer and store it in the correct location
    if(eof_int == 0) { // Read nothing
       free(new_buffer); // We was too optimistic! Did Not need this in the end 
       break;
    } else {
      add_buffer(&read_file, new_buffer, eof_int);
    }
  }
  display_List(&read_file);
  fclose(fd);
  return 0;
}

你要找的诀窍在于:fread返回的是读取到的项目(item)数。 您把整个缓冲区当作1个项目来读,因此它只会告诉您读到了0个或1个缓冲区。 应当把这两个参数对调:把每个项目的大小设为1字节、把项目数设为缓冲区大小,这样返回值就是实际读取的字节数:

// Element size 1 with the capacity as the count, so fread's return value is a byte count.
// sizeof(Buffer)-12 still relies on the question's assumed struct layout.
size_t bytes_read = fread(buffer, 1, sizeof(Buffer)-12, fd);

现在您可以知道缓冲区中读取了多少字节。 我们可以向Buffer添加一个size字段,这样每个缓冲区都可以记住它读取的字节数,并且只打印那么多字节。

// Number of bytes requested per read; passed to Buffer_read as buffer_size.
const size_t BUFFER_SIZE = 1024;

// One chunk of file data in a singly linked list.
typedef struct Buffer {
    // Separately allocated storage (set by Buffer_read) instead of a fixed-size array
    unsigned char *data;
    // Number of bytes fread stored in data; Buffer_print prints exactly this many
    size_t size;
    // Following chunk, or NULL
    struct Buffer *next;
} Buffer;

/* Print the buffer's valid bytes (the first buffer->size of them) as
   space-separated two-digit uppercase hex. Prints nothing when size is 0. */
void Buffer_print( Buffer *buffer ) {
    size_t index = 0;
    while( index < buffer->size ) {
        unsigned char byte = buffer->data[index];
        printf("%02hhX ", byte);
        index++;
    }
}

/*
 * Allocate an empty Buffer node.
 *
 * Returns a node with size 0, data NULL and next NULL, or NULL if the
 * allocation fails. The caller owns the returned node.
 */
Buffer *Buffer_new(void) {
    Buffer *buffer = malloc(sizeof *buffer);
    if( buffer == NULL ) {
        return NULL;
    }

    // Initialize every field so no member is left holding garbage
    buffer->size = 0;
    buffer->data = NULL;
    buffer->next = NULL;

    return buffer;
}

请注意,我小心地初始化缓冲区的所有字段,否则我们冒着垃圾的风险。

现在我们已经改变了缓冲区,因此我们对其大小和位置的假设被打破了。 没关系,我们应该直接读取buffer->data

/*
 * Read up to buffer_size bytes from fp into freshly allocated storage owned
 * by buffer.
 *
 * Returns the number of bytes read, which is also stored in buffer->size.
 * Returns 0 at end of file, on a read error, or if the storage cannot be
 * allocated; in all of those cases buffer->data is NULL and buffer->size is 0,
 * so an empty buffer carries no allocation.
 */
size_t Buffer_read( Buffer *buffer, size_t buffer_size, FILE* fp ) {
    buffer->size = 0;
    buffer->data = malloc(buffer_size);
    if( buffer->data == NULL ) {
        return 0;
    }

    // Element size 1, so the return value counts bytes, not whole buffers
    size_t bytes_read = fread(buffer->data, 1, buffer_size, fp);
    if( bytes_read == 0 ) {
        // Nothing was read: release the storage so the empty buffer stays cheap
        free(buffer->data);
        buffer->data = NULL;
    }
    buffer->size = bytes_read;
    return bytes_read;
}

现在缓冲区知道它读取了多少数据,我们可以根据需要分配任何大小的数据。 没有必要在结构中硬编码。 这使代码更加灵活和高效。 它让我们便宜地分配空缓冲区,这将使事情变得更简单。

我们也可以放心地使用malloc,让buffer->data的初始内容保持为垃圾值。 如果fread只填充了buffer->data的一部分,其余部分仍然是垃圾。 没关系:既然知道读取了多少字节,我们就会在碰到垃圾数据之前停止打印。


现在我们可以构建循环了。 当它读取0字节时,我们知道它已完成读取。

// Skeleton of the read loop: it keeps going while Buffer_read returns a non-zero byte count.
while( Buffer_read( buffer, BUFFER_SIZE, fp ) > 0 ) {
    ... now what ...
}
fclose(fp);

链表的工作方式是:向列表追加节点时,把它接到tail->next上,并让它成为新的尾部。 这通常被称为“push”(压入)。

/* Link new_tail after tail and return new_tail, so the caller can keep the
   result as its updated tail pointer. */
Buffer *Buffer_push( Buffer *tail, Buffer *new_tail ) {
    return tail->next = new_tail;
}

// Start with a single empty node that is both head and tail.
// NOTE(review): the results of Buffer_new are not checked for NULL here.
Buffer *head = Buffer_new();
Buffer *tail = head;
// Each successful read fills the current tail, then a fresh empty node is pushed
// to receive the next read; the loop ends when a read returns 0 bytes.
while( Buffer_read( tail, BUFFER_SIZE, fp ) > 0 ) {
    tail = Buffer_push( tail, Buffer_new() );
}
fclose(fp);

请注意,我们一开始就分配了一个空的head,它同时也是tail。 从这个已分配的节点开始可以让循环更简单:没有必要检查if( head )或if( tail )。 这确实意味着列表末尾总会有一个空缓冲区。 没关系:由于buffer->data不再是固定大小的数组,空缓冲区现在很小、很便宜。


最后一步是打印所有内容。 我们已经可以打印一个缓冲区了,所以我们只需要遍历链表并打印每个缓冲区。

/* Print every buffer in the list, in order, starting at head and following
   next until it is NULL. */
void Buffer_print_all( Buffer *head ) {
    Buffer *node = head;
    while( node != NULL ) {
        Buffer_print(node);
        node = node->next;
    }
}

// Dump the whole list, starting from the first buffer.
Buffer_print_all(head);

最后,列表末尾挂着的那个空缓冲区没有问题:它知道自己的size是0,所以Buffer_print不会真正访问buffer->data。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM