[英]reading unknown length file in buffer chunks in c
我試圖在不使用 lseek()、fseek() 這類函數的情況下,將未知長度的二進制文件分塊讀入緩沖區,一直讀到文件結尾(EOF)為止。
但我不太清楚該如何實現。 提前致謝。
#include <stdio.h>
#include <stdlib.h>
/*
 * One fixed-size chunk of file data, chained into a singly linked list.
 * The payload array is the first member, followed by the link to the
 * next chunk.
 */
typedef struct Buffer Buffer;
struct Buffer {
    unsigned char data[1012]; /* raw bytes held by this chunk */
    Buffer *next;             /* following chunk in the list */
};
/*
 * Copy `length` bytes from `src` to `dest`, one byte at a time in
 * ascending address order.
 *
 * dest   - start of the destination storage (at least `length` bytes)
 * src    - start of the source storage (at least `length` bytes)
 * length - number of bytes to copy; 0 copies nothing
 *
 * Callers in this file pass Buffer pointers. Because `data` is the first
 * member of Buffer, the bytes are read from and written to the data array.
 */
void mymemcpy(void *dest, void *src, size_t length){
    unsigned char *out = dest;
    const unsigned char *in = src;
    // size_t index: an int index would be compared against an unsigned length
    for(size_t i = 0; i < length; i++){
        out[i] = in[i];
    }
}
/*
 * Prepend a copy of `read_buffer` to the list that starts at `head`.
 *
 * head        - current first node of the list, or NULL for an empty list
 * read_buffer - chunk whose first `size` data bytes are copied; it is not
 *               linked into the list and stays owned by the caller
 * size        - number of bytes to copy; negative values are treated as 0
 *               and values larger than the data array are clamped to it
 *
 * Returns the newly allocated node, which is the new head of the list, or
 * NULL if the allocation fails. The list is untouched in that case, so the
 * caller should keep its previous head until the result has been checked.
 */
Buffer* add_buffer_front(Buffer *head, Buffer *read_buffer, int size){
    Buffer *new_buffer = malloc(sizeof *new_buffer);
    if(new_buffer == NULL){
        return NULL;
    }
    size_t count = 0;
    if(read_buffer != NULL && size > 0){
        count = (size_t)size;
        if(count > sizeof new_buffer->data){
            count = sizeof new_buffer->data;
        }
    }
    mymemcpy(new_buffer, read_buffer, count);
    // Always set the link: with an empty list this must be NULL, otherwise
    // a traversal would follow an uninitialised pointer.
    new_buffer->next = head;
    return new_buffer;
}
/*
 * Print the list's data as upper-case hex and release every node.
 *
 * head   - first node of the list; NULL prints nothing
 * length - total number of valid data bytes stored across the whole list
 *
 * Bytes are taken in list order. Each node contributes at most
 * sizeof(data) bytes, and printing stops contributing once `length` bytes
 * have been written, so every node but the last one holding data is
 * assumed to be full. All nodes are freed; the caller must not use the
 * list afterwards.
 */
void display_List(Buffer *head, size_t length){
    size_t remaining = length;
    Buffer *current = head;
    while(current != NULL){
        // Read the link before freeing the node that owns it.
        Buffer *next = current->next;
        size_t chunk = sizeof current->data;
        if(chunk > remaining){
            chunk = remaining;
        }
        for(size_t i = 0; i < chunk; i++){
            printf("%02X",(unsigned)current->data[i]);
        }
        remaining -= chunk;
        free(current);
        current = next;
    }
}
int main(int argc, char **argv){
FILE *fd;
Buffer *head_buffer = NULL;
int file_length = 0;
int eof_int = 1;
if(argc != 2){
printf("Usage: readFile <filename>\n");
return 1;
}
fd = fopen(argv[1], "rb");
while(eof_int != 0){
Buffer *new_buffer = malloc(sizeof(Buffer));
eof_int = fread(new_buffer, sizeof(Buffer)-12, 1, fd);
if(eof_int == 0){
//size_t length
//
//
head_buffer = add_buffer_front(head_buffer, new_buffer, length);
file_length += length;
}else{
head_buffer = add_buffer_front(head_buffer, new_buffer, (sizeof(new_buffer->data)));
file_length += (sizeof(new_buffer->data));
}
}
display_List(head_buffer, file_length);
fclose(fd);
return 0;
}
你有幾個問題。
(1) fread 返回的是成功讀取的項目數,而不是 EOF 指示。 您需要調用 feof(FILE *stream) 來確定是否已到達文件末尾。
(2)您假設 next 指針佔 12 個字節。 這是一個非常危險的假設。 最好直接讀取結構中為數據分配的 1012 個字節(即讀入 data 數組)。 否則您打印出來的很可能不是讀入的內容,而只是未初始化的內存。
(3)使用 fread 的返回值來決定要復制多少內存。
請參閱下面代碼中的注釋 - 另外可以考慮用 #define 取代硬編碼的 1012。
#include <stdio.h>
#include <stdlib.h>
/* Number of payload bytes stored in each Buffer chunk. */
enum { BUFFER_DATA_SIZE = 1012 };

/*
 * One chunk of file data in a singly linked list.
 * Use sizeof on the data member (or BUFFER_DATA_SIZE) for the payload
 * size; do not derive it from sizeof(Buffer), because the size of the
 * pointer and any padding differ between platforms.
 */
typedef struct Buffer{
    unsigned char data[BUFFER_DATA_SIZE]; // raw bytes held by this chunk
    struct Buffer *next;                  // following chunk, or NULL at the end
}Buffer;
// Create a structure to store stuff about a file
/*
 * Bookkeeping for a file held in memory as a linked list of Buffer chunks.
 */
typedef struct {
    Buffer *head, *tail; // first and last chunk of the list (NULL when empty)
    size_t length;       // running total of the lengths passed to add_buffer
} MyFile;
/*
void mymemcpy(void *dest, void *src, size_t length){
Buffer *buffer_toFill = (Buffer *)dest;
Buffer *buffer_toAdd = (Buffer *)src;
int a = 0;
for(int i = 0; i < length; i++){
buffer_toFill->data[i] = buffer_toAdd->data[i];
}
}
Buffer* add_buffer_front(Buffer *head, Buffer *read_buffer, int size){
Buffer *new_buffer = malloc(sizeof(Buffer));
mymemcpy(new_buffer, read_buffer, size);
if(head != NULL){
new_buffer->next = head;
}
return new_buffer;
}
*/
// Lets make this easier - The buffer has already been "malloced" once - why do it again
// And why are you reversing the file
// Perhaps
/*
 * Append `to_be_added` to the end of the list tracked by `file`.
 *
 * to_be_added  - node to link in; the list takes it over as its new tail
 * file         - list bookkeeping to update
 * extra_length - number of bytes to add to file->length
 *
 * Does nothing if either pointer is NULL.
 */
void add_buffer(Buffer *to_be_added, MyFile *file, size_t extra_length) {
    if (to_be_added == NULL || file == NULL) {
        return;
    }
    to_be_added->next = NULL; // It is the last node, so nothing follows it
    if (file->tail) { // List already has at least one node
        file->tail->next = to_be_added;
    } else { // First buffer!
        file->head = to_be_added;
    }
    // The new node is the tail in both cases; without this the second and
    // later appends would keep overwriting the first node's next pointer.
    file->tail = to_be_added;
    file->length += extra_length;
}
/*
void display_List(Buffer *head, size_t length){
Buffer *current = head;
while(current != NULL){
for(int i = 0; i < length; i++){
printf("%02X",(unsigned)current->data[i]); //this shows different value compare with xxd <filename>
//printf("%c", current->data[i]);
}
Buffer *prev = current;
free(prev);
current = current->next;
}
}
*/
// Instead pass in the new structure
/*
 * Print the contents of `file` as upper-case hex, in list order.
 *
 * file - list bookkeeping; file->length is the total number of valid
 *        bytes, so only the last chunk holding data may be partly filled
 *
 * The list is not modified or freed. A NULL `file` prints nothing.
 */
void display_list(MyFile *file) {
    if (file == NULL) {
        return;
    }
    size_t contents_left = file->length;
    for (Buffer *current = file->head;
         current != NULL && contents_left > 0;
         current = current->next) {
        // Each chunk holds at most sizeof(data) bytes
        size_t chunk_length = sizeof current->data;
        if (chunk_length > contents_left) {
            chunk_length = contents_left;
        }
        for (size_t i = 0; i < chunk_length; i++) {
            printf("%02X",(unsigned)current->data[i]);
        }
        // Account for what was printed so the final chunk stops early
        contents_left -= chunk_length;
    }
}
int main(int argc, char **argv){
FILE *fd;
MyFile read_file;
read_file.head = NULL;
read_file.tail = NULL;
read_file.length = 0;
Buffer *head_buffer = NULL;
int file_length = 0;
int eof_int = 1;
if(argc != 2){
printf("Usage: readFile <filename>\n");
return 1;
}
fd = fopen(argv[1], "rb");
// Check fd
if (fd == NULL) {
// error stuff
return EXIT_FAILURE; // Look up the include for this
}
while(eof_int != 0){
Buffer *new_buffer = malloc(sizeof(Buffer));
eof_int = fread(new_buffer->data, 1012, 1, fd); // Do not make assumptions on the size of a pointer and store it in the correct location
if(eof_int == 0) { // Read nothing
free(new_buffer); // We was too optimistic! Did Not need this in the end
break;
} else {
add_buffer(&read_file, new_buffer, eof_int);
}
}
display_List(&read_file);
fclose(fd);
return 0;
}
關鍵在於 fread 返回的是讀取的項目數。 您每次請求讀取 1 個「整個緩沖區大小」的項目,因此它只能告訴您讀到了 0 個或 1 個緩沖區。 把兩個參數對調過來:請求讀取「緩沖區大小」個 1 字節的項目。
// Ask for single bytes so the return value is a byte count. While data is
// an array member, take the size from the array itself instead of
// subtracting an assumed pointer size from sizeof(Buffer).
size_t bytes_read = fread(buffer->data, 1, sizeof buffer->data, fd);
現在您可以知道緩沖區中讀取了多少字節。 我們可以向Buffer
添加一個size
字段,這樣每個緩沖區都可以記住它讀取的字節數,並且只打印那么多字節。
/* Number of bytes requested from the file for each buffer. */
const size_t BUFFER_SIZE = 1024;

/*
 * One chunk of file data in a singly linked list. The storage is
 * allocated separately, and the chunk records how many bytes it holds.
 */
typedef struct Buffer Buffer;
struct Buffer {
    unsigned char *data; /* separately allocated storage, or NULL */
    size_t size;         /* number of bytes read into data */
    Buffer *next;        /* following chunk, or NULL */
};
/*
 * Print the bytes held by one buffer as two-digit upper-case hex values,
 * each followed by a space. A buffer whose size is 0 prints nothing and
 * its data pointer is never dereferenced.
 */
void Buffer_print( Buffer *buffer ) {
    size_t remaining = buffer->size;
    size_t position = 0;
    while( remaining > 0 ) {
        printf("%02hhX ", buffer->data[position]);
        position++;
        remaining--;
    }
}
/*
 * Allocate an empty buffer: size 0, no data storage, no successor.
 *
 * Returns the new buffer, or NULL if the allocation fails. The caller
 * owns the result.
 */
Buffer *Buffer_new(void) {
    Buffer *buffer = malloc(sizeof *buffer);
    if( buffer == NULL ) {
        return NULL;
    }
    // Initialise every field so no caller ever sees garbage.
    buffer->size = 0;
    buffer->data = NULL;
    buffer->next = NULL;
    return buffer;
}
請注意,我小心地初始化緩沖區的所有字段,否則我們冒着垃圾的風險。
現在我們已經改變了緩沖區,因此我們對其大小和位置的假設被打破了。 沒關系,我們應該直接讀取buffer->data
。
/*
 * Allocate storage for `buffer` and fill it with up to `buffer_size`
 * bytes from `fp`.
 *
 * buffer      - buffer to fill; its data pointer is overwritten
 * buffer_size - number of bytes to allocate and request
 * fp          - open stream to read from
 *
 * Returns the number of bytes stored, which is also recorded in
 * buffer->size. A return of 0 means nothing was read (end of file, read
 * error, or the allocation failed); in that case buffer->data is NULL, so
 * an empty buffer carries no storage.
 */
size_t Buffer_read( Buffer *buffer, size_t buffer_size, FILE* fp ) {
    buffer->size = 0;
    buffer->data = malloc(buffer_size);
    if( buffer->data == NULL ) {
        return 0;
    }

    size_t bytes_read = fread(buffer->data, 1, buffer_size, fp);
    if( bytes_read == 0 ) {
        // Keep empty buffers cheap: release the unused storage.
        free(buffer->data);
        buffer->data = NULL;
    }
    buffer->size = bytes_read;
    return bytes_read;
}
現在緩沖區知道它讀取了多少數據,我們可以根據需要分配任何大小的數據。 沒有必要在結構中硬編碼。 這使代碼更加靈活和高效。 它讓我們便宜地分配空緩沖區,這將使事情變得更簡單。
我們也可以直接使用 malloc,讓 buffer->data 保留未初始化的垃圾內容。 如果 fread 只部分填充了 buffer->data,其余部分仍然是垃圾。 沒關系:知道實際讀取的數據大小,意味着我們會在碰到垃圾之前就停止打印。
現在我們可以構建循環了。 當它讀取0字節時,我們知道它已完成讀取。
while( Buffer_read( buffer, BUFFER_SIZE, fp ) > 0 ) {
... now what ...
}
fclose(fp);
鏈表的工作方式是:向列表追加節點時,把新節點接到 tail->next,並讓它成為新的尾部。 這通常被稱為“推入”(push)。
/*
 * Link `new_tail` after `tail` and hand it back, so the caller can
 * use the result as the list's new tail.
 */
Buffer *Buffer_push( Buffer *tail, Buffer *new_tail ) {
    return tail->next = new_tail;
}
Buffer *head = Buffer_new();
Buffer *tail = head;
while( Buffer_read( tail, BUFFER_SIZE, fp ) > 0 ) {
tail = Buffer_push( tail, Buffer_new() );
}
fclose(fp);
請注意,我們一開始就分配了一個空的 head,它同時也是 tail。 兩者一開始就已分配好,循環就更簡單了: 不需要檢查 if( head ) 或 if( tail )。 這確實意味着列表末尾總會有一個空緩沖區。 沒關系。 由於 buffer->data 不再是固定大小的數組,空緩沖區現在很小、代價很低。
最后一步是打印所有內容。 我們已經可以打印一個緩沖區了,所以我們只需要遍歷鏈表並打印每個緩沖區。
/*
 * Print every buffer in the list, from `head` to the end, by calling
 * Buffer_print on each one in turn. A NULL head prints nothing.
 */
void Buffer_print_all( Buffer *head ) {
    Buffer *cursor = head;
    while( cursor != NULL ) {
        Buffer_print(cursor);
        cursor = cursor->next;
    }
}
// Dump every buffer in the list, starting from head.
Buffer_print_all(head);
最后,列表末尾掛着一個空緩沖區並沒有問題。 它知道自己的大小是 0,所以 Buffer_print 實際上不會去訪問為 null 的 buffer->data。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.