![](/img/trans.png)
[英]Read characters from file to end of line without using fread, fopen, fscanf, fgets
[英]Windows 10: fopen, fread, fgets and mixed EOL characters in large text file
我正在為計算材料科學家開發一個程序:
Atomes 可以導入包含原子坐標的大型文本文件,為此我使用fread
讀取整個文件,然后使用 OpenMP 在 CPU 內核上拆分文本緩沖區。
這在 Linux 和 Windows 上都運作得非常好,直到有人帶來了一個我沒預料到的問題:一個含有混合且不規則出現的 EOL 符號( \n
和\r
)的文件。
我找到了解決 Windows 上這個問題的方法,想知道我采用的修正是否恰當;如果您能對我的做法提出建議/評論,我將非常感謝。
在該解決方案之前,我嘗試向fopen
function 添加選項,例如-t
或-b
但沒有效果。
最后我注意到如果我更改編譯選項並使用fgets
function 從文件中讀取數據,就沒有問題;只是在這種情況下,對於大文件,數據的處理更復雜,到目前為止我還沒有辦法用 OpenMP 將其並行化(//),而且需要更多時間。
這是我為讀取文件而編寫的代碼:
/*
 * Open a coordinates file and split its content into lines.
 *
 * filename: path of the file to read.
 *
 * Returns 1 when the file statistics cannot be obtained or the file cannot
 * be opened; otherwise returns the result of read_xyz_file() called with
 * the number of lines that were referenced/read.
 *
 * With OPENMP defined, the whole file is read with a single fread() and
 * coord_line[] is filled with pointers into that buffer. Without OPENMP,
 * the file is read line by line with fgets() into a linked list.
 */
int open_coord_file (gchar * filename)
{
int res;
#ifdef OPENMP
// In that case I read the file in a single buffer, then work on that buffer
struct stat status;
res = stat (filename, & status);
if (res == -1)
{
// Basic function to store information on the reading process
add_reader_info ("Error - cannot get file statistics !");
return 1;
}
// NOTE(review): st_size is narrowed to int here - presumably input files
// are expected to stay below INT_MAX bytes; confirm.
int fsize = status.st_size;
#endif
// NOTE(review): "r" opens the stream in text mode. On Windows, text-mode
// streams translate line endings, so what fread() delivers below can differ
// from the raw bytes counted by st_size; "rb" avoids that translation.
coordf = fopen (filename, "r");
if (! coordf)
{
add_reader_info ("Error - cannot open coordinates file !");
return 1;
}
int i, j, k;
#ifdef OPENMP
// Zero-filled buffer sized from the on-disk file size
gchar * coord_content = g_malloc0(fsize*sizeof*coord_content);
// Using fread to read the entire file
// NOTE(review): the return value of fread() is not checked, so a short
// read goes unnoticed here.
fread (coord_content, fsize, 1, coordf);
fclose (coordf);
int linecount = 0;
// Evaluating the number of lines in the file:
for (j=0; j<fsize; j++) if (coord_content[j] == '\n') linecount ++;
#ifdef G_OS_WIN32
// What happens in Windows is that some '\r' symbols were found
// and not on all lines, so I decided to check for \r symbols:
int neolr = 0;
for (j=0; j<fsize; j++) if (coord_content[j] == '\r') neolr ++;
// And modify the number of lines accordingly
// NOTE(review): this subtracts one line per '\r' found, whether or not
// that '\r' is paired with a '\n' - confirm this is the intent.
linecount -= neolr;
#endif
// One pointer per line; the first line starts at the beginning of the buffer
coord_line = g_malloc0 (linecount*sizeof*coord_line);
coord_line[0] = & coord_content[0];
i = 1;
int nfsize = fsize;
#ifdef G_OS_WIN32
// Now deleting the corresponding EOL symbols in the text buffer
// This is only required for Windows, and I am not sure that it is
// the proper way to do things, any thought on the matter would be appreciated.
for (j=0; j<fsize; j++)
{
if (coord_content[j] == '\n')
{
// Turn the end of line into a string terminator
coord_content[j] = '\0';
}
else if (coord_content[j] == '\r')
{
// Remove the '\r' by shifting the rest of the buffer one byte to the left
// NOTE(review): after the shift, the byte now stored at index j is skipped
// by the loop's j++, so a '\n' directly following a '\r' would not be
// replaced by '\0' - confirm.
for (k=j; k<fsize-1; k++)
{
coord_content[k] = coord_content[k+1];
}
nfsize --;
}
}
#endif
// And referencing properly the lines to work on the buffer:
// every '\0' marks the end of a line, the next byte starts a new one.
// NOTE(review): in this excerpt '\n' is only replaced by '\0' inside the
// G_OS_WIN32 block above - confirm how separators are written elsewhere.
for (j=0; j<nfsize; j++)
{
if (coord_content[j] == '\0')
{
if (i < linecount)
{
coord_line[i] = & coord_content[j+1];
i++;
}
}
}
#else
// On the other side if I turn down OpenMP, then I use the fgets function
// to read the data from the text file, then there is no problem whatsoever
// with the EOL symbols and everything works smoothly.
// The fopen options being the same I am somewhat confused by this result.
gchar * buf = g_malloc0(LINE_SIZE*sizeof*buf);
// Doubly linked list node holding one line of the file
// (prev is not assigned anywhere in this excerpt)
struct line_node
{
gchar * line;//[LINE_SIZE];
struct line_node * next;
struct line_node * prev;
};
struct line_node * head = NULL;
struct line_node * tail = NULL;
// i counts the lines read
i = 0;
while (fgets(buf, LINE_SIZE, coordf))
{
if (head == NULL)
{
head = g_malloc0 (sizeof*head);
// NOTE(review): this allocation is overwritten by the next statement
// and is never freed.
tail = g_malloc0 (sizeof*tail);
tail = head;
}
else
{
// Append a new node at the end of the list
tail -> next = g_malloc0 (sizeof*tail -> next);
tail = tail -> next;
}
// Store a private copy of the line just read
tail -> line = g_strdup_printf ("%s", buf);
// substitute_string() is defined elsewhere; presumably it strips the
// trailing newline - note that "\0" is an empty C string. TODO confirm.
tail -> line = substitute_string (tail -> line, "\n", "\0");
i ++;
}
g_free (buf);
fclose (coordf);
#endif
// And then later in the code I process the data
// providing i the number of lines as an input value.
return read_xyz_file (i);
任何建議將不勝感激。
[編輯]
我找到了一種方法來解決我的fread
問題,使用帶有fgets
的臨時緩沖區,這樣我就可以再次輕松地用 OpenMP 並行(//)處理我的數據:
coord_line = g_malloc0 (i*sizeof*coord_line);
tail = head;
j = 0;
while (tail)
{
coord_line[j] = & tail -> line[0];
j ++;
tail = tail -> next;
}
現在一切都很好,盡管我仍然不知道為什么我遇到fread
問題
[/編輯]
SO 不是免費的代碼編寫服務。 然而,你已經表現出真正的努力,試圖為自己解決這個問題。 逐點更正需要很長時間,所以這里有一個“代碼轉儲”,應該很容易理解,並且(我希望)能做到你努力實現的目標。
這是用“純粹的、普通的”C 寫的,沒有使用你代碼中出現的“gXXXX”函數。 它會打開並載入整個(假定為)文本文件,如果存在 CR,則將其擠掉,然后把緩沖區按行切分,並將指向每一行的指針存入一個不斷增長的指針數組。 (空行也將被分配一個指針)一些printf
行報告進程的一些統計信息。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Load file contents to alloc'd memory, return pointer to buffer (to be free'd!)
// Load the entire contents of a file into freshly allocated memory.
//
// fname: path of the file to read. The file is opened in binary mode ("rb")
//        so no end-of-line translation is applied and the size reported by
//        ftell() matches the number of bytes fread() delivers.
//
// Returns a NUL-terminated buffer holding the file contents; the caller
// must free() it.
// On any failure (open, seek, size query, allocation, short read) a message
// is written to stderr and the process exits with status 1.
char *loadFile( char *fname ) {
    FILE *fp = fopen( fname, "rb" );
    if( fp == NULL ) {
        fprintf( stderr, "Cannot open '%s'\n", fname );
        exit(1);
    }

    // ftell() returns a long and reports failure as -1L: validate the value
    // BEFORE converting to size_t, otherwise -1 becomes SIZE_MAX and
    // "size + 1" wraps around to 0.
    long end = -1L;
    if( fseek( fp, 0, SEEK_END ) == 0 )
        end = ftell( fp );
    if( end < 0 || fseek( fp, 0, SEEK_SET ) != 0 ) {
        fprintf( stderr, "Cannot determine size of '%s'\n", fname );
        fclose( fp );
        exit(1);
    }
    size_t size = (size_t)end;

    // One extra byte for the terminating '\0'
    char *buf = malloc( size + 1 );
    if( buf == NULL ) {
        fprintf( stderr, "Malloc() failed\n" );
        fclose( fp );
        exit(1);
    }

    if( fread( buf, sizeof *buf, size, fp ) != size ) {
        fprintf( stderr, "Read incomplete\n" );
        free( buf );
        fclose( fp );
        exit(1);
    }
    fclose( fp );

    buf[size] = '\0'; // terminate so the str* functions can be used on the buffer
    return buf;
}
// Demo driver: load a text file, remove every '\r', split the buffer into
// lines in place and print a few statistics plus the first five lines.
// Every line, including an empty one, gets an entry in arr[].
int main( void ) {
    char *fname = "FOO.BAR"; // To be defined...
    char *fCont = loadFile( fname ), *d, *s;

    // crush out '\r', if any: s scans the original text, d trails behind
    // and only advances when the byte just copied is not a '\r'
    for( d = fCont, s = fCont; (*d = *s) != '\0'; s++ )
        d += *d != '\r';
    // pointer differences are ptrdiff_t: convert explicitly to match %ld
    fprintf( stderr, "Orig %ld. Without CR %ld\n", (long)(s - fCont), (long)(d - fCont) );

    char **arr = NULL;
    int lcnt = 0;
    // Split on '\n' by hand instead of strtok(): strtok() merges consecutive
    // delimiters, which would silently drop empty lines.
    for( char *line = fCont; *line != '\0'; ) {
        char *nl = strchr( line, '\n' );
        if( nl != NULL )
            *nl = '\0'; // terminate the current line in place

        char **tmp = realloc( arr, ((size_t)lcnt + 1) * sizeof *tmp );
        if( tmp == NULL ) {
            fprintf( stderr, "realloc() failed\n" );
            exit(1);
        }
        arr = tmp;
        arr[lcnt++] = line;

        if( nl == NULL )
            break; // last line had no trailing newline
        line = nl + 1;
    }
    fprintf( stderr, "%d lines loaded\n", lcnt );

    // "demo" the first 5 lines
    for( int i = 0; i < 5 && i < lcnt; i++ )
        fprintf( stderr, "%d - '%s'\n", i+1, arr[i] );

    /* process from arr[0] to arr[lcnt-1] */

    free( arr );
    free( fCont );
    return 0;
}
希望這可以幫助。 現在輪到你了(球在你這邊)...
這可以使用fopen
function 的b
選項來解決:
// "rb": read in binary mode, so no end-of-line translation is applied to the data
coordf = fopen (filename, "rb");
之后fread
行為正常。
請注意,在我的第一次嘗試中,我可能使用了以下錯誤的參數順序:
// NOTE: "br" is not a valid mode string - the mode must begin with 'r', 'w' or 'a'; 'b' may only follow it
coordf = fopen (filename, "br");
那是行不通的。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.