简体   繁体   中英

How do I see current seek position of an open file descriptor?

I'm running an awk script on a 100G+ file. I have a suspicion that since it never finishes it got into some kind of infinite loop.

I want to see the line that script is currently at to see if there's some problem with the data.

I tried ls -l /proc/<pid>/fd but it just lists names of opened files.

Is there a way to find the current seek position of an open file?

The idea here is to use strace -p <your pid> to observe the running process. In addition, a gawk extension is created so that lseek can be called from an awk script; lseek reports the file descriptor's offset, as you requested.

You will need to modify the awk script to call the extension.

Without modifying the awk script, strace -p <your pid> will report lots of read calls, which usually indicates progress through the input file.

Below is a Makefile to create the lseek extension and run a quick test.


# Build the gawk lseek extension (awklseek.so) and run a quick smoke test.
# Recipe lines must begin with a TAB character.
CC = gcc
CFLAGS = -Wall -fPIC -c -O2 -DHAVE_STRING_H -DHAVE_SNPRINTF -DHAVE_STDARG_H -DHAVE_VPRINTF -DDYNAMIC
LDFLAGS = -shared

# Neither target names a real file, so never let a stray file shadow them.
.PHONY: all clean

# Default target: build the shared object, then run gawk with it loaded
# against an input file named "data" in the current directory.
all: awklseek.so
	gawk -l ./awklseek.so 'BEGIN{ print lseek() } { print $$0 ; lseek() ; } ' data

awklseek.so: lseek.o
	${CC} $^ -o $@ ${LDFLAGS}

# Suffix rule for .c -> .o; CFLAGS already carries -c.
.c.o:
	${CC} ${CFLAGS} $<

# -f keeps clean from failing when the artifacts do not exist yet.
clean:
	rm -f awklseek.so lseek.o

Here is the extension:

/*
 * lseek.c - Provide an interface to lseek(2) routine
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "gawkapi.h"

/* gawk API handle and extension id; do_lseek calls through api and passes ext_id. */
static const gawk_api_t *api;
static awk_ext_id_t ext_id;

/* Version banner for this extension. */
static const char *ext_version = "lseek extension: version 1.0";

/* Initialization hook, published through the init_func pointer. */
static awk_bool_t init_lseek(void);
static awk_bool_t (*init_func)(void) = init_lseek;

/*
 * NOTE(review): per the gawk manual, the loader looks this symbol up to
 * confirm the plugin's license is GPL-compatible -- confirm against the
 * gawk version in use.
 */
int plugin_is_GPL_compatible;

/*
 * do_lseek --- awk-callable lseek(): report the offset of the current input file.
 *
 * Asks gawk for the input buffer of the currently open input file and
 * returns, as an awk number stored in result, what lseek(fd, 0, SEEK_CUR)
 * reports for that buffer's descriptor. Returns -1 when gawk cannot supply
 * the file; lseek's own failure value is passed through unchanged.
 * nargs and unused are not consulted.
 *
 * NOTE(review): this is the kernel's offset for the descriptor; if gawk
 * reads ahead in blocks it may lie past the record currently being
 * processed -- confirm before relying on it byte-for-byte.
 */
static awk_value_t *
do_lseek(int nargs, awk_value_t *result, struct awk_ext_func *unused)
{
    const awk_input_buf_t *in_buf;
    const awk_output_buf_t *out_buf;
    off_t offset;

    /* A NULL name with zero length asks for the current input file. */
    if (api->api_get_file(ext_id, NULL, 0, "<", 0, &in_buf, &out_buf) == awk_false) {
        make_number(-1.0, result);
        return result;
    }

    /* A zero-byte seek from SEEK_CUR moves nothing and yields the offset. */
    offset = lseek(in_buf->fd, 0, SEEK_CUR);
    make_number(offset, result);
    return result;
}

/*
 * init_lseek --- initialization hook for the extension.
 *
 * There is nothing to set up, so the failure count never leaves zero and
 * the function always reports success.
 */
static awk_bool_t
init_lseek(void)
{
    const int failures = 0;

    return failures == 0;
}

/*
 * Functions this extension exposes to awk: a single entry binding the
 * awk-level name "lseek" to do_lseek.
 * NOTE(review): the remaining positional fields follow awk_ext_func_t's
 * layout in gawkapi.h (presumably max/min argument counts, a lint flag and
 * a data pointer) -- confirm against the installed header.
 */
static awk_ext_func_t func_table[] = {
    {
        "lseek",
        do_lseek,
        0,
        0,
        awk_false,
        NULL
    },
};

/*
 * Define the dl_load function using the boilerplate macro from gawkapi.h.
 * Arguments: the function table above, the extension's name (lseek), and
 * an empty string.
 * NOTE(review): per the gawk manual the third argument is the awk
 * namespace for the functions, with "" presumably meaning the default
 * one -- confirm.
 */
dl_load_func(func_table, lseek, "")

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM