C-如何通过管道传递给仅从文件读取的程序

Question

我想将字符串传递给仅从文件而不是从stdin读取输入的程序。 从bash使用它，我可以做类似的事情

echo "hi" | program /dev/stdin

我想从C代码复制这种行为。 我做的是这个

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <string.h>

/*
 * Question listing: send a string to "ext_program" through a pipe that the
 * child opens as /dev/stdin, and read the child's standard output back
 * through a second pipe.
 *
 * Two pipes are created, then the process forks.  The parent writes the
 * string, waits for the child, and afterwards reads the child's output one
 * byte at a time into a heap buffer.  The child redirects the pipe ends onto
 * file descriptors 0 and 1 and calls execlp().
 *
 * Returns 0; calls exit(1) on pipe/fork/malloc failure, on a non-zero wait
 * status, or when execlp() fails.
 */
int main() {
 pid_t pid;
 int rv;

 // Naming: to_ext_program_pipe carries parent -> child data,
 // to_my_program_pipe carries child -> parent data.
 int to_ext_program_pipe[2];
 int to_my_program_pipe[2];

 if(pipe(to_ext_program_pipe)) {
    fprintf(stderr,"Pipe error!\n");
    exit(1);
 }
 if(pipe(to_my_program_pipe)) {
    fprintf(stderr,"Pipe error!\n");
    exit(1);
 }

 if( (pid=fork()) == -1) {
    fprintf(stderr,"Fork error. Exiting.\n");
    exit(1);
 }

 if(pid) {
    // Parent: drop the pipe ends that only the child uses.
    close(to_my_program_pipe[1]);
    close(to_ext_program_pipe[0]);
    char string_to_write[] = "this is the string to write";

    // strlen + 1 means the terminating NUL byte is sent to the child as well.
    write(to_ext_program_pipe[1], string_to_write, strlen(string_to_write) + 1);
    // Closing the write end is what lets the child see end-of-file.
    close(to_ext_program_pipe[1]);

    // NOTE(review): the parent waits for the child BEFORE reading any of its
    // output; if the child writes more than the pipe can hold, both sides
    // block on each other.
    wait(&rv);
    if(rv != 0) {
        // rv is the raw status filled in by wait(), printed undecoded.
        fprintf(stderr, "%s %d\n", "phantomjs exit status ", rv);
        exit(1);
    }

    char *string_to_read;
    char ch[1];
    size_t len = 0;
    // One byte is allocated but not initialised here.
    string_to_read = malloc(sizeof(char));
    if(!string_to_read) {

        fprintf(stderr, "%s\n", "Error while allocating memory");

        exit(1);
    }
    // Read the child's output a single byte per read() call.
    while(read(to_my_program_pipe[0], ch, 1) == 1) {
        string_to_read[len]=ch[0];
        len++;
        // NOTE(review): the buffer is resized to len bytes, yet index len is
        // written two statements below - one byte past the allocation.
        string_to_read = realloc(string_to_read, len*sizeof(char));
        if(!string_to_read) {
            // NOTE(review): the failure is reported but execution continues,
            // so the next statement dereferences a null pointer.
            fprintf(stderr, "%s\n", "Error while allocating memory");
        }
        string_to_read[len] = '\0';
    }
    close(to_my_program_pipe[0]);
    // NOTE(review): if the loop body never ran, the single allocated byte was
    // never written before being printed with %s.
    printf("Output: %s\n", string_to_read);
    free(string_to_read);
} else {
    // Child: drop the pipe ends that only the parent uses.
    close(to_ext_program_pipe[1]);
    close(to_my_program_pipe[0]);

    // Make the pipes the child's standard input (0) and standard output (1).
    dup2(to_ext_program_pipe[0],0);
    dup2(to_my_program_pipe[1],1);

    if(execlp("ext_program", "ext_program", "/dev/stdin" , NULL) == -1) {
        fprintf(stderr,"execlp Error!");
        exit(1);
    }
    // NOTE(review): execlp() only returns on failure, and that path exits
    // above, so these two close() calls are never reached; the original pipe
    // descriptors therefore stay open across the exec.
    close(to_ext_program_pipe[0]);
    close(to_my_program_pipe[1]);
}

 return 0; 
}

它不起作用。

编辑：我没有得到ext_program的输出（它本应保存在string_to_read中）。 程序只是挂起。 我可以看到ext_program已经执行，但是什么输出也没有得到。

我想知道是否有错误，或者我想做的事无法完成。 我也知道替代方法是使用命名管道。

编辑2 ：更多详细信息

由于我仍然无法使程序正常工作，因此我发布了完整的代码

 #include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>

/*
 * Pipe a PhantomJS script to a "phantomjs ... /dev/stdin" child process and
 * print everything the child writes to its standard output.
 *
 * The parent writes the script into one pipe, closes that pipe's write end,
 * reads the child's output from a second pipe until end-of-file, prints it,
 * and only then waits for the child.
 *
 * Returns 0 on success.  Exits with status 1 on pipe/fork/write/allocation
 * failure, when execlp() fails in the child, or when the child's wait status
 * is non-zero.
 */
int main() {
    pid_t pid;
    int rv;
    int to_phantomjs_pipe[2];   /* parent -> child (script text) */
    int to_my_program_pipe[2];  /* child -> parent (generated output) */

    if(pipe(to_phantomjs_pipe)) {
        fprintf(stderr,"Pipe error!\n");
        exit(1);
    }
    if(pipe(to_my_program_pipe)) {
        fprintf(stderr,"Pipe error!\n");
        exit(1);
    }

    if( (pid=fork()) == -1) {
        fprintf(stderr,"Fork error. Exiting.\n");
        exit(1);
    }

    if(pid) {
        /* Parent: close the pipe ends that only the child uses. */
        close(to_my_program_pipe[1]);
        close(to_phantomjs_pipe[0]);

        char jsToExectue[] = "var page=require(\'webpage\').create();page.onInitialized=function(){page.evaluate(function(){delete window._phantom;delete window.callPhantom;});};page.onResourceRequested=function(requestData,request){if((/http:\\/\\/.+\?\\\\.css/gi).test(requestData[\'url\'])||requestData.headers[\'Content-Type\']==\'text/css\'){request.abort();}};page.settings.loadImage=false;page.settings.userAgent=\'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36\';page.open(\'https://stackoverflow.com\',function(status){if(status!==\'success\'){phantom.exit(1);}else{console.log(page.content);phantom.exit();}});";

        /*
         * NOTE: the "+ 1" also sends the string's terminating NUL byte to the
         * child.  The call is kept exactly as posted because it is the line
         * the question is about; dropping the "+ 1" sends the script text
         * only.
         */
        if(write(to_phantomjs_pipe[1], jsToExectue, strlen(jsToExectue) + 1) < 0) {
            fprintf(stderr,"Write error!\n");
            exit(1);
        }
        /* Closing the write end is what gives the child end-of-file. */
        close(to_phantomjs_pipe[1]);

        enum { BUFF = 1024 };
        ssize_t read_chars;
        char ch[BUFF];
        size_t len = 0;
        char *str = malloc(sizeof(char));
        if(!str) {
            fprintf(stderr, "%s\n", "Error while allocating memory");
            exit(1);
        }
        str[0] = '\0';

        /*
         * Drain the child's output in chunks.  read() does not NUL-terminate
         * the chunk, so exactly read_chars bytes are appended with memcpy()
         * instead of treating the chunk as a C string.
         */
        while( (read_chars = read(to_my_program_pipe[0], ch, BUFF)) > 0) {
            size_t chunk = (size_t)read_chars;
            /* Keep the old pointer until realloc() is known to have worked. */
            char *grown = realloc(str, len + chunk + 1);
            if(!grown) {
                fprintf(stderr, "%s\n", "Error while allocating memory");
                free(str);
                exit(1);
            }
            str = grown;
            memcpy(str + len, ch, chunk);
            len += chunk;
            str[len] = '\0';
        }
        close(to_my_program_pipe[0]);

        printf("%s\n", str);
        free(str);

        /* Reap the child only after its output has been consumed. */
        wait(&rv);
        if(rv != 0) {
            fprintf(stderr, "%s %d\n", "phantomjs exit status ", rv);
            exit(1);
        }
    } else {
        /* Child: put the pipes on stdin/stdout, then close every original
         * pipe descriptor before exec so no stray write end stays open. */
        dup2(to_phantomjs_pipe[0],0);
        dup2(to_my_program_pipe[1],1);
        close(to_phantomjs_pipe[1]);
        close(to_my_program_pipe[0]);
        close(to_phantomjs_pipe[0]);
        close(to_my_program_pipe[1]);

        execlp("phantomjs", "phantomjs", "--ssl-protocol=TLSv1", "/dev/stdin" , (char *)NULL);
        /* execlp() only returns on failure. */
        fprintf(stderr,"execlp Error!\n");
        exit(1);
    }
    return 0;
}

我想要做的是将要通过管道执行的脚本传递给phantomjs，然后将生成的HTML作为字符串读取。 我按要求修改了代码，但phantomjs仍未从stdin中读取。
我通过创建一个哑程序来测试脚本字符串，该程序将其写入文件，然后正常执行phantomjs并且可以正常工作。
我也尝试执行
execlp("phantomjs", "phantomjs", "--ssl-protocol=TLSv1", "path_to_script_file" , (char *)NULL);
并且也可以，显示输出HTML。
使用管道时不起作用。

Answer 1

最后的解释

对PhantomJS进行的一些实验表明，问题出在发送到PhantomJS的JavaScript程序的末尾写入了一个空字节。 这突出了两个错误：

问题中的程序发送了不必要的空字节。
当一个原本有效的程序后面跟着一个空字节时，PhantomJS 2.1.1（在运行macOS High Sierra 10.13.3的Mac上）会挂起。

问题中的代码包含：

write(to_phantomjs_pipe[1], jsToExectue, strlen(jsToExectue) + 1);

+ 1表示字符串末尾的终止空字节也被写入了phantomjs 。 正是写入这个空字节导致phantomjs挂起。 这应当算是PhantomJS的一个错误——尚不清楚它为什么会挂起，而不是检测到EOF（不会再有数据到来）或者报告错误等。

将该行更改为：

write(to_phantomjs_pipe[1], jsToExectue, strlen(jsToExectue));

并且代码可以正常工作-至少在运行macOS High Sierra 10.13.3的Mac上使用PhantomJS 2.1.1。

初步分析

您没有在子级中关闭足够的文件描述符。

经验法则 ：如果将管道的一端dup2()为标准输入或标准输出，请尽快关闭pipe()返回的两个原始文件描述符。 特别是，在使用任何exec*()系列函数之前，应关闭它们。
如果您使用dup()或带F_DUPFD的fcntl()复制描述符，则该规则同样适用。

显示的子代码为：

} else {
    close(to_ext_program_pipe[1]);
    close(to_my_program_pipe[0]);

    dup2(to_ext_program_pipe[0],0);
    dup2(to_my_program_pipe[1],1);

    if(execlp("ext_program", "ext_program", "/dev/stdin" , NULL) == -1) {
        fprintf(stderr,"execlp Error!");
        exit(1);
    }
    close(to_ext_program_pipe[0]);
    close(to_my_program_pipe[1]);
}

最后两个close()语句从不执行； 它们需要出现在execlp()之前。

您需要的是：

} else {
    dup2(to_ext_program_pipe[0], 0);
    dup2(to_my_program_pipe[1], 1);
    close(to_ext_program_pipe[0]);
    close(to_ext_program_pipe[1]);
    close(to_my_program_pipe[0]);
    close(to_my_program_pipe[1]);

    execlp("ext_program", "ext_program", "/dev/stdin" , NULL);
    fprintf(stderr, "execlp Error!\n");
    exit(1);
}

您可以对它进行重新排序以分割close()调用，但是最好如图所示重新组合它们。

请注意，无需测试execlp()是否失败。 如果返回，则失败。 如果成功，则不会返回。

可能还有另一个问题。 父进程等待子进程退出，然后再从子进程读取任何内容。 但是，如果子进程尝试写入的数据超出管道容量，则进程将挂起，等待某个进程（必须是父进程）读取管道。 因为他们俩都在等待对方做某事，然后再做对方在等待的事情，所以这是（或者至少可能是）僵局。

您还应该修改父进程，使其在调用wait()之前先读取子进程的输出。

if (pid) {
    close(to_my_program_pipe[1]);
    close(to_ext_program_pipe[0]);
    char string_to_write[] = "this is the string to write";

    write(to_ext_program_pipe[1], string_to_write, strlen(string_to_write) + 1);
    close(to_ext_program_pipe[1]);

    char *string_to_read;
    char ch[1];
    size_t len = 0;
    string_to_read = malloc(sizeof(char));
    if(!string_to_read) {
        fprintf(stderr, "%s\n", "Error while allocating memory");
        exit(1);
    }
    while (read(to_my_program_pipe[0], ch, 1) == 1) {
        string_to_read[len] = ch[0];
        len++;
        string_to_read = realloc(string_to_read, len*sizeof(char));
        if (!string_to_read) {
            fprintf(stderr, "%s\n", "Error while allocating memory\n");
            exit(1);
        }
        string_to_read[len] = '\0';
    }
    close(to_my_program_pipe[0]);
    printf("Output: %s\n", string_to_read);
    free(string_to_read);

    wait(&rv);
    if (rv != 0) {
        fprintf(stderr, "%s %d\n", "phantomjs exit status ", rv);
        exit(1);
    }
} …

我还会重写代码，改为大块读取（每次1024字节或更多）。 只要注意不要复制超过read()实际返回的数据量即可。 反复调用realloc()、每次只给缓冲区多分配一个字节，最终会极其缓慢。 如果只有几个字节的数据，这无关紧要；但如果要处理的数据有数千字节或更多，影响就很明显了。

后来补充：由于PhantomJS程序针对收到的消息会生成90多KiB的数据，这一点也是导致问题的因素之一——或者说，若不是PhantomJS存在“遇到空字节就挂起”的错误，它本来会成为问题的因素之一。

仍然有问题2018-02-03

我将修改后的代码提取到一个程序中（ pipe89.c ，编译为pipe89 ）。 当分配的空间发生变化时，程序出现了时有时无的崩溃。 我最终意识到，您重新分配的空间少了一个字节——我花了比应有的长得多的时间才发现这一点（如果Valgrind能在macOS High Sierra上使用会有所帮助，但目前还不能）。

这是带有调试信息注释输出的固定代码：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>

/*
 * Feed a fixed string to "ext_program" through a pipe that the child opens
 * as /dev/stdin, and collect everything the child writes to standard output.
 *
 * Order of operations in the parent matters: write the input, close the
 * write end (so the child sees end-of-file), drain the child's output, and
 * only then wait for the child.
 *
 * Returns 0 on success.  Exits with status 1 on pipe/fork/write/dup2/wait or
 * allocation failure, when execlp() fails, or when the child does not exit
 * with status 0.
 */
int main(void)
{
    pid_t pid;
    int rv;

    int to_ext_program_pipe[2];   /* parent -> child */
    int to_my_program_pipe[2];    /* child -> parent */

    if (pipe(to_ext_program_pipe))
    {
        fprintf(stderr, "Pipe error!\n");
        exit(1);
    }
    if (pipe(to_my_program_pipe))
    {
        fprintf(stderr, "Pipe error!\n");
        exit(1);
    }

    if ((pid = fork()) == -1)
    {
        fprintf(stderr, "Fork error. Exiting.\n");
        exit(1);
    }

    if (pid)
    {
        /* Parent: close the pipe ends that only the child uses. */
        close(to_my_program_pipe[1]);
        close(to_ext_program_pipe[0]);
        char string_to_write[] = "this is the string to write";

        /* sizeof - 1: send the characters only, not the terminating NUL. */
        if (write(to_ext_program_pipe[1], string_to_write, sizeof(string_to_write) - 1) < 0)
        {
            fprintf(stderr, "Write error!\n");
            exit(1);
        }
        close(to_ext_program_pipe[1]);

        char ch[1];
        size_t len = 0;
        char *string_to_read = malloc(sizeof(char));
        if (string_to_read == 0)
        {
            fprintf(stderr, "%s\n", "Error while allocating memory");
            exit(1);
        }
        /* Start as an empty string so printing is valid even with no input. */
        string_to_read[len] = '\0';

        while (read(to_my_program_pipe[0], ch, 1) == 1)
        {
            //fprintf(stderr, "%3zu: got %3d [%c]\n", len, ch[0], ch[0]); fflush(stderr);
            string_to_read[len++] = ch[0];
            char *new_space = realloc(string_to_read, len + 1);     // KEY CHANGE is " + 1"
            //if (new_space != string_to_read)
            //    fprintf(stderr, "Move: len %zu old %p vs new %p\n", len, (void *)string_to_read, (void *)new_space);
            if (new_space == 0)
            {
                fprintf(stderr, "Error while allocating %zu bytes memory\n", len);
                exit(1);
            }
            string_to_read = new_space;
            string_to_read[len] = '\0';
        }
        close(to_my_program_pipe[0]);
        printf("Output: %zu (%zu) [%s]\n", len, strlen(string_to_read), string_to_read);
        free(string_to_read);

        /* Reap the child only after all of its output has been consumed. */
        if (wait(&rv) == -1)
        {
            fprintf(stderr, "Wait error!\n");
            exit(1);
        }
        if (WIFEXITED(rv))
        {
            /* Decode the status: report the child's real exit code. */
            if (WEXITSTATUS(rv) != 0)
            {
                fprintf(stderr, "%s %d\n", "phantomjs exit status ", WEXITSTATUS(rv));
                exit(1);
            }
        }
        else
        {
            /* Child did not exit normally (e.g. killed by a signal). */
            fprintf(stderr, "%s %d\n", "phantomjs wait status ", rv);
            exit(1);
        }
    }
    else
    {
        /* Child: route the pipes onto stdin (0) and stdout (1). */
        if (dup2(to_ext_program_pipe[0], 0) == -1 ||
            dup2(to_my_program_pipe[1], 1) == -1)
        {
            fprintf(stderr, "Dup error!\n");
            exit(1);
        }
        /* Close all four original descriptors before exec. */
        close(to_ext_program_pipe[0]);
        close(to_ext_program_pipe[1]);
        close(to_my_program_pipe[0]);
        close(to_my_program_pipe[1]);

        /* execlp() only returns on failure, so no return-value test is needed. */
        execlp("ext_program", "ext_program", "/dev/stdin", NULL);
        fprintf(stderr, "execlp Error!\n");
        exit(1);
    }

    return 0;
}

我用一个针对27字节输入会写出5590字节输出的程序对它进行了测试。 这个放大倍数不如您的程序那么大，但足以说明问题。

我仍然认为您最好不要一次重新分配一个额外的字节-扫描循环应使用一个1 KiB的缓冲区，一次读取最多1 KiB，并一次分配所有多余的空间。 对于内存分配系统而言，这是一项强度较低的练习。

问题继续于2018-02-05

从Edit 2中获取代码，仅将函数定义从int main() {更改为int main(void) {（因为我使用的编译选项不允许老式的非原型函数声明或定义，而不带void的写法不是原型），代码在我这里运行正常。 我创建了一个替代的phantomjs程序（改自我手头已有的另一个程序），如下所示：

#include <stdio.h>

/*
 * Stand-in for the real program under test: print every argument and every
 * environment entry, then copy the file named by the last argument to
 * standard output.  Always returns 0, even when the file cannot be opened.
 */
int main(int argc, char **argv, char **envp)
{
    int arg_idx = 0;
    while (arg_idx < argc)
    {
        printf("argv[%d] = <<%s>>\n", arg_idx, argv[arg_idx]);
        arg_idx++;
    }

    /* Walk the environment list up to its terminating null pointer. */
    for (char **entry = envp; *entry != 0; entry++)
        printf("envp[%d] = <<%s>>\n", (int)(entry - envp), *entry);

    /* The last argument is treated as the name of the file to copy. */
    char *path = argv[argc - 1];
    FILE *fp = fopen(path, "r");
    if (fp == 0)
    {
        fprintf(stderr, "%s: failed to open file %s for reading\n",
                argv[0], path);
        return(0);
    }

    for (int c = getc(fp); c != EOF; c = getc(fp))
        putchar(c);
    fclose(fp);
    return(0);
}

此代码会回显参数列表和环境变量，然后打开以最后一个参数命名的文件，并将其内容复制到标准输出。 （由于对argv[argc-1]的特殊处理，它的用途非常专一，但在此之前的那部分代码有时对调试复杂的shell脚本很有用。）

当我使用此“ phantomjs ”运行您的程序时，我得到了期望的输出：

argv[0] = <<phantomjs>>
argv[1] = <<--ssl-protocol=TLSv1>>
argv[2] = <</dev/stdin>>
envp[0] = <<MANPATH=/Users/jleffler/man:/Users/jleffler/share/man:/Users/jleffler/oss/share/man:/Users/jleffler/oss/rcs/man:/usr/local/mysql/man:/opt/gcc/v7.3.0/share/man:/Users/jleffler/perl/v5.24.0/man:/usr/local/man:/usr/local/share/man:/usr/share/man:/opt/gnu/share/man>>
envp[1] = <<IXH=/opt/informix/12.10.FC6/etc/sqlhosts>>
…
envp[49] = <<HISTFILE=/Users/jleffler/.bash.jleffler>>
envp[50] = <<_=./pipe31>>
var page=require('webpage').create();page.onInitialized=function(){page.evaluate(function(){delete window._phantom;delete window.callPhantom;});};page.onResourceRequested=function(requestData,request){if((/http:\/\/.+?\\.css/gi).test(requestData['url'])||requestData.headers['Content-Type']=='text/css'){request.abort();}};page.settings.loadImage=false;page.settings.userAgent='Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36';page.open('https://stackoverflow.com',function(status){if(status!=='success'){phantom.exit(1);}else{console.log(page.content);phantom.exit();}});

此时，我必须将矛头指向您环境中的phantomjs 。 当您执行以下操作时，它的行为似乎不符合预期：

echo "$JS_PROG" | phantomjs /dev/stdin | cat

当然，我再也无法重现您的问题。

您应该使用我的替代phantomjs代码，并使用它代替真正的phantomjs然后看看您得到了什么。
- 如果您得到的输出与我显示的类似，则问题出在真正的phantomjs 。
- 如果您没有得到与我所显示的类似的输出，那么从更新到问题的代码可能存在问题。

以后：请注意，因为printf()使用%s来打印数据，所以它不会注意到多余的空字节被发送给了子级。

Answer 2

pipe(7)的man手册中写道，您应该尽快读取管道：

如果某个进程尝试写入一个已满的管道（请参见下文），则write（2）会阻塞，直到从管道中读取了足够的数据以允许写入完成为止。 通过使用fcntl（2）F_SETFL操作来启用O_NONBLOCK打开文件状态标志，可以实现非阻塞I / O。

和

管道的容量有限。 如果管道已满，则write（2）将阻塞还是失败，具体取决于是否设置了O_NONBLOCK标志（请参见下文）。 不同的实现对管道容量有不同的限制。 应用程序不应依赖于特定的容量：应设计应用程序，以使读取过程在数据可用时就立即使用它，从而不会阻止写入过程。

您的代码先写入，再等待，之后才读取：

write(to_ext_program_pipe[1], string_to_write, strlen(string_to_write) + 1);
close(to_ext_program_pipe[1]);

wait(&rv);
//...
while(read(to_my_program_pipe[0], ch, 1) == 1) {
//...

也许管道已满，或者ext_program正在等待有人读取它写出的数据；因此应当在读取完成之后才调用wait() 。

C-如何通过管道传递给仅从文件读取的程序

问题描述

2 个解决方案

解决方案1
1 已采纳 2018-02-03 17:05:19

最后的解释

初步分析

仍然有问题2018-02-03

问题继续于2018-02-05

解决方案2
1 2018-02-03 17:17:41

C-如何通过管道传递给仅从文件读取的程序

问题描述

2 个解决方案

解决方案1 1 已采纳 2018-02-03 17:05:19

最后的解释

初步分析

仍然有问题2018-02-03

问题继续于2018-02-05

解决方案2 1 2018-02-03 17:17:41

解决方案1
1 已采纳 2018-02-03 17:05:19

解决方案2
1 2018-02-03 17:17:41