繁体   English   中英

curl_multi_exec 需要很长时间才能返回响应

[英]curl_multi_exec is taking forever to give response

我的项目需要使用 curl multi 对存储在数组中的每个用户名请求一个 URL。用户名数组的大小约为 45k。目前我的做法是:先创建另一个包含 45k 个待请求 URL 的数组,然后为了高效地发送请求,把这个 URL 数组分成每块 200 个的块,再把每个块数组传给 multi curl 执行函数来获取响应。但问题是,接收全部 45k 个请求的响应花费了太多时间。我打印过响应数组,它确实在按预期增长,但要拿到全部响应耗时太长。请帮帮我,因为我必须在明天之前完成目标。下面给出我的代码。

// Placeholder user names: the integers 1..45000. The original literal used an
// ellipsis ("6.....45000"), which is not valid PHP.
$array1 = range(1, 45000);

现在使用每个用户名作为查询字符串创建url

// Build one request URL per user name. The list is initialized explicitly and
// each name is URL-encoded so reserved characters cannot break the query string.
$url = [];
foreach ($array1 as $arr) {
    $url[] = 'abc.com?u=' . urlencode((string) $arr);
}

// Split the URL list into chunks of 200. preserve_keys = true keeps every URL's
// original index as its key inside the chunk. array_chunk() already returns the
// list of chunks, so it is assigned directly instead of being appended to a
// wrapper array.
$chunk = array_chunk($url, 200, true);

// Send the chunks one after another and collect one response array per chunk.
// $res must exist before anything is appended to it.
$res = [];
foreach ($chunk as $c_arr) {
    $res[] = multiRequest($c_arr);
}

//我的multi_curl函数

/**
 * Requests several URLs concurrently through curl_multi and returns the bodies.
 *
 * @param array $data    URLs to request; each key is reused as the key of the
 *                       matching entry in the returned array.
 * @param array $options Optional CURLOPT_* => value pairs applied to every
 *                       handle after the defaults below, so they can override them.
 * @return array Response body per key, as reported by curl_multi_getcontent().
 */
function multiRequest($data, $options = array())
{
    $curly = array();
    $result = array();
    $mh = curl_multi_init();
    $ua = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.A.B.C Safari/525.13';

    foreach ($data as $id => $d) {
        $curly[$id] = curl_init();
        curl_setopt($curly[$id], CURLOPT_URL, $d);
        curl_setopt($curly[$id], CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curly[$id], CURLOPT_USERAGENT, $ua);
        curl_setopt($curly[$id], CURLOPT_AUTOREFERER, true);
        curl_setopt($curly[$id], CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($curly[$id], CURLOPT_MAXREDIRS, 20);
        curl_setopt($curly[$id], CURLOPT_HTTPGET, true);
        curl_setopt($curly[$id], CURLOPT_HEADER, 0);
        if (!empty($options)) {
            // Caller-supplied options were previously accepted but never applied.
            curl_setopt_array($curly[$id], $options);
        }
        curl_multi_add_handle($mh, $curly[$id]);
    }

    $running = null;
    do {
        // Older libcurl versions ask to be called again right away.
        do {
            $status = curl_multi_exec($mh, $running);
        } while ($status === CURLM_CALL_MULTI_PERFORM);

        if ($status !== CURLM_OK) {
            break;
        }

        // Sleep until a transfer has activity instead of spinning on
        // curl_multi_exec(), which pins a CPU core for the whole batch.
        if ($running > 0 && curl_multi_select($mh, 1.0) === -1) {
            // select() can fail immediately on some systems; back off briefly
            // so the loop does not degrade into a busy-wait again.
            usleep(1000);
        }
    } while ($running > 0);

    foreach ($curly as $id => $c) {
        $result[$id] = curl_multi_getcontent($c);
        curl_multi_remove_handle($mh, $c);
    }
    curl_multi_close($mh);

    return $result;
}

请告诉我该怎么办:获取全部 45000 个请求的响应花了将近 25-30 分钟。目前我在本地计算机上运行此脚本,之后它会作为 cron 作业部署到线上服务器。

您是否尝试过用多进程代替 curl_multi?也许会更快——这种情况并不是第一次出现。

尝试

<?php

// Source of the worker script. Each worker requests one URL, taking the user
// name from its first command-line argument; the response body goes to the
// worker's stdout because CURLOPT_RETURNTRANSFER is not set.
$workerSource = <<<'CODE'
<?php
$ch=curl_init();
curl_setopt_array($ch,array(
CURLOPT_URL=>'abc.com?u='.urlencode($argv[1]),
CURLOPT_ENCODING=>"",
CURLOPT_USERAGENT=>'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.A.B.C Safari/525.13',
CURLOPT_AUTOREFERER=>true,
CURLOPT_FOLLOWLOCATION=>true,
CURLOPT_MAXREDIRS=>20
));
curl_exec($ch);
curl_close($ch);

CODE;

// Write the worker into a temporary file; the handle is kept in a variable so
// the file is not removed while the script is still running.
$workerHandle = tmpfile();
$workerMeta = stream_get_meta_data($workerHandle);
$workerPath = $workerMeta['uri'];
file_put_contents($workerPath, $workerSource);

// One shell command per user name 1..45000, all sharing the same prefix.
$commandPrefix = '/usr/bin/php ' . escapeshellarg($workerPath) . ' ';
$commands = array();
foreach (range(1, 45000) as $userName) {
    $commands[] = $commandPrefix . escapeshellarg((string) $userName);
}

// Run the commands, at most 200 at a time, and report results plus elapsed seconds.
$startedAt = microtime(true);
$results = hhb_exec_multi1($commands, 200);
$elapsedSeconds = microtime(true) - $startedAt;
var_dump($results, $elapsedSeconds);
exit();
/**
 * Plain result record for one command.
 *
 * The constructor copies every key of the given array onto the property of the
 * same name, so keys beyond the four declared properties are set as well.
 */
class hhb_exec_multi1_ret {
    /** The command this record belongs to. */
    public $cmd;
    /** Return (exit) code stored for the command. */
    public $ret;
    /** Standard output stored for the command. */
    public $stdout;
    /** Standard error stored for the command. */
    public $stderr;

    /**
     * @param array $attributes property name => value pairs to copy onto this object
     */
    public function __construct(array $attributes) {
        foreach (array_keys($attributes) as $property) {
            $this->{$property} = $attributes[$property];
        }
    }
}
/**
 * Runs shell commands in parallel, at most $max_concurrent at a time, and
 * collects the exit code, stdout and stderr of each one.
 *
 * Every command gets its own temporary files for stdout and stderr; its stdin
 * is a pipe that is closed right after the process starts. Results are keyed
 * by the command's key in $cmds and are added in completion order.
 *
 * @param string[] $cmds shell commands to run
 * @param int $max_concurrent maximum number of simultaneously running commands (>= 1)
 * @param callable|null $finished_callback called with the hhb_exec_multi1_ret of each command as soon as it has finished
 * @throws InvalidArgumentException if $max_concurrent is below 1 or a command is not a string
 * @throws RuntimeException if a temporary file or a process cannot be created
 * @return hhb_exec_multi1_ret[]
 */
function hhb_exec_multi1(array $cmds, int $max_concurrent = 10, $finished_callback = NULL): array {
    // input validation
    if ($max_concurrent < 1) {
        throw new InvalidArgumentException ( '$max_concurrent must be above 0... and less or equal to ' . PHP_INT_MAX );
    }
    foreach ( $cmds as $tmp ) {
        if (! is_string ( $tmp )) {
            throw new InvalidArgumentException ( '$cmds must be an array of strings!' );
        }
    }
    $ret = array ();
    $running = array ();
    // Collects every child that has exited: stores its exit code and captured
    // output in $ret, removes it from $running, and invokes the callback.
    // Returns the number of children collected in this pass.
    $reap = function () use (&$running, &$ret, $finished_callback): int {
        $reaped = 0;
        foreach ( $running as $runningkey => $check ) {
            $stat = proc_get_status ( $check ['proc'] );
            if ($stat ['running']) {
                continue;
            }
            proc_close ( $check ['proc'] );
            // The exit code is taken from the status fetched above, not from proc_close().
            $check ['ret'] = $stat ['exitcode'];
            foreach ( [ 'stdout', 'stderr' ] as $stream ) {
                // Swap the temp-file handle for the file's contents, then release the file.
                $contents = file_get_contents ( stream_get_meta_data ( $check [$stream] ) ['uri'] );
                fclose ( $check [$stream] );
                $check [$stream] = $contents;
            }
            $checkkey = $check ['key'];
            unset ( $check ['key'], $check ['proc'] );
            $finished = ($ret [$checkkey] = new hhb_exec_multi1_ret ( $check ));
            unset ( $running [$runningkey] );
            if (! empty ( $finished_callback )) {
                $finished_callback ( $finished );
            }
            ++ $reaped;
        }
        return $reaped;
    };
    foreach ( $cmds as $key => $cmd ) {
        // Wait for a free slot; sleep only when a pass over the children freed nothing.
        while ( count ( $running ) >= $max_concurrent ) {
            if ($reap () === 0) {
                usleep ( 100 * 1000 );
            }
        }
        $stdout = tmpfile ();
        $stderr = tmpfile ();
        if ($stdout === false || $stderr === false) {
            throw new RuntimeException ( 'failed to create temporary files for command output' );
        }
        $descriptorspec = array (
                // stdin is a pipe so the children do not inherit our stdin, which is the default behaviour if [0] is not defined.
                0 => array (
                        "pipe",
                        "rb"
                ),
                // stdout is a file to write to
                1 => array (
                        "file",
                        stream_get_meta_data ( $stdout ) ['uri'],
                        "wb"
                ),
                // stderr is a file to write to
                2 => array (
                        "file",
                        stream_get_meta_data ( $stderr ) ['uri'],
                        "wb"
                )
        );
        $pipes = [ ];
        $proc = proc_open ( $cmd, $descriptorspec, $pipes );
        if ($proc === false) {
            fclose ( $stdout );
            fclose ( $stderr );
            throw new RuntimeException ( 'proc_open failed for command: ' . $cmd );
        }
        fclose ( $pipes [0] );
        $running [] = array (
                'cmd' => $cmd,
                'ret' => - 1,
                'stdout' => $stdout,
                'stderr' => $stderr,
                'key' => $key,
                'proc' => $proc
        );
    }
    // Drain the children that are still running.
    while ( count ( $running ) > 0 ) {
        if ($reap () === 0) {
            usleep ( 100 * 1000 );
        }
    }
    return $ret;
}

我在笔记本电脑上针对本地 Nginx 服务器运行这段代码,循环次数设为 45000 时,整个执行耗时 6 分 39 秒(399 秒)。

编辑:糟糕,忘记将代码写入作业文件(file_put_contents),已修复。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM