繁体   English   中英

从另一个网址登录后,如何使用php curl获取数据?

[英]How to fetch data using PHP cURL after logging in through another URL?

// Sign-in page: serves the login form (and its CSRF token) and receives the login POST.
$url="http://www.example.com/sign_in"; 

//url, i need to fetch data from
// NOTE: $url2 is only declared here; this script never requests it.
$url2="http://www.example.com/xref/quicksearches?quick_search[match]=pants&commit=Go!";

$username = "user_name123";
$password = "password@123";

// Name of the hidden <input> that carries the token the login form expects.
$field='authenticity_token';
$cookie= "cookies.txt";

// Step 1: GET the sign-in page so the session cookie is stored and the token can be read.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
$response = curl_exec($ch);
if (curl_errno($ch)) die(curl_error($ch));
// DOMDocument::loadHTML() rejects an empty string on PHP 8, so stop early instead.
if (!is_string($response) || $response === '') die('Empty response from sign-in page');

// Step 2: pull the token out of the form. Real-world HTML is rarely valid for libxml,
// so collect its parse warnings internally instead of printing them.
libxml_use_internal_errors(true);
$dom = new DomDocument();
$dom->loadHTML($response);
libxml_clear_errors();

$token = null;
foreach ($dom->getElementsByTagName("input") as $input) {
    // No break: like the original loop, the last matching input wins.
    if ($input->getAttribute('name') === $field) {
        $token = $input->getAttribute('value');
    }
}
if ($token === null) die('Unable to find ' . $field);

// Step 3: POST the credentials on the same handle (cookie options set above still apply).
// http_build_query() percent-encodes every value; hand-concatenating them would let a
// '+' or '=' inside the token, or an '&' in the password, corrupt the form body.
$postinfo = http_build_query(array(
    'user[login]'    => $username,
    'user[password]' => $password,
    $field           => $token
), '', '&');
echo $token;
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $postinfo);

$html = curl_exec($ch); 
print($html);

curl_close($ch);

现在,我可以成功登录了,但是我还需要从 $url2 获取数据。我该如何实现?

curl_multi_init()对我不起作用。 我正在从第一个网址获取真实性令牌,然后登录,但不知道如何转到第二个网址并获取该页面。 我是第一次使用cURL :)

由于不知道具体的 URL 和凭据,以下代码完全未经测试;不过它基于我几年前用过的一段代码——那段代码用于登录某个特定网站、提交一些新数据然后注销,经过大量测试后运行正常。希望这段代码对您有所帮助。

<?php
    /**
     * Send one HTTP request through cURL and return the outcome as an object.
     *
     * A fresh handle is created and closed on every call. Defaults are applied
     * first and then overridden by anything supplied in $options.
     *
     * @param string|false $url     Target URL. An empty value returns a stub result
     *                              (http_code 500) without touching the network.
     * @param array        $options Map of CURLOPT_* constant => value, applied last.
     * @param string|false $cacert  Path to a CA bundle; when given and the URL is
     *                              https, the peer certificate is verified against it.
     *
     * @return object Properties: response (curl_exec() result), info (object built
     *                from curl_getinfo()), headers (always NULL), errors (curl_error() text).
     */
    function makerequest( $url=false, $options=array(), $cacert=false ){
        /* Stub result; `info` is an object so callers can always read ->info->http_code */
        $res=array(
            'response'  =>  NULL,
            'info'      =>  (object)array( 'http_code' => 500 ),
            'headers'   =>  NULL,
            'errors'    =>  NULL
        );
        if( empty( $url ) ) return (object)$res;
        $url=trim( $url );

        /* Close the PHP session before the request - presumably so it is not held open while waiting on the network */
        session_write_close();

        /* Initialise curl request object */
        $curl=curl_init();
        if( $curl===false ){
            $res['errors']='Unable to initialise cURL';
            return (object)$res;
        }

        if( parse_url( $url,PHP_URL_SCHEME )=='https' && $cacert ){
            /* A CA bundle is only useful when peer verification is switched on */
            curl_setopt( $curl, CURLOPT_SSL_VERIFYPEER, TRUE );
            curl_setopt( $curl, CURLOPT_SSL_VERIFYHOST, 2 );
            curl_setopt( $curl, CURLOPT_CAINFO, $cacert );
        }

        /*
            Define standard options.
            CURLOPT_CLOSEPOLICY ( removed from PHP ) and CURLOPT_BINARYTRANSFER
            ( a no-op, now deprecated ) are intentionally not set.
        */
        curl_setopt( $curl, CURLOPT_URL, $url );
        curl_setopt( $curl, CURLOPT_AUTOREFERER, TRUE );
        curl_setopt( $curl, CURLOPT_FOLLOWLOCATION, TRUE );
        curl_setopt( $curl, CURLOPT_FRESH_CONNECT, TRUE );
        curl_setopt( $curl, CURLOPT_FORBID_REUSE, TRUE );
        curl_setopt( $curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1 );
        curl_setopt( $curl, CURLOPT_MAXCONNECTS, 1 );
        curl_setopt( $curl, CURLOPT_FAILONERROR, FALSE );
        curl_setopt( $curl, CURLOPT_HEADER, FALSE );
        curl_setopt( $curl, CURLINFO_HEADER_OUT, FALSE );
        curl_setopt( $curl, CURLOPT_RETURNTRANSFER, TRUE );
        curl_setopt( $curl, CURLOPT_CONNECTTIMEOUT, 20 );
        curl_setopt( $curl, CURLOPT_TIMEOUT, 60 );
        curl_setopt( $curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:35.0) Gecko/20100101 Firefox/35.0' );
        curl_setopt( $curl, CURLOPT_MAXREDIRS, 10 );
        curl_setopt( $curl, CURLOPT_ENCODING, '' );

        /* Assign runtime parameters as options. Overwrite previous if necessary */
        if( !empty( $options ) ){
            foreach( $options as $param => $value ) curl_setopt( $curl, $param, $value );
        }

        /* Execute the request and store responses ( same keys as the stub above ) */
        $res=(object)array(
            'response'  =>  curl_exec( $curl ),
            'info'      =>  (object)curl_getinfo( $curl ),
            'headers'   =>  NULL,
            'errors'    =>  curl_error( $curl )
        );
        curl_close( $curl );
        return $res;
    }








    /* Sign-in page: serves the login form and receives the login POST */
    $url="http://www.example.com/sign_in";
    /* Page to retrieve once the session is authenticated */
    $url2="http://www.example.com/xref/quicksearches?quick_search[match]=pants&commit=Go!";

    $username = "user_name123";
    $password = "password@123";

    $token=false;
    /* Name of the hidden form input that holds the token */
    $field='authenticity_token';
    $cookie=__DIR__ . "/cookies.txt";
    /*
        cacert.pem can be freely downloaded from various sites on the interwebs
        ~ https://curl.haxx.se/docs/caextract.html

        Or include the final, live url to ensure that you are only using the most
        uptodate version.

        http://curl.haxx.se/ca/cacert.pem
    */
    $cacert='c:/wwwroot/cacert.pem';

    $stdheaders=array(
        'Accept: ',
        'Accept-Encoding: gzip, deflate',
        'Connection: keep-alive',
        'Accept-Language: en-GB,en;q=0.5'
    );


    try{

        #Stage 1 ~ get login page to capture cookies
        /*
            CURLOPT_HEADER is left off: the response body is handed straight to
            the HTML parser below, so it must not be prefixed with raw headers.
        */
        $options=array(
            CURLOPT_COOKIESESSION   =>  TRUE,
            CURLOPT_COOKIEFILE      =>  $cookie,
            CURLOPT_COOKIEJAR       =>  $cookie,
            CURLOPT_REFERER         =>  $url,
            CURLINFO_HEADER_OUT     =>  TRUE
        );
        $res = makerequest( $url, $options, $cacert );
        if( $res->info->http_code==200 ){

            /* debug: view any cookies stored */
            if( is_file( $cookie ) ) echo '<pre>',print_r( file( $cookie ),true ),'</pre>';


            /* Keep libxml quiet about imperfect real-world markup */
            libxml_use_internal_errors( true );
            $dom = new DomDocument();
            $dom->validateOnParse=false;
            $dom->standalone=true;
            $dom->strictErrorChecking=false;
            $dom->recover=true;
            $dom->formatOutput=false;
            $dom->loadHTML( $res->response );
            libxml_clear_errors();

            #stage 2 - find token
            $tokens = $dom->getElementsByTagName("input");
            for( $i = 0; $i < $tokens->length; $i++ ) {
                $meta = $tokens->item( $i );
                if( $meta->hasAttribute('name') && $meta->getAttribute('name') == $field ) {
                    $token = $meta->getAttribute('value');
                }
            }

            if( $token ){
                $params=array(
                    'user[login]'           =>  $username,
                    'user[password]'        =>  $password,
                    'authenticity_token'    =>  $token
                );

                #Stage 3 - make login request
                /*
                    The body is url-encoded by http_build_query. No Content-Length
                    header is supplied - curl derives it from CURLOPT_POSTFIELDS.
                */
                $options=array(
                    CURLOPT_COOKIEFILE      =>  $cookie,
                    CURLOPT_COOKIEJAR       =>  $cookie,
                    CURLOPT_REFERER         =>  $url,
                    CURLOPT_POST            =>  TRUE,
                    CURLOPT_POSTFIELDS      =>  http_build_query( $params, '', '&' ),
                    CURLOPT_HTTPHEADER      =>  array_merge( $stdheaders, array( 'Content-Type: application/x-www-form-urlencoded' ) )
                );
                $res = makerequest( $url, $options, $cacert );
                if( $res->info->http_code==200 ){

                    #Stage 4 - fetch the target page using the cookies saved by the login
                    $options=array(
                        CURLOPT_COOKIEFILE      =>  $cookie,
                        CURLOPT_COOKIEJAR       =>  $cookie,
                        CURLOPT_REFERER         =>  $url,
                        CURLOPT_HTTPHEADER      =>  $stdheaders
                    );
                    $res = makerequest( $url2, $options, $cacert );
                    if( $res->info->http_code==200 ){
                        $html=$res->response;

                        exit( $html );
                    }
                    throw new Exception('Failed to fetch target page',4);
                }
                throw new Exception('Failed to get HTML',3);
            }
            throw new Exception( sprintf('Unable to find %s', $field ),2 );
        }
        throw new Exception('Bad initial response',1);
    }catch( Exception $e ){
        exit( sprintf( 'Error: Process failed at stage %d with message %s', $e->getCode(), $e->getMessage() ) );
    }
?>

所以我做错的是,我在关闭第一个连接之前从第二个URL提取数据。 这是答案

// Sign-in page: serves the login form (and its CSRF token) and receives the login POST.
$url="http://www.example.com/sign_in"; 

//url, i need to fetch data from
$url2="http://www.example.com/xref/quicksearches?quick_search[match]=pants&commit=Go!";

$username = "user_name123";
$password = "password@123";

// Name of the hidden <input> that carries the token the login form expects.
$field='authenticity_token';
$cookie= "cookies.txt";

// Step 1: GET the sign-in page so the session cookie is stored and the token can be read.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
$response = curl_exec($ch);
if (curl_errno($ch)) die(curl_error($ch));
// DOMDocument::loadHTML() rejects an empty string on PHP 8, so stop early instead.
if (!is_string($response) || $response === '') die('Empty response from sign-in page');

// Step 2: pull the token out of the form, keeping libxml's parse warnings internal.
libxml_use_internal_errors(true);
$dom = new DomDocument();
$dom->loadHTML($response);
libxml_clear_errors();

$token = null;
foreach ($dom->getElementsByTagName("input") as $input) {
    // No break: like the original loop, the last matching input wins.
    if ($input->getAttribute('name') === $field) {
        $token = $input->getAttribute('value');
    }
}
if ($token === null) die('Unable to find ' . $field);

// Step 3: POST the credentials on the same handle (cookie options set above still apply).
// http_build_query() percent-encodes every value; hand-concatenating them would let a
// '+' or '=' inside the token, or an '&' in the password, corrupt the form body.
$postinfo = http_build_query(array(
    'user[login]'    => $username,
    'user[password]' => $password,
    $field           => $token
), '', '&');
echo $token;
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $postinfo);

$html = curl_exec($ch); 
print($html);

// Close the login handle before starting the next request; the second handle
// below picks the session up from the same cookie file.
curl_close($ch);

// Step 4: request the data page on a new handle.
// All parameters of $url2 are in its query string, so this is a plain GET;
// CURLOPT_POST is not set because there is no request body to send.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url2);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

$html = curl_exec($ch); 
if (curl_errno($ch)) die(curl_error($ch));
print($html);
curl_close($ch);

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM