[英]How to fetch data with PHP cURL from a second URL after logging in through another URL?
// Login page: serves the form holding the CSRF token and also receives the credentials.
$url = "http://www.example.com/sign_in";
// URL the data should eventually be fetched from (not requested by this snippet).
$url2 = "http://www.example.com/xref/quicksearches?quick_search[match]=pants&commit=Go!";
$username = "user_name123";
$password = "password@123";
// Name of the hidden form input that carries the token.
$field = 'authenticity_token';
// Cookie jar shared by both requests so the session survives between them.
$cookie = "cookies.txt";

// Step 1: GET the login page to obtain the session cookie and the token.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
$response = curl_exec($ch);
if ($response === false || curl_errno($ch)) {
    die(curl_error($ch));
}
if ($response === '') {
    // DOMDocument::loadHTML() rejects an empty string, so stop with a clear message instead.
    die('Login page returned an empty body');
}

// Step 2: locate the token input. Real-world HTML is rarely valid, so libxml
// warnings are collected internally rather than printed into the output.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$dom = new DOMDocument();
$dom->loadHTML($response);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

$token = null;
foreach ($dom->getElementsByTagName('input') as $input) {
    // No break: if several inputs share the name, the last one wins.
    if ($input->getAttribute('name') === $field) {
        $token = $input->getAttribute('value');
    }
}
if ($token === null) {
    die('Unable to find ' . $field . ' in the login page');
}

// Step 3: POST the credentials on the same handle. http_build_query() URL-encodes
// every value; a raw concatenation would corrupt tokens containing "+", "/" or "=".
$postinfo = http_build_query(array(
    'user[login]'    => $username,
    'user[password]' => $password,
    $field           => $token,
));
echo $token;
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $postinfo);
$html = curl_exec($ch);
if ($html === false) {
    die(curl_error($ch));
}
print($html);
curl_close($ch);
现在我已经可以成功登录,但还需要从 $url2 获取数据。该如何实现?
curl_multi_init()对我不起作用。 我正在从第一个网址获取真实性令牌,然后登录,但不知道如何转到第二个网址并获取该页面。 我是第一次使用cURL :)
由于不知道实际的 URL 和凭据,以下代码完全未经测试;不过它基于我几年前使用过的一段代码——那段代码用于登录某个特定网站、提交一些新数据然后注销,经过大量测试后运行正常。希望这段代码对您有所帮助。
<?php
/**
 * Perform one HTTP request with cURL and return the outcome as an object.
 *
 * A fixed set of default options is applied first; every entry of $options is
 * then applied on top, so callers can override any default.
 *
 * @param string|false $url     URL to request. An empty value skips the request
 *                              and yields a placeholder result with http_code 500.
 * @param array        $options Map of cURL option constant => value.
 * @param string|false $cacert  Path to a CA bundle; only used for https URLs.
 *
 * @return object Properties:
 *                - response: return value of curl_exec() (string, or false on failure)
 *                - info:     curl_getinfo() data as an object (e.g. $res->info->http_code)
 *                - headers:  raw response header text when CURLOPT_HEADER was enabled
 *                            through $options, otherwise NULL
 *                - errors:   curl_error() text ('' when the transfer succeeded)
 */
function makerequest( $url=false, $options=array(), $cacert=false ){
    if( empty( $url ) ){
        /* Same shape as a real result, so callers can always read $res->info->http_code */
        return (object)array(
            'response' => NULL,
            'info'     => (object)array( 'http_code' => 500 ),
            'headers'  => NULL,
            'errors'   => NULL
        );
    }

    /* Write and end any open PHP session before the (possibly slow) network call */
    session_write_close();

    /* Initialise curl request object */
    $curl=curl_init();

    if( parse_url( $url, PHP_URL_SCHEME )=='https' && $cacert ){
        /* A CA bundle is only useful when the peer certificate is actually verified */
        curl_setopt( $curl, CURLOPT_SSL_VERIFYPEER, TRUE );
        curl_setopt( $curl, CURLOPT_SSL_VERIFYHOST, 2 );
        curl_setopt( $curl, CURLOPT_CAINFO, $cacert );
    }

    /*
        Define standard options.
        CURLOPT_CLOSEPOLICY ( removed from PHP ) and CURLOPT_BINARYTRANSFER
        ( a no-op ) are intentionally not set.
    */
    $defaults=array(
        CURLOPT_URL             => trim( $url ),
        CURLOPT_AUTOREFERER     => TRUE,
        CURLOPT_FOLLOWLOCATION  => TRUE,
        CURLOPT_FRESH_CONNECT   => TRUE,
        CURLOPT_FORBID_REUSE    => TRUE,
        CURLOPT_HTTP_VERSION    => CURL_HTTP_VERSION_1_1,
        CURLOPT_MAXCONNECTS     => 1,
        CURLOPT_FAILONERROR     => FALSE,
        CURLOPT_HEADER          => FALSE,
        CURLINFO_HEADER_OUT     => FALSE,
        CURLOPT_RETURNTRANSFER  => TRUE,
        CURLOPT_CONNECTTIMEOUT  => 20,
        CURLOPT_TIMEOUT         => 60,
        CURLOPT_USERAGENT       => 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:35.0) Gecko/20100101 Firefox/35.0',
        CURLOPT_MAXREDIRS       => 10,
        CURLOPT_ENCODING        => ''
    );
    foreach( $defaults as $param => $value ) curl_setopt( $curl, $param, $value );

    /* Assign runtime parameters as options. Overwrite previous if necessary */
    if( !empty( $options ) && is_array( $options ) ){
        foreach( $options as $param => $value ) curl_setopt( $curl, $param, $value );
    }

    /* Execute the request and store responses */
    $body=curl_exec( $curl );
    $info=curl_getinfo( $curl );
    $errors=curl_error( $curl );

    /* With CURLOPT_HEADER on, the header text precedes the body inside the response */
    $headers=NULL;
    if( is_string( $body ) && is_array( $options ) && !empty( $options[ CURLOPT_HEADER ] ) && !empty( $info['header_size'] ) ){
        $headers=substr( $body, 0, $info['header_size'] );
    }

    curl_close( $curl );

    return (object)array(
        'response' => $body,
        'info'     => (object)$info,
        'headers'  => $headers,
        'errors'   => $errors
    );
}
/* Login page: serves the form with the token and receives the credentials */
$url="http://www.example.com/sign_in";
/* Page to fetch once the session is authenticated */
$url2="http://www.example.com/xref/quicksearches?quick_search[match]=pants&commit=Go!";
$username = "user_name123";
$password = "password@123";
$token=false;
/* Name of the hidden form input that carries the token */
$field='authenticity_token';
/* Absolute path, so every request reads and writes the same cookie jar */
$cookie=__DIR__ . "/cookies.txt";
/*
    cacert.pem can be freely downloaded from various sites on the interwebs
    ~ https://curl.haxx.se/docs/caextract.html

    Or include the final, live url to ensure that you are only using the most
    uptodate version.
    http://curl.haxx.se/ca/cacert.pem
*/
$cacert='c:/wwwroot/cacert.pem';
/* Request headers sent with the login POST and with the final page request */
$stdheaders=array(
    'Accept: ',
    'Accept-Encoding: gzip, deflate',
    'Connection: keep-alive',
    'Accept-Language: en-GB,en;q=0.5'
);

try{
    #Stage 1 ~ get login page to capture cookies
    $options=array(
        CURLOPT_COOKIESESSION   => TRUE,
        CURLOPT_COOKIEFILE      => $cookie,
        CURLOPT_COOKIEJAR       => $cookie,
        CURLOPT_REFERER         => $url,
        CURLOPT_HEADER          => TRUE,
        CURLINFO_HEADER_OUT     => TRUE
    );
    $res = makerequest( $url, $options, $cacert );
    if( $res->info->http_code!=200 || empty( $res->response ) ){
        throw new Exception('Bad initial response',1);
    }

    /* debug: view any cookies stored ( the jar may not exist if none were set ) */
    if( is_readable( $cookie ) ){
        echo '<pre>',print_r( file( $cookie ),true ),'</pre>';
    }

    /* Collect libxml parse errors internally rather than emitting them as warnings */
    libxml_use_internal_errors( true );
    $dom = new DomDocument();
    $dom->validateOnParse=false;
    $dom->standalone=true;
    $dom->strictErrorChecking=false;
    $dom->recover=true;
    $dom->formatOutput=false;
    $dom->loadHTML( $res->response );
    libxml_clear_errors();

    #stage 2 - find token
    foreach( $dom->getElementsByTagName("input") as $input ){
        /* No break: if several inputs share the name, the last one wins */
        if( $input->hasAttribute('name') && $input->getAttribute('name') == $field ){
            $token = $input->getAttribute('value');
        }
    }
    if( !$token ){
        throw new Exception( sprintf('Unable to find %s', $field ),2 );
    }

    #Stage 3 - make login request
    $params=array(
        'user[login]'           => $username,
        'user[password]'        => $password,
        'authenticity_token'    => $token
    );
    $options=array(
        CURLOPT_COOKIEFILE  => $cookie,
        CURLOPT_COOKIEJAR   => $cookie,
        CURLOPT_REFERER     => $url,
        CURLOPT_POST        => TRUE,
        CURLOPT_POSTFIELDS  => http_build_query( $params, '', '&' ),
        /* No manual Content-Length: cURL derives it from the encoded POST body */
        CURLOPT_HTTPHEADER  => array_merge( $stdheaders, array( 'Content-Type: application/x-www-form-urlencoded' ) )
    );
    $res = makerequest( $url, $options, $cacert );
    if( $res->info->http_code!=200 ){
        throw new Exception('Login request failed',3);
    }

    #Stage 4 - request the wanted page, re-using the cookies saved by the login
    $options=array(
        CURLOPT_COOKIEFILE  => $cookie,
        CURLOPT_COOKIEJAR   => $cookie,
        CURLOPT_REFERER     => $url,
        CURLOPT_HTTPHEADER  => $stdheaders
    );
    $res = makerequest( $url2, $options, $cacert );
    if( $res->info->http_code!=200 ){
        throw new Exception('Failed to get HTML',4);
    }

    $html=$res->response;
    exit( $html );

}catch( Exception $e ){
    exit( sprintf( 'Error: Process failed at stage %d with message %s', $e->getCode(), $e->getMessage() ) );
}
?>
我的错误在于:在关闭第一个连接之前,就尝试从第二个 URL 获取数据。以下是可行的代码:
// Login page: serves the form holding the CSRF token and also receives the credentials.
$url = "http://www.example.com/sign_in";
// URL the data is fetched from once the session is authenticated.
$url2 = "http://www.example.com/xref/quicksearches?quick_search[match]=pants&commit=Go!";
$username = "user_name123";
$password = "password@123";
// Name of the hidden form input that carries the token.
$field = 'authenticity_token';
// Cookie jar shared by all requests so the session survives between handles.
$cookie = "cookies.txt";

// Step 1: GET the login page to obtain the session cookie and the token.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
$response = curl_exec($ch);
if ($response === false || curl_errno($ch)) {
    die(curl_error($ch));
}
if ($response === '') {
    // DOMDocument::loadHTML() rejects an empty string, so stop with a clear message instead.
    die('Login page returned an empty body');
}

// Step 2: locate the token input. Real-world HTML is rarely valid, so libxml
// warnings are collected internally rather than printed into the output.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$dom = new DOMDocument();
$dom->loadHTML($response);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

$token = null;
foreach ($dom->getElementsByTagName('input') as $input) {
    // No break: if several inputs share the name, the last one wins.
    if ($input->getAttribute('name') === $field) {
        $token = $input->getAttribute('value');
    }
}
if ($token === null) {
    die('Unable to find ' . $field . ' in the login page');
}

// Step 3: POST the credentials on the same handle. http_build_query() URL-encodes
// every value; a raw concatenation would corrupt tokens containing "+", "/" or "=".
$postinfo = http_build_query(array(
    'user[login]'    => $username,
    'user[password]' => $password,
    $field           => $token,
));
echo $token;
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $postinfo);
$html = curl_exec($ch);
if ($html === false) {
    die(curl_error($ch));
}
print($html);

// Release the login handle before the next request: the cookie jar is written
// to disk when the handle is freed. curl_close() has no effect since PHP 8.0,
// so the variable is unset as well.
curl_close($ch);
unset($ch);

// Step 4: fetch the wanted page with a fresh handle that loads the saved cookies.
// The search parameters are already in the query string, so this is a plain GET.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url2);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
$html = curl_exec($ch);
if ($html === false) {
    die(curl_error($ch));
}
print($html);
curl_close($ch);
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.