[英]curl redirect not properly working
我的函数应该获取$ url的目标URL:
function getUrl($url)
{
$user_agent='Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';
$ch = curl_init();
$timeout = 10; // set to zero for no timeout
curl_setopt ($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
curl_setopt ($ch, CURLOPT_URL, $url);
curl_setopt ($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt ($ch, CURLOPT_HEADER, 1);
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt ($ch, CURLOPT_FOLLOWLOCATION, true);
$curl = curl_exec($ch);
$header = curl_getinfo($ch);
curl_close($ch);
return $header;
}
function get_url_list() {
$url = "http://www.webliste.ch/click.aspx?nr=148252";
$result=getUrl($url);
print_r($result);echo "<br>";
}
get_url_list();
结果如下:
Array
(
[url] => http://www.webliste.ch/click.aspx?nr=148252
[content_type] => text/html; charset=iso-8859-1
[http_code] => 200
[header_size] => 320
[request_size] => 139
...
[redirect_time] => 0
[certinfo] => Array
(
)
[redirect_url] =>
)
我不知所措,因为URL正在重定向,如果回显$ ch,我将获得重定向的网站。
有人知道这是什么原因吗?
以下内容也不起作用:
$final_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
输出与$ result ['url']相同,这不是我想要的。
我已经分析了实际发生的情况,现在我发现重定向不是由该页面上的重定向标头引起的,而是JavaScript立即提交表单并将您重定向到起始页面。
可能难以确定页面的URL,但是您可以做的是查找<form>
标记,然后在其action
属性中找到URL。
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="de">
<head id="Head1">
<title></title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<meta name="ROBOTS" content="NOINDEX, NOFOLLOW" />
</head>
<body>
<form id="form1" action="http://www.taxiherold.ch">
<div id="panGo" align="center">
<script type="text/javascript">
document.getElementById('form1').submit();
</script>
</div>
</form>
</body>
</html>
因此,请立即尝试以下代码:
$ch = curl_init('http://www.webliste.ch/click.aspx?nr=148252');
curl_setopt ($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)');
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt ($ch, CURLOPT_FOLLOWLOCATION, false);
$data = curl_exec($ch);
$dom = new DOMDocument();
@$dom->loadHTML($data);
$xpath = new DOMXPath($dom);
$url = $xpath->query('//body/form');
$url = ($url->length == 1 ? $url->item(0)->getAttribute('action') : null);
var_dump($url);
将输出:
我写的这节课可以帮助你
此类返回所有标头信息,例如重定向,...
function HeaderProc($response,$Run="",$String=1/*[Is 1 IF Use for String Mode ]*/){
print_r($response);
if($String==1){
$response=explode("\r\n",$response);
}
$PartHeader=0;
$out[$PartHeader]=array();
while(list($key,$val)=each($response)){
$name='';
$value='';
$flag=false;
for($i=0;$i<strlen($val);$i++){
if($val[$i]==":"){
$flag=true;
for($j=$i+1;$j<strlen($val);$j++){
if($val[$i]=="\r" and $val[$i+1]=="\n"){
break;
}
$value.=$val[$j];
}
break;
}
$name.=$val[$i];
}
if($flag){
if($name=='' and $value==''){
$PartHeader++;
}else{
if(isset($out[$PartHeader][$name])){
if(is_array($out[$PartHeader][$name])){
$out[$PartHeader][$name][]=$value;
}else{
$T=$out[$PartHeader][$name];
$out[$PartHeader][$name]=array();
$out[$PartHeader][$name][0]=$T;
$out[$PartHeader][$name][1]=$value;
}
}else{
$out[$PartHeader][$name]=$value;
}
}
}else{
if($name==''){
$PartHeader++;
}else{
if(isset($out[$PartHeader][$name])){
if(is_array($out[$PartHeader][$name])){
$out[$PartHeader][$name][]=$value;
}else{
$T=$out[$PartHeader][$name];
$out[$PartHeader][$name]=array();
$out[$PartHeader][$name][0]=$T;
$out[$PartHeader][$name][1]=$name;
}
}else{
$out[$PartHeader][$name]=$name;
}
}
}
if($Run!=""){
$Run($name,$value);
}
}
return $out;
}
class cURL {
var $headers;
var $user_agent;
var $compression;
var $cookie_file;
var $proxy;
var $Cookie;
function CookieAnalysis($Cookie){//convert str cookie to array cookie
//echo $Cookie;
$this->Cookie=array();
preg_replace_callback("~(.*?)=(.*?);~si",function($m){$this->Cookie[trim($m[1])]=trim($m[2]);},' '.$Cookie.'; ');
return $this->Cookie;
}
function cURL($cookies=false,$cookie='cookies.txt',$compression='gzip',$proxy='') {
$this->headers[] = 'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
$this->headers[] = 'Accept-Charset:ISO-8859-1,utf-8;q=0.7,*;q=0.3';
$this->headers[] = 'Accept-Encoding:gzip,deflate,sdch';
$this->headers[] = 'Accept-Language:en-US,en;q=0.8';
$this->headers[] = 'Cache-Control:max-age=0';
$this->headers[] = 'Connection:keep-alive';
$this->user_agent = 'User-Agent:Mozilla/5.0 (SepidarSoft [Organic Search Engine Crawler] Linux Edition) AppleWebKit/536.5 (KHTML, like Gecko) SepidarBrowser/1.0.100.52 Safari/536.5';
$this->compression=$compression;
$this->proxy=$proxy;
$this->cookies=$cookies;
if ($this->cookies == TRUE) $this->cookie($cookie);
}
function cookie($cookie_file) {
if (file_exists($cookie_file)) {
$this->cookie_file=$cookie_file;
} else {
fopen($cookie_file,'w') or $this->error('The cookie file could not be opened. Make sure this directory has the correct permissions');
$this->cookie_file=$cookie_file;
@fclose($this->cookie_file);
}
}
function GET($url) {
$process = curl_init($url);
curl_setopt($process, CURLOPT_HTTPHEADER, $this->headers);
curl_setopt($process, CURLOPT_HEADER, 1);
curl_setopt($process, CURLOPT_USERAGENT, $this->user_agent);
if ($this->cookies == TRUE) curl_setopt($process, CURLOPT_COOKIEFILE, $this->cookie_file);
if ($this->cookies == TRUE) curl_setopt($process, CURLOPT_COOKIEJAR, $this->cookie_file);
curl_setopt($process,CURLOPT_ENCODING , $this->compression);
curl_setopt($process, CURLOPT_TIMEOUT, 30);
if ($this->proxy) curl_setopt($process, CURLOPT_PROXY, $this->proxy);
curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);
$response = curl_exec($process);
$header_size = curl_getinfo($process,CURLINFO_HEADER_SIZE);
$result['Header'] = HeaderProc(substr($response, 0, $header_size),'',1);
foreach($result['Header'] as $HeaderK=>$HeaderP){
foreach($HeaderP['Set-Cookie'] as $key=>$val){
$result['Header'][$HeaderK]['Set-Cookie'][$key]=$this->CookieAnalysis($val);
}
}
$result['Body'] = substr( $response, $header_size );
$result['HTTP_State'] = curl_getinfo($process,CURLINFO_HTTP_CODE);
$result['URL'] = curl_getinfo($process,CURLINFO_EFFECTIVE_URL);
curl_close($process);
return $result;
}
function POST($url,$data) {
$process = curl_init($url);
curl_setopt($process, CURLOPT_HTTPHEADER, $this->headers);
curl_setopt($process, CURLOPT_HEADER, 1);
curl_setopt($process, CURLOPT_USERAGENT, $this->user_agent);
if ($this->cookies == TRUE) curl_setopt($process, CURLOPT_COOKIEFILE, $this->cookie_file);
if ($this->cookies == TRUE) curl_setopt($process, CURLOPT_COOKIEJAR, $this->cookie_file);
curl_setopt($process, CURLOPT_ENCODING , $this->compression);
curl_setopt($process, CURLOPT_TIMEOUT, 30);
if ($this->proxy) curl_setopt($process, CURLOPT_PROXY, $this->proxy);
curl_setopt($process, CURLOPT_POSTFIELDS, $data);
curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($process, CURLOPT_POST, 1);
$response = curl_exec($process);
$header_size = curl_getinfo($process,CURLINFO_HEADER_SIZE);
$result['Header'] = HeaderProc(substr($response, 0, $header_size),'',1);
foreach($result['Header'] as $HeaderK=>$HeaderP){
foreach($HeaderP['Set-Cookie'] as $key=>$val){
$result['Header'][$HeaderK]['Set-Cookie'][$key]=$this->CookieAnalysis($val);
}
}
$result['Body'] = substr( $response, $header_size );
$result['HTTP_State'] = curl_getinfo($process,CURLINFO_HTTP_CODE);
$result['URL'] = curl_getinfo($process,CURLINFO_EFFECTIVE_URL);
curl_close($process);
return $result;
}
function error($error) {
echo "<center><div style='width:500px;border: 3px solid #FFEEFF; padding: 3px; background-color: #FFDDFF;font-family: verdana; font-size: 10px'><b>cURL Error</b><br>$error</div></center>";
die;
}
}
样品:
$cc = new cURL();
print_r( $cc->POST('http://www.domain.com'));
对于旧的PHP
function HeaderProc($response,$Run="",$String=1/*[Is 1 IF Use for String Mode ]*/){
if($String==1){
$response=explode("\r\n",$response);
}
$PartHeader=0;
$out[$PartHeader]=array();
while(list($key,$val)=each($response)){
$name='';
$value='';
$flag=false;
for($i=0;$i<strlen($val);$i++){
if($val[$i]==":"){
$flag=true;
for($j=$i+1;$j<strlen($val);$j++){
if($val[$i]=="\r" and $val[$i+1]=="\n"){
break;
}
$value.=$val[$j];
}
break;
}
$name.=$val[$i];
}
if($flag){
if($name=='' and $value==''){
$PartHeader++;
}else{
if(isset($out[$PartHeader][$name])){
if(is_array($out[$PartHeader][$name])){
$out[$PartHeader][$name][]=$value;
}else{
$T=$out[$PartHeader][$name];
$out[$PartHeader][$name]=array();
$out[$PartHeader][$name][0]=$T;
$out[$PartHeader][$name][1]=$value;
}
}else{
$out[$PartHeader][$name]=$value;
}
}
}else{
if($name==''){
$PartHeader++;
}else{
if(isset($out[$PartHeader][$name])){
if(is_array($out[$PartHeader][$name])){
$out[$PartHeader][$name][]=$value;
}else{
$T=$out[$PartHeader][$name];
$out[$PartHeader][$name]=array();
$out[$PartHeader][$name][0]=$T;
$out[$PartHeader][$name][1]=$name;
}
}else{
$out[$PartHeader][$name]=$name;
}
}
}
if($Run!=""){
$Run($name,$value);
}
}
return $out;
}
class cURL {
var $headers;
var $user_agent;
var $compression;
var $cookie_file;
var $proxy;
var $Cookie;
function CookieAnalysis($Cookie){//convert str cookie to array cookie
//echo $Cookie;
$this->Cookie=array();
preg_match("~(.*?)=(.*?);~si",' '.$Cookie.'; ',$M);
$this->Cookie[trim($M[1])]=trim($M[2]);
return $this->Cookie;
}
function cURL($cookies=false,$cookie='cookies.txt',$compression='gzip',$proxy='') {
$this->headers[] = 'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
$this->headers[] = 'Accept-Charset:ISO-8859-1,utf-8;q=0.7,*;q=0.3';
$this->headers[] = 'Accept-Encoding:gzip,deflate,sdch';
$this->headers[] = 'Accept-Language:en-US,en;q=0.8';
$this->headers[] = 'Cache-Control:max-age=0';
$this->headers[] = 'Connection:keep-alive';
$this->user_agent = 'User-Agent:Mozilla/5.0 (SepidarSoft [Organic Search Engine Crawler] Linux Edition) AppleWebKit/536.5 (KHTML, like Gecko) SepidarBrowser/1.0.100.52 Safari/536.5';
$this->compression=$compression;
$this->proxy=$proxy;
$this->cookies=$cookies;
if ($this->cookies == TRUE) $this->cookie($cookie);
}
function cookie($cookie_file) {
if (file_exists($cookie_file)) {
$this->cookie_file=$cookie_file;
} else {
fopen($cookie_file,'w') or $this->error('The cookie file could not be opened. Make sure this directory has the correct permissions');
$this->cookie_file=$cookie_file;
@fclose($this->cookie_file);
}
}
function GET($url) {
$process = curl_init($url);
curl_setopt($process, CURLOPT_HTTPHEADER, $this->headers);
curl_setopt($process, CURLOPT_HEADER, 1);
curl_setopt($process, CURLOPT_USERAGENT, $this->user_agent);
if ($this->cookies == TRUE) curl_setopt($process, CURLOPT_COOKIEFILE, $this->cookie_file);
if ($this->cookies == TRUE) curl_setopt($process, CURLOPT_COOKIEJAR, $this->cookie_file);
curl_setopt($process,CURLOPT_ENCODING , $this->compression);
curl_setopt($process, CURLOPT_TIMEOUT, 30);
if ($this->proxy) curl_setopt($process, CURLOPT_PROXY, $this->proxy);
curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);
$response = curl_exec($process);
$header_size = curl_getinfo($process,CURLINFO_HEADER_SIZE);
$result['Header'] = HeaderProc(substr($response, 0, $header_size),'',1);
foreach($result['Header'] as $HeaderK=>$HeaderP){
if(!is_array($HeaderP['Set-Cookie']))continue;
foreach($HeaderP['Set-Cookie'] as $key=>$val){
$result['Header'][$HeaderK]['Set-Cookie'][$key]=$this->CookieAnalysis($val);
}
}
$result['Body'] = substr( $response, $header_size );
$result['HTTP_State'] = curl_getinfo($process,CURLINFO_HTTP_CODE);
$result['URL'] = curl_getinfo($process,CURLINFO_EFFECTIVE_URL);
curl_close($process);
return $result;
}
function POST($url,$data) {
$process = curl_init($url);
curl_setopt($process, CURLOPT_HTTPHEADER, $this->headers);
curl_setopt($process, CURLOPT_HEADER, 1);
curl_setopt($process, CURLOPT_USERAGENT, $this->user_agent);
if ($this->cookies == TRUE) curl_setopt($process, CURLOPT_COOKIEFILE, $this->cookie_file);
if ($this->cookies == TRUE) curl_setopt($process, CURLOPT_COOKIEJAR, $this->cookie_file);
curl_setopt($process, CURLOPT_ENCODING , $this->compression);
curl_setopt($process, CURLOPT_TIMEOUT, 30);
if ($this->proxy) curl_setopt($process, CURLOPT_PROXY, $this->proxy);
curl_setopt($process, CURLOPT_POSTFIELDS, $data);
curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($process, CURLOPT_POST, 1);
$response = curl_exec($process);
$header_size = curl_getinfo($process,CURLINFO_HEADER_SIZE);
$result['Header'] = HeaderProc(substr($response, 0, $header_size),'',1);
foreach($result['Header'] as $HeaderK=>$HeaderP){
if(!is_array($HeaderP['Set-Cookie']))continue;
foreach($HeaderP['Set-Cookie'] as $key=>$val){
$result['Header'][$HeaderK]['Set-Cookie'][$key]=$this->CookieAnalysis($val);
}
}
$result['Body'] = substr( $response, $header_size );
$result['HTTP_State'] = curl_getinfo($process,CURLINFO_HTTP_CODE);
$result['URL'] = curl_getinfo($process,CURLINFO_EFFECTIVE_URL);
curl_close($process);
return $result;
}
function error($error) {
echo "<center><div style='width:500px;border: 3px solid #FFEEFF; padding: 3px; background-color: #FFDDFF;font-family: verdana; font-size: 10px'><b>cURL Error</b><br>$error</div></center>";
die;
}
}
样品:
$cc = new cURL();
print_r( $cc->POST('http://www.domain.com'));
Yahoo网站样本标头输出
[Header] => Array
(
[0] => Array
(
[HTTP/1.1 302 Found] => HTTP/1.1 302 Found
[Date] => Sat, 02 Mar 2013 14:37:19 GMT
[P3P] => policyref="http://info.yahoo.com/w3c/p3p.xml", CP="CAO DSP COR CUR ADM DEV TAI PSA PSD IVAi IVDi CONi TELo OTPi OUR DELi SAMi OTRi UNRi PUBi IND PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA POL HEA PRE LOC GOV"
[Cache-Control] => private
[X-Frame-Options] => SAMEORIGIN
[Set-Cookie] => fpc=d=a2polPzlISX4q5OZQBxq.CKduGwG2Wm1YrPD59ENCUl3uTzrs.8HlnpJROO8MWa6M.B8e1JuCsbW25qwqY5zEs.mA0_EVlAVPMhFCdfCxhZf6vWmmqpPm9bVzGYs8Y7IyTG7IFp9p0MN_FPQmzNM7I8XBu4iGCI8MbHWFvOMKmhN9MTkPC4KbNJ2izSK9xBXTedDnYw-&v=2; expires=Sun, 02-Mar-2014 14:37:19 GMT; path=/; domain=www.yahoo.com
[Location] => http://en-maktoob.yahoo.com/?p=us
[Vary] => Accept-Encoding
[Content-Type] => text/html; charset=utf-8
[Age] => 0
[Transfer-Encoding] => chunked
[Connection] => keep-alive
[Server] => YTS/1.20.13
)
[1] => Array
(
[HTTP/1.1 200 OK] => HTTP/1.1 200 OK
[Date] => Sat, 02 Mar 2013 14:37:20 GMT
[P3P] => policyref="http://info.yahoo.com/w3c/p3p.xml", CP="CAO DSP COR CUR ADM DEV TAI PSA PSD IVAi IVDi CONi TELo OTPi OUR DELi SAMi OTRi UNRi PUBi IND PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA POL HEA PRE LOC GOV"
[Cache-Control] => private
[X-Frame-Options] => SAMEORIGIN
[Set-Cookie] => Array
(
[0] => Array
(
[IU] => deleted
)
[1] => Array
(
[PH] => deleted
)
[2] => Array
(
[MSC] => t=1362235040X
)
[3] => Array
(
[fpc] => d=_7tfRPjaISWhpxKrzORZ47ywABwHrUd0vF3WBQH9UYD6KMC7fyjTBdcMMh1FYiufGwiXnhHgDV9gK_VrwVf.q.n_MoJj3B4OMV5Lw42TXrYN_xGhwsnsyUPvQTy79LJ.twkY0IQ3culhr0osKxe0MvGIPSRcYDWH13TUS5YhrnIP731WRyEDZlPh2gPUXxNc1nRtr7Y-&v=2
)
[4] => Array
(
[fpms] => p_30345347=%7B%22loc%22%3A%7B%22id%22%3A1940330%2C%22city%22%3A%22Abu+Dhabi%22%2C%22state%22%3A%22%22%2C%22country%22%3A%22UAE%22%7D%7D
)
[5] => Array
(
[fpc] => d=SaiDIsbaISXJV8ztcJqpafzGA13Lsq0TPQ7HJOn_.yLYWvNZF75ELqLKTLekfVYxmFj0OxOH_thzdIa9UNQIiwYXt99qJ8HNsqpWubAPIFaO1o36VbPBUz9Qu0Rzgzh6Qh.rJQnhPnj1m3NMeFlpYZ7kpVAsjL88RMdcGP82RMUEENd9mWXC7SkuY_CIR76Ne3pEgotZlVDVMABYyxJbM4N4jqG5zkC23Gy8epD4JzxcUuTWDyUn.LZaIqX1Gn6Fcn_f6de3&v=2
)
[6] => Array
(
[fpps] => _page=%7B%22wsid%22%3A%2221445690%22%7D
)
[7] => Array
(
[fpt] => d=Zc7DH53Xe9za_cphyvUoTpFDnmIFF977Sv9yIyBJtqtpcN4aLM18CC3FKuMd6AMXylr7FJjRBtWkJYiIdmrER9MPUOFt22FcF8rNk8Lu_kQMbAEra9CnHEhP0N8DVz6iKlRji6wGv_.3pOxmx_7Td1bq2D4RtVTE93P1kVGFgxlSV7Vtdf8JUxoRTq3dMKZuNQD5vY76rjiXf64lrQ89ONTWEpCGE3MxGVHnegZ71MiuKmYPLxH.AdNFzgw_EoD5QFWyxBxC3GNq7CarXzwJ5D4Uoiw690kzihlRQ66UgGj6sAdIIB_haiXQ6pJ7Q_w86gen6FBolLLiIBrDaujASks1fNzrWOfSH7HDn3GfqcCycIXcJDw_Xb8eGBgJVZFK2yuM0BF68NOW.nkACke1I.ufHsJXrvZH51Pg4dh9hMIsqeI-&v=1
)
[8] => Array
(
[fpc_s] => d=jbVQS4TaISWRQmb4Qu6ANMqdtfYe_QawTKJ.rdl.9vdhjLe6UHD_z3Pvh2HhUHGn2i4oPThLzibGfAmid4zCCnYjxdTbby8pCY566kgiSjnvroDbRszWKfTL4j8Bew5x1VnLUqLfpKWUq2jwAOj1WdBhiSajBzp_hg.8q8O1M0XO.hd7YXRtm66BnbOtcTli3arG1nfT96JakB5i8cyNrUMl1m4czoVB7MqJDipKCfQ.19r98RG0dJELW.fFXfry0AApcU8cweMqTTIuks1LAeVRngCAX7eRfB0eknd5DOqTpZlrMTmW.JjNnbI-&v=2
)
)
[Vary] => Accept-Encoding
[Content-Type] => text/html;charset=utf-8
[Content-Encoding] => gzip
[Age] => 0
[Transfer-Encoding] => chunked
[Connection] => keep-alive
[Server] => YTS/1.20.13
)
)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.