I'm trying to log in and scrape a value from this page: https://adf.ly, specifically the number of views my links have. I have managed to log in and reach the page where the value I want appears, but the problem is that this value isn't loaded with the page; it is loaded afterwards by a JavaScript function. The code I have used is the following:
<?php
/**
 * Logs in to adf.ly and returns the text of the "summary-views" heading
 * found on the publisher dashboard.
 *
 * Steps:
 *  1. GET the login form to obtain the session cookies and the hidden token.
 *  2. POST the credentials together with that token.
 *  3. GET the publisher dashboard and extract <h4 id="summary-views">.
 *
 * cURL does not execute JavaScript, so the returned value is whatever the
 * server placed in the initial HTML (e.g. a "loading..." placeholder when
 * the page fills the number in from a script).
 *
 * @return string|null Contents of the summary-views heading, or null when
 *                     any HTTP request fails or the heading is not found.
 */
function getViewsAdfly(){
    $cookiePath = './cookieAnuncios.txt';
    $loginUrl = "https://login.adf.ly/login";
    // The original URL contained a stray space ("adf. ly"), which is not a valid host.
    $urlViews = "https://adf.ly/publisher";

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $loginUrl);
    // Closing parenthesis was missing from the User-Agent string.
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)');
    curl_setopt($ch, CURLOPT_HTTPHEADER, array("Accept-Language: es-es,en"));
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // 2 = verify that the certificate name matches the host; the value 1 is
    // no longer supported by libcurl.
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 1);
    curl_setopt($ch, CURLOPT_COOKIESESSION, true);
    // Same file for reading and writing so the session survives all three requests.
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookiePath);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiePath);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    // Step 1: fetch the login form.
    $result = curl_exec($ch);
    if ($result === false) {
        curl_close($ch);
        return null;
    }

    // Step 2: submit the credentials. http_build_query() URL-encodes each value.
    $parametrosPost = http_build_query(array(
        'token'     => (string) getMatchToken($result),
        'bmlUrl'    => '',
        'bmlType'   => '',
        'bmlDomain' => '',
        'bmlFolder' => '',
        'dest'      => '',
        'response'  => '',
        'challenge' => '',
        'email'     => "my email",
        'password'  => "my password",
    ));
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $parametrosPost);
    if (curl_exec($ch) === false) {
        curl_close($ch);
        return null;
    }

    // Step 3: switch the handle back to GET and load the dashboard.
    curl_setopt($ch, CURLOPT_URL, $urlViews);
    curl_setopt($ch, CURLOPT_HTTPGET, 1);
    $resultViews = curl_exec($ch);
    curl_close($ch);
    if ($resultViews === false) {
        return null;
    }

    return getMatchViewsAdfly($resultViews);
}
/**
 * Extracts the value of the hidden "token" input from the login page HTML.
 *
 * Only the first occurrence is considered. The match is case-insensitive,
 * non-greedy, and lets "." span newlines (pattern modifiers "siU").
 *
 * @param string $result Raw HTML of the login page.
 * @return string|null The token value, or null when the input is not present.
 */
function getMatchToken($result){
    $pattern = "(<input type=\"hidden\" name=\"token\" id=\"token\" value=\"(.*)\" />)siU";
    // Guard against "no match" instead of indexing into an empty result array.
    if (preg_match($pattern, (string) $result, $match) !== 1) {
        return null;
    }
    return $match[1];
}
/**
 * Extracts the contents of <h4 id="summary-views">…</h4> from the publisher
 * dashboard HTML.
 *
 * Only the first occurrence is considered. The match is case-insensitive,
 * non-greedy, and lets "." span newlines (pattern modifiers "siU").
 *
 * @param string $result Raw HTML of the publisher dashboard.
 * @return string|null The heading's inner text, or null when the heading is absent.
 */
function getMatchViewsAdfly($result){
    $pattern = "(<h4 id=\"summary-views\">(.*)</h4>)siU";
    // Guard against "no match" instead of indexing into an empty result array.
    if (preg_match($pattern, (string) $result, $match) !== 1) {
        return null;
    }
    return $match[1];
}
?>
If I use this code, the page shows a message saying "There has been a problem while fetching the stats. Please refresh the page and try again." and the <h4 id="summary-views"></h4> element, which contains the data I want to scrape, only shows "loading...". I think that the JavaScript function which loads the real number into the container isn't running correctly. Do I have to add a missing curl_setopt option so that the JavaScript runs? How can I get the real data I want to scrape after it has been loaded into the container?
EDIT:
The JavaScript stores the data in s[0]. Is there any way I can get what is stored there?
// Dashboard initialisation, run once the DOM is ready: sets up the edit-link
// dialog, the tab widgets and select boxes, binds the UI event handlers, and
// triggers the first render through PublisherIndex.
$(document).ready(function() {
// Edit-link dialog: modal, fixed size, not opened automatically and not
// closable with Escape. The size depends on AdFly.awesomeBrowser(), which is
// defined outside this excerpt.
$('#edit_link').dialog({
autoOpen: false,
height: AdFly.awesomeBrowser() ? 420 : 450,
width: AdFly.awesomeBrowser() ? 550 : 580,
modal: true,
closeOnEscape: false,
resizable: false
});
$('#edit_advert_type').chosen();
// Both tab groups share one configuration: no animation and no URL-hash update.
$('#charts-tabs, #links-tabs').easytabs({animate: false, updateHash: false, tabs: "nav > ul > li"});
// After a chart tab change: refresh the countries table, hide every earnings
// list, then show the "popads" list for tab 4 or the "simplelinks" list otherwise.
$('#charts-tabs')
.bind('easytabs:after', function (event, $clicked, $targetPanel, settings) {
PublisherIndex.updateCountries();
$('ul.earnings').addClass('displayNone');
if($targetPanel.selector == '#charts-tab-4'){
$('ul.earnings.popads').removeClass('displayNone');
}else{
$('ul.earnings.simplelinks').removeClass('displayNone');
}
});
$('#report_date').chosen();
// Choosing a report date marks the select's parent as the current period and
// re-renders the stats and the links.
$('#report_date').change(function() {
$('.timePicker').removeClass('current');
$(this).parent().addClass('current');
PublisherIndex.renderStats();
PublisherIndex.updateLinks();
});
// Clicking a time picker makes it the current period and re-renders stats,
// links and summary. A picker whose parent's class is exactly "all_time" only
// shows an alert and changes nothing.
$('.timePicker').click(function() {
if ($(this).parent().attr('class') == 'all_time') {
alert("Sorry all time stats are currently disabled.");
return;
}
$('.timePicker').removeClass('current');
$('#report_date').parent().removeClass('current');
$(this).addClass('current');
PublisherIndex.renderStats();
PublisherIndex.updateLinks();
PublisherIndex.renderSummary();
// Returning false from a jQuery handler prevents the default action and stops propagation.
return false;
});
// Pressing Enter (key code 13) in the search box refreshes the links and
// shows the clear-search control.
$('#findlink').keypress(function(event) {
var keycode = (event.keyCode ? event.keyCode : event.which);
if (keycode == '13') {
PublisherIndex.updateLinks();
$('#search_clear').show();
}
});
// Refresh the links whenever the links tab changes.
$('#links-tabs').bind('easytabs:after', function(event, $clicked, $targetPanel, settings) {
PublisherIndex.updateLinks();
});
// Initial load. The summary render is commented out in the original, so it
// is not part of the initial load.
PublisherIndex.getAnnouncements();
//PublisherIndex.renderSummary();
PublisherIndex.renderStats();
PublisherIndex.renderLinks();
// Switching the monthly-earnings selector re-renders the stats and the summary.
$('.monthlyEarningsSummarySelector').click(function() {
$('.monthlyEarningsSummarySelector').removeClass('current');
$(this).addClass('current');
PublisherIndex.renderStats();
PublisherIndex.renderSummary();
return false;
});
});
// Once the DOM is ready, populate the "edit group" select with the same
// options as the URL-group select in the shorten section, then hand it to
// the chosen() plugin.
$(document).ready(function () {
    // Little hack for getting the URL Groups: reuse the markup of the existing select.
    var groupOptionsHtml = $('section.shorten select[name="group"]').html();
    $('#edit_group').html(groupOptionsHtml).chosen();
});
// Controller object for the publisher dashboard. Only part of it is visible
// in this excerpt: the ready handlers also call renderStats, renderLinks and
// updateLinks, which are not defined here.
var PublisherIndex = {
// State slots, all initialised to null. Within this excerpt only
// countriesLength is read (by updateCountries); nothing here assigns them.
countriesDt: null,
countriesLength: null,
referralsDt: null,
earningsChart: null,
graphData: null,
popupsChart: null,
popupGraphData: null,
earningsSummary: null,
tokenCSRF: null,
linkId: null,
// Resizes the charts area to fit the countries table and redraws the table.
// The row count is clamped to the range 10..20; the base height of 419 grows
// by one measured row height for every row above 10.
updateCountries: function() {
var oTable = $('#charts-tab-2').find('table').dataTable();
var table_height = 419;
var num_rows = this.countriesLength;
var row_height = $('section.dashboard section.charts table tr').outerHeight();
if(num_rows<10){
num_rows = 10;
}else if(num_rows>20){
num_rows=20;
}
var required_height = parseInt(table_height) + (parseInt(num_rows)-10)*parseInt(row_height);
$('section.dashboard section.charts nav.menu').css('height', parseInt(required_height));
// The panels container is set 8 units taller than the menu.
$('section.dashboard section.charts .data .panels').css('height', parseInt(required_height)+8);
// Apply the clamped row count as the table's display length, then redraw.
var oSettings = oTable.fnSettings();
oSettings._iDisplayLength = num_rows;
oTable.fnDraw();
},
// Delegates to ApiCaller (defined outside this excerpt), passing the
// publisher user type and the #announcements element.
getAnnouncements: function() {
ApiCaller.getAnnouncements(AdFly.USER_TYPE.PUBLISHER, $('#announcements'), false);
},
// Makes the non-total earnings lists visible and sums views and earnings
// over the entries of this.graph.data.
// NOTE(review): as shown, the totals are computed but never used or
// returned; the rest of the function is presumably cut off — confirm
// against the full source.
renderSummary: function() {
$('ul.earnings:not(.totals)').css('visibility','visible');
// NOTE(review): $t is assigned without var, so it becomes an implicit
// global; it is not read anywhere in this excerpt.
$t = $('.timePicker.current, #reportDateWrapper.current');
var key = null;
var copy = null;
key = $("#report_date").val();
var i = 1;
// Text of the fourth (index 3) matching chosen-select label, with its last
// five characters removed.
copy = $('.chzn-container-single .chzn-single span').eq(3).text();
copy = copy.substring(0, copy.length - 5);
var views = 0;
var earnings = 0;
var referrals = 0;
var total = 0;
// NOTE(review): reads this.graph.data, but the object literal above only
// declares graphData, not graph — presumably assigned elsewhere; verify.
// The callback's i parameter shadows the local i declared above.
$.each(this.graph.data,function(i,day){
views+=parseFloat(day.views);
earnings+=parseFloat(day.earnings);
});
}
}
Thank you in advance.
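cURL doesn't execute JavaScript. You have two options:
1. Find the request that the page's JavaScript makes to load the stats (for example with the Network tab of your browser's developer tools) and send that same request with cURL, reusing the session cookies from your login.
2. Use a headless browser (for example Selenium or PhantomJS) that actually runs the page's JavaScript, and read the value once it has been rendered.
Good luck!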
The technical post pages of this site are published under the CC BY-SA 4.0 license. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.