[英]How to forge request using createRequest by subclassing QNetworkAccessManager?
I would like to download dynamically generated images from a website. 我想从网站下载动态生成的图像。 The website uses JavaScript code, and clicking a button turns to the previous or next image. 该网站使用JavaScript代码,单击按钮可以转到上一张或下一张图片。 I inspected the HTTP requests and responses in Chrome. 我检查了Chrome中的HTTP请求和响应。 The requests are almost identical except for the image name, which increases numerically (e.g. 000001.jpg, 000002.jpg). 除了图像名称外,这些请求几乎相同,图像名称按数字递增(例如: 000001.jpg、000002.jpg)。 Now I can access the first image and save it to disk by subclassing QWebView and using a customized QNetworkAccessManager. 现在,我可以通过子类化QWebView并使用自定义的QNetworkAccessManager来访问第一个图像并将其保存到磁盘。 I override the createRequest function: 我重写了createRequest函数:
import sys,urllib,time,os
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
from PyQt4.QtNetwork import *
from PIL import Image
class NetworkAccessManager(QNetworkAccessManager):
    """Network access manager that saves page images requested by a QWebPage.

    Every request issued through the manager goes through createRequest().
    Two kinds of URL are treated specially:

    * image requests (URL contains "zoom=" and "ss2jpg" but not "pi=2"):
      the reply body is collected in saveImage() and written to disk under
      the file name parsed from the URL; "nextPage()" is emitted afterwards.
    * page requests (URL contains "uf=ssr"): the request, its URL string and
      the number parsed from the URL are remembered in current_req, reqstr
      and jpgcnt so that other code can build a follow-up request.

    All other requests are passed to the base class unchanged.
    """

    def __init__(self, old_manager):
        """Create the manager, taking over the settings of *old_manager*.

        The cache, cookie jar, proxy and proxy factory are read from
        *old_manager* and installed on this instance.
        """
        QNetworkAccessManager.__init__(self)
        self.old_manager = old_manager
        self.setCache(old_manager.cache())
        self.setCookieJar(old_manager.cookieJar())
        self.setProxy(old_manager.proxy())
        self.setProxyFactory(old_manager.proxyFactory())
        self.imreply = None        # reply of the most recent image request
        self.reqstr = None         # URL string of the most recent "uf=ssr" request
        self.otherreply = None     # reply of the most recent "uf=ssr" request
        self.current_req = None    # request object of the most recent "uf=ssr" request
        self.cnt = 0               # number of follow-up requests sent so far
        self.jpgcnt = 0            # number parsed from the most recent matching URL
        self.jpgName = "test.jpg"  # file name the next image is saved under
        self.first = True
        self.ba = QByteArray()     # bytes received so far for the current image

    @staticmethod
    def _parseInt(text):
        """Return int(text), or None when *text* is not a valid integer."""
        try:
            return int(text)
        except ValueError:
            return None

    def createRequest(self, operation, request, data):
        """Create the reply for *request*, hooking image and page requests.

        A URL that matches one of the special patterns but cannot be parsed
        is handled like any ordinary request; raising here would make this
        override return nothing to its caller.
        """
        req = request.url().toString()
        if (req.contains(QString("zoom=")) and req.contains(QString("ss2jpg"))
                and not req.contains(QString("pi=2"))):
            strreq = str(req)
            l = strreq.find("jid=")
            r = strreq.find(".jpg&a")
            number = None
            if l != -1 and r != -1:
                number = self._parseInt(strreq[l + 5:r])
            if number is not None:
                self.jpgName = strreq[l + 5:r + 4]
                self.jpgcnt = number
                print("%s %s" % (self.jpgName, self.jpgcnt))
                # Drop bytes left over from an image that never completed so
                # they cannot be prepended to the new one.
                self.ba.clear()
                self.imreply = QNetworkAccessManager.createRequest(
                    self, operation, request, data)
                self.connect(self.imreply, SIGNAL("readyRead()"),
                             self.saveImage)
                return self.imreply
        elif req.contains(QString("uf=ssr")):
            strreq = str(req)
            r = strreq.find("?")
            # The page number is the six characters right before the "?".
            number = self._parseInt(strreq[r - 6:r]) if r >= 6 else None
            if number is not None:
                self.reqstr = strreq
                self.current_req = request
                self.jpgcnt = number
                self.otherreply = QNetworkAccessManager.createRequest(
                    self, operation, request, data)
                return self.otherreply
        return QNetworkAccessManager.createRequest(
            self, operation, request, data)

    def saveImage(self):
        """Collect data of the current image reply and save it when complete.

        Connected to the reply's readyRead() signal.  Data is only consumed
        for JPEG/PNG replies.  The image is considered complete once the
        number of buffered bytes equals the Content-Length header; it is then
        decoded, written to self.jpgName, the buffer is cleared and
        "nextPage()" is emitted.

        NOTE: a reply without a valid Content-Length header is never saved,
        because completion is detected solely through that header.
        """
        reply = self.imreply
        if reply is None:
            return
        contentType = reply.header(QNetworkRequest.ContentTypeHeader).toString()
        if not (contentType.contains(QString("image/jpeg"))
                or contentType.contains(QString("image/png"))):
            return
        contentLen, valid = QString(reply.rawHeader("Content-Length")).toInt()
        self.ba.append(reply.readAll())
        # Without a valid length toInt() yields 0, which must not be mistaken
        # for "an empty image has been fully received".
        if not valid or self.ba.size() != contentLen:
            return
        im = QImage.fromData(self.ba)
        # Saved once only: reopening the file and saving it again would just
        # re-encode the picture a second time.
        if im.save(self.jpgName):
            print("saving image %s %s" % (contentLen, self.jpgName))
        else:
            print("could not save image %s" % self.jpgName)
        self.ba.clear()
        self.emit(SIGNAL("nextPage()"))
class dxWebView(QWebView):
    """Web view that can request the page following the one last seen."""

    def __init__(self):
        QWebView.__init__(self)

    @staticmethod
    def _nextUrl(url, current):
        """Return *url* with its page number replaced by ``current + 1``.

        The page number is expected as six digits directly before the "?"
        of *url*; only that field is rewritten, zero-padded to six digits.
        Replacing the unpadded number everywhere in the string would also
        change unrelated digits (for page 1, every "1" in the URL).  When no
        such field is found, the previous plain text replacement is used.
        """
        mark = url.find("?")
        field = url[mark - 6:mark] if mark >= 6 else ""
        if field.isdigit():
            return url[:mark - 6] + "%06d" % (current + 1) + url[mark:]
        return url.replace(str(current), str(current + 1))

    def clickNext(self):
        """Send a request for the next page through the page's manager.

        Reads cnt, current_req, reqstr and jpgcnt from the network access
        manager of the page.  Does nothing once 50 follow-up requests have
        been sent, or while the manager has not recorded a page request yet.
        """
        manager = self.page().networkAccessManager()
        if manager.cnt >= 50:
            return
        nextreq = manager.current_req
        if nextreq is None or manager.reqstr is None:
            # No page request has been recorded, nothing to derive from.
            return
        nexturl = self._nextUrl(manager.reqstr, manager.jpgcnt)
        print("next url %s" % nexturl)
        nextreq.setUrl(QUrl(nexturl))
        manager.get(QNetworkRequest(nextreq))
        manager.cnt = manager.cnt + 1
def main():
    """Open the start page in a dxWebView that uses NetworkAccessManager.

    Enables plugins globally, swaps the page's network access manager for a
    NetworkAccessManager built from the original one, connects its
    "nextPage()" signal to the view's clickNext(), loads the start URL and
    runs the Qt event loop until the application exits.
    """
    application = QApplication(sys.argv)
    QWebSettings.globalSettings().setAttribute(
        QWebSettings.PluginsEnabled, True)

    browser = dxWebView()
    page = browser.page()
    original_manager = page.networkAccessManager()
    capturing_manager = NetworkAccessManager(original_manager)
    page.setNetworkAccessManager(capturing_manager)
    QObject.connect(capturing_manager, SIGNAL("nextPage()"),
                    browser.clickNext)

    start_url = (
        "http://www.yishuleia.cn/DrsPath.do"
        "?kid=686A67696A6F6A673134343438303337"
        "&username=gdnz2&spagenum=201&pages=50&fid=14813857"
        "&a=3fc3e380601ced0f08749c964294120e"
        "&btime=2013-04-03&etime=2013-04-23&template=bookdsr1"
        "&firstdrs=http%3A%2F%2Fbook.duxiu.com%2FbookDetail.jsp"
        "%3FdxNumber%3D000008299393%26d%3D592DC22226A893A958A6578E7D039A43"
    )
    browser.load(QUrl(start_url))
    browser.show()

    exit_code = application.exec_()
    sys.exit(exit_code)
# Start the application only when this file is executed as a script.
if "__main__" == __name__:
    main()
When the first image is saved, clickNext is triggered and the QNetworkAccessManager should send the next request. But I found that manager.get(nextreq) did not work: the HTTP analyzer did not sniff any HTTP request or response. 保存第一个图像后,将触发clickNext,QNetworkAccessManager应当发送下一个请求。但是我发现manager.get(nextreq)无法正常工作:HTTP分析器没有嗅探到任何HTTP请求和响应。 Is something wrong in my clickNext function? 我的clickNext函数有什么问题吗? How should I do this? 应该怎么做? Thanks! 谢谢!
As such the QNetworkAccessManager is a part of the QWebPage object, and the createRequest() method is invoked whenever there is any request for a resource from the rendered HTML (and any javascript it contains). 因此,QNetworkAccessManager是QWebPage对象的一部分,只要从呈现的HTML(及其包含的任何JavaScript)中有对资源的任何请求,都将调用createRequest()方法。 As per my understanding the clickNext() function won't really have access to the actual DOM of the webpage in the manner you require. 根据我的理解,clickNext()函数实际上无法以您所需的方式访问网页的实际DOM。
If your aim is to build an application that can download all of these pictures, you can run some simple JavaScript on the site that automatically clicks through to the 'Next' image. 如果您的目标是构建一个可以下载所有这些图片的应用程序,则可以在该网站上运行一些简单的JavaScript,让它自动点击进入“下一张”图片。 Then, as you have already done, you watch for the image requests in your overridden createRequest() function. 然后,像您已经做的那样,在重写的createRequest()函数中监视加载图像的请求。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.