Is it possible to perform asynchronous web requests (as with asyncio) under PyQt4 (QWebPage)?
For example, how can I load multiple URLs in parallel with this code:
#!/usr/bin/env python3.4
import sys
import signal
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import QWebPage
class Crawler(QWebPage):
    """QWebPage that loads one URL, prints the resulting HTML and exits."""

    def __init__(self, url):
        """Remember *url*; ``content`` stays empty until loading finishes."""
        super().__init__()
        self.content = ''
        self._url = url

    def crawl(self):
        """Connect the load-finished handler and start loading the URL."""
        # Install the default SIGINT action before the load is started.
        signal.signal(signal.SIGINT, signal.SIG_DFL)
        finished = SIGNAL('loadFinished(bool)')
        self.connect(self, finished, self._finished_loading)
        target = QUrl(self._url)
        self.mainFrame().load(target)

    def _finished_loading(self, result):
        """Store the main frame's HTML, print it, then call ``sys.exit(0)``.

        *result* is the flag delivered by ``loadFinished(bool)``; it is not
        inspected here.
        """
        html = self.mainFrame().toHtml()
        self.content = html
        print(html)
        sys.exit(0)
def main(url='http://www.example.com'):
    """Load *url* with a Crawler inside a Qt event loop, then exit.

    The original version referenced ``self`` without receiving it and
    passed two arguments to ``Crawler``, whose constructor accepts only
    the URL. The URL is now an explicit parameter.

    Args:
        url: Address of the page to fetch.
    """
    # Create the application object before any page object is built.
    app = QApplication(sys.argv)
    crawler = Crawler(url)
    crawler.crawl()
    # Run the event loop and use its return code as the exit status.
    sys.exit(app.exec_())


if __name__ == '__main__':
    main()
Thanks
You cannot make self.mainFrame().load(QUrl(self._url))
work through asyncio, sorry -- the method is implemented in Qt itself.
But you can install the quamash event loop and asynchronously call the aiohttp.request coroutine to get web pages.
That approach doesn't work with QWebPage
though.
Requests are already done asynchronously, so all you need to do is create multiple instances of QWebPage.
Here's a simple demo based on your example script:
import sys, signal
from PyQt4 import QtCore, QtGui, QtWebKit
# Pages fetched by the demo; one Crawler is created per entry.
urls = [
    'http://qt-project.org/doc/qt-4.8/qwebelement.html',
    'http://qt-project.org/doc/qt-4.8/qwebframe.html',
    'http://qt-project.org/doc/qt-4.8/qwebinspector.html',
    'http://qt-project.org/doc/qt-4.8/qwebpage.html',
    'http://qt-project.org/doc/qt-4.8/qwebsettings.html',
    'http://qt-project.org/doc/qt-4.8/qwebview.html',
]
class Crawler(QtWebKit.QWebPage):
    """Web page that loads a single URL and prints a preview of its HTML.

    Several instances can be started one after another; each reports
    through its own ``loadFinished`` signal once its load has ended.
    """

    def __init__(self, url, identifier):
        """Store the target and hook up the load-finished handler.

        Args:
            url: Address of the page to load.
            identifier: Integer printed with the result to tell the
                crawlers apart.
        """
        super(Crawler, self).__init__()
        self.loadFinished.connect(self._finished_loading)
        self._id = identifier
        self._url = url
        self.content = ''

    def crawl(self):
        """Ask the main frame to start loading the stored URL."""
        self.mainFrame().load(QtCore.QUrl(self._url))

    def _finished_loading(self, result):
        """Report the outcome of the load and schedule this page's deletion.

        Args:
            result: Flag delivered by ``loadFinished(bool)``; a false value
                is reported as a failed load instead of being printed as
                page content.
        """
        if result:
            self.content = self.mainFrame().toHtml()
            print('[%d] %s' % (self._id, self._url))
            # Only the first 250 characters are shown as a preview.
            print(self.content[:250].rstrip(), '...')
        else:
            print('[%d] %s (failed to load)' % (self._id, self._url))
        print()
        self.deleteLater()
if __name__ == '__main__':
    app = QtGui.QApplication(sys.argv)
    # Use the default SIGINT action while the event loop is running.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    crawlers = []
    for identifier, address in enumerate(urls):
        page = Crawler(address, identifier)
        # Keep a reference to every page in the list.
        crawlers.append(page)
        page.crawl()
    # Run the event loop; its return code becomes the exit status.
    sys.exit(app.exec_())
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.