[英]How to extract data from webpage using python
有人可以指出我做錯了什么嗎?
輸入項目名稱:Rockfish Traceback(最近一次通話):文件“ C:\\ Users \\ partn_000 \\ Desktop \\ sarvesh \\ Python源代碼\\ working \\ jellyneoscraper.py”,第45行,位於search(br,ITEMNAME)文件中:\\ Users \\ partn_000 \\ Desktop \\ sarvesh \\ Python源代碼\\ working \\ jellyneoscraper.py“,行33,搜索增量=增量[0] IndexError:列表索引超出范圍
這是我寫的代碼
#Library Imports
import mechanize
import cookielib
import re
import sys
import time
import os.path
from operator import itemgetter
import ctypes
ctypes.windll.kernel32.SetConsoleTitleA("test")
def init_browser():
br = mechanize.Browser()
br.set_handle_equiv(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(False)
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36')]
cj = cookielib.LWPCookieJar()
br.set_cookiejar(cj)
return br
def search(br, ITEMNAME):
datapage = br.open('http://items.jellyneo.net/index.php?go=show_items&name=' +ITEMNAME +'&name_type=exact&desc=&cat=0&specialcat=0&status=0&rarity=0&sortby=name&numitems=20')
f = open('search.html', 'w')
f.write(datapage.read())
f.close()
value = re.findall('style="font-weight:bold;">(.+) NP</a></td>"',datapage.read()) #(.+) is replaced in place of required value
value = value[0].replace(",","")
value = int(value)
print value
#http://items.jellyneo.net/index.php?go=show_items&name=Rockfish&name_type=exact&desc=&cat=0&specialcat=0&status=0&rarity=0&sortby=name&numitems=20
#('style="font-weight:bold;"> (.+) NP</a>"',search.read())
ITEMNAME = raw_input('Enter Item name:eg. Rockfish')
br = init_browser()
search(br, ITEMNAME)
在您的搜索方法中,您將讀取整個頁面並將其保存到文件中,然后嘗試重新讀取它並執行您的正則表達式,但是您已經在頁面末尾,因此它返回空字符串。 您應該先添加datapage.seek(0),然后再像這樣讀取它:
datapage = br.open('http://items.jellyneo.net/index.php?go=show_items&name=' +ITEMNAME +'&name_type=exact&desc=&cat=0&specialcat=0&status=0&rarity=0&sortby=name&numitems=20')
f = open('search.html', 'w')
f.write(datapage.read())
f.close()
datapage.seek(0)
value = re.findall('style="font-weight:bold;">(.+) NP</a></td>"',datapage.read()) #(.+) is replaced in place of required value
value = value[0].replace(",","")
value = int(value)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.