[英]Calling an inner function in Python
我有这个最终的 main.py,它把我单独编写的每个函数组合在一起,但我无法让它正常工作:它最后确实返回了 Success,但实际上什么也没做,我的本地文件夹和 MongoDB 中都没有生成任何内容。函数如下:
# NOTE(review): this listing has lost its indentation, so the nesting of the
# three helpers below inside gw2_etl is inferred from the surrounding text,
# not from the code itself.
# Intended pipeline: scrape a log page (log_scrape), reduce the saved JSON to
# per-player stats (store_data), then insert those stats into MongoDB
# (mongo_connect).
def gw2_etl(url):
# Step 1: download the page at `url` and dump the JSON-looking text found in
# one of its <script> tags to a file.
def log_scrape(url):
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'}
response = requests.get(url=url, headers=HEADERS)
soup = BeautifulSoup(response.content, 'html.parser')
# Takes the ninth <script> tag (index 8) of the page.
data = soup.find_all('script')[8]
dataString = data.text.rstrip()
# Greedy match of every "{...}" run; `.` does not cross newlines, so each
# match stays within one line of the script text.
logData = re.findall(r'{.*}', dataString)
try:
# The boss name is the 4th or 5th '/'-separated piece of the URL, depending
# on how many pieces the URL splits into.
urlLines = url.split('/')
if len(urlLines) < 5:
bossName = urlLines[3]
elif len(urlLines) == 5:
bossName = urlLines[4]
except Exception as e:
return 'Error' + str(e)
# NOTE(review): bossName is never assigned when the URL splits into more than
# five pieces, because neither branch above runs.
# The second '_'-separated piece of the boss name selects the encounter.
tag = bossName.split('_')
bossTag = tag[1]
# NOTE(review): no except/finally clause for this try appears in the listing.
try:
# Wing_1
if bossTag == 'vg':
pathName = 'ETL\EXTRACT_00\Web Scraping\Boss_data\Wing_1\Valley_Guardian'
with open(f'{pathName}\{bossName}.json', 'w') as f:
for line in logData:
# NOTE(review): file.write() returns the number of characters written, so
# jsonFile holds an int here, while store_data() passes its argument to open().
jsonFile = f.write(line)
return jsonFile
# NOTE(review): called with no argument although log_scrape requires `url`;
# whether this line belongs to log_scrape or to gw2_etl cannot be told from
# the flattened listing.
return log_scrape()
# Step 2: load the JSON file at path `jsonFile`, compute per-phase DPS for
# every player, and write the result as JSON and CSV.
def store_data(jsonFile):
with open(jsonFile) as f:
data = json.load(f)
# Derive the encounter tag from the file name: last '\\'-separated piece of
# the path, then its second '_'-separated piece.
sp = jsonFile.split('\\')
posSp = sp[-1]
bossTag = posSp.split('_')
nameTag = bossTag[1]
if len(bossTag) > 2:
nameTag = bossTag[1]
elif len(bossTag) == 2:
# With exactly two pieces, keep only the text before the first '.'.
tagSplit = nameTag.split('.')
nameTag = tagSplit[0]
# Players Data:
# One parallel list per field, filled in the order of data['players'].
player_group = []
player_acc = []
player_names = []
player_classes = []
for player in data['players']:
player_group.append(player['group'])
player_acc.append(player['acc'])
player_names.append(player['name'])
player_classes.append(player['profession'])
try:
# Wing-1
if nameTag == 'vg':
# Create lists:
player_dps1 = []
player_dps2 = []
player_dps3 = []
# Entries 1, 6 and 12 of data['phases'] are used as phases 1, 2 and 3.
# Each DPS value is the first item of a dpsStats row divided by the phase
# duration / 1000 (presumably milliseconds to seconds; TODO confirm).
# Phase_1
phase1 = data['phases'][1]['dpsStats']
phase1_time_raw = data['phases'][1]['duration']
phase1_time = round(phase1_time_raw/1000,1)
for dps in phase1:
dps1_raw = dps[0]
player_dps1.append(round(dps1_raw/phase1_time,2))
# Phase_2
phase2 = data['phases'][6]['dpsStats']
phase2_time_raw = data['phases'][6]['duration']
phase2_time = round(phase2_time_raw/1000,1)
for dps in phase2:
dps2_raw = dps[0]
player_dps2.append(round(dps2_raw/phase2_time,2))
# Phase_3
phase3 = data['phases'][12]['dpsStats']
phase3_time_raw = data['phases'][12]['duration']
phase3_time = round(phase3_time_raw/1000,1)
for dps in phase3:
dps3_raw = dps[0]
player_dps3.append(round(dps3_raw/phase3_time,2))
stats_dict = {
'players':{
'group': player_group,
'account': player_acc,
'names': player_names,
'profession': player_classes,
'phase_1_dps': player_dps1,
'phase_2_dps': player_dps2,
'phase_3_dps': player_dps3
}
}
# df is built here and written to CSV at the end of this function.
df = pd.DataFrame(stats_dict['players'], columns=['group','account','names','profession','phase_1_dps','phase_2_dps','phase_3_dps'])
# NOTE(review): if this return sits inside the `if` branch, the JSON/CSV
# export further down is never reached for 'vg'; the original indentation
# is not recoverable from this listing.
return stats_dict
except Exception as e:
print('Error' + str(e))
sys.exit()
# JSON generator (MongoDB)
pathName = 'ETL\TRANSFORM_01\Players_info'
jsonString = json.dumps(stats_dict)
with open(f"{pathName}\{nameTag}_player_stats.json", 'w') as f:
f.write(jsonString)
# CSV generator (MySQL, PostgreSQL)
df.to_csv(f"{pathName}\{nameTag}_player_stats.csv",index=True)
# NOTE(review): called with no argument although store_data requires `jsonFile`.
return store_data()
# Step 3: insert `stats_dict` as one document into the players_info
# collection of the GW2_SRS database on the MongoDB server at localhost:27017.
def mongo_connect(stats_dict):
try:
# NOTE(review): recent PyMongo versions connect in the background, so an
# unreachable server may not raise here; confirm for the installed version.
client = pymongo.MongoClient('mongodb://localhost:27017/')
except Exception as e:
print('Connection could not be done' + str(e))
sys.exit()
db = client['GW2_SRS']
collection = db['players_info']
mongo_insert = collection.insert_one(stats_dict)
# NOTE(review): called with no argument although mongo_connect requires `stats_dict`.
return mongo_connect()
# NOTE(review): no call visible above hands url, a file path or stats_dict to
# the helpers before this return.
return 'Success!'
pass
我的目标是,当我调用 gw2_etl() 时,它会运行内部的每个进程(log_scrape、store_data 和 mongo_connect)并在最后返回成功消息。 我可能做错了,因为它既不运行任何东西也不发送错误消息。
对于mongo连接,我需要返回stats_dict,因为它是我要上传到那里的JSON文件,csv文件仅用于本地存储。
我实际上删掉了一些 boss 的代码,因为完整代码实在太长了。
如果您对我如何使这项工作有任何提示或线索,我将不胜感激。
在从函数返回之前,您仍然需要在 gw2_etl() 中单独调用所有这些函数。在另一个函数内部定义函数,只意味着您无法在外层函数之外访问它们。所以请在 return 语句之前添加:
# Call each helper explicitly inside gw2_etl(), before its final return.
# json_file and stats_dict are placeholders: they must be bound to the values
# the earlier steps produce before these calls can run.
log_scrape(url)
store_data(json_file)
mongo_connect(stats_dict)
并从那里继续。 您会注意到您需要传递一些变量来调用具有正确 arguments 的函数,但我将这部分留给您自己弄清楚。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.