[英]Q: Pandas dataframe from for loop
編輯2,9 / 1,請參閱下面的答案!
這里的Python和Pandas相當新。 我這里有一個腳本,該腳本使用for循環使用列表中的每一行查詢數據庫。 一切都很好,但是我無法弄清楚如何從該循環的結果構建數據框架。 任何指針都歡迎!
#Remove stuff
print "Cleaning list"
def multiple_replacer(key_values):
replace_dict = dict(key_values)
replacement_function = lambda match: replace_dict[match.group(0)]
pattern = re.compile("|".join([re.escape(k) for k, v in key_values]), re.M)
return lambda string: pattern.sub(replacement_function, string)
multi_line = multiple_replacer(key_values)
print "Querying Database..."
for line in source:
brand_url = multi_line(line)
#Run Query with cleaned list
mysql_query = ("select ub.url as 'URL', b.name as 'Name', b.id as 'ID' from api.brand b join api.url_brand ub on b.id=ub.brand_id where ub.url like '%%%s%%' and b.deleted=0 group by 3;" % brand_url)
list1 = []
brands = my_query('prod', mysql_query)
print "Writing CSV..."
#Create DF and CSV
for row in brands:
list1.append({"URL":row['URL'],"Name":['Name'],"ID":['ID']})
if brands.shape == (3,0):
df1 = pd.DataFrame(data = brands, columns=['URL','Name','ID'])
output = df1.to_csv('ongoing.csv',index=False)
編輯8/30這是我的編輯,嘗試使用zyxue的方法:
#Remove stuff
print "Cleaning list"
def multiple_replacer(key_values):
replace_dict = dict(key_values)
replacement_function = lambda match: replace_dict[match.group(0)]
pattern = re.compile("|".join([re.escape(k) for k, v in key_values]), re.M)
return lambda string: pattern.sub(replacement_function, string)
multi_line = multiple_replacer(key_values)
print "Querying Database..."
for line in source:
brand_url = multi_line(line)
#Run Query with cleaned list
mysql_query = ("select ub.url as 'URL', b.name as 'Name', b.id as 'ID' from api.brand b join api.url_brand ub on b.id=ub.brand_id where ub.url like '%%%s%%' and b.deleted=0 group by 3;" % brand_url)
brands = my_query('prod', mysql_query)
print "Writing CSV..."
#Create DF and CSV
records = []
for row in brands:
records.append({"URL":row['URL'],"Name":['Name'],"ID":['ID']})
if brands.shape == (3,0):
records.append(dict(zip(brands, ['URL', 'Name', 'ID'])))
df1 = pd.DataFrame.from_records(records)
output = df1.to_csv('ongoing.csv', index=False)
但這只會返回空白的CSV。 我確定我將其應用錯誤。
records = []
for row in brands:
# if brands.shape == (3,0):
# records.append(dict(zip(brands, ['URL', 'Name', 'ID'])))
# update bug fix:
if row.shape == (3,0):
records.append(dict(zip(row, ['URL', 'Name', 'ID'])))
df1 = pd.DataFrame.from_records(records)
output = df1.to_csv('ongoing.csv', index=False)
# ref:
# >>> pd.DataFrame.from_records([{'a': 1, 'b':2}, {'a': 11, 'b': 22}])
# a b
# 0 1 2
# 1 11 22
好的,我知道了,我認為應該發布工作腳本。 @zyxue非常正確。
source = open('urls.txt')
key_values = ("http://",""), ("https://",""), ("www.",""), ("\n","")
#Remove stuff
print "Cleaning list"
def multiple_replacer(key_values):
replace_dict = dict(key_values)
replacement_function = lambda match: replace_dict[match.group(0)]
pattern = re.compile("|".join([re.escape(k) for k, v in key_values]), re.M)
return lambda string: pattern.sub(replacement_function, string)
multi_line = multiple_replacer(key_values)
print "Querying Database..."
records = []
for line in source:
brand_url = multi_line(line)
#Run Query with cleaned list
mysql_query = ("select ub.url as 'URL', b.name as 'Name', b.id as 'ID' from api.brand b join api.url_brand ub on b.id=ub.brand_id where ub.url like '%%%s%%' and b.deleted=0 group by 3;" % brand_url)
brands = my_query('prod', mysql_query)
#Append results to dict (records)
for row in brands:
records.append({"URL":row['URL'],"Name":row['Name'],"ID":row['ID']})
#Create DataFrame
df = pd.DataFrame.from_dict(records)
#Create CSV
output = df.to_csv('ongoing.csv',index=False)
本質上,我需要在第二個for循環的第一個之下分層並在循環開始之前創建“ records”字典。 這將為“源”中的每一行添加到字典的追加。 現在好像是一個非常簡單的概念!
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.