[英]Writing Python output as xlsx
我想對給定路徑中可用的所有文件(庫)執行相同的 function(給出 output A、B、C、D)。 我正在嘗試在 xlsx 工作表的四個不同列中寫入 output(A、B、C、D)。 此外,xlsx 的工作表名稱應與路徑中可用的相關文件相同。
我寫了以下代碼:
def create_xlsx_file(xlsx_name, file_path):
    """Create an empty workbook and add one sheet per ``.txt`` library.

    Args:
        xlsx_name: Path of the xlsx file to create.
        file_path: Directory that is scanned for ``.txt`` library files.

    Side effects:
        Changes the process working directory to ``file_path`` so that
        ``main`` can open each library by its bare file name (which is
        also used as the sheet name).
    """
    # Resolve the output path BEFORE changing directory: main() reopens
    # the workbook after the chdir below, so a relative name would
    # otherwise point at a different location than the file just created.
    xlsx_name = os.path.abspath(xlsx_name)

    workbook = xlsxwriter.Workbook(xlsx_name)  # creates the xlsx file
    workbook.close()

    libraries = os.listdir(file_path)
    # os.chdir() returns None, so its result is deliberately not stored.
    os.chdir(file_path)

    for library in libraries:
        # Each .txt file becomes a sheet named after the file.
        if library.endswith('.txt'):
            main(library, xlsx_name)
def main(library, xlsx_name):
    """Copy the first four space-separated fields of every line into a sheet.

    Args:
        library: File name of the text library, resolved against the
            current working directory; also used as the sheet title.
        xlsx_name: Path of an existing workbook to which the sheet is added.

    Raises:
        IndexError: If a line has fewer than four space-separated fields.
    """
    workbook = openpyxl.load_workbook(xlsx_name)
    # Keep the sheet object returned by create_sheet instead of looking it
    # up by title again for every row.
    sheet = workbook.create_sheet(library, 0)
    sheet.append(['value_A', 'value_B', 'value_C', 'value_D'])

    # Use a distinct name for the file handle: rebinding `library` to the
    # file object would break both the sheet lookup and the log message.
    with open(library, 'r') as source:
        for line in source:
            # Split once per line and drop the trailing newline so it does
            # not end up inside the fourth value.
            fields = line.rstrip('\n').split(' ')
            # append() writes the next row directly; querying
            # sheet.max_row for every cell rescans the sheet each time.
            sheet.append([fields[0], fields[1], fields[2], fields[3]])

    print(f'library {library} has been written at {os.getcwd()}')
    # Save once, after all rows have been added.
    workbook.save(xlsx_name)
這段代碼對我來說絕對沒問題,但是寫 xlsx 文件太慢了,因為我的路徑有數百個 .txt 庫,每個庫都有超過數百萬行。
我可以將輸出(A,B,C,D)保存為.txt格式,然后可以手動編寫xlsx文件,但這非常費力。
有什么辦法可以加快這個過程嗎? 或任何其他快速 xlsx 編寫器可用? 任何幫助將不勝感激。 謝謝
我發現一種將數據保存到 excel 的更快方法是:由於 for 循環的結果是 output,因此首先將輸出(A,B,C,D)保存到字典中,然后使用 pandas 保存到 excel。
def create_xlsx_file(xlsx_name, file_path):
    """Create an empty workbook, then export every ``.txt`` library into it.

    Args:
        xlsx_name: Path of the xlsx file to create.
        file_path: Directory that is scanned for ``.txt`` library files.

    Side effects:
        Changes the process working directory to ``file_path`` so that
        ``main`` can open each library by its bare file name (which is
        also used as the sheet name).
    """
    # Make the output path absolute first: main() opens the workbook again
    # after the chdir below, where a relative name would no longer match
    # the file created here.
    xlsx_name = os.path.abspath(xlsx_name)

    workbook = xlsxwriter.Workbook(xlsx_name)  # creates the xlsx file
    workbook.close()

    libraries = os.listdir(file_path)
    # os.chdir() returns None; there is nothing useful to assign.
    os.chdir(file_path)

    for library in libraries:
        # Only .txt files are treated as libraries.
        if library.endswith('.txt'):
            main(library, xlsx_name)
def main(library, xlsx_name):
    """Collect the four values of every line and write them as one sheet.

    Args:
        library: File name of the text library, resolved against the
            current working directory; also used as the sheet name.
        xlsx_name: Path of an existing workbook that receives the sheet.

    Raises:
        IndexError: If a line has fewer than four space-separated fields.
    """
    # Column-wise storage for the A, B, C, D values.
    dic = {'label_A': [], 'label_B': [], 'label_C': [], 'label_D': []}

    # Use a distinct name for the file handle so `library` keeps holding
    # the file name needed for the sheet name and the log message.
    with open(library, 'r') as source:
        for line in source:
            # Split once per line; strip the newline so it does not stay
            # attached to the fourth value.
            fields = line.rstrip('\n').split(' ')
            dic['label_A'].append(fields[0])
            dic['label_B'].append(fields[1])
            dic['label_C'].append(fields[2])
            dic['label_D'].append(fields[3])

    df = pd.DataFrame(
        data=dic, columns=['label_A', 'label_B', 'label_C', 'label_D']
    )

    # Append to the existing workbook. Passing the bare path to to_excel()
    # would rewrite the whole file on every call and keep only the last
    # library, and saving a separately loaded openpyxl workbook afterwards
    # would overwrite the pandas output again.
    with pd.ExcelWriter(xlsx_name, engine='openpyxl', mode='a') as writer:
        # index=False keeps the sheet at exactly the four value columns.
        df.to_excel(writer, sheet_name=library, index=False)

    print(f'library {library} has been written at {os.getcwd()}')
根據我的經驗,pandas 庫處理信息的速度非常快,並且有一個 function 可以導出 xlsx 格式的數據。 你可以創建一個空的DataFrame
data = pd.DataFrame()
將每一行的值保存在 pd.Series 中。例如:
row = pd.Series(data=[A,B,C,D], index = ['value_A', 'value_B', 'value_C', 'value_D'])
索引參數是列的名稱,數據參數是每行的值。
將每一行添加到DataFrame
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement. NOTE: concatenating one row at a time copies the frame on
# every call, so for many rows collect them in a list and build the
# DataFrame once at the end.
data = pd.concat([data, row.to_frame().T], ignore_index=True)
只需將 DataFrame 導出到 xlsx
data.to_excel("output.xlsx")
參考資料https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_excel.html
不要忘記安裝 pandas 庫,並在您的程序中導入它:import pandas as pd
我希望這個對你有用。
從你的代碼看來,你並不需要 xlsx 的任何格式或函數功能;如果只是要把數據集中在一起,改用簡單的 csv 只需要對你的代碼做微小的更改:
import csv
def create_xlsx_file(xlsx_name, file_path):
    """Start a CSV file with the header row and append every ``.txt`` library.

    Args:
        xlsx_name: Path of the CSV output file; it is overwritten.
        file_path: Directory that is scanned for ``.txt`` library files.
    """
    # newline='' lets the csv module control line endings itself, which
    # avoids blank rows between records on Windows.
    with open(xlsx_name, 'w', encoding='UTF8', newline='') as output_file:
        writer = csv.writer(output_file)
        writer.writerow(['value_A', 'value_B', 'value_C', 'value_D'])

    for library in os.listdir(file_path):
        if library.endswith('.txt'):
            # The working directory is not changed here, so pass the full
            # path; a bare file name would only resolve when file_path
            # happens to be the current directory.
            main(os.path.join(file_path, library), xlsx_name)
def main(library, xlsx_name):
    """Append the first four space-separated fields of each line to a CSV.

    Args:
        library: Path of the text library to read.
        xlsx_name: Path of the CSV file to append to.

    Raises:
        IndexError: If a non-blank line has fewer than four
            space-separated fields.
    """
    # newline='' lets the csv module control line endings itself, which
    # avoids blank rows between records on Windows.
    with open(xlsx_name, 'a', encoding='UTF8', newline='') as output_file:
        writer = csv.writer(output_file)
        with open(library, 'r', encoding='UTF8') as input_file:
            # Stream the file line by line instead of loading it whole;
            # the libraries can be very large.
            for line in input_file:
                fields = line.rstrip('\n').split(' ')
                if fields == ['']:
                    # Blank line (e.g. at the end of the file): no data.
                    continue
                writer.writerow([fields[0], fields[1], fields[2], fields[3]])
    print(f'library {library} has been written at {os.getcwd()}')
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.