I really don't get what I am doing wrong. I am still very new to Python. Any help would be greatly appreciated.
import requests
import bs4
import openpyxl
import os
from requests_ntlm import HttpNtlmAuth
# Asker's script as posted: fetch a page with NTLM auth and copy its table cells into a workbook.
# Prompt for the credentials that are passed to HttpNtlmAuth below.
domain = input('What is the name of your domain: ')
username = input('What is your Username: ')
password = input('What is your password: ')
r = requests.get('Some Website', auth=HttpNtlmAuth(domain + '\\' + username, password))
# Change the working directory to %USERPROFILE%\Desktop so the relative save path below lands there.
desktop = os.path.join(os.path.join(os.environ['USERPROFILE']), 'Desktop')
os.chdir(desktop)
wb = openpyxl.Workbook()
ws = wb.active
ws.title = 'Report'
# NOTE(review): `res` is not defined in this snippet; the response above is bound to `r`.
soup = bs4.BeautifulSoup(res.text, 'html.parser')
ro = 1
for tr in soup.find_all('tr'):
#print(tr)
col = 1
# NOTE(review): this searches the whole document (`soup`), not the current row `tr`.
for td in soup.find_all('td'):
#print(td.text.strip())
ws.cell(row=ro, column=col, value = td.text.strip())
# NOTE(review): adding a counter to itself doubles it (1, 2, 4, 8, ...) rather than advancing by one.
col += col
ro += ro
wb.save('WebsiteInfo.xlsx')
wb.close()
The error I get is
Traceback (most recent call last):
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\utils\cell.py", line 110, in get_column_letter
return _STRING_COL_CACHE[idx]
KeyError: 2854495385411919762116571938898990272765493248
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "ae.py", line 35, in <module>
wb.save('balances.xlsx')
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\workbook\workbook.py", line 408, in save
save_workbook(self, filename)
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\writer\excel.py", line 293, in save_workbook
writer.save()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\writer\excel.py", line 275, in save
self.write_data()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\writer\excel.py", line 75, in
write_data
self._write_worksheets()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\writer\excel.py", line 215, in _write_worksheets
self.write_worksheet(ws)
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\writer\excel.py", line 200, in write_worksheet
writer.write()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\worksheet\_writer.py", line 355, in write
self.write_top()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\worksheet\_writer.py", line 99, in write_top
self.write_dimensions()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\worksheet\_writer.py", line 69, in write_dimensions
dim = SheetDimension(ref())
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\worksheet\worksheet.py", line
389, in calculate_dimension
get_column_letter(max_col), max_row
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\utils\cell.py", line 112, in get_column_letter
raise ValueError("Invalid column index {0}".format(idx))
ValueError: Invalid column index 2854495385411919762116571938898990272765493248
Error in atexit._run_exitfuncs:
Traceback (most recent call last):
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\worksheet\_writer.py", line 32, in _openpyxl_shutdown
os.remove(path)
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Windows\\Temp\\1\\openpyxl.we3wcfk_'
If I put a print(desktop)
It prints C:\Users\ThisUser\Desktop
If I put wb.save('C:\\Users\\ThisUser\\Desktop\\WebsiteInfo.xlsx')
It gives this error
Traceback (most recent call last):
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\utils\cell.py", line 110, in get_column_letter
return _STRING_COL_CACHE[idx]
KeyError: 2854495385411919762116571938898990272765493248
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "ae.py", line 36, in <module>
wb.save('C:\\Users\\ThisUser\\Desktop\\WebsiteInfo.xlsx')
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\workbook\workbook.py", line 408, in save
save_workbook(self, filename)
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\writer\excel.py", line 293, in save_workbook
writer.save()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\writer\excel.py", line 275, in save
self.write_data()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\writer\excel.py", line 75, in
write_data
self._write_worksheets()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\writer\excel.py", line 215, in _write_worksheets
self.write_worksheet(ws)
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\writer\excel.py", line 200, in write_worksheet
writer.write()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\worksheet\_writer.py", line 355, in write
self.write_top()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\worksheet\_writer.py", line 99, in write_top
self.write_dimensions()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\worksheet\_writer.py", line 69, in write_dimensions
dim = SheetDimension(ref())
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\worksheet\worksheet.py", line
389, in calculate_dimension
get_column_letter(max_col), max_row
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\utils\cell.py", line 112, in get_column_letter
raise ValueError("Invalid column index {0}".format(idx))
ValueError: Invalid column index 2854495385411919762116571938898990272765493248
Error in atexit._run_exitfuncs:
Traceback (most recent call last):
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\worksheet\_writer.py", line 32, in _openpyxl_shutdown
os.remove(path)
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Windows\\Temp\\1\\openpyxl.84t28sc7'
So I did figure out that if I comment out the ws.cell part and uncomment the print it runs fine
# Debugging variant: print each cell's text instead of writing it to the worksheet.
ro = 1
for tr in soup.find_all('tr'):
co = 1
for td in soup.find_all('td'):
print(td.text.strip())
#ws.cell(row=ro, column=co, value=td.text.strip())
# NOTE(review): `co += co` and `ro += ro` still double the counters on each pass;
# with the ws.cell call commented out, the values are never used as cell indexes.
co += co
ro += ro
I can even throw in ws.cell(row=1, column=1, value=1), ws.cell(row=1, column=2, value=2) and ws.cell(row=1, column=3, value=3).
So my issue seems to be the ws.cell(row=ro, column=co, value=td.text.strip())
Not sure if this is what's causing it, but you are trying to write data into cells and columns that go beyond the capabilities of Excel.
ValueError: Invalid column index 2854495385411919762116571938898990272765493248
First thing I would change is: col += col
and ro += ro
to be: col += 1
and ro += 1
in your code and see if that makes a difference. Regardless of whether that fixes it or not, I think this is what you wanted/meant to do anyway.
You're essentially telling it to write the data like:
ws.cell(row=1, column=2854495385411919762116571938898990272765493248, value=1)
Think about it, you start with ro = 1
and col = 1
, and then after each iteration you're increasing your row and column exponentially:
# Demonstration: `r += r` adds the counter to itself, so it doubles on every
# pass and the printed values are successive powers of two.
r = 1
for x in range(20):
    print(r)
    r += r
1
2
4
8
16
32
64
128
256
512
1024
2048
4096
8192
16384
32768
65536
131072
262144
524288
So try:
import requests
import bs4
import openpyxl
import os
from requests_ntlm import HttpNtlmAuth
# Fetch a page behind NTLM authentication and copy every table cell into an
# Excel workbook saved on the current user's Desktop.

# Prompt for the credentials used to build the NTLM "DOMAIN\user" login.
domain = input('What is the name of your domain: ')
username = input('What is your Username: ')
password = input('What is your password: ')
r = requests.get('Some Website', auth=HttpNtlmAuth(domain + '\\' + username, password))

# Work from %USERPROFILE%\Desktop so the relative save path below lands there.
desktop = os.path.join(os.environ['USERPROFILE'], 'Desktop')
os.chdir(desktop)

wb = openpyxl.Workbook()
ws = wb.active
ws.title = 'Report'

# The response object is bound to `r` above; `res` was never defined.
soup = bs4.BeautifulSoup(r.text, 'html.parser')

ro = 1
for tr in soup.find_all('tr'):
    #print(tr)
    col = 1
    # Look for cells inside the current row only; soup.find_all('td') would
    # write every cell of the whole page into every spreadsheet row.
    for td in tr.find_all('td'):
        #print(td.text.strip())
        ws.cell(row=ro, column=col, value=td.text.strip())
        col += 1  # <---- CHANGED HERE
    ro += 1  # <------ CHANGED HERE

wb.save('WebsiteInfo.xlsx')
wb.close()
Have you considered using pandas' .read_html() on the HTML (since you're pulling <tr> tags, I'm assuming there are <table> tags) and then just using pandas' .to_excel() function?
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.