I have an excel file as follows:
import pandas as pd

# Widen the pandas display limits so the whole DataFrame — including the
# long imgUrl values — prints without truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# Bug fix: the original line only *read* this option (a no-op expression
# statement); it must be set so long URLs are not elided with "...".
pd.set_option('display.max_colwidth', None)

df = pd.read_excel("./test.xlsx")
print(df)
Output:
city buildingName buildingID imgType imgUrl
0 bj LG tower 123456 inside http://pic3.nipic.com/20090629/827780_144001014_2.jpg
1 bj LG tower 123456 outside http://pic.baike.soso.com/p/20140321/20140321160157-391052318.jpg
2 sh LXD 123457 inside http://pic10.nipic.com/20101008/2634566_104534032717_2.jpg
3 gz GM 123458 inside http://pic1.to8to.com/case/day_120720/20120720_fb680a57416b8d16bad2kO1kOUIzkNxO.jpg
I need to download images by reading and iterating column imgUrl
and save each image to a path combined from the columns city, buildingName, buildingID, and imgType.
The final output folders and subfolders' structure will be like this, they will be saved in a folder named output
:
├── bj
│ └── LG tower_123456
│ ├── inside
│ │ └── 827780_144001014_2.jpg
│ └── outside
│ └── 20140321160157-391052318.jpg
├── gz
│ └── GM_123458
│ └── inside
│ └── 2634566_104534032717_2.jpg
├── sh
│ └── LXD_123457
│ └── inside
│ └── 20120720_fb680a57416b8d16bad2kO1kOUIzkNxO.jpg
How can I do this in Python? Thanks in advance for your help.
I have tried to download one image:
import requests

# Fetch one image over HTTP and, on success, persist it to a local file.
response = requests.get(
    "http://pic1.to8to.com/case/day_120720/20120720_fb680a57416b8d16bad2kO1kOUIzkNxO.jpg"
)
if response.status_code == 200:
    with open("test.jpg", "wb") as out_file:
        out_file.write(response.content)
You can do something like this assuming you have the dataframe loaded.
import os
import requests
from os.path import join

# Download every image referenced in df['imgUrl'] (df assumed already
# loaded, as stated above) and save each one under
#   output/<city>/<buildingName>_<buildingID>/<imgType>/<original filename>
# Bug fixes vs. the original answer:
#   * the DataFrame columns are 'imgUrl' and 'buildingID' (see the printed
#     DataFrame), not 'url' / 'buildingId' — the original raised KeyError;
#   * the target directory must exist before open() is called;
#   * the question asks for an 'output' root and a name_ID folder name.
for index, row in df.iterrows():
    url = row['imgUrl']
    file_name = url.split('/')[-1]          # keep the original image name
    r = requests.get(url)
    folder = join('output',
                  row['city'],
                  row['buildingName'] + '_' + str(row['buildingID']),
                  row['imgType'])
    os.makedirs(folder, exist_ok=True)      # create the tree if missing
    if r.status_code == 200:
        with open(join(folder, file_name), "wb") as f:
            f.write(r.content)
Edited code:
import os
import pandas as pd
import requests
from os.path import join, expanduser

home = expanduser("~")

# Bug fix: the original built an *empty* DataFrame (pd.DataFrame()), so the
# loop below never executed.  Load the real spreadsheet instead.
df = pd.read_excel("./test.xlsx")

for index, row in df.iterrows():
    # Bug fix: column names must match the DataFrame — 'imgUrl' and
    # 'buildingID' (the original 'url' / 'buildingId' raised KeyError).
    url = row['imgUrl']
    file_name = url.split('/')[-1]
    r = requests.get(url)
    # <home>/output/<city>/<buildingName>_<buildingID>/<imgType>
    filepath = join(home, 'output', row['city'],
                    row['buildingName'] + '_' + str(row['buildingID']),
                    row['imgType'])
    os.makedirs(filepath, exist_ok=True)    # replaces the exists()/makedirs pair
    filepath = join(filepath, file_name)
    if r.status_code == 200:
        with open(filepath, "wb") as f:
            f.write(r.content)
import os

import pandas as pd
import requests


def download_urls(csv_path):
    """Download every image listed in the CSV at *csv_path*.

    The CSV is expected to contain the columns
    city, buildingName, buildingID, imgType, imgUrl — in that order.
    Each image is written to
    output/<city>/<buildingName>_<buildingID>/<imgType>/<filename>.
    """
    # NOTE(review): error_bad_lines is removed in pandas >= 2.0; use
    # on_bad_lines='skip' on modern pandas.
    df = pd.read_csv(csv_path, encoding='utf-8', error_bad_lines=False)
    for index, row in df.iterrows():
        city = row[0]
        building = "{0}_{1}".format(row[1], row[2])   # buildingName_buildingID
        img_type = row[3]
        url = row[4]    # bug fix: imgUrl is the 5th column, not the 4th
        r = requests.get(url)
        if r.status_code == 200:
            # Bug fix: the original wrote to "/{0}/{1}/..." — an absolute
            # path at the filesystem root — and never created the
            # directories.  Build a relative path and create the tree first.
            folder = os.path.join("output", city, building, img_type)
            os.makedirs(folder, exist_ok=True)
            with open(os.path.join(folder, url.split("/")[-1]), "wb") as f:
                f.write(r.content)


if __name__ == "__main__":
    path = r"C:\path\your_csv_path"
    download_urls(path)
Try this, assuming you have a CSV file as input. There is no particularly elegant way to iterate rows with pandas, so you could use the csv library instead.
import os

import pandas as pd
import requests


def download_urls(csv_path):
    """Download the images listed in *csv_path* into a local folder tree.

    Expects the columns city, buildingName, buildingID, imgType, imgUrl
    (in that order) and writes each image to
    output/<city>/<buildingName>_<buildingID>/<imgType>/<filename>.
    """
    # NOTE(review): error_bad_lines is removed in pandas >= 2.0; use
    # on_bad_lines='skip' on modern pandas.
    df = pd.read_csv(csv_path, encoding='utf-8', error_bad_lines=False)
    for index, row in df.iterrows():
        target_dir = os.path.join(
            "output",                            # root folder from the question
            row[0],                              # city
            "{0}_{1}".format(row[1], row[2]),    # buildingName_buildingID
            row[3],                              # imgType
        )
        url = row[4]    # bug fix: imgUrl is the 5th column, not the 4th
        r = requests.get(url)
        if r.status_code == 200:
            # Bug fix: the original called os.makedirs(sub_folder) on the
            # bare sub-folder name, creating it *beside* the parent rather
            # than inside it, and then wrote to an absolute "/folder/..."
            # path.  makedirs on the joined path builds the whole tree.
            os.makedirs(target_dir, exist_ok=True)
            with open(os.path.join(target_dir, url.split("/")[-1]), "wb") as f:
                f.write(r.content)


if __name__ == "__main__":
    path = r"C:\path\your_csv_path"
    download_urls(path)
Try this version — it creates the output folders if they do not exist (the directories are created before the file is written, so this only happens on the first run).
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.