简体   繁体   中英

Download web images by URL from excel and save to folders in Python

I have an excel file as follows:

import pandas as pd
# Widen pandas' console output so the long URL column is printed in full.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# Merely reading the option changes nothing; assigning None lifts the
# per-cell width limit so the URLs are not cut off with "...".
pd.set_option('display.max_colwidth', None)

df = pd.read_excel("./test.xlsx")
print(df)

Output:

  city buildingName  buildingID  imgType                 imgUrl
0   bj     LG tower      123456   inside  http://pic3.nipic.com/20090629/827780_144001014_2.jpg
1   bj     LG tower      123456  outside  http://pic.baike.soso.com/p/20140321/20140321160157-391052318.jpg
2   sh          LXD      123457   inside  http://pic10.nipic.com/20101008/2634566_104534032717_2.jpg
3   gz           GM      123458   inside  http://pic1.to8to.com/case/day_120720/20120720_fb680a57416b8d16bad2kO1kOUIzkNxO.jpg

I need to download the images by iterating over the imgUrl column and save each image to a path built from the columns city, buildingName, buildingID and imgType.

The final structure of folders and subfolders should look like this; everything will be saved in a folder named output:

├── bj
│   └── LG tower_123456
│       ├── inside
│       │   └── 827780_144001014_2.jpg
│       └── outside
│           └── 20140321160157-391052318.jpg
├── gz
│   └── GM_123458
│       └── inside
│           └── 20120720_fb680a57416b8d16bad2kO1kOUIzkNxO.jpg
├── sh
│   └── LXD_123457
│       └── inside
│           └── 2634566_104534032717_2.jpg

How can I do this in Python? Thanks in advance for your help.

I have tried to download one image:

import requests

# Fetch a single image and store it next to the script as test.jpg.
url = "http://pic1.to8to.com/case/day_120720/20120720_fb680a57416b8d16bad2kO1kOUIzkNxO.jpg"
# Without a timeout requests waits indefinitely on an unresponsive server.
r = requests.get(url, timeout=30)
# Only write the file when the server actually returned the image.
if r.status_code == 200:
    with open("test.jpg", "wb") as f:
        f.write(r.content)

You can do something like this assuming you have the dataframe loaded.

    import os
    import requests
    from os.path import join

    # Assumes `df` is the DataFrame loaded from the Excel sheet in the question
    # (columns: city, buildingName, buildingID, imgType, imgUrl).
    for index, row in df.iterrows():
        url = row['imgUrl']
        file_name = url.split('/')[-1]
        # Layout: output/<city>/<buildingName>_<buildingID>/<imgType>/<file>
        target_dir = join(
            'output',
            row['city'],
            '{}_{}'.format(row['buildingName'], row['buildingID']),
            row['imgType'],
        )
        # A timeout keeps one dead server from stalling the whole loop.
        r = requests.get(url, timeout=30)
        if r.status_code == 200:
            # open() does not create missing parent folders, so make them first.
            os.makedirs(target_dir, exist_ok=True)
            with open(join(target_dir, file_name), "wb") as f:
                f.write(r.content)

Edited code:

    import requests
    from os.path import join,expanduser
    import os

    home = expanduser("~")
    # Assumes `df` is the DataFrame already loaded from the Excel sheet
    # (columns: city, buildingName, buildingID, imgType, imgUrl). Rebinding it
    # to an empty DataFrame here would leave the loop with nothing to iterate.
    for index, row in df.iterrows():
        url = row['imgUrl']
        file_name = url.split('/')[-1]
        # A timeout keeps one dead server from stalling the whole loop.
        r = requests.get(url, timeout=30)
        # Layout: ~/output/<city>/<buildingName>_<buildingID>/<imgType>/<file>
        filepath = join(
            home,
            'output',
            row['city'],
            '{}_{}'.format(row['buildingName'], row['buildingID']),
            row['imgType'],
        )
        if r.status_code == 200:
            # exist_ok avoids the separate exists() check and the race it implies.
            os.makedirs(filepath, exist_ok=True)
            with open(join(filepath, file_name), "wb") as f:
                f.write(r.content)
import pandas as pd
import requests


def download_urls(csv_path, output_dir=".", folder_col=0, sub_folder_col=1, url_col=3):
    """Download the files listed in a CSV into <folder>/<sub_folder>/ directories.

    Each row supplies a folder name, a sub-folder name and a URL, taken from
    the given column positions. A file is written only when the server
    answers with HTTP 200; its name is the last path segment of the URL.

    Args:
        csv_path: Path of the UTF-8 encoded CSV file to read.
        output_dir: Directory under which the folders are created.
        folder_col: Zero-based position of the top-level folder column.
        sub_folder_col: Zero-based position of the sub-folder column.
        url_col: Zero-based position of the URL column.
            NOTE(review): for the sheet shown in the question (city,
            buildingName, buildingID, imgType, imgUrl) this would be 4.
    """
    import os  # local import: the surrounding snippet does not import os

    try:
        df = pd.read_csv(csv_path, encoding='utf-8', on_bad_lines='skip')
    except TypeError:
        # pandas < 1.3 only accepts the older spelling of the same option.
        df = pd.read_csv(csv_path, encoding='utf-8', error_bad_lines=False)

    for index, row in df.iterrows():
        # .iloc is explicit positional access; str() lets numeric cells
        # (e.g. an ID column) be used as path components.
        folder = str(row.iloc[folder_col])
        sub_folder = str(row.iloc[sub_folder_col])
        url = row.iloc[url_col]
        # A timeout keeps one dead server from stalling the whole run.
        r = requests.get(url, timeout=30)
        if r.status_code == 200:
            target_dir = os.path.join(output_dir, folder, sub_folder)
            # open() does not create missing parent folders.
            os.makedirs(target_dir, exist_ok=True)
            with open(os.path.join(target_dir, url.split("/")[-1]), "wb") as f:
                f.write(r.content)

# Placeholder location of the input CSV; the raw string keeps the backslashes literal.
path = r"C:\path\your_csv_path"
# Process every row of that file.
download_urls(path)

Try this, assuming you have a CSV file as input. There is no elegant way to iterate over rows with pandas, so you could use the csv library instead.

import pandas as pd
import requests
import os

def download_urls(csv_path, output_dir=".", folder_col=0, sub_folder_col=1, url_col=3):
    """Download the files listed in a CSV, creating <folder>/<sub_folder>/ as needed.

    Each row supplies a folder name, a sub-folder name and a URL, taken from
    the given column positions. A file is written only when the server
    answers with HTTP 200; its name is the last path segment of the URL.

    Args:
        csv_path: Path of the UTF-8 encoded CSV file to read.
        output_dir: Directory under which the folders are created.
        folder_col: Zero-based position of the top-level folder column.
        sub_folder_col: Zero-based position of the sub-folder column.
        url_col: Zero-based position of the URL column.
            NOTE(review): for the sheet shown in the question (city,
            buildingName, buildingID, imgType, imgUrl) this would be 4.
    """
    try:
        df = pd.read_csv(csv_path, encoding='utf-8', on_bad_lines='skip')
    except TypeError:
        # pandas < 1.3 only accepts the older spelling of the same option.
        df = pd.read_csv(csv_path, encoding='utf-8', error_bad_lines=False)

    for index, row in df.iterrows():
        # .iloc is explicit positional access; str() lets numeric cells
        # (e.g. an ID column) be used as path components.
        folder = str(row.iloc[folder_col])
        sub_folder = str(row.iloc[sub_folder_col])
        url = row.iloc[url_col]
        # A timeout keeps one dead server from stalling the whole run.
        r = requests.get(url, timeout=30)
        if r.status_code == 200:
            # Create the nested directory that the file is actually written to;
            # makedirs builds both levels and tolerates existing ones.
            target_dir = os.path.join(output_dir, folder, sub_folder)
            os.makedirs(target_dir, exist_ok=True)
            with open(os.path.join(target_dir, url.split("/")[-1]), "wb") as f:
                f.write(r.content)

# Placeholder location of the input CSV; the raw string keeps the backslashes literal.
path = r"C:\path\your_csv_path"
# Process every row of that file.
download_urls(path)

Try this version, which creates the folders if they do not exist (the directories are only created on the first run).

The technical posts on this site are published under the CC BY-SA 4.0 license. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM