I am trying to download all images of a product on Trendyol. BeautifulSoup does not see the div "gallery-container" when I use the find_all method; the result comes back empty. After that, I tried to find all images on the page, but I can't access all of them, and the ones I do get are low-resolution. However, when I inspect the page in Chrome, I can see the high-resolution images. How can I get those images?
Code - GetProductInfo.py
from bs4 import BeautifulSoup
from requests_html import HTMLSession
from os.path import basename
import requests
class GetProductInfo:
    """Load a product page, render it, and expose the parsed HTML.

    Attributes:
        soup: ``BeautifulSoup`` tree built from the rendered page HTML.
    """

    def __init__(self, link):
        """Fetch ``link``, render it and parse the rendered HTML.

        Args:
            link: URL of the product page to load.
        """
        # The session is only needed while fetching and rendering, so it is
        # kept local and always closed instead of being declared ``global``.
        session = HTMLSession()
        try:
            response = session.get(link)
            response.html.render()
            self.soup = BeautifulSoup(response.html.html, 'lxml')
        finally:
            session.close()

    def saveImages(self):
        """Download every ``<img>`` whose ``src`` starts with ``http``.

        Files are written to the ``images/`` directory, which is created on
        demand. The file name is the last component of the URL *path*, so a
        query string never ends up in the name; URLs whose path has no file
        name (for example ones ending in ``/``) are skipped.
        """
        import os
        from urllib.parse import urlparse

        for tag in self.soup.select("img[src^=http]"):
            url = tag["src"]
            name = basename(urlparse(url).path)
            if not name:
                # Nothing usable as a file name; opening "images/" would fail.
                continue
            # Download first so a failed request does not leave an empty file.
            content = requests.get(url, timeout=30).content
            os.makedirs("images", exist_ok=True)
            with open(os.path.join("images", name), "wb") as f:
                f.write(content)
Code - test.py
# Import the one name that is used instead of everything the module exports.
from GetProductInfo import GetProductInfo

# Product page whose images are downloaded when this script is run.
PRODUCT_URL = "https://www.trendyol.com/aqua-di-polo-1987/unisex-kol-saati-apl12c195h02-p-3929108"

if __name__ == "__main__":
    # Guarded so that importing this file does not start a download.
    product = GetProductInfo(PRODUCT_URL)
    product.saveImages()
You could simply regex out the JavaScript object hosting the image links, then parse with json. No need to render the page at all. The links are for the high resolution images viewable in that gallery.
import requests, json, re
# Matches the assignment that precedes the embedded product-state JSON object.
_STATE_MARKER = re.compile(r'PRODUCT_DETAIL_APP_INITIAL_STATE__\s*=\s*')


def extract_image_urls(page_text, base_url='https://www.trendyol.com'):
    """Return the image URLs listed in the page's embedded product state.

    Args:
        page_text: HTML source of the product page.
        base_url: Prefix put in front of every image path found in the state.

    Returns:
        A list with ``base_url`` prepended to each entry of
        ``data['product']['images']``.

    Raises:
        ValueError: If the state assignment is not present in ``page_text``,
            or the text that follows it is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        KeyError: If the decoded state has no ``product`` / ``images`` keys.
    """
    marker = _STATE_MARKER.search(page_text)
    if marker is None:
        raise ValueError('PRODUCT_DETAIL_APP_INITIAL_STATE__ not found in page')
    # raw_decode parses exactly one JSON value and ignores whatever follows,
    # so a ';' inside a string value cannot truncate the object the way a
    # non-greedy "(.*?);" pattern does.
    data, _ = json.JSONDecoder().raw_decode(page_text[marker.end():])
    return [base_url + img for img in data['product']['images']]


if __name__ == '__main__':
    r = requests.get(
        'https://www.trendyol.com/aqua-di-polo-1987/unisex-kol-saati-apl12c195h02-p-3929108',
        timeout=30,
    )
    # Fail loudly on an error response instead of searching an error page.
    r.raise_for_status()
    images = extract_image_urls(r.text)
    print(images)
Hey, I'm testing how GitHub Copilot solves Stack Overflow questions.
# Problem: Beautiful Soup can't access all images in a webpage
# Solution:
# 1. Get all images from the webpage
# 2. Save all images to a folder
import requests
from bs4 import BeautifulSoup
import os
# Get all images from a webpage
def get_all_images(url):
    """Fetch ``url`` and return every ``<img>`` tag found in its HTML.

    The page is downloaded with ``requests`` and parsed as-is, so only tags
    present in the returned HTML text are found.

    Args:
        url: Address of the web page to scan.

    Returns:
        A list of the ``<img>`` tags in document order. The number of tags is
        also printed.
    """
    # A timeout keeps the call from hanging forever on an unresponsive server.
    r = requests.get(url, timeout=30)
    soup = BeautifulSoup(r.text, "html.parser")
    # find_all is the current method name; findAll is a deprecated alias.
    images = list(soup.find_all('img'))
    print(f"{len(images)} images found.")
    return images
# Save all images to a folder
def _image_extension(src, default='jpg'):
    """Return the extension (without the dot) of the URL path in ``src``."""
    from urllib.parse import urlparse

    # Only the path is inspected, so query strings and dots in the host name
    # cannot leak into the extension.
    ext = os.path.splitext(urlparse(src).path)[1]
    return ext[1:] if len(ext) > 1 else default


def save_all_images(images, folder):
    """Download each image and store it in ``folder`` as ``<index>.<ext>``.

    Args:
        images: Iterable of tag-like objects offering ``get('src')`` (for
            example BeautifulSoup ``<img>`` tags or plain dicts).
        folder: Directory to write into; created if it does not exist.

    Returns:
        The number of images that were actually written.

    Images without a ``src``, failed downloads, error responses and write
    failures are reported with ``print`` and skipped. The index in the file
    name is the position in ``images``, so skipped entries leave gaps.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(folder, exist_ok=True)
    print("Saving images to " + folder)

    saved = 0
    total = 0
    for i, img in enumerate(images):
        total += 1
        src = img.get('src')
        if not src:
            print(f"Skipping image {i}: no src attribute.")
            continue
        path = os.path.join(folder, f"{i}.{_image_extension(src)}")
        try:
            response = requests.get(src, timeout=30)
            # Do not store an HTTP error page under an image file name.
            response.raise_for_status()
            with open(path, 'wb') as handler:
                handler.write(response.content)
        except (requests.RequestException, OSError) as e:
            # Best effort: report the failure and carry on with the rest.
            print(e)
            continue
        saved += 1

    print(f"{saved} of {total} images saved.")
    return saved
# Main
def main(
    url="https://www.trendyol.com/aqua-di-polo-1987/unisex-kol-saati-apl12c195h02-p-3929108",
    folder="images",
):
    """Collect the images of ``url`` and save them into ``folder``.

    Args:
        url: Page to scan for ``<img>`` tags; defaults to the example product.
        folder: Directory the downloaded images are written to.
    """
    images = get_all_images(url)
    save_all_images(images, folder)


if __name__ == "__main__":
    # Guarded so that importing this module does not trigger any downloads.
    main()
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.