Python 爬虫无法正确写入 csv

Question

我是 Python 和编程的新手，我的项目遇到了一点问题。我正在尝试抓取网站的数据并将其保存到 csv 文件中。程序可以运行，但是当我将“lst”列表写入“Image URL”和“Image Featured”列时，方括号“[”和“]”以及引号“'”也被写入了 csv 文件。有没有办法去掉它们？我知道这是因为“lst”列表中包含了其他存放 url 的列表。

import csv
from bs4 import BeautifulSoup
import requests
import pandas as pd
from datetime import date

# Scrape the house listings linked from https://www.meklarin.fo/ and write the
# collected fields to test.csv via a pandas DataFrame.
today = date.today()

# Fetch the front page and parse it with the lxml parser.
source = requests.get('https://www.meklarin.fo/').text
soup = BeautifulSoup(source, 'lxml')

# Load and print a previously written CSV.
# NOTE(review): `df` is rebound to the new DataFrame before to_csv() below, so
# this read only feeds the print.
df = pd.read_csv(r'C:\Users\username\Desktop\Kassin.fo\kassin\blog\management\commands\test.csv')
print(df.to_string())

# Accumulators filled while scraping.
# NOTE(review): the two `original_*` lists are never appended to in the visible
# script, so the "already seen" check in the main loop never skips anything.
original_house_title_list = []
original_house_link_list = []
house_titles_list = []
house_asking_price_list = []
house_current_bid_price_list = []
house_link_list = []
# Constant written to the 'Post Type' column.
product = 'product'
# Today's date formatted as mm.dd.yy, written to the 'Date' column.
current_date = today.strftime("%m.%d.%y")
house_image_list = []
house_location_list = []
# `lst` receives one inner list of image URLs per house (a list of lists).
lst = []
lst1 = []

house_info_list = []
house_final_info = []
list_convert = []

# Collect the href of every <a class="house-air-content"> on the front page.
for house_link in soup.find_all('a', class_='house-air-content'):
                house_link = house_link.get('href')
                house_link_list.append(house_link.strip())
                print(house_link.strip())

# Visit each collected link and extract the fields of that page.
for house_link in house_link_list:
    if house_link in original_house_link_list:
        continue
    else:
        # Rebinds `source` and `soup` to the detail page of this house.
        source = requests.get(house_link).text
        soup = BeautifulSoup(source, 'lxml')

        # Title: text of the <h1> inside each <div class="ogn-base-info">.
        for house_titles in soup.find_all('div', class_='ogn-base-info'):
            house_title = house_titles.h1.text
            house_titles_list.append(house_title)
            #print(house_title)

        # Asking price: drop the 'Prísuppskotkr.' label and the '.' separators.
        # str.removeprefix requires Python 3.9+.
        for house__asking_price in soup.find_all('div', class_='col-xs-12 col-sm-12 col-md-6 house-ask-price house-price-column'):
                house_asking_price = house__asking_price.text
                house_asking_price = str(house_asking_price)
                house_asking_price = house_asking_price.removeprefix('Prísuppskotkr.')
                house_asking_price = house_asking_price.replace('.','')
                house_asking_price_list.append(house_asking_price.strip())
                #print(house_asking_price.strip())

        # Current bid: <h3> text with '.' and 'kr' removed.
        # NOTE(review): this list is collected and printed but is not part of
        # the dict written to the CSV below.
        for house__current_bid_price in soup.find_all('div', class_='col-xs-12 col-sm-12 col-md-6 house-bid-price house-price-column'):
                house_current_bid_price = house__current_bid_price.h3.text
                house_current_bid_price = str(house_current_bid_price)
                house_current_bid_price = house_current_bid_price.replace('.','')
                house_current_bid_price = house_current_bid_price.replace('kr','')                        
                house_current_bid_price_list.append(house_current_bid_price.strip())
                print(house_current_bid_price.strip())

        # Images: keep the href of every <a> whose markup mentions the
        # wp-content/uploads URL.
        for house_all_images in soup.find_all('a'):
            if 'https://www.meklarin.fo/wp-content/uploads' in str(house_all_images):
                house_all_images = house_all_images.get('href')
                house_image_list.append(house_all_images)
                #print(house_all_images)
            else:
                continue
        # Append this house's URL list as ONE element, so every entry of `lst`
        # is itself a list; the 'Image URL' / 'Image Featured' columns built
        # from `lst` therefore hold list objects rather than plain strings.
        lst.append(house_image_list)
        # NOTE(review): `lst1` is not read anywhere else in the visible script.
        lst1.append(lst)
        # Rebind (not clear) so the list just appended to `lst` stays intact.
        house_image_list=[]
        
        # Print the text of each info-box value except those containing
        # 'Trýst her'; nothing is stored from this loop.
        for house_build_year in soup.find_all('div', class_='house-info-box-value'):
                if 'Trýst her' in str(house_build_year):
                    continue
                else:
                    print(house_build_year.text)

        # Description: text of each <div class="house-desc-comp"> with the
        # 'Upplýsingar um bústaðin' heading removed.
        for house_info in soup.find_all('div', class_='house-desc-comp'):
                house_info = house_info.text
                house_info = str(house_info)
                house_info = house_info.replace('Upplýsingar um bústaðin','')
                house_info_list.append(house_info)
                #print(house_info)
        # NOTE(review): this appends the loop variable left over from the last
        # iteration, not `house_info_list`, which is discarded on the next
        # line. If no div matched, `house_info` is stale from a previous house
        # (or undefined on the first one).
        house_final_info.append(house_info)
        house_info_list = []


# Column name -> values. Scalar entries ('Date', 'Post Type', 'Stock Status')
# are repeated for every row by pandas; the list-valued entries must all have
# the same length for the DataFrame constructor to accept them.
# NOTE(review): the name `dict` shadows the builtin.
dict = {'Title': house_titles_list, 'Content': house_final_info, 'Date':current_date, 'Post Type': product, 'Price': house_asking_price_list, 'Regular Price': house_asking_price_list, 'Sale Price':house_asking_price_list, 'Stock Status': 'instock', 'Image URL': lst, 'Image Title': house_titles_list, 'Image Featured': lst} 

# Build the table and write it to test.csv in the current working directory.
df = pd.DataFrame(dict)
df.to_csv('test.csv')

# Print the lengths of the collected lists so they can be compared.
print(len(house_titles_list))
print(len(house_asking_price_list))
print(len(lst))
print(len(house_final_info))

Answer 1

要去掉单元格中的列表形式（以 Image URL 列为例），请在写入文件之前尝试：

# Collapse each cell's list of URLs into one comma-separated string.
df['Image URL'] = df['Image URL'].apply(lambda urls: ','.join(str(url) for url in urls))

可以复制上面这行代码，并将其中的 Image URL 改为 Image Featured，以同样的方式清理另一列中的列表。

Python 爬虫无法正确写入 csv

问题描述

1 个解决方案

解决方案1
2 2021-07-18 18:32:46

Python 爬虫无法正确写入 csv

问题描述

1 个解决方案

解决方案1 2 2021-07-18 18:32:46

解决方案1
2 2021-07-18 18:32:46