简体   繁体   中英

How to save a list of lists as csv

I'm trying to webscrape a list of players and save it as csv.

This is the result I'm looking for: a list of the first x players (4 in this example) for each of y teams (2 in this example, so 8 players in total).

在此处输入图像描述

Here is the code

from bs4 import BeautifulSoup
import requests
import pandas as pd

# Download the MLS lineups page and parse it.
html_text = requests.get('https://www.rotowire.com/soccer/lineups.php?league=MLS').text
soup = BeautifulSoup(html_text, 'html.parser')
# All <div> elements whose class attribute is 'lineup is-soccer'.
lineups = soup.find_all('div', class_='lineup is-soccer')
# Keep only the first two lineups; indexing raises IndexError if fewer were found.
j = range(2)
selections = []
for index1 in j:
    selections.append(lineups[index1])
for selection in selections:
    # Home-side <ul> of this lineup and the player rows inside it.
    home_squad = selection.find('ul', class_='lineup__list is-home')
    home_players = home_squad.find_all('li', class_='lineup__player')
    # NOTE: re-created on every pass, so it only ever holds the current lineup's names.
    list_home = []
    for home_player in home_players:
        h_player_name = home_player.find('a').text
        list_home.append(h_player_name)
    # First four names; indexing raises IndexError when fewer than four were collected.
    start_11 = list(list_home[i] for i in range(4))
    df_h = pd.DataFrame(start_11)
    # NOTE: called once per lineup with the same path and the default write mode,
    # so each pass replaces the file produced by the previous pass.
    df_h.to_csv('home.csv', index=False, header=False)

Unfortunately, I can only save the result of the last pass of the loop, i.e. just the last 4 players. I'm stuck: if I try to append inside the inner loop I get the individual letters of the names or an "out of range" error, and if I append in the outermost loop I get only the last player of the last team.

What am I missing? Thanks for the help.

from bs4 import BeautifulSoup
import requests
import pandas as pd

# Browser-like User-Agent sent along with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.60 Safari/537.17'}

# Accumulator created before any loop so names from every lineup are kept.
top_4_players = []
# The timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get('https://www.rotowire.com/soccer/lineups.php?league=MLS', headers=headers, timeout=30)
# Stop on an HTTP error instead of parsing an error page and silently
# writing an empty CSV.
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')
# Every <ul> carrying both the 'lineup__list' and 'is-home' classes.
players_lists = soup.select('ul.lineup__list.is-home')
for x in players_lists[:2]:  # first two home lists only
    players_list = x.select('li.lineup__player')[:4]  # first four players of each
    for player_link in players_list:
        top_4_players.append(player_link.select_one('a').text)
# Built and written once, after both loops have finished.
df = pd.DataFrame(top_4_players, columns = ['Top 4 Players'])
df.to_csv('home.csv', index=False, header=False)
# Bare expression: displays the frame when run in a notebook / REPL.
df

This returns (for only 2 home lineup lists):

Top 4 Players
0   Sean Johnson
1   Malte Amundsen
2   T. Martins
3   A. Callens
4   Q. Westberg
5   D. Criscito
6   S. O'Neill
7   L. MacNaughton

I had a similar script and adapted it a bit for you. It shows much more than just the player names: you can also see each player's position, link, and whether he has an injury.

from bs4 import BeautifulSoup
import requests

url = 'https://www.rotowire.com/soccer/lineups.php?league=MLS'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'lxml')
# Maps team label -> list of player dicts (Name / Position / Link / Injury).
data = {}
# The boxes do not depend on `place`, so look them up once instead of once
# per side. The last box on the page is skipped.
boxes = soup.find_all('div', class_='lineup__box')[:-1]
for place in ['is-home', 'is-visit']:
    for box in boxes:
        # NOTE(review): a list given for 'class' matches an element that has ANY
        # of the listed classes, not all of them. Confirm against the page markup
        # that these lookups really tell the home side from the visiting side.
        team = box.find('div', {'class': ['lineup__team', place]}).get_text(strip=True)
        players = []
        for player in box.find('ul', {'class': ['lineup__list', place]}).find_all('li', class_='lineup__player'):
            # One lookup each for the anchor and the injury tag, reused below.
            anchor = player.find('a')
            name = anchor.get_text(strip=True)
            pos = player.find('div', class_='lineup__pos').get_text(strip=True)
            link = anchor.get('href')
            injury_tag = player.find('span', class_='lineup__inj')
            injury = injury_tag.get_text(strip=True) if injury_tag else None
            players.append({'Name': name, 'Position': pos, 'Link': link, 'Injury': injury})
        # A later box/side with the same team label replaces the earlier entry.
        data[team] = players

# Print the first four player names of every collected team.
for team in data:
    print('Team:' + team)
    for player in data[team][:4]:
        print(player['Name'])

OUTPUT:

Team:NYC
Sean Johnson
Malte Amundsen
T. Martins
A. Callens
Team:TOR
Q. Westberg
D. Criscito
S. O'Neill
L. MacNaughton
Team:ORL
P. Gallese
Kyle Smith
R. Jansson
Rodrigo Schlegel
Team:CLB
Eloy Room
J. Mensah
J. Williams
J. Anibaba
Team:CIN
R. Celentano
G. Cameron
T. Blackett
N. Hagglund
Team:DCU
Rafael Romo
Sami Guediri
B. Hines-Ike
S. Birnbaum
Team:SKC
J. Pulskamp
Logan Ndenbe
A. Fontas
N. Isimat-Mirin
Team:HOU
Steve Clark
A. Lundqvist
T. Hadebe
Tim Parker
Team:WHI
I. Boehmer
R. Veselinovic
T. Blackmon
J. Nerwinski
Team:SOU
Stefan Frei
Nouhou Tolo
X. Arreaga
Yeimar Gomez
Team:RSL
Zac MacMath
Andrew Brody
Justen Glad
M. Silva
Team:POR
Aljaz Ivacic
C. Bravo
L. Mabiala
D. Zuparic
Team:ATX
Brad Stuver
J. Gallagher
R. Gabrielsen
J. Cascante
Team:GAL
J. Bond
R. Edwards
S. Coulibaly
D. Williams
Team:LAF
J. Gaines
D. Musovski
B. Rodriguez
Erik Duenas

Full output:

Team:NYC
{'Name': 'Sean Johnson', 'Position': 'GK', 'Link': '/soccer/player/sean-johnson-17054', 'Injury': None}
{'Name': 'Malte Amundsen', 'Position': 'DL', 'Link': '/soccer/player/malte-amundsen-34461', 'Injury': None}
{'Name': 'T. Martins', 'Position': 'DC', 'Link': '/soccer/player/thiago-martins-24061', 'Injury': None}
{'Name': 'A. Callens', 'Position': 'DC', 'Link': '/soccer/player/alexander-callens-23428', 'Injury': None}
Team:TOR
{'Name': 'Q. Westberg', 'Position': 'GK', 'Link': '/soccer/player/quentin-westberg-27608', 'Injury': None}
{'Name': 'D. Criscito', 'Position': 'DL', 'Link': '/soccer/player/domenico-criscito-16059', 'Injury': None}
{'Name': "S. O'Neill", 'Position': 'DC', 'Link': '/soccer/player/shane-oneill-18957', 'Injury': None}
{'Name': 'L. MacNaughton', 'Position': 'DC', 'Link': '/soccer/player/lukas-macnaughton-36348', 'Injury': None}
Team:ORL
{'Name': 'P. Gallese', 'Position': 'GK', 'Link': '/soccer/player/pedro-gallese-5303', 'Injury': None}
{'Name': 'Kyle Smith', 'Position': 'DL', 'Link': '/soccer/player/kyle-smith-27215', 'Injury': None}
{'Name': 'R. Jansson', 'Position': 'DC', 'Link': '/soccer/player/robin-jansson-27664', 'Injury': None}
{'Name': 'Rodrigo Schlegel', 'Position': 'DC', 'Link': '/soccer/player/rodrigo-schlegel-30948', 'Injury': None}
Team:CLB
{'Name': 'Eloy Room', 'Position': 'GK', 'Link': '/soccer/player/eloy-room-26699', 'Injury': None}
{'Name': 'J. Mensah', 'Position': 'DC', 'Link': '/soccer/player/jonathan-mensah-15891', 'Injury': None}
{'Name': 'J. Williams', 'Position': 'DC', 'Link': '/soccer/player/josh-williams-17090', 'Injury': None}
{'Name': 'J. Anibaba', 'Position': 'DC', 'Link': '/soccer/player/jalil-anibaba-17048', 'Injury': None}
Team:CIN
{'Name': 'R. Celentano', 'Position': 'GK', 'Link': '/soccer/player/roman-celentano-36502', 'Injury': None}
{'Name': 'G. Cameron', 'Position': 'DC', 'Link': '/soccer/player/geoff-cameron-16320', 'Injury': None}
{'Name': 'T. Blackett', 'Position': 'DC', 'Link': '/soccer/player/tyler-blackett-17942', 'Injury': None}
{'Name': 'N. Hagglund', 'Position': 'DC', 'Link': '/soccer/player/nick-hagglund-18854', 'Injury': None}
Team:DCU
{'Name': 'Rafael Romo', 'Position': 'GK', 'Link': '/soccer/player/rafael-romo-16116', 'Injury': None}
{'Name': 'Sami Guediri', 'Position': 'DL', 'Link': '/soccer/player/sami-guediri-34637', 'Injury': None}
{'Name': 'B. Hines-Ike', 'Position': 'DC', 'Link': '/soccer/player/brendan-hines-ike-34544', 'Injury': None}
{'Name': 'S. Birnbaum', 'Position': 'DC', 'Link': '/soccer/player/steve-birnbaum-18636', 'Injury': None}
Team:SKC
{'Name': 'J. Pulskamp', 'Position': 'GK', 'Link': '/soccer/player/john-pulskamp-33317', 'Injury': None}
{'Name': 'Logan Ndenbe', 'Position': 'DC', 'Link': '/soccer/player/logan-ndenbe-36246', 'Injury': None}
{'Name': 'A. Fontas', 'Position': 'DC', 'Link': '/soccer/player/andreu-fontas-20565', 'Injury': None}
{'Name': 'N. Isimat-Mirin', 'Position': 'DC', 'Link': '/soccer/player/nicolas-isimat-mirin-19443', 'Injury': None}
Team:HOU
{'Name': 'Steve Clark', 'Position': 'GK', 'Link': '/soccer/player/steve-clark-18721', 'Injury': None}
{'Name': 'A. Lundqvist', 'Position': 'DL', 'Link': '/soccer/player/adam-lundqvist-25811', 'Injury': None}
{'Name': 'T. Hadebe', 'Position': 'DC', 'Link': '/soccer/player/teenage-hadebe-31664', 'Injury': None}
{'Name': 'Tim Parker', 'Position': 'DC', 'Link': '/soccer/player/tim-parker-18932', 'Injury': None}
Team:WHI
{'Name': 'I. Boehmer', 'Position': 'GK', 'Link': '/soccer/player/isaac-boehmer-33098', 'Injury': None}
{'Name': 'R. Veselinovic', 'Position': 'DC', 'Link': '/soccer/player/ranko-veselinovic-30989', 'Injury': None}
{'Name': 'T. Blackmon', 'Position': 'DC', 'Link': '/soccer/player/tristan-blackmon-25566', 'Injury': None}
{'Name': 'J. Nerwinski', 'Position': 'DC', 'Link': '/soccer/player/jake-nerwinski-23510', 'Injury': None}
Team:SOU
{'Name': 'Stefan Frei', 'Position': 'GK', 'Link': '/soccer/player/stefan-frei-18658', 'Injury': 'QUES'}
{'Name': 'Nouhou Tolo', 'Position': 'DL', 'Link': '/soccer/player/nouhou-tolo-23430', 'Injury': None}
{'Name': 'X. Arreaga', 'Position': 'DC', 'Link': '/soccer/player/xavier-arreaga-27919', 'Injury': None}
{'Name': 'Yeimar Gomez', 'Position': 'DC', 'Link': '/soccer/player/yeimar-gomez-30976', 'Injury': None}
Team:RSL
{'Name': 'Zac MacMath', 'Position': 'GK', 'Link': '/soccer/player/zac-macmath-18844', 'Injury': None}
{'Name': 'Andrew Brody', 'Position': 'DL', 'Link': '/soccer/player/andrew-brody-34542', 'Injury': None}
{'Name': 'Justen Glad', 'Position': 'DC', 'Link': '/soccer/player/justen-glad-18802', 'Injury': None}
{'Name': 'M. Silva', 'Position': 'DC', 'Link': '/soccer/player/marcelo-silva-12977', 'Injury': None}
Team:POR
{'Name': 'Aljaz Ivacic', 'Position': 'GK', 'Link': '/soccer/player/aljaz-ivacic-27224', 'Injury': None}
{'Name': 'C. Bravo', 'Position': 'DL', 'Link': '/soccer/player/claudio-nicolas-bravo-34541', 'Injury': None}
{'Name': 'L. Mabiala', 'Position': 'DC', 'Link': '/soccer/player/larrys-mabiala-24574', 'Injury': None}
{'Name': 'D. Zuparic', 'Position': 'DC', 'Link': '/soccer/player/dario-zuparic-21718', 'Injury': None}
Team:ATX
{'Name': 'Brad Stuver', 'Position': 'GK', 'Link': '/soccer/player/brad-stuver-18866', 'Injury': None}
{'Name': 'J. Gallagher', 'Position': 'DL', 'Link': '/soccer/player/jon-gallagher-25490', 'Injury': None}
{'Name': 'R. Gabrielsen', 'Position': 'DC', 'Link': '/soccer/player/ruben-gabrielsen-30690', 'Injury': None}
{'Name': 'J. Cascante', 'Position': 'DC', 'Link': '/soccer/player/julio-cascante-25531', 'Injury': None}
Team:GAL
{'Name': 'J. Bond', 'Position': 'GK', 'Link': '/soccer/player/jonathan-bond-19111', 'Injury': None}
{'Name': 'R. Edwards', 'Position': 'DL', 'Link': '/soccer/player/raheem-edwards-21458', 'Injury': None}
{'Name': 'S. Coulibaly', 'Position': 'DC', 'Link': '/soccer/player/sega-coulibaly-34663', 'Injury': None}
{'Name': 'D. Williams', 'Position': 'DC', 'Link': '/soccer/player/derrick-williams-31084', 'Injury': None}
Team:LAF
{'Name': 'J. Gaines', 'Position': 'D', 'Link': '/soccer/player/julian-gaines-35748', 'Injury': 'QUES'}
{'Name': 'D. Musovski', 'Position': 'F', 'Link': '/soccer/player/danny-musovski-30911', 'Injury': 'QUES'}
{'Name': 'B. Rodriguez', 'Position': 'F', 'Link': '/soccer/player/brian-rodriguez-29638', 'Injury': 'QUES'}
{'Name': 'Erik Duenas', 'Position': 'M/D', 'Link': '/soccer/player/erik-duenas-33112', 'Injury': 'OUT'}

Don't write the file inside the loop; write it once, after all the loops have finished.

You should also append to a list that is created before all the loops.

Finally, you can use the slices [:4] and [:2] instead of range() and an extra for-loop.

from bs4 import BeautifulSoup
import requests
import pandas as pd

response = requests.get('https://www.rotowire.com/soccer/lineups.php?league=MLS')
html_text = response.text
soup = BeautifulSoup(html_text, 'html.parser')
lineups = soup.find_all('div', class_='lineup is-soccer')

# --- before loop ---

# One accumulator for every lineup, created a single time up front.
all_results = []

# --- loop ---

for lineup in lineups[:2]:  # <-- slice `[:2]`: first two lineups only
    squad = lineup.find('ul', class_='lineup__list is-home')
    starters = squad.find_all('li', class_='lineup__player')[:4]  # <-- slice `[:4]`: first four players

    # Add this lineup's names to the shared list.
    all_results.extend(entry.find('a').text for entry in starters)

# --- after loop ---

# Single write, once everything has been collected.
df_h = pd.DataFrame(all_results)
df_h.to_csv('home.csv', index=False, header=False)

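And if you want to write inside the loop, you have to use append mode: to_csv(..., mode="a"). Keep in mind that append mode adds to whatever home.csv already contains, so rows from earlier runs stay in the file.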

from bs4 import BeautifulSoup
import requests
import pandas as pd

html_text = requests.get('https://www.rotowire.com/soccer/lineups.php?league=MLS').text
soup = BeautifulSoup(html_text, 'html.parser')
lineups = soup.find_all('div', class_='lineup is-soccer')

# --- before loop ---

# Start from an empty file: mode='a' below only ever adds rows, so without
# this step a second run would stack its rows on top of the previous run's.
with open('home.csv', 'w'):
    pass

# --- loop ---

for selection in lineups[:2]:
    home_squad = selection.find('ul', class_='lineup__list is-home')
    home_players = home_squad.find_all('li', class_='lineup__player')
    
    # Holds only the current lineup's names; reset on every pass.
    partial_results = []
    
    for home_player in home_players[:4]:
        h_player_name = home_player.find('a').text
        partial_results.append(h_player_name)
        
    df_h = pd.DataFrame(partial_results)
    # Append mode: each pass adds its rows after those already written.
    df_h.to_csv('home.csv', index=False, header=False, mode='a')

# --- after loop ---

Is this what you are looking for?

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 22 13:16:32 2022

@author: 
"""

from bs4 import BeautifulSoup
import requests
import pandas as pd

html_text = requests.get("https://www.rotowire.com/soccer/lineups.php?league=MLS").text
soup = BeautifulSoup(html_text, "html.parser")
lineups = soup.find_all("div", class_="lineup is-soccer")

# First two lineups. Slicing, unlike indexing with range(2), cannot raise
# IndexError when the page yields fewer than two.
selections = lineups[:2]
# Shared accumulator, created before the loops so names from every lineup are kept.
list_home = []

for selection in selections:

    home_squad = selection.find("ul", class_="lineup__list is-home")
    home_players = home_squad.find_all("li", class_="lineup__player")

    for home_player in home_players[0:4]:  # first four players of this lineup
        h_player_name = home_player.find("a").text
        print(h_player_name)
        list_home.append(h_player_name)

# Written once, after all loops, so no lineup overwrites another.
df_h = pd.DataFrame(list_home)
df_h.to_csv("home.csv", index=False, header=False)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM