简体   繁体   中英

Scraping a web table using Selenium in R

I'm trying to scrape the league standings from here, so I use a Selenium server with code like this:

# Load the Premier League tournament page in the already-running Selenium
# session (`remDr` is created before this snippet).
url <- "https://www.whoscored.com/Regions/252/Tournaments/2/England-Premier-League"
remDr$navigate(url)

# Echo the page title and the text of the standings container to the console.
remDr$getTitle()
standings_node <- remDr$findElement("css", "#tournament-tables-13796")
standings_node$getElementText()

# Parse the rendered page source and keep the sixth HTML table found in it.
page_source <- remDr$getPageSource()
doc <- htmlParse(page_source[[1]])
a <- readHTMLTable(doc)[[6]]
do <- as.data.frame(a)

I can get the overall table with this code, but the problem is that I also want to get the home and away league tables. Any help would be really appreciated.

You can consider the following approach:

library(rvest)
library(RSelenium)

# Start a Selenium server and a Chrome client on a randomised port, so that
# repeated runs do not collide with a server still bound to a previous port.
port <- as.integer(4444L + rpois(n = 1, lambda = 1000))
rd <- rsDriver(chromever = "105.0.5195.52", browser = "chrome", port = port)

# rsDriver() already opens the client session, so no extra remDr$open() is
# needed; calling it again would start a second browser and orphan the first.
remDr <- rd$client

url <- "https://www.whoscored.com/Regions/252/Tournaments/2/England-Premier-League"

# Click one tab of the standings filter and parse the page that results.
#
# Arguments:
#   remDr     - open RSelenium client that is already showing the tournament
#               page.
#   tab_index - position of the tab's <dd> element inside
#               #tournament-filter-standings (2 = Home, 3 = Away).
#   wait      - seconds to pause after the click so the table can redraw.
#
# Returns the list of tables produced by html_table() for the whole page.
scrape_standings_tab <- function(remDr, tab_index, wait = 3) {
  selector <- sprintf(
    "#tournament-filter-standings > dd:nth-child(%d) > a",
    tab_index
  )
  remDr$findElement("css selector", selector)$clickElement()

  # The standings are redrawn by JavaScript; give the page time to update
  # before reading its source.
  Sys.sleep(wait)

  remDr$getPageSource()[[1]] %>%
    read_html() %>%
    html_table()
}

remDr$navigate(url)
Sys.sleep(3)

##############
#### Home ####
##############
table_Home <- scrape_standings_tab(remDr, 2L)

##############
#### Away ####
##############
table_Away <- scrape_standings_tab(remDr, 3L)

# Both pages are parsed into memory, so release the browser and the server.
remDr$close()
invisible(rd$server$stop())

table_Home[[5]]
# A tibble: 22 x 23
   Team              P     W     D     L     GF    GA    GD    Pts   Form  ``    ``    ``    ``    ``    ``    ``    ``    ``    ``    ``    ``    ``   
   <chr>             <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
 1 ""                ""    Over~ Over~ Over~ Over~ Over~ Over~ Over~ Home  Home  Home  Home  Home  Home  Home  Away  Away  Away  Away  Away  Away  Away 
 2 "Team"            "P"   W     D     L     GF    GA    Pts   P     W     D     L     GF    GA    Pts   P     W     D     L     GF    GA    Pts   NA   
 3 "1Manchester Cit~ "5"   5     0     0     24    5     +19   15    wwwww NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
 4 "2Arsenal"        "5"   5     0     0     14    7     +7    15    wwwww NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
 5 "3Tottenham"      "4"   4     0     0     13    4     +9    12    wwww  NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
 6 "4Chelsea"        "4"   3     1     0     9     4     +5    10    dwww  NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
 7 "5Newcastle"      "5"   2     3     0     11    5     +6    9     wdddw NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
 8 "6Liverpool"      "4"   2     2     0     15    5     +10   8     dwwd  NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
 9 "7Leeds"          "4"   2     2     0     6     2     +4    8     wwdd  NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
10 "8Bournemouth"    "5"   2     2     1     4     4     0     8     wlddw NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
# ... with 12 more rows
> 
> table_Away[[5]]
# A tibble: 22 x 23
   Team              P     W     D     L     GF    GA    GD    Pts   Form  ``    ``    ``    ``    ``    ``    ``    ``    ``    ``    ``    ``    ``   
   <chr>             <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
 1 ""                ""    Over~ Over~ Over~ Over~ Over~ Over~ Over~ Home  Home  Home  Home  Home  Home  Home  Away  Away  Away  Away  Away  Away  Away 
 2 "Team"            "P"   W     D     L     GF    GA    Pts   P     W     D     L     GF    GA    Pts   P     W     D     L     GF    GA    Pts   NA   
 3 "1Arsenal"        "4"   3     0     1     9     3     +6    9     wwlw  NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
 4 "2Manchester Uni~ "5"   3     0     2     7     11    -4    9     lwwlw NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
 5 "3Manchester Cit~ "4"   2     2     0     9     4     +5    8     wddw  NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
 6 "4Tottenham"      "5"   2     2     1     7     6     +1    8     dwdlw NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
 7 "5Brighton"       "4"   2     1     1     8     6     +2    7     wwld  NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
 8 "6Chelsea"        "4"   2     0     2     4     6     -2    6     wllw  NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
 9 "7Newcastle"      "4"   1     2     1     6     4     +2    5     ddlw  NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
10 "8Everton"        "4"   1     2     1     5     5     0     5     lddw  NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
# ... with 12 more rows

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM