[英]Pass a list through a function and create a summary df
我想將一個列表傳遞給 function,它從 Tfl 的 API 中提取旅程信息(腿、步行時間、持續時間等)並輸出此信息的摘要 df。
這是我的列表taxi.args.new
的示例(列表長度為 1):
我只能通過引用第一個元素來為長度為 1 的列表運行下面的代碼(它工作正常):
library(httr)
library(jsonlite)
library(tidyverse)
# this function works for the list taxis.args.new (calling the first element)
api_function <-function(from, to, date, time, timeIs, journeyPreference, accessibilityPreference, walkingSpeed, cyclePreference, bikeProficiency){
tflpath <- "https://api.tfl.gov.uk/Journey/JourneyResults/{from}/to/{to}?date=[date]&time=[time]&timeIs=[timeIs]&journeyPreference=[journeyPreference]&mode=tube%2Cbus%2Coverground&accessibilityPreference=[accessibilityPreference]&walkingSpeed=[walkingSpeed]&cyclePreference=[cyclePreference]&bikeProficiency=[bikeProficiency]"
dat <- data.frame()
for (i in 1){
request <- GET(url = tflpath, query = taxi.args.new[[i]])
json_content <- fromJSON(content(request, "text"), flatten = TRUE)
# obtain info about journey legs
df <- as.data.frame(json_content$journeys$legs)
# obtain info about journey origin and destination
od <- as.data.frame(json_content$journeyVector)[,2:3]
df1 <- df[,c("duration", "mode.id")]
# tidy the df to get summary of legs, duration, walk time etc
df2 <- df1%>%
group_by(mode.id)%>%
summarise(legs = n(), total_dur = sum(duration))%>%
mutate(tube_legs = case_when(mode.id== 'tube'~ as.numeric(legs) ,
mode.id== 'walking'~ 0,
mode.id== 'bus'~ 0),
bus_legs = case_when( mode.id== 'tube'~ 0,
mode.id== 'walking'~ 0,
mode.id== 'bus'~ as.numeric(legs)),
walk_legs = case_when(mode.id== 'tube'~ 0,
mode.id== 'walking'~ as.numeric(legs),
mode.id== 'bus'~ 0),
walk_dur = case_when(mode.id== 'walking' ~ as.numeric(total_dur), TRUE ~ 0))%>%
ungroup()%>%
select(-mode.id)%>%
summarise(total_legs = sum(legs),
total_dur = sum(total_dur),
tube_legs = sum(tube_legs),
bus_legs = sum(bus_legs),
walk_legs = sum(walk_legs),
walk_dur = sum(walk_dur))%>%
cbind(od)%>%
cbind(min(df$departureTime), max(df$arrivalTime))%>%
rename(dep_time = "min(df$departureTime)" , arrival_time = "max(df$arrivalTime)")%>%
select(from, to, dep_time, arrival_time, tube_legs,
bus_legs, walk_legs, total_legs, walk_dur, total_dur)
dat <- append(dat, df2)
}
return(dat)
}
## this returns a df which summarises jny info
test_df <- as.data.frame(do.call(api_function, taxi.args.new[[1]]))
這就是test_df
的樣子:
但是,我想擴大規模並為長度為 200k 的列表執行此操作,但無法弄清楚如何執行此操作。 我希望 output 是一個 df,所有結果都附加在一起。 所以它看起來有點像這樣(只有 2 行的示例):
任何幫助將不勝感激。 你可能會說我已經嘗試過這樣的事情(因此代碼中的 for 循環)
更新:
非常感謝 Ronak 的建議。 這是整理后的最終結果:
api_function <- function(from, to, date, time, timeIs, journeyPreference, accessibilityPreference, walkingSpeed, cyclePreference, bikeProficiency){
tflpath <- "https://api.tfl.gov.uk/Journey/JourneyResults/{from}/to/{to}?date=[date]&time=[time]&timeIs=[timeIs]&journeyPreference=[journeyPreference]&mode=tube%2Cbus%2Coverground&accessibilityPreference=[accessibilityPreference]&walkingSpeed=[walkingSpeed]&cyclePreference=[cyclePreference]&bikeProficiency=[bikeProficiency]"
request <- GET(url = tflpath,
query = list( from =from,
to =to,
date =date,
time =time,
timeIs =timeIs,
journeyPreference =journeyPreference,
accessibilityPreference =accessibilityPreference,
walkingSpeed =walkingSpeed ,
cyclePreference =cyclePreference,
bikeProficiency =bikeProficiency))
json_content <- fromJSON(content(request, "text"), flatten = TRUE)
# obtain info about journey legs
x <- flatten(json_content$journeys$legs)
# obtain info about mode and duration of each leg
df1 <- data.frame(duration = x$duration, mode.id = x$mode.id)
# obtain info about journey origin and destination
od <- as.data.frame(json_content$journeyVector)[,2:3]
df2 <- df1%>%
group_by(mode.id)%>%
summarise(legs = n(), total_dur = sum(duration))%>%
mutate(tube_legs = case_when(mode.id== 'tube'~ as.numeric(legs) ,
mode.id== 'walking'~ 0,
mode.id== 'bus'~ 0,
mode.id== 'overground'~ 0),
bus_legs = case_when(mode.id== 'tube'~ 0,
mode.id== 'walking'~ 0,
mode.id== 'overground'~ 0,
mode.id== 'bus'~ as.numeric(legs)),
walk_legs = case_when(mode.id== 'tube'~ 0,
mode.id== 'walking'~ as.numeric(legs),
mode.id== 'bus'~ 0,
mode.id== 'overground'~ 0),
overground_legs = case_when(mode.id== 'tube'~ 0,
mode.id== 'walking'~ 0,
mode.id== 'bus'~ 0,
mode.id== 'overground'~ as.numeric(legs)),
walk_dur = case_when(mode.id== 'walking' ~ as.numeric(total_dur), TRUE ~ 0))%>%
ungroup()%>%
select(-mode.id)%>%
summarise(total_legs = sum(legs),
total_dur = sum(total_dur),
tube_legs = sum(tube_legs),
bus_legs = sum(bus_legs),
overground_legs = sum(overground_legs),
walk_legs = sum(walk_legs),
walk_dur = sum(walk_dur))%>%
cbind(od)%>%
cbind(min(x$departureTime), max(x$arrivalTime))%>%
rename(dep_time = "min(x$departureTime)" , arrival_time = "max(x$arrivalTime)")%>%
select(from, to, dep_time, arrival_time, tube_legs,
bus_legs, walk_legs, overground_legs, total_legs, walk_dur, total_dur)
return(df2)
}
results <- do.call(rbind, lapply(seq_along(taxi.args.new), function(x) do.call(api_function, taxi.args.new[[x]])))
您可以嘗試使用lapply
:
result <- do.call(rbind, lapply(seq_along(taxi.args.new), function(x)
do.call(api_function, c(taxi.args.new[[x]], x))))
或使用purrr
:
library(purrr)
result <- map_df(seq_along(taxi.args.new),
~invoke(api_function, c(taxi.args.new[[x]], .x)))
我稍微更改了 function 並添加了另一個參數i
。
api_function <-function(from, to, date, time, timeIs, journeyPreference, accessibilityPreference, walkingSpeed, cyclePreference, bikeProficiency, i){
tflpath <- "https://api.tfl.gov.uk/Journey/JourneyResults/{from}/to/{to}?date=[date]&time=[time]&timeIs=[timeIs]&journeyPreference=[journeyPreference]&mode=tube%2Cbus%2Coverground&accessibilityPreference=[accessibilityPreference]&walkingSpeed=[walkingSpeed]&cyclePreference=[cyclePreference]&bikeProficiency=[bikeProficiency]"
request <- GET(url = tflpath, query = taxi.args.new[[i]])
json_content <- fromJSON(content(request, "text"), flatten = TRUE)
# obtain info about journey legs
df <- as.data.frame(json_content$journeys$legs)
# obtain info about journey origin and destination
od <- as.data.frame(json_content$journeyVector)[,2:3]
df1 <- df[,c("duration", "mode.id")]
# tidy the df to get summary of legs, duration, walk time etc
df2 <- df1%>%
group_by(mode.id)%>%
summarise(legs = n(), total_dur = sum(duration))%>%
mutate(tube_legs = case_when(mode.id== 'tube'~ as.numeric(legs) ,
mode.id== 'walking'~ 0,
mode.id== 'bus'~ 0),
bus_legs = case_when( mode.id== 'tube'~ 0,
mode.id== 'walking'~ 0,
mode.id== 'bus'~ as.numeric(legs)),
walk_legs = case_when(mode.id== 'tube'~ 0,
mode.id== 'walking'~ as.numeric(legs),
mode.id== 'bus'~ 0),
walk_dur = case_when(mode.id== 'walking' ~ as.numeric(total_dur), TRUE ~ 0))%>%
ungroup()%>%
select(-mode.id)%>%
summarise(total_legs = sum(legs),
total_dur = sum(total_dur),
tube_legs = sum(tube_legs),
bus_legs = sum(bus_legs),
walk_legs = sum(walk_legs),
walk_dur = sum(walk_dur))%>%
cbind(od)%>%
cbind(min(df$departureTime), max(df$arrivalTime))%>%
rename(dep_time = "min(df$departureTime)" , arrival_time = "max(df$arrivalTime)")%>%
select(from, to, dep_time, arrival_time, tube_legs,
bus_legs, walk_legs, total_legs, walk_dur, total_dur)
return(df2)
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.