簡體   English   中英

通過 function 傳遞列表並創建摘要 df

[英]Pass a list through a function and create a summary df

我想將一個列表傳遞給一個 function,該 function 從 TfL 的 API 中提取行程信息(行程段數(legs)、步行時間、總時長等),並輸出這些信息的摘要 df。

這是我的列表taxi.args.new的示例(列表長度為 1):

在此處輸入圖像描述

目前我只能通過引用列表的第一個元素,對長度為 1 的列表運行下面的代碼(運行正常):


library(httr)
library(jsonlite)
library(tidyverse)

# this function works for the list taxis.args.new (calling the first element)

# Request one journey from the TfL Journey Planner and summarise its legs.
#
# Arguments:
#   from, to, date, time, timeIs, journeyPreference, accessibilityPreference,
#   walkingSpeed, cyclePreference, bikeProficiency: each value is sent as the
#   query parameter of the same name.
#
# Returns:
#   A data frame with the columns from, to, dep_time, arrival_time, tube_legs,
#   bus_legs, walk_legs, total_legs, walk_dur and total_dur.
#
# The query is built from the arguments of this call, so the function no
# longer depends on a global `taxi.args.new` object and can be applied to any
# element of such a list via do.call().
api_function <- function(from, to, date, time, timeIs, journeyPreference,
                         accessibilityPreference, walkingSpeed,
                         cyclePreference, bikeProficiency) {

  tflpath <- "https://api.tfl.gov.uk/Journey/JourneyResults/{from}/to/{to}?date=[date]&time=[time]&timeIs=[timeIs]&journeyPreference=[journeyPreference]&mode=tube%2Cbus%2Coverground&accessibilityPreference=[accessibilityPreference]&walkingSpeed=[walkingSpeed]&cyclePreference=[cyclePreference]&bikeProficiency=[bikeProficiency]"

  journey_query <- list(
    from                    = from,
    to                      = to,
    date                    = date,
    time                    = time,
    timeIs                  = timeIs,
    journeyPreference       = journeyPreference,
    accessibilityPreference = accessibilityPreference,
    walkingSpeed            = walkingSpeed,
    cyclePreference         = cyclePreference,
    bikeProficiency         = bikeProficiency
  )

  request      <- GET(url = tflpath, query = journey_query)
  json_content <- fromJSON(content(request, "text"), flatten = TRUE)

  # obtain info about journey legs
  df  <- as.data.frame(json_content$journeys$legs)

  # obtain info about journey origin and destination
  od  <- as.data.frame(json_content$journeyVector)[, 2:3]

  df1 <- df[, c("duration", "mode.id")]

  # Summarise per mode first, then collapse to a single row.
  # Every per-mode column falls back to 0 (`TRUE ~ 0`): the URL also requests
  # overground journeys, and without a fallback case_when() returns NA for
  # such legs, which would turn the summed counts into NA.
  df1 %>%
    group_by(mode.id) %>%
    summarise(legs = n(), total_dur = sum(duration)) %>%
    mutate(
      tube_legs = case_when(mode.id == "tube" ~ as.numeric(legs), TRUE ~ 0),
      bus_legs  = case_when(mode.id == "bus" ~ as.numeric(legs), TRUE ~ 0),
      walk_legs = case_when(mode.id == "walking" ~ as.numeric(legs), TRUE ~ 0),
      walk_dur  = case_when(mode.id == "walking" ~ as.numeric(total_dur),
                            TRUE ~ 0)
    ) %>%
    ungroup() %>%
    select(-mode.id) %>%
    summarise(total_legs = sum(legs),
              total_dur  = sum(total_dur),
              tube_legs  = sum(tube_legs),
              bus_legs   = sum(bus_legs),
              walk_legs  = sum(walk_legs),
              walk_dur   = sum(walk_dur)) %>%
    cbind(od) %>%
    # Name the two time columns directly instead of renaming deparsed names.
    cbind(dep_time     = min(df$departureTime),
          arrival_time = max(df$arrivalTime)) %>%
    select(from, to, dep_time, arrival_time, tube_legs,
           bus_legs, walk_legs, total_legs, walk_dur, total_dur)
}

## Summarise the journey described by the first argument set and coerce the
## result to a data frame.
test_df <- as.data.frame(
  do.call(what = api_function, args = taxi.args.new[[1]])
)

這就是test_df的樣子:

在此處輸入圖像描述

但是,我想擴大規模並為長度為 200k 的列表執行此操作,但無法弄清楚如何執行此操作。 我希望 output 是一個 df,所有結果都附加在一起。 所以它看起來有點像這樣(只有 2 行的示例):

在此處輸入圖像描述

任何幫助將不勝感激。 你大概看得出來,我已經嘗試過類似的做法(所以代碼中才有 for 循環)。

更新:

非常感謝 Ronak 的建議。 這是整理后的最終結果:

# Request one journey from the TfL Journey Planner and return a summary of
# its legs.
#
# Arguments:
#   from, to, date, time, timeIs, journeyPreference, accessibilityPreference,
#   walkingSpeed, cyclePreference, bikeProficiency: each value is sent as the
#   query parameter of the same name.
#
# Returns:
#   A data frame with the columns from, to, dep_time, arrival_time, tube_legs,
#   bus_legs, walk_legs, overground_legs, total_legs, walk_dur and total_dur.
api_function <- function(from, to, date, time, timeIs, journeyPreference, accessibilityPreference, walkingSpeed, cyclePreference, bikeProficiency) {

  tflpath <- "https://api.tfl.gov.uk/Journey/JourneyResults/{from}/to/{to}?date=[date]&time=[time]&timeIs=[timeIs]&journeyPreference=[journeyPreference]&mode=tube%2Cbus%2Coverground&accessibilityPreference=[accessibilityPreference]&walkingSpeed=[walkingSpeed]&cyclePreference=[cyclePreference]&bikeProficiency=[bikeProficiency]"

  journey_query <- list(
    from                    = from,
    to                      = to,
    date                    = date,
    time                    = time,
    timeIs                  = timeIs,
    journeyPreference       = journeyPreference,
    accessibilityPreference = accessibilityPreference,
    walkingSpeed            = walkingSpeed,
    cyclePreference         = cyclePreference,
    bikeProficiency         = bikeProficiency
  )

  response <- GET(url = tflpath, query = journey_query)
  parsed   <- fromJSON(content(response, "text"), flatten = TRUE)

  # Leg-level fields of the response, un-nested by one level.
  legs_flat <- flatten(parsed$journeys$legs)
  # Duration and mode of each leg.
  leg_modes <- data.frame(duration = legs_flat$duration,
                          mode.id  = legs_flat$mode.id)
  # Second and third columns of the journey vector (selected below as
  # `from` and `to`).
  endpoints <- as.data.frame(parsed$journeyVector)[, 2:3]

  # Modes that have an explicit branch; any other mode yields NA, because
  # case_when() returns NA when no condition matches.
  handled_modes <- c("tube", "walking", "bus", "overground")
  legs_of <- function(target, mode_id, n_legs) {
    case_when(mode_id == target ~ as.numeric(n_legs),
              mode_id %in% handled_modes ~ 0)
  }

  # One row per mode: leg count and total duration, spread into per-mode
  # columns.
  per_mode <- leg_modes %>%
    group_by(mode.id) %>%
    summarise(legs = n(), total_dur = sum(duration)) %>%
    mutate(tube_legs       = legs_of("tube", mode.id, legs),
           bus_legs        = legs_of("bus", mode.id, legs),
           walk_legs       = legs_of("walking", mode.id, legs),
           overground_legs = legs_of("overground", mode.id, legs),
           walk_dur        = case_when(mode.id == "walking" ~ as.numeric(total_dur),
                                       TRUE ~ 0)) %>%
    ungroup() %>%
    select(-mode.id)

  # Collapse the per-mode rows into a single row of totals.
  totals <- summarise(per_mode,
                      total_legs      = sum(legs),
                      total_dur       = sum(total_dur),
                      tube_legs       = sum(tube_legs),
                      bus_legs        = sum(bus_legs),
                      overground_legs = sum(overground_legs),
                      walk_legs       = sum(walk_legs),
                      walk_dur        = sum(walk_dur))

  # Attach the endpoints plus the minimum departure and maximum arrival time.
  journey_summary <- cbind(totals,
                           endpoints,
                           dep_time     = min(legs_flat$departureTime),
                           arrival_time = max(legs_flat$arrivalTime))

  select(journey_summary,
         from, to, dep_time, arrival_time, tube_legs,
         bus_legs, walk_legs, overground_legs, total_legs, walk_dur, total_dur)
}

# Call api_function once per argument set, then stack the returned data
# frames into a single data frame.
results <- do.call(
  what = rbind,
  args = lapply(
    X   = seq_along(taxi.args.new),
    FUN = function(idx) do.call(what = api_function, args = taxi.args.new[[idx]])
  )
)

您可以嘗試使用lapply

# Call api_function once per argument set, passing the element index as an
# extra trailing argument, then stack the returned data frames.
result <- do.call(
  what = rbind,
  args = lapply(
    X   = seq_along(taxi.args.new),
    FUN = function(idx) {
      do.call(what = api_function, args = c(taxi.args.new[[idx]], idx))
    }
  )
)

或使用purrr

library(purrr)
# Call api_function once per argument set (passing the element index as an
# extra trailing argument) and row-bind the returned data frames.
#
# Inside a purrr formula lambda the current element is `.x`; the original
# indexed with a bare `x`, which is not defined by the lambda and therefore
# either fails or silently picks up an unrelated global. do.call() is the
# base-R equivalent of purrr's deprecated invoke().
result <- map_df(
  seq_along(taxi.args.new),
  ~ do.call(api_function, c(taxi.args.new[[.x]], .x))
)

我稍微更改了 function 並添加了另一個參數i

# Request one journey from the TfL Journey Planner and summarise its legs.
#
# Arguments:
#   from, to, date, time, timeIs, journeyPreference, accessibilityPreference,
#   walkingSpeed, cyclePreference, bikeProficiency: each value is sent as the
#   query parameter of the same name.
#   i: optional and ignored. It is accepted only so that callers which append
#      the element index to the argument list keep working.
#
# Returns:
#   A data frame with the columns from, to, dep_time, arrival_time, tube_legs,
#   bus_legs, walk_legs, total_legs, walk_dur and total_dur.
#
# The query is built from the arguments of this call instead of indexing a
# global `taxi.args.new` list, so the function works on any argument set.
api_function <- function(from, to, date, time, timeIs, journeyPreference,
                         accessibilityPreference, walkingSpeed,
                         cyclePreference, bikeProficiency, i = NULL) {
  tflpath <- "https://api.tfl.gov.uk/Journey/JourneyResults/{from}/to/{to}?date=[date]&time=[time]&timeIs=[timeIs]&journeyPreference=[journeyPreference]&mode=tube%2Cbus%2Coverground&accessibilityPreference=[accessibilityPreference]&walkingSpeed=[walkingSpeed]&cyclePreference=[cyclePreference]&bikeProficiency=[bikeProficiency]"

  journey_query <- list(
    from                    = from,
    to                      = to,
    date                    = date,
    time                    = time,
    timeIs                  = timeIs,
    journeyPreference       = journeyPreference,
    accessibilityPreference = accessibilityPreference,
    walkingSpeed            = walkingSpeed,
    cyclePreference         = cyclePreference,
    bikeProficiency         = bikeProficiency
  )

  request      <- GET(url = tflpath, query = journey_query)
  json_content <- fromJSON(content(request, "text"), flatten = TRUE)

  # obtain info about journey legs
  df  <- as.data.frame(json_content$journeys$legs)
  # obtain info about journey origin and destination
  od  <- as.data.frame(json_content$journeyVector)[, 2:3]
  df1 <- df[, c("duration", "mode.id")]

  # Summarise per mode first, then collapse to a single row.
  # Every per-mode column falls back to 0 (`TRUE ~ 0`): the URL also requests
  # overground journeys, and without a fallback case_when() returns NA for
  # such legs, which would turn the summed counts into NA.
  df1 %>%
    group_by(mode.id) %>%
    summarise(legs = n(), total_dur = sum(duration)) %>%
    mutate(
      tube_legs = case_when(mode.id == "tube" ~ as.numeric(legs), TRUE ~ 0),
      bus_legs  = case_when(mode.id == "bus" ~ as.numeric(legs), TRUE ~ 0),
      walk_legs = case_when(mode.id == "walking" ~ as.numeric(legs), TRUE ~ 0),
      walk_dur  = case_when(mode.id == "walking" ~ as.numeric(total_dur),
                            TRUE ~ 0)
    ) %>%
    ungroup() %>%
    select(-mode.id) %>%
    summarise(total_legs = sum(legs),
              total_dur  = sum(total_dur),
              tube_legs  = sum(tube_legs),
              bus_legs   = sum(bus_legs),
              walk_legs  = sum(walk_legs),
              walk_dur   = sum(walk_dur)) %>%
    cbind(od) %>%
    # Name the two time columns directly instead of renaming deparsed names.
    cbind(dep_time     = min(df$departureTime),
          arrival_time = max(df$arrivalTime)) %>%
    select(from, to, dep_time, arrival_time, tube_legs,
           bus_legs, walk_legs, total_legs, walk_dur, total_dur)
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM