简体   繁体   English

咕噜 map_df output 一个新的嵌套 dataframe 列

[英]purrr map_df output a new nested dataframe column

Is there a way to add an additional nested column that contains a new dataframe output from a function?有没有办法从 function 添加一个包含新 dataframe output 的附加嵌套列? Below is an example where i have written a function and now I'm trying to iterate over each row.下面是我编写了 function 的示例,现在我正在尝试遍历每一行。

Here is the function, which works if i run this on a single set of data.这是 function,如果我在一组数据上运行它,它就可以工作。 (See a,b,c,d) (见 a、b、c、d)

test data
a=759145
b=76619
c=257124
d=265261

spacing<- 880
distance <- c(spacing,spacing*2,spacing*3,spacing*4,spacing*5,spacing*6,spacing*7,spacing*8,spacing*9)   # distance away from the road


function function

parallel_spacing_fn<-function(a1,b1,c2,d2){

  x <-  c(a1,b1)
  y <-  c(c2 ,d2)
  
  datalist = list()
  datalist2 = list()
  
  
for (d in distance) {
  # Given a vector (defined by 2 points) and the distance, 
  # calculate a new vector that is distance away from the original 
  segment.shift <- function(x, y, d){
    
    # calculate vector
    v <- c(x[2] - x[1],y[2] - y[1])
    
    # normalize vector
    v <- v/sqrt((v[1]**2 + v[2]**2))
    
    # perpendicular unit vector
    vnp <- c( -v[2], v[1] )
    
    return(list(x =  c( x[1] + d*vnp[1], x[2] + d*vnp[1]), 
                y =  c( y[1] + d*vnp[2], y[2] + d*vnp[2])))
    
  }
  
  
  # allocate memory for the bike path
  xn <- numeric( (length(x) - 1) * 2 )
  yn <- numeric( (length(y) - 1) * 2 )
  
  for ( i in 1:(length(x) - 1) ) {
    xs <- c(x[i], x[i+1])
    ys <- c(y[i], y[i+1])
    new.s <- segment.shift( xs, ys, d )
    xn[(i-1)*2+1] <- new.s$x[1] ; xn[(i-1)*2+2] <- new.s$x[2]
    yn[(i-1)*2+1] <- new.s$y[1] ; yn[(i-1)*2+2] <- new.s$y[2]
  }
  
  
  
  dat1<-as_tibble()
  dat1<-as.data.frame(xn)%>%mutate(id=row_number())%>%mutate(Dist_Col=paste(d,"left",sep="_"))
  #datalist[[d]] <- dat1 # add it to your list
  
  dat2<-as_tibble()
  dat2<-as.data.frame(yn)%>%mutate(id=row_number())%>%mutate(Dist_Col=paste(d,"left",sep="_"))
  #datalist2[[d]] <- dat2 # add it to your list
  
  
  ###Now do right side
  
  # allocate memory for the bike path
  xn <- numeric( (length(x) - 1) * 2 )
  yn <- numeric( (length(y) - 1) * 2 )
  
  for ( i in 1:(length(x) - 1) ) {
    xs <- c(x[i], x[i+1])
    ys <- c(y[i], y[i+1])
    new.s <- segment.shift( xs, ys, -d )
    xn[(i-1)*2+1] <- new.s$x[1] ; xn[(i-1)*2+2] <- new.s$x[2]
    yn[(i-1)*2+1] <- new.s$y[1] ; yn[(i-1)*2+2] <- new.s$y[2]
  }
  
  
  dat3<-as_tibble()
  dat3<-as.data.frame(xn)%>%mutate(id=row_number())%>%mutate(Dist_Col=paste(d,"right",sep="_"))
  
  datcomb<- full_join(dat1,dat3)
  
  datalist[[d]] <- datcomb # add it to your list
  
  dat4<-as_tibble()
  dat4<-as.data.frame(yn)%>%mutate(id=row_number())%>%mutate(Dist_Col=paste(d,"right",sep="_"))
  
  dat2comb<- full_join(dat2,dat4)
  datalist2[[d]] <- dat2comb # add it to your list
  
}
  big_data = do.call(rbind, datalist)
  big_data2 = do.call(rbind, datalist2)
  
  
  comb_data<- full_join(big_data,big_data2)  
  
}   
x=parallel_spacing_fn(a,b,c,d) 

Here is the nested dataframe i would like to iterate over.这是我想迭代的嵌套 dataframe。 My intital attempt was to use PURR map_df, but now I'm wondering if i should write another for loop?我最初的尝试是使用 PURR map_df,但现在我想知道是否应该编写另一个 for 循环?

structure(list(OBJECTID_1 = c(170795, 158926, 170796, 170797, 
74758, 170798, 74757, 71331, 158748, 158800, 171144, 167991, 
170985, 159202, 167990), data = list(structure(list(X_1 = 791806.957864181, 
    X_2 = 785512.771698002, Y_1 = 233314.224607777, Y_2 = 229184.215067145), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -1L)), structure(list(
    X_1 = 792533.074659662, X_2 = 783388.018236045, Y_1 = 230885.419496296, 
    Y_2 = 224878.340874981), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L)), structure(list(X_1 = 795052.843843351, 
    X_2 = 785643.485631476, Y_1 = 229406.40394036, Y_2 = 223245.75510431), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -1L)), structure(list(
    X_1 = 796821.226335759, X_2 = 787145.416317165, Y_1 = 227462.665657252, 
    Y_2 = 221047.564227364), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L)), structure(list(X_1 = 795356.971998954, 
    X_2 = 791651.414871993, Y_1 = 237855.746923772, Y_2 = 233539.238149352), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -1L)), structure(list(
    X_1 = 787145.416317165, X_2 = 796821.226335759, Y_1 = 221047.564227364, 
    Y_2 = 227462.665657252), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L)), structure(list(X_1 = 798885.441403441, 
    X_2 = 792816.47413827, Y_1 = 237907.774432991, Y_2 = 230870.388411334), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -1L)), structure(list(
    X_1 = 801886.200413522, X_2 = 795052.843843351, Y_1 = 237384.986466147, 
    Y_2 = 229406.40394036), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L)), structure(list(X_1 = 782215.495007085, 
    X_2 = 778004.911567101, Y_1 = 229531.311160664, Y_2 = 226740.660699846), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -1L)), structure(list(
    X_1 = 774111.10739776, X_2 = 779461.875017808, Y_1 = 221345.75680274, 
    Y_2 = 221361.262444083), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L)), structure(list(X_1 = 779461.875017808, 
    X_2 = 774111.10739776, Y_1 = 221361.262444083, Y_2 = 221345.75680274), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -1L)), structure(list(
    X_1 = 779284.987142645, X_2 = 785357.019122782, Y_1 = 225436.143812854, 
    Y_2 = 229420.355663708), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L)), structure(list(X_1 = 785357.019122782, 
    X_2 = 779284.987142645, Y_1 = 229420.355663708, Y_2 = 225436.143812854), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -1L)), structure(list(
    X_1 = 784672.158689655, X_2 = 784708.07793811, Y_1 = 221376.364048245, 
    Y_2 = 216070.684445299), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L)), structure(list(X_1 = 784708.07793811, 
    X_2 = 784672.158689655, Y_1 = 216070.684445299, Y_2 = 221376.364048245), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -1L)))), row.names = c(NA, 
-15L), groups = structure(list(OBJECTID_1 = c(71331, 74757, 74758, 
158748, 158800, 158926, 159202, 167990, 167991, 170795, 170796, 
170797, 170798, 170985, 171144), .rows = structure(list(8L, 7L, 
    5L, 9L, 10L, 2L, 14L, 15L, 12L, 1L, 3L, 4L, 6L, 13L, 11L), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, 15L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

This is what i've tried with map_df这就是我用 map_df 尝试过的

simplepolys_filtered_nest %>%
  mutate(df2= ~map_df(.,parallel_spacing_fn(X_1,X_2,Y_1,Y_2)))
simplepolys_filtered_nest %>%
  mutate(df2= ~map_dfr(.,parallel_spacing_fn(X_1,X_2,Y_1,Y_2)))

thanks for your help!感谢您的帮助!

Does this what you are looking for:这是你正在寻找的:

df %>% 
unnest_wider(data) %>% 
mutate(res=pmap(list(X_1, X_2, Y_1, Y_2), parallel_spacing_fn)) %>% 
nest(X_1:Y_2) 


# A tibble: 15 x 3
# Groups:   OBJECTID_1 [15]
   OBJECTID_1 res               data            
        <dbl> <list>            <list>          
 1     170795 <df[,4] [36 x 4]> <tibble [1 x 4]>
 2     158926 <df[,4] [36 x 4]> <tibble [1 x 4]>
 3     170796 <df[,4] [36 x 4]> <tibble [1 x 4]>
 4     170797 <df[,4] [36 x 4]> <tibble [1 x 4]>
 5      74758 <df[,4] [36 x 4]> <tibble [1 x 4]>
 6     170798 <df[,4] [36 x 4]> <tibble [1 x 4]>
 7      74757 <df[,4] [36 x 4]> <tibble [1 x 4]>
 8      71331 <df[,4] [36 x 4]> <tibble [1 x 4]>
 9     158748 <df[,4] [36 x 4]> <tibble [1 x 4]>
10     158800 <df[,4] [36 x 4]> <tibble [1 x 4]>
11     171144 <df[,4] [36 x 4]> <tibble [1 x 4]>
12     167991 <df[,4] [36 x 4]> <tibble [1 x 4]>
13     170985 <df[,4] [36 x 4]> <tibble [1 x 4]>
14     159202 <df[,4] [36 x 4]> <tibble [1 x 4]>
15     167990 <df[,4] [36 x 4]> <tibble [1 x 4]>

There is definitely a more elegant way to access the elements of the list column without unnesting it before apply the function, but it seemed clear to me this way.在应用 function 之前,肯定有一种更优雅的方式可以访问列表列的元素,而无需取消嵌套,但这种方式对我来说似乎很清楚。

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM