简体   繁体   中英

R - How to dynamically add columns to a data frame based on multiple list-columns whose number of variables increases

I need to add new columns to a data frame when the list stored in a column contains more variables (if a column holds a list, add one new column per list element; otherwise keep that column as it is).

My data frame,

U_ID  Value                                 AD   CT value1              Citycode
    1 list(`Cno`="50",'cna'="\n\rjhon\n")   ia   BG list(`Cno`="50")       TY
    1 list(`Cno`="20",`cna`="guna")         AS   DB list(`Cno`="\n\r20")   UI
    2 list(`Cno`="30",`cna`="rt",`cf`="ty") BN   FV list(`Cno`="30")       GH
    2 NULL                                  VF   TY NULL                   TY
    3 list(`Cno`="\n\r30")                  RR   TT list(`Cno`="30")       ST

My desired output is:

U_ID  Value                                  Cno   cna   cf      AD   CT   value1                Cno1           Citycode
1     list(`Cno`="50",`cna`="\n\rjhon\n")    50    jhon  NULL    ia   BG   list(`Cno1`="50")       50              TY
1     list(`Cno`="20",`cna`="guna")          20   guna  NULL     AS   DB   list(`Cno1`="\n\r20")   20              UI
2     list(`Cno`="30",`cna`="rt",`cf`="ty")  30    rt    ty      BN   FV   list(`Cno1`="30")       30              GH
2     NULL                                   NULL  NULL  NULL    VF   TY   NULL                    NULL            TY
3     list(`Cno`="\n\r30")                   30    NULL  NULL     RR   TT  list(`Cno1`="30")       30              ST

Data,

# Sample data frame (5 rows, 6 columns). `Value` and `Value1` are
# list-columns whose elements are named lists of strings; the fourth entry of
# each is the string "NULL" (not a real NULL) and marks a missing value.
# The expression is not assigned here -- bind it to a name before use.
structure(list(U_ID = c(1, 1, 2, 2, 3), Value = list(structure(list(
    `Cno#` = "50", cna = "\n\rjhon\n"), .Names = c("Cno#", "cna"
)), structure(list(`Cno#` = "50", cna = "guna"), .Names = c("Cno#", 
"cna")), structure(list(`Cno#` = "30", cna = "rt", cf = "ty"), .Names = c("Cno#", 
"cna", "cf")), "NULL", structure(list(`Cno#` = "\n\r30"), .Names = "Cno#")), 
    AD = c("ia", "AS", "BN", "VF", "RR"), CT = c("BG", "DB", 
    "FV", "TY", "TT"), Value1 = list(structure(list(`Cno#` = "50"), .Names = "Cno#"), 
        structure(list(`Cno#` = "\n\r20"), .Names = "Cno#"), 
        structure(list(`Cno#` = "30"), .Names = "Cno#"), "NULL", 
        structure(list(`Cno#` = "30"), .Names = "Cno#")), Citycode = c("TY", 
    "UI", "GH", "RY", "ST")), .Names = c("U_ID", "Value", "AD", 
"CT", "Value1", "Citycode"), row.names = c(NA, -5L), class = "data.frame")

This is a solution with dplyr.

library(dplyr)

# Expand the list-columns `Value` and `Value1` of `dat` into ordinary columns.
# For each list-column the non-"NULL" entries get a running number (as a
# string, NA for "NULL" rows); bind_rows() turns those entries into a table
# carrying the same number in its `.id` column, and a left join on that
# number attaches the new columns to the right rows. The helper key columns
# are dropped at the end. Wrapped in local() so no intermediates are left
# behind; the result is returned (and printed at top level).
local({
  # --- Value -----------------------------------------------------------
  keyed <- mutate(
    dat,
    idx = as.character(replace(cumsum(Value != "NULL"), Value == "NULL", NA))
  )
  value_fields <- bind_rows(
    pull(filter(keyed, Value != "NULL"), Value),
    .id = "idx"
  )
  with_value <- left_join(keyed, value_fields, by = "idx")

  # --- Value1 ----------------------------------------------------------
  keyed2 <- mutate(
    with_value,
    idx2 = as.character(replace(cumsum(Value1 != "NULL"), Value1 == "NULL", NA))
  )
  value1_fields <- bind_rows(
    pull(filter(keyed2, Value1 != "NULL"), Value1),
    .id = "idx2"
  )
  with_both <- left_join(keyed2, value1_fields, by = "idx2")

  select(with_both, -idx, -idx2)
})

Here, dat is the name of your data frame.

The result:

  U_ID          Value AD CT Value1 Citycode Cno#.x        cna   cf Cno#.y
1    1 50, \n\rjhon\n ia BG     50       TY     50 \n\rjhon\n <NA>     50
2    1       50, guna AS DB \n\r20       UI     50       guna <NA> \n\r20
3    2     30, rt, ty BN FV     30       GH     30         rt   ty     30
4    2           NULL VF TY   NULL       RY   <NA>       <NA> <NA>   <NA>
5    3         \n\r30 RR TT     30       ST \n\r30       <NA> <NA>     30

Edit
Replaced my answer with one that accounts for multiple such list-columns.


Here is a possible base R approach:

# Substitute NA for NULL so that a missing field still yields exactly one
# value per row.
na_if_null <- function(x) if (is.null(x)) NA else x

# For every list-column of `df`, build a named list holding one character
# vector per field name that occurs anywhere in that column. Each vector has
# one whitespace-trimmed value per row, with NA where the row lacks the field
# (this includes rows holding the placeholder string "NULL", which has no
# names).
new_cols <- lapply(
  Filter(is.list, df),
  function(list_col) {
    # All distinct field names used by any row of this list-column.
    field_names <- unique(unlist(lapply(list_col, names), use.names = FALSE))
    lapply(setNames(nm = field_names), function(name) {
      # vapply() guarantees one string per row; a field that is empty or
      # holds several values raises an error here instead of silently
      # producing a list or matrix that would corrupt the data frame below.
      vapply(
        list_col,
        function(x) trimws(na_if_null(as.list(x)[[name]])),
        character(1)
      )
    })
  }
)

# Append the new columns to the original data. Flattening `new_cols` with
# c() names each column "<list-column>.<field>"; check.names = FALSE keeps
# non-syntactic names such as "Value.Cno#" unchanged.
res <- do.call(
  data.frame,
  c(
    list(df, check.names = FALSE, stringsAsFactors = FALSE),
    do.call(c, new_cols)
  )
)

#   U_ID          Value AD CT Value1 Citycode Value.Cno# Value.cna Value.cf Value1.Cno#
# 1    1 50, \n\rjhon\n ia BG     50       TY         50      jhon     <NA>          50
# 2    1       50, guna AS DB \n\r20       UI         50      guna     <NA>          20
# 3    2     30, rt, ty BN FV     30       GH         30        rt       ty          30
# 4    2           NULL VF TY   NULL       RY       <NA>      <NA>     <NA>        <NA>
# 5    3         \n\r30 RR TT     30       ST         30      <NA>     <NA>          30

I believe this gives exactly your expected output:

library(dplyr)
library(tidyr) # unnest(), spread()
library(purrr) # map()

# Build the requested layout from `df1`: the fields of `Value` become the
# columns Cno, cna and cf, and the content of `Value1` becomes Cno1.
# NOTE(review): unnest(Value_, vnames) and spread() are the older tidyr
# interfaces; recent tidyr versions may warn about them -- confirm against
# the installed version before modernising.
df1 %>%
  # Widen `Value`: one row per (row, field) pair, then one column per field
  # name. The join has no `by`, so it matches on all shared columns.
  left_join(
    df1 %>%
      filter(Value != "NULL") %>%
      mutate(Value_ = map(Value, unlist), vnames = map(Value_, names)) %>%
      unnest(Value_, vnames) %>%
      spread(vnames, Value_) %>%
      rename(Cno = `Cno#`)
  ) %>%
  # Add Cno1 as a list-column holding the numeric content of `Value1`; the
  # list-columns are dropped first so the join matches on the plain columns.
  left_join(
    df1 %>%
      filter(Value1 != "NULL") %>%
      mutate(Cno1 = map(Value1, ~as.numeric(unlist(.x)))) %>%
      select(-Value, -Value1)
  ) %>%
  # Put the columns in the order requested in the question.
  select(U_ID, Value, Cno, cna, cf, AD, CT, Value1, Cno1, Citycode)

#   U_ID          Value    Cno        cna   cf AD CT Value1 Cno1 Citycode
# 1    1 50, \n\rjhon\n     50 \n\rjhon\n <NA> ia BG     50   50       TY
# 2    1       50, guna     50       guna <NA> AS DB \n\r20   20       UI
# 3    2     30, rt, ty     30         rt   ty BN FV     30   30       GH
# 4    2           NULL   <NA>       <NA> <NA> VF TY   NULL NULL       RY
# 5    3         \n\r30 \n\r30       <NA> <NA> RR TT     30   30       ST

The technical posts on this site are licensed under CC BY-SA 4.0. If you reproduce them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM