简体   繁体   中英

tidyr::pivot_longer tidying multiple columns

I need to tidy the following data set in a compact manner using pivot_longer.

My aim is to have a trial column that shows the numbers in the column names from will_f1:will_f16, dface1:dface16, dfood1:dfood16, dout1:dout16. I would like to also retrieve multiple columns for the variables as will, face, food, out.

Currently I am using a code like the following for each variable (will, face, food, out) separately then I merge the separate data frames based on id and trial:

will_long<-data.will %>% 
  select(ID:will_f16) %>%
  pivot_longer(starts_with("will_"),
               names_to ="trial", 
               names_prefix = "will_f",
               values_to = "will")

Data sample:

ID,will_f1,will_f2,will_f3,will_f4,will_f5,will_f6,will_f7,will_f8,will_f9,will_f10,will_f11,will_f12,will_f13,will_f14,will_f15,will_f16,will_ef1,will_ef2,will_ef3,will_ef4,will_ef5,will_ef6,will_ef7,will_ef8,dface1,dface2,dface3,dface4,dface5,dface6,dface7,dface8,dface9,dface10,dface11,dface12,dface13,dface14,dface15,dface16,dfood1,dfood2,dfood3,dfood4,dfood5,dfood6,dfood7,dfood8,dfood9,dfood10,dfood11,dfood12,dfood13,dfood14,dfood15,dfood16,dout1,dout2,dout3,dout4,dout5,dout6,dout7,dout8,dout9,dout10,dout11,dout12,dout13,dout14,dout15,dout16,dexfood1,dexfood2,dexfood3,dexfood4,dexfood5,dexfood6,dexfood7,dexfood8
1,6,1,1,5,6,2,1,1,1,1,5,3,2,5,3,1,1,3,2,1,5,1,4,1,QID117,QID118,QID123,QID114,QID113,QID111,QID110,QID124,QID122,QID119,QID109,QID112,QID115,QID121,QID120,QID108,QID70,QID56,QID61,QID49,QID80,QID3,QID53,QID57,QID29,QID69,QID73,QID81,QID63,QID76,QID47,QID85,QID92,QID103,QID95,QID97,QID106,QID90,QID93,QID105,QID100,QID107,QID102,QID99,QID101,QID94,QID91,QID104,QID45,QID35,QID67,QID51,QID86,QID58,QID74,QID82

Perhaps this helps - rename the columns that doesn't have the _ by inserting the _ between the letter and the digit using a regex lookaround, then use pivot_longer to split at the _ by capturing the characters before and after the _ with names_pattern

library(dplyr)
library(stringr)
library(tidyr)
data.will %>% 
   rename_with(~ str_replace(., "(?<=[A-Za-z])(?=\\d)", "_"), -contains("_")) %>% 
   pivot_longer(col = -ID, names_to = c(".value", "trial"),
       names_pattern = "^([^_]+)_f?(.*)")

-output

# A tibble: 24 × 7
      ID trial  will dface  dfood dout   dexfood
   <int> <chr> <int> <chr>  <chr> <chr>  <chr>  
 1     1 1         6 QID117 QID70 QID92  QID45  
 2     1 2         1 QID118 QID56 QID103 QID35  
 3     1 3         1 QID123 QID61 QID95  QID67  
 4     1 4         5 QID114 QID49 QID97  QID51  
 5     1 5         6 QID113 QID80 QID106 QID86  
 6     1 6         2 QID111 QID3  QID90  QID58  
 7     1 7         1 QID110 QID53 QID93  QID74  
 8     1 8         1 QID124 QID57 QID105 QID82  
 9     1 9         1 QID122 QID29 QID100 <NA>   
10     1 10        1 QID119 QID69 QID107 <NA>   
# … with 14 more rows

data

data.will <- structure(list(ID = 1L, will_f1 = 6L, will_f2 = 1L, will_f3 = 1L, 
    will_f4 = 5L, will_f5 = 6L, will_f6 = 2L, will_f7 = 1L, will_f8 = 1L, 
    will_f9 = 1L, will_f10 = 1L, will_f11 = 5L, will_f12 = 3L, 
    will_f13 = 2L, will_f14 = 5L, will_f15 = 3L, will_f16 = 1L, 
    will_ef1 = 1L, will_ef2 = 3L, will_ef3 = 2L, will_ef4 = 1L, 
    will_ef5 = 5L, will_ef6 = 1L, will_ef7 = 4L, will_ef8 = 1L, 
    dface1 = "QID117", dface2 = "QID118", dface3 = "QID123", 
    dface4 = "QID114", dface5 = "QID113", dface6 = "QID111", 
    dface7 = "QID110", dface8 = "QID124", dface9 = "QID122", 
    dface10 = "QID119", dface11 = "QID109", dface12 = "QID112", 
    dface13 = "QID115", dface14 = "QID121", dface15 = "QID120", 
    dface16 = "QID108", dfood1 = "QID70", dfood2 = "QID56", dfood3 = "QID61", 
    dfood4 = "QID49", dfood5 = "QID80", dfood6 = "QID3", dfood7 = "QID53", 
    dfood8 = "QID57", dfood9 = "QID29", dfood10 = "QID69", dfood11 = "QID73", 
    dfood12 = "QID81", dfood13 = "QID63", dfood14 = "QID76", 
    dfood15 = "QID47", dfood16 = "QID85", dout1 = "QID92", dout2 = "QID103", 
    dout3 = "QID95", dout4 = "QID97", dout5 = "QID106", dout6 = "QID90", 
    dout7 = "QID93", dout8 = "QID105", dout9 = "QID100", dout10 = "QID107", 
    dout11 = "QID102", dout12 = "QID99", dout13 = "QID101", dout14 = "QID94", 
    dout15 = "QID91", dout16 = "QID104", dexfood1 = "QID45", 
    dexfood2 = "QID35", dexfood3 = "QID67", dexfood4 = "QID51", 
    dexfood5 = "QID86", dexfood6 = "QID58", dexfood7 = "QID74", 
    dexfood8 = "QID82"), class = "data.frame", row.names = c(NA, 
-1L))

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM