繁体   English   中英

重塑 R 中的数据框和连接列

[英]reshape data frame and concatenating columns in R

我试图将我的数据框从宽格式改成长格式。 目前数据框如下所示:

structure(list(study_site = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 
5L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 4L, 4L, 1L, 2L, 3L, 1L, 4L, 1L, 
4L, 3L, 3L, 3L, 1L, 3L, 5L, 4L, 4L, 4L, 3L, 3L, 5L, 5L, 4L, 4L, 
4L, 1L, 4L, 3L, 5L, 5L, 5L, 1L, 3L, 5L, 3L, 3L, 3L, 5L, 5L, 3L, 
4L, 2L), .Label = c("N", "no_nest", "O", "S", "W"), class = "factor"), 
    coords.N = structure(c(54L, 54L, 40L, 40L, 40L, 40L, 39L, 
    67L, 67L, 55L, 55L, 64L, 64L, 64L, 78L, 81L, 47L, 80L, 83L, 
    60L, 46L, 46L, 76L, 88L, 88L, 88L, 84L, 84L, 30L, 58L, 58L, 
    58L, 25L, 25L, 19L, 19L, 42L, 42L, 42L, 29L, 45L, 90L, 91L, 
    91L, 91L, 91L, 89L, 89L, 87L, 87L, 87L, 56L, 56L, 61L, 35L, 
    36L), .Label = c("40.40463", "48.40168", "48.40178", "48.40215", 
    "48.40235", "48.40309", "48.40390", "48.40393", "48.40396", 
    "48.40405", "48.40410", "48.40411", "48.40415", "48.40416", 
    "48.40424", "48.40425", "48.40430", "48.40435", "48.40436 ", 
    "48.40438", "48.40443", "48.40450", "48.40451", "48.40454", 
    "48.40455", "48.40459", "48.40460", "48.40461", "48.40466", 
    "48.40466 ", "48.40467", "48.40469", "48.40471", "48.40477", 
    "48.40479 ", "48.40481", "48.40482", "48.40483", "48.40488 ", 
    "48.40491", "48.40493", "48.40504 ", "48.40508", "48.40513", 
    "48.40515", "48.40519 ", "48.40522 ", "48.40523", "48.40525", 
    "48.40526", "48.40529", "48.40532", "48.40537", "48.40537 ", 
    "48.40538 ", "48.40543 ", "48.40549", "48.40549 ", "48.40557", 
    "48.40557 ", "48.40558", "48.40565", "48.40571", "48.40575", 
    "48.40580", "48.40584", "48.40586 ", "48.40591", "48.40596", 
    "48.40598", "48.40599", "48.40611", "48.40612", "48.40617", 
    "48.40626", "48.40632 ", "48.40633", "48.40635 ", "48.40636", 
    "48.40637", "48.40638 ", "48.40639", "48.40639 ", "48.40641 ", 
    "48.40652", "48.40655", "48.40656 ", "48.40657 ", "48.40687 ", 
    "48.40690 ", "48.40703", "48.40718", "48.40719", "48.40726", 
    "48.40742", "48.40748", "NO_DATA"), class = "factor"), coords.E = structure(c(67L, 
    67L, 49L, 49L, 49L, 49L, 27L, 67L, 67L, 70L, 70L, 68L, 68L, 
    68L, 87L, 94L, 68L, 83L, 90L, 73L, 52L, 52L, 2L, 95L, 95L, 
    95L, 93L, 93L, 32L, 69L, 69L, 69L, 55L, 55L, 24L, 24L, 29L, 
    29L, 29L, 30L, 48L, 85L, 1L, 1L, 1L, 1L, 78L, 78L, 79L, 79L, 
    79L, 64L, 64L, 63L, 66L, 45L), .Label = c(" 015.82024", " 015.82164", 
    "015.80237", "015.80263", "015.80309", "015.80341", "015.80369", 
    "015.80388", "015.80394", "015.80399", "015.80406", "015.80435", 
    "015.80436", "015.80466", "015.80512", "015.80517", "015.80548", 
    "015.80551", "015.80572", "015.80583", "015.80609", "015.80636", 
    "015.80659", "015.80703", "015.80723", "015.80779", "015.80795", 
    "015.80803", "015.80821", "015.80843", "015.80871", "015.80875", 
    "015.80888", "015.80897", "015.80901", "015.80903", "015.80905", 
    "015.80906", "015.80908", "015.80909", "015.80921", "015.80923", 
    "015.80929", "015.80939", "015.80993", "015.81007", "015.81018", 
    "015.81087", "015.81113", "015.81132", "015.81151", "015.81180", 
    "015.81241", "015.81273", "015.81305", "015.81406", "015.81422", 
    "015.81522", "015.81526", "015.81543", "015.81546", "015.81564", 
    "015.81628", "015.81632", "015.81678", "015.81682", "015.81700", 
    "015.81703", "015.81735", "015.81739", "015.81770", "015.81783", 
    "015.81784", "015.81800", "015.81849", "015.81992", "015.82012", 
    "015.82029", "015.82039", "015.82083", "015.82099", "015.82126", 
    "015.82180", "015.82230", "015.82232", "015.82255", "015.82265", 
    "015.82290", "015.82303", "015.82304", "015.82346", "015.82362", 
    "015.82376", "015.82398", "015.82451", "015.82500", "015.82519", 
    "015.82555", "015.82579", "015.82634", "NO_DATA"), class = "factor"), 
    study_ID = c(120L, 120L, 1L, 1L, 1L, 1L, 9L, 39L, 39L, 109L, 
    109L, 110L, 110L, 110L, 45L, 58L, 121L, 96L, 97L, 40L, 43L, 
    43L, 47L, 57L, 57L, 57L, 114L, 114L, 67L, 71L, 71L, 71L, 
    83L, 83L, 4L, 4L, 10L, 10L, 10L, 106L, 108L, 46L, 115L, 115L, 
    115L, 115L, 116L, 116L, 117L, 117L, 117L, 70L, 70L, 119L, 
    95L, 3L), species = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 2L), .Label = c("barn swallow", "no_nest"), class = "factor"), 
    first_visit = c(1L, 2L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 5L, 0L, 
    1L, 0L, 2L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 
    1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 2L, 0L, 3L, 0L, 0L, 0L, 4L, 
    1L, 8L, 0L, 0L, 1L, 2L, 1L, 5L, 0L, 0L, 1L, 0L, 1L, 1L, 0L
    ), second_visit = c(1L, 2L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 5L, 
    0L, 1L, 0L, 2L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 
    0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 2L, 0L, 4L, 0L, 0L, 
    4L, 1L, 0L, 8L, 0L, 1L, 2L, 1L, 0L, 5L, 0L, 1L, 0L, 0L, 1L, 
    0L), third_visit = c(0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 
    0L, 5L, 0L, 1L, 2L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 
    0L, 0L, 0L, 1L, 1L, 0L, 0L, 2L, 0L, 1L, 0L, 2L, 0L, 0L, 6L, 
    1L, 4L, 1L, 0L, 0L, 8L, 1L, 2L, 1L, 0L, 0L, 5L, 1L, 1L, 0L, 
    0L, 0L), used_1st_visit = c(0L, 2L, 1L, 0L, 0L, 0L, 1L, 0L, 
    1L, 2L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 
    0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 3L, 0L, 
    0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 
    1L, 0L, 0L), used_2nd_visit = c(0L, 2L, 1L, 0L, 0L, 0L, 1L, 
    0L, 1L, 2L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 
    1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 2L, 0L, 
    4L, 0L, 0L, 0L, 1L, 0L, 5L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 
    0L, 0L, 0L, 0L), used_3rd_visit = c(0L, 0L, 1L, 0L, 0L, 0L, 
    1L, 0L, 1L, 0L, 4L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 
    1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 2L, 0L, 1L, 0L, 2L, 
    0L, 0L, 6L, 1L, 0L, 1L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 2L, 
    0L, 1L, 0L, 0L, 0L), nest_condition = structure(c(3L, 5L, 
    5L, 2L, 5L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 2L, 5L, 3L, 2L, 
    4L, 5L, 5L, 5L, 5L, 3L, 2L, 5L, 5L, 2L, 2L, 5L, 1L, 5L, 5L, 
    5L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 2L, 3L, 5L, 5L, 5L, 2L, 5L, 
    3L, 5L, 5L, 5L, 2L, 5L, 3L, 5L, 4L), .Label = c(" ready ", 
    "damaged", "in_progress", "no_nest", "ready"), class = "factor"), 
    nesting_site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 3L, 1L, 
    1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L), .Label = c("inside", "no_nest", "outside"), class = "factor"), 
    distance = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 3L, 4L, 3L, 6L, 4L, 4L, 2L, 2L, 4L, 2L, 
    2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    3L, 2L, 4L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    4L, 6L), .Label = c("1", "2", "3", "4", "no_data", "no_nest"
    ), class = "factor"), material = structure(c(5L, 5L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 5L, 5L, 1L, 1L, 2L), .Label = c("fine", "fine plaster", 
    "medium fine plaster", "no_data", "rough", "rough plaster", 
    "smooth plaster", "under construction", "wood"), class = "factor"), 
    housetype = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 
    3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 
    3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 5L, 5L, 5L, 
    4L, 4L, 5L, 3L, 3L, 3L, 3L, 5L, 5L, 3L, 3L, 3L, 4L, 4L, 5L, 
    5L, 4L), .Label = c("auto repair shop", "barn ", "hall", 
    "residence", "stable"), class = "factor"), usage_house = structure(c(5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L), .Label = c("auto_repair", 
    "barn", "inhabited", "under construction", "used"), class = "factor"), 
    age = c(1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L)), row.names = c(1L, 
2L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
19L, 21L, 22L, 23L, 24L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 
35L, 36L, 37L, 38L, 39L, 40L, 41L, 89L, 90L, 91L, 92L, 93L, 94L, 
95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L, 105L, 
106L, 107L, 108L, 109L, 111L), class = "data.frame")

used_1st..2nd... 表示鸟类在第一、第二、...控制时使用了一定数量的巢。

我希望我的数据框中的每一行始终代表一个已使用/未使用的嵌套以及 no_nest:

     ID species `1st_visit` `2nd_visit` `3rd_visit` used_1st_visit used_2nd_visit used_3rd_visit
  <dbl> <chr>         <dbl>       <dbl>       <dbl>          <dbl>          <dbl>          <dbl>
1   120 barn s~           1           1           0              0              0              0
2   120 barn s~           1           1           0              1              1              0
3   120 barn s~           1           1           0              1              1              0
4    39 barn s~           1           1           1              1              1              1
5     8 barn s~           1           1           1              1              0              0
6     8 barn s~           1           1           1              0              0              0

不幸的是,我不知道如何连接列来获得最终的数据框。 有人有想法吗?

我不完全确定您要的是什么,但这就是我所理解的:在长数据框中...

  • 如果所有访问(coulmns used_first_visit、used_sec_visit 等)为 0,则将它们合并为一行,将其标记为 0
  • 如果任何访问不为 0,则保留与非零访问一样多的行并用 1 标记它们

这是我的 dplyr 解决方案(它不是很漂亮,但它有效):

# create data
dat <- data.frame("visits" = c("first", "first", "second", "second", "third", "third"), "study_id" = rep(120, 6), "used_first_visit" = c(0, 2, 0, 2, 0, 2), "used_sec_visit" = c(0, 2, 0, 2, 0, 2), "used_thrd_visit" = rep(0, 6), "nest_cond" = c("damaged", "ready", "damaged", "ready", "damaged", "ready"))

# make long data frame and filter values
dat_long <- dat %>%
  pivot_longer(c(3:5),names_to = "whatever", values_to = "used")  %>% # make long data frame
  select(-c(whatever)) %>% # get rid of name column
  group_by(visits, nest_cond) %>% # group data
  mutate(used = ifelse(all(used == 0) & row_number() == 1, 10, used)) %>% # if the whole group is 0, mark one row for later filtering
  filter(used > 0 ) %>% # filter
  mutate(used = ifelse(used == 10, 0, 1)) # change to correct numbers

如果这不是您要找的,请告诉我!

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM