简体   繁体   中英

How to combine two rows of a dataframe into one row

I have a dataframe which looks like this.

 Name  info.1 info.2 
  ab      a      1
  123     a      1
  de      c      4
  456     c      4
  fg      d      5
  789     d      5 

The two rows that need to be combined are identical aside from the name column and are together in the dataframe. I want the new dataframe to look like this:

 Name ID  info.1 info.2 
  ab  123    a      1
  de  456    c      4
  fg  789    d      5
  

I have no clue how to do this and google search hasn't been helpful so far

A possible solution:

library(tidyverse)

df %>% 
  group_by(info.1) %>% 
  summarise(Name = str_c(Name, collapse = "_"), info.2 = first(info.2)) %>% 
  separate(Name, into = c("Name", "ID"), convert = T) %>% 
  relocate(info.1, .before = info.2)

#> # A tibble: 3 × 4
#>   Name     ID info.1 info.2
#>   <chr> <int> <chr>   <int>
#> 1 ab      123 a           1
#> 2 de      456 c           4
#> 3 fg      789 d           5

In base R you could do:

data.frame(Name = df[seq(nrow(df)) %% 2 == 0, 1], 
           ID   = df[seq(nrow(df)) %% 2 == 1, 1],
           df[seq(nrow(df)) %% 2 == 0, 2:3])
#>   Name  ID info.1 info.2
#> 2   ab 456      a      1
#> 4  123  fg      c      4
#> 6   de 789      d      5

Created on 2022-07-20 by the reprex package (v2.0.1)

Assuming the Name column is consistently ordered Name-ID-Name-ID then:

library(tidyverse)
data <- tibble(Name = c('ab', 123, 'de', 456, 'fg', 789),
               info.1 = c('a', 'a', 'c', 'c', 'd', 'd'),
               info.2 = c(1, 1, 4, 4, 5, 5))

# remove the troublesome column and make a tibble
# with the unique combos of info1 and 2
data_2 <- data %>% select(info.1, info.2) %>% distinct()

# add columns for name and ID by skipping every other row in the
# original tibble
data_2$Name <- data$Name[seq(from = 1, to = nrow(data), by = 2)]
data_2$ID <- data$Name[seq(from = 2, to = nrow(data), by = 2)]

We could also use summarise and extract first as name and last as id:

data |>
  group_by(info.1, info.2) |>
  summarise(name = first(Name), ID = last(Name)) |>
  ungroup() #|>
  #relocate(3:4,1:2)

Output:

# A tibble: 3 × 4
  info.1 info.2 name  ID   
  <chr>   <dbl> <chr> <chr>
1 a           1 ab    123  
2 c           4 de    456  
3 d           5 fg    789  

We could also use

library(dplyr)
library(stringr)
data %>% 
  group_by(across(starts_with('info'))) %>% 
  mutate(ID = str_subset(Name, "^\\d+$"), .before = 2) %>% 
  ungroup %>%
  filter(str_detect(Name, '^\\d+$', negate = TRUE))

-output

# A tibble: 3 × 4
  Name  ID    info.1 info.2
  <chr> <chr> <chr>   <dbl>
1 ab    123   a           1
2 de    456   c           4
3 fg    789   d           5

data

data <- structure(list(Name = c("ab", "123", "de", "456", "fg", "789"
), info.1 = c("a", "a", "c", "c", "d", "d"), info.2 = c(1, 1, 
4, 4, 5, 5)), row.names = c(NA, -6L), class = "data.frame")

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM