简体   繁体   中英

Merge two data frames in R

I created two data frames:

# Tally participants per MEDI score and label every row with its diet.
# `.drop = FALSE` asks count() to keep levels that have no observations
# (presumably `medi_total_factor` is a factor -- confirm against the data).
tab1 <- medivmind %>%
  filter(!is.na(medi_total_factor)) %>%
  select(medi_total_factor, participant_id) %>%
  count(medi_total_factor, .drop = FALSE) %>%
  add_column(diet = "medi") %>%
  rename(score = medi_total_factor, n_medi = n)
DF1
score n_medi diet
1 0 0 medi
2 0.5 0 medi
3 1 7 medi
4 1.5 0 medi
5 2 12 medi
6 2.5 0 medi
...

# Tally participants per MIND score and label every row with its diet.
# `.drop = FALSE` asks count() to keep levels that have no observations
# (presumably `mind_total_factor` is a factor -- confirm against the data).
tab2 <- medivmind %>%
  select(mind_total_factor, participant_id) %>%
  filter(!is.na(mind_total_factor)) %>%
  count(mind_total_factor, .drop = FALSE) %>%
  add_column(diet = "mind") %>%
  rename(n_mind = n, score = mind_total_factor)
DF2
score n_mind diet
1 0 0 mind
2 0.5 0 mind
3 1 0 mind
4 1.5 0 mind
5 2 0 mind
6 2.5 0 mind
...

I want to merge them to look like:
score n_mind diet
1 0 0 mind
2 0 0 medi
3 0.5 0 mind
4 0.5 0 medi
5 1 0 mind
6 1 0 medi
7 1.5 0 mind
8 1.5 0 medi
...

I've tried rbind, cbind, and left_join, but I keep getting the following (or a variation of it):
score n_medi diet score n_mind diet
1 0 0 medi 0 0 mind
2 0.5 0 medi 0.5 0 mind
3 1 7 medi 1 0 mind
4 1.5 0 medi 1.5 0 mind
5 2 12 medi 2 0 mind

Update: Is this what you want?

# Join the two count tables on `score`, then stack the two diet labels into a
# single `diet` column so each score yields a "mind" row followed by a "medi"
# row (descending alphabetical order within the score).
tab3 <- left_join(tab1, tab2, by = "score") %>%
  pivot_longer(starts_with("diet")) %>%
  select(score, n_medi, n_mind, diet = value) %>%
  group_by(score) %>%
  arrange(desc(diet), .by_group = TRUE) %>%
  # Keep n_medi only on the first row of each score and blank the remaining
  # row(s); this states the intent directly instead of relying on lead()
  # returning NA for the last row of a group.
  mutate(n_medi = if_else(row_number() == 1L, n_medi, NA_integer_)) %>%
  # Drop the grouping so later verbs are not silently applied per score.
  ungroup()
   score n_medi n_mind diet 
   <dbl>  <int>  <int> <chr>
 1   0        0      0 mind 
 2   0       NA      0 medi 
 3   0.5      0      0 mind 
 4   0.5     NA      0 medi 
 5   1        7      0 mind 
 6   1       NA      0 medi 
 7   1.5      0      0 mind 
 8   1.5     NA      0 medi 
 9   2       12      0 mind 
10   2       NA      0 medi 
11   2.5      0      0 mind 
12   2.5     NA      0 medi 

We could do it this way:

library(dplyr)
library(tidyr)

# Join on `score`, stack the two diet labels into one `diet` column, and order
# the rows by score with "mind" before "medi" (descending alphabetical order).
# Sorting on both keys directly gives the same row order as a grouped
# arrange() without leaving the result grouped.
left_join(DF1, DF2, by = "score") %>%
  pivot_longer(starts_with("diet")) %>%
  select(score, n_medi, diet = value) %>%
  arrange(score, desc(diet))
  score n_medi diet 
   <dbl>  <int> <chr>
 1   0        0 mind 
 2   0        0 medi 
 3   0.5      0 mind 
 4   0.5      0 medi 
 5   1        7 mind 
 6   1        7 medi 
 7   1.5      0 mind 
 8   1.5      0 medi 
 9   2       12 mind 
10   2       12 medi 
11   2.5      0 mind 
12   2.5      0 medi 

data:

> dput(DF1)
structure(list(score = c(0, 0.5, 1, 1.5, 2, 2.5), n_medi = c(0L, 
0L, 7L, 0L, 12L, 0L), diet = c("medi", "medi", "medi", "medi", 
"medi", "medi")), class = "data.frame", row.names = c("1", "2", 
"3", "4", "5", "6"))
> dput(DF2)
structure(list(score = c(0, 0.5, 1, 1.5, 2, 2.5), n_mind = c(0L, 
0L, 0L, 0L, 0L, 0L), diet = c("mind", "mind", "mind", "mind", 
"mind", "mind")), class = "data.frame", row.names = c("1", "2", 
"3", "4", "5", "6"))

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM