简体   繁体   中英

Creating new row values based on second column in R

I want to create a new variable called "X" whic is the sum of "B" and "D"

type <- c( "A", "B","C","D","E")
cnt <- c(2,5,3,7,8)

df <- data.frame(type,cnt)

> df
  type cnt
1    A   2
2    B   5
3    C   3
4    D   7
5    E   8

The desired output is

> df
  type cnt
1    A   2
2    B   5
3    C   3
4    D   7
5    E   8
6    X  12

How could extend this, if we add another grouping variable like date. Would like to add up X for each day

 date <- c("2022-01-01","2022-01-01","2022-01-01","2022-01-01","2022-01-01","2022-01-02","2022-01-02","2022-01-02","2022-01-02","2022-01-02")
type <- c("A", "B","C","D","E","A", "B","C","D","E")
cnt <- c(2,5,3,7,8, 1,9,8,2,5)

df <- data.frame(date,type,cnt)

df
         date type cnt
1  2022-01-01    A   2
2  2022-01-01    B   5
3  2022-01-01    C   3
4  2022-01-01    D   7
5  2022-01-01    E   8
6  2022-01-02    A   1
7  2022-01-02    B   9
8  2022-01-02    C   8
9  2022-01-02    D   2
10 2022-01-02    E   5

Desired output is

df
         date type cnt
1  2022-01-01    A   2
2  2022-01-01    B   5
3  2022-01-01    C   3
4  2022-01-01    D   7
5  2022-01-01    E   8
6  2022-01-01    X  12
7  2022-01-02    A   1
8  2022-01-02    B   9
9  2022-01-02    C   8
10 2022-01-02    D   2
11 2022-01-02    E   5
12 2022-01-02    X   11

You could also use:

df %>%
  add_row(type= 'X', cnt = sum(.$cnt[.$type %in% c('B', 'D')]))

  type cnt
1    A   2
2    B   5
3    C   3
4    D   7
5    E   8
6    X  12

UPDATE:

df %>%
   group_by(date)%>%
   group_modify(~add_row(.,type = 'X', 
                           cnt = sum(.$cnt[.$type%in%c('B', 'D')])))
# A tibble: 12 x 3
# Groups:   date [2]
   date       type    cnt
   <chr>      <chr> <int>
 1 2022-01-01 A         2
 2 2022-01-01 B         5
 3 2022-01-01 C         3
 4 2022-01-01 D         7
 5 2022-01-01 E         8
 6 2022-01-01 X        12
 7 2022-01-02 A         1
 8 2022-01-02 B         9
 9 2022-01-02 C         8
10 2022-01-02 D         2
11 2022-01-02 E         5
12 2022-01-02 X        11

We can subset and rbind

rbind(df, data.frame(type = "X", cnt = sum(df$cnt[df$type %in% c("B", "D")])))

-output

 type cnt
1    A   2
2    B   5
3    C   3
4    D   7
5    E   8
6    X  12

Or in dplyr , filter the rows based on the 'type' values, summarise by taking the sum of 'cnt', while creating 'type' as 'X' and use bind_rows with original dataset

library(dplyr)
df %>% 
  filter(type %in% c("B", "D")) %>% 
  summarise(type = 'X', cnt = sum(cnt)) %>%
  bind_rows(df, .)

Or without using bind_rows

df %>% 
   summarise(type = c(type, 'X'), cnt = c(cnt, sum(cnt[type %in% c("B", "D")])))
  type cnt
1    A   2
2    B   5
3    C   3
4    D   7
5    E   8
6    X  12

Or using complete

library(tidyr)
complete(df, type = c(type, "X"), fill = list(cnt = sum(cnt[type %in% c("B", "D")])))
# A tibble: 6 × 2
  type    cnt
  <chr> <dbl>
1 A         2
2 B         5
3 C         3
4 D         7
5 E         8
6 X        12

Update

For the updated data, just add a group_by

df %>% 
  group_by(date) %>%
  summarise(type = c(type, "X"), 
    cnt = c(cnt, sum(cnt[type %in% c("B", "D")])), .groups = 'drop')

-output

# A tibble: 12 × 3
   date       type    cnt
   <chr>      <chr> <dbl>
 1 2022-01-01 A         2
 2 2022-01-01 B         5
 3 2022-01-01 C         3
 4 2022-01-01 D         7
 5 2022-01-01 E         8
 6 2022-01-01 X        12
 7 2022-01-02 A         1
 8 2022-01-02 B         9
 9 2022-01-02 C         8
10 2022-01-02 D         2
11 2022-01-02 E         5
12 2022-01-02 X        11

Or using the filter approach

df %>%
   filter(type %in% c("B", "D")) %>% 
   group_by(date) %>% 
   summarise(type = 'X', cnt = sum(cnt), .groups = 'drop') %>% 
   bind_rows(df, .) %>% 
   arrange(date)

Another possible solution, in base R:

rbind(df, c(type = "X", sum(ifelse(type %in% c("B", "D"), cnt, 0))))     

#>   type cnt
#> 1    A   2
#> 2    B   5
#> 3    C   3
#> 4    D   7
#> 5    E   8
#> 6    X  12

With dplyr :

bind_rows(df, list(type = "X", cnt = sum(if_else(type %in% c("B","D"), cnt, 0))))

Here is an alternative dplyr in combination with janitor package:

df %>% 
  filter(type == "B" |type == "D") %>% 
  adorn_totals(name="X") %>% 
  filter(type == "X") %>% 
  bind_rows(df) %>% 
  arrange(cnt)
 type cnt
    A   2
    C   3
    B   5
    D   7
    E   8
    X  12

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM