簡體   English   中英

對行求和並添加到 R 中的下一行

[英]Sum rows and adding to the next row in R

我想對 Col2 等於 0 的各行的 Col1 求和，並將該總和加到 Col1 的下一個值上。 下面給出一個例子。 我的數據框中有很多產品。 我的初始數據如下：

在此處輸入圖像描述

library(dplyr)

# Asker's attempt: group by Product and Date, store in `Sum` the per-group
# total of Col1 over rows where Col2 == 0, and copy Col1 into `Expected`.
TD %>%
  group_by(Product, Date) %>%
  mutate(
    Sum = sum(Col1[Col2 == 0])[1],
    Expected = Col1
  )

# Example data: a single product ("A") observed on seven consecutive days.
# `Expected` is the wanted result: Col1 of the rows with Col2 == 0 is added to
# the Col1 of the next row with a non-zero Col2 (10 + 15 + 7 = 32,
# 9 + 4 + 3 = 16); all other rows keep their own Col1.
Date <- seq(
  from = as.Date("2021-01-01"),
  to = as.Date("2021-01-07"),
  by = "day"
)
Product <- rep("A", times = 7)
Col1 <- c(13, 10, 15, 7, 9, 4, 3)
Col2 <- c(7, 0, 0, 8, 0, 0, 27)
Expected <- c(13, 10, 15, 32, 9, 4, 16)
TD <- data.frame(Date, Product, Col1, Col2, Expected)

其他數據:

   Date       Product   Col1  Col2 Expected2
   <date>     <chr>   <dbl>  <dbl>     <dbl>
 1 2021-02-12 831        15    384       631
 2 2021-02-13 831        11    373       631
 3 2021-02-14 831        13    360       631
 4 2021-02-15 831        14    826       631
 5 2020-12-03 832        10     11        20
 6 2020-12-04 832        10      1        20
 7 2020-12-05 832         7      0         7
 8 2020-12-06 832        11      0        11
 9 2020-12-07 832        13      0        13
10 2020-12-08 832        10      0        10



# dput() of the asker's additional data (products "831" and "832").
# Assigned to `TD4` because the answer's last pipeline reads `TD4`, which is
# otherwise never defined; the printed output of that pipeline matches these
# rows. Dates are stored as day counts since 1970-01-01 with class "Date".
TD4 <- structure(list(Date = structure(c(18670, 18671, 18672, 18673, 
18599, 18600, 18601, 18602, 18603, 18604, 18605, 18606, 18607, 
18608, 18609, 18610, 18611, 18612, 18613, 18614, 18615, 18616, 
18617, 18618, 18619, 18620, 18621, 18622, 18623, 18624), class = "Date"), 
    Product = c("831", "831", "831", "831", "832", "832", "832", 
    "832", "832", "832", "832", "832", "832", "832", "832", "832", 
    "832", "832", "832", "832", "832", "832", "832", "832", "832", 
    "832", "832", "832", "832", "832"), Col1 = c(15, 11, 13, 
    14, 10, 10, 7, 11, 13, 10, 8, 11, 9, 8, 10, 17, 15, 17, 16, 
    16, 14, 14, 15, 17, 18, 16, 17, 18, 18, 8), Col2 = c(384, 
    373, 360, 826, 11, 1, 0, 0, 0, 0, 0, 70, 61, 53, 43, 26, 
    11, 0, 0, 84, 70, 56, 41, 24, 6, 0, 0, 0, 0, 0), Expected2 = c(631, 
    631, 631, 631, 20, 20, 7, 11, 13, 10, 8, 119, 119, 119, 119, 
    119, 119, 17, 16, 127, 127, 127, 127, 127, 127, 16, 17, 18, 
    18, 8)), row.names = c(NA, -30L), class = c("tbl_df", "tbl", 
"data.frame"))

我們可以這樣做：

library(dplyr)    
# Build `Expected2`: block totals of Col1 go to the non-zero-Col2 rows of any
# product that contains a Col2 == 0 row; every other row keeps its own Col1.
TD %>%
  group_by(Product) %>%
  # `grp` increases on the row after each non-zero Col2, so a block is a run
  # of Col2 == 0 rows together with the non-zero row that ends it.
  group_by(grp = cumsum(lag(Col2 != 0, default = FALSE)), .add = TRUE) %>%
  mutate(Expected2 = sum(Col1)) %>%
  # Back to product level so any() looks at the whole product.
  group_by(Product) %>%
  mutate(
    Expected2 = case_when(
      Col2 == 0 | !any(Col2 == 0) ~ Col1,
      TRUE ~ Expected2
    )
  ) %>%
  ungroup() %>%
  select(-grp)

-輸出

# A tibble: 7 x 6
#  Date       Product  Col1  Col2 Expected Expected2
#  <date>     <chr>   <dbl> <dbl>    <dbl>     <dbl>
#1 2021-01-01 A          13     7       13        13
#2 2021-01-02 A          10     0       10        10
#3 2021-01-03 A          15     0       15        15
#4 2021-01-04 A           7     8       32        32
#5 2021-01-05 A           9     0        9         9
#6 2021-01-06 A           4     0        4         4
#7 2021-01-07 A           3    27       16        16

- 測試第二個數據集

# Build `Expected2` for TD2. Rows are split into blocks by Product and `grp`
# (`grp` increases on the row after each non-zero Col2). The last row of a
# block receives the block total of Col1 when the block contains a Col2 == 0
# row and that last row itself is non-zero; all other rows keep Col1.
TD2 %>%
  group_by(Product) %>%
  group_by(grp = cumsum(lag(Col2 != 0, default = FALSE)), .add = TRUE) %>%
  mutate(
    tmp = sum(Col1),
    Expected2 = case_when(
      any(Col2 == 0) & row_number() == n() & Col2 != 0 ~ tmp,
      TRUE ~ Col1
    )
  ) %>%
  ungroup() %>%
  # Drop the helper columns.
  select(-grp, -tmp)

-輸出

# A tibble: 15 x 5
#   Date                Product  Col1  Col2 Expected2
#   <chr>               <chr>   <int> <int>     <int>
# 1 2020-12-03 00:00:00 B          10   206        10
# 2 2020-12-04 00:00:00 B           5   364         5
# 3 2020-12-05 00:00:00 B          10   354        10
# 4 2020-12-06 00:00:00 B           8   346         8
# 5 2020-12-07 00:00:00 B           5   341         5
# 6 2020-12-08 00:00:00 B           8   333         8
# 7 2020-12-09 00:00:00 B          12   321        12
# 8 2020-12-10 00:00:00 B           5   316         5
# 9 2020-12-11 00:00:00 B           7   309         7
#10 2020-12-12 00:00:00 B          13   296        13
#11 2020-12-13 00:00:00 B           9   287         9
#12 2020-12-14 00:00:00 B          11   276        11
#13 2020-12-15 00:00:00 B          10   266        10
#14 2020-12-16 00:00:00 B          17   249        17
#15 2020-12-17 00:00:00 B          14   235        14

或者使用圖片中的數據

# Build `Expected2` for TD3. Blocks are defined per Product by `grp`, which
# increases on the row after each non-zero Col2. `tmp` is the block total of
# Col1; it is used only on a block's last row when that row has a non-zero
# Col2 and the block holds at least one Col2 == 0 row. Otherwise Col1 is kept.
TD3 %>%
  group_by(Product) %>%
  group_by(grp = cumsum(lag(Col2 != 0, default = FALSE)), .add = TRUE) %>%
  mutate(
    tmp = sum(Col1),
    Expected2 = case_when(
      any(Col2 == 0) & row_number() == n() & Col2 != 0 ~ tmp,
      TRUE ~ Col1
    )
  ) %>%
  ungroup() %>%
  # Drop the helper columns.
  select(-grp, -tmp)

-輸出

# A tibble: 21 x 4
#   Product  Col1  Col2 Expected2
#   <chr>   <dbl> <dbl>     <dbl>
# 1 C          11    52        11
# 2 C           7    45         7
# 3 C           6    39         6
# 4 C          15    24        15
# 5 C          14    10        14
# 6 C          10     0        10
# 7 C           8     0         8
# 8 C          10   125        28
# 9 C          12   113        12
#10 C          11   102        11
# … with 11 more rows

或使用 OP 的最新dput數據

 # Build `Expected2` for TD4 and return a plain data.frame. Within each
 # Product, `grp` increases on the row after every non-zero Col2. The final
 # row of a (Product, grp) block takes the block total of Col1 if the block
 # has a Col2 == 0 row and the final row is non-zero; other rows keep Col1.
 TD4 %>%
   group_by(Product) %>%
   group_by(grp = cumsum(lag(Col2 != 0, default = FALSE)), .add = TRUE) %>%
   mutate(
     tmp = sum(Col1),
     Expected2 = case_when(
       any(Col2 == 0) & row_number() == n() & Col2 != 0 ~ tmp,
       TRUE ~ Col1
     )
   ) %>%
   ungroup() %>%
   # Drop the helper columns before converting.
   select(-grp, -tmp) %>%
   as.data.frame()

-輸出

#         Date Product Col1 Col2 Expected2
#1  2021-02-12     831   15  384        15
#2  2021-02-13     831   11  373        11
#3  2021-02-14     831   13  360        13
#4  2021-02-15     831   14  826        14
#5  2020-12-03     832   10   11        10
#6  2020-12-04     832   10    1        10
#7  2020-12-05     832    7    0         7
#8  2020-12-06     832   11    0        11
#9  2020-12-07     832   13    0        13
#10 2020-12-08     832   10    0        10
#11 2020-12-09     832    8    0         8
#12 2020-12-10     832   11   70        60
#13 2020-12-11     832    9   61         9
#14 2020-12-12     832    8   53         8
#15 2020-12-13     832   10   43        10
#16 2020-12-14     832   17   26        17
#17 2020-12-15     832   15   11        15
#18 2020-12-16     832   17    0        17
#19 2020-12-17     832   16    0        16
#20 2020-12-18     832   16   84        49
#21 2020-12-19     832   14   70        14
#22 2020-12-20     832   14   56        14
#23 2020-12-21     832   15   41        15
#24 2020-12-22     832   17   24        17
#25 2020-12-23     832   18    6        18
#26 2020-12-24     832   16    0        16
#27 2020-12-25     832   17    0        17
#28 2020-12-26     832   18    0        18
#29 2020-12-27     832   18    0        18
#30 2020-12-28     832    8    0         8

數據

# Test data for product "B": 15 consecutive days, none with Col2 == 0.
# Date is a character timestamp column, the integer columns stay integer, and
# row names are the strings "1" to "15".
TD2 <- structure(
  list(
    Date = paste(
      format(seq(as.Date("2020-12-03"), by = "day", length.out = 15L)),
      "00:00:00"
    ),
    Product = rep("B", 15L),
    Col1 = c(
      10L, 5L, 10L, 8L, 5L, 8L, 12L, 5L,
      7L, 13L, 9L, 11L, 10L, 17L, 14L
    ),
    Col2 = c(
      206L, 364L, 354L, 346L, 341L, 333L, 321L, 316L,
      309L, 296L, 287L, 276L, 266L, 249L, 235L
    ),
    # 144 is the sum of Col1 over all 15 rows.
    Expected2 = rep(144L, 15L)
  ),
  class = "data.frame",
  row.names = as.character(1:15)
)




# Test data for product "C": 21 rows, with Col2 == 0 only on rows 6 and 7.
# NOTE(review): presumably transcribed from the image in the question.
TD3 <- structure(
  list(
    Product = rep("C", 21L),
    Col1 = c(
      11, 7, 6, 15, 14, 10, 8, 10, 12, 11, 10,
      20, 20, 22, 19, 23, 21, 20, 26, 26, 27
    ),
    Col2 = c(
      52, 45, 39, 24, 10, 0, 0, 125, 113, 102, 92,
      72, 52, 30, 11, 138, 117, 97, 71, 45, 18
    )
  ),
  class = "data.frame",
  row.names = c(NA, -21L)
)

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM