簡體   English   中英

r中具有多個條件的累積和

[英]Cumulative Sum with multiple condition in r

我有一個事務數據集如下所示(簡化)

dat <- data.frame(ISIN=c("abc", "abc", "ghi", "def", "def", "def", "ghi", "ghi", "ghi"),
                  ID =c("A", "A", "A", "B", "B", "B", "C", "C", "C"),
                  Date=c("2022-07-01", "2022-07-02", "2022-07-03", "2022-07-01", "2022-07-02", "2022-07-03","2022-07-01", "2022-07-02", "2022-07-03"),
                  Quantity=c("-4", "8", "1", "2", "-6","9", "4", "8", "9"),
                  Factor=c("0", "0", "0.1", "0", "0","0", "0", "0.5", "0"),
                  Ind=c("0", "0", "1", "0", "0","0", "0", "1", "0"))

  ISIN ID       Date Quantity Factor Ind
1  abc  A 2022-07-01       -4      0   0
2  abc  A 2022-07-02        8      0   0
3  ghi  A 2022-07-03        1    0.1   1
4  def  B 2022-07-01        2      0   0
5  def  B 2022-07-02       -6      0   0
6  def  B 2022-07-03        9      0   0
7  ghi  C 2022-07-01        4      0   0
8  ghi  C 2022-07-02        8    0.5   1
9  ghi  C 2022-07-03        9      0   0

我需要做以下事情

  1. ISIN 和 ID 的累計總和

  2. 如果 cumulative sum 為負

    2-1。 將其保存在單獨的數據框中

    2-2。 將累積總和設置為 0

  3. 如果 Ind=1,將累積和除以因子

所以它應該看起來像這樣

dat <- data.frame(ISIN=c("abc", "abc", "ghi", "def", "def", "def", "ghi", "ghi", "ghi"),
                  ID =c("A", "A", "A", "B", "B", "B", "C", "C", "C"),
                  Date=c("2022-07-01", "2022-07-02", "2022-07-03", "2022-07-01", "2022-07-02", "2022-07-03","2022-07-01", "2022-07-02", "2022-07-03"),
                  Quantity=c("-4", "8", "1", "2", "-6","9", "4", "8", "9"),
                  Factor=c("0", "0", "0.1", "0", "0","0", "0", "0.5", "0"),
                  Ind=c("0", "0", "1", "0", "0","0", "0", "1", "0"),
                  CumulativeSum= c("0", "8", "10", "2", "0","9", "4", "24", "33"))

  ISIN ID       Date Quantity Factor Ind CumulativeSum
1  abc  A 2022-07-01       -4      0   0             0
2  abc  A 2022-07-02        8      0   0             8
3  ghi  A 2022-07-03        1    0.1   1            10
4  def  B 2022-07-01        2      0   0             2
5  def  B 2022-07-02       -6      0   0             0
6  def  B 2022-07-03        9      0   0             9
7  ghi  C 2022-07-01        4      0   0             4
8  ghi  C 2022-07-02        8    0.5   1            24
9  ghi  C 2022-07-03        9      0   0            33

最后的結果最好是這樣的

下面我發布了一些我嘗試過的代碼(無法弄清楚如何將具有負 CumulativeSum 的行保存到單獨的數據框中)

try2<-dat%>% 
  group_by(ID, ISIN) %>% 
  arrange(Date) %>% 
  mutate(CumulativeSum=cumsum(Quantity), 
                                    ifelse(CumulativeSum<0,
                                    (CumulativeSum==0),
                                    ifelse (Ind==1,
                                    CumulativeSum==CumulativeSum/Factor,
                                    CumulativeSum))
         )

S<-dat[is.na(ISIN)]

dat %>% 
  group_by(ID, ISIN) %>% 
  arrange(Date) %>% 
  for (i in 1:length(dat$Quantity)) {
    dat$CumulativeSum[0] =="0"
    dat$CumulativeSum[i] == dat$CumulativeSum[i-1] + dat$Quantity[i]
    dat $CumulativeSum[i]== ifelse(dat $CumulativeSum[i]<0,
      (bind_rows(S, dat$CumulativeSum[i]))&
      (dat$CumulativeSum[i]==0),
        ifelse (dat$CumulativeSum[i]==1,
        dat$CumulativeSum[i]==dat$CumulativeSum[i]/dat$Factor[i],
        dat$CumulativeSum[i]))
       }

我已經嘗試了許多參考上一篇文章的代碼,但到目前為止都失敗了。 你能幫我解決這個問題並拯救我嗎? 提前非常感謝!!

我不確定,但也許你想要這個,你可以使用case_when來執行多個條件:

dat <- data.frame(ISIN=c("abc", "abc", "ghi", "def", "def", "def", "ghi", "ghi", "ghi"),
                  ID =c("A", "A", "A", "B", "B", "B", "C", "C", "C"),
                  Date=c("2022-07-01", "2022-07-02", "2022-07-03", "2022-07-01", "2022-07-02", "2022-07-03","2022-07-01", "2022-07-02", "2022-07-03"),
                  Quantity=c("-4", "8", "1", "2", "-6","9", "4", "8", "9"),
                  Factor=c("0", "0", "0.1", "0", "0","0", "0", "0.5", "0"),
                  Ind=c("0", "0", "1", "0", "0","0", "0", "1", "0"))

library(dplyr)

# A plain cumsum() followed by a per-row adjustment is not enough here:
# once a total has been floored at 0 or divided by Factor, that adjusted
# value must be the starting point for the next row of the same group.
# This helper walks the rows in order and carries the adjusted total forward.
#
# quantity, factor: numeric vectors of equal length (one group's rows).
# ind: vector of the same length; a value equal to 1 marks rows where the
#      running total is divided by `factor`.
# Returns a numeric vector with the adjusted running total for each row.
running_total <- function(quantity, factor, ind) {
  total <- 0
  out <- numeric(length(quantity))
  for (i in seq_along(quantity)) {
    total <- total + quantity[i]
    # A negative total is reset to 0 first; otherwise Ind == 1 rescales it.
    total <- case_when(total < 0 ~ 0,
                       ind[i] == 1 ~ total / factor[i],
                       TRUE ~ total)
    out[i] <- total
  }
  out
}

dat %>%
  group_by(ID, ISIN) %>%
  arrange(Date) %>%
  mutate(CumulativeSum = running_total(as.numeric(Quantity),
                                       as.numeric(Factor),
                                       Ind)) %>%
  ungroup()
#> # A tibble: 9 × 7
#>   ISIN  ID    Date       Quantity Factor Ind   CumulativeSum
#>   <chr> <chr> <chr>      <chr>    <chr>  <chr>         <dbl>
#> 1 abc   A     2022-07-01 -4       0      0                 0
#> 2 def   B     2022-07-01 2        0      0                 2
#> 3 ghi   C     2022-07-01 4        0      0                 4
#> 4 abc   A     2022-07-02 8        0      0                 8
#> 5 def   B     2022-07-02 -6       0      0                 0
#> 6 ghi   C     2022-07-02 8        0.5    1                24
#> 7 ghi   A     2022-07-03 1        0.1    1                10
#> 8 def   B     2022-07-03 9        0      0                 9
#> 9 ghi   C     2022-07-03 9        0      0                33

由 reprex 包 (v2.0.1) 創建於 2022-07-04

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM