簡體   English   中英

R:如何對具有不同窗口大小的多列求和?

[英]R: How to sum across multiple columns with varying window size?

這是一個說明性的數據集:

# Reproducible toy data set: three operators and four numeric columns t1..t4.
# Each tN column takes 3 values sampled (without replacement) from a uniform
# draw scaled by 10. The columns are generated in order, so the random number
# stream is consumed as runif/sample pairs for t1, then t2, t3 and t4.
set.seed(1)
sam_dat <- data.frame(
  Operator = seq_len(3),  # integer ids 1, 2, 3 (seq(1:3) was a redundant wrapper)
  t1 = sample(runif(10) * 10, 3),
  t2 = sample(runif(3) * 10, 3),
  t3 = sample(runif(12) * 10, 3),
  t4 = sample(runif(34) * 10, 3)
)

計算以下(tidyverse、base R 或其他)的最簡單方法是什么?

t1_t2 = t1 + t2
t1_t3 = t1 + t2 + t3
t1_t4 = t1 + t2 + t3 + t4

編輯:有沒有辦法做到這一點而不必明確寫出每個計算? 雖然這對於我的示例數據集中的四列是可行的,但我的真實數據還有更多列。

通過手動計算,操作員 1 的結果將是:

10.71,  19.40,  20.48

提前致謝!

在base R中,我們可以使用transform

# Spell out each running total explicitly. transform() evaluates every
# expression against the original columns and returns a modified copy;
# sam_dat itself is not reassigned by this call.
transform(
  sam_dat,
  t1_t2 = t1 + t2,
  t1_t3 = t1 + t2 + t3,
  t1_t4 = t1 + t2 + t3 + t4
)
#  Operator       t1       t2       t3       t4     t1_t2    t1_t3    t1_t4
#1        1 5.728534 4.976992 8.696908 1.079436 10.705526 19.40243 20.48187
#2        2 2.655087 7.176185 9.347052 7.829328  9.831272 19.17832 27.00765
#3        3 2.016819 7.698414 1.255551 4.068302  9.715234 10.97078 15.03909

或者另一種選擇是將數據的子集放入list,然后使用rowSums

# Sum each growing set of columns row-wise and store the three results as new
# columns. vapply() is used instead of sapply() so the result is pinned to one
# numeric value per row and cannot silently change shape with the input.
sam_dat[c("t1_t2", "t1_t3", "t1_t4")] <- vapply(
  list(
    sam_dat[c("t1", "t2")],
    sam_dat[c("t1", "t2", "t3")],
    sam_dat[c("t1", "t2", "t3", "t4")]
  ),
  rowSums,
  FUN.VALUE = numeric(nrow(sam_dat))
)

或者通過索引使其更緊湊

# Build the target names "<first value column>_<later value column>" from the
# column positions, e.g. t1_t2, t1_t3, t1_t4.
nm1 <- paste(names(sam_dat)[2], names(sam_dat)[3:5], sep = "_")
# For each end position i, sum columns 2..i row-wise. vapply() guarantees one
# numeric value per row for every i (sapply()'s return type depends on input).
sam_dat[nm1] <- vapply(
  3:5,
  function(i) rowSums(sam_dat[2:i]),
  FUN.VALUE = numeric(nrow(sam_dat))
)

或者另一種選擇是使用matrixStats中的rowCumsums

library(matrixStats)
# Convert every column except the first to a matrix, take cumulative sums with
# rowCumsums(), drop the first result column, and store the rest under the
# names in nm1.
sam_dat[nm1] <- rowCumsums(as.matrix(sam_dat[-1]))[,-1]

或者,類似於purrr中的accumulate,base R中的對應操作是Reduce

# Fold `+` across the value columns, keeping every intermediate total.
# The first element of the accumulated list is dropped, and the remaining
# totals are bound column-wise into the nm1 columns.
sam_dat[nm1] <- do.call(
  cbind,
  Reduce(`+`, sam_dat[-1], accumulate = TRUE)[-1]
)

或者使用dplyr的mutate

library(dplyr)
# Same explicit sums written with mutate(); the result is returned, not
# assigned back to sam_dat.
mutate(
  sam_dat,
  t1_t2 = t1 + t2,
  t1_t3 = t1 + t2 + t3,
  t1_t4 = t1 + t2 + t3 + t4
)

或者,tidyverse中的另一個選擇是先轉為“長”格式,進行計算,然后再轉換回“寬”格式

 library(tidyr)
 library(stringr)
 # Long -> cumulative sum per Operator -> wide again.
 # Steps: stack the value columns, take a running total within each Operator,
 # drop each group's first row, prefix the remaining names with "t1_", spread
 # back to one column per total, and bind those columns onto sam_dat.
 sam_dat %>%
   pivot_longer(cols = -Operator) %>%
   group_by(Operator) %>%
   mutate(value = cumsum(value)) %>%
   slice(-1) %>%
   ungroup() %>%
   mutate(name = str_c("t1_", name)) %>%
   pivot_wider(names_from = name, values_from = value) %>%
   select(-Operator) %>%
   bind_cols(sam_dat, .)
# A tibble: 3 x 8
#  Operator    t1    t2    t3    t4 t1_t2 t1_t3 t1_t4
#     <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1        1  5.73  4.98  8.70  1.08 10.7   19.4  20.5
#2        2  2.66  7.18  9.35  7.83  9.83  19.2  27.0
#3        3  2.02  7.70  1.26  4.07  9.72  11.0  15.0

這是使用apply + cumsum的基本 R 解決方案,即,

# Row-wise cumulative sums over every column except the first. apply() returns
# the per-row results as columns, hence the t(). The first column of the
# transposed matrix is dropped, and the rest are renamed t1_<col> and appended
# to sam_dat.
sam_dat <- cbind(
  sam_dat,
  `colnames<-`(
    t(apply(sam_dat[-1], 1, cumsum))[, -1],
    paste0("t1_", names(sam_dat)[-(1:2)])
  )
)

結果如下:

> sam_dat
  Operator       t1       t2       t3       t4     t1_t2    t1_t3    t1_t4
1        1 5.728534 4.976992 8.696908 1.079436 10.705526 19.40243 20.48187
2        2 2.655087 7.176185 9.347052 7.829328  9.831272 19.17832 27.00765
3        3 2.016819 7.698414 1.255551 4.068302  9.715234 10.97078 15.03909

另一種選擇是在 dplyr 中使用rowwise函數

# Row-by-row cumulative sums: each row of the value columns is flattened with
# unlist(), passed through cumsum(), rebuilt as a one-row data frame whose
# columns are named t1_<col>, and its first column is dropped before the
# result is bound onto sam_dat.
# NOTE(review): do() is generally listed as a legacy dplyr verb (reframe() /
# across() are the usual replacements) — consider migrating.
sam_dat[-1]%>%
    rowwise()%>%
    do(setNames(data.frame(t(cumsum(unlist(.)))),sprintf('t1_%s',names(.)))[-1])%>%
    cbind(sam_dat,.)
  Operator       t1       t2       t3       t4     t1_t2    t1_t3    t1_t4
1        1 5.728534 4.976992 8.696908 1.079436 10.705526 19.40243 20.48187
2        2 2.655087 7.176185 9.347052 7.829328  9.831272 19.17832 27.00765
3        3 2.016819 7.698414 1.255551 4.068302  9.715234 10.97078 15.03909

基本操作是:

# Core of the rowwise approach: for each row of the value columns, compute the
# cumulative sum and return it as a one-row data frame (no renaming, nothing
# dropped, nothing bound back onto sam_dat).
sam_dat[-1]%>%
    rowwise()%>%
     do(data.frame(t(cumsum(unlist(.)))))

一個使用dplyr和purrr的選項可以是:

# For every growing prefix of the value columns (t1..t2, t1..t3, ...), add one
# column holding the row-wise sum of that prefix; the new column is named by
# joining all prefix names with "_". The per-prefix data frames are then merged
# with full_join(), which is called without an explicit `by`.
# all_of() replaces the superseded one_of(): it selects exactly the columns in
# the character vector and errors if any of them is missing.
map(
  .x = accumulate(names(sam_dat[-1]), c)[-1],
  ~ sam_dat %>%
    mutate(!!paste(.x, collapse = "_") := rowSums(select(., all_of(.x))))
) %>%
  reduce(full_join)

  Operator       t1       t2        t3       t4    t1_t2 t1_t2_t3 t1_t2_t3_t4
1        1 5.728534 4.976992 0.1339033 7.942399 10.70553 10.83943    18.78183
2        2 3.721239 7.698414 2.6722067 4.590657 11.41965 14.09186    18.68252
3        3 8.983897 3.841037 9.3470523 5.297196 12.82493 22.17199    27.46918

或者進一步匹配您想要的輸出:

# Same computation as the all-names variant, but each new column is named from
# only the first and last column of its prefix (t1_t2, t1_t3, t1_t4).
# all_of() replaces the superseded one_of(): it selects exactly the columns in
# the character vector and errors if any of them is missing.
map(
  .x = accumulate(names(sam_dat[-1]), c)[-1],
  ~ sam_dat %>%
    mutate(
      !!paste(head(.x, 1), tail(.x, 1), sep = "_") :=
        rowSums(select(., all_of(.x)))
    )
) %>%
  reduce(full_join)

  Operator       t1       t2        t3       t4    t1_t2    t1_t3    t1_t4
1        1 5.728534 4.976992 0.1339033 7.942399 10.70553 10.83943 18.78183
2        2 3.721239 7.698414 2.6722067 4.590657 11.41965 14.09186 18.68252
3        3 8.983897 3.841037 9.3470523 5.297196 12.82493 22.17199 27.46918

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM