[英]Averaging different columns in R
我有一个如下所示的数据框:
> dput(head(b1))
structure(list(hgnc_symbol = c("DDX11L1", "WASH7P", "MIR6859-1",
"MIR1302-2HG", "MIR1302-2", "FAM138A"), CS001_t1 = c(0L, 70L,
9L, 0L, 0L, 0L), CS001_t2 = c(0L, 100L, 1L, 0L, 0L, 0L), CS001_1 = c(0L,
80L, 1L, 0L, 0L, 0L), CS001_2 = c(5L, 86L, 2L, 0L, 0L, 0L), CS002_1 = c(1L,
82L, 14L, 1L, 0L, 0L), CS002_2 = c(2L, 78L, 15L, 1L, 0L, 0L),
HC002_1 = c(4L, 83L, 21L, 0L, 0L, 0L), HC002_2 = c(8L, 94L,
14L, 0L, 0L, 0L), HC003_1 = c(17L, 101L, 18L, 0L, 0L, 0L),
HC003_2 = c(18L, 84L, 22L, 1L, 0L, 0L)), row.names = c("1",
"2", "3", "4", "5", "6"), class = "data.frame")
>
> names(b1)
[1] "hgnc_symbol" "CS001_t1" "CS001_t2" "CS001_1" "CS001_2"
[6] "CS002_1" "CS002_2" "HC002_1" "HC002_2" "HC003_1"
[11] "HC003_2"
>
我想对 "CS001_t1" 和 "CS001_t2"、"CS001_1" 和 "CS001_2" 等每对列中的数值求平均(四舍五入保留小数),这样最后就得到五列。
您可以使用 split.default:
# Average replicate columns: the data columns (everything except the first)
# are grouped by their name with the final digit removed, and each group is
# reduced to its row-wise mean. The identifier column is kept in front.
cbind(
  b1[1],
  sapply(
    split.default(b1[-1], f = sub('(.*_.*?)\\d$', '\\1', names(b1)[-1])),
    FUN = rowMeans,
    na.rm = TRUE
  )
)
# hgnc_symbol CS001_ CS001_t CS002_ HC002_ HC003_
#1 DDX11L1 2.5 0 1.5 6.0 17.5
#2 WASH7P 83.0 85 80.0 88.5 92.5
#3 MIR6859-1 1.5 5 14.5 17.5 20.0
#4 MIR1302-2HG 0.0 0 1.0 0.0 0.5
#5 MIR1302-2 0.0 0 0.0 0.0 0.0
#6 FAM138A 0.0 0 0.0 0.0 0.0
其中 sub 部分用于区分每组列:
# Grouping key for each data column: its name with the final digit removed.
sub(pattern = '(.*_.*?)\\d$', replacement = '\\1', x = names(b1)[-1])
#[1] "CS001_t" "CS001_t" "CS001_" "CS001_" "CS002_"
#[6] "CS002_" "HC002_" "HC002_" "HC003_" "HC003_"
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.