[英]r efficient way to calculate the difference between rows
考慮到這是我下面的數據集
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
5.7 2.5 5.0 2.0 virginica
7.7 3.0 6.1 2.3 virginica
6.7 3.3 5.7 2.1 virginica
4.8 3.0 1.4 0.1 setosa
5.5 4.2 1.4 0.2 setosa
4.9 3.6 1.4 0.1 setosa
6.3 3.3 4.7 1.6 versicolor
5.6 2.9 3.6 1.3 versicolor
5.9 3.0 4.2 1.5 versicolor
df <- structure(list(Sepal.Length = c(5.7, 7.7, 6.7, 4.8, 5.5, 4.9,
6.3, 5.6, 5.9), Sepal.Width = c(2.5, 3, 3.3, 3, 4.2, 3.6, 3.3,
2.9, 3), Petal.Length = c(5, 6.1, 5.7, 1.4, 1.4, 1.4, 4.7, 3.6,
4.2), Petal.Width = c(2, 2.3, 2.1, 0.1, 0.2, 0.1, 1.6, 1.3, 1.5
), Species = structure(c(3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("setosa",
"versicolor", "virginica"), class = "factor")), row.names = c(NA,
-9L), class = "data.frame")
我的目標是
從第一行物種==“ virginica”減去每行“ Setosa”的Sepal.Length Sepal.Width Petal.Length Petal.Width
的值,
我在下面這樣做
Virginia1_vs_Setosa1a <- df[1:4][df$Species == "virginica",][1,] - df[1:4][df$Species == "setosa",][1,] Virginia1_vs_Setosa1a 0.9 -0.5 3.6 1.9 Virginia1_vs_Setosa2a <- df[1:4][df$Species == "virginica",][1,] - df[1:4][df$Species == "setosa",][2,] Virginia1_vs_Setosa2a 0.2 -1.7 3.6 1.8 Virginia1_vs_Setosa3a <- df[1:4][df$Species == "virginica",][1,] - df[1:4][df$Species == "setosa",][3,] Virginia1_vs_Setosa3a 0.8 -1.1 3.6 1.9
取每個元素的乘積
Virginia1_vs_Setosa1 <- as.numeric( Virginia1_vs_Setosa1a[1]*Virginia1_vs_Setosa1a[2]* Virginia1_vs_Setosa1a[3]*Virginia1_vs_Setosa1a[4]) 0.9*-0.5*3.6*1.9 = -3.078 Virginia1_vs_Setosa2 <- as.numeric( Virginia1_vs_Setosa2a[1]*Virginia1_vs_Setosa2a[2]* Virginia1_vs_Setosa2a[3]*Virginia1_vs_Setosa2a[4]) 0.2*-1.7*3.6*1.8 = -2.2032 Virginia1_vs_Setosa3 <- as.numeric( Virginia1_vs_Setosa3a[1]*Virginia1_vs_Setosa3a[2]* Virginia1_vs_Setosa3a[3]*Virginia1_vs_Setosa3a[4]) 0.8*-1.1*3.6*1.9 = -6.0192
對於弗吉尼亞州的第二行,setosa中的每一行也是如此。
Virginia2_vs_Setosa1a <- df[1:4][df$Species == "virginica",][2,] - df[1:4][df$Species == "setosa",][1,]
Virginia2_vs_Setosa2a <- df[1:4][df$Species == "virginica",][2,] - df[1:4][df$Species == "setosa",][2,]
Virginia2_vs_Setosa3a <- df[1:4][df$Species == "virginica",][2,] - df[1:4][df$Species == "setosa",][3,]
Virginia2_vs_Setosa1 <- as.numeric(
Virginia2_vs_Setosa1a[1]*Virginia2_vs_Setosa1a[2]*
Virginia2_vs_Setosa1a[3]*Virginia2_vs_Setosa1a[4])
Virginia2_vs_Setosa2 <- as.numeric(
Virginia2_vs_Setosa2a[1]*Virginia2_vs_Setosa2a[2]*
Virginia2_vs_Setosa2a[3]*Virginia2_vs_Setosa2a[4])
Virginia2_vs_Setosa3 <- as.numeric(
Virginia2_vs_Setosa3a[1]*Virginia2_vs_Setosa3a[2]*
Virginia2_vs_Setosa3a[3]*Virginia2_vs_Setosa3a[4])
rm(Virginia2_vs_Setosa1a, Virginia2_vs_Setosa2a,
Virginia2_vs_Setosa3a)
與弗吉尼亞州的第三行相似,setosa的每一行
Virginia3_vs_Setosa1a <- df[1:4][df$Species == "virginica",][3,] - df[1:4][df$Species == "setosa",][1,]
Virginia3_vs_Setosa2a <- df[1:4][df$Species == "virginica",][3,] - df[1:4][df$Species == "setosa",][2,]
Virginia3_vs_Setosa3a <- df[1:4][df$Species == "virginica",][3,] - df[1:4][df$Species == "setosa",][3,]
Virginia3_vs_Setosa1 <- as.numeric(
Virginia3_vs_Setosa1a[1]*Virginia3_vs_Setosa1a[2]*
Virginia3_vs_Setosa1a[3]*Virginia3_vs_Setosa1a[4])
Virginia3_vs_Setosa2 <- as.numeric(
Virginia3_vs_Setosa2a[1]*Virginia3_vs_Setosa2a[2]*
Virginia3_vs_Setosa2a[3]*Virginia3_vs_Setosa2a[4])
Virginia3_vs_Setosa3 <- as.numeric(
Virginia3_vs_Setosa3a[1]*Virginia3_vs_Setosa3a[2]*
Virginia3_vs_Setosa3a[3]*Virginia3_vs_Setosa3a[4])
rm(Virginia3_vs_Setosa1a, Virginia3_vs_Setosa2a,
Virginia3_vs_Setosa3a)
最后像下面這樣創建一個3 * 3矩陣
matrix(c(Virginia1_vs_Setosa1, Virginia1_vs_Setosa2, Virginia1_vs_Setosa3, Virginia2_vs_Setosa1, Virginia2_vs_Setosa2, Virginia2_vs_Setosa3,
Virginia3_vs_Setosa1, Virginia3_vs_Setosa2, Virginia3_vs_Setosa3), nrow=3, ncol=3)
[,1] [,2] [,3]
[1,] -3.0780 0.0000 4.9020
[2,] -2.2032 -26.0568 -8.8236
[3,] -6.0192 -17.3712 -4.6440
如您所見,我的解決方案非常笨拙且效率低下。 如果有人可以向我展示一種實現相同結果的有效方法,我將非常感激。
您可以使用double for
循環來執行此操作。 *apply
系列功能可能有解決方案,但這一項可行。
f <- droplevels(df$Species[df$Species != "versicolor"])
sp <- split(df[df$Species != "versicolor", ], f)
res <- matrix(0, 3, 3)
for(i in 1:nrow(sp[[1]])){
for(j in 1:nrow(sp[[2]])){
res[i, j] <- prod(sp[[2]][j, -5] - sp[[1]][i, -5])
}
}
res
# [,1] [,2] [,3]
#[1,] -3.0780 0.0000 4.9020
#[2,] -2.2032 -26.0568 -8.8236
#[3,] -6.0192 -17.3712 -4.6440
對於這種特殊情況,您可以從outer
借鑒一些想法。
X <- lapply(split(df[df$Species=="virginica", 1:4], 1:3), unlist)
Y <- lapply(split(df[df$Species=="setosa", 1:4], 1:3), unlist)
FUN <- function(l1, l2) mapply(function(v,w) prod(v-w), l1, l2)
Y <- rep(Y, rep.int(length(X), length(Y)))
if (length(X))
X <- rep(X, times = ceiling(length(Y)/length(X)))
matrix(FUN(X, Y), ncol=3L, byrow=TRUE)
對於最一般的情況,您將需要生成每對可能的不同行對,然后根據您的公式進行計算。 使用data.table
,它將類似於:
library(data.table)
setDT(df)
setorder(df, Species)[, numid := rowid(Species)]
parts <- split(df, by=c("Species", "numid"))
combis <- CJ(parts, parts, sorted=FALSE)
combis[, .(
Species1=V1[[1]][,Species],
numid1=V1[[1]][,numid],
Species2=V2[[1]][,Species],
numid2=V2[[1]][,numid],
differ=prod(V1[[1]][, 1:4] - V2[[1]][, 1:4])),
by=seq_len(combis[,.N])][
Species1!=Species2, -1L]
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.