[英]Convert Nested Loop to lapply
我正在使用兩個循環。 在第二個循環中,我將值遞增 1,然后基於該值應用過濾器並將其轉換為 data.matrix,以便可以在后面的步驟中完成矩陣乘法。 有沒有辦法使用 lapply、expand.grid 或任何其他方法使其高效?
library(dplyr)
xx <- structure(list(Ars_0 = c(1308.56, 5728.84, 2177.82), Ars_1 = c(0, 0, 0),
Ars_2 = c(0, 0, 0), age = c(13, 31, 43), region = c('A','A','B')),
row.names = c(NA, -3L),
class = "data.frame")
mx_long2 = read.table(header = T, text = '
Arrears Ars_0 Ars_1 Ars_2 Seasoning Region
Ars_0 0.985 0.0148 0.0002 mths:36-47 A
Ars_1 0.3816 0.286 0.3317 mths:36-47 A
Ars_2 0.2959 0.0057 0.2524 mths:36-47 A
Ars_0 0.9822 0.0176 0.0002 mths:24-35 A
Ars_1 0.389 0.2753 0.3347 mths:24-35 A
Ars_2 0.3026 0.0334 0.2399 mths:24-35 A
Ars_0 0.9753 0.0243 0.0004 mths:12-23 A
Ars_1 0.4002 0.2592 0.3394 mths:12-23 A
Ars_2 0.3032 0.0208 0.2387 mths:12-23 A
Ars_0 0.8865 0.01332 0.00018 mths:36-47 B
Ars_1 0.34344 0.2574 0.29853 mths:36-47 B
Ars_2 0.26631 0.00513 0.22716 mths:36-47 B
Ars_0 0.88398 0.01584 0.00018 mths:24-35 B
Ars_1 0.3501 0.24777 0.30123 mths:24-35 B
Ars_2 0.27234 0.03006 0.21591 mths:24-35 B
Ars_0 0.87777 0.02187 0.00036 mths:12-23 B
Ars_1 0.36018 0.23328 0.30546 mths:12-23 B
Ars_2 0.27288 0.01872 0.21483 mths:12-23 B
')
mx_long2 = mx_long2 %>% mutate(minage = as.numeric(substr(as.character(Seasoning), 6,7)),
maxage = as.numeric(substr(as.character(Seasoning), 9,10)))
x <- xx %>% select(starts_with('Ars')) %>% data.matrix()
l <- list()
p <- 1
for (i in 1:nrow(x)) {
for (j in 1:3) {
Bx = filter(mx_long2, (j + xx[i, 'age']) >= minage, (j + xx[i, 'age']) <= maxage,
Region == xx[i, 'region']) %>%
select(starts_with('Ars_')) %>% data.matrix()
# Matrix Multiplication
x <- x %*% Bx
l[[p]] <- x
p = p + 1
}
}
l
這是使用data.table
的另一個選項:
library(data.table)
cols <- c("Ars_0","Ars_1","Ars_2")
setDT(mx_long2)[, c("minage","maxage") :=
lapply(.(substring(Seasoning, 6, 7), substring(Seasoning, 9, 10)), as.numeric)]
xxCJ <- setDT(xx)[, .(rn=.GRP, age=c(outer(1:3, age, `+`))), c("region", cols)]
jDT <- mx_long2[xxCJ, on=.(Region=region, minage<=age, maxage>=age), nomatch=0L]
x <- as.matrix(xx[, ..cols])
jDT[, {
mat <- matrix(unlist(mget(cols)), nrow=.N)
x <- x %*% mat
.(.(x))
}, by=.(rn, age=minage)]$V1
output:
[[1]]
[,1] [,2] [,3]
[1,] 1276.239 31.79801 0.523424
[2,] 5587.338 139.21081 2.291536
[3,] 2124.028 52.92103 0.871128
[[2]]
[,1] [,2] [,3]
[1,] 1257.600 39.26553 11.42768
[2,] 5505.737 171.90341 50.03007
[3,] 2093.007 65.34913 19.01895
[[3]]
[,1] [,2] [,3]
[1,] 1245.716 40.97499 16.55755
[2,] 5453.711 179.38741 72.48849
[3,] 2073.229 68.19417 27.55652
[[4]]
[,1] [,2] [,3]
[1,] 1244.492 33.75804 17.93563
[2,] 5448.351 147.79177 78.52170
[3,] 2071.192 56.18308 29.85004
[[5]]
[,1] [,2] [,3]
[1,] 1240.899 31.79569 15.85047
[2,] 5432.622 139.20068 69.39293
[3,] 2065.213 52.91717 26.37974
[[6]]
[,1] [,2] [,3]
[1,] 1235.976 31.12258 14.69273
[2,] 5411.069 136.25382 64.32436
[3,] 2057.019 51.79693 24.45292
[[7]]
[,1] [,2] [,3]
[1,] 1110.294 24.54953 12.85110
[2,] 4860.838 107.47716 56.26177
[3,] 1847.849 40.85747 21.38793
[[8]]
[,1] [,2] [,3]
[1,] 996.1295 21.17409 10.44788
[2,] 4361.0277 92.69960 45.74053
[3,] 1657.8458 35.23978 17.38827
[[9]]
[,1] [,2] [,3]
[1,] 893.1232 18.77225 8.873746
[2,] 3910.0690 82.18442 38.849016
[3,] 1486.4137 31.24243 14.768463
這是一個連接所有內容然后拆分的答案::
library(tidyr)
library(dplyr)
xx%>%
mutate(id_xx = seq_len(n()))%>%
crossing(j = 1:3)%>%
mutate(age = age + j)%>%
inner_join(mx_long2, ., by = c('Region' = 'region'))%>%
filter(age >= minage, age <= maxage)%>%
arrange(j, id_xx)%>%
select(starts_with('Ars_'))%>%
select(ends_with('x'))%>%
split(rep(1:9, each = 3))%>%
lapply(function(Bx) {
x <<- x %*% as.matrix(Bx)
return(x)
})
與data.table類似的方法:
library(data.table)
x <- xx %>% select(starts_with('Ars')) %>% data.matrix()
mx_dt <- as.data.table(mx_long2)
# prepare xx for a join by expanding it by 3
j <- 3
xx_dt <- as.data.table(xx)
xx_dt <- xx_dt[rep(seq_len(nrow(xx_dt)), each = j)
][, `:=`(age= age + rep(seq_len(j), nrow(xx_dt)),
ID = .I)]
# non-equi join
BX <- mx_dt[xx_dt[, .(region, age, ID)],
on = .(Region = region,
minage <= age,
maxage >= age),
allow.cartesian = T,
nomatch = 0L,
.(Ars_0, Ars_1, Ars_2, ID)]
# loop through split.
## NOTE x <<- ... the "<<-" is a global assignment
lapply(split(BX, by = 'ID', keep.by = F),
function(bx) {
x <<- x %*% as.matrix(bx)
return(x)
}
)
我想你可以使用基礎 R 做這樣的事情
temp <- transform(xx[rep(seq(nrow(xx)), 3), ], age = age + 1:3)
lapply(seq(nrow(temp)), function(i) {
new <- subset(mx_long2, temp$age[i] >= minage &
temp$age[i] <= maxage & temp$region[i] == Region)
x %*% as.matrix(new[startsWith(names(new), "Ars")])
})
這是我的 go 用purrr
接近嵌套循環
l
x <- xx %>% select(starts_with('Ars')) %>% data.matrix()
my_list <- purrr::pmap(
# use expand.grid() to create your iterators
.l = expand.grid(1:nrow(x),
1:3),
.f = ~{
Bx = filter(mx_long2, (.y + xx[.x, 'age']) >= minage, (.y + xx[.x, 'age']) <= maxage,
Region == xx[.x, 'region']) %>%
select(starts_with('Ars_')) %>% data.matrix()
# Matrix Multiplication
# global assignment operator <<-
x <<- x %*% Bx
return(x)
}
)
all.equal(l, my_list)
不過, l
和my_list
之間的結果存在一些小的差異。 我不確定為什么會這樣。 我錯過了代碼中的一些隨機部分嗎?
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.