[英]Apply a set of functions to multiple data frames, considering a specific sequence
考虑以下数据:
# Example data: a data frame with 46 rows and nine columns (V1 to V9);
# every value in the literal below is either 0 or 1.
df <- structure(list(V1 = c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0), V2 = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0), V3 = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1), V4 = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 1, 0, 0, 0), V5 = c(1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1), V6 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1, 0, 0), V7 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0, 0), V8 = c(1, 1, 1, 0, 0, 0, 0, 1,
1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0), V9 = c(1,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1)), class = "data.frame", row.names = c(NA, -46L))
我想对 df 中所有可能的列对应用以下函数。
以 V1 和 V2 这一对为例:
# Pairwise analysis of V1 against V2.

# Agreement indicator: for 0/1 data this is 1 where V1 and V2 take the same
# value and 0 where they differ.
df$V1V2 <- (df$V1 * df$V2) + (1 - df$V1) * (1 - df$V2)

# Share of rows on which the two columns agree. It is stored under its own
# name (not `sum`, which shadows base::sum and would be overwritten by the
# next pair) and divided by the real row count instead of a hard-coded 46.
agreement_V1V2 <- sum(df$V1V2) / nrow(df)

# Scale both columns by their standard deviation before regressing.
df$VD <- df$V1 / sd(df$V1)
df$VI <- df$V2 / sd(df$V2)

# Fit once, summarise once, and show the summary.
est <- lm(VD ~ VI, data = df)
est_summary <- summary(est)
est_summary

# Start the one-row results frame with the slope's p-value (row "VI" of the
# coefficient table) instead of a placeholder NA column.
ndf <- data.frame(V1V2 = est_summary[["coefficients"]]["VI", "Pr(>|t|)"])
以 V1 和 V3 这一对为例:
# Pairwise analysis of V1 against V3.

# Agreement indicator: for 0/1 data this is 1 where V1 and V3 take the same
# value and 0 where they differ.
df$V1V3 <- (df$V1 * df$V3) + (1 - df$V1) * (1 - df$V3)

# Share of rows on which the two columns agree. It is stored under its own
# name (not `sum`, which shadows base::sum) and divided by the real row count
# instead of a hard-coded 46.
agreement_V1V3 <- sum(df$V1V3) / nrow(df)

# Scale both columns by their standard deviation before regressing.
df$VD <- df$V1 / sd(df$V1)
df$VI <- df$V3 / sd(df$V3)

# Fit once, summarise once, and show the summary.
est <- lm(VD ~ VI, data = df)
est_summary <- summary(est)
est_summary

# Record the slope's p-value (row "VI" of the coefficient table) in ndf.
ndf$V1V3 <- est_summary[["coefficients"]]["VI", "Pr(>|t|)"]
我可以对 V1 与其余各列(即 V3、V4、V5、V6、V7、V8 和 V9)组成的每一对重复上述步骤。尽管如此,我确信这不是最好的方法。
更新
这是我目前的进展:
# Column order that interleaves V1 with each partner column:
# V1, V2, V1, V3, ..., V1, V9.
pair_cols <- as.vector(rbind("V1", paste0("V", 2:9)))
dfV1 <- df[, pair_cols]
# Reset the column names to the requested (repeated) labels.
colnames(dfV1) <- pair_cols

# Cut dfV1 into consecutive two-column data frames, one per pair.
pair_starts <- seq(1, ncol(dfV1), by = 2)
sep <- lapply(pair_starts, function(first_col) {
  last_col <- pmin(first_col + 1, ncol(dfV1))
  dfV1[first_col:last_col]
})

# Give every pair its own object, then gather them in a list that
# tibble::lst() names after those objects.
V1V2 <- sep[[1]]
V1V3 <- sep[[2]]
V1V4 <- sep[[3]]
V1V5 <- sep[[4]]
V1V6 <- sep[[5]]
V1V7 <- sep[[6]]
V1V8 <- sep[[7]]
V1V9 <- sep[[8]]
list_V1 <- tibble::lst(V1V2, V1V3, V1V4, V1V5, V1V6, V1V7, V1V8, V1V9)
library(dplyr)
#' Add the agreement indicator and SD-scaled copies of a column pair
#'
#' The agreement indicator is `a * b + (1 - a) * (1 - b)`, where `a` and `b`
#' are the first and second column. For 0/1 data it is 1 where the two
#' columns take the same value and 0 where they differ.
#'
#' @param x A data frame whose first two columns are the numeric pair to
#'   compare.
#' @return `x` with three added columns: `First` (agreement indicator),
#'   `VD` (first column divided by its standard deviation) and `VI` (second
#'   column divided by its standard deviation).
my_func <- function(x) {
  # `[[` always yields a plain vector, for data frames and tibbles alike.
  first <- x[[1]]
  second <- x[[2]]

  # Parenthesised as (1 - a) * (1 - b); the earlier form computed
  # 1 - a * (1 - b), which is not the agreement indicator.
  x$First <- (first * second) + (1 - first) * (1 - second)
  x$VD <- first / sd(first)
  x$VI <- second / sd(second)
  x
}
# Run my_func on every pair data frame in list_V1; lapply keeps the list's
# element names on the result.
res <- lapply(list_V1, my_func)
# Copy each element of res into the global environment under its element
# name, replacing any existing object that has the same name.
list2env(res, .GlobalEnv)
# Put the First column of each pair side by side in one data frame.
df.IC.V1 <- cbind.data.frame(V1V2$First, V1V3$First, V1V4$First, V1V5$First, V1V6$First, V1V7$First, V1V8$First, V1V9$First)
# Column sums divided by a hard-coded 46.
# NOTE(review): 46 is presumably the row count of df -- confirm before
# reusing this with other data.
IC.all.V1 <- data.frame(colSums(df.IC.V1)/46)
我不知道如何将下面这一部分应用于由 dfV1 拆分得到的列表:
# Regress the scaled V1 on the scaled partner column.
est <- lm(df$VD ~ df$VI)
# Summarise once: show the summary, then reuse it for the p-value.
est_summary <- summary(est)
est_summary
# Second entry of the p-value column, i.e. the slope's p-value.
ndf$V1V3 <- est_summary$coefficients[, "Pr(>|t|)"][2]
应避免使用宽格式数据,尽量让数据保持长格式(即整洁格式),这几乎对数据科学的各个环节都有帮助:聚合、建模和绘图。因此,可以考虑用 reshape(或 tidyverse 风格的 pivot_longer)把数据重塑为长格式,再用通用的 by 对 V2 至 V9 中的每个变量分别调用同一个函数来运行模型。
最后,建议把这一过程的结果单独存放到一个新的数据框中,以避免因长度不同而重复原始值——尤其是因为模型对每个变量都会输出两行(截距和该变量各一行),并且包含多列。
# Reshape to long format: V1 stays on every row, and the remaining columns
# are stacked into `value`, labelled by their original name in `variable`.
# Assumes V1 is the first column of df.
partner_cols <- names(df)[-1]
long_df <- reshape(df, idvar = "V1", varying = partner_cols,
                   times = partner_cols,
                   v.names = "value", timevar = "variable",
                   # One row name per long row; sized from the data instead
                   # of a fixed 1:1E4 pool, which runs out past 10,000 rows.
                   new.row.names = seq_len(nrow(df) * length(partner_cols)),
                   direction = "long")
#' Regress SD-scaled V1 on one SD-scaled partner variable
#'
#' Meant to be called once per partner variable on a slice of the
#' long-format data, for example through `by()`. Prints the model summary as
#' a side effect.
#'
#' @param sub Data frame slice with columns `V1`, `value` (the partner
#'   variable's values) and `variable` (the partner's name; its first element
#'   labels the output).
#' @param include_agreement If `TRUE`, append an `agreement` column holding
#'   the mean of `V1 * value + (1 - V1) * (1 - value)`, which for 0/1 data is
#'   the share of rows where both columns take the same value. The default
#'   `FALSE` keeps the original output columns.
#' @return A data frame with one row per model coefficient: `variable`
#'   followed by the columns of the `summary.lm()` coefficient table, plus
#'   `agreement` when requested.
pairwise_model <- function(sub, include_agreement = FALSE) {
  # Agreement indicator. The second term must pair (1 - V1) with
  # (1 - value); squaring (1 - value) counts every value == 0 row as a match.
  sub$vpair <- (sub$V1 * sub$value) + (1 - sub$V1) * (1 - sub$value)
  # Divide by the slice's own row count rather than a hard-coded 46.
  sum_v <- sum(sub$vpair) / nrow(sub)

  sub$VD <- sub$V1 / sd(sub$V1)
  sub$VI <- sub$value / sd(sub$value)

  est <- lm(VD ~ VI, data = sub)
  est_summary <- summary(est)
  print(est_summary)

  ndf <- data.frame(variable = sub$variable[[1]],
                    est_summary[["coefficients"]])
  if (include_agreement) {
    ndf$agreement <- sum_v
  }
  ndf
}
# Run the model once per partner variable, then stack the per-variable
# coefficient tables into a single data frame and show it.
df_list <- by(data = long_df, INDICES = long_df$variable, FUN = pairwise_model)
results_df <- do.call("rbind", df_list)
results_df
# variable Estimate Std..Error t.value Pr...t..
# V2.(Intercept) V2 0.70897157 0.1697361 4.1769050 1.376794e-04
# V2.VI V2 0.20546559 0.1475392 1.3926169 1.707334e-01
# V3.(Intercept) V3 0.00000000 0.4324256 0.0000000 1.000000e+00
# V3.VI V3 0.29294628 0.1441419 2.0323471 4.818178e-02
# V4.(Intercept) V4 0.59719677 0.1461565 4.0860091 1.829775e-04
# V4.VI V4 0.47663445 0.1325296 3.5964390 8.118808e-04
# V5.(Intercept) V5 0.77259722 0.1604268 4.8158874 1.766754e-05
# V5.VI V5 0.13627939 0.1493492 0.9124883 3.664845e-01
# V6.(Intercept) V6 0.68720490 0.1557572 4.4120255 6.533736e-05
# V6.VI V6 0.31399751 0.1431310 2.1937768 3.357833e-02
# V7.(Intercept) V7 0.57392936 0.1516681 3.7841150 4.627413e-04
# V7.VI V7 0.46128020 0.1337587 3.4486009 1.253140e-03
# V8.(Intercept) V8 0.90047538 0.1869717 4.8161045 1.765498e-05
# V8.VI V8 -0.09345783 0.1500959 -0.6226544 5.367256e-01
# V9.(Intercept) V9 0.60871296 0.1645124 3.7001029 5.960399e-04
# V9.VI V9 0.35598290 0.1408800 2.5268512 1.517916e-02
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.