[英]create multiple columns with apply on a dataframe
我正在嘗試根據列的值和條件檢查來添加幾列。我能取到正確的數據,但無法把列添加進去。也許用 apply(x, 1, fun) 做不到,只能用 apply(x, 2, fun)? 謝謝你的指點。
# Sample data: one row per id with its `value` and `prod` figures.
df <- data.frame(
  id = 411:420,
  value = c(10, 0, 25, 0, 32, 66, 45, 88, 0, 23),
  prod = c(500, 300, 400, 600, 0, 800, 400, 300, 200, 0)
)
# Question's attempt: meant to be called once per row of `df` (via apply), with
# `x` holding that row's "id", "value" and "prod" entries. When both value and
# prod are positive it tries to add two id-specific columns to `df`.
# NOTE: the assignments to `df` below run inside the function, so R changes a
# function-local copy of `df`; the global `df` is left untouched, which is why
# the new columns never show up.
add_coll <- function(x) {
if (x["value"] >0 & x["prod"] > 0) {
# Column names derived from the row id, e.g. "col_411_v" / "col_411_p".
varname_v <- paste0("col_",x["id"],"_v")
varname_p <- paste0("col_",x["id"],"_p")
print(varname_v)
print(varname_p)
df[, varname_v] <- x["value"] # not working: only the local copy of df changes
df[, varname_p] <- 55 # not working: only the local copy of df changes
df$"test" <- 44 # not working: only the local copy of df changes
}
else {
print("not creating columns")
}
}
# Call add_coll once per row of df (MARGIN = 1 iterates over rows).
apply(df, MARGIN = 1, FUN = add_coll)
這就是我所期待的:
# Desired result: the original three columns plus a `_v`/`_p` column pair for
# every id whose value and prod are both positive. Each pair is zero everywhere
# except on the row of its own id.
df_expected <- data.frame(
  id = 411:420,
  value = c(10, 0, 25, 0, 32, 66, 45, 88, 0, 23),
  prod = c(500, 300, 400, 600, 0, 800, 400, 300, 200, 0),
  col_411_v = c(10, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  col_411_p = c(500, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  col_413_v = c(0, 0, 25, 0, 0, 0, 0, 0, 0, 0),
  col_413_p = c(0, 0, 400, 0, 0, 0, 0, 0, 0, 0),
  col_416_v = c(0, 0, 0, 0, 0, 66, 0, 0, 0, 0),
  col_416_p = c(0, 0, 0, 0, 0, 800, 0, 0, 0, 0),
  col_417_v = c(0, 0, 0, 0, 0, 0, 45, 0, 0, 0),
  col_417_p = c(0, 0, 0, 0, 0, 0, 400, 0, 0, 0),
  col_418_v = c(0, 0, 0, 0, 0, 0, 0, 88, 0, 0),
  col_418_p = c(0, 0, 0, 0, 0, 0, 0, 300, 0, 0)
)
id value prod col_411_v col_411_p col_413_v col_413_p col_416_v col_416_p col_417_v col_417_p col_418_v col_418_p
1 411 10 500 10 500 0 0 0 0 0 0 0 0
2 412 0 300 0 0 0 0 0 0 0 0 0 0
3 413 25 400 0 0 25 400 0 0 0 0 0 0
4 414 0 600 0 0 0 0 0 0 0 0 0 0
5 415 32 0 0 0 0 0 0 0 0 0 0 0
6 416 66 800 0 0 0 0 66 800 0 0 0 0
7 417 45 400 0 0 0 0 0 0 45 400 0 0
8 418 88 300 0 0 0 0 0 0 0 0 88 300
9 419 0 200 0 0 0 0 0 0 0 0 0 0
10 420 23 0 0 0 0 0 0 0 0 0 0 0
我建議不要在 apply 這類“循環”中寫入(全局)環境。 如果你真的想這樣做,可以改用一個清晰明瞭的 for 循環。
在您的情況下,我建議這樣使用 apply:先創建一個包含附加列名的 data.frame,然后將它附加到 df,接著再做一些長格式轉寬格式(long to wide)的處理。 可以自行逐步檢查每個步驟。
library(tidyr)
# Sample data: one row per id with its `value` and `prod` figures.
df <- data.frame(
  id = 411:420,
  value = c(10, 0, 25, 0, 32, 66, 45, 88, 0, 23),
  prod = c(500, 300, 400, 600, 0, 800, 400, 300, 200, 0)
)
#' Build the names of the per-id columns for one row of `df`.
#'
#' Meant to be called row-wise, e.g. `apply(df, MARGIN = 1, FUN = add_coll)`.
#'
#' @param x Named vector (or list) for a single row with the elements
#'   "id", "value" and "prod".
#' @return A one-row data.frame with the character columns `varname_v` and
#'   `varname_p`. They hold "col_<id>_v" / "col_<id>_p" when both value and
#'   prod are positive, and `NA` otherwise.
add_coll <- function(x) {
  # The condition is scalar, so use the short-circuit `&&`. isTRUE() sends a
  # missing value/prod to the "no columns" branch instead of failing in if().
  both_positive <- isTRUE(x[["value"]] > 0 && x[["prod"]] > 0)

  if (both_positive) {
    data.frame(
      varname_v = paste0("col_", x[["id"]], "_v"),
      varname_p = paste0("col_", x[["id"]], "_p")
    )
  } else {
    # NA_character_ keeps the column type identical to the other branch.
    data.frame(varname_v = NA_character_, varname_p = NA_character_)
  }
}
# Get one small data.frame of column names per row of df ...
out <- apply(df, MARGIN = 1, FUN = add_coll)
# ... and stack them into a single data.frame, one row per row of df.
out <- do.call(rbind, out)
# Put the column names next to the original data.
xy <- cbind(df, out)
# Long to wide: spread `value` into columns named by varname_v.
xywide <- pivot_wider(xy, names_from = varname_v, values_from = value)
# Rows whose varname_v is NA land in a column called "NA"; drop it.
xywide <- xywide[, colnames(xywide) != "NA"]
# Same again for `prod`, using the names stored in varname_p.
xywide <- pivot_wider(xywide, names_from = varname_p, values_from = prod)
xywide <- xywide[, colnames(xywide) != "NA"]
# Cells the pivots did not fill are NA; turn them into 0.
xywide[is.na(xywide)] <- 0
# Join the new columns back onto the original data; `id` is the only column
# the two tables still share at this point.
res <- merge(df, xywide)
res
id value prod col_411_v col_413_v col_416_v col_417_v col_418_v col_411_p col_413_p col_416_p
1 411 10 500 10 0 0 0 0 500 0 0
2 412 0 300 0 0 0 0 0 0 0 0
3 413 25 400 0 25 0 0 0 0 400 0
4 414 0 600 0 0 0 0 0 0 0 0
5 415 32 0 0 0 0 0 0 0 0 0
6 416 66 800 0 0 66 0 0 0 0 800
7 417 45 400 0 0 0 45 0 0 0 0
8 418 88 300 0 0 0 0 88 0 0 0
9 419 0 200 0 0 0 0 0 0 0 0
10 420 23 0 0 0 0 0 0 0 0 0
col_417_p col_418_p
1 0 0
2 0 0
3 0 0
4 0 0
5 0 0
6 0 0
7 400 0
8 0 300
9 0 0
10 0 0
如果您想采用 for 循環的方式,下面是一種解決方法。 基本思路是先創建一個全零的占位 data.frame,然后在某一行符合您的條件時填入數據。 這種方法的好處是很容易擴展,但在循環中反復使用 cbind 的做法,對於相當大的數據集可能效率低下。
# All-zero, two-column template with one row per row of df. Every qualifying
# row gets its own copy, renamed after the row id and filled in on that row.
dummy.template <- data.frame(matrix(0, nrow = nrow(df), ncol = 2))

for (i in seq_len(nrow(df))) {
  myrow <- df[i, ]

  # Scalar test, hence `&&`. Rows that fail it contribute no columns.
  if (myrow$value > 0 && myrow$prod > 0) {
    temp.cols <- dummy.template
    colnames(temp.cols) <- c(
      paste0("col_", myrow$id, "_v"),
      paste0("col_", myrow$id, "_p")
    )
    temp.cols[i, 1] <- myrow$value
    temp.cols[i, 2] <- myrow$prod
    # Only columns are appended, so the row count (and the loop range) of df
    # stays the same.
    df <- cbind(df, temp.cols)
  }
}
id value prod col_411_v col_411_p col_413_v col_413_p col_416_v
1 411 10 500 10 500 0 0 0
2 412 0 300 0 0 0 0 0
3 413 25 400 0 0 25 400 0
4 414 0 600 0 0 0 0 0
5 415 32 0 0 0 0 0 0
6 416 66 800 0 0 0 0 66
7 417 45 400 0 0 0 0 0
8 418 88 300 0 0 0 0 0
9 419 0 200 0 0 0 0 0
10 420 23 0 0 0 0 0 0
col_416_p col_417_v col_417_p col_418_v col_418_p
1 0 0 0 0 0
2 0 0 0 0 0
3 0 0 0 0 0
4 0 0 0 0 0
5 0 0 0 0 0
6 800 0 0 0 0
7 0 45 400 0 0
8 0 0 0 88 300
9 0 0 0 0 0
10 0 0 0 0 0
您可以創建帶有行號的列,以寬格式獲取數據並將列綁定到原始數據集。
library(dplyr)
library(tidyr)
# Spread value/prod into one column per id, bind those columns onto the
# original data, then zero the new columns on every row that fails the
# condition from the question (value > 0 and prod > 0).
df %>%
  # The row number is the only identifying column left after the pivot, so
  # each input row stays a separate output row.
  mutate(row = row_number()) %>%
  pivot_wider(
    names_from = id,
    values_from = c(value, prod),
    values_fill = list(value = 0, prod = 0)
  ) %>%
  select(-row) %>%
  bind_cols(df, .) %>%
  # Columns 1:3 are the original id/value/prod and are left alone. across()
  # replaces the superseded mutate_at(); `value` and `prod` in the lambda are
  # the original columns.
  mutate(across(-(1:3), ~ replace(.x, value <= 0 | prod <= 0, 0)))
# id value prod value_411 value_412 value_413 value_414 value_415 value_416 ...
#1 411 10 500 10 0 0 0 0 0 ...
#2 412 0 300 0 0 0 0 0 0 ...
#3 413 25 400 0 0 25 0 0 0 ...
#4 414 0 600 0 0 0 0 0 0 ...
#5 415 32 0 0 0 0 0 0 0 ...
#6 416 66 800 0 0 0 0 0 66 ...
#7 417 45 400 0 0 0 0 0 0 ...
#8 418 88 300 0 0 0 0 0 0 ...
#9 419 0 200 0 0 0 0 0 0 ...
#10 420 23 0 0 0 0 0 0 0 ...
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.