[英]R weights are ignored, while using weighted.mean as value_fn in pivot.wider
我正在嘗試對因變量列表按自變量列表細分的加權平均值。
為此,我首先創建了一個函數“cross_fun”,然后將其映射到第二個函數“multi”中。 這工作得很好。 但是,我注意到平均值沒有加權。 似乎 value_fn 的權重在 pivot_wider 中被忽略了。
誰能明白為什么?
library(tidyverse)
library(rlang)
library(scales)
dv1 <- c(1, 2, 1, 2, 1, 2) # dependent variable 1
dv2 <- c(2, 1, 2, 1, 2, 1) # dependent variable 2
wt <- c(0.5, 5, 0.5, 5, 0.5, 5) # weighting variable
iv1 <- c("m", "f", "m", "f", "m", "f") # independent variable 1
iv2 <- c("b", "b", "b", "a", "a", "a") # indipendent variable 2
iv3 <- c("x", "y", "y", "x", "y", "y") # indipendent variable 3
DATA <- dplyr::tibble(iv1, iv2, iv3, dv1, dv2, wt) %>% # build data frame
mutate(one = 1, # for Tatals
no_weight = 1) # as weight with 1 for all rows
IV_List = c('one', 'iv1', 'iv2', 'iv3') # List of independent variables
DV_List = c("dv1", "dv2") # List of dependent variables
cross_fun <- function(.data, DV, IVs, weight, fun) { # calculate a function for a DV by every IV
List <- list() # initialize a List
df <- .data %>%
select(all_of({{ IVs }}), {{ DV }}, {{ weight }}) # select the variables to get the List
for (i in 1:(ncol(df) - 2)) { # a list for every IV
List[[i]] <- df %>%
select(all_of(i), {{ DV }}, {{ weight }}) %>% # build the lists out of the IVs and the DV
mutate(ORDER = 1)
}
dt <- purrr::map( # pivot wider for every value of IVs and calculate the values with the function "fun"
.x = List,
.f = ~ pivot_wider(.x, id_cols = "ORDER", names_from = 1, values_from = {{ DV }}, values_fn = {{ fun }})
) %>%
purrr::reduce(left_join, by = "ORDER") %>% # reduce by leftjoin
select(-any_of(c("ORDER"))) %>%
rename(Total = 1) # column one are the Totals
return(dt)
}
DATA %>% cross_fun(dv1, IVs = IV_List, weight = wt, fun = ~weighted.mean(.x, weight = wt, na.rm = TRUE)) %>% # calculate the crosstab for dv1
mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1)))
multi <- function(.data, DVs, IVs, weight, ...){ # calculate weighted means for a List of DVs by a List of IVs
Answers <- .data %>% # extract a first column for the resulting table
select(all_of(DVs)) %>%
colnames() %>%
tibble() %>%
select("Variable" = 1)
dt <- .data %>%
select(all_of(DVs), {{ weight }}, all_of({{ IV_List }})) %>% # select all needed columns
map_dfr(all_of(DVs), cross_fun, .data = ., IVs = IV_List, weight = {{ weight }}, fun = ~weighted.mean(.x, weight = {{ weight }}, na.rm = TRUE)) %>% # map all DVs to the cross_fun from above and bind it to dataframe
cbind(Answers, .) %>% # add the first column
mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) # round
return(dt)
}
DATA %>% # calculate the means for the DVs by the IVs with the weight wt
multi(DVs = DV_List, IVs = IV_List, weight = wt)
DATA %>% # calculate the means for the DVs by the IVs without weights (no_weight is always = 1)
multi(DVs = DV_List, IVs = IV_List, weight = no_weight)
# it seams that the weights are ignored, when using weighted.mean as values_fn in pivot_wider
謝謝! 本
解決方案:將 weighted.mean-function 放在這里,而不是作為 pivot_wider-function 的一部分
工作 RepEx 來了:
library(tidyverse)
library(rlang)
library(scales)
dv1 <- c(1, 2, 1, 2, 1, 2) # dependent variable 1
dv2 <- c(2, 1, 2, 1, 2, 1) # dependent variable 2
wt <- c(0.5, 5, 0.5, 5, 0.5, 5) # weighting variable
iv1 <- c("m", "f", "m", "f", "m", "f") # independent variable 1
iv2 <- c("b", "b", "b", "a", "a", "a") # indipendent variable 2
iv3 <- c("x", "y", "y", "x", "y", "y") # indipendent variable 3
DATA <- dplyr::tibble(iv1, iv2, iv3, dv1, dv2, wt) %>% # build data frame
mutate(one = 1, # for Tatals
no_weight = 1) # as weight with 1 for all rows
IV_List = c('one', 'iv1', 'iv2', 'iv3') # List of column variables
DV_List = c('dv1', 'dv2') # List of dependent variables
cross_fun <- function(.data, DV, IVs, weight, ...) { # calculate a function for a DV by every IV
List <- list() # initialize a List
df <- .data %>%
select(all_of({{ IVs }}), {{ DV }}, {{ weight }}) # select the variables to get the List
for (i in 1:(ncol(df) - 2)) { # a list for every IV
List[[i]] <- df %>%
select(all_of(i), {{ DV }}, {{ weight }}) %>% # build the lists out of the IVs and the DV
group_by(UV = df[[i]]) %>%
summarise(mean = weighted.mean({{ DV }}, {{weight}})) %>% # THE SOLUTION: Put the weighted.mean-function here and not as a part fo the pivot_wider-function
mutate(ORDER = 1)
}
dt <- purrr::map( # pivot wider for every value of IVs and calculate the values with the function "fun"
.x = List,
.f = ~ pivot_wider(.x, id_cols = "ORDER", names_from = 1, values_from = mean) # THE SOLUTION: Here only the "mean" from the summarise above
) %>%
purrr::reduce(left_join, by = "ORDER") %>% # reduce by leftjoin
select(-any_of(c("ORDER"))) %>%
rename(Total = 1) # column one are the Totals
return(dt)
}
multi <- function(.data, DVs, IVs, weight, fun, ...){ # calculate weighted means for a List of DVs by a List of IVs
Answers <- .data %>% # extract a first column for the resulting table
select(all_of(DVs)) %>%
colnames() %>%
tibble() %>%
select("Variable" = 1)
dt <- .data %>%
select(all_of(DVs), {{ weight }}, all_of({{ IV_List }})) %>% # select all needed columns
map_dfr(syms(DVs), cross_fun, .data = ., IVs = IV_List, weight = {{ weight }}) %>% # map all DVs to the cross_fun from above and bind it to dataframe
cbind(Answers, .) %>% # add the first column
mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) # round
return(dt)
}
DATA %>% cross_fun(dv1, IVs = IV_List, weight = wt) %>% # calculate the crosstab for dv1
mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1)))
DATA %>% cross_fun(dv2, IVs = IV_List, weight = wt) %>% # calculate the crosstab for dv1
mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1)))
DATA %>% # calculate the means for the DVs by the IVs with the weight wt
multi(DVs = c('dv1', 'dv2'), IVs = IV_List, weight = wt, fun = (sum({{ DV }} * {{ weight }}) /sum({{ weight }})))
DATA %>% # calculate the means for the DVs by the IVs without weights (no_weight always = 1)
multi(DVs = DV_List, IVs = IV_List, weight = no_weight)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.