繁体   English   中英

R 权重被忽略,而在 pivot.wider 中使用 weighted.mean 作为 value_fn

[英]R weights are ignored, while using weighted.mean as value_fn in pivot.wider

我正在尝试对因变量列表按自变量列表细分的加权平均值。

为此,我首先创建了一个函数“cross_fun”,然后将其映射到第二个函数“multi”中。 这工作得很好。 但是,我注意到平均值没有加权。 似乎 value_fn 的权重在 pivot_wider 中被忽略了。

谁能明白为什么?

library(tidyverse)
library(rlang)
library(scales)

dv1 <- c(1, 2, 1, 2, 1, 2) # dependent variable 1
dv2 <- c(2, 1, 2, 1, 2, 1) # dependent variable 2
wt <- c(0.5, 5, 0.5, 5, 0.5, 5) # weighting variable
iv1 <- c("m", "f", "m", "f", "m", "f") # independent variable 1
iv2 <- c("b", "b", "b", "a", "a", "a") # indipendent variable 2
iv3 <- c("x", "y", "y", "x", "y", "y") # indipendent variable 3

DATA <- dplyr::tibble(iv1, iv2, iv3, dv1, dv2, wt) %>% # build data frame
  mutate(one = 1, # for Tatals
         no_weight = 1) # as weight with 1 for all rows

IV_List = c('one', 'iv1', 'iv2', 'iv3') # List of independent variables 
DV_List = c("dv1", "dv2") # List of dependent variables

cross_fun <- function(.data, DV, IVs, weight, fun) { # calculate a function for  a DV by every IV
  
 List <- list() # initialize a List
  
  df <- .data %>% 
    select(all_of({{ IVs }}), {{ DV }}, {{ weight }}) # select the variables to get the List
  
  for (i in 1:(ncol(df) - 2)) { # a list for every IV
    List[[i]] <- df %>%
      select(all_of(i), {{ DV }}, {{ weight }}) %>% # build the lists out of the IVs and the DV
      mutate(ORDER = 1) 
  }
  
    dt <- purrr::map( # pivot wider for every value of IVs and calculate the values with the function "fun"
      .x = List,
      .f = ~ pivot_wider(.x, id_cols = "ORDER", names_from = 1, values_from = {{ DV }}, values_fn = {{ fun }}) 
    ) %>%
      purrr::reduce(left_join, by = "ORDER") %>% # reduce by leftjoin
      select(-any_of(c("ORDER"))) %>%
      rename(Total = 1) # column one are the Totals
  return(dt)
}

DATA %>% cross_fun(dv1, IVs = IV_List, weight = wt, fun = ~weighted.mean(.x, weight = wt, na.rm = TRUE)) %>% # calculate the crosstab for dv1
  mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) 

multi <- function(.data, DVs, IVs, weight, ...){ # calculate weighted means for a List of DVs by a List of IVs
  
  Answers <- .data %>% # extract a first column for the resulting table
    select(all_of(DVs)) %>% 
    colnames() %>% 
    tibble() %>% 
    select("Variable" = 1)
  
    dt <- .data %>%
      select(all_of(DVs), {{ weight }}, all_of({{ IV_List }})) %>% # select all needed columns
      map_dfr(all_of(DVs), cross_fun, .data = ., IVs = IV_List, weight = {{ weight }}, fun = ~weighted.mean(.x, weight = {{ weight }}, na.rm = TRUE)) %>% # map all DVs to the cross_fun from above and bind it to dataframe
      cbind(Answers, .) %>% # add the first column
      mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) # round 

  return(dt)
}

DATA %>% # calculate the means for the DVs by the IVs with the weight wt
  multi(DVs = DV_List, IVs = IV_List, weight = wt)

DATA %>% # calculate the means for the DVs by the IVs without weights (no_weight is always = 1)
  multi(DVs = DV_List, IVs = IV_List, weight = no_weight)

# it seams that the weights are ignored, when using weighted.mean as values_fn in pivot_wider

谢谢!

解决方案:将 weighted.mean-function 放在这里,而不是作为 pivot_wider-function 的一部分

工作 RepEx 来了:

library(tidyverse)
library(rlang)
library(scales)

dv1 <- c(1, 2, 1, 2, 1, 2) # dependent variable 1
dv2 <- c(2, 1, 2, 1, 2, 1) # dependent variable 2
wt <- c(0.5, 5, 0.5, 5, 0.5, 5) # weighting variable
iv1 <- c("m", "f", "m", "f", "m", "f") # independent variable 1
iv2 <- c("b", "b", "b", "a", "a", "a") # indipendent variable 2
iv3 <- c("x", "y", "y", "x", "y", "y") # indipendent variable 3

DATA <- dplyr::tibble(iv1, iv2, iv3, dv1, dv2, wt) %>% # build data frame
  mutate(one = 1, # for Tatals
         no_weight = 1) # as weight with 1 for all rows

IV_List = c('one', 'iv1', 'iv2', 'iv3') # List of column variables 
DV_List = c('dv1', 'dv2') # List of dependent variables

cross_fun <- function(.data, DV, IVs, weight, ...) { # calculate a function for  a DV by every IV
  
 List <- list() # initialize a List
  
  df <- .data %>% 
    select(all_of({{ IVs }}), {{ DV }}, {{ weight }}) # select the variables to get the List
  
  for (i in 1:(ncol(df) - 2)) { # a list for every IV
    List[[i]] <- df %>%
      select(all_of(i), {{ DV }}, {{ weight }}) %>% # build the lists out of the IVs and the DV
      group_by(UV = df[[i]]) %>% 
      summarise(mean = weighted.mean({{ DV }}, {{weight}})) %>% # THE SOLUTION: Put the weighted.mean-function here and not as a part fo the pivot_wider-function
      mutate(ORDER = 1) 
  }
  
    dt <- purrr::map( # pivot wider for every value of IVs and calculate the values with the function "fun"
      .x = List,
      .f = ~ pivot_wider(.x, id_cols = "ORDER", names_from = 1, values_from = mean) # THE SOLUTION: Here only the "mean" from the summarise above
    ) %>%
      purrr::reduce(left_join, by = "ORDER") %>% # reduce by leftjoin
      select(-any_of(c("ORDER"))) %>%
      rename(Total = 1) # column one are the Totals
  return(dt)
}

multi <- function(.data, DVs, IVs, weight, fun, ...){ # calculate weighted means for a List of DVs by a List of IVs

  Answers <- .data %>% # extract a first column for the resulting table
    select(all_of(DVs)) %>% 
    colnames() %>% 
    tibble() %>% 
    select("Variable" = 1)
  
    dt <- .data %>%
      select(all_of(DVs), {{ weight }}, all_of({{ IV_List }})) %>% # select all needed columns
      map_dfr(syms(DVs), cross_fun, .data = ., IVs = IV_List, weight = {{ weight }}) %>% # map all DVs to the cross_fun from above and bind it to dataframe
      cbind(Answers, .) %>% # add the first column
      mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) # round 

  return(dt)
}

DATA %>% cross_fun(dv1, IVs = IV_List, weight = wt) %>% # calculate the crosstab for dv1
  mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) 

DATA %>% cross_fun(dv2, IVs = IV_List, weight = wt) %>% # calculate the crosstab for dv1
  mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) 


DATA %>% # calculate the means for the DVs by the IVs with the weight wt
  multi(DVs = c('dv1', 'dv2'), IVs = IV_List, weight = wt, fun = (sum({{ DV }} * {{ weight }}) /sum({{ weight }})))

DATA %>% # calculate the means for the DVs by the IVs without weights (no_weight always = 1)
  multi(DVs = DV_List, IVs = IV_List, weight = no_weight)

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM