簡體   English   中英

R 權重被忽略,而在 pivot.wider 中使用 weighted.mean 作為 value_fn

[英]R weights are ignored, while using weighted.mean as value_fn in pivot.wider

我正在嘗試對因變量列表按自變量列表細分的加權平均值。

為此,我首先創建了一個函數“cross_fun”,然后將其映射到第二個函數“multi”中。 這工作得很好。 但是,我注意到平均值沒有加權。 似乎 value_fn 的權重在 pivot_wider 中被忽略了。

誰能明白為什么?

library(tidyverse)
library(rlang)
library(scales)

dv1 <- c(1, 2, 1, 2, 1, 2) # dependent variable 1
dv2 <- c(2, 1, 2, 1, 2, 1) # dependent variable 2
wt <- c(0.5, 5, 0.5, 5, 0.5, 5) # weighting variable
iv1 <- c("m", "f", "m", "f", "m", "f") # independent variable 1
iv2 <- c("b", "b", "b", "a", "a", "a") # indipendent variable 2
iv3 <- c("x", "y", "y", "x", "y", "y") # indipendent variable 3

DATA <- dplyr::tibble(iv1, iv2, iv3, dv1, dv2, wt) %>% # build data frame
  mutate(one = 1, # for Tatals
         no_weight = 1) # as weight with 1 for all rows

IV_List = c('one', 'iv1', 'iv2', 'iv3') # List of independent variables 
DV_List = c("dv1", "dv2") # List of dependent variables

cross_fun <- function(.data, DV, IVs, weight, fun) { # calculate a function for  a DV by every IV
  
 List <- list() # initialize a List
  
  df <- .data %>% 
    select(all_of({{ IVs }}), {{ DV }}, {{ weight }}) # select the variables to get the List
  
  for (i in 1:(ncol(df) - 2)) { # a list for every IV
    List[[i]] <- df %>%
      select(all_of(i), {{ DV }}, {{ weight }}) %>% # build the lists out of the IVs and the DV
      mutate(ORDER = 1) 
  }
  
    dt <- purrr::map( # pivot wider for every value of IVs and calculate the values with the function "fun"
      .x = List,
      .f = ~ pivot_wider(.x, id_cols = "ORDER", names_from = 1, values_from = {{ DV }}, values_fn = {{ fun }}) 
    ) %>%
      purrr::reduce(left_join, by = "ORDER") %>% # reduce by leftjoin
      select(-any_of(c("ORDER"))) %>%
      rename(Total = 1) # column one are the Totals
  return(dt)
}

DATA %>% cross_fun(dv1, IVs = IV_List, weight = wt, fun = ~weighted.mean(.x, weight = wt, na.rm = TRUE)) %>% # calculate the crosstab for dv1
  mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) 

multi <- function(.data, DVs, IVs, weight, ...){ # calculate weighted means for a List of DVs by a List of IVs
  
  Answers <- .data %>% # extract a first column for the resulting table
    select(all_of(DVs)) %>% 
    colnames() %>% 
    tibble() %>% 
    select("Variable" = 1)
  
    dt <- .data %>%
      select(all_of(DVs), {{ weight }}, all_of({{ IV_List }})) %>% # select all needed columns
      map_dfr(all_of(DVs), cross_fun, .data = ., IVs = IV_List, weight = {{ weight }}, fun = ~weighted.mean(.x, weight = {{ weight }}, na.rm = TRUE)) %>% # map all DVs to the cross_fun from above and bind it to dataframe
      cbind(Answers, .) %>% # add the first column
      mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) # round 

  return(dt)
}

DATA %>% # calculate the means for the DVs by the IVs with the weight wt
  multi(DVs = DV_List, IVs = IV_List, weight = wt)

DATA %>% # calculate the means for the DVs by the IVs without weights (no_weight is always = 1)
  multi(DVs = DV_List, IVs = IV_List, weight = no_weight)

# it seams that the weights are ignored, when using weighted.mean as values_fn in pivot_wider

謝謝!

解決方案:將 weighted.mean-function 放在這里,而不是作為 pivot_wider-function 的一部分

工作 RepEx 來了:

library(tidyverse)
library(rlang)
library(scales)

dv1 <- c(1, 2, 1, 2, 1, 2) # dependent variable 1
dv2 <- c(2, 1, 2, 1, 2, 1) # dependent variable 2
wt <- c(0.5, 5, 0.5, 5, 0.5, 5) # weighting variable
iv1 <- c("m", "f", "m", "f", "m", "f") # independent variable 1
iv2 <- c("b", "b", "b", "a", "a", "a") # indipendent variable 2
iv3 <- c("x", "y", "y", "x", "y", "y") # indipendent variable 3

DATA <- dplyr::tibble(iv1, iv2, iv3, dv1, dv2, wt) %>% # build data frame
  mutate(one = 1, # for Tatals
         no_weight = 1) # as weight with 1 for all rows

IV_List = c('one', 'iv1', 'iv2', 'iv3') # List of column variables 
DV_List = c('dv1', 'dv2') # List of dependent variables

cross_fun <- function(.data, DV, IVs, weight, ...) { # calculate a function for  a DV by every IV
  
 List <- list() # initialize a List
  
  df <- .data %>% 
    select(all_of({{ IVs }}), {{ DV }}, {{ weight }}) # select the variables to get the List
  
  for (i in 1:(ncol(df) - 2)) { # a list for every IV
    List[[i]] <- df %>%
      select(all_of(i), {{ DV }}, {{ weight }}) %>% # build the lists out of the IVs and the DV
      group_by(UV = df[[i]]) %>% 
      summarise(mean = weighted.mean({{ DV }}, {{weight}})) %>% # THE SOLUTION: Put the weighted.mean-function here and not as a part fo the pivot_wider-function
      mutate(ORDER = 1) 
  }
  
    dt <- purrr::map( # pivot wider for every value of IVs and calculate the values with the function "fun"
      .x = List,
      .f = ~ pivot_wider(.x, id_cols = "ORDER", names_from = 1, values_from = mean) # THE SOLUTION: Here only the "mean" from the summarise above
    ) %>%
      purrr::reduce(left_join, by = "ORDER") %>% # reduce by leftjoin
      select(-any_of(c("ORDER"))) %>%
      rename(Total = 1) # column one are the Totals
  return(dt)
}

multi <- function(.data, DVs, IVs, weight, fun, ...){ # calculate weighted means for a List of DVs by a List of IVs

  Answers <- .data %>% # extract a first column for the resulting table
    select(all_of(DVs)) %>% 
    colnames() %>% 
    tibble() %>% 
    select("Variable" = 1)
  
    dt <- .data %>%
      select(all_of(DVs), {{ weight }}, all_of({{ IV_List }})) %>% # select all needed columns
      map_dfr(syms(DVs), cross_fun, .data = ., IVs = IV_List, weight = {{ weight }}) %>% # map all DVs to the cross_fun from above and bind it to dataframe
      cbind(Answers, .) %>% # add the first column
      mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) # round 

  return(dt)
}

DATA %>% cross_fun(dv1, IVs = IV_List, weight = wt) %>% # calculate the crosstab for dv1
  mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) 

DATA %>% cross_fun(dv2, IVs = IV_List, weight = wt) %>% # calculate the crosstab for dv1
  mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) 


DATA %>% # calculate the means for the DVs by the IVs with the weight wt
  multi(DVs = c('dv1', 'dv2'), IVs = IV_List, weight = wt, fun = (sum({{ DV }} * {{ weight }}) /sum({{ weight }})))

DATA %>% # calculate the means for the DVs by the IVs without weights (no_weight always = 1)
  multi(DVs = DV_List, IVs = IV_List, weight = no_weight)

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM