如何为多个不同的响应和预测变量运行分段线性回归，其中响应在不同的时间开始？

Question

My data frame looks like this: / 我的数据框看起来像这样：

# Example data: a 20-row grouped tibble rebuilt literally with structure().
# Columns:
#   Date_Time_GMT_3 - POSIXct timestamps (tzone attribute "EST")
#   response        - character id of the response series
#   y               - numeric response value (contains NA entries)
#   predictor       - character id of the predictor series
#   x               - numeric predictor value
# The `groups` attribute lists 8 response/predictor combinations together
# with the row indices (.rows) that belong to each of them.
df = structure(list(Date_Time_GMT_3 = structure(c(1622552400, 1622552400, 
                                                  1622552400, 1622552400, 1622552400, 1622552400, 1622552400, 1622552400, 
                                                  1622553300, 1622553300, 1622553300, 1622553300, 1622553300, 1622553300, 
                                                  1622553300, 1622553300, 1622554200, 1622554200, 1622554200, 1622554200
), tzone = "EST", class = c("POSIXct", "POSIXt")), response = c("20817727_F8AR_U", 
                                                                "20817727_F8AR_U", "20817727_F8AR_U", "20817727_F8AR_U", "20819742_X1AR_U", 
                                                                "20819742_X1AR_U", "20819742_X1AR_U", "20819742_X1AR_U", "20817727_F8AR_U", 
                                                                "20817727_F8AR_U", "20817727_F8AR_U", "20817727_F8AR_U", "20819742_X1AR_U", 
                                                                "20819742_X1AR_U", "20819742_X1AR_U", "20819742_X1AR_U", "20817727_F8AR_U", 
                                                                "20817727_F8AR_U", "20817727_F8AR_U", "20817727_F8AR_U"), 
y = c(NA_real_, 14.421, 14.421, 14.421, 14.996, 14.996, 14.996, 14.996, 14.421, 14.421, 14.421, 14.421, NA_real_, NA_real_, 14.996, 14.996, NA_real_, 
      NA_real_, 14.421, 14.421), predictor = c("20819830_R1AR_U_Stationary", "20822215_R3AR_U_Stationary", "20874235_R4AR_U_Stationary", "20874311_F1AR_U_Stationary",
                                               "20819830_R1AR_U_Stationary", "20822215_R3AR_U_Stationary", "20874235_R4AR_U_Stationary", "20874311_F1AR_U_Stationary", 
                                               "20819830_R1AR_U_Stationary", "20822215_R3AR_U_Stationary",  "20874235_R4AR_U_Stationary", "20874311_F1AR_U_Stationary", 
                                               "20819830_R1AR_U_Stationary",  "20822215_R3AR_U_Stationary", "20874235_R4AR_U_Stationary", "20874311_F1AR_U_Stationary", 
                                               "20819830_R1AR_U_Stationary", "20822215_R3AR_U_Stationary", "20874235_R4AR_U_Stationary", "20874311_F1AR_U_Stationary"), 
x = c(16.903, 13.942, 14.23, 15.282,  16.903, 13.942, 14.23, 15.282, 16.808, 13.942, 14.23, 15.187,  16.808, 13.942, 14.23, 15.187, 16.808, 13.942, 14.134, 15.187)), 
class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA,  -20L), groups = structure(list(response = c("20817727_F8AR_U", "20817727_F8AR_U", "20817727_F8AR_U", 
                                                                                                                       "20817727_F8AR_U", "20819742_X1AR_U", "20819742_X1AR_U", "20819742_X1AR_U", "20819742_X1AR_U"), 
                                                                                                          predictor = c("20819830_R1AR_U_Stationary",  "20822215_R3AR_U_Stationary", "20874235_R4AR_U_Stationary", 
                                                                                                                        "20874311_F1AR_U_Stationary", "20819830_R1AR_U_Stationary", "20822215_R3AR_U_Stationary", 
                                                                                                                        "20874235_R4AR_U_Stationary", "20874311_F1AR_U_Stationary"), 
                                                                                                          .rows = structure(list(c(1L, 9L,  17L), c(2L, 10L, 18L), c(3L, 11L, 19L), c(4L, 12L, 20L), 
                                                                                                                                 c(5L, 13L), c(6L, 14L), c(7L, 15L), c(8L, 16L)), 
                                                                                                                            ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))), 
                                                                                                     class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -8L), .drop = TRUE))

I want to run every predictor against every response and get output that reports the R-squared value of the piecewise regression model for each response–predictor group. / 我想针对每个响应运行所有不同的预测变量组合，以获得一个输出，该输出将告诉我每组响应和预测变量之间分段回归模型的 R 平方值。 I tried using this code: / 我尝试使用此代码：

# Fit one ordinary linear model of y on x over all rows of `results`
# (`results` is not defined anywhere in this snippet).
# NOTE(review): this assignment reuses the name `df`, so any data frame
# previously stored under that name is replaced by the fitted lm object.
df = lm(y~x, data = results)



# Build a segmented fit from the linear model, passing `~x` as seg.Z, and
# print its summary. segmented() presumably comes from the `segmented`
# package; no library() call is shown here, so confirm it is attached.
segmented.fit <- segmented(df, seg.Z = ~x)
summary(segmented.fit)

but it only gives me one R-squared value for the entire data frame, instead of one value per response–predictor group. / 但它只给了我整个数据框的一个 R 平方值，而不是按每个响应和预测变量分组分别给出。

Any idea how to group the responses and predictors before I run the model? / 知道如何在运行模型之前对响应和预测变量进行分组吗？

Answer 1

library(tidyverse)

# Candidate variable names: every column of `df`.
vars <- colnames(df)

# Cross every column name with every column name (x varies slowest), then try
# to fit lm(<y> ~ <x>) on `df` for each pair. possibly() substitutes NA for
# any pair whose fit raises an error, so the `data` list-column holds either
# an <lm> object or a single NA.
expand_grid(x = vars, y = vars) |>
  mutate(
    data = map2(
      x, y,
      possibly(
        function(.x, .y) {
          lm(as.formula(str_glue("{.y} ~ {.x}")), data = df)
        },
        NA
      )
    )
  )
#> Warning in model.matrix.default(mt, mf, contrasts): the response appeared on the
#> right-hand side and was dropped
#> Warning in model.matrix.default(mt, mf, contrasts): problem with term 1 in
#> model.matrix: no columns are assigned
#> Warning in storage.mode(v) <- "double": NAs introduced by coercion

#> Warning in storage.mode(v) <- "double": NAs introduced by coercion

#> Warning in storage.mode(v) <- "double": NAs introduced by coercion
#> Warning in model.matrix.default(mt, mf, contrasts): the response appeared on the
#> right-hand side and was dropped
#> Warning in model.matrix.default(mt, mf, contrasts): problem with term 1 in
#> model.matrix: no columns are assigned
#> Warning in storage.mode(v) <- "double": NAs introduced by coercion

#> Warning in storage.mode(v) <- "double": NAs introduced by coercion
#> Warning in model.matrix.default(mt, mf, contrasts): the response appeared on the
#> right-hand side and was dropped
#> Warning in model.matrix.default(mt, mf, contrasts): problem with term 1 in
#> model.matrix: no columns are assigned
#> Warning in storage.mode(v) <- "double": NAs introduced by coercion

#> Warning in storage.mode(v) <- "double": NAs introduced by coercion

#> Warning in storage.mode(v) <- "double": NAs introduced by coercion
#> Warning in model.matrix.default(mt, mf, contrasts): the response appeared on the
#> right-hand side and was dropped
#> Warning in model.matrix.default(mt, mf, contrasts): problem with term 1 in
#> model.matrix: no columns are assigned
#> Warning in storage.mode(v) <- "double": NAs introduced by coercion

#> Warning in storage.mode(v) <- "double": NAs introduced by coercion
#> Warning in model.matrix.default(mt, mf, contrasts): the response appeared on the
#> right-hand side and was dropped
#> Warning in model.matrix.default(mt, mf, contrasts): problem with term 1 in
#> model.matrix: no columns are assigned
#> # A tibble: 25 × 3
#>    x               y               data     
#>    <chr>           <chr>           <list>   
#>  1 Date_Time_GMT_3 Date_Time_GMT_3 <lm>     
#>  2 Date_Time_GMT_3 response        <lgl [1]>
#>  3 Date_Time_GMT_3 y               <lm>     
#>  4 Date_Time_GMT_3 predictor       <lgl [1]>
#>  5 Date_Time_GMT_3 x               <lm>     
#>  6 response        Date_Time_GMT_3 <lm>     
#>  7 response        response        <lgl [1]>
#>  8 response        y               <lm>     
#>  9 response        predictor       <lgl [1]>
#> 10 response        x               <lm>     
#> # … with 15 more rows

Created on 2022-06-13 by the reprex package (v2.0.0) / 由 reprex 包于 2022-06-13 创建 (v2.0.0)

如何为多个不同的响应和预测变量运行分段线性回归，其中响应在不同的时间开始？

问题描述

1 个解决方案

解决方案1
0 2022-06-13 11:56:41

如何为多个不同的响应和预测变量运行分段线性回归，其中响应在不同的时间开始？

问题描述

1 个解决方案

解决方案1 0 2022-06-13 11:56:41

解决方案1
0 2022-06-13 11:56:41