I'd like to pass the subset argument of lm() as a variable, so that I can wrap the model fitting in a function:
# Build the model formula as a string of the form "<response> ~ ."
# (the response regressed on every other column).
# response_name, subset_filter and train_dataset are not defined in this
# snippet; they must already exist in the calling scope.
formula <- paste0(response_name,
" ~ .")
# A non-empty filter string selects the subset branch; "" fits on all rows.
if (subset_filter != ""){
# The incoming filter is overwritten here with a hard-coded condition.
subset_filter <- "G3 < 10"
model <-
lm(as.formula(formula),
# NOTE(review): `subset` receives a character string here rather than an
# expression or logical vector -- this looks like the part the question is
# asking how to make work; confirm.
subset = subset_filter,
data = train_dataset)
} else {
# No filter supplied: fit on every row of train_dataset.
model <-
lm(as.formula(formula),
data = train_dataset)
}
My dataset is loaded like this:
# Read the student performance CSV from GitHub and store it as a tibble.
student_performance <- as_tibble(
  read_csv("https://raw.githubusercontent.com/UBC-MDS/ellognea-smwatts-student-performance/master/data/student-math-perf.csv")
)
My response variable is G3, and I split the data into training and test sets with this code:
#' Split a dataset into training and test sets
#'
#' Partitions `dataset` with `createDataPartition()` (p = 0.8) on the
#' `response_name` column and returns one of the two pieces as a tibble.
#'
#' @param dataset A data frame or tibble containing the response column.
#' @param response_name Name of the response column, as a single string.
#' @param output_set_type `"train"` returns the sampled training rows; any
#'   other value returns the remaining (test) rows.
#' @return A tibble holding the requested rows of `dataset`.
split_sets <- function(dataset,
                       response_name,
                       output_set_type) {
  stopifnot(
    is.character(response_name),
    length(response_name) == 1L,
    response_name %in% names(dataset)
  )

  # Fixed seed: every call draws the same partition, so separate calls for
  # the "train" and "test" pieces are complementary.
  set.seed(1)
  partition <- createDataPartition(dataset[[response_name]],
                                   p = 0.8,
                                   list = FALSE)
  # With list = FALSE the indices come back as a one-column matrix; take the
  # column as a plain integer vector so it is a valid row subscript for both
  # data frames and tibbles without suppressing any warnings.
  train_rows <- partition[, 1]

  # Scalar decision, so a plain if/else rather than ifelse().
  if (output_set_type == "train") {
    as_tibble(dataset[train_rows, , drop = FALSE])
  } else {
    as_tibble(dataset[-train_rows, , drop = FALSE])
  }
}
I'd like to be able to pass a value to the subset filter argument and have the model use it.
If we need to pass the filter as a string, then we could parse and evaluate it:
library(caret)
library(readr)
#' Fit a linear model of one column on all others, optionally on a row subset
#'
#' @param data A data frame (or tibble) holding the response and predictors.
#' @param response_name Name of the response column, as a single string.
#' @param subset_filter A single string containing an R expression that is
#'   evaluated with the columns of `data` in scope and selects the rows to
#'   fit on (e.g. `"G3 < 10"`). `""` (the default), `NULL` or `NA` fit the
#'   model on every row.
#' @return The fitted `lm` object, with its `call` element replaced by the
#'   model formula.
create_model <- function(data, response_name, subset_filter = "") {
  stopifnot(is.character(response_name), length(response_name) == 1L)

  # Treat "no filter" spellings uniformly instead of failing in `if ()`.
  if (is.null(subset_filter) ||
      (length(subset_filter) == 1L && is.na(subset_filter))) {
    subset_filter <- ""
  }
  if (!is.character(subset_filter) || length(subset_filter) != 1L) {
    stop("`subset_filter` must be a single string.", call. = FALSE)
  }

  model_formula <- as.formula(paste0(response_name, " ~ ."))

  if (nzchar(trimws(subset_filter))) {
    # SECURITY: eval(parse()) runs whatever code the string contains; only
    # pass filter strings from trusted sources.
    # The filter is evaluated up front (columns of `data` first, then this
    # function's scope) so lm() receives an ordinary row selector.
    keep_rows <- eval(parse(text = subset_filter), envir = data)
    model <- lm(model_formula, data = data, subset = keep_rows)
  } else {
    model <- lm(model_formula, data = data)
  }

  # Show the formula under "Call:" when the model is printed.
  # NOTE(review): this discards the recorded lm() call, so helpers that
  # re-evaluate the call (e.g. update()) presumably cannot refit the model.
  model$call <- model_formula
  model
}
Apply the function to the data:
# Fit G3 on every other column, using only the rows where G3 < 10.
create_model(train_dat, response_name = "G3", subset_filter = "G3 < 10")
#Call:
#G3 ~ .
#Coefficients:
# (Intercept) schoolMS sexM age addressU famsizeLE3
# -4.42602 1.18145 0.15315 -0.16790 1.11708 -0.08173
# PstatusT Medu Fedu Mjobhealth Mjobother Mjobservices
# 1.32870 1.00518 -0.62716 -1.98356 -1.31388 -0.94443
# Mjobteacher Fjobhealth Fjobother Fjobservices Fjobteacher reasonhome
# -1.28718 0.03242 0.02968 0.32962 -1.53201 -2.10665
# reasonother reasonreputation guardianmother guardianother traveltime studytime
# -0.51770 0.22395 -0.29893 1.85975 -0.39072 -1.56920
# failures schoolsupyes famsupyes paidyes activitiesyes nurseryyes
# -0.17344 2.35607 0.35207 0.29857 -0.91373 0.09838
# higheryes internetyes romanticyes famrel freetime goout
# 1.06065 -0.58727 0.09469 0.69217 0.25081 0.14379
# Dalc Walc health absences G1 G2
# -1.39164 0.60450 0.86492 0.12033 0.11660 0.78624
Here, 'train_dat' is created with the following function:
#' Split a dataset into training and test sets
#'
#' Partitions `dataset` with `createDataPartition()` (p = 0.8) on the
#' `response_name` column and returns one of the two pieces as a tibble.
#'
#' @param dataset A data frame or tibble containing the response column.
#' @param response_name Name of the response column, as a single string.
#' @param output_set_type `"train"` returns the sampled training rows; any
#'   other value returns the remaining (test) rows.
#' @return A tibble holding the requested rows of `dataset`.
split_sets <- function(dataset,
                       response_name,
                       output_set_type) {
  stopifnot(
    is.character(response_name),
    length(response_name) == 1L,
    response_name %in% names(dataset)
  )

  # Fixed seed: every call draws the same partition, so separate calls for
  # the "train" and "test" pieces are complementary.
  set.seed(1)
  partition <- createDataPartition(dataset[[response_name]],
                                   p = 0.8,
                                   list = FALSE)
  # With list = FALSE the indices come back as a one-column matrix; take the
  # column as a plain integer vector so it is a valid row subscript for both
  # data frames and tibbles without suppressing any warnings.
  train_rows <- partition[, 1]

  if (output_set_type == "train") {
    as_tibble(dataset[train_rows, , drop = FALSE])
  } else {
    as_tibble(dataset[-train_rows, , drop = FALSE])
  }
}
# Keep the training portion of the split, with G3 as the response column.
train_dat <- split_sets(
  dataset = student_performance,
  response_name = "G3",
  output_set_type = "train"
)
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.