简体   繁体   中英

linear regression model using R

I am a novice in R but am trying to learn. I have a dataset in Excel and I import it into R using:

# Import the semicolon-separated file. check.names = FALSE leaves the column
# headers as they appear in the file, and stringsAsFactors = FALSE keeps text
# columns as character vectors. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
stockPrice <- read.csv(
  "C:/Users/Desktop/prova.csv",
  sep = ";",
  header = TRUE,
  check.names = FALSE,
  stringsAsFactors = FALSE
)

The result of the import is shown below (the full dataset has 100 rows and columns).

        1       2       3      4       5       6       7      8       9     10      11      12
1  -1,8669 -1,2096  1,0358 0,0239 -1,0284 -0,0259  0,8801 0,4778  1,1449 0,4397 -0,1530 -0,3123
2  -2,1469 -0,4331 -0,0891 1,3842 -1,4148  0,1138 -0,8275 0,5115 -1,2898 1,8105  0,8521 -1,4327
3  -1,8919 -0,6469 -0,4098 2,8243 -1,3704 -1,6783 -0,6159 1,2910 -1,4260 2,4720  0,5230 -1,6965
4  -0,7912  0,4075  0,1092 3,8167 -0,9085 -1,0804  0,4104 0,9577 -0,2531 1,1191  1,5688 -0,8727
5  -0,2726  0,1827  0,7973 3,3848  1,0666  1,1254 -1,4111 1,2030 -0,9559 1,7813  1,8331 -1,0933
6   0,0539 -0,8640  2,0607 3,4989  2,1625  0,5226 -1,3890 2,6475 -0,6684 0,4587  0,7694  0,3462
7   0,6813 -1,9639  0,1362 1,9797  2,8645 -0,1524 -1,2367 4,6739 -1,7459 2,2648  1,8341 -0,4107
8  -0,4228 -0,3357  0,1201 2,1603  4,2053 -0,3679 -0,5577 3,7251 -1,6288 2,0168  1,1571 -0,8601
9   0,3020 -0,0523  1,4912 2,6993  5,2069 -0,0497 -0,3139 3,2010 -1,1773 1,8993  0,3357 -3,4239
10 -0,0832  0,2051  2,2387 2,9303  6,1984  1,9706 -0,3759 2,7283 -2,1752 2,0772  0,3298 -4,3092

I have copied only part of the dataset. Each column corresponds to an asset. What I am trying to do is fit a linear regression for each asset: for example, for the first asset, y is column 1, rows 1 to 9, and x is column 1, rows 2 to 10. I have to do this for every asset, and I only need the value of the coefficient.

One option is to loop through the columns with lapply, extract 'x' and 'y' from each column after converting its comma-formatted strings to numeric type, and fit a model with lm

# For every asset (column), regress each observation on the one that follows
# it and keep the fitted coefficients (intercept and slope).
out <- lapply(stockPrice, function(vec) {
  # The values are strings with a decimal comma ("-1,8669"). Replace the comma
  # with a decimal point so as.numeric() yields -1.8669; merely deleting the
  # comma would give -18669 and inflate the intercept by a factor of 10^4.
  vec <- as.numeric(sub(",", ".", vec, fixed = TRUE))
  n <- length(vec)
  y <- vec[-n]  # observations 1 .. n - 1
  x <- vec[-1]  # observations 2 .. n
  coef(lm(y ~ x))
})

out[[1]]
# (Intercept)           x 
#  -0.2798423   0.8392437 

If we want the slope, then rbind the list elements and extract the second column

# Stack the per-asset coefficient vectors into a matrix (one row per asset)
# and keep the slope column, which lm() names "x" after the regressor.
do.call(rbind, out)[, "x"]
#   1           2           3           4           5           6           7           8           9          10          11 
# 0.83924375  0.21597272  0.21761992  0.95551414  0.86204662  0.10036499  0.02051160  0.84014384  0.01129873 -0.31601104  0.18362571 
#        12 
# 0.46161256 

-checking in excel for first column output

-data

在此处输入图片说明

-output

在此处输入图片说明


In addition to the above, we can also make use of lmList from nlme after reshaping to 'long' format

library(dplyr)
library(tidyr)
library(nlme)
# Parse every column as a number written with a decimal comma, reshape to long
# format, pair each observation (y) with the next observation of the same
# asset (x), and fit one regression per asset.
stockPrice %>%
  mutate(across(
    everything(),
    # Without the locale, parse_number() treats "," as a grouping mark and
    # reads "-1,8669" as -18669.
    ~ readr::parse_number(.x, locale = readr::locale(decimal_mark = ","))
  )) %>%
  pivot_longer(everything(), values_to = "y") %>%
  # Explicit factor levels keep the assets in their original column order
  # instead of sorting the names alphabetically ("1", "10", "11", ...).
  group_by(name = factor(name, levels = unique(name))) %>%
  mutate(x = lead(y)) %>%
  ungroup() %>%
  na.omit() %>%  # drops the last row of each asset, which has no successor
  lmList(y ~ x | name, data = .)
#Call:
#  Model: y ~ x | name 
#   Data: . 

#Coefficients:
#   (Intercept)           x
#1   -0.2798423  0.83924375
#2   -0.4621407  0.21597272
#3    0.4274414  0.21761992
#4   -0.2009506  0.95551414
#5   -0.5269101  0.86204662
#6   -0.1814797  0.10036499
#7   -0.5479691  0.02051160
#8    0.1218587  0.84014384
#9   -0.8747104  0.01129873
#10   2.1429645 -0.31601104
#11   0.7811527  0.18362571
#12  -0.3786098  0.46161256

#Degrees of freedom: 108 total; 84 residual
#Residual standard error: 0.841953

data

# Reproducible sample of the imported data: 10 observations (rows "1".."10")
# for 12 assets (columns "1".."12"). Values are kept as the raw strings with a
# decimal comma, exactly as read.csv() returned them.
stockPrice <- structure(
  list(
    `1` = c("-1,8669", "-2,1469", "-1,8919", "-0,7912", "-0,2726",
            "0,0539", "0,6813", "-0,4228", "0,3020", "-0,0832"),
    `2` = c("-1,2096", "-0,4331", "-0,6469", "0,4075", "0,1827",
            "-0,8640", "-1,9639", "-0,3357", "-0,0523", "0,2051"),
    `3` = c("1,0358", "-0,0891", "-0,4098", "0,1092", "0,7973",
            "2,0607", "0,1362", "0,1201", "1,4912", "2,2387"),
    `4` = c("0,0239", "1,3842", "2,8243", "3,8167", "3,3848",
            "3,4989", "1,9797", "2,1603", "2,6993", "2,9303"),
    `5` = c("-1,0284", "-1,4148", "-1,3704", "-0,9085", "1,0666",
            "2,1625", "2,8645", "4,2053", "5,2069", "6,1984"),
    `6` = c("-0,0259", "0,1138", "-1,6783", "-1,0804", "1,1254",
            "0,5226", "-0,1524", "-0,3679", "-0,0497", "1,9706"),
    `7` = c("0,8801", "-0,8275", "-0,6159", "0,4104", "-1,4111",
            "-1,3890", "-1,2367", "-0,5577", "-0,3139", "-0,3759"),
    `8` = c("0,4778", "0,5115", "1,2910", "0,9577", "1,2030",
            "2,6475", "4,6739", "3,7251", "3,2010", "2,7283"),
    `9` = c("1,1449", "-1,2898", "-1,4260", "-0,2531", "-0,9559",
            "-0,6684", "-1,7459", "-1,6288", "-1,1773", "-2,1752"),
    `10` = c("0,4397", "1,8105", "2,4720", "1,1191", "1,7813",
             "0,4587", "2,2648", "2,0168", "1,8993", "2,0772"),
    `11` = c("-0,1530", "0,8521", "0,5230", "1,5688", "1,8331",
             "0,7694", "1,8341", "1,1571", "0,3357", "0,3298"),
    `12` = c("-0,3123", "-1,4327", "-1,6965", "-0,8727", "-1,0933",
             "0,3462", "-0,4107", "-0,8601", "-3,4239", "-4,3092")
  ),
  class = "data.frame",
  row.names = as.character(1:10)
)

As an alternative to @akrun's answer, you can use apply instead of lapply :

Using a fake example m

# Simulated stand-in for the real data: a 10 x 10 table of standard-normal
# draws, one column per asset (data.frame() names the columns X1..X10).
m <- data.frame(matrix(rnorm(10 * 10), nrow = 10, ncol = 10))

> head(m[1:3,1:3])
           X1        X2          X3
1 -0.81150290 0.3196615 -0.70848803
2  1.39105642 0.8232761  0.02241253
3 -0.01187938 0.9158422 -0.21934718

You can do:

# Slope of the regression of each observation on the following one, computed
# separately for every column (asset) of m. The row ranges are derived from
# the column length, so this works for any number of rows rather than only
# for a 10-row table.
coeff <- apply(m, 2, function(x) {
  n <- length(x)
  current <- x[-n]    # observations 1 .. n - 1
  following <- x[-1]  # observations 2 .. n
  coef(lm(current ~ following))[2]
})

And get a vector with all coefficients calculated from each asset:

> coeff
         X1          X2          X3          X4          X5          X6 
-0.19160847 -0.52686830  0.36973049  0.29217668 -0.70102686  0.22142335 
         X7          X8          X9         X10 
-0.13817910 -0.14292086  0.05105796 -0.22829763 

BTW, when you read your dataset with read.csv , you should add the argument dec = "," so that the values are imported as numbers and you do not have to deal with non-numeric strings. So something like:

# Import the semicolon-separated file and declare the comma as the decimal
# mark, so that values such as "-1,8669" are read as numbers. TRUE is spelled
# out because T is an ordinary variable that can be reassigned.
stockPrice <- read.csv(
  "C:/Users/Desktop/prova.csv",
  sep = ";",
  header = TRUE,
  check.names = FALSE,
  stringsAsFactors = FALSE,
  dec = ","
)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM