简体   繁体   中英

Calculate standard deviation based on a specific time frame

I want to calculate the standard deviation of daily log returns over a period of time after a certain date. In other words: I want to set a data (eg 2019-01-15) and I want to calculate the standard deviation of the log returns for the 5 days after this date (so from 2019-01-16 to 2019-01-20). The problem is, that those start dates are different for the stocks. So I always have to link the ticker to the start date and then find the subsequent 5 days.

For this purpose, I need to link two data frames: Data frame 1 contains the identifier of the stock and one specific date (start date). Data frame 2 contains a list of all daily log returns over a period of one year (2020) for different tickers. Data frame 2 looks like this:

在此处输入图像描述

Here is the code to reproduce the example:

#Data frame 1: 

Identifier <- c("ACCR.PK", "ANIX.OQ", "TLRS.PK")
Dates <- c("2019-11-22", "2019-11-01", "2019-11-15")
df1 <- data.frame(Identifier, Dates)


# Data frame 2: 
 
Timeframe <- c("2019-11-04", "2019-11-05", "2019-11-06", "2019-11-07", "2019-11-08", "2019-11-09", "2019-11-10", "2019-11-11", "2019-11-12", "2019-11-13", "2019-11-14", "2019-11-15", "2019-11-16", "2019-11-17", "2019-11-18", "2019-11-19", "2019-11-20",
 "2019-11-21", "2019-11-22", "2019-11-23", "2019-11-24", "2019-11-25", "2019-11-26", "2019-11-27", "2019-11-28", "2019-11-29", "2019-11-30")

ACCR.PK <- c(-0.15415068, 0.15415068, 0.487703206, 0.782759339, -0.577315365, 0, 0.145953913, -0.01242252, -0.064538521, 0.026317308, -0.124297717, 0.097980408, -0.679901954, 0.051293294, -0.162518929, 0.028987537, 0.451985124,
 -0.09531018, 0, -0.105360516, -0.045462374, 0.022989518, 0.127833372, 0, 0.336472237, 0, -0.15415068)
 
HURC.OQ <- c(0.00252986782857967, 0.00392267244379774,-0.00673403218134361, 0.00334262149668962,  0.0131158570574628, -0.00891122577543113, 0.00669085295092264, -0.00669085295092264, -0.00420463128203163, -0.00365836907245454, -0.01534120996679, -0.00315412447869745, -0.00201236232924185, -0.0104137475666262, -0.00934859277129974,
 0.0269308298165383, 0.0237165266173163, -0.00501813152284614, -0.0109597837232012, 0.00334262149668962, -0.0119387432820877, 0.00712355199277548, 0.0216270190228793, 0.013797128357417, 0.041337071491812, 0.00733563677238935, 0.041337071491812) 
 
ANIX.OQ <- c(0.00629328697578901, 0.0112290637164134, -0.0288999622523214, -0.0064572560759153, 0.0102302682508149, 0.00507615303186082, -0.0309813325455195, 0.00518304563137528, 0.00015585630080639, -0.00260078170005729, -0.0263867551731949, -0.0437228110138317, -0.0140649294674036, -0.0200292818755725, 0.0256790144176915, 0.0236615074981583, 0.0703674421501179, 
 -0.00256739550524565, 0, -0.0155443544378002, -0.0131407935610586, 0.00530505222969313, -0.00264900817157687, -0.00798939003347865, 0.018543577712169, 0.0182059644965724, 0.041337071491812)

UBP.N <- c(0.0132452267500205, 0, -0.00400802139753864, 0.00242782617802106, -0.00263922032319019, -0.0149097543662875, -0.00484002020400087, -0.00215866246803786, 0.00753501950441837, 0, -0.00807541400554568,
 0.0224488315394535, -0.0116960397631916, -0.00643779047484871, -0.00973506877075225, 0.0118856072339812, 0.00967378806172681, -0.000585246485274027, 0, 0.0179329700267874, 0.0140152826171209, -0.0040442494375279, -0.000087987847382287, 0, -0.00623054975063608, 0.00260078170005729, 0.0144406841547942)

TLRS.PK <- c(-0.0723206615796261, -0.0645385211375711,-0.0689928714869512, 0.0689928714869512, 0.0744888519907394, -0,143481723477691, -0.0037336695520489, -0.0433188747188424, 0, -0.092709398104267, 0,123481056771021,  0.0394340692454072, 0.0682815074164056, 0, 0.0209496263115372, 0, -0,130053128248198, 0,183478294921779, 0, -0,141323855167744, -0.033855636939339, -0.0422003544903764)

df2 <- data.frame(Timeframe, UBP.N, HURC.OQ, ANIX.OQ, TLRS.PK)

I am stuck with combining the two data frames and to find the subsequent dates as well as linking them to the daily log returns.

Can somebody help me?

After converting the dates to Date -class,

df1$Dates <- as.Date(df1$Dates)
df2$Timeframe <- as.Date(df2$Timeframe)

we can do this with dplyr:

library(dplyr)
library(tidyr) # pivot_longer
df1 %>%
  mutate(rn = row_number()) %>%
  rowwise() %>%
  summarize(
    rn,
    Identifier,
    Timeframe = seq(Dates, Dates + 5, by = "days")
  ) %>%
  left_join(
    pivot_longer(df2, -Timeframe, names_to = "Identifier"),
    by = c("Identifier", "Timeframe")
  ) %>%
  group_by(rn, Identifier) %>%
  summarize(
    Timeframe = first(Timeframe),
    sigma = sd(value, na.rm = TRUE),
    .groups = "drop"
  )
# # A tibble: 3 × 4
#      rn Identifier Timeframe      sigma
#   <int> <chr>      <date>         <dbl>
# 1     1 ACCR.PK    2019-11-22 NA       
# 2     2 ANIX.OQ    2019-11-01  2.19e- 2
# 3     3 TLRS.PK    2019-11-15  5.04e+13

Quick validation:

sd(filter(df2, between(Timeframe, df1$Dates[2], df1$Dates[2]+5))$ANIX.OQ)
# [1] 0.02188327

Data

df1 <- structure(list(Identifier = c("ACCR.PK", "ANIX.OQ", "TLRS.PK"), Dates = structure(c(18222, 18201, 18215), class = "Date")), row.names = c(NA, -3L), class = "data.frame")
df2 <- structure(list(Timeframe = structure(c(18204, 18205, 18206, 18207, 18208, 18209, 18210, 18211, 18212, 18213, 18214, 18215, 18216, 18217, 18218, 18219, 18220, 18221, 18222, 18223, 18224, 18225, 18226, 18227, 18228, 18229, 18230), class = "Date"), UBP.N = c(0.0132452267500205, 0, -0.00400802139753864, 0.00242782617802106, -0.00263922032319019, -0.0149097543662875, -0.00484002020400087, -0.00215866246803786, 0.00753501950441837, 0, -0.00807541400554568, 0.0224488315394535, -0.0116960397631916, -0.00643779047484871,  -0.00973506877075225, 0.0118856072339812, 0.00967378806172681, -0.000585246485274027, 0, 0.0179329700267874, 0.0140152826171209, -0.0040442494375279, -8.7987847382287e-05, 0, -0.00623054975063608, 0.00260078170005729, 0.0144406841547942), HURC.OQ = c(0.00252986782857967, 0.00392267244379774, -0.00673403218134361, 0.00334262149668962, 0.0131158570574628, -0.00891122577543113, 0.00669085295092264, -0.00669085295092264, -0.00420463128203163, -0.00365836907245454, -0.01534120996679, -0.00315412447869745,  -0.00201236232924185, -0.0104137475666262, -0.00934859277129974, 0.0269308298165383, 0.0237165266173163, -0.00501813152284614, -0.0109597837232012, 0.00334262149668962, -0.0119387432820877, 0.00712355199277548, 0.0216270190228793, 0.013797128357417, 0.041337071491812, 0.00733563677238935, 0.041337071491812), ANIX.OQ = c(0.00629328697578901, 0.0112290637164134, -0.0288999622523214, -0.0064572560759153, 0.0102302682508149, 0.00507615303186082, -0.0309813325455195, 0.00518304563137528, 0.00015585630080639,  -0.00260078170005729, -0.0263867551731949, -0.0437228110138317, -0.0140649294674036, -0.0200292818755725, 0.0256790144176915, 0.0236615074981583, 0.0703674421501179, -0.00256739550524565, 0, -0.0155443544378002, -0.0131407935610586, 0.00530505222969313, -0.00264900817157687, -0.00798939003347865, 0.018543577712169, 0.0182059644965724, 0.041337071491812), TLRS.PK = c(-0.0723206615796261, -0.0645385211375711, -0.0689928714869512, 0.0689928714869512, 0.0744888519907394, 0, 143481723477691, -0.0037336695520489,  -0.0433188747188424, 0, -0.092709398104267, 0, 123481056771021, 0.0394340692454072, 0.0682815074164056, 0, 0.0209496263115372, 0, 0, 130053128248198, 0, 183478294921779, 0, 0, 141323855167744, -0.033855636939339, -0.0422003544903764)), row.names = c(NA, -27L), class = "data.frame")

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM