繁体   English   中英

R 中特定行与前一行之间的滞后差异

[英]Lagged difference between specific row and previous rows in R

我有这个 dataframe,时间以分钟为单位。 我想创建一个新列,指示发生替换的行与前几行之间的时间差(以分钟为单位)。

# Data

dat <- structure(list(game_id = c(303377L, 303377L, 303377L, 303377L, 
303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 
303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 
303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 
303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 
303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 
303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 
303377L, 303377L, 303377L), minute = c(57L, 57L, 57L, 57L, 57L, 
60L, 60L, 60L, 60L, 61L, 62L, 62L, 62L, 62L, 62L, 62L, 62L, 62L, 
62L, 62L, 62L, 62L, 62L, 62L, 63L, 63L, 63L, 64L, 64L, 64L, 65L, 
65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 
65L, 65L, 65L, 65L, 66L), team_name = c("Getafe", "Getafe", "Getafe", 
"Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", 
"Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", 
"Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", 
"Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", 
"Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", 
"Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", 
"Getafe", "Getafe", "Getafe", "Getafe"), type_name = c("pass", 
"dribble", "pass", "foul", "foul", "foul", "foul", "dribble", 
"pass", "throw_in", "tackle", "dribble", "tackle", "pass", "dribble", 
"interception", "pass", "dribble", "pass", "dribble", "pass", 
"dribble", "cross", "pass", "dribble", "interception", "take_on", 
"clearance", "dribble", "pass", "dribble", "pass", "dribble", 
"freekick_short", "pass", "dribble", "pass", "dribble", "pass", 
"dribble", "pass", "dribble", "pass", "dribble", "pass", "dribble", 
"cross", "shot", "substitution"), target = c(1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1), timediff = c(0, 0, 0, 0, 0, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9)), row.names = 150:198, class = "data.frame")

我想计算由名称“替代”设置的特定行与之前的行之间的minutes差。 我目前使用以下代码创建了下面的 dataframe:

dat$timediff <- ave(dat$minute, dat$game_id, FUN=function(x) replace(x - x[1], 1, 0))

但是,这与我想要实现的目标相反,即:

Output

    game_id minute team_name      type_name target timediff
150  303377     57    Getafe           pass      1        0
151  303377     57    Getafe        dribble      1        0
152  303377     57    Getafe           pass      1        0
153  303377     57    Getafe           foul      1        0
154  303377     57    Getafe           foul      1        0
155  303377     60    Getafe           foul      1        3
156  303377     60    Getafe           foul      1        3
157  303377     60    Getafe        dribble      1        3
158  303377     60    Getafe           pass      1        3
159  303377     61    Getafe       throw_in      1        4
160  303377     62    Getafe         tackle      1        5
161  303377     62    Getafe        dribble      1        5
162  303377     62    Getafe         tackle      1        5
163  303377     62    Getafe           pass      1        5
164  303377     62    Getafe        dribble      1        5
165  303377     62    Getafe   interception      1        5
166  303377     62    Getafe           pass      1        5
167  303377     62    Getafe        dribble      1        5
168  303377     62    Getafe           pass      1        5
169  303377     62    Getafe        dribble      1        5
170  303377     62    Getafe           pass      1        5
171  303377     62    Getafe        dribble      1        5
172  303377     62    Getafe          cross      1        5
173  303377     62    Getafe           pass      1        5
174  303377     63    Getafe        dribble      1        6
175  303377     63    Getafe   interception      1        6
176  303377     63    Getafe        take_on      1        6
177  303377     64    Getafe      clearance      1        7
178  303377     64    Getafe        dribble      1        7
179  303377     64    Getafe           pass      1        7
180  303377     65    Getafe        dribble      1        8
181  303377     65    Getafe           pass      1        8
182  303377     65    Getafe        dribble      1        8
183  303377     65    Getafe freekick_short      1        8
184  303377     65    Getafe           pass      1        8
185  303377     65    Getafe        dribble      1        8
186  303377     65    Getafe           pass      1        8
187  303377     65    Getafe        dribble      1        8
188  303377     65    Getafe           pass      1        8
189  303377     65    Getafe        dribble      1        8
190  303377     65    Getafe           pass      1        8
191  303377     65    Getafe        dribble      1        8
192  303377     65    Getafe           pass      1        8
193  303377     65    Getafe        dribble      1        8
194  303377     65    Getafe           pass      1        8
195  303377     65    Getafe        dribble      1        8
196  303377     65    Getafe          cross      1        8
197  303377     65    Getafe           shot      1        8
198  303377     66    Getafe   substitution      1        9

我想达到相反的效果,所以type_name == substitution发生的行应该有timediff == 0 ,然后用之前的行建立 timediff :

    game_id minute team_name      type_name target timediff

173  303377     62    Getafe           pass      1        4
174  303377     63    Getafe        dribble      1        3
175  303377     63    Getafe   interception      1        3
176  303377     63    Getafe        take_on      1        3
177  303377     64    Getafe      clearance      1        2
178  303377     64    Getafe        dribble      1        2
179  303377     64    Getafe           pass      1        2
180  303377     65    Getafe        dribble      1        1
181  303377     65    Getafe           pass      1        1
182  303377     65    Getafe        dribble      1        1
183  303377     65    Getafe freekick_short      1        1
184  303377     65    Getafe           pass      1        1
185  303377     65    Getafe        dribble      1        1
186  303377     65    Getafe           pass      1        1
187  303377     65    Getafe        dribble      1        1
188  303377     65    Getafe           pass      1        1
189  303377     65    Getafe        dribble      1        1
190  303377     65    Getafe           pass      1        1
191  303377     65    Getafe        dribble      1        1
192  303377     65    Getafe           pass      1        1
193  303377     65    Getafe        dribble      1        1
194  303377     65    Getafe           pass      1        1
195  303377     65    Getafe        dribble      1        1
196  303377     65    Getafe          cross      1        1
197  303377     65    Getafe           shot      1        1
198  303377     66    Getafe   substitution      1        0

您可以使用match获取type_name'substitution'minute值,并用每minute值减去它。

library(dplyr)

dat %>%
  group_by(game_id) %>%
  mutate(timediff = minute[match('substitution', type_name)] - minute) %>%
  ungroup

如果您在游戏中有多个替换,您可以使用 -

dat %>%
  group_by(game_id, grp = cumsum(lag(type_name, default = '') == 'substitution')) %>% 
  mutate(timediff = minute[match('substitution', type_name)] - minute) %>%
  ungroup

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM