简体   繁体   English

根据二进制对数据进行分组,并计算平均值sd

[英]Group the data based on binary, and calculate mean, sd

I have a data frame df that looks like this: 我有一个数据框 df,如下所示:

        # Show large numbers (such as the 13-digit unixTime values) in full
        # instead of scientific notation.
        options(scipen = 999)

        # Sample data: one imei, twelve readings spaced 60000 apart in unixTime,
        # with ign switching Off (3 rows), On (2), Off (2), On (5).
        df <- data.frame(
          imei = rep(35745407328, times = 12),
          ign = rep(c("Off", "On", "Off", "On"), times = c(3, 2, 2, 5)),
          unixTime = 1514313014000 + 60000 * (0:11)
        )

df DF

        ----------------------------------
        imei            ign unixTime
        ----------------------------------
        35745407328     Off 1514313014000
        ----------------------------------
        35745407328     Off 1514313074000
        ----------------------------------
        35745407328     Off 1514313134000 
        ----------------------------------
        35745407328     On  1514313194000 
        ----------------------------------
        35745407328     On  1514313254000
        ----------------------------------
        35745407328     Off 1514313314000
        ----------------------------------
        35745407328     Off 1514313374000
        ----------------------------------
        35745407328     On  1514313434000
        ----------------------------------
        35745407328     On  1514313494000
        ----------------------------------
        35745407328     On  1514313554000
        ----------------------------------
        35745407328     On  1514313614000
        ----------------------------------
        35745407328     On  1514313674000
        ----------------------------------

I want to group the above data based on 'ign' and then 我想基于“ ign”对上述数据进行分组,然后

I want to calculate the mean and the standard deviation of unixTime. 我想计算unixTime的平均值和标准偏差。

And also the difference in unixTime between the first and last values in each group. 以及每组中第一个和最后一个unixTime值之间的差。

The grouping on which mean, sd and diff will be calculated: 用于计算平均值、标准差和差值的分组如下:

        ----------------------------------
         imei           ign unixTime
        ----------------------------------
        35745407328     Off 1514313014000
        ----------------------------------
        35745407328     Off 1514313074000
        ----------------------------------
        35745407328     Off 1514313134000 
        ----------------------------------

        ----------------------------------
        35745407328     On  1514313194000 
        ----------------------------------
        35745407328     On  1514313254000
        ----------------------------------

        ----------------------------------
        35745407328     Off 1514313314000
        ----------------------------------
        35745407328     Off 1514313374000
        ----------------------------------

        ----------------------------------
        35745407328     On  1514313434000
        ----------------------------------
        35745407328     On  1514313494000
        ----------------------------------
        35745407328     On  1514313554000
        ----------------------------------
        35745407328     On  1514313614000
        ----------------------------------
        35745407328     On  1514313674000
        ----------------------------------

Please help me with this 请在这件事上给予我帮助

If the answer is already available, please provide me with the link. 如果答案已经可用,请给我链接。 Thank you. 谢谢。

A solution using data.table. 使用 data.table 的解决方案。

library(data.table)

# Work on an independent data.table copy of df. setDT() and `:=` both modify
# their input by reference, so `df2 <- df[, ... := ...]` would only alias df
# and silently add the new columns to df as well.
df2 <- as.data.table(df)

# rleid() gives every run of consecutive identical ign values its own id, so
# a later "Off" run forms a separate group from an earlier one.
df2[, Group := rleid(ign)]

# Per-run summaries, repeated on every row of the run.
# Diff is first minus last, so it is negative when unixTime increases
# within a run.
df2[
  ,
  c("Mean", "SD", "Diff") := list(
    mean(unixTime),
    sd(unixTime),
    first(unixTime) - last(unixTime)
  ),
  by = Group
]

df2[]
#            imei ign      unixTime Group          Mean       SD    Diff
#  1: 35745407328 Off 1514313014000     1 1514313074000 60000.00 -120000
#  2: 35745407328 Off 1514313074000     1 1514313074000 60000.00 -120000
#  3: 35745407328 Off 1514313134000     1 1514313074000 60000.00 -120000
#  4: 35745407328  On 1514313194000     2 1514313224000 42426.41  -60000
#  5: 35745407328  On 1514313254000     2 1514313224000 42426.41  -60000
#  6: 35745407328 Off 1514313314000     3 1514313344000 42426.41  -60000
#  7: 35745407328 Off 1514313374000     3 1514313344000 42426.41  -60000
#  8: 35745407328  On 1514313434000     4 1514313554000 94868.33 -240000
#  9: 35745407328  On 1514313494000     4 1514313554000 94868.33 -240000
# 10: 35745407328  On 1514313554000     4 1514313554000 94868.33 -240000
# 11: 35745407328  On 1514313614000     4 1514313554000 94868.33 -240000
# 12: 35745407328  On 1514313674000     4 1514313554000 94868.33 -240000

Or a solution using dplyr and data.table. 或使用 dplyr 和 data.table 的解决方案。

library(dplyr)
library(data.table)

# Tag each row with the id of its run of consecutive identical ign values
# (rleid), then attach that run's mean, standard deviation and
# first-minus-last difference of unixTime to every row of the run.
df2 <- df %>%
  mutate(Group = rleid(ign)) %>%
  group_by(Group) %>%
  mutate(
    Mean = mean(unixTime),
    SD = sd(unixTime),
    Diff = first(unixTime) - last(unixTime)
  ) %>%
  ungroup()

df2
#           imei    ign      unixTime Group          Mean       SD    Diff
#          <dbl> <fctr>         <dbl> <int>         <dbl>    <dbl>   <dbl>
#  1 35745407328    Off 1514313014000     1 1514313074000 60000.00 -120000
#  2 35745407328    Off 1514313074000     1 1514313074000 60000.00 -120000
#  3 35745407328    Off 1514313134000     1 1514313074000 60000.00 -120000
#  4 35745407328     On 1514313194000     2 1514313224000 42426.41  -60000
#  5 35745407328     On 1514313254000     2 1514313224000 42426.41  -60000
#  6 35745407328    Off 1514313314000     3 1514313344000 42426.41  -60000
#  7 35745407328    Off 1514313374000     3 1514313344000 42426.41  -60000
#  8 35745407328     On 1514313434000     4 1514313554000 94868.33 -240000
#  9 35745407328     On 1514313494000     4 1514313554000 94868.33 -240000
# 10 35745407328     On 1514313554000     4 1514313554000 94868.33 -240000
# 11 35745407328     On 1514313614000     4 1514313554000 94868.33 -240000
# 12 35745407328     On 1514313674000     4 1514313554000 94868.33 -240000

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM