简体   繁体   中英

Group the data based on binary, and calculate mean, sd

I have a data frame, df, that looks like this:

        # Print large numbers in full instead of scientific notation.
        options(scipen = 999)

        # Sample data: one device (imei), its ignition state (ign), and a
        # reading every 60000 units of unixTime (12 rows in total).
        df <- data.frame(
          imei = rep(35745407328, times = 12),
          # Consecutive runs: 3 x Off, 2 x On, 2 x Off, 5 x On.
          ign = rep(c("Off", "On", "Off", "On"), times = c(3, 2, 2, 5)),
          unixTime = seq(from = 1514313014000, by = 60000, length.out = 12)
        )

df

        ----------------------------------
        imei            ign unixTime
        ----------------------------------
        35745407328     Off 1514313014000
        ----------------------------------
        35745407328     Off 1514313074000
        ----------------------------------
        35745407328     Off 1514313134000 
        ----------------------------------
        35745407328     On  1514313194000 
        ----------------------------------
        35745407328     On  1514313254000
        ----------------------------------
        35745407328     Off 1514313314000
        ----------------------------------
        35745407328     Off 1514313374000
        ----------------------------------
        35745407328     On  1514313434000
        ----------------------------------
        35745407328     On  1514313494000
        ----------------------------------
        35745407328     On  1514313554000
        ----------------------------------
        35745407328     On  1514313614000
        ----------------------------------
        35745407328     On  1514313674000
        ----------------------------------

I want to group the above data into consecutive runs of the same 'ign' value, and then

I want to calculate the mean and the standard deviation of unixTime for each group,

and also the difference between the first and last unixTime values in each group.

These are the groups for which the mean, sd and diff should be calculated:

        ----------------------------------
         imei           ign unixTime
        ----------------------------------
        35745407328     Off 1514313014000
        ----------------------------------
        35745407328     Off 1514313074000
        ----------------------------------
        35745407328     Off 1514313134000 
        ----------------------------------

        ----------------------------------
        35745407328     On  1514313194000 
        ----------------------------------
        35745407328     On  1514313254000
        ----------------------------------

        ----------------------------------
        35745407328     Off 1514313314000
        ----------------------------------
        35745407328     Off 1514313374000
        ----------------------------------

        ----------------------------------
        35745407328     On  1514313434000
        ----------------------------------
        35745407328     On  1514313494000
        ----------------------------------
        35745407328     On  1514313554000
        ----------------------------------
        35745407328     On  1514313614000
        ----------------------------------
        35745407328     On  1514313674000
        ----------------------------------

Please help me with this.

If an answer is already available, please provide me with the link. Thank you.

A solution using data.table.

library(data.table)

# Turn df into a data.table by reference.
setDT(df)

# Give every run of consecutive identical `ign` values its own id.
df[, Group := rleid(ign)]

# For each run, attach the mean and standard deviation of unixTime and the
# first-minus-last difference to every row of that run. `:=` updates df by
# reference, so df2 is the same table as df.
df2 <- df[, `:=`(
  Mean = mean(unixTime),
  SD = sd(unixTime),
  Diff = first(unixTime) - last(unixTime)
), by = Group]

# The trailing [] makes the table print after the by-reference update.
df2[]
#            imei ign      unixTime Group          Mean       SD    Diff
#  1: 35745407328 Off 1514313014000     1 1514313074000 60000.00 -120000
#  2: 35745407328 Off 1514313074000     1 1514313074000 60000.00 -120000
#  3: 35745407328 Off 1514313134000     1 1514313074000 60000.00 -120000
#  4: 35745407328  On 1514313194000     2 1514313224000 42426.41  -60000
#  5: 35745407328  On 1514313254000     2 1514313224000 42426.41  -60000
#  6: 35745407328 Off 1514313314000     3 1514313344000 42426.41  -60000
#  7: 35745407328 Off 1514313374000     3 1514313344000 42426.41  -60000
#  8: 35745407328  On 1514313434000     4 1514313554000 94868.33 -240000
#  9: 35745407328  On 1514313494000     4 1514313554000 94868.33 -240000
# 10: 35745407328  On 1514313554000     4 1514313554000 94868.33 -240000
# 11: 35745407328  On 1514313614000     4 1514313554000 94868.33 -240000
# 12: 35745407328  On 1514313674000     4 1514313554000 94868.33 -240000

Or a solution using dplyr and data.table (for rleid).

library(dplyr)
library(data.table)

# Number each run of consecutive identical `ign` values, then add the
# per-run mean, standard deviation and first-minus-last difference of
# unixTime to every row.
df2 <- df %>%
  mutate(Group = rleid(ign)) %>%
  group_by(Group) %>%
  mutate(
    Mean = mean(unixTime),
    SD = sd(unixTime),
    Diff = first(unixTime) - last(unixTime)
  ) %>%
  ungroup()

# Show the result.
df2
#           imei    ign      unixTime Group          Mean       SD    Diff
#          <dbl> <fctr>         <dbl> <int>         <dbl>    <dbl>   <dbl>
#  1 35745407328    Off 1514313014000     1 1514313074000 60000.00 -120000
#  2 35745407328    Off 1514313074000     1 1514313074000 60000.00 -120000
#  3 35745407328    Off 1514313134000     1 1514313074000 60000.00 -120000
#  4 35745407328     On 1514313194000     2 1514313224000 42426.41  -60000
#  5 35745407328     On 1514313254000     2 1514313224000 42426.41  -60000
#  6 35745407328    Off 1514313314000     3 1514313344000 42426.41  -60000
#  7 35745407328    Off 1514313374000     3 1514313344000 42426.41  -60000
#  8 35745407328     On 1514313434000     4 1514313554000 94868.33 -240000
#  9 35745407328     On 1514313494000     4 1514313554000 94868.33 -240000
# 10 35745407328     On 1514313554000     4 1514313554000 94868.33 -240000
# 11 35745407328     On 1514313614000     4 1514313554000 94868.33 -240000
# 12 35745407328     On 1514313674000     4 1514313554000 94868.33 -240000

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM