简体   繁体   中英

Looping over all combinations using dplyr in R

Here is my data:

## Data
datex <- c(rep("2021-01-18", 61), rep("2021-01-19", 125))
hourx <- c(0,1,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,16,10,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,11,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,15,15,15,15,16,16,16,16)
seller <- c("dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2",
"dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2")
# NOTE(review): this assignment looks truncated by extraction -- the vector
# literal is missing (presumably 186 product codes such as "00021459" /
# "00021460", judging by the outputs shown further down; TODO restore from the
# original post). As written, `product` is bound to the base function `c`, so
# the data.frame() call below cannot build a `product` column from it.
product <- c
detail <- c("E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","notEnoughBalance","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","notEnoughBalance","E99","success","success","success","E99","success","success","E99","success","E99","success","E99","E99","success","E99","E99","success","E99","success","E99","success","E99","success","E99","success","success","E99","E99","E99","success","success","E99","success","E99","success","E99","success","success","E99","E99","E99","success","E99","success","success","E99","E99","success","E99","success","E99","success","success","E99","E99","success","success","E99","E99","success","E99","success","success","E99","success","E99","success","E99","E99","success","success","E99","E99","success","E99","success","success","E99","E99","E99","success","success","notEnoughBalance","E99","success","success","E99","success","E99","success","notEnoughBalance","E99","success","E99","E99","success","E99","success","success","E99","success","E99","E99","success","E99","success","success","E99","success","success","E99","E99","success","notEnoughBalance","E99","E99","success","E99","success","success","E99","E99","success","success","E99")
status <- c("FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","OK01","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","OK01","FI04","OK00","OK00","OK00","FI04","OK00","OK00","FI04","OK00","FI04","OK00","FI04","FI04","OK00","FI04","FI04","OK00","FI04","OK00","FI04","OK00","FI04","OK00","FI04","OK00","OK00","FI04","FI04","FI04","OK00","OK00","FI04","OK00","FI04","OK00","FI04","OK00","OK00","FI04","FI04","FI04","OK00","FI04","OK00","OK00","FI04","FI04","OK00","FI04","OK00","FI04","OK00","OK00","FI04","FI04","OK00","OK00","FI04","FI04","OK00","FI04","OK00","OK00","FI04","OK00","FI04","OK00","FI04","FI04","OK00","OK00","FI04","FI04","OK00","FI04","OK00","OK00","FI04","FI04","FI04","OK00","OK00","OK01","FI04","OK00","OK00","FI04","OK00","FI04","OK00","OK01","FI04","OK00","FI04","FI04","OK00","FI04","OK00","OK00","FI04","OK00","FI04","FI04","OK00","FI04","OK00","OK00","FI04","OK00","OK00","FI04","FI04","OK00","OK01","FI04","FI04","OK00","FI04","OK00","OK00","FI04","FI04","OK00","OK00","FI04")
channel <- c("f2","f2","f2","f3","f2","f3","f2","f3","f2","f3","f2","f3","f2","f2","f3","f2","f3","f2","f3","f2","f3","f2","f3","f3","f2","f3","f2","f2","f2","f3","f3","f2","f3","f3","f2","f2","f3","f3","f2","f3","f2","f3","f2","f3","f2","f2","f3","f3","f2","f2","f3","f3","f2","f2","f3","f2","f3","f2","f3","f2","f3","f3","f2","f3","f3","f3","f2","f3","f3","f2","f2","f3","f3","f2","f2","f3","f2","f2","f2","f3","f3","f2","f3","f3","f2","f3","f2","f2","f3","f2","f3","f2","f3","f3","f2","f2","f2","f2","f3","f3","f2","f3","f3","f2","f2","f3","f3","f3","f2","f2","f3","f2","f3","f2","f3","f3","f3","f2","f2","f2","f2","f3","f3","f3","f3","f2","f2","f3","f3","f2","f2","f3","f3","f2","f2","f3","f2","f3","f2","f2","f3","f2","f3","f2","f2","f3","f2","f3","f2","f3","f3","f2","f2","f3","f3","f2","f2","f2","f3","f2","f3","f3","f3","f2","f2","f3","f3","f2","f2","f2","f3","f3","f2","f3","f3","f3","f2","f2","f2","f2","f2","f2","f2","f2","f2","f2")
transaction <- c(1,120,50,5,1,2,1,9,6,12,5,25,14,6,22,9,10,14,15,12,220,12,12,14,9,11,100,90,110,12,13,4,3,1,2,3,3,5,7,5,5,6,9,16,8,13,10,20,15,18,10,19,15,5,13,12,10,12,26,14,0,4,0,0,0,2,0,0,2,0,4,0,6,8,0,2,3,0,2,0,1,0,1,0,2,0,0,2,1,1,0,0,3,0,1,0,3,0,0,6,5,2,0,8,0,0,12,11,0,2,0,11,0,0,14,21,0,0,13,7,0,17,0,0,18,0,7,0,4,4,0,0,7,12,0,13,0,0,130,160,9,0,0,0,16,0,0,16,0,14,0,0,9,0,11,8,0,8,0,0,8,0,10,5,0,15,0,0,3,0,0,8,8,0,0,6,5,0,8,0,0,5,1,0,0,95)
mydatax <- data.frame(datex, hourx, seller, product, detail, status, channel, transaction)

My task is to find outliers for every combination in my data using the tsoutliers package. As a sample, I use two combinations. 1st combination:

  • seller = "dombsdpapp1"
  • product = "00021460"
  • detail = "E99"
  • status = "FI04"
  • channel = "f2"
# Process 1: positive outliers for the first combination
library(tsoutliers)

# Keep only the rows that belong to this seller/product/detail/status/channel.
combination1 <- subset(
  mydatax,
  seller == "dombsdpapp1" & product == "00021460" & detail == "E99" &
    status == "FI04" & channel == "f2"
)

# Fit the outlier model on the transaction counts, treated as a time series.
model.anomaly1 <- tso(as.ts(combination1$transaction))

# `ind` (the second column of the outlier table) is the position of each
# detected outlier within the series; keep those with a positive coefficient.
find.anomaly.index1 <- with(model.anomaly1$outliers, ind[which(coefhat > 0)])
data.anomaly1 <- combination1[find.anomaly.index1, ]
data.anomaly1

#datex hourx      seller  product detail status channel transaction
#2   2021-01-18     1 dombsdpapp1 00021460    E99   FI04      f2         120
#27  2021-01-18    14 dombsdpapp1 00021460    E99   FI04      f2         100
#29  2021-01-18    16 dombsdpapp1 00021460    E99   FI04      f2         110
#139 2021-01-19    10 dombsdpapp1 00021460    E99   FI04      f2         130

2nd combination:

  • seller = "dombsdpapp2"
  • product = "00021460"
  • detail = "E99"
  • status = "FI04"
  • channel = "f2"
# Process 2: positive outliers for the second combination
library(tsoutliers)

# Keep only the rows that belong to this seller/product/detail/status/channel.
combination2 <- subset(
  mydatax,
  seller == "dombsdpapp2" & product == "00021460" & detail == "E99" &
    status == "FI04" & channel == "f2"
)

# Fit the outlier model on the transaction counts, treated as a time series.
model.anomaly2 <- tso(as.ts(combination2$transaction))

# `ind` (the second column of the outlier table) is the position of each
# detected outlier within the series; keep those with a positive coefficient.
find.anomaly.index2 <- with(model.anomaly2$outliers, ind[which(coefhat > 0)])
data.anomaly2 <- combination2[find.anomaly.index2, ]
data.anomaly2

#datex hourx      seller  product detail status channel transaction
#140 2021-01-19    10 dombsdpapp2 00021460    E99   FI04      f2         160
#186 2021-01-19    16 dombsdpapp2 00021460    E99   FI04      f2          95

After that, the results of all iterations are combined into one table:

# Stack the outlier rows found for each combination into a single table.
my.anomaly.result <- do.call(rbind, list(data.anomaly1, data.anomaly2))
my.anomaly.result

#         datex hourx      seller  product detail status channel transaction
#2   2021-01-18     1 dombsdpapp1 00021460    E99   FI04      f2         120
#27  2021-01-18    14 dombsdpapp1 00021460    E99   FI04      f2         100
#29  2021-01-18    16 dombsdpapp1 00021460    E99   FI04      f2         110
#139 2021-01-19    10 dombsdpapp1 00021460    E99   FI04      f2         130
#140 2021-01-19    10 dombsdpapp2 00021460    E99   FI04      f2         160
#186 2021-01-19    16 dombsdpapp2 00021460    E99   FI04      f2          95

The painful point is: how do I loop over the whole process to get the results for all combinations using dplyr? I have 100K combinations. Thank you.

Some groups in the data have only 1 or 2 rows, and for such groups the tso function returns an error. I have written a custom function with a threshold of 5 rows: if a group has fewer than 5 rows, all rows of that group are selected; for the remaining groups, the function is applied. You can adjust this threshold of 5 to any number that suits your data.

library(dplyr)
library(tsoutliers)

#' Positions of positive outliers within one group's series
#'
#' Very short series are not modelled: `tso()` errors on groups with only
#' one or two rows, so every position of such a series is returned instead.
#'
#' @param x A numeric vector, treated as a regularly spaced time series.
#' @param min_obs Minimum number of observations required before `tso()` is
#'   applied. Series shorter than this are returned whole. Defaults to 5,
#'   the threshold used originally.
#' @return An integer vector of positions in `x`: all positions when
#'   `length(x) < min_obs`, otherwise the `ind` values of the outliers
#'   reported by `tso()` whose estimated coefficient is positive.
get_outlier_index <- function(x, min_obs = 5) {
  stopifnot(is.numeric(min_obs), length(min_obs) == 1L, !is.na(min_obs))

  # Too few points to fit a model: keep the whole group.
  if (length(x) < min_obs) {
    return(seq_along(x))
  }

  model.anomaly <- tso(as.ts(x))
  outliers <- model.anomaly$outliers
  # which() drops NA comparisons so no NA position can reach the caller.
  outliers$ind[which(outliers$coefhat > 0)]
}

# One group per seller/product/detail/status/channel combination (the columns
# spanned by seller:channel in mydatax); keep only the rows that
# get_outlier_index() selects within each group, then drop the grouping.
mydatax %>%
  group_by(seller, product, detail, status, channel) %>%
  slice(get_outlier_index(transaction)) %>%
  ungroup()

#   datex      hourx seller      product  detail           status channel transaction
#   <chr>      <dbl> <chr>       <chr>    <chr>            <chr>  <chr>         <dbl>
# 1 2021-01-18     7 dombsdpapp1 00021459 E99              FI04   f3               25
# 2 2021-01-18    11 dombsdpapp1 00021459 E99              FI04   f3              220
# 3 2021-01-19     5 dombsdpapp1 00021459 E99              FI04   f3                6
# 4 2021-01-18    10 dombsdpapp1 00021459 notEnoughBalance OK01   f3               12
# 5 2021-01-18     1 dombsdpapp1 00021460 E99              FI04   f2              120
# 6 2021-01-18    14 dombsdpapp1 00021460 E99              FI04   f2              100
# 7 2021-01-18    16 dombsdpapp1 00021460 E99              FI04   f2              110
# 8 2021-01-19    10 dombsdpapp1 00021460 E99              FI04   f2              130
# 9 2021-01-19    11 dombsdpapp1 00021460 notEnoughBalance OK01   f2                0
#10 2021-01-18    11 dombsdpapp2 00021459 notEnoughBalance OK01   f3                0
#11 2021-01-19    14 dombsdpapp2 00021459 notEnoughBalance OK01   f3                0
#12 2021-01-19    10 dombsdpapp2 00021460 E99              FI04   f2              160
#13 2021-01-19    16 dombsdpapp2 00021460 E99              FI04   f2               95
#14 2021-01-19    11 dombsdpapp2 00021460 notEnoughBalance OK01   f2                0

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM