简体   繁体   中英

vectorize R for loop in data.table

I'm building a maintenance programmer in R. For different machines I have routines with specific activities which should be executed in specific dates, defined by frecuencies and a starting date.

I already have a data.table with the frequency (in weeks), the last known date of a large maintenance and the projected dates for each routine, according to its frequency and last date. A reduced version looks like this:

require(data.table)

dt <- structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9), machine = c("t1", 
"t1", "t1", "t1", "t1", "t2", "t2", "t2", "t2"), frequencyWeeks = c(4, 
12, 24, 48, 96, 4, 24, 48, 96), lastMaintenance = structure(c(17889, 
17889, 17889, 17889, 17889, 17871, 17871, 17871, 17871), class = "Date"), 
    datesRoutines = list(structure(c(17889, 17917, 17945, 17973, 
    18001, 18029, 18057, 18085, 18113, 18141, 18169, 18197, 18225, 
    18253, 18281, 18309, 18337, 18365, 18393, 18421, 18449, 18477, 
    18505, 18533, 18561, 18589, 18617), class = "Date"), structure(c(17889, 
    17973, 18057, 18141, 18225, 18309, 18393, 18477, 18561), class = "Date"), 
        structure(c(17889, 18057, 18225, 18393, 18561), class = "Date"), 
        structure(c(17889, 18225, 18561), class = "Date"), structure(c(17889, 
        18561), class = "Date"), structure(c(17871, 17899, 17927, 
        17955, 17983, 18011, 18039, 18067, 18095, 18123, 18151, 
        18179, 18207, 18235, 18263, 18291, 18319, 18347, 18375, 
        18403, 18431, 18459, 18487, 18515, 18543, 18571, 18599, 
        18627), class = "Date"), structure(c(17871, 18039, 18207, 
        18375, 18543), class = "Date"), structure(c(17871, 18207, 
        18543), class = "Date"), structure(c(17871, 18543), class = "Date"))), class = c("data.table", 
"data.frame"), row.names = c(NA, -9L))

dt

   id machine frequencyWeeks lastMaintenance                                                         datesRoutines
1:  1      t1              4      2018-12-24 2018-12-24,2019-01-21,2019-02-18,2019-03-18,2019-04-15,2019-05-13,...
2:  2      t1             12      2018-12-24 2018-12-24,2019-03-18,2019-06-10,2019-09-02,2019-11-25,2020-02-17,...
3:  3      t1             24      2018-12-24                2018-12-24,2019-06-10,2019-11-25,2020-05-11,2020-10-26
4:  4      t1             48      2018-12-24                                      2018-12-24,2019-11-25,2020-10-26
5:  5      t1             96      2018-12-24                                                 2018-12-24,2020-10-26
6:  6      t2              4      2018-12-06 2018-12-06,2019-01-03,2019-01-31,2019-02-28,2019-03-28,2019-04-25,...
7:  7      t2             24      2018-12-06                2018-12-06,2019-05-23,2019-11-07,2020-04-23,2020-10-08
8:  8      t2             48      2018-12-06                                      2018-12-06,2019-11-07,2020-10-08
9:  9      t2             96      2018-12-06                                                 2018-12-06,2020-10-08

NEED : I want to establish for each machine and intervention date what is the routine with the highest id (routines are recorded in order of increasing complexity, that means it would be the most complex one).

WHAT I'VE ATTEMPTED SO FAR : I've used a nested for-loop to achieve it:

for (j in dt[, unique(machine)]){
    for (i in dt[machine == j, ][1, datesRoutines[[1]]]){
        result[count, "machine"] <- j
        result[count, "date"] <- as.Date(i, origin = origin)
        result[count, "rutina"] <- dt[machine == j, i %in% datesRoutines[[1]], by = id][V1 == TRUE, max(id)]
        count <- count + 1
    }
}

setDT(result)

EXPECTED RESULT : I expect a data.table with machine, date and routine id:

head(result)
  machine       date rutina
1      t1 2018-12-24      5
2      t1 2019-01-21      1
3      t1 2019-02-18      1
4      t1 2019-03-18      2
5      t1 2019-04-15      1
6      t1 2019-05-13      1

QUESTION : Is it possible to vectorize it? What would be the code to do it?

This is the best simplyfication I can come up with:

   results <- list()
for(m in unique(dt$machine)){       
  dates <- dt[machine==m]$datesRoutines
  dates <- as.Date(unique(unlist(dates)), origin="1970-01-01")
  result <- data.table(date=dates)
  result[, machine:=m]
  for(d in dates){
    result[date==d, routine:=dt[as.Date(d, origin="1970-01-01") %in% unlist(datesRoutines), 
                              .(id, ord=as.double(max(which(as.Date(d, origin="1970-01-01") %in% unlist(datesRoutines))))), 
                              by=seq_len(nrow(dt))][,.(ord==max(ord), id)][V1==T][, max(id)]]

  }       
  results[[m]] <- result                         

} 
final_result <- rbindlist(results)

For here you can move a step further:

results <- list()
for(m in unique(dt$machine)){       
  dates <- dt[machine==m]$datesRoutines
  dates <- as.Date(unique(unlist(dates)), origin="1970-01-01")
  result <- data.table(date=dates)
  result[, machine:=m]
  result$routine <-lapply(result$date, function(d){
    dt[as.Date(d, origin="1970-01-01") %in% unlist(datesRoutines), 
       .(id, ord=as.double(max(which(as.Date(d, origin="1970-01-01") %in% unlist(datesRoutines))))), 
       by=seq_len(nrow(dt))][,.(ord==max(ord), id)][V1==T][, max(id)]})
  results[[m]] <- result                         

} 
final_result <- rbindlist(results)

Finally, for the very haters of for loop :

results <- lapply(unique(dt$machine), function(x){
  dates <- dt[machine==x]$datesRoutines
  dates <- as.Date(unique(unlist(dates)), origin="1970-01-01")
  result <- data.table(date=dates)
  result[, machine:=x]
})

tmp_result<-lapply(results, function(r){
  r$routine <-lapply(r$date, function(d){
    dt[as.Date(d, origin="1970-01-01") %in% unlist(datesRoutines), 
       .(id, ord=as.double(max(which(as.Date(d, origin="1970-01-01") %in% unlist(datesRoutines))))), 
       by=seq_len(nrow(dt))][,.(ord==max(ord), id)][V1==T][, max(id)]})
})

final_results <- rbindlist(results)
final_results$rutina <- unlist(tmp_result)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM