简体   繁体   中英

Data population in data frame based on different columns and row selection in R

I have the following input data:

Type    Start       End         Value
Normal  14:10:01    14:20:00    0.05454545
Normal  14:50:01    15:00:00    0.05
Normal  15:00:01    15:10:00    0.056
Normal  15:10:01    15:20:00    0.05121951
Normal  15:20:01    15:30:00    0.05
Spl1    13:20:01    13:30:00    0.05089286
Spl1    13:40:01    13:50:00    0.05
Spl1    13:50:01    14:00:00    0.04848485
Spl2    14:30:01    14:40:00    0.05208333
Spl2    14:50:01    15:00:00    0.05
Spl2    15:20:01    15:30:00    0.05

Data structure of input table:

 $ Type                 : chr
 $ Start                : chr
 $ End                  : chr
 $ Value                : num

Output table structure is like below:

Start       End       Normal    Spl1    Spl2
13:00:01    13:10:00            
13:10:01    13:20:00            
13:20:01    13:30:00            
13:30:01    13:40:00            
13:40:01    13:50:00            
13:50:01    14:00:00            
14:00:01    14:10:00            
14:10:01    14:20:00            
14:20:01    14:30:00            
14:30:01    14:40:00            
14:40:01    14:50:00            
14:50:01    15:00:00            
15:00:01    15:10:00            
15:10:01    15:20:00            
15:20:01    15:30:00            

I want data to be populated based on below criteria:

  1. (Input_table$Start == Output_table$Start) && (Input_table$End == Output_table$End)
  2. Input table rows of type "Normal" should populate the Normal column of the output table. Likewise, rows of type "Spl1" and "Spl2" should populate the Spl1 and Spl2 columns, respectively.
  3. Blank fields in output table should be populated with 0

Desired output table should look like:

Start       End         Normal      Spl1        Spl2
13:00:01    13:10:00    0           0           0
13:10:01    13:20:00    0           0           0
13:20:01    13:30:00    0           0.05089286  0
13:30:01    13:40:00    0           0           0
13:40:01    13:50:00    0           0.05        0
13:50:01    14:00:00    0           0.04848485  0
14:00:01    14:10:00    0           0           0
14:10:01    14:20:00    0.05454545  0           0
14:20:01    14:30:00    0           0           0
14:30:01    14:40:00    0           0           0.05208333
14:40:01    14:50:00    0           0           0
14:50:01    15:00:00    0.05        0           0.05
15:00:01    15:10:00    0.056       0           0
15:10:01    15:20:00    0.05121951  0           0
15:20:01    15:30:00    0.05        0           0.05

Please suggest a possible solution in R.

Note: I do not want to use loops for this solution. I am looking for direct methods. Please suggest if any.

We can combine a left_join with a reshape of the long data to 'wide' format.

library(dplyr)
library(tidyr)

# Build the wide output table:
#   1. reshape df1 from long to wide so every Type becomes its own column,
#   2. left-join it onto the full interval grid in df2 (keeps all df2 rows,
#      in df2's order),
#   3. replace the NAs left by unmatched intervals / missing types with 0.
# Reshaping before the join means unmatched intervals never produce a
# spurious `NA` type column, so no positional select() is needed, and the
# value columns are not hard-coded by name.
df2 %>%
  left_join(
    pivot_wider(df1, names_from = Type, values_from = Value),
    by = c("Start", "End")
  ) %>%
  mutate(across(-c(Start, End), ~ replace_na(.x, 0)))
#     Start      End     Normal       Spl1       Spl2
#1  13:00:01 13:10:00 0.00000000 0.00000000 0.00000000
#2  13:10:01 13:20:00 0.00000000 0.00000000 0.00000000
#3  13:20:01 13:30:00 0.00000000 0.05089286 0.00000000
#4  13:30:01 13:40:00 0.00000000 0.00000000 0.00000000
#5  13:40:01 13:50:00 0.00000000 0.05000000 0.00000000
#6  13:50:01 14:00:00 0.00000000 0.04848485 0.00000000
#7  14:00:01 14:10:00 0.00000000 0.00000000 0.00000000
#8  14:10:01 14:20:00 0.05454545 0.00000000 0.00000000
#9  14:20:01 14:30:00 0.00000000 0.00000000 0.00000000
#10 14:30:01 14:40:00 0.00000000 0.00000000 0.05208333
#11 14:40:01 14:50:00 0.00000000 0.00000000 0.00000000
#12 14:50:01 15:00:00 0.05000000 0.00000000 0.05000000
#13 15:00:01 15:10:00 0.05600000 0.00000000 0.00000000
#14 15:10:01 15:20:00 0.05121951 0.00000000 0.00000000
#15 15:20:01 15:30:00 0.05000000 0.00000000 0.05000000

data

 # Long-format input: one row per (Type, Start, End) interval with its Value.
 df1 <- data.frame(
   Type = rep(c("Normal", "Spl1", "Spl2"), times = c(5L, 3L, 3L)),
   Start = c(
     "14:10:01", "14:50:01", "15:00:01", "15:10:01", "15:20:01",
     "13:20:01", "13:40:01", "13:50:01",
     "14:30:01", "14:50:01", "15:20:01"
   ),
   End = c(
     "14:20:00", "15:00:00", "15:10:00", "15:20:00", "15:30:00",
     "13:30:00", "13:50:00", "14:00:00",
     "14:40:00", "15:00:00", "15:30:00"
   ),
   Value = c(
     0.05454545, 0.05, 0.056, 0.05121951, 0.05,
     0.05089286, 0.05, 0.04848485,
     0.05208333, 0.05, 0.05
   ),
   stringsAsFactors = FALSE
 )

   # Output skeleton: the full grid of consecutive 10-minute intervals.
   df2 <- data.frame(
     Start = c(
       "13:00:01", "13:10:01", "13:20:01", "13:30:01", "13:40:01",
       "13:50:01", "14:00:01", "14:10:01", "14:20:01", "14:30:01",
       "14:40:01", "14:50:01", "15:00:01", "15:10:01", "15:20:01"
     ),
     End = c(
       "13:10:00", "13:20:00", "13:30:00", "13:40:00", "13:50:00",
       "14:00:00", "14:10:00", "14:20:00", "14:30:00", "14:40:00",
       "14:50:00", "15:00:00", "15:10:00", "15:20:00", "15:30:00"
     ),
     stringsAsFactors = FALSE
   )

This can be done in a one-liner with data.table

# Join dt1 onto every interval of dt2, then cast Type into columns.
# The formula and value.var are spelled out so dcast() does not have to guess
# the value column from the column order. Intervals of dt2 without a match
# carry Type = NA and end up in a column literally named `NA`, which is
# dropped by reference; the trailing [] makes the result print afterwards.
dcast(
  dt1[dt2, on = c("Start", "End")],
  Start + End ~ Type,
  value.var = "Value",
  fill = 0
)[, `NA` := NULL][]

will return

       Start      End     Normal       Spl1       Spl2
 1: 13:00:01 13:10:00 0.00000000 0.00000000 0.00000000
 2: 13:10:01 13:20:00 0.00000000 0.00000000 0.00000000
 3: 13:20:01 13:30:00 0.00000000 0.05089286 0.00000000
 4: 13:30:01 13:40:00 0.00000000 0.00000000 0.00000000
 5: 13:40:01 13:50:00 0.00000000 0.05000000 0.00000000
 6: 13:50:01 14:00:00 0.00000000 0.04848485 0.00000000
 7: 14:00:01 14:10:00 0.00000000 0.00000000 0.00000000
 8: 14:10:01 14:20:00 0.05454545 0.00000000 0.00000000
 9: 14:20:01 14:30:00 0.00000000 0.00000000 0.00000000
10: 14:30:01 14:40:00 0.00000000 0.00000000 0.05208333
11: 14:40:01 14:50:00 0.00000000 0.00000000 0.00000000
12: 14:50:01 15:00:00 0.05000000 0.00000000 0.05000000
13: 15:00:01 15:10:00 0.05600000 0.00000000 0.00000000
14: 15:10:01 15:20:00 0.05121951 0.00000000 0.00000000
15: 15:20:01 15:30:00 0.05000000 0.00000000 0.05000000

Data

# data.table provides fread(), used here to build the example tables.
library(data.table)
# Long-format input table, passed to fread() as an inline whitespace-separated
# string with the header Type / Start / End / Value.
# NOTE(review): column types are whatever fread() detects - confirm that
# Start/End stay character if the join keys must match dt2 exactly.
dt1 <- fread(
"Type    Start       End         Value
Normal  14:10:01    14:20:00    0.05454545
Normal  14:50:01    15:00:00    0.05
Normal  15:00:01    15:10:00    0.056
Normal  15:10:01    15:20:00    0.05121951
Normal  15:20:01    15:30:00    0.05
Spl1    13:20:01    13:30:00    0.05089286
Spl1    13:40:01    13:50:00    0.05
Spl1    13:50:01    14:00:00    0.04848485
Spl2    14:30:01    14:40:00    0.05208333
Spl2    14:50:01    15:00:00    0.05
Spl2    15:20:01    15:30:00    0.05"
)

# Output skeleton: one row per interval (Start / End header only), passed to
# fread() as an inline string. The trailing blanks inside the literal are part
# of the original text and are left untouched.
dt2 <- fread(
  "Start       End     
13:00:01    13:10:00            
13:10:01    13:20:00            
13:20:01    13:30:00            
13:30:01    13:40:00            
13:40:01    13:50:00            
13:50:01    14:00:00            
14:00:01    14:10:00            
14:10:01    14:20:00            
14:20:01    14:30:00            
14:30:01    14:40:00            
14:40:01    14:50:00            
14:50:01    15:00:00            
15:00:01    15:10:00            
15:10:01    15:20:00            
15:20:01    15:30:00   "
)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM