I have a data frame called dayfile
containing the following variables:
Period SubCode EchoTime PosX PosY PosZ
1 8753 13 2016-06-19 00:06:27 16.81 39.21 12.66
2 8753 13 2016-06-19 00:06:35 16.67 39.08 12.66
3 8753 13 2016-06-19 00:06:36 16.33 39.60 13.03
4 8753 13 2016-06-19 00:06:45 17.14 38.14 12.23
5 8753 13 2016-06-19 00:06:53 16.95 38.21 12.38
6 8753 13 2016-06-19 00:06:53 17.44 37.67 11.95
And I have another data frame called probe.DOT1
that looks like this:
DO.time.1m DO.1m Temp.1m
1 2016-06-18 10:24:50 7.69 18.04
2 2016-06-18 11:24:50 7.54 19.12
3 2016-06-18 11:54:50 7.57 18.98
4 2016-06-18 12:24:50 9.51 19.88
5 2016-06-18 12:54:50 9.30 11.62
6 2016-06-18 13:24:50 8.81 11.54
I want to create a new column in dayfile
called O2
from probe.DOT1$DO.1m
where dayfile$EchoTime
is within the time range between two consecutive values of probe.DOT1$Do.time.1m
, ie it should look something like this:
Period SubCode EchoTime PosX PosY PosZ O2
1 8753 13 2016-06-19 00:06:27 16.81 39.21 12.66 7.54
2 8753 13 2016-06-19 00:06:35 16.67 39.08 12.66 7.54
3 8753 13 2016-06-19 00:06:36 16.33 39.60 13.03 7.59
4 8753 13 2016-06-19 00:06:45 17.14 38.14 12.23 7.59
5 8753 13 2016-06-19 00:06:53 16.95 38.21 12.38 7.59
6 8753 13 2016-06-19 00:06:53 17.44 37.67 11.95 7.59
I have tried this:
dayfile$O2 <- probe.DOT1[dayfile$EchoTime < probe.DOT1$DO.time.1m &
dayfile$EchoTime > diff(probe.DOT1$DO.time.1m, lag = 1) , 'DO.1m']
But it gives me errors. Does anyone have any suggestions?
Thanks.
EDIT
Here is some dput so you can recreate small portions of my files:
dayfile
:
structure(list(Period = c(7017, 7017, 7017, 7017, 7017, 7017,
7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017,
7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017,
7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017,
7017, 7017), SubCode = c(20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
), EchoTime = structure(c(1466249003, 1466249010, 1466249017,
1466249025, 1466249032, 1466249039, 1466249046, 1466249053, 1466249060,
1466249067, 1466249074, 1466249081, 1466249088, 1466249095, 1466249102,
1466249109, 1466249116, 1466249123, 1466249130, 1466249137, 1466249144,
1466249151, 1466249158, 1466249165, 1466249172, 1466249179, 1466249186,
1466249193, 1466249200, 1466249207, 1466249214, 1466249221, 1466249228,
1466249235, 1466249242, 1466249249, 1466249256, 1466249263, 1466249270,
1466249277, 1466249284), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
PosX = c(14.69, 14.69, 14.69, 16.31, 14.94, 14.94, 14.85,
14.73, 16.64, 16.65, 16.7, 16.36, 16.38, 16.37, 16.41, 16.39,
16.12, 15.8, 16.33, 16.17, 16.32, 15.61, 15.5, 15.82, 16.68,
16.68, 16.68, 16.61, 16.68, 16.68, 11.39, 11.39, 16.68, 16.6,
16.6, 15.08, 15.08, 14.9, 14.81, 15.08, 15.15), PosY = c(36.98,
36.98, 36.98, 37.68, 36.9, 36.9, 36.96, 37.09, 37.19, 37.19,
37.01, 37.49, 37.47, 37.47, 37.45, 37.46, 37.75, 38.08, 38.06,
38.49, 38.06, 38.54, 38.39, 38.06, 37.16, 37.16, 37.16, 37.36,
37.16, 37.16, 40.09, 40.09, 37.16, 37.37, 37.37, 36.79, 36.79,
36.98, 36.94, 36.79, 36.73), PosZ = c(14.68, 14.68, 14.68,
15.67, 15.03, 15.03, 14.66, 14.88, 15.12, 15.22, 14.84, 15.28,
15.58, 15.48, 15.88, 15.68, 16.05, 15.91, 15.37, 15.64, 15.27,
16.1, 16.3, 16.33, 15.61, 15.61, 15.61, 15.8, 15.61, 15.61,
18.06, 18.06, 15.61, 15.7, 15.7, 15.13, 15.13, 15.3, 15.38,
15.13, 15.19)), .Names = c("Period", "SubCode", "EchoTime",
"PosX", "PosY", "PosZ"), row.names = c(213387L, 213389L, 213391L,
213393L, 213395L, 213397L, 213399L, 213401L, 213403L, 213405L,
213407L, 213409L, 213411L, 213413L, 213415L, 213417L, 213419L,
213421L, 213423L, 213425L, 213427L, 213429L, 213431L, 213433L,
213435L, 213437L, 213439L, 213441L, 213443L, 213445L, 213447L,
213449L, 213450L, 213452L, 213454L, 213456L, 213458L, 213460L,
213462L, 213464L, 213466L), class = "data.frame")
probe.DOT1
:
structure(list(DO.time.1m = structure(c(1466245490, 1466249090,
1466250890, 1466252690, 1466254490, 1466256290, 1466258090, 1466259890,
1466261690, 1466263490, 1466265290, 1466267090, 1466268890, 1466270690,
1466272490, 1466274290, 1466276090, 1466277890, 1466279690, 1466281490
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), DO.1m = c(7.69,
7.54, 7.57, 9.51, 9.3, 8.81, 8.71, 8.66, 8.19, 8.52, 8.79, 9.01,
9.19, 9.39, 9.79, 9.82, 9.48, 9.5, 9.29, 9.38), Temp.1m = c(18.04,
19.12, 18.98, 19.88, 11.62, 11.54, 12.24, 12.22, 12.32, 12.2,
12.18, 12.16, 12.14, 11.8, 11.62, 11.68, 11.52, 11.32, 11.34,
11.14)), .Names = c("DO.time.1m", "DO.1m", "Temp.1m"), row.names = c(NA,
20L), class = "data.frame")
I would use a double loop for this
dayfile$O2 <- NA
for (i in 1:nrow(dayfile)) {
for (j in 2:nrow(probe.DOT1)) {
# between previous do time and before current do time
if (dayfile$EchoTime[i] > probe.DOT1$DO.time.1m[j - 1] & dayfile$EchoTime[i] < probe.DOT1$DO.time.1m[j]) {
dayfile$O2[i] <- probe.DOT1$DO.1m[j]
}
}
}
Ah man... fuzzyjoin
almost can solve this (which would be infinitely "eleganter"), but o well.
Here's my pipeline to do this w/out loops... but the dput you gave us is a bit crappy in that there was only 1 value properly between ranges... but let's see how it does on your real data.
## First create a df that contains all the timestamp info:
all <- data.frame(ts = unique(unlist(c(dayfile$EchoTime,probe.DOT1$DO.time.1m))))
## Now join both tables to this master record, fill in the blanks in
## Period,SubCode,PosX,Y,Z, remove Temp.1m, remove useless rows, rename O2
library(dplyr)
library(tidyr)
all %>%
left_join(dayfile, by=c("ts"="EchoTime")) %>%
left_join(probe.DOT1,by=c("ts" = "DO.time.1m")) %>%
arrange(ts) %>%
fill(Period,SubCode,PosX,PosY,PosZ) %>%
select(-Temp.1m) %>%
filter(!is.na(DO.1m)) %>%
rename(O2=DO.1m)
# ts Period SubCode PosX PosY PosZ O2
# 1 2016-06-18 10:24:50 NA NA NA NA NA 7.69
# 2 2016-06-18 11:24:50 7017 20 16.38 37.47 15.58 7.54
# 3 2016-06-18 11:54:50 7017 20 15.15 36.73 15.19 7.57
# 4 2016-06-18 12:24:50 7017 20 15.15 36.73 15.19 9.51
# 5 2016-06-18 12:54:50 7017 20 15.15 36.73 15.19 9.30
# 6 2016-06-18 13:24:50 7017 20 15.15 36.73 15.19 8.81
# 7 2016-06-18 13:54:50 7017 20 15.15 36.73 15.19 8.71
# 8 2016-06-18 14:24:50 7017 20 15.15 36.73 15.19 8.66
# 9 2016-06-18 14:54:50 7017 20 15.15 36.73 15.19 8.19
# 10 2016-06-18 15:24:50 7017 20 15.15 36.73 15.19 8.52
# 11 2016-06-18 15:54:50 7017 20 15.15 36.73 15.19 8.79
# 12 2016-06-18 16:24:50 7017 20 15.15 36.73 15.19 9.01
# 13 2016-06-18 16:54:50 7017 20 15.15 36.73 15.19 9.19
# 14 2016-06-18 17:24:50 7017 20 15.15 36.73 15.19 9.39
# 15 2016-06-18 17:54:50 7017 20 15.15 36.73 15.19 9.79
# 16 2016-06-18 18:24:50 7017 20 15.15 36.73 15.19 9.82
# 17 2016-06-18 18:54:50 7017 20 15.15 36.73 15.19 9.48
# 18 2016-06-18 19:24:50 7017 20 15.15 36.73 15.19 9.50
# 19 2016-06-18 19:54:50 7017 20 15.15 36.73 15.19 9.29
# 20 2016-06-18 20:24:50 7017 20 15.15 36.73 15.19 9.38
To see the whole dataset with all the data together, use:
all %>%
left_join(dayfile, by=c("ts"="EchoTime")) %>%
left_join(probe.DOT1,by=c("ts" = "DO.time.1m"))
In R, you can (and should) run every row one at a time...just to see what each row does. Do this by selecting "blocks of text"... from all
to whatever. See how running what I have above shows you all the steps so far? You can do that with every other line in the pipeline too... so just select the block everything including the fill, then run:
all %>%
left_join(dayfile, by=c("ts"="EchoTime")) %>%
left_join(probe.DOT1,by=c("ts" = "DO.time.1m")) %>%
arrange(ts) %>%
fill(Period,SubCode,PosX,PosY,PosZ)
Here is the solution to my problem based on the code from @Amit
all <- data.frame(ts = unique(unlist(c(dayfile$EchoTime, probe.DOT1$DO.time.1m))))
library(dplyr)
library(tidyr)
all <- all %>%
left_join(dayfile, by=c("ts"="EchoTime")) %>%
left_join(probe.DOT1,by=c("ts" = "DO.time.1m")) %>%
arrange(ts) %>%
fill(DO.1m) %>%
select(-Temp.1m) %>%
filter(!is.na(PosX)) %>%
rename(O2=DO.1m) %>%
arrange(Period, ts)
dayfile$O2 <- all$O2
I changed @Amit's code slightly by filling Do.1m
instead of the dayfile
columns and then filtering out NAs from PosX
instead of DO.1m
. It works a treat!
Here is the output:
Period SubCode EchoTime PosX PosY PosZ O2
213387 7017 20 2016-06-18 11:23:23 14.69 36.98 14.68 7.69
213389 7017 20 2016-06-18 11:23:30 14.69 36.98 14.68 7.69
213391 7017 20 2016-06-18 11:23:37 14.69 36.98 14.68 7.69
213393 7017 20 2016-06-18 11:23:45 16.31 37.68 15.67 7.69
213395 7017 20 2016-06-18 11:23:52 14.94 36.90 15.03 7.69
213397 7017 20 2016-06-18 11:23:59 14.94 36.90 15.03 7.69
213399 7017 20 2016-06-18 11:24:06 14.85 36.96 14.66 7.69
213401 7017 20 2016-06-18 11:24:13 14.73 37.09 14.88 7.69
213403 7017 20 2016-06-18 11:24:20 16.64 37.19 15.12 7.69
213405 7017 20 2016-06-18 11:24:27 16.65 37.19 15.22 7.69
213407 7017 20 2016-06-18 11:24:34 16.70 37.01 14.84 7.69
213409 7017 20 2016-06-18 11:24:41 16.36 37.49 15.28 7.69
213411 7017 20 2016-06-18 11:24:48 16.38 37.47 15.58 7.69
213413 7017 20 2016-06-18 11:24:55 16.37 37.47 15.48 7.54
213415 7017 20 2016-06-18 11:25:02 16.41 37.45 15.88 7.54
213417 7017 20 2016-06-18 11:25:09 16.39 37.46 15.68 7.54
213419 7017 20 2016-06-18 11:25:16 16.12 37.75 16.05 7.54
213421 7017 20 2016-06-18 11:25:23 15.80 38.08 15.91 7.54
213423 7017 20 2016-06-18 11:25:30 16.33 38.06 15.37 7.54
213425 7017 20 2016-06-18 11:25:37 16.17 38.49 15.64 7.54
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.