My dataset looks somewhat like this:
ID job01 age_started_job01 job02 age_started_job02 job03 age_started_job03
1 "waiter" 18 "lawyer" 25 NA NA
2 "plumber" 18 "builder" 20 "foreman" 25
I'm trying to get to a format where I have this (note that I assume that people stayed in the same job as the previous year if they didn't start a new one that year):
ID job_age18 job_age19 job_age20 job_age21 job_age22 job_age23 job_age24 job_age25
1 "waiter" "waiter" "waiter" "waiter" "waiter" "waiter" "waiter" "lawyer"
2 "plumber" "plumber" "builder" "builder" "builder" "builder" "builder" "foreman"
And then convert it to long like this:
ID job age
1 "waiter" 18
1 "waiter" 19
1 "waiter" 20
1 "waiter" 21
1 "waiter" 22
1 "waiter" 23
1 "waiter" 24
1 "lawyer" 25
2 "plumber" 18
2 "plumber" 19
2 "builder" 20
2 "builder" 21
2 "builder" 22
2 "builder" 23
2 "builder" 24
2 "foreman" 25
The step from wide to long can be done with `reshape2`, and I know how to do it, but I cannot get from the first dataset to the intermediate format. I have tried something like this (ugly and with loops):
# Create one (initially empty) job column per age.
ages <- 18:25
for (age in ages) {
  data[[paste0("job_age", age)]] <- NA_character_
}

# Populate each age column by looping through job spells and ages.
# NOTE: this only records a job at the exact age it was started; ages
# between two job starts stay NA (no carry-forward is done here).
for (spell in c("01", "02", "03")) {
  # `[[` extracts each column as a plain vector; `data[, name]` can return a
  # data frame (e.g. for tibbles), which breaks the column replacement below.
  started <- data[[paste0("age_started_job", spell)]]
  job <- as.character(data[[paste0("job", spell)]])
  for (age in ages) {
    age_col <- paste0("job_age", age)
    # Place of employment: rows whose spell starts at this age get the job,
    # all other rows keep whatever the column already holds.
    starts_now <- !is.na(started) & started == age
    data[[age_col]] <- ifelse(starts_now, job, data[[age_col]])
  }
}
Besides looking really ugly with loops and the like, it gives me an error saying that I provided 2 variables to replace 1 variable.
Is there a good, elegant way to do it, perhaps with `reshape2` and `plyr`?
You may try
# Stack the (job, starting age) column pairs into long format: one row per
# ID and job spell. Columns 2/4/6 hold the job names, 3/5/7 the starting ages;
# the stacked columns take the names of the first pair.
df1 <- reshape(df, idvar = "ID", timevar = "time",
               varying = list(c(2, 4, 6), c(3, 5, 7)), direction = "long")
colnames(df1)[4] <- "age"

# Every age that should appear in the result.
d1 <- data.frame(age = 18:25)

res <- do.call(rbind, lapply(split(df1, df1$ID), function(x) {
  # merge() adds a row for each age without a job start and sorts by age.
  x1 <- merge(x, d1, by = "age", all = TRUE)
  started <- !is.na(x1$job01)
  # Jobs in order of starting age. They are deliberately NOT de-duplicated:
  # a person who returns to an earlier job must keep both spells.
  jobs <- x1$job01[started]
  # Index of the spell in force at each age. Ages before the first job get
  # NA rather than 0, because indexing with 0 silently drops elements and
  # would make the result shorter than the data.
  spell <- cumsum(started)
  spell[spell == 0] <- NA
  x1$job <- jobs[spell]
  # Take the ID from the unmerged rows: the first merged row has a missing ID
  # whenever the youngest age has no job start.
  x1$ID <- x$ID[1]
  # Drops the unused (NA, NA) spells and any ages before the first job.
  na.omit(x1[, c("ID", "job", "age")])
}))
row.names(res) <- NULL
res
# ID job age
#1 1 waiter 18
#2 1 waiter 19
#3 1 waiter 20
#4 1 waiter 21
#5 1 waiter 22
#6 1 waiter 23
#7 1 waiter 24
#8 1 lawyer 25
#9 2 plumber 18
#10 2 plumber 19
#11 2 builder 20
#12 2 builder 21
#13 2 builder 22
#14 2 builder 23
#15 2 builder 24
#16 2 foreman 25
You could also try
library(dplyr)

# `df1` is the long-format data produced by reshape() above; its third and
# fourth columns are the job name and the age at which it was started.
colnames(df1)[3:4] <- c("job", "age")

# One row per person and age.
d1 <- expand.grid(ID = unique(df1$ID), age = 18:25)

# Attach the job starts (dropping the `time` column), then carry each job
# forward until the next one starts.
left_join(d1, df1[, -2], by = c("age", "ID")) %>%
  group_by(ID) %>%
  arrange(ID, age) %>%
  # Running count of job starts = index of the spell in force at each age
  # (0 for ages before the first job).
  mutate(indx = cumsum(!is.na(job))) %>%
  group_by(ID, indx) %>%
  mutate(job = replace(job, is.na(job) & indx != 0,
                       na.omit(job))) %>%
  # Ungroup first: select() cannot drop a grouping column and would silently
  # add `indx` back to the result.
  ungroup() %>%
  select(-indx)
# Sample data: one row per person with up to three (job, starting age) pairs.
# Unused pairs are NA.
df <- data.frame(
  ID = 1:2,
  job01 = c("waiter", "plumber"),
  age_started_job01 = c(18L, 18L),
  job02 = c("lawyer", "builder"),
  age_started_job02 = c(25L, 20L),
  job03 = c(NA, "foreman"),
  age_started_job03 = c(NA, 25L),
  stringsAsFactors = FALSE
)
Here's another approach that uses `merged.stack` + `expandRows` from my "splitstackshape" package. This answer uses @akrun's sample data:
library(splitstackshape)

# Last age the output should cover. The final job of each person is extended
# up to this age (or lasts a single year if it starts later than this).
last_age <- 25

# Stack the (job, starting age) pairs into long format and drop the unused
# (NA, NA) pairs.
DT <- na.omit(merged.stack(df, var.stubs = c("job", "age_started_job"),
                           sep = "var.stubs"))
DT[, age_started_job := as.numeric(age_started_job)]
# Number of years spent in each job: the gap to the next job start, with the
# final job running through `last_age` inclusive.
DT[, Range := diff(c(age_started_job,
                     max(age_started_job[.N], last_age) + 1)),
   by = list(ID)]
# Repeat each job row once per year, then turn the starting age into the
# running age within each job spell.
expandRows(DT, "Range")[, age_started_job := age_started_job +
                          (sequence(.N) - 1), by = list(ID, .time_1)][]
# ID .time_1 job age_started_job
# 1: 1 01 waiter 18
# 2: 1 01 waiter 19
# 3: 1 01 waiter 20
# 4: 1 01 waiter 21
# 5: 1 01 waiter 22
# 6: 1 01 waiter 23
# 7: 1 01 waiter 24
# 8: 1 02 lawyer 25
# 9: 2 01 plumber 18
# 10: 2 01 plumber 19
# 11: 2 02 builder 20
# 12: 2 02 builder 21
# 13: 2 02 builder 22
# 14: 2 02 builder 23
# 15: 2 02 builder 24
# 16: 2 03 foreman 25
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.