如何根据 R 中的条件分配值？

Question

我想把 data2 的 altitude（高度）列与 data1 中相同 ID1、ID2 分组内最接近的 altitude 值相匹配，并把 data1 中对应行的 response 列设为 1。

这里我选择了一个大数据集的样本作为演示。

data1：

# Sample `data1`: 20 rows, i.e. altitudes 0, 500, 1000, 1500 and 2000 for each
# of the four ID1/ID2 pairs (1/1, 1/2, 2/7, 2/9); `response` is 0 everywhere.
# The expression is not assigned here; bind it to `data1` before running the
# answers that follow.
structure(list(ID1 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2), ID2 = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 7L, 7L, 7L, 7L, 7L, 9L, 9L, 9L, 9L, 9L), altitude = c(0L, 
500L, 1000L, 1500L, 2000L, 0L, 500L, 1000L, 1500L, 2000L, 0L, 
500L, 1000L, 1500L, 2000L, 0L, 500L, 1000L, 1500L, 2000L), response = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L)), row.names = c(NA, -20L), class = "data.frame")

data2：

# Sample `data2`: one row per ID1/ID2 pair (1/1, 1/2, 2/7, 2/9) holding the
# altitude to be matched against `data1`; `response` is 1 in every row.
# The expression is not assigned here; bind it to `data2` before running the
# answers that follow.
structure(list(ID1 = c(1, 1, 2, 2), ID2 = c(1L, 2L, 7L, 9L), 
    altitude = c(500L, 1920L, 128L, 261L), response = c(1L, 1L, 
    1L, 1L)), row.names = c(NA, -4L), class = "data.frame")

预期结果：

Answer 1

使用基础 R 的直接方法是：

# For every row of data2, locate the row of data1 that shares its ID1/ID2
# pair and has the closest altitude, then set response to 1 on that row.
# vapply() always yields an integer vector (sapply() would silently return a
# list if some data2 row had no match), and seq_len() iterates zero times
# when data2 is empty, whereas 1:nrow(data2) would iterate over c(1, 0).
to_replace <- vapply(seq_len(nrow(data2)), function(i) {
  # Rows of data1 in the same ID1/ID2 group as the i-th row of data2
  row_idx <- which(data1$ID1 == data2$ID1[i] & data1$ID2 == data2$ID2[i])
  # Absolute altitude distance from the data2 row to each candidate row
  delta <- abs(data2$altitude[i] - data1$altitude[row_idx])
  # which.min() returns the first minimum, so ties go to the earlier row.
  # It returns integer(0) when there is no usable candidate; that case is
  # encoded as NA here and filtered out below.
  nearest <- row_idx[which.min(delta)]
  if (length(nearest) == 0L) NA_integer_ else nearest
}, integer(1))

# Ignore data2 rows without a counterpart in data1 instead of failing
to_replace <- to_replace[!is.na(to_replace)]
data1$response[to_replace] <- 1

   ID1 ID2 altitude response
1    1   1        0        0
2    1   1      500        1
3    1   1     1000        0
4    1   1     1500        0
5    1   1     2000        0
6    1   2        0        0
7    1   2      500        0
8    1   2     1000        0
9    1   2     1500        0
10   1   2     2000        1
11   2   7        0        1
12   2   7      500        0
13   2   7     1000        0
14   2   7     1500        0
15   2   7     2000        0
16   2   9        0        0
17   2   9      500        1
18   2   9     1000        0
19   2   9     1500        0
20   2   9     2000        0

如果你想使用 dplyr，可以这样做：先用 rbind 把两个数据框合并（并用 data 列标记每一行的来源），然后在每个 ID1/ID2 分组内计算各行 altitude 与 data2 中 altitude 的绝对差，把差值最小的 data1 行的 response 替换为 1，最后丢弃来自 data2 的行，只保留 data1 的行：

library(dplyr)

# Stack both frames and tag each row with its origin (1 = data1, 2 = data2)
# so the data2 reference altitude is available inside every ID1/ID2 group.
rbind(cbind(data1, data = 1), cbind(data2, data = 2)) %>%
  group_by(ID1, ID2) %>%
  # Distance of every row to the group's data2 altitude. The reference row is
  # identified by the origin tag rather than by response == 1, so 1s already
  # present in data1$response cannot be mistaken for it.
  # NOTE: assumes exactly one data2 row per ID1/ID2 group.
  mutate(delta = abs(altitude - altitude[data == 2])) %>%
  # Flag the data1 row(s) at the smallest distance; tied rows are all flagged.
  mutate(
    response = replace(
      response,
      data == 1 & delta == min(delta[data == 1]),
      1
    )
  ) %>%
  # Keep only the rows that came from data1 ...
  filter(data == 1) %>%
  # ... and drop both helper columns, leaving data1's four columns.
  # The result is still grouped by ID1/ID2; add ungroup() if that matters.
  select(-delta, -data)

    # A tibble: 20 x 4
# Groups:   ID1, ID2 [4]
     ID1   ID2 altitude response
   <dbl> <int>    <int>    <dbl>
 1     1     1        0        0
 2     1     1      500        1
 3     1     1     1000        0
 4     1     1     1500        0
 5     1     1     2000        0
 6     1     2        0        0
 7     1     2      500        0
 8     1     2     1000        0
 9     1     2     1500        0
10     1     2     2000        1
11     2     7        0        1
12     2     7      500        0
13     2     7     1000        0
14     2     7     1500        0
15     2     7     2000        0
16     2     9        0        0
17     2     9      500        1
18     2     9     1000        0
19     2     9     1500        0
20     2     9     2000        0

如何根据 R 中的条件分配值？

问题描述

1 个解决方案

解决方案1
1 已采纳 2020-05-27 22:28:59

如何根据 R 中的条件分配值？

问题描述

1 个解决方案

解决方案1 1 已采纳 2020-05-27 22:28:59

解决方案1
1 已采纳 2020-05-27 22:28:59