简体   繁体   English

将列比较成比较矩阵 - R 解决方案

[英]Comparing columns into comparison matrix - R solution

Apologies if this is a really dumb question, but I'm trying to make a "matrix comparison" table from some data I have in an unfriendly format.如果这是一个非常愚蠢的问题,我深表歉意,但我正试图从我拥有的一些不友好格式的数据中制作一个“矩阵比较”表。 I'm part the way there, but need some help at.我是那里的一部分,但需要一些帮助。 the last stage pls!最后阶段请!

Input data:输入数据:

library(tidyverse)

#input data
data <- data.frame(ID = c("A", "A", "A","A", "B", "B", "B", "C", "C", "D"),
                colour = c("B", "C", "D", "E", "C", "D", "E", "D", "E", "E"),
                age =c(7, 7, 5, 6, 3, 7, 9, 8, 3, 2))

Now how I'd like it to look:现在我希望它看起来如何:

output_desired <- data.frame(sample=c("A", "B", "C", "D", "E"),
                     A = c(NA, 7, 7, 5, 6),
                     B = c(7, NA, 3, 7, 9),
                     C = c(7, 3, NA, 8, 3),
                     D = c(5, 7, 8, NA, 2 ),
                     E = c(6, 9, 3, 2, NA))

Just to be clear, the two columns in data compare to each other.为了清楚起见, data中的两列相互比较。 A cannot be compared with A , B cannot be compared with B etc. A不能与A比较, B不能与B比较等。

This is my attempt:这是我的尝试:

output <- data %>% pivot_wider(sample_a, names_from=sample_b, values_from=comparison)

However, you'll see the output is not mirrored and I'm missing the column of A...但是,您会看到 output 未镜像,并且我缺少 A 列...

The above uses an example.上面使用了一个例子。 Sample of data attached:附上数据样本:

structure(list(sample_a = c("AD001", "AD001", "AD001", "AD001", 
"AD001", "AD001", "AD001", "AD001", "AD001", "AD001", "AD001", 
"AD001", "AD001", "AD001", "AD001", "AD001", "AD001", "AD001", 
"AD001", "AD001", "AD001", "AD001", "AD001", "AD001", "AD001", 
"AD001", "AD001", "AD002", "AD002", "AD002", "AD002", "AD002", 
"AD002", "AD002", "AD002", "AD002", "AD002", "AD002", "AD002", 
"AD002", "AD002", "AD002", "AD002", "AD002", "AD002", "AD002", 
"AD002", "AD002", "AD002", "AD002", "AD002", "AD002", "AD002", 
"AD003", "AD003", "AD003", "AD003", "AD003", "AD003", "AD003", 
"AD003", "AD003", "AD003", "AD003", "AD003", "AD003", "AD003", 
"AD003", "AD003", "AD003", "AD003", "AD003", "AD003", "AD003", 
"AD003", "AD003", "AD003", "AD003", "AD004", "AD004", "AD004", 
"AD004", "AD004", "AD004", "AD004", "AD004", "AD004", "AD004", 
"AD004", "AD004", "AD004", "AD004", "AD004", "AD004", "AD004", 
"AD004", "AD004", "AD004", "AD004", "AD004", "AD004", "AD004", 
"AD005", "AD005", "AD005", "AD005", "AD005", "AD005", "AD005", 
"AD005", "AD005", "AD005", "AD005", "AD005", "AD005", "AD005", 
"AD005", "AD005", "AD005", "AD005", "AD005", "AD005", "AD005", 
"AD005", "AD005", "AD006", "AD006", "AD006", "AD006", "AD006", 
"AD006", "AD006", "AD006", "AD006", "AD006", "AD006", "AD006", 
"AD006", "AD006", "AD006", "AD006", "AD006", "AD006", "AD006", 
"AD006", "AD006", "AD006", "AD007", "AD007", "AD007", "AD007", 
"AD007", "AD007", "AD007", "AD007", "AD007", "AD007", "AD007", 
"AD007", "AD007", "AD007", "AD007", "AD007", "AD007", "AD007", 
"AD007", "AD007", "AD007", "AD008", "AD008", "AD008", "AD008", 
"AD008", "AD008", "AD008", "AD008", "AD008", "AD008", "AD008", 
"AD008", "AD008", "AD008", "AD008", "AD008", "AD008", "AD008", 
"AD008", "AD008", "AD009", "AD009", "AD009", "AD009", "AD009", 
"AD009", "AD009", "AD009", "AD009", "AD009", "AD009", "AD009", 
"AD009", "AD009", "AD009", "AD009", "AD009", "AD009", "AD009", 
"AD010", "AD010", "AD010", "AD010", "AD010", "AD010", "AD010", 
"AD010", "AD010", "AD010", "AD010", "AD010", "AD010", "AD010", 
"AD010", "AD010", "AD010", "AD010", "AD011", "AD011", "AD011", 
"AD011", "AD011", "AD011", "AD011", "AD011", "AD011", "AD011", 
"AD011", "AD011", "AD011", "AD011", "AD011", "AD011", "AD011", 
"AD012", "AD012", "AD012", "AD012", "AD012", "AD012", "AD012", 
"AD012", "AD012", "AD012", "AD012", "AD012", "AD012", "AD012", 
"AD012", "AD012", "AD013", "AD013", "AD013", "AD013", "AD013", 
"AD013", "AD013", "AD013", "AD013", "AD013", "AD013", "AD013", 
"AD013", "AD013", "AD013", "AD014", "AD014", "AD014", "AD014", 
"AD014", "AD014", "AD014", "AD014", "AD014", "AD014", "AD014", 
"AD014", "AD014", "AD014", "AD015", "AD015", "AD015", "AD015", 
"AD015", "AD015", "AD015", "AD015", "AD015", "AD015", "AD015", 
"AD015", "AD015", "AD016", "AD016", "AD016", "AD016", "AD016", 
"AD016", "AD016", "AD016", "AD016", "AD016", "AD016", "AD016", 
"AD017", "AD017", "AD017", "AD017", "AD017", "AD017", "AD017", 
"AD017", "AD017", "AD017", "AD017", "AD018", "AD018", "AD018", 
"AD018", "AD018", "AD018", "AD018", "AD018", "AD018", "AD018", 
"AD019", "AD019", "AD019", "AD019", "AD019", "AD019", "AD019", 
"AD019", "AD019", "AD020", "AD020", "AD020", "AD020", "AD020", 
"AD020", "AD020", "AD020", "AD021", "AD021", "AD021", "AD021", 
"AD021", "AD021", "AD021", "AD022", "AD022", "AD022", "AD022", 
"AD022", "AD022", "AD023", "AD023", "AD023", "AD023", "AD023", 
"AD024", "AD024", "AD024", "AD024", "AD025", "AD025", "AD025", 
"AD026", "AD026", "AD027"), sample_b = c("AD002", "AD003", "AD004", 
"AD005", "AD006", "AD007", "AD008", "AD009", "AD010", "AD011", 
"AD012", "AD013", "AD014", "AD015", "AD016", "AD017", "AD018", 
"AD019", "AD020", "AD021", "AD022", "AD023", "AD024", "AD025", 
"AD026", "AD027", "AD028", "AD003", "AD004", "AD005", "AD006", 
"AD007", "AD008", "AD009", "AD010", "AD011", "AD012", "AD013", 
"AD014", "AD015", "AD016", "AD017", "AD018", "AD019", "AD020", 
"AD021", "AD022", "AD023", "AD024", "AD025", "AD026", "AD027", 
"AD028", "AD004", "AD005", "AD006", "AD007", "AD008", "AD009", 
"AD010", "AD011", "AD012", "AD013", "AD014", "AD015", "AD016", 
"AD017", "AD018", "AD019", "AD020", "AD021", "AD022", "AD023", 
"AD024", "AD025", "AD026", "AD027", "AD028", "AD005", "AD006", 
"AD007", "AD008", "AD009", "AD010", "AD011", "AD012", "AD013", 
"AD014", "AD015", "AD016", "AD017", "AD018", "AD019", "AD020", 
"AD021", "AD022", "AD023", "AD024", "AD025", "AD026", "AD027", 
"AD028", "AD006", "AD007", "AD008", "AD009", "AD010", "AD011", 
"AD012", "AD013", "AD014", "AD015", "AD016", "AD017", "AD018", 
"AD019", "AD020", "AD021", "AD022", "AD023", "AD024", "AD025", 
"AD026", "AD027", "AD028", "AD007", "AD008", "AD009", "AD010", 
"AD011", "AD012", "AD013", "AD014", "AD015", "AD016", "AD017", 
"AD018", "AD019", "AD020", "AD021", "AD022", "AD023", "AD024", 
"AD025", "AD026", "AD027", "AD028", "AD008", "AD009", "AD010", 
"AD011", "AD012", "AD013", "AD014", "AD015", "AD016", "AD017", 
"AD018", "AD019", "AD020", "AD021", "AD022", "AD023", "AD024", 
"AD025", "AD026", "AD027", "AD028", "AD009", "AD010", "AD011", 
"AD012", "AD013", "AD014", "AD015", "AD016", "AD017", "AD018", 
"AD019", "AD020", "AD021", "AD022", "AD023", "AD024", "AD025", 
"AD026", "AD027", "AD028", "AD010", "AD011", "AD012", "AD013", 
"AD014", "AD015", "AD016", "AD017", "AD018", "AD019", "AD020", 
"AD021", "AD022", "AD023", "AD024", "AD025", "AD026", "AD027", 
"AD028", "AD011", "AD012", "AD013", "AD014", "AD015", "AD016", 
"AD017", "AD018", "AD019", "AD020", "AD021", "AD022", "AD023", 
"AD024", "AD025", "AD026", "AD027", "AD028", "AD012", "AD013", 
"AD014", "AD015", "AD016", "AD017", "AD018", "AD019", "AD020", 
"AD021", "AD022", "AD023", "AD024", "AD025", "AD026", "AD027", 
"AD028", "AD013", "AD014", "AD015", "AD016", "AD017", "AD018", 
"AD019", "AD020", "AD021", "AD022", "AD023", "AD024", "AD025", 
"AD026", "AD027", "AD028", "AD014", "AD015", "AD016", "AD017", 
"AD018", "AD019", "AD020", "AD021", "AD022", "AD023", "AD024", 
"AD025", "AD026", "AD027", "AD028", "AD015", "AD016", "AD017", 
"AD018", "AD019", "AD020", "AD021", "AD022", "AD023", "AD024", 
"AD025", "AD026", "AD027", "AD028", "AD016", "AD017", "AD018", 
"AD019", "AD020", "AD021", "AD022", "AD023", "AD024", "AD025", 
"AD026", "AD027", "AD028", "AD017", "AD018", "AD019", "AD020", 
"AD021", "AD022", "AD023", "AD024", "AD025", "AD026", "AD027", 
"AD028", "AD018", "AD019", "AD020", "AD021", "AD022", "AD023", 
"AD024", "AD025", "AD026", "AD027", "AD028", "AD019", "AD020", 
"AD021", "AD022", "AD023", "AD024", "AD025", "AD026", "AD027", 
"AD028", "AD020", "AD021", "AD022", "AD023", "AD024", "AD025", 
"AD026", "AD027", "AD028", "AD021", "AD022", "AD023", "AD024", 
"AD025", "AD026", "AD027", "AD028", "AD022", "AD023", "AD024", 
"AD025", "AD026", "AD027", "AD028", "AD023", "AD024", "AD025", 
"AD026", "AD027", "AD028", "AD024", "AD025", "AD026", "AD027", 
"AD028", "AD025", "AD026", "AD027", "AD028", "AD026", "AD027", 
"AD028", "AD027", "AD028", "AD028"), rel = c(-0.03991, -0.0249, 
-0.01788, -0.02618, -0.003831, -0.0003193, 0, 0.00447, -0.03768, 
-0.02554, -0.0249, 0, -0.03512, -0.05268, -0.01948, -0.3177, 
-0.01692, 0.004151, -0.03857, -0.02618, -0.008621, -0.00447, 
-0.02778, -0.009898, -0.2722, 0.01054, 0.002235, -0.03303, 0.01615, 
0.01119, 0.00713, -0.005846, -0.012, -0.01108, -0.001863, -0.02334, 
0.00315, -0.01785, -0.02194, -0.002775, -0.01669, -0.2779, -0.01021, 
-0.02185, -0.04339, -0.05598, -0.02432, -0.002786, -0.01186, 
-0.02276, -0.2833, -0.02246, 0, 0.009972, 0.01122, 0.009037, 
0.01932, 0.01215, -0.001246, 0.02586, 0.6049, 0.007874, 0.0162, 
-0.01974, -0.005921, 0.002493, -0.3015, -0.008293, 0.002805, 
0.001286, -0.01518, 0.005609, 0.01652, -0.002809, -0.01714, -0.2803, 
0.03584, 0.004051, 0.01927, 0.02889, 0.03604, 0.0205, 0.01118, 
0.02641, 0.0165, -0.005984, 0.03573, -0.00282, 0.00497, 0.001553, 
-0.2583, 0.005742, 0.0174, 0.002893, -0.01233, 0.009009, 0.02578, 
0.009363, -0.005299, -0.252, 0.01771, 0.02081, -0.01243, 0.0202, 
0.01088, 0.0003108, -0.01336, -0.007781, -0.01449, 0.02735, -0.02068, 
-0.003729, 0.01243, -0.3045, -0.03254, -0.01274, -0.03247, -0.01297, 
-0.009323, -0.001554, -0.006554, -0.01465, -0.3005, 0.002797, 
-0.0006215, 0.01457, 0.03007, 0.02821, 0.02732, 0.01587, 0.009764, 
0.02728, 0.01661, 0.01209, 0.01457, -0.2398, -0.01372, 0.02108, 
-0.01093, -0.01107, 0.02449, 0.02294, 0.008427, -0.007481, -0.2665, 
0.04495, 0.03348, 0.03673, 0, 0.04471, 0.01961, 0.01134, 0.03367, 
0.02946, 0.008017, 0.01793, -0.2552, 0.0236, 0.003061, -0.004179, 
0.01139, 0.0157, 0.02477, -0.007803, 0.001247, -0.239, 0.03796, 
0.02173, 0.02416, 0.04905, 0.01836, -0.004409, 0.04178, 0.02131, 
0.0222, 0.009583, -0.01157, 0.02424, 0.01592, -0.009965, 0.0003163, 
-0.008929, 0.0192, 0.0231, 0.01091, 0.01565, 0.02958, 0.03478, 
0.02049, 0.02521, -0.008819, 0.02875, 0.01254, 0.02097, 0.01144, 
-0.2556, -0.007337, 0.03061, -0.006107, 0.03226, -0.002771, 0.01579, 
-0.009988, 0.01496, -0.2711, 0.0315, 0.01621, 0.02428, 0.01953, 
0.03663, 0.01504, 0.008382, -0.001552, -0.2791, 0.007337, 0.007451, 
-0.008679, -0.02309, 0.0326, 0.02391, -0.006554, 0.01029, -0.2375, 
0.02763, 0.04222, 0.004094, 0.02583, -0.004701, -0.01961, 0.01587, 
-0.2737, 0.0118, 0.01369, -0.007715, -0.006958, 0.007781, 0.01369, 
0.0103, -0.006234, -0.2635, 0.03579, 0.02148, 0.01764, -0.008189, 
-0.01134, -0.01858, -0.3096, -0.001595, -0.004724, -0.02732, 
-0.02846, -0.02016, -0.004724, -0.001575, -0.02047, -0.3032, 
0.01039, 0.008504, 0.03855, 0.02189, 0.02875, -0.2467, 0.01148, 
0.02296, 0.01382, 0.006009, 0.02586, 0.03158, 0.03246, 0.007481, 
-0.2593, 0.04971, 0.0421, -0.03071, -0.01128, -0.2722, 0.005423, 
0.02382, -0.018, -0.04175, -0.004074, 0.006894, -0.01128, -0.01316, 
-0.2665, 0.01943, 0.002194, -0.01051, -0.2868, -0.000638, -0.01418, 
-0.02411, -0.01992, 0.004009, 0.01424, 0.006866, -0.03148, -0.2913, 
0.0296, 0.01048, -0.2934, -0.01276, 0.01793, -0.01254, -0.004744, 
0.003091, 0, -0.008739, -0.008416, -0.2787, 0.02566, 0.01855, 
-0.2587, -0.2537, -0.3115, -0.2772, -0.3053, -0.2779, -0.3165, 
-0.2945, 0.01157, -0.2695, -0.2325, 0.01021, -0.03118, -0.02392, 
0.000638, 0.01085, 0.01818, -0.02073, -0.2635, 0.01786, 0.01212, 
-0.02347, 0.006325, 0.002463, 0.02539, -0.002185, -0.0134, -0.2417, 
0.02694, 0.008877, -0.02829, -0.02861, -0.03439, -0.03568, -0.02668, 
-0.3028, 0.004822, 0.006429, 0.01265, 0.01107, -0.009171, -0.01044, 
-0.2738, 0.009171, 0.01581, 0.006811, -0.01155, -0.004364, -0.2868, 
0.03633, 0.0314, 0.009051, 0.003117, -0.2787, 0.02415, 0.04211, 
-0.0362, -0.3043, 0.0009363, 0.02341, -0.2787, 0.02525, 0.01808, 
-0.2482, -0.2497, 0.05003)), row.names = c(NA, -378L), class = "data.frame")

In base R , we could convert the columns to factor with levels specified as unique elements from unlist ing the 'colour' and 'ID' column.base R中,我们可以将列转换为factor ,其levels指定为unlist 'color' 和 'ID' 列中的unique元素。 Then, we reshape to 'wide' format with xtabs and add the transpose of the output ('m1')然后,我们使用xtabs重塑为“宽”格式并添加 output ('m1') 的转置

un1 <- sort(unique(unlist(data[1:2])))
m1 <- xtabs(age ~ ID + colour, transform(data, 
     ID = factor(ID, levels = un1), colour = factor(colour, levels = un1)))
out <- m1 + t(m1)

In tidyverse , we can use complete to expand the rows based on the unique values ('un1')tidyverse中,我们可以使用complete来根据唯一值 ('un1') 扩展行

library(dplyr)
library(tidyr)
library(purrr)
out2 <- data %>% 
     complete(ID = un1, colour = un1) %>%
     pivot_wider(names_from = colour, values_from = age)

 map2_dfc(data.table::transpose(out2, make.names = 'ID'), 
        out2[-1], coalesce) %>% 
      bind_cols(out2 %>% select(ID), .)

# A tibble: 5 x 6
#  ID        A     B     C     D     E
#  <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 A        NA     7     7     5     6
#2 B         7    NA     3     7     9
#3 C         7     3    NA     8     3
#4 D         5     7     8    NA     2
#5 E         6     9     3     2    NA

Update更新

Testing with OP's new dataset使用 OP 的新数据集进行测试

un2 <- sort(unique(unlist(df2[1:2])))
out2_new <- df2 %>% 
      complete(sample_a = un2, sample_b = un2) %>% 
      pivot_wider(names_from = sample_b, values_from = rel)

 map2_dfc(data.table::transpose(out2_new, make.names = 'sample_a'), 
        out2_new[-1], coalesce) %>% 
      bind_cols(out2_new %>%
            select(sample_a), .)
# A tibble: 28 x 29
#   sample_a    AD001    AD002    AD003    AD004    AD005    AD006    AD007   AD008    AD009    AD010    AD011    AD012   AD013    AD014
#   <chr>       <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>   <dbl>    <dbl>    <dbl>    <dbl>    <dbl>   <dbl>    <dbl>
# 1 AD001    NA       -0.0399  -0.0249  -0.0179  -2.62e-2 -0.00383 -3.19e-4  0       4.47e-3 -0.0377  -0.0255  -0.0249   0      -0.0351 
# 2 AD002    -3.99e-2 NA       -0.0330   0.0162   1.12e-2  0.00713 -5.85e-3 -0.012  -1.11e-2 -0.00186 -0.0233   0.00315 -0.0178 -0.0219 
# 3 AD003    -2.49e-2 -0.0330  NA        0.00997  1.12e-2  0.00904  1.93e-2  0.0122 -1.25e-3  0.0259   0.605    0.00787  0.0162 -0.0197 
# 4 AD004    -1.79e-2  0.0162   0.00997 NA        1.93e-2  0.0289   3.60e-2  0.0205  1.12e-2  0.0264   0.0165  -0.00598  0.0357 -0.00282
# 5 AD005    -2.62e-2  0.0112   0.0112   0.0193  NA       -0.0124   2.02e-2  0.0109  3.11e-4 -0.0134  -0.00778 -0.0145   0.0274 -0.0207 
# 6 AD006    -3.83e-3  0.00713  0.00904  0.0289  -1.24e-2 NA        1.46e-2  0.0301  2.82e-2  0.0273   0.0159   0.00976  0.0273  0.0166 
# 7 AD007    -3.19e-4 -0.00585  0.0193   0.0360   2.02e-2  0.0146  NA        0.0367  0.       0.0447   0.0196   0.0113   0.0337  0.0295 
# 8 AD008     0.      -0.012    0.0122   0.0205   1.09e-2  0.0301   3.67e-2 NA       2.42e-2  0.0491   0.0184  -0.00441  0.0418  0.0213 
# 9 AD009     4.47e-3 -0.0111  -0.00125  0.0112   3.11e-4  0.0282   0.       0.0242 NA        0.0205   0.0252  -0.00882  0.0288  0.0125 
#10 AD010    -3.77e-2 -0.00186  0.0259   0.0264  -1.34e-2  0.0273   4.47e-2  0.0491  2.05e-2 NA        0.0243   0.0195   0.0366  0.0150 
# … with 18 more rows, and 14 more variables: AD015 <dbl>, AD016 <dbl>, AD017 <dbl>, AD018 <dbl>, AD019 <dbl>, AD020 <dbl>, AD021 <dbl>,
#   AD022 <dbl>, AD023 <dbl>, AD024 <dbl>, AD025 <dbl>, AD026 <dbl>, AD027 <dbl>, AD028 <dbl>

Here is one option with igraph这是igraph的一个选项

data %>%
  graph_from_data_frame(directed = FALSE) %>%
  set_edge_attr("weight", value = data$age) %>%
  as_adjacency_matrix(attr = "weight", sparse = FALSE) %>%
  `diag<-`(NA) %>%
  data.frame(sample = row.names(.)) %>%
  relocate(sample, .before = 1) %>%
  `row.names<-`(NULL)

which gives这使

  sample  A  B  C  D  E
1      A NA  7  7  5  6
2      B  7 NA  3  7  9
3      C  7  3 NA  8  3
4      D  5  7  8 NA  2
5      E  6  9  3  2 NA

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM