简体   繁体   中英

How to create a matrix from combination in a data frame?

I have a data frame like this:

V1 V2 Value
B A 0.211752
C A 0.134699
C B -0.210903
D A 0.349337
D B 0.614440
D C -0.220699
E A 0.442222
E B 0.154271
E C 0.240214
E D 0.371422
F A 0.459264
F B -0.057128
F C -0.144021
F D 0.126519
F E 0.477894

V1 has no A

And I need to make a matrix that would look like

A B C D E F
A 1.000000 0.211752 0.134699 0.349337 0.442222 0.459264
B 0.211752 1.000000 -0.210903 0.614440 0.154271 -0.057128
C 0.134699 -0.210903 1.00000 -0.220699 0.240214 -0.144021
D 0.349337 0.614440 -0.220699 1.00000 0.371422 0.126519
E 0.442222 0.154271 0.240214 0.371422 1.000000 0.477894
F 0.459264 -0.057128 -0.144021 0.126519 0.477894 1.000000

I tried running it like the answer to the question below but I couldn't

How to reshape data from long to wide format

We reshape it to 'wide' with xtabs , then update the diagonal

out <- xtabs(Value ~ V1 + V2, df1)
out1 <- out + t(out)
diag(out1) <- 1

-output

out1
#   V2
#V1          A         B         C         D         E
#  B  1.000000  0.134699  0.349337  0.442222  0.459264
#  C  0.134699  1.000000  0.614440  0.154271 -0.057128
#  D  0.349337  0.614440  1.000000  0.240214 -0.144021
#  E  0.442222  0.154271  0.240214  1.000000  0.126519
#  F  0.459264 -0.057128 -0.144021  0.126519  1.000000

data

df1 <- structure(list(V1 = c("B", "C", "C", "D", "D", "D", "E", "E", 
"E", "E", "F", "F", "F", "F", "F"), V2 = c("A", "A", "B", "A", 
"B", "C", "A", "B", "C", "D", "A", "B", "C", "D", "E"), Value = c(0.211752, 
0.134699, -0.210903, 0.349337, 0.61444, -0.220699, 0.442222, 
0.154271, 0.240214, 0.371422, 0.459264, -0.057128, -0.144021, 
0.126519, 0.477894)), class = "data.frame", row.names = c(NA, 
-15L))

Here is an igraph option

`diag<-`(as_adjacency_matrix(
  graph_from_data_frame(df[c(2, 1, 3)], directed = FALSE),
  attr = "Value"
), 1)

which gives

         A         B         C         D        E         F
A 1.000000  0.211752  0.134699  0.349337 0.442222  0.459264
B 0.211752  1.000000 -0.210903  0.614440 0.154271 -0.057128
C 0.134699 -0.210903  1.000000 -0.220699 0.240214 -0.144021
D 0.349337  0.614440 -0.220699  1.000000 0.371422  0.126519
E 0.442222  0.154271  0.240214  0.371422 1.000000  0.477894
F 0.459264 -0.057128 -0.144021  0.126519 0.477894  1.000000

Using dplyr and tidyr library you can try:

library(dplyr)
library(tidyr)

df %>%
  complete(V1 = unique(c(V1, V2)), V2 = unique(c(V1, V2))) %>%
  pivot_wider(names_from = V1, values_from = Value) %>%
  tibble::column_to_rownames('V2') %>%
  as.matrix() -> mat

diag(mat) <- 1
mat[lower.tri(mat)] <- mat[upper.tri(mat)]

mat
#          A         B         C         D        E         F
#A  1.000000  0.211752  0.134699  0.349337 0.442222  0.459264
#B  0.211752  1.000000 -0.210903  0.614440 0.154271 -0.057128
#C  0.134699 -0.220699  1.000000 -0.220699 0.240214 -0.144021
#D -0.210903  0.442222  0.371422  1.000000 0.371422  0.126519
#E  0.349337  0.154271  0.459264 -0.144021 1.000000  0.477894
#F  0.614440  0.240214 -0.057128  0.126519 0.477894  1.000000

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM