In a network of passes among basketball players I want to:
Following this question Extracting Open Triangles in R Igraph (Network Analysis) we can do the following:
library(igraph)
set.seed(1234)
G <- sample_gnm(10, 15)
G
IGRAPH 72f8e6a U--- 10 15 -- Erdos renyi (gnm) graph
+ attr: name (g/c), type (g/c), loops (g/l), m (g/n)
+ edges from 72f8e6a:
[1] 1-- 3 1-- 4 3-- 4 1-- 5 3-- 5 6-- 7 3-- 8 4-- 8 6-- 8 7-- 8 2-- 9 6-- 9 7-- 9 4--10 9--10
plot(G)
Find the open triangles:
openTriList <- unique(do.call(c, lapply(as_ids(V(G)), function(v) {
do.call(c, lapply(as_ids(neighbors(G, v)), function(v1) {
v2 <- as_ids(neighbors(G, v1))
v2 <- v2[shortest.paths(G, v, v2) == 2]
if(length(v2) != 0) {
lapply(v2, function(vv2) { c(v, v1, vv2)[order(c(v, v1, vv2))] })
} else { list() }
}))
})))
The results are correct:
do.call(rbind, openTriList)
[,1] [,2] [,3]
[1,] 1 3 8
[2,] 1 4 8
[3,] 1 4 10
[4,] 2 6 9
[5,] 2 7 9
[6,] 2 9 10
[7,] 3 4 10
[8,] 3 6 8
[9,] 3 7 8
[10,] 1 4 5
[11,] 3 4 5
[12,] 4 6 8
[13,] 4 7 8
[14,] 4 9 10
[15,] 3 5 8
[16,] 6 9 10
[17,] 7 9 10
[18,] 4 8 10
[19,] 6 8 9
[20,] 7 8 9
How do we find the players that are brokers?
And how can we efficiently count the number of times these player broker an open triangle?
[The real data holds millions of passes and several thousands of players. So performance is an important aspect. Using combn
results in extremely long computational times. Are there faster ways of doing this? Perhaps getting the adjacency graph to build a sparse matrix and converting it into a data.table
object for joining by neighbours? See this link . ]
If you want to speed up, below is an option using for
loops + combn
to define a user functions f
, which ouput a list including both the openTriList
and occurCnt
( thank @minem's feedback as well for performance improvement ):
f <- function(G) {
dmat <- as_adj(G, sparse = FALSE)
resLst <- c()
for (broker in 1:nrow(dmat)) {
k <- which(dmat[broker, ] == 1)
if (length(k) > 1) {
inds <- t(combn(k, 2))
resLst[[broker]] <- subset(cbind(broker, inds), dmat[inds] == 0)
}
}
resLst <- do.call(rbind, resLst)
resCnt <- table(resLst[, "broker"])
list(openTriLst = resLst, occurCnt = resCnt)
}
and you will see that they can achieve the desired output
> set.seed(1234)
> G <- sample_gnm(10, 15)
> f(G)
$openTriLst
broker
[1,] 1 4 5
[2,] 3 1 8
[3,] 3 4 5
[4,] 3 5 8
[5,] 4 1 8
[6,] 4 1 10
[7,] 4 3 10
[8,] 4 8 10
[9,] 6 8 9
[10,] 7 8 9
[11,] 8 3 6
[12,] 8 3 7
[13,] 8 4 6
[14,] 8 4 7
[15,] 9 2 6
[16,] 9 2 7
[17,] 9 2 10
[18,] 9 6 10
[19,] 9 7 10
[20,] 10 4 9
$occurCnt
1 3 4 6 7 8 9 10
1 3 4 1 1 4 5 1
and the speed is remarkably improved than my previous answer. You can also compare it with the answer by @minem .
> set.seed(1234)
> G1 <- sample_gnm(1000, 4000)
> system.time(f(G1))
user system elapsed
0.07 0.00 0.08
> G2 <- sample_gnm(10000, 40000)
> system.time(f(G2))
user system elapsed
2.46 0.16 2.62
You can try the code below using combn
+ are_ajdacent
, eg,
G <- sample_gnm(10, 15) %>%
get.data.frame() %>%
graph_from_data_frame(directed = FALSE)
openTriList <- do.call(
rbind,
sapply(
names(V(G)),
function(v) {
nbs <- names(neighbors(G, v))
if (length(nbs) > 1) {
do.call(rbind, Filter(length, combn(nbs, 2, FUN = function(x) {
if (!are_adjacent(G, x[1], x[2])) {
sort(as.numeric(c(v, x)))
}
}, simplify = FALSE)))
}
}
)
)
occurCount <- na.omit(
sapply(names(V(G)), function(v) {
nbs <- names(neighbors(G, v))
ifelse(length(nbs) > 1,
sum(!combn(nbs,
2,
FUN = function(x) are_adjacent(G, x[1], x[2])
)),
NA
)
})
)
and you will get named vector
> openTriList
[,1] [,2] [,3]
[1,] 1 4 5
[2,] 1 3 8
[3,] 3 4 5
[4,] 3 5 8
[5,] 6 8 9
[6,] 1 4 8
[7,] 1 4 10
[8,] 3 4 10
[9,] 4 8 10
[10,] 7 8 9
[11,] 2 6 9
[12,] 6 9 10
[13,] 2 7 9
[14,] 7 9 10
[15,] 2 9 10
[16,] 3 6 8
[17,] 3 7 8
[18,] 4 6 8
[19,] 4 7 8
[20,] 4 9 10
> occurCount
1 3 6 4 7 9 5 8 10
1 3 1 4 1 5 0 4 1
attr(,"na.action")
2
6
attr(,"class")
[1] "omit"
I reworked openTriList
calculation by Thomas:
require(data.table)
v2 <- function(a) {
n <- names(V(a))
d <- as.data.table(a %>% get.data.frame())
d <- as.data.table(lapply(d, as.numeric))
k1 <- d[[1]]
k2 <- d[[2]]
ks <- k1 + k2
# v <- n[[1]] # for testing
xx <- sapply(n, function(v) {
nbs <- as.numeric(names(neighbors(a, v)))
vn <- as.numeric(v)
if (length(nbs) > 1) {
i2 <- combn(nbs, 2, simplify = F)
# reduce test vectors:
ss <- sapply(i2, sum)
ii <- ks %in% ss
kk1 <- k1[ii]
kk2 <- k2[ii]
# reduce test vectors 2:
i <- (kk1 %in% nbs) & (kk2 %in% nbs)
kk1 <- kk1[i]
kk2 <- kk2[i]
# x <- i2[[1]] # for testing
i3 <- lapply(i2, function(x) {
q1 <- kk1 == x[1]
q2 <- kk2 == x[2]
zz <- q1 & q2
r2 <- !any(zz)
if (is.null(r2) || r2) {
rs <- c(vn, x)
rs <- .Internal(sort(rs, decreasing = F)) # less overhead
rs
}
})
s <- i3[lengths(i3) > 0]
do.call(rbind, s)
}
}
)
do.call(rbind, xx)
}
openTriList <- v2(G)
This should be significantly faster. combn
isnt slow. Slow is are_adjacent
. Thomas code is written quite complicated, its hard to debug and find possible slowdowns... Test:
set.seed(1)
G <- sample_gnm(1000, 4000)
G <- get.data.frame(G) %>% graph_from_data_frame(directed = FALSE)
system.time(r1 <- v1(G)) # 12.00 sec
system.time(r2 <- v2(G)) # 0.99 sec
all.equal(r1, r2) # TRUE
fun_openTriList2 <- function(G) {
dmat <- as_adj(G, sparse = FALSE)
res <- list()
for (i in 1:nrow(dmat)) {
inds <- which(dmat[i, ] == 1)
if (length(inds) > 1) {
r <- combn(inds, 2,
FUN = function(x) {
if (dmat[x[1], x[2]] == 0) {
.Internal(sort(c(i, x), decreasing = F))
}
},
simplify = F
)
res[[i]] <- do.call(rbind, r)
}
}
res <- do.call(rbind, res)
res
}
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.