如何識別兩個向量中的多個相同對

Question

在我的圖形的封裝（如在圖論，由邊緣連接的節點）我有指示起源節點的每個邊的矢量from ，矢量表示每個邊緣目的地的節點to及一個向量表示每個邊緣的曲線curve 。

默認情況下，如果兩個節點之間只有一條邊，我希望邊緣的曲線為0，如果兩個節點之間有兩條邊，則曲線為0.2。 我現在使用的代碼是一個for循環，有點慢：

curve <- rep(0,5)
from<-c(1,2,3,3,2)
to<-c(2,3,4,2,1)

    for (i in 1:length(from))
    {
        if (any(from==to[i] & to==from[i]))
        {
            curve[i]=0.2        

        }
    }

因此，基本上to如果from to其他任何對都使用相同的節點（數字），則我會尋找每個邊緣（ from一個索引和to一個索引）。

我正在尋找兩件事：

一種確定是否有一對節點之間有兩個邊的方法（因此，如果沒有，則可以省略循環）
一種加快此循環的方法

＃

編輯：

為了使這一點更加清楚，另一個示例：

from <- c(4L, 6L, 7L, 8L, 1L, 9L, 5L, 1L, 2L, 1L, 10L, 2L, 6L, 7L, 10L, 4L, 9L)
to <- c(1L, 1L, 1L, 2L, 3L, 3L, 4L, 5L, 6L, 7L, 7L, 8L, 8L, 8L, 8L, 10L, 10L)
cbind(from,to)
      from to
 [1,]    4  1
 [2,]    6  1
 [3,]    7  1
 [4,]    8  2
 [5,]    1  3
 [6,]    9  3
 [7,]    5  4
 [8,]    1  5
 [9,]    2  6
[10,]    1  7
[11,]   10  7
[12,]    2  8
[13,]    6  8
[14,]    7  8
[15,]   10  8
[16,]    4 10
[17,]    9 10

在這兩個向量中，對3與對10相同（1和7的順序不同），對4和12對相同（2和8都相同）。 因此，我希望curve變為：

 [1,]  0.0
 [2,]  0.0
 [3,]  0.2
 [4,]  0.2
 [5,]  0.0
 [6,]  0.0
 [7,]  0.0
 [8,]  0.0
 [9,]  0.0
[10,]  0.2
[11,]  0.0
[12,]  0.2
[13,]  0.0
[14,]  0.0
[15,]  0.0
[16,]  0.0
[17,]  0.0

（作為向量，我兩次換位以獲得行號）。

解

from <- c(4L, 6L, 7L, 8L, 1L, 9L, 5L, 1L, 2L, 1L, 10L, 2L, 6L, 7L, 10L, 4L, 9L)
to <- c(1L, 1L, 1L, 2L, 3L, 3L, 4L, 5L, 6L, 7L, 7L, 8L, 8L, 8L, 8L, 10L, 10L)

srt <- apply(cbind(from,to),1,sort)
dub <- duplicated(t(srt))|duplicated(t(srt),fromLast=T)
curve <- ifelse(dub,0.2,0)

標桿解決方案

這是不同解決方案的一些基准測試

> # for-loop
> system.time(
+ {
+ curve <- rep(0,5)
+     for (i in 1:length(from))
+     {
+         if (any(from==to[i] & to==from[i]))
+         {
+             curve[i]=0.2        
+ 
+         }
+     }
+ })
   user  system elapsed 
 171.49    0.05  171.98 

from <- sample(1:1000,100000,T)
> to <- sample(1:1000,100000,T)
> 
> # My solution:
> system.time(
+ {
+ srt <- apply(cbind(from,to),1,sort)
+ dub <- duplicated(t(srt))|duplicated(t(srt),fromLast=T)
+ curve <- ifelse(dub,0.2,0)
+ })
   user  system elapsed 
  16.92    0.00   16.94 
> 
> 
> # Marek 1:
> system.time(
+ {
+ srt <- cbind(pmin(from,to), pmax(from,to) )
+ dub <- duplicated(srt)|duplicated(srt,fromLast=T)
+ curve <- ifelse(dub,0.2,0)
+ })
   user  system elapsed 
   2.43    0.00    2.43 
> 
> # Marek 2:
> system.time(
+ {
+ srt <- cbind(ifelse(from>to,to,from),ifelse(from>to,from,to))
+ dub <- duplicated(srt)|duplicated(srt,fromLast=T)
+ curve <- ifelse(dub,0.2,0)
+ })
   user  system elapsed 
   2.67    0.00    2.70 
> 
> # Maiasaura:
> library(plyr)
> 
> system.time(
+ {
+ data=data.frame(cbind(id=1:length(from),from,to))
+ data=ddply(data, .(id), transform, f1=min(from,to),f2=max(from,to))
+ curved=data.frame(data[which(duplicated(data[,4:5])==TRUE),],value=0.2)
+ result=join(data[,4:5],curved[,4:6],by=intersect(names(data)[4:5],names(curved)[4:6]))
+ result$value[which(is.na(result$value))]=0
+ result=data.frame(from,to,curve=result$value)
+ })
   user  system elapsed 
 103.43    0.11  103.95

> # Marek 1 + Joshua
> > system.time(
> + {
> + srt <- cbind(pmin(from,to), pmax(from,to) )
> + curve <- ifelse(ave(srt[,1], srt[,1], srt[,2], FUN=length) > 1,
> 0.2, 0)
> + })    user  system elapsed 
>    7.26    0.00    7.25

最快的解決方案是：

srt <- cbind(pmin(from,to), pmax(from,to) )
dub <- duplicated(srt)|duplicated(srt,fromLast=T)
curve <- ifelse(dub,0.2,0)

Answer 1

如何使用outer ？

from <- c(1,2,3,3,2)
to <- c(2,3,4,2,1)
out <- outer(from, to, `==`)
ifelse(rowSums(out) > 0 & colSums(out) > 0, 0.2, 0)

Answer 2

改變中

any(from==to[i] & to==from[i])

至

any(from==to[i]) && any(to==from[i])

可以節省很多時間。 在您的示例中，如果將from和to復制5000次，則計算時間將減少1/3。

使用&& ，如果第一個條件為FALSE R便不會費心計算第二個表達式。

Answer 3

如果我理解正確，則可以使用%in% ：

curve[ to %in% from & from %in% to ] <- 0.2

根據您的更新的另一種解決方案：

srt <- t(apply(cbind(from,to),1,sort))
curve <- ifelse(ave(srt[,1], srt[,1], srt[,2], FUN=length) > 1, 0.2, 0)

Answer 4

這是使用plyr的解決方案

我首先將from和to合並為一個data.frame

library(plyr)
data=data.frame(cbind(id=1:length(from),from,to))

數據

  id from to
1   1    4  1
2   2    6  1
3   3    7  1
4   4    8  2
5   5    1  3
6   6    9  3
7   7    5  4
8   8    1  5
9   9    2  6
10 10    1  7
11 11   10  7
12 12    2  8
13 13    6  8
14 14    7  8
15 15   10  8
16 16    4 10
17 17    9 10

那么以下內容將產生您想要的結果：

data=ddply(data, .(id), transform, f1=min(from,to),f2=max(from,to))
curved=data.frame(data[which(duplicated(data[,4:5])==TRUE),],value=0.2)
result=join(data[,4:5],curved[,4:6],by=intersect(names(data)[4:5],names(curved)[4:6]))
result$value[which(is.na(result$value))]=0
result=data.frame(from,to,curve=result$value)

應該產生：

   from to curve
1     4  1   0.0
2     6  1   0.0
3     7  1   0.2
4     8  2   0.2
5     1  3   0.0
6     9  3   0.0
7     5  4   0.0
8     1  5   0.0
9     2  6   0.0
10    1  7   0.2
11   10  7   0.0
12    2  8   0.2
13    6  8   0.0
14    7  8   0.0
15   10  8   0.0
16    4 10   0.0
17    9 10   0.0

您可以將上面的代碼變成一個函數

calculate_curve <- function (from,to)
{
data=data.frame(cbind(id=1:length(from),from,to))
data=ddply(data, .(id), transform, f1=min(from,to),f2=max(from,to))
curved=data.frame(data[which(duplicated(data[,4:5])==TRUE),],value=0.2)
result=join(data[,4:5],curved[,4:6],by=intersect(names(data)[4:5],names(curved)[4:6]))
result$value[which(is.na(result$value))]=0
return (result$value)
}

然后做

curve=calculate_curve(from,to)

如何識別兩個向量中的多個相同對

問題描述

＃

解

標桿解決方案

4 個解決方案

解決方案1
2 2011-02-15 21:26:41

解決方案2
2 2011-02-15 22:35:28

解決方案3
2 2011-02-15 22:55:22

解決方案4
2 已采納 2011-02-16 00:58:04

如何識別兩個向量中的多個相同對

問題描述

＃

解

標桿解決方案

4 個解決方案

解決方案1 2 2011-02-15 21:26:41

解決方案2 2 2011-02-15 22:35:28

解決方案3 2 2011-02-15 22:55:22

解決方案4 2 已采納 2011-02-16 00:58:04

解決方案1
2 2011-02-15 21:26:41

解決方案2
2 2011-02-15 22:35:28

解決方案3
2 2011-02-15 22:55:22

解決方案4
2 已采納 2011-02-16 00:58:04