簡體   English   中英

使用apply()和具有多個參數的用戶定義函數計算數據幀中樣本之間的距離

[英]calculate distance between samples in dataframe with apply() and user-defined function with multiple args

我有一個非常龐大的樣本及其地理位置信息集(> 500個樣本)。 我想使用兩種不同的方法來計算所有樣本之間的距離(即,形成距離矩陣):分離株之間的大圈距離,以及使用人類遷徙的航點的大圈距離。 前者在geosphere軟件包中非常簡單,我已經通過示例數據集(n = 10)之后列出的代碼實現了它。

test <- structure(list(sample_lon = c(85.1, 101.65, 101.52, 100.5, 77.67, 
78.01, 41.8376999, 136.3070068, 43.0671997, 33.6925011), sample_lat = c(20.95, 
3.11, 3.07, 13.76, 27.49, 27.18, 56.2234001, 49.0937004, 52.3828011, 
-12.3848), Cairo = c(5482676.53004203, 7979572.75185404, 7969372.29645241, 
7300548.40307534, 4535773.29985337, 4577259.89703268, 3035531.48539288, 
8559356.13799981, 2675638.0088102, 4698651.89376045), Istanbul = c(5775286.47479145, 
8414621.15676231, 8406733.79230612, 7555850.43260897, 4744788.34257317, 
4791494.45027787, 1968286.36800993, 7814352.55494176, 1704767.09047958, 
5939243.14729151), PhnomPenh = c(2300013.05256412, 910323.277871997, 
918740.411068992, 487913.521378381, 3302731.94513236, 3256808.88281153, 
7291948.53468698, 5171445.2592504, 7089458.06789339, 8189260.33147658
), AddisAbada = c(5137690.28464086, 6987142.79928228, 6973508.88041575, 
6744246.81620126, 4567281.84480851, 4588137.16563698, 5241878.016939, 
9802623.85641921, 4823403.30909928, 2433156.16935115), UN = c("Southern-Asia", 
"South-Eastern-Asia", "South-Eastern-Asia", "South-Eastern-Asia", 
"Southern-Asia", "Southern-Asia", "Eastern-Europe", "Eastern-Europe", 
"Eastern-Europe", "Eastern-Africa"), continent = c("Asia", "Asia", 
"Asia", "Asia", "Asia", "Asia", "Europe", "Europe", "Europe", 
"Africa")), .Names = c("sample_lon", "sample_lat", "Cairo", "Istanbul", 
"PhnomPenh", "AddisAbada", "UN", "continent"), row.names = c(NA, 
10L), class = "data.frame")

require(geosphere)
test.dvse <- apply(test[c("sample_lon", "sample_lat")], 1, FUN=function(X) distVincentyEllipsoid(X, test[c("sample_lon", "sample_lat")]))

這樣可以成功返回所有分離株之間距離的矩陣。

對於航點分析,我創建了一個用戶定義的函數,該函數可以查看樣本所來自的大陸,然后通過一系列規則來計算隔離點之間的距離。

dCI <- distVincentyEllipsoid(c(31,30), c(28,41)) #Cairo to Istanbul
dCP <- distVincentyEllipsoid(c(31,30), c(104,11)) #Cairo to Phnom Penh
dIP <- distVincentyEllipsoid(c(28,41), c(104,11)) #Istanbul Phnom Penh

calcWayDist <- function(lon1, lat1, con1, lon2, lat2, con2) {
  if (con1 == con2) {
    dd = distVincentyEllipsoid(c(lon1,lat1), c(lon2,lat2))
  } 
  else{
    if (setequal(c("Africa", "Europe"), c(con1, con2)) == TRUE) {
      if (which("Africa" == c(con1, con2)) == 1) {
        afr = distVincentyEllipsoid(c(lon1,lat1), c(31,30)) #dist from isolate to Cairo
        eur = distVincentyEllipsoid(c(lon2,lat2), c(28,41)) #dist from isolate to Istanbul
        dd = afr + dCI + eur
      }
      else {
        afr = distVincentyEllipsoid(c(lon2,lat2), c(31,30)) #dist from isolate to Cairo
        eur = distVincentyEllipsoid(c(lon1,lat1), c(28,41)) #dist from isolate to Istanbul
        dd = afr + dCI + eur
      }
    }
    else if (setequal(c("Africa", "Asia"), c(con1, con2)) == TRUE) {
      s1 = distVincentyEllipsoid(c(lon1,lat1), c(31,30)) #dist from isolate to Cairo
      s2 = distVincentyEllipsoid(c(lon2,lat2), c(31,30)) #dist from isolate to Cairo
      dd = s1 + s2
    }
    else if (setequal(c("Africa", "Melanesia"), c(con1, con2)) == TRUE) {
      if (which("Africa" == c(con1, con2)) == 1) {
        afr = distVincentyEllipsoid(c(lon1,lat1), c(31,30)) #dist from isolate to Cairo
        mel = distVincentyEllipsoid(c(lon2,lat2), c(104,11)) #dist from isolate to Phnom Penh
        dd = afr + dCP + mel
      }
      else {
        afr = distVincentyEllipsoid(c(lon2,lat2), c(31,30)) #dist from isolate to Cairo
        mel = distVincentyEllipsoid(c(lon1,lat1), c(104,11)) #dist from isolate to Phnom Penh
        dd = afr + dCP + mel
      }
    }
    else if (setequal(c("Europe", "Asia"), c(con1, con2)) == TRUE) {
      s1 = distVincentyEllipsoid(c(lon1,lat1), c(28,41)) #dist from isolate to Cairo
      s2 = distVincentyEllipsoid(c(lon2,lat2), c(28,41)) #dist from isolate to Cairo
      dd = s1 + s2
    }
    else if (setequal(c("Europe", "Melanesia"), c(con1, con2)) == TRUE) {
       if (which("Europe" == c(con1, con2)) == 1) {
          eur = distVincentyEllipsoid(c(lon1,lat1), c(28,41)) #dist from isolate to Istanbul
          mel = distVincentyEllipsoid(c(lon2,lat2), c(104,11)) #dist from isolate to Phnom Penh
          dd = eur + dIP + mel
       }
        else {
          eur = distVincentyEllipsoid(c(lon2,lat2), c(28,41)) #dist from isolate to Istanbul
          mel = distVincentyEllipsoid(c(lon1,lat1), c(104,11)) #dist from isolate to Phnom Penh
          dd = eur + dIP + mel 
        }
    }
    else if (setequal(c("Asia", "Melanesia"), c(con1, con2)) == TRUE) {
      s1 = distVincentyEllipsoid(c(lon1,lat1), c(104,11)) #dist from isolate to Phnom Penh
      s2 = distVincentyEllipsoid(c(lon2,lat2), c(104,11)) #dist from isolate to Phnom Penh
      dd = s1 + s2 
    }
  }
 return(dd) 
  }

手動輸入值時,這可以按需工作。 例如,

calcWayDist(3.6,15.6,"Africa",51.9,31.9,"Asia")

5225325

但是,當我嘗試使用apply函數在所有可能的隔離對之間執行計算時,出現錯誤。

test.waypoint <- apply(test, 1, FUN=function(X) calcWayDist(X["sample_lon"], X["sample_lat"], X["continent"], test["sample_lon"], test["sample_lat"], test["continent"]))

.pointsToMatrix(p1)中的錯誤:經度> 360另外:警告消息:如果if(con1 == con2){:條件的長度> 1,並且僅將使用第一個元素,則從以下位置調用:.pointsToMatrix(p1)

接下來,我嘗試對用戶定義的函數的輸入參數進行“向量化”:

calcWayDistbyVec <- function(s1, s2) {
  if (s1[3] == s2[3]) {
    dd = distVincentyEllipsoid(c(as.numeric(s1[1]),as.numeric(s1[2])), c(as.numeric(s2[1]),as.numeric(s2[2])))
  } 
  else{
    if (setequal(c("Africa", "Europe"), c(s1[3], s2[3])) == TRUE) {
      if (which("Africa" == c(s1[3], s2[3])) == 1) {
        afr = distVincentyEllipsoid(c(as.numeric(s1[1]),as.numeric(s1[2])), c(31,30)) #dist from isolate to Cairo
        eur = distVincentyEllipsoid(c(as.numeric(s2[1]),as.numeric(s2[2])), c(28,41)) #dist from isolate to Istanbul
        dd = afr + dCI + eur
      }
      else {
        afr = distVincentyEllipsoid(c(as.numeric(s2[1]),as.numeric(s2[2])), c(31,30)) #dist from isolate to Cairo
        eur = distVincentyEllipsoid(c(as.numeric(s1[1]),as.numeric(s1[2])), c(28,41)) #dist from isolate to Istanbul
        dd = afr + dCI + eur
      }
    }
    else if (setequal(c("Africa", "Asia"), c(s1[3], s2[3])) == TRUE) {
      d1 = distVincentyEllipsoid(c(as.numeric(s1[1]),as.numeric(s1[2])), c(31,30)) #dist from isolate to Cairo
      d2 = distVincentyEllipsoid(c(as.numeric(s2[1]),as.numeric(s2[2])), c(31,30)) #dist from isolate to Cairo
      dd = d1 + d2
    }
    else if (setequal(c("Africa", "Melanesia"), c(s1[3], s2[3])) == TRUE) {
      if (which("Africa" == c(s1[3], s2[3])) == 1) {
        afr = distVincentyEllipsoid(c(as.numeric(s1[1]),as.numeric(s1[2])), c(31,30)) #dist from isolate to Cairo
        mel = distVincentyEllipsoid(c(as.numeric(s2[1]),as.numeric(s2[2])), c(104,11)) #dist from isolate to Phnom Penh
        dd = afr + dCP + mel
      }
      else {
        afr = distVincentyEllipsoid(c(as.numeric(s2[1]),as.numeric(s2[2])), c(31,30)) #dist from isolate to Cairo
        mel = distVincentyEllipsoid(c(as.numeric(s1[1]),as.numeric(s1[2])), c(104,11)) #dist from isolate to Phnom Penh
        dd = afr + dCP + mel
      }
    }
    else if (setequal(c("Europe", "Asia"), c(s1[3], s2[3])) == TRUE) {
      d1 = distVincentyEllipsoid(c(as.numeric(s1[1]),as.numeric(s1[2])), c(28,41)) #dist from isolate to Cairo
      d2 = distVincentyEllipsoid(c(as.numeric(s2[1]),as.numeric(s2[2])), c(28,41)) #dist from isolate to Cairo
      dd = d1 + d2
    }
    else if (setequal(c("Europe", "Melanesia"), c(s1[3], s2[3])) == TRUE) {
       if (which("Europe" == c(s1[3], s2[3])) == 1) {
          eur = distVincentyEllipsoid(c(as.numeric(s1[1]),as.numeric(s1[2])), c(28,41)) #dist from isolate to Istanbul
          mel = distVincentyEllipsoid(c(as.numeric(s2[1]),as.numeric(s2[2])), c(104,11)) #dist from isolate to Phnom Penh
          dd = eur + dIP + mel
       }
        else {
          eur = distVincentyEllipsoid(c(as.numeric(s2[1]),as.numeric(s2[2])), c(28,41)) #dist from isolate to Istanbul
          mel = distVincentyEllipsoid(c(as.numeric(s1[1]),as.numeric(s1[2])), c(104,11)) #dist from isolate to Phnom Penh
          dd = eur + dIP + mel 
        }
    }
    else if (setequal(c("Asia", "Melanesia"), c(s1[3], s2[3])) == TRUE) {
      d1 = distVincentyEllipsoid(c(as.numeric(s1[1]),as.numeric(s1[2])), c(104,11)) #dist from isolate to Phnom Penh
      d2 = distVincentyEllipsoid(c(as.numeric(s2[1]),as.numeric(s2[2])), c(104,11)) #dist from isolate to Phnom Penh
      dd = d1 + d2 
    }
  }
 return(dd) 
  }

這適用於單個計算:

calcWayDistbyVec(c(3.6,15.6,"Africa"), c(41.8,56.2,"Europe"))

[1] 6435307

但是同樣,當我嘗試使用apply時,我得到一個錯誤。

test.waypoint <- apply(test[c("sample_lon", "sample_lat", "continent")], 1, FUN=function(X) calcWayDistbyVec(X, test[c("sample_lon", "sample_lat", "continent")]))

繼承(p,“ SpatialPoints”)中的錯誤:(列表)對象不能被強制鍵入'double'另外:警告消息:如果(s1 [3] == s2 [3]){:條件的長度> 1,將僅使用第一個元素。調用方:Inherits(p,“ SpatialPoints”)

如果我只做一點,它確實可以工作。

test.waypoint.ind <- apply(test[c("sample_lon", "sample_lat", "continent")], 1, FUN=function(X) calcWayDistbyVec(X, c(3.6,15.6,"Africa")))

8702972 11199869 11189668 10520844 7756069 7797556 6438793 12284859 6175273 4535197

如果任何人都可以找出不起作用的地方,我將不勝感激! 我看過一些文章《 應用用戶定義的函數》 ,《 將用戶定義的函數 應用到特定的數據框列》 ,這些內容有助於我進行第一次距離矩陣計算,但在這里似乎不太適用。

我將這個問題傳遞給了我大學的R研究小組,一個沒有Stack Overflow帳戶的人提供了解決方案。 它不使用apply()。

calcWayDistMODIFIED <- function(lon1, lat1, con1, lon2, lat2, con2) {
  if (con1 == con2) {
    dd = distVincentyEllipsoid(c(lon1,lat1), c(lon2,lat2))
  } else if (con1 == "Africa" & con2 == "Europe") {
    afr = distVincentyEllipsoid(c(lon1,lat1), c(31,30)) #dist from isolate to Cairo
    eur = distVincentyEllipsoid(c(lon2,lat2), c(28,41)) #dist from isolate to Istanbul
    dd = afr + dCI + eur
  } else if (con1 == "Europe" & con2 == "Africa") {
    afr = distVincentyEllipsoid(c(lon2,lat2), c(31,30)) #dist from isolate to Cairo
    eur = distVincentyEllipsoid(c(lon1,lat1), c(28,41)) #dist from isolate to Istanbul
    dd = afr + dCI + eur
  } else if (setequal(c("Africa", "Asia"), c(con1, con2))) {
    s1 = distVincentyEllipsoid(c(lon1,lat1), c(31,30)) #dist from isolate to Cairo
    s2 = distVincentyEllipsoid(c(lon2,lat2), c(31,30)) #dist from isolate to Cairo
    dd = s1 + s2
  } else if (con1 == "Africa" & con2 == "Melanesia") {
    afr = distVincentyEllipsoid(c(lon1,lat1), c(31,30)) #dist from isolate to Cairo
    mel = distVincentyEllipsoid(c(lon2,lat2), c(104,11)) #dist from isolate to Phnom Penh
    dd = afr + dCP + mel
  } else if (con1 == "Melanesia" & con2 == "Africa") {
    afr = distVincentyEllipsoid(c(lon2,lat2), c(31,30)) #dist from isolate to Cairo
    mel = distVincentyEllipsoid(c(lon1,lat1), c(104,11)) #dist from isolate to Phnom Penh
    dd = afr + dCP + mel
  } else if (setequal(c("Europe", "Asia"), c(con1, con2))) {
    s1 = distVincentyEllipsoid(c(lon1,lat1), c(28,41)) #dist from isolate to Cairo
    s2 = distVincentyEllipsoid(c(lon2,lat2), c(28,41)) #dist from isolate to Cairo
    dd = s1 + s2
  } else if (con1 == "Europe" & con2 == "Melanesia") {
    eur = distVincentyEllipsoid(c(lon1,lat1), c(28,41)) #dist from isolate to Istanbul
    mel = distVincentyEllipsoid(c(lon2,lat2), c(104,11)) #dist from isolate to Phnom Penh
    dd = eur + dIP + mel
  } else if (con1 == "Melanesia" & con2 == "Europe") {
    eur = distVincentyEllipsoid(c(lon2,lat2), c(28,41)) #dist from isolate to Istanbul
    mel = distVincentyEllipsoid(c(lon1,lat1), c(104,11)) #dist from isolate to Phnom Penh
    dd = eur + dIP + mel
  } else if (setequal(c("Asia", "Melanesia"), c(con1, con2))) {
    s1 = distVincentyEllipsoid(c(lon1,lat1), c(104,11)) #dist from isolate to Phnom Penh
    s2 = distVincentyEllipsoid(c(lon2,lat2), c(104,11)) #dist from isolate to Phnom Penh
    dd = s1 + s2
  }
  return(dd)
}

distance_function = function(d) {
  mat = matrix(0, ncol = nrow(d), nrow = nrow(d))
  for(i in 1:nrow(mat)) {
    for(j in 1:nrow(mat)) {
      mat[i,j] = calcWayDistMODIFIED(lon1 = d[i,'sample_lon'], lat1 = d[i,'sample_lat'], con1 = d[i,'continent'], lon2 = d[j,'sample_lon'], lat2 = d[j,'sample_lat'], con2 = d[j,'continent'])
    }
  }
  return(mat)
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM