簡體   English   中英

如何選擇沒有出現在第二個 tibble 中的 tibble 行

[英]How to select rows of a tibble that do not appear in a second tibble

我有一個 tibble mytrees ,以及它的一個子集 findOut 。 我想創建第三個 tibble mytreesCorr ,它只包含mytrees中與findOut的任何一行都不匹配的行。 如果我只想根據某一列過濾掉行,例如Árvore (抱歉,它不是英文,但它用作 ID),我會這樣做:

# Keep only the rows of mytrees whose Árvore ID does not occur in findOut.
# A logical mask is inverted with `!`; unary minus would coerce it to 0/-1
# numeric indices. The trailing comma makes this a row (not column) subset.
mytreesCorr <- mytrees[!(mytrees$Árvore %in% findOut$Árvore), ]

但我想根據整行的內容進行過濾,而不僅僅是根據這一個 ID 列。 這是mytrees:

> dput(mytrees)
structure(list(Dia = c("26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", 
"27/10/2020", "26/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", 
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", 
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", 
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", 
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", 
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", 
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", 
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", 
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", 
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", 
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", 
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", 
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", 
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", 
"27/10/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", 
"9/11/2020"), Árvore = c("398", "414", "415", "416", "416", 
"417", "417", "419", "432", "432", "434", "435", "436", "439", 
"441", "442", "443", "451", "452", "454", "459", "482", "484", 
"485", "486", "489", "490", "492", "496", "497", "498", "502", 
"503", "504", "505", "506", "507", "508", "513", "513", "514", 
"514", "515", "516", "518", "519", "520", "521", "525", "526", 
"534", "535", "536", "538", "547", "547", "552", "555", "556", 
"557", "559", "564", "565", "568", "569", "572", "573", "581", 
"582", "584", "585", "586", "588", "593", "596", "597", "603", 
"604", "605", "606", "608", "612", "614", "369", "616", "373", 
"386", "399", "415", "434", "435", "436", "441", "442", "448", 
"449", "451", "451", "452", "454", "456", "463", "464", "482", 
"484", "485", "485", "486", "487", "491", "492", "492", "493", 
"495", "496", "497", "500", "501", "503", "505", "507", "508", 
"510", "513", "514", "515", "515", "516", "517", "519", "519", 
"520", "526", "528", "547", "548", "549", "555", "560", "564", 
"572", "573", "575", "581", "582", "586", "596", "599", "604", 
"606", "607", "612", "613", "614", "575", "607", "513", "569", 
"565", "559", "567", "514", "555", "520", "564", "560", "557", 
"556", "537", "536", "521", "513", "508", "511", "548", "599", 
"515", "504", "609", "500", "497", "502", "499", "608", "509", 
"498", "496", "540", "497", "500", "593", "581", "567", "516", 
"517", "571", "516", "568", "560", "554", "568", "555", "546", 
"547", "548", "406", "538", "549", "543", "550", "504", "534", 
"536", "535", "521", "526", "485", "524", "538", "482", "483", 
"612", "589", "489", "490", "487", "615", "496", "497", "498", 
"457", "613", "454", "449", "616", "443", "614", "442", "452", 
"453", "441", "450", "452", "461", "459", "462", "286", "468", 
"469", "458", "474", "284", "463", "251"), `Circunferência (cm)` = c(28.5, 
21.2, 107, 44.2, 44.1, 21.1, 21.6, 17.8, 44.7, 34.1, 28.5, 78.7, 
52.5, 117, 15.5, 100, 29, 70.5, 69.4, 242.5, 68.5, 49, 30.4, 
24.5, 46.8, 21.6, 163.4, 61, 80.5, 38, 21, 74.5, 95.5, 48.7, 
86, 106.5, 54, 85.4, 45.2, 41.1, 19.2, 19.1, 88, 19.7, 259, 21.5, 
26.2, 35.3, 47.2, 38, 25, 81.5, 22.3, 178.8, 48.3, 48.6, 35.3, 
47, 86, 24.4, 71, 212, 47.3, 19.7, 173, 257.7, 26, 43.8, 85.2, 
130, 77.5, 63.2, 201, 319, 33, 21.6, 137.2, 84.7, 40.8, 65, 73.2, 
50.2, 104.3, 47, 86, 62, 158, 138, 117.5, 30, 78, 52, 17.5, 54, 
80.5, 100, 71, 70.5, 69, 238, 17, 74, 22, 42, 26.5, 24, 23, 49, 
185, 113.5, 62, 63, 55, 95, 80, 40, 78.5, 212, 97, 89, 32, 86, 
80, 44, 20, 87, 86.5, 20, 89, 24.5, 23, 26, 40, 40, 48, 41, 116, 
48, 134, 213, 252, 26, 113.5, 46, 85, 64.5, 33, 222, 85, 88, 
32, 51, 88, 108.5, 27, 29.5, 43.5, 107.8, 48, 71.5, 136, 20.5, 
46.9, 27.2, 211, 130, 24.1, 89, 68.5, 22.5, 35.4, 43, 85, 63.4, 
210, 222, 98, 315.4, 63, 79, 38.1, 75.5, 98, 76, 215, 21.2, 80.4, 
19.5, 38.4, 81, 334, 43.5, 136.4, 20, 89.5, 16.3, 198, 21, 131, 
23.5, 22, 48.5, 240, 48, 41, 238.4, 178.2, 123, 43.5, 132.1, 
259, 25, 22, 51, 35.2, 39.5, 24.3, 25.4, 179, 50.5, 134.7, 51, 
260, 23, 199, 188.5, 67, 80.2, 39, 22, 248, 87, 238, 99.2, 87, 
30, 106, 98, 69.8, 218, 16, 27, 70.3, 170, 66.5, 35, 238, 38.3, 
233.3, 132.5, 19.5, 324.5, 69.7, 677.2)), class = c("spec_tbl_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -254L), spec = structure(list(
    cols = list(Dia = structure(list(), class = c("collector_character", 
    "collector")), Árvore = structure(list(), class = c("collector_character", 
    "collector")), `Circunferência (cm)` = structure(list(), class = c("collector_double", 
    "collector"))), default = structure(list(), class = c("collector_guess", 
    "collector")), skip = 1), class = "col_spec"))

這就是findOut (我只會給出結果而不是復制程序):

> dput(findOut)
structure(list(Dia = c("27/10/2020", "27/10/2020", "26/10/2020", 
"9/11/2020", "9/11/2020"), Árvore = c("442", "482", "504", "504", 
"548"), `Circunferência (cm)` = c(54, 42, 48.7, 315.4, 210), 
    Range = c(46, 8.5, 266.7, 266.7, 169)), row.names = c(NA, 
-5L), groups = structure(list(Árvore = c("442", "482", "504", 
"548"), .rows = structure(list(1L, 2L, 3:4, 5L), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -4L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

以下是根據您的描述過濾數據的一些 base R 和 dplyr 方法:

# load functions ----
# Bind the magrittr pipe to a local name so the examples below can use it
# without attaching a package.
`%>%` <- magrittr::`%>%`

# some data ----
# dat1: 100 rows with unique IDs 1..100 and a randomly sampled column X.
dat1 <- dplyr::tibble(
  ID = 1:100,
  X = sample(1:1000, size = 100, replace = TRUE)
)
# dat2: 20 distinct IDs drawn from 1..100 and a randomly sampled column Y.
dat2 <- dplyr::tibble(
  ID = sample(1:100, size = 20, replace = FALSE),
  Y = sample(1:1000, size = 20, replace = TRUE)
)

# filter data ----
# Each approach below keeps the rows of dat1 whose ID does not occur in dat2.

# base
# Negate the membership mask with `!`; the trailing comma selects rows.
dat1[!dat1$ID %in% dat2$ID, ]

# dplyr: filter ####
dat1 %>%
  dplyr::filter(!ID %in% dat2$ID)

# dplyr: anti_join ####
# `by` names the key column(s); listing several shared columns matches rows
# on all of them at once.
dat1 %>%
  dplyr::anti_join(dat2, by = "ID")

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM