简体   繁体   English

按组和多个条件过滤,dplyr

[英]Filter by group and multiple conditions, dplyr

I am trying to filter Countries, across the years 2000 to 2016, where indic.no must equal 10 across ALL years.我正在尝试过滤从 2000 年到 2016 年的国家,其中 indic.no 在所有年份中必须等于 10。

I have tried several filters with multiple conditions, but never seem to get the expected results.我曾尝试过多种带多个条件的过滤方式,但似乎从未得到预期的结果。

#subset of df
structure(list(ISO3 = c("ABW", "ABW", "ABW", "ABW", "ABW", "ABW", 
                        "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", 
                        "ABW", "ABW", "ABW", "ABW", "ARE", "ARE", "ARE", "ARE", "ARE", 
                        "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", 
                        "ARE", "ARE", "ARE", "ARE", "ARE", "AUS", "AUS", "AUS", "AUS", 
                        "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", 
                        "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "BRB", "BRB", "BRB", 
                        "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", 
                        "BRB", "BRB", "BRB", "BRB", "BRB", "COL", "COL", "COL", "COL", 
                        "COL", "COL", "COL", "COL", "COL", "COL", "COL", "COL", "COL", 
                        "COL", "COL", "COL", "COL", "COL", "COL", "FJI", "FJI", "FJI", 
                        "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", 
                        "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "KIR", "KIR", 
                        "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", 
                        "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "PNG", "PNG", 
                        "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", 
                        "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "VCT", 
                        "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", 
                        "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT"), 
                        NAME_0 = c("Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", 
                        "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", 
                        "Aruba", "Aruba", "Aruba", "Aruba", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "Australia", 
                        "Australia", "Australia", "Australia", "Australia", "Australia", 
                        "Australia", "Australia", "Australia", "Australia", "Australia", 
                        "Australia", "Australia", "Australia", "Australia", "Australia", 
                        "Australia", "Australia", "Australia", "Barbados", "Barbados", 
                        "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", 
                        "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", 
                        "Barbados", "Barbados", "Barbados", "Colombia", "Colombia", "Colombia", 
                        "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", 
                        "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", 
                        "Colombia", "Colombia", "Colombia", "Colombia", "Fiji", "Fiji", 
                        "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", 
                        "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", 
                        "Fiji", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                        "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                        "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                        "Kiribati", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                        "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                        "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                        "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                        "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines"), Year = c(2000, 2001, 2002, 
                                                                      2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 
                        2014, 2015, 2016, 2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 
                        2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 
                        2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 
                        2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2000, 
                        2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 
                        2012, 2013, 2014, 2015, 2016, 2000, 2001, 2002, 2003, 2004, 2005, 
                        2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 
                        2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 
                        2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2000, 
                        2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 
                        2012, 2013, 2014, 2015, 2016, 2017, 2000, 2001, 2002, 2003, 2004, 
                        2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 
                        2016, 2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 
                        2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016), indic.no = c(10, 
                        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                        7, 7, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 
                        10, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                        10, 10, 10, 10, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                        10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 10, 10, 10, 10, 10, 10, 
                        9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 10, 10, 9, 9, 
                        9, 10, 9, 9, 8, 8, 8, 8, 8, 8, 9, 9, 9, 7, 8, 8, 8, 9, 9, 9, 
                        9, 9, 10, 9, 10, 10, 9, 10, 10, 9, 10, 7, 7, 8, 8, 8, 8, 8, 8, 
                        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8)), row.names = c(NA, -166L), class = c("tbl_df", 
                                                                                    

Plot of countries and indic.no across years Plot 国家和 indic.no 跨年

#Want countries, that have indic.no = 10 across ALL years between 2000 to 2016
#Here I thought I am selecting all years between 2000 to 2016 AND where indic.no equals 10

# NOTE(review): `all(Year %in% (2000:2016))` yields one TRUE/FALSE per group
# and is TRUE only when the group has no rows outside 2000-2016, whereas
# `indic.no == 10` is evaluated row by row. Any country that also has
# 2017/2018 rows is therefore dropped entirely, whatever its indic.no values.
df2 <-df %>%
  group_by(ISO3, NAME_0)%>%
  filter(all(Year %in%(2000:2016)) & indic.no == 10)

However, it only seems to give me "BRB", where I would expect to also have "ABW", "AUS"& "COL".然而,它似乎只给了我“BRB”,我希望在那里也有“ABW”、“AUS”和“COL”。

Filtered data plot过滤数据 plot

I have spent so much time on this one filtering issue, and cannot see where I am going wrong.我在这个过滤问题上花了很多时间,但看不到我哪里出错了。

Here is the easiest way.这是最简单的方法。 After grouping by 'ISO3' and 'NAME_0', use a single filter: subset the 'indic.no' values that fall within the 'Year' range and check whether all of them are 10.按“ISO3”和“NAME_0”分组后,使用单个 filter:取出“Year”范围内的“indic.no”值,并检查它们是否全部(all)等于 10。

library(dplyr)
# Per country: take the indic.no values whose Year falls in 2000-2016 and keep
# the group only if every one of them equals 10. The condition is a single
# value per group, so a country that passes keeps all of its rows, including
# any outside 2000-2016.
df2 <-  df %>%
   group_by(ISO3, NAME_0)%>%
   filter(all(indic.no[Year %in% 2000:2016] == 10))

# Distinct country codes that remain after the filter.
unique(df2$ISO3)
#[1] "ABW" "AUS" "BRB" "COL"

My approach would be:我的方法是:

df <- tibble::tibble(ISO3 = c("ABW", "ABW", "ABW", "ABW", "ABW", "ABW", 
                        "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", 
                        "ABW", "ABW", "ABW", "ABW", "ARE", "ARE", "ARE", "ARE", "ARE", 
                        "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", 
                        "ARE", "ARE", "ARE", "ARE", "ARE", "AUS", "AUS", "AUS", "AUS", 
                        "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", 
                        "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "BRB", "BRB", "BRB", 
                        "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", 
                        "BRB", "BRB", "BRB", "BRB", "BRB", "COL", "COL", "COL", "COL", 
                        "COL", "COL", "COL", "COL", "COL", "COL", "COL", "COL", "COL", 
                        "COL", "COL", "COL", "COL", "COL", "COL", "FJI", "FJI", "FJI", 
                        "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", 
                        "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "KIR", "KIR", 
                        "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", 
                        "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "PNG", "PNG", 
                        "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", 
                        "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "VCT", 
                        "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", 
                        "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT"), 
               NAME_0 = c("Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", 
                          "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", 
                          "Aruba", "Aruba", "Aruba", "Aruba", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "Australia", 
                          "Australia", "Australia", "Australia", "Australia", "Australia", 
                          "Australia", "Australia", "Australia", "Australia", "Australia", 
                          "Australia", "Australia", "Australia", "Australia", "Australia", 
                          "Australia", "Australia", "Australia", "Barbados", "Barbados", 
                          "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", 
                          "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", 
                          "Barbados", "Barbados", "Barbados", "Colombia", "Colombia", "Colombia", 
                          "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", 
                          "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", 
                          "Colombia", "Colombia", "Colombia", "Colombia", "Fiji", "Fiji", 
                          "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", 
                          "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", 
                          "Fiji", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                          "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                          "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                          "Kiribati", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                          "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                          "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                          "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                          "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines"),
               Year = c(2000, 2001, 2002, 
                        2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 
                        2014, 2015, 2016, 2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 
                        2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 
                        2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 
                        2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2000, 
                        2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 
                        2012, 2013, 2014, 2015, 2016, 2000, 2001, 2002, 2003, 2004, 2005, 
                        2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 
                        2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 
                        2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2000, 
                        2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 
                        2012, 2013, 2014, 2015, 2016, 2017, 2000, 2001, 2002, 2003, 2004, 
                        2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 
                        2016, 2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 
                        2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016),
               indic.no = c(10, 
                            10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                            7, 7, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 
                            10, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                            10, 10, 10, 10, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                            10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                            10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 10, 10, 10, 10, 10, 10, 
                            9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 10, 10, 9, 9, 
                            9, 10, 9, 9, 8, 8, 8, 8, 8, 8, 9, 9, 9, 7, 8, 8, 8, 9, 9, 9, 
                            9, 9, 10, 9, 10, 10, 9, 10, 10, 9, 10, 7, 7, 8, 8, 8, 8, 8, 8, 
                            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8)
           )

library(dplyr)

# List the countries whose indic.no equals 10 in EVERY year from 2000 to 2016.
# Filtering rows on `indic.no == 10` alone keeps any country that has at least
# one such year, so the check has to be made per country with all().
df %>%
  # Only the 2000-2016 window is relevant to the check.
  filter(between(Year, 2000, 2016)) %>%
  group_by(ISO3, NAME_0) %>%
  # One TRUE/FALSE per country: keep it only if no year deviates from 10.
  filter(all(indic.no == 10)) %>%
  ungroup() %>%
  # One row per qualifying country, same columns as before.
  distinct(ISO3, NAME_0)

The easiest way to get "countries that have indic.no = 10 across ALL years between 2000 to 2016" (assuming you don't need to keep the other data for any reason) is to separate the filtering steps and wrap the indic.no comparison in all():获得“在 2000 年到 2016 年之间的所有年份中具有 indic.no = 10 的国家”(假设您出于任何原因不需要保留其他数据)的最简单方法是分开过滤步骤,并用 all() 包裹对 indic.no 的比较:

library(dplyr)

# Step 1: drop every row outside the 2000-2016 window.
# Step 2: per country, keep the group only when all remaining indic.no are 10.
# Step 3: return the qualifying ISO3 codes, one entry per country.
df %>%
  filter(between(Year, 2000, 2016)) %>%
  group_by(ISO3, NAME_0) %>%
  filter(all(indic.no == 10)) %>%
  ungroup() %>%
  distinct(ISO3) %>%
  pull(ISO3)

[1] "ABW" "AUS" "BRB" "COL"

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM