简体   繁体   中英

Filter by group and multiple conditions, dplyr

I am trying to filter Countries, across the years 2000 to 2016, where indic.no must equal 10 across ALL years.

I have tried using different filtering using multiple conditions, but never seem to get the expected results.

#subset of df
structure(list(ISO3 = c("ABW", "ABW", "ABW", "ABW", "ABW", "ABW", 
                        "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", 
                        "ABW", "ABW", "ABW", "ABW", "ARE", "ARE", "ARE", "ARE", "ARE", 
                        "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", 
                        "ARE", "ARE", "ARE", "ARE", "ARE", "AUS", "AUS", "AUS", "AUS", 
                        "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", 
                        "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "BRB", "BRB", "BRB", 
                        "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", 
                        "BRB", "BRB", "BRB", "BRB", "BRB", "COL", "COL", "COL", "COL", 
                        "COL", "COL", "COL", "COL", "COL", "COL", "COL", "COL", "COL", 
                        "COL", "COL", "COL", "COL", "COL", "COL", "FJI", "FJI", "FJI", 
                        "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", 
                        "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "KIR", "KIR", 
                        "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", 
                        "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "PNG", "PNG", 
                        "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", 
                        "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "VCT", 
                        "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", 
                        "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT"), 
                        NAME_0 = c("Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", 
                        "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", 
                        "Aruba", "Aruba", "Aruba", "Aruba", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "Australia", 
                        "Australia", "Australia", "Australia", "Australia", "Australia", 
                        "Australia", "Australia", "Australia", "Australia", "Australia", 
                        "Australia", "Australia", "Australia", "Australia", "Australia", 
                        "Australia", "Australia", "Australia", "Barbados", "Barbados", 
                        "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", 
                        "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", 
                        "Barbados", "Barbados", "Barbados", "Colombia", "Colombia", "Colombia", 
                        "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", 
                        "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", 
                        "Colombia", "Colombia", "Colombia", "Colombia", "Fiji", "Fiji", 
                        "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", 
                        "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", 
                        "Fiji", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                        "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                        "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                        "Kiribati", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                        "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                        "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                        "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                        "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines"), Year = c(2000, 2001, 2002, 
                                                                      2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 
                        2014, 2015, 2016, 2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 
                        2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 
                        2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 
                        2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2000, 
                        2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 
                        2012, 2013, 2014, 2015, 2016, 2000, 2001, 2002, 2003, 2004, 2005, 
                        2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 
                        2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 
                        2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2000, 
                        2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 
                        2012, 2013, 2014, 2015, 2016, 2017, 2000, 2001, 2002, 2003, 2004, 
                        2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 
                        2016, 2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 
                        2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016), indic.no = c(10, 
                        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                        7, 7, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 
                        10, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                        10, 10, 10, 10, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                        10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 10, 10, 10, 10, 10, 10, 
                        9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 10, 10, 9, 9, 
                        9, 10, 9, 9, 8, 8, 8, 8, 8, 8, 9, 9, 9, 7, 8, 8, 8, 9, 9, 9, 
                        9, 9, 10, 9, 10, 10, 9, 10, 10, 9, 10, 7, 7, 8, 8, 8, 8, 8, 8, 
                        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8)), row.names = c(NA, -166L), class = c("tbl_df", 
                                                                                    

Plot of countries and indic.no across years

#Want countries, that have indic.no = 10 across ALL years between 2000 to 2016
#Here I thought I am selecting all years between 2000 to 2016 AND where indic.no equals 10

df2 <-df %>%
  group_by(ISO3, NAME_0)%>%
  filter(all(Year %in%(2000:2016)) & indic.no == 10)

However, it only seems to give me "BRB", where I would expect to also have "ABW", "AUS"& "COL".

Filtered data plot

I have spent so much time on this one filtering issue, and cannot see where I am going wrong.

Here is the most easiest way. After grouping by 'ISO3' and 'NAME_0', use a single filter by subsetting the 'indic.no' that are within the 'Year' range and check whether the all the values are 10

library(dplyr)
df2 <-  df %>%
   group_by(ISO3, NAME_0)%>%
   filter(all(indic.no[Year %in% 2000:2016] == 10))

unique(df2$ISO3)
#[1] "ABW" "AUS" "BRB" "COL"

My approach would be:

df <- tibble::tibble(ISO3 = c("ABW", "ABW", "ABW", "ABW", "ABW", "ABW", 
                        "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", 
                        "ABW", "ABW", "ABW", "ABW", "ARE", "ARE", "ARE", "ARE", "ARE", 
                        "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", 
                        "ARE", "ARE", "ARE", "ARE", "ARE", "AUS", "AUS", "AUS", "AUS", 
                        "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", 
                        "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "BRB", "BRB", "BRB", 
                        "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", 
                        "BRB", "BRB", "BRB", "BRB", "BRB", "COL", "COL", "COL", "COL", 
                        "COL", "COL", "COL", "COL", "COL", "COL", "COL", "COL", "COL", 
                        "COL", "COL", "COL", "COL", "COL", "COL", "FJI", "FJI", "FJI", 
                        "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", 
                        "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "KIR", "KIR", 
                        "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", 
                        "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "PNG", "PNG", 
                        "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", 
                        "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "VCT", 
                        "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", 
                        "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT"), 
               NAME_0 = c("Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", 
                          "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", 
                          "Aruba", "Aruba", "Aruba", "Aruba", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "Australia", 
                          "Australia", "Australia", "Australia", "Australia", "Australia", 
                          "Australia", "Australia", "Australia", "Australia", "Australia", 
                          "Australia", "Australia", "Australia", "Australia", "Australia", 
                          "Australia", "Australia", "Australia", "Barbados", "Barbados", 
                          "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", 
                          "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", 
                          "Barbados", "Barbados", "Barbados", "Colombia", "Colombia", "Colombia", 
                          "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", 
                          "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", 
                          "Colombia", "Colombia", "Colombia", "Colombia", "Fiji", "Fiji", 
                          "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", 
                          "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", 
                          "Fiji", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                          "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                          "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                          "Kiribati", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                          "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                          "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                          "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                          "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines"),
               Year = c(2000, 2001, 2002, 
                        2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 
                        2014, 2015, 2016, 2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 
                        2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 
                        2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 
                        2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2000, 
                        2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 
                        2012, 2013, 2014, 2015, 2016, 2000, 2001, 2002, 2003, 2004, 2005, 
                        2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 
                        2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 
                        2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2000, 
                        2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 
                        2012, 2013, 2014, 2015, 2016, 2017, 2000, 2001, 2002, 2003, 2004, 
                        2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 
                        2016, 2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 
                        2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016),
               indic.no = c(10, 
                            10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                            7, 7, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 
                            10, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                            10, 10, 10, 10, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                            10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                            10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 10, 10, 10, 10, 10, 10, 
                            9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 10, 10, 9, 9, 
                            9, 10, 9, 9, 8, 8, 8, 8, 8, 8, 9, 9, 9, 7, 8, 8, 8, 9, 9, 9, 
                            9, 9, 10, 9, 10, 10, 9, 10, 10, 9, 10, 7, 7, 8, 8, 8, 8, 8, 8, 
                            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8)
           )

library(dplyr)
df %>%
  filter(between(Year, 2000, 2016), indic.no==10) %>% 
  select(ISO3, NAME_0) %>% 
  distinct()

The easiest way to get "countries that have indic.no = 10 across ALL years between 2000 to 2016" (assuming you don't need to keep the other data for any reason) is to separate the filtering steps and use the all around indic.no :

library(dplyr)
df %>% 
  group_by(ISO3, NAME_0) %>%
  filter(between(Year, 2000, 2016)) %>% 
  filter(all(indic.no == 10)) %>% 
  ungroup %>% 
  pull(ISO3) %>% unique()

[1] "ABW" "AUS" "BRB" "COL"

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM