简体   繁体   中英

R - Identify common elements in data frame columns

How can I identify the elements that are common to ALL columns of this data frame (excluding NA)? I tried some approaches using intersect and unique without success.

 # Example data (dput() output): a 47-row data frame with five factor columns
 # (cloudiness, humidity, precipitation, temperature, wind) holding model names.
 # cloudiness uses codes 1:47 and so has no NA; humidity, precipitation and wind
 # are padded with trailing NA; temperature is entirely NA and has no levels.
 df <- structure(list(cloudiness = structure(1:47, .Label = c("ACCESS1-0", 
"ACCESS1-3", "BNU-ESM", "CCSM4", "CESM1-BGC", "CESM1-CAM5", "CESM1-CAM5-1-FV2", 
"CESM1-FASTCHEM", "CESM1-WACCM", "CMCC-CESM", "CMCC-CM", "CMCC-CMS", 
"CNRM-CM5", "CNRM-CM5-2", "CSIRO-Mk3-6-0", "CanESM2", "FGOALS-g2", 
"FIO-ESM", "GFDL-CM3", "GFDL-ESM2G", "GFDL-ESM2M", "GISS-E2-H", 
"GISS-E2-H-CC", "GISS-E2-R", "GISS-E2-R-CC", "HadCM3", "HadGEM2-AO", 
"HadGEM2-CC", "HadGEM2-ES", "IPSL-CM5A-LR", "IPSL-CM5A-MR", "IPSL-CM5B-LR", 
"MIROC-ESM", "MIROC-ESM-CHEM", "MIROC4h", "MIROC5", "MPI-ESM-LR", 
"MPI-ESM-MR", "MPI-ESM-P", "MRI-CGCM3", "MRI-ESM1", "NorESM1-M", 
"NorESM1-ME", "bcc-csm1-1", "bcc-csm1-1-m", "concat", "inmcm4"
), class = "factor"), humidity = structure(c(1L, 2L, 3L, 4L, 
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 
32L, 33L, 34L, 35L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA), .Label = c("ACCESS1-0", "ACCESS1-3", "BNU-ESM", "CCSM4", 
"CESM1-BGC", "CESM1-CAM5", "CESM1-FASTCHEM", "CESM1-WACCM", "CNRM-CM5", 
"CSIRO-Mk3-6-0", "CanESM2", "GFDL-CM3", "GFDL-ESM2G", "GFDL-ESM2M", 
"GISS-E2-H", "GISS-E2-H-CC", "GISS-E2-R", "GISS-E2-R-CC", "HadCM3", 
"HadGEM2-AO", "HadGEM2-CC", "HadGEM2-ES", "IPSL-CM5A-MR", "IPSL-CM5B-LR", 
"MIROC-ESM", "MIROC-ESM-CHEM", "MIROC4h", "MIROC5", "MRI-CGCM3", 
"MRI-ESM1", "NorESM1-M", "NorESM1-ME", "bcc-csm1-1", "bcc-csm1-1-m", 
"inmcm4"), class = "factor"), precipitation = structure(c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 
NA, NA, NA, NA, NA, NA), .Label = c("BNU-ESM", "CCSM4", "CESM1-BGC", 
"CESM1-CAM5", "CESM1-FASTCHEM", "CESM1-WACCM", "CMCC-CESM", "CMCC-CMS", 
"CNRM-CM5-2", "CanCM4", "CanESM2", "FGOALS-g2", "FIO-ESM", "GFDL-CM2p1", 
"GFDL-CM3", "GFDL-ESM2M", "GISS-E2-H", "GISS-E2-H-CC", "GISS-E2-R", 
"GISS-E2-R-CC", "HadCM3", "HadGEM2-AO", "HadGEM2-CC", "HadGEM2-ES", 
"IPSL-CM5A-LR", "IPSL-CM5A-MR", "IPSL-CM5B-LR", "MIROC-ESM", 
"MIROC-ESM-CHEM", "MIROC4h", "MIROC5", "MPI-ESM-LR", "MPI-ESM-MR", 
"MPI-ESM-P", "MRI-CGCM3", "MRI-ESM1", "NorESM1-M", "NorESM1-ME", 
"bcc-csm1-1", "bcc-csm1-1-m", "inmcm4"), class = "factor"), temperature = structure(c(NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_), .Label = character(0), class = "factor"), wind = structure(c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, NA, NA, 
NA, NA, NA, NA, NA, NA), .Label = c("ACCESS1-0", "ACCESS1-3", 
"BNU-ESM", "CMCC-CESM", "CMCC-CM", "CMCC-CMS", "CNRM-CM5", "CNRM-CM5-2", 
"CSIRO-Mk3-6-0", "CanESM2", "GFDL-CM2p1", "GFDL-CM3", "GFDL-ESM2G", 
"GFDL-ESM2M", "GISS-E2-H", "GISS-E2-H-CC", "GISS-E2-R", "GISS-E2-R-CC", 
"HadCM3", "HadGEM2-AO", "HadGEM2-CC", "HadGEM2-ES", "IPSL-CM5A-LR", 
"IPSL-CM5A-MR", "IPSL-CM5B-LR", "MIROC-ESM", "MIROC-ESM-CHEM", 
"MIROC4h", "MIROC5", "MPI-ESM-LR", "MPI-ESM-MR", "MPI-ESM-P", 
"MRI-CGCM3", "MRI-ESM1", "NorESM1-M", "NorESM1-ME", "bcc-csm1-1", 
"bcc-csm1-1-m", "inmcm4"), class = "factor")), .Names = c("cloudiness", 
"humidity", "precipitation", "temperature", "wind"), row.names = c(NA, 
-47L), class = "data.frame")

You could try Reduce with intersect after keeping only the columns that are not entirely NA ( df[colSums(!is.na(df)) != 0] )

# Elements common to every column that has at least one non-NA value.
#   1. colSums(!is.na(df)) != 0 keeps only columns that are not entirely NA
#      (an all-NA column would otherwise make the intersection empty).
#   2. NAs are stripped from each kept column before intersecting, because
#      intersect() matches NA to NA: without this step NA would be reported
#      as a "common element" whenever every kept column contains an NA.
Reduce(
  intersect,
  lapply(
    df[colSums(!is.na(df)) != 0],
    function(col) col[!is.na(col)]
  )
)
#[1] "BNU-ESM"        "CanESM2"        "GFDL-CM3"       "GFDL-ESM2M"    
#[5] "GISS-E2-H"      "GISS-E2-H-CC"   "GISS-E2-R"      "GISS-E2-R-CC"  
#[9] "HadCM3"         "HadGEM2-AO"     "HadGEM2-CC"     "HadGEM2-ES"    
#[13] "IPSL-CM5A-MR"   "IPSL-CM5B-LR"   "MIROC-ESM"      "MIROC-ESM-CHEM"
#[17] "MIROC4h"        "MIROC5"         "MRI-CGCM3"      "MRI-ESM1"      
#[21] "NorESM1-M"      "NorESM1-ME"     "bcc-csm1-1"     "bcc-csm1-1-m"  
#[25] "inmcm4"     

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM