简体   繁体   中英

Delete rows based on two columns (& containing specific strings)

I'm trying to delete rows based on two criteria:

  1. Trial number is the same

  2. Code includes the word "test"

The closest code I've found so far is something like this:

# Attempt: drop every row whose Trial also occurs in a row where Code is
# exactly "test". `==` compares whole strings, so codes that merely contain
# "test" (e.g. "pospic_test3") are not matched.
report[!(report$Trial %in% report$Trial[report$Code == "test"]), ]

but it doesn't work since there are no cells named "test"; they just contain the word.

What I want in the end (according to the sample I have attached) is rows 1-20 to be deleted

# Sample data: 30 rows for subject "VP02_RP", covering six trials with five
# events each (Picture, Sound, Picture, Nothing, Response).
# Time is stored as character strings, not numbers.
report <- structure(
  list(
    Subject = rep("VP02_RP", times = 30),
    Trial = rep(c(9, 12, 15, 18, 22, 25), each = 5),
    `Event type` = rep(
      c("Picture", "Sound", "Picture", "Nothing", "Response"),
      times = 6
    ),
    Code = c(
      "face01_p", "mnegsound_test3", "pospic_test3",
      "ev_mposposneg_adj_onset", "14",
      "face01_n", "mnegsound_test4", "pospic_test4",
      "ev_mnegposneg_adj_onset", "14",
      "face01_n", "mpossound_test5", "pospic_test5",
      "ev_mnegpos_adj_onset", "15",
      "face01_p", "mpossound_test6", "negpic_test6",
      "ev_mposnegpos_adj_onset", "15",
      "face01_n", "mnegsound1", "pospic1",
      "ev_mnegposneg_adj_onset", "14",
      "face02_n", "fpossound1", "negpic1",
      "ev_fnegnegpos_adj_onset", "15"
    ),
    Time = c(
      "685252", "685254", "703617", "725617", "760233",
      "781466", "781467", "799830", "821830", "864732",
      "887969", "888260", "906623", "928623", "958962",
      "987666", "987668", "1006031", "1028031", "1076642",
      "1700595", "1700886", "1716953", "1748603", "1779212",
      "1801100", "1801392", "1816560", "1846190", "1877413"
    )
  ),
  row.names = c(NA, -30L),
  class = c("tbl_df", "tbl", "data.frame"),
  # Attribute kept from the original object: indices 1632-1634 with class
  # "omit" (presumably left behind by an earlier na.omit() call).
  na.action = structure(
    1632:1634,
    names = c("1632", "1633", "1634"),
    class = "omit"
  )
)
   Subject Trial `Event type` Code                    Time  
   <chr>   <dbl> <chr>        <chr>                   <chr> 
 1 VP02_RP     9 Picture      face01_p                685252
 2 VP02_RP     9 Sound        mnegsound_test3         685254
 3 VP02_RP     9 Picture      pospic_test3            703617
 4 VP02_RP     9 Nothing      ev_mposposneg_adj_onset 725617
 5 VP02_RP     9 Response     14                      760233
 6 VP02_RP    12 Picture      face01_n                781466
 7 VP02_RP    12 Sound        mnegsound_test4         781467
 8 VP02_RP    12 Picture      pospic_test4            799830
 9 VP02_RP    12 Nothing      ev_mnegposneg_adj_onset 821830
10 VP02_RP    12 Response     14                      864732

Does this work:

library(dplyr)
library(stringr)

# Keep a trial only when none of its Code values contains "test".
# The condition is a single value per Trial group, so each trial is kept or
# dropped as a whole. The result is still grouped by Trial.
report %>%
  group_by(Trial) %>%
  filter(!any(str_detect(Code, "test")))
# A tibble: 10 x 5
# Groups:   Trial [2]
   Subject Trial `Event type` Code                    Time   
   <chr>   <dbl> <chr>        <chr>                   <chr>  
 1 VP02_RP    22 Picture      face01_n                1700595
 2 VP02_RP    22 Sound        mnegsound1              1700886
 3 VP02_RP    22 Picture      pospic1                 1716953
 4 VP02_RP    22 Nothing      ev_mnegposneg_adj_onset 1748603
 5 VP02_RP    22 Response     14                      1779212
 6 VP02_RP    25 Picture      face02_n                1801100
 7 VP02_RP    25 Sound        fpossound1              1801392
 8 VP02_RP    25 Picture      negpic1                 1816560
 9 VP02_RP    25 Nothing      ev_fnegnegpos_adj_onset 1846190
10 VP02_RP    25 Response     15                      1877413

If you replace the part report$Code == "test" in your code with grep("test", report$Code), it should give the desired result.

# Row positions whose Code contains "test"; every trial that owns one of
# those rows is excluded from the result.
test_rows <- grep("test", report$Code)
report[!(report$Trial %in% report$Trial[test_rows]), ]
#   Subject Trial Event type                    Code    Time
#21 VP02_RP    22    Picture                face01_n 1700595
#22 VP02_RP    22      Sound              mnegsound1 1700886
#23 VP02_RP    22    Picture                 pospic1 1716953
#24 VP02_RP    22    Nothing ev_mnegposneg_adj_onset 1748603
#25 VP02_RP    22   Response                      14 1779212
#26 VP02_RP    25    Picture                face02_n 1801100
#27 VP02_RP    25      Sound              fpossound1 1801392
#28 VP02_RP    25    Picture                 negpic1 1816560
#29 VP02_RP    25    Nothing ev_fnegnegpos_adj_onset 1846190
#30 VP02_RP    25   Response                      15 1877413

You can group the data by Trial and filter the rows that do not have the specified text:

library(dplyr)

# Keep a trial only when none of its Code values contains "test".
# Wrapping grepl() in any() yields one value per Trial group, so filter()
# keeps or drops each trial as a whole. Without any(), the condition is
# evaluated row by row and only the matching rows are removed, leaving the
# other rows of those trials in place.
report %>%
  group_by(Trial) %>%
  filter(!any(grepl("test", Code)))

# A tibble: 10 x 5
# Groups:   Trial [2]
   Subject Trial `Event type` Code                    Time   
   <chr>   <dbl> <chr>        <chr>                   <chr>  
 1 VP02_RP    22 Picture      face01_n                1700595
 2 VP02_RP    22 Sound        mnegsound1              1700886
 3 VP02_RP    22 Picture      pospic1                 1716953
 4 VP02_RP    22 Nothing      ev_mnegposneg_adj_onset 1748603
 5 VP02_RP    22 Response     14                      1779212
 6 VP02_RP    25 Picture      face02_n                1801100
 7 VP02_RP    25 Sound        fpossound1              1801392
 8 VP02_RP    25 Picture      negpic1                 1816560
 9 VP02_RP    25 Nothing      ev_fnegnegpos_adj_onset 1846190
10 VP02_RP    25 Response     15                      1877413

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM