简体   繁体   中英

Case When Sequential Character Values and Time Greater Than Specified Interval

Reprex :

data
# A tibble: 82 x 12
# Groups:   SUBJ_ID, READER [2]
   SUBJ_ID VISIT DOS        MOS_DUR READER   SPD SPD_NADIR PCT_DIFF_NADIR SPD_BL PCT_DIFF_BL
   <chr>   <chr> <date>       <dbl> <fct>  <dbl>     <dbl>          <dbl>  <dbl>       <dbl>
 1 1002-3~ 1 Sc~ 2012-06-13   0     2       275.      275.         NA       275.      NA    
 2 1002-3~ 1 We~ 2012-07-12   0.967 2       358.      275.          0.302   275.       0.302
 3 1002-3~ 1 We~ 2012-08-09   1.87  2       439.      275.          0.594   275.       0.594
 4 1002-3~ 1 We~ 2012-09-18   3.17  2       528.      275.          0.919   275.       0.919
 5 1002-3~ Unsc~ 2012-10-25   4.39  2       584.      275.          1.12    275.       1.12 
 6 1002-3~ Unsc~ 2012-12-20   6.23  2       573.      275.          1.08    275.       1.08 
 7 1002-3~ Unsc~ 2013-02-13   8     2       516.      275.          0.872   275.       0.872
 8 1002-3~ Unsc~ 2013-03-29   9.52  2       532.      275.          0.930   275.       0.930
 9 1002-3~ Unsc~ 2013-05-14  11.0   2       534.      275.          0.941   275.       0.941
10 1002-3~ Unsc~ 2013-08-20  14.2   2       419.      275.          0.522   275.       0.522
# ... with 72 more rows, and 2 more variables: PROG <chr>, PRGMOS <dbl>
dput(data)
structure(list(SUBJ_ID = c("1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169"), VISIT = c("1 Screening", 
"1 Week 04", "1 Week 08", "1 Week 16", "Unscheduled 01", "Unscheduled 02", 
"Unscheduled 03", "Unscheduled 04", "Unscheduled 05", "Unscheduled 06", 
"Unscheduled 07", "Unscheduled 08", "Unscheduled 09", "Unscheduled 10", 
"Unscheduled 11", "Unscheduled 12", "Unscheduled 13", "Unscheduled 14", 
"Unscheduled 15", "Unscheduled 16", "Unscheduled 17", "2 Screening", 
"2 Week 04", "2 Week 08", "2 Week 16", "Unscheduled 18", "Unscheduled 19", 
"Unscheduled 20", "Unscheduled 21", "Unscheduled 22", "Unscheduled 23", 
"3 Screening", "3 Week 04", "3 Week 08", "3 Week 16", "Unscheduled 24", 
"Unscheduled 25", "Unscheduled 26", "Unscheduled 27", "Unscheduled 28", 
"Unscheduled 29", "1 Screening", "1 Week 04", "1 Week 08", "1 Week 16", 
"Unscheduled 01", "Unscheduled 02", "Unscheduled 03", "Unscheduled 04", 
"Unscheduled 05", "Unscheduled 06", "Unscheduled 07", "Unscheduled 08", 
"Unscheduled 09", "Unscheduled 10", "Unscheduled 11", "Unscheduled 12", 
"Unscheduled 13", "Unscheduled 14", "Unscheduled 15", "Unscheduled 16", 
"Unscheduled 17", "2 Screening", "2 Week 04", "2 Week 08", "2 Week 16", 
"Unscheduled 18", "Unscheduled 19", "Unscheduled 20", "Unscheduled 21", 
"Unscheduled 22", "Unscheduled 23", "3 Screening", "3 Week 04", 
"3 Week 08", "3 Week 16", "Unscheduled 24", "Unscheduled 25", 
"Unscheduled 26", "Unscheduled 27", "Unscheduled 28", "Unscheduled 29"
), DOS = structure(c(15504, 15533, 15561, 15601, 15638, 15694, 
15749, 15793, 15839, 15937, 16027, 16153, 16272, 16398, 16552, 
16680, 16909, 16972, 17028, 17119, 17252, 17316, 17343, 17371, 
17427, 17490, 17553, 17609, 17665, 17756, 17840, 17862, 17896, 
17924, 17980, 17988, 18036, 18092, 18148, 18231, 18326, 15504, 
15533, 15561, 15601, 15638, 15694, 15749, 15793, 15839, 15937, 
16027, 16153, 16272, 16398, 16552, 16680, 16909, 16972, 17028, 
17119, 17252, 17316, 17343, 17371, 17427, 17490, 17553, 17609, 
17665, 17756, 17840, 17862, 17896, 17924, 17980, 17988, 18036, 
18092, 18148, 18231, 18326), class = "Date"), MOS_DUR = c(0, 
0.966666666666667, 1.87096774193548, 3.16666666666667, 4.38709677419355, 
6.2258064516129, 8, 9.51612903225806, 11.0322580645161, 14.2258064516129, 
17.1666666666667, 21.3548387096774, 25.258064516129, 29.3666666666667, 
34.4666666666667, 38.6451612903226, 46.1666666666667, 48.2333333333333, 
50.0645161290323, 53.0333333333333, 57.4516129032258, 59.5483870967742, 
60.4333333333333, 61.3548387096774, 63.1666666666667, 65.2333333333333, 
67.2903225806452, 69.1935483870968, 71.0322580645161, 74, 76.741935483871, 
77.4666666666667, 78.5806451612903, 79.4838709677419, 81.3870967741936, 
81.6451612903226, 83.2258064516129, 85.0645161290323, 86.8709677419355, 
89.6, 92.7241379310345, 0, 0.966666666666667, 1.87096774193548, 
3.16666666666667, 4.38709677419355, 6.2258064516129, 8, 9.51612903225806, 
11.0322580645161, 14.2258064516129, 17.1666666666667, 21.3548387096774, 
25.258064516129, 29.3666666666667, 34.4666666666667, 38.6451612903226, 
46.1666666666667, 48.2333333333333, 50.0645161290323, 53.0333333333333, 
57.4516129032258, 59.5483870967742, 60.4333333333333, 61.3548387096774, 
63.1666666666667, 65.2333333333333, 67.2903225806452, 69.1935483870968, 
71.0322580645161, 74, 76.741935483871, 77.4666666666667, 78.5806451612903, 
79.4838709677419, 81.3870967741936, 81.6451612903226, 83.2258064516129, 
85.0645161290323, 86.8709677419355, 89.6, 92.7241379310345), 
    READER = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("1", 
    "2", "3", "4", "5"), class = "factor"), SPD = c(275.32, 358.49, 
    438.99, 528.38, 583.55, 572.93, 515.5, 531.5, 534.4, 419.07, 
    328.02, 145.58, 146.78, 165.25, 136.49, 200.55, 170.55, 271.51, 
    243.32, 356.41, 447.68, 526.89, 586.47, 619.36, 666.5, 625.72, 
    679.19, 639.81, 638.95, 686.85, 920.76, 992.07, 1131.35, 
    1092.71, 1405.81, 1692.02, 1553.67, 2074.41, 2056.49, 1281.96, 
    1944.41, 361.13, 488.18, 581.76, 623.45, 730.45, 639.17, 
    545.33, 564.94, 633.28, 491.78, 434.64, 203.66, 154.57, 229.18, 
    148.35, 132.52, 152.16, 288.91, 334.22, 420.01, 688.19, 645.57, 
    630.8, 683.63, 758.01, 642.23, 636.91, 691.11, 708.32, 579.91, 
    884.35, 1193.26, 1377.71, 1208.79, 1555.86, 1737.75, 1616, 
    2003.85, 2190.7, 1353.92, 2239.47), SPD_NADIR = c(275.32, 
    275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 
    275.32, 275.32, 145.58, 145.58, 145.58, 136.49, 136.49, 136.49, 
    136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 
    136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 
    136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 
    361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 
    361.13, 361.13, 361.13, 203.66, 154.57, 154.57, 148.35, 132.52, 
    132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 
    132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 
    132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 
    132.52), PCT_DIFF_NADIR = c(NA, 0.302084846723812, 0.594471887258463, 
    0.919148627052157, 1.11953363359, 1.08096033706233, 0.872366700566613, 
    0.930480894958594, 0.94101409269214, 0.522119715240448, 0.191413627778585, 
    -0.47123347377597, 0.00824289050693769, 0.135114713559555, 
    -0.0624398955900536, 0.469338413070555, 0.249542090995677, 
    0.989229980218331, 0.782694702908638, 1.61125357169023, 2.2799472488827, 
    2.86028280460107, 3.29679830024178, 3.53776833467653, 3.88314162209686, 
    3.58436515495641, 3.97611546633453, 3.68759616089091, 3.68129533299143, 
    4.03223679390431, 5.74598871712213, 6.26844457469412, 7.28888563264708, 
    7.00578796981464, 9.29972891786944, 11.3966590959045, 10.3830317239358, 
    14.1982562825115, 14.0669646127921, 8.39233643490366, 13.2458055535204, 
    NA, 0.35181236673774, 0.610943427574558, 0.726386619776812, 
    1.02267881372359, 0.769916650513665, 0.510065627336416, 0.564367402320494, 
    0.753606734416969, 0.36178107606679, 0.203555506327361, -0.436047960568216, 
    -0.241038986546204, 0.482693925082487, -0.040240667658666, 
    -0.106707111560499, 0.148204044672502, 1.18012375490492, 
    1.52203440990039, 2.16940839118624, 4.19310292785995, 3.87149109568367, 
    3.76003622094778, 4.15869302746755, 4.71996679746453, 3.8462873528524, 
    3.80614246906127, 4.21513733776034, 4.34500452761847, 3.37601871415635, 
    5.67333232719589, 8.00437669785693, 9.39624207666767, 8.12156655599155, 
    10.7405674615152, 12.1131150015092, 11.1943857530939, 14.1211137941443, 
    15.5310896468458, 9.21672200422578, 15.899109568367), SPD_BL = c(275.32, 
    275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 
    275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 
    275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 
    275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 
    275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 
    361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 
    361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 
    361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 
    361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 
    361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 
    361.13), PCT_DIFF_BL = c(NA, 0.302084846723812, 0.594471887258463, 
    0.919148627052157, 1.11953363359, 1.08096033706233, 0.872366700566613, 
    0.930480894958594, 0.94101409269214, 0.522119715240448, 0.191413627778585, 
    -0.47123347377597, -0.466874909196571, -0.399789336045329, 
    -0.504249600464914, -0.271574894668023, -0.380539009152986, 
    -0.0138384425395903, -0.11622838878396, 0.294530001452855, 
    0.626035159087607, 0.913736742699404, 1.13013947406654, 1.24960046491356, 
    1.42081941014093, 1.27270085718437, 1.46691123056807, 1.3238776696208, 
    1.32075403167224, 1.49473340113323, 2.34432660177248, 2.60333430190324, 
    3.10921836408543, 2.9688725846288, 4.10609472613686, 5.1456486996949, 
    4.64314252506175, 6.53454162429173, 6.46945372657271, 3.65625454017144, 
    6.06236379485689, NA, 0.35181236673774, 0.610943427574558, 
    0.726386619776812, 1.02267881372359, 0.769916650513665, 0.510065627336416, 
    0.564367402320494, 0.753606734416969, 0.36178107606679, 0.203555506327361, 
    -0.436047960568216, -0.571982388613519, -0.365380887768947, 
    -0.589206103065378, -0.633040733253953, -0.578655885692133, 
    -0.199983385484451, -0.0745161022346523, 0.16304377924847, 
    0.905657242544236, 0.787638800431978, 0.746739401323623, 
    0.893030210727439, 1.09899482180932, 0.778390053443358, 0.763658516323761, 
    0.913742973444466, 0.961398942209177, 0.605820618613796, 
    1.4488411375405, 2.30423947055077, 2.81499736936837, 2.34724337496192, 
    3.30831002686013, 3.81197906571041, 3.47484285437377, 4.54883283028272, 
    5.06623653531969, 2.7491208152189, 5.20128485586908), PROG = c(NA, 
    "PDu", "PDu", "PDu", "PDu", "PDc", "PDc", "PDc", "PDc", "PDc", 
    NA, NA, NA, NA, NA, "PDc", NA, "PDc", "PDc", "PDc", "PDc", 
    "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", 
    "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", 
    "PDc", "PDc", NA, "PDu", "PDu", "PDu", "PDu", "PDc", "PDc", 
    "PDc", "PDc", "PDc", NA, NA, NA, "PDc", NA, NA, NA, "PDc", 
    "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", 
    "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", 
    "PDc", "PDc", "PDc", "PDc", "PDc"), PRGMOS = c(NA, NA, NA, 
    NA, 4.38709677419355, 6.2258064516129, 8, 9.51612903225806, 
    11.0322580645161, 14.2258064516129, NA, NA, NA, NA, NA, 38.6451612903226, 
    NA, 48.2333333333333, 50.0645161290323, 53.0333333333333, 
    57.4516129032258, 59.5483870967742, 60.4333333333333, 61.3548387096774, 
    63.1666666666667, 65.2333333333333, 67.2903225806452, 69.1935483870968, 
    71.0322580645161, 74, 76.741935483871, 77.4666666666667, 
    78.5806451612903, 79.4838709677419, 81.3870967741936, 81.6451612903226, 
    83.2258064516129, 85.0645161290323, 86.8709677419355, 89.6, 
    92.7241379310345, NA, NA, NA, NA, 4.38709677419355, 6.2258064516129, 
    8, 9.51612903225806, 11.0322580645161, 14.2258064516129, 
    NA, NA, NA, 29.3666666666667, NA, NA, NA, 48.2333333333333, 
    50.0645161290323, 53.0333333333333, 57.4516129032258, 59.5483870967742, 
    60.4333333333333, 61.3548387096774, 63.1666666666667, 65.2333333333333, 
    67.2903225806452, 69.1935483870968, 71.0322580645161, 74, 
    76.741935483871, 77.4666666666667, 78.5806451612903, 79.4838709677419, 
    81.3870967741936, 81.6451612903226, 83.2258064516129, 85.0645161290323, 
    86.8709677419355, 89.6, 92.7241379310345)), row.names = c(NA, 
-82L), groups = structure(list(SUBJ_ID = c("1002-31169", "1002-31169"
), READER = structure(c(2L, 4L), .Label = c("1", "2", "3", "4", 
"5"), class = "factor"), .rows = structure(list(1:41, 42:82), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

Problem: I need to set PDu values to PDc when there are sequential PDu values and the time duration (difference in MOS_DUR) between the first occurrence and last occurrence is >= 3. PDu can only occur when MOS_DUR <= 6. In the example given, the PROG value for the second scan (row 2) would be reset to PDc from PDu.

Edit 1: Added a PRGMOS variable to the reprex in an attempt to provide more clarity. Essentially, if there is a sequence where last(PRGMOS) - first(PRGMOS) > 3 & PROG == "PDu", PDu will then become PDc.

This is my current code, but it is not doing what I want.

# Label every scan, within each SUBJ_ID/READER group, from its change
# relative to nadir: "PDu" when PCT_DIFF_NADIR >= 0.25 and MOS_DUR <= 6,
# "PDc" when PCT_DIFF_NADIR >= 0.25 and MOS_DUR > 6. Rows matching neither
# condition (including rows with a missing PCT_DIFF_NADIR) are left as NA.
# The result stays grouped by SUBJ_ID and READER.
data <- data %>%
  group_by(SUBJ_ID, READER) %>%
  mutate(
    PROG = case_when(
      MOS_DUR <= 6 & PCT_DIFF_NADIR >= 0.25 ~ "PDu",
      MOS_DUR > 6 & PCT_DIFF_NADIR >= 0.25 ~ "PDc"
    )
  )

# Attempt (not working as intended): copy MOS_DUR into PRGMOS for
# progression rows, then flag SUBJ_ID/READER groups whose PRGMOS values
# span more than 3.
data <- data %>%
  group_by(SUBJ_ID, READER) %>%
  mutate(PRGMOS = case_when(
    # lead(PROG) is the PROG of the next row in the same group; on the last
    # row of a group it is NA, so neither comparison below can be TRUE there.
    PROG == "PDu" & lead(PROG) == "PDc" ~ MOS_DUR,
    PROG == "PDu" & lead(PROG) == "PDu" ~ MOS_DUR,
    # NOTE(review): this argument has no `~ value` part; case_when() expects
    # two-sided formulas, so this case is malformed. Presumably
    # `PROG == "PDc" ~ MOS_DUR` was intended -- confirm.
    PROG == "PDc"
  ))  %>%
  mutate(PRGFLG = case_when(
    # max()/min() are called without na.rm = TRUE, so any NA in a group's
    # PRGMOS makes the difference NA and PRGFLG stays NA for that group.
    # The span is also taken over the whole SUBJ_ID/READER group rather than
    # over a single run of consecutive "PDu" rows.
    max(PRGMOS) - min(PRGMOS) > 3 ~ 1,
  ))

Edit 2: Expected output example below:

> output
# A tibble: 33 x 5
   SUBJ_ID    READER MOS_DUR PROG  PROG2
   <chr>      <chr>    <dbl> <chr> <chr>
 1 1001-31169 4        0     NA    NA   
 2 1001-31169 4        0.903 PDu   PDc  
 3 1001-31169 4        2     PDu   PDc  
 4 1001-31169 4        3.58  PDu   PDc  
 5 1001-31169 4        5.42  PDu   PDc  
 6 1001-31169 4        7.26  NA    NA   
 7 1001-31169 4        9.07  NA    NA   
 8 1001-31169 4       11.0   NA    NA   
 9 1001-31169 4       14.2   NA    NA   
10 1001-31169 4       17.7   NA    NA   
# ... with 23 more rows

As you can see in the example above, the PDu values were converted to PDc since 5.42 - 0.903 > 3 and all PDu's were in sequence.

Building off of @akrun's code, here is the solution that I used:

# Step 1: label every scan, within each SUBJ_ID/READER group, from its
# change relative to nadir: "PDu" when PCT_DIFF_NADIR >= 0.25 and
# MOS_DUR <= 6, "PDc" when PCT_DIFF_NADIR >= 0.25 and MOS_DUR > 6. Rows
# matching neither condition (including rows with a missing
# PCT_DIFF_NADIR) are left as NA. The result stays grouped.
data <- data %>%
  group_by(SUBJ_ID, READER) %>%
  mutate(
    PROG = case_when(
      MOS_DUR <= 6 & PCT_DIFF_NADIR >= 0.25 ~ "PDu",
      MOS_DUR > 6 & PCT_DIFF_NADIR >= 0.25 ~ "PDc"
    )
  )

# Step 2: promote a run of consecutive "PDu" scans to "PDc" when the run
# spans at least 3 units of MOS_DUR (last minus first scan of the run).
# `grp` is a run id over PROG, so each SUBJ_ID/READER/grp group is one
# uninterrupted stretch of identical PROG values. `grp` is kept in the
# result, which is returned ungrouped.
# NOTE(review): rleid() is presumably data.table::rleid -- confirm that
# data.table is attached alongside dplyr.
data <- data %>%
  group_by(SUBJ_ID, READER, grp = rleid(PROG)) %>%
  mutate(
    PROG2 = case_when(
      # Both tests are per-group scalars. An NA result (for example a run
      # of missing PROG values) does not match and falls through to the
      # default branch, which keeps the original PROG.
      (last(MOS_DUR) - first(MOS_DUR)) >= 3 & all(PROG == "PDu") ~ "PDc",
      TRUE ~ PROG
    )
  ) %>%
  ungroup()

I needed to also group_by(READER) in addition to the other vars. Thanks again @akrun!

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM