简体   繁体   中英

How to categorize multi-index data in R?

I have a multi-index data set with 100 cases, and each case has 10 questions. Each question was scored by 3 raters.

  Case Question     A   B       C 
 <dbl>    <dbl> <dbl> <dbl> <dbl> 
     1        1     1   1       0 
     1        2     1   1       1 
     1        3     1   1       0 
     1        4     1   1       1 
     1        5     1   1       1 
     1        6     1   1       1 
     1        7     1   1       1 
     1        8     1   1       1 
     1        9     1   1       0 
     1       10     1   1       0 
     2        1     1   1       1 
     2        2     1   0.5     1 
     2        3     1   1       1 
     2        4     1   1       1 
     2        5     1   1       1 
     2        6     1   1       1 
     2        7     1   1       1 
     2        8     1   1       1 
     2        9     1   0       0 
     2       10     1   0       0 
     3        1     1   1       1 
     3        2     1   1       1 
     3        3     1   1       1 
     3        4     1   1       1 
     3        5     1   1       1 
     3        6     1   1       1 
     3        7     1   1       1 
     3        8     1   1       1 
     3        9     1   1       0 
     3       10     1   0       0 
......

I have used the following function (thanks to @Limey) to get the sum for each case:

#' Append a per-case summary row to rater score data
#'
#' Sums the rater columns (columns 3 to 5 of `data`) over the questions listed
#' in `qFilter`, separately for each `Case`, and appends the totals as extra
#' rows whose `Question` value is `newIndex`.
#'
#' @param data Data frame with `Case` and `Question` columns and the rater
#'   score columns in positions 3 to 5.
#' @param qFilter Vector of `Question` values to include in the sum.
#' @param newIndex Value stored in `Question` for the added summary rows.
#' @return `data` plus one summary row for every case that has at least one
#'   question in `qFilter`, sorted by `Case` and `Question`.
addSummaryRow <- function(data, qFilter, newIndex) {
  # Read the rater column names from the `data` argument itself; looking them
  # up on a global `df` breaks as soon as the caller's frame has another name.
  raterCols <- colnames(data)[3:5]

  data %>%
    bind_rows(
      data %>%
        pivot_longer(cols = all_of(raterCols)) %>%
        filter(Question %in% qFilter) %>%
        group_by(Case, name) %>%
        summarise(value = sum(value), .groups = "drop") %>%
        pivot_wider(id_cols = c(Case), names_from = name, values_from = value) %>%
        mutate(Question = newIndex)
    ) %>%
    arrange(Case, Question)
}

# Add the total over questions 1-10 as "question 11" for every case.
addSummaryRow(df, qFilter = 1:10, newIndex = 11)
Case Question     A   B       C 
  <dbl>    <dbl> <dbl> <dbl> <dbl> 
   1        1     1   1       0
   1        2     1   1       1
   1        3     1   1       0
   1        4     1   1       1
   1        5     1   1       1
   1        6     1   1       1
   1        7     1   1       1
   1        8     1   1       1
   1        9     1   1       0
   1       10     1   1       0
   1       11    10  10       6
   2        1     1   1       1
   2        2     1   0.5     1
   2        3     1   1       1
   2        4     1   1       1
   2        5     1   1       1
   2        6     1   1       1
   2        7     1   1       1
   2        8     1   1       1
   2        9     1   0       0
   2       10     1   0       0
   2       11    10   7.5     8
   3        1     1   1       1
   3        2     1   1       1
   3        3     1   1       1
   3        4     1   1       1
   3        5     1   1       1
   3        6     1   1       1
   3        7     1   1       1
   3        8     1   1       1
   3        9     1   1       0
   3       10     1   0       0
   3       11    10   9       8
......

Now I want to add question 12 at the end of each case by converting the results of question 11 if <Q11 = 0-4.0, Q12 = 1>; <Q11 = 4.1-7.0, Q12 = 2>; <Q11 = 7.1-10, Q12 = 3>, so it would be:

Case Question     A   B       C 
  <dbl>    <dbl> <dbl> <dbl> <dbl> 
   1        1     1   1       0
   1        2     1   1       1
   1        3     1   1       0
   1        4     1   1       1
   1        5     1   1       1
   1        6     1   1       1
   1        7     1   1       1
   1        8     1   1       1
   1        9     1   1       0
   1       10     1   1       0
   1       11    10  10       6
   1       12     3   3       2
   2        1     1   1       1
   2        2     1   0.5     1
   2        3     1   1       1
   2        4     1   1       1
   2        5     1   1       1
   2        6     1   1       1
   2        7     1   1       1
   2        8     1   1       1
   2        9     1   0       0
   2       10     1   0       0
   2       11    10   7.5     8
   2       12     3   3       3
   3        1     1   1       1
   3        2     1   1       1
   3        3     1   1       1
   3        4     1   1       1
   3        5     1   1       1
   3        6     1   1       1
   3        7     1   1       1
   3        8     1   1       1
   3        9     1   1       0
   3       10     1   0       0
   3       11    10   9       8
   3       12    3    3       3
......

I wonder how to achieve this conversion.

As already stated in comments, I think it's a bad practice to add summary rows into data. (Probably this comes from "spreadsheet-thinking".)

Instead, consider this approach: use a summary data frame `a`,

# Sum each rater's scores (A, B, C) within every case.
a <- aggregate(cbind(A, B, C) ~ Case, data = dat, FUN = sum)
print(a)
#   Case  A    B C
# 1    1 10 10.0 6
# 2    2 10  7.5 8
# 3    3 10  9.0 8

then cut the values at the desired breakpoints and assign the category labels via the `labels=` argument.

# Map each rater total onto the requested categories:
#   [0, 4] -> 1, (4, 7] -> 2, (7, 10] -> 3.
# `include.lowest = TRUE` keeps a total of 0 in category 1 instead of turning
# it into NA, and the breaks at 4 and 7 put 4.1 and 7.1 into the higher
# category, as asked for in the question.
a[2:4] <- lapply(
  a[2:4], cut,
  breaks = c(0, 4, 7, 10), labels = 1:3, include.lowest = TRUE
)
a
#   Case A B C
# 1    1 3 3 2
# 2    2 3 3 3
# 3    3 3 3 3

Edit

If you really need to add the results as rows to your data, you can rbind them, and order the results. Using this method you can easily add more of such rows.

# Question 11: per-case totals of each rater's scores.
a <- aggregate(cbind(A, B, C) ~ Case, dat, sum)
a1 <- cbind(a, Question = 11)

# Question 12: totals mapped to categories [0, 4] -> 1, (4, 7] -> 2,
# (7, 10] -> 3. `labels = FALSE` makes cut() return integer codes rather than
# factors; rbind() would otherwise append the factor labels as character and
# silently turn the numeric score columns A, B and C into character columns.
a[2:4] <- lapply(
  a[2:4], cut,
  breaks = c(0, 4, 7, 10), labels = FALSE, include.lowest = TRUE
)
a2 <- cbind(a, Question = 12)

# Append both kinds of summary rows, then sort them into place within each case.
dat <- rbind(dat, a1, a2)
res <- dat[with(dat, order(Case, Question)), ]

Note that with the breaks `c(.4, 4.1, 7.1, 10)`, `cut` returns `NA` for values at or below .4; try `sq <- seq.int(0, 10, .1); cut(sq, c(.4, 4.1, 7.1, 10))` to see how `cut` works.

Gives

res
#    Case Question  A   B C
# 1     1        1  1   1 0
# 2     1        2  1   1 1
# 3     1        3  1   1 0
# 4     1        4  1   1 1
# 5     1        5  1   1 1
# 6     1        6  1   1 1
# 7     1        7  1   1 1
# 8     1        8  1   1 1
# 9     1        9  1   1 0
# 10    1       10  1   1 0
# 31    1       11 10  10 6
# 34    1       12  3   3 2
# 11    2        1  1   1 1
# 12    2        2  1 0.5 1
# 13    2        3  1   1 1
# 14    2        4  1   1 1
# 15    2        5  1   1 1
# 16    2        6  1   1 1
# 17    2        7  1   1 1
# 18    2        8  1   1 1
# 19    2        9  1   0 0
# 20    2       10  1   0 0
# 32    2       11 10 7.5 8
# 35    2       12  3   3 3
# 21    3        1  1   1 1
# 22    3        2  1   1 1
# 23    3        3  1   1 1
# 24    3        4  1   1 1
# 25    3        5  1   1 1
# 26    3        6  1   1 1
# 27    3        7  1   1 1
# 28    3        8  1   1 1
# 29    3        9  1   1 0
# 30    3       10  1   0 0
# 33    3       11 10   9 8
# 36    3       12  3   3 3

Data:

# Example data: 3 cases x 10 questions, each scored by raters A, B and C.
dat <- data.frame(
  Case = rep(1:3, each = 10L),
  Question = rep(1:10, times = 3L),
  A = rep(1L, 30L),
  B = c(
    rep(1, 10),                      # case 1
    1, 0.5, rep(1, 6), 0, 0,         # case 2
    rep(1, 9), 0                     # case 3
  ),
  C = c(
    0L, 1L, 0L, rep(1L, 5), 0L, 0L,  # case 1
    rep(1L, 8), 0L, 0L,              # case 2
    rep(1L, 8), 0L, 0L               # case 3
  )
)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM