簡體   English   中英

按組計算唯一值(data.table)不起作用

[英]Count unique values by group (data.table) doesn't work

我試圖用data.table計算兩組( yearpn_soc_informante )變量( ruc_pk_informante )的唯一值,但我從dplyr得到不同的值(我使用tidytable )。

data.table

library(data.table)
library(tidytable)

> tt[, .(count = uniqueN(ruc_pk_informante)), by = .(sort(year),pn_soc_informante)]
# A tidytable: 4 x 3
   sort pn_soc_informante count
  <int> <chr>             <int>
1  2012 PERSONA NATURAL       1
2  2013 PERSONA NATURAL      10 --------->????
3  2014 PERSONA NATURAL       8
4  2015 PERSONA NATURAL       1

dplyr 和手動

> tt %>% arrange.(year) %>% summarise.(n_distinct.(ruc_pk_informante), .by = c(year, pn_soc_informante))
# A tidytable: 4 x 3
   year pn_soc_informante    V1
  <int> <chr>             <int>
1  2012 PERSONA NATURAL       1
2  2013 PERSONA NATURAL      17 --------> OK!
3  2014 PERSONA NATURAL      12
4  2015 PERSONA NATURAL      13
> t1 = tt %>% filter.(year == 2013)
> length(unique(t1$ruc_pk_informante)) # OK!
[1] 17 

數據

# data ---------------------------------------------------------------------


tt = structure(list(ruc_pk_informante = c("R3", "R3", "R3", "R3", 
                                     "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", 
                                     "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", 
                                     "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", 
                                     "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", 
                                     "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", 
                                     "R3", "R3", "R3", "R3", "R3", "R5", "R5", "R7", "R7", "R7", "R7", 
                                     "C9", "C9", "R12", "R12", "R12", "R14", "R16", "R16", "R16", 
                                     "R16", "R16", "R16", "R16", "R16", "R16", "R16", "R16", "R16", 
                                     "R16", "R16", "R16", "R16", "R16", "R16", "R16", "R16", "R16", 
                                     "C21", "R23", "R23", "R23", "R23", "R23", "R23", "R23", "R23", 
                                     "R23", "R27", "R27", "R27", "R27", "R27", "R27", "R27", "R27", 
                                     "R27", "R27", "R27", "R27", "R27", "R27", "R27", "R27", "R27", 
                                     "R27", "R27", "R27", "R27", "R27", "R27", "R30", "R30", "R30", 
                                     "R30", "R33", "R33", "R33", "R33", "R33", "R33", "R34", "R34", 
                                     "R34", "R34", "R37", "R41", "R41", "R41", "R41", "R41", "R41", 
                                     "R41", "R44", "R44", "R44", "R45", "R45", "R45", "R45", "R45", 
                                     "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", 
                                     "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", 
                                     "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", 
                                     "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", 
                                     "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", 
                                     "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", 
                                     "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", 
                                     "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", 
                                     "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", 
                                     "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45"
), year = c(2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
            2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
            2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2014L, 2014L, 2014L, 
            2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
            2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
            2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2015L, 2015L, 
            2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 
            2015L, 2015L, 2015L, 2013L, 2015L, 2012L, 2013L, 2014L, 2015L, 
            2013L, 2015L, 2013L, 2014L, 2015L, 2013L, 2013L, 2013L, 2013L, 
            2013L, 2013L, 2013L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
            2014L, 2014L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 
            2013L, 2013L, 2013L, 2013L, 2014L, 2014L, 2014L, 2015L, 2015L, 
            2015L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
            2013L, 2014L, 2014L, 2014L, 2014L, 2014L, 2015L, 2015L, 2015L, 
            2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2013L, 2014L, 2015L, 
            2015L, 2013L, 2013L, 2014L, 2014L, 2015L, 2015L, 2013L, 2013L, 
            2014L, 2015L, 2013L, 2013L, 2013L, 2014L, 2014L, 2015L, 2015L, 
            2015L, 2013L, 2014L, 2015L, 2013L, 2013L, 2013L, 2013L, 2013L, 
            2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
            2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
            2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
            2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
            2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
            2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2014L, 
            2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
            2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
            2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
            2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L
), pn_soc_informante = c("PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", 
                         "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL"
)), row.names = c(NA, -250L), class = c("tidytable", "data.table", 
                                        "data.frame"), index = structure(integer(0), "`__year`" = c(67L, 
                                                                                                                                                       1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 
                                                                                                                                                       15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 65L, 68L, 71L, 73L, 76L, 
                                                                                                                                                       77L, 78L, 79L, 80L, 81L, 82L, 98L, 99L, 100L, 101L, 108L, 109L, 
                                                                                                                                                       110L, 111L, 112L, 113L, 114L, 115L, 116L, 131L, 135L, 136L, 141L, 
                                                                                                                                                       142L, 145L, 146L, 147L, 153L, 156L, 157L, 158L, 159L, 160L, 161L, 
                                                                                                                                                       162L, 163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L, 171L, 172L, 
                                                                                                                                                       173L, 174L, 175L, 176L, 177L, 178L, 179L, 180L, 181L, 182L, 183L, 
                                                                                                                                                       184L, 185L, 186L, 187L, 188L, 189L, 190L, 191L, 192L, 193L, 194L, 
                                                                                                                                                       195L, 196L, 197L, 198L, 199L, 200L, 201L, 202L, 203L, 204L, 205L, 
                                                                                                                                                       206L, 207L, 208L, 209L, 210L, 211L, 212L, 213L, 23L, 24L, 25L, 
                                                                                                                                                       26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 
                                                                                                                                                       39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 69L, 
                                                                                                                                                       74L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 102L, 103L, 104L, 
                                                                                                                                                       117L, 118L, 119L, 120L, 121L, 132L, 137L, 138L, 143L, 148L, 149L, 
                                                                                                                                                       154L, 214L, 215L, 216L, 217L, 218L, 219L, 220L, 221L, 222L, 223L, 
                                                                                                                                                       224L, 225L, 226L, 227L, 228L, 229L, 230L, 231L, 232L, 233L, 234L, 
                                                                                                                                                       235L, 236L, 237L, 238L, 239L, 240L, 241L, 242L, 243L, 244L, 245L, 
                                                                                                                                                       246L, 247L, 248L, 249L, 250L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 
                                                                                                                                                       58L, 59L, 60L, 61L, 62L, 63L, 64L, 66L, 70L, 72L, 75L, 91L, 92L, 
                                                                                                                                                       93L, 94L, 95L, 96L, 97L, 105L, 106L, 107L, 122L, 123L, 124L, 
                                                                                                                                                       125L, 126L, 127L, 128L, 129L, 130L, 133L, 134L, 139L, 140L, 144L, 
                                                                                                                                                       150L, 151L, 152L, 155L)))


by上的sort打破了行的順序 - 即它單獨重新排列year ,這將導致來自與“年份”的排序列匹配的其他列的錯誤值。 它應該在i

tt[order(year), .(count = uniqueN(ruc_pk_informante)), 
     by = .(year, pn_soc_informante)]
# A tidytable: 4 × 3
   year pn_soc_informante count
  <int> <chr>             <int>
1  2012 PERSONA NATURAL       1
2  2013 PERSONA NATURAL      17
3  2014 PERSONA NATURAL      12
4  2015 PERSONA NATURAL      13

或者稍后再做訂單

tt[, .(count = uniqueN(ruc_pk_informante)), 
   by = .(year, pn_soc_informante)][order(year)]
# A tidytable: 4 × 3
   year pn_soc_informante count
  <int> <chr>             <int>
1  2012 PERSONA NATURAL       1
2  2013 PERSONA NATURAL      17
3  2014 PERSONA NATURAL      12
4  2015 PERSONA NATURAL      13

從已排序和未排序中查看output

> tt[, .SD, by = .(year = year,pn_soc_informante)][year == 2013]$ruc_pk_informante
  [1] "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3" 
 [23] "R5"  "R7"  "C9"  "R12" "R14" "R16" "R16" "R16" "R16" "R16" "R16" "C21" "R23" "R23" "R23" "R27" "R27" "R27" "R27" "R27" "R27" "R27"
 [45] "R27" "R27" "R30" "R33" "R33" "R34" "R34" "R37" "R41" "R41" "R44" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45"
 [67] "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45"
 [89] "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45"
[111] "R45" "R45" "R45"
> tt[, .SD, by = .(year = sort(year),pn_soc_informante)][year == 2013]$ruc_pk_informante
  [1] "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3" 
 [23] "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3" 
 [45] "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R3"  "R5"  "R5"  "R7" 
 [67] "R7"  "R7"  "R7"  "C9"  "C9"  "R12" "R12" "R12" "R14" "R16" "R16" "R16" "R16" "R16" "R16" "R16" "R16" "R16" "R16" "R16" "R16" "R16"
 [89] "R16" "R16" "R16" "R16" "R16" "R16" "R16" "R16" "C21" "R23" "R23" "R23" "R23" "R23" "R23" "R23" "R23" "R23" "R27" "R27" "R27" "R27"
[111] "R27" "R27" "R27"

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM