[英]Count unique values by group (data.table) doesn't work
我試圖用data.table
計算兩組( year
和pn_soc_informante
)變量( ruc_pk_informante
)的唯一值,但我從dplyr
得到不同的值(我使用tidytable
)。
library(data.table)
library(tidytable)
> tt[, .(count = uniqueN(ruc_pk_informante)), by = .(sort(year),pn_soc_informante)]
# A tidytable: 4 x 3
sort pn_soc_informante count
<int> <chr> <int>
1 2012 PERSONA NATURAL 1
2 2013 PERSONA NATURAL 10 --------->????
3 2014 PERSONA NATURAL 8
4 2015 PERSONA NATURAL 1
> tt %>% arrange.(year) %>% summarise.(n_distinct.(ruc_pk_informante), .by = c(year, pn_soc_informante))
# A tidytable: 4 x 3
year pn_soc_informante V1
<int> <chr> <int>
1 2012 PERSONA NATURAL 1
2 2013 PERSONA NATURAL 17 --------> OK!
3 2014 PERSONA NATURAL 12
4 2015 PERSONA NATURAL 13
> t1 = tt %>% filter.(year == 2013)
> length(unique(t1$ruc_pk_informante)) # OK!
[1] 17
# data ---------------------------------------------------------------------
tt = structure(list(ruc_pk_informante = c("R3", "R3", "R3", "R3",
"R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3",
"R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3",
"R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3",
"R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3",
"R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3", "R3",
"R3", "R3", "R3", "R3", "R3", "R5", "R5", "R7", "R7", "R7", "R7",
"C9", "C9", "R12", "R12", "R12", "R14", "R16", "R16", "R16",
"R16", "R16", "R16", "R16", "R16", "R16", "R16", "R16", "R16",
"R16", "R16", "R16", "R16", "R16", "R16", "R16", "R16", "R16",
"C21", "R23", "R23", "R23", "R23", "R23", "R23", "R23", "R23",
"R23", "R27", "R27", "R27", "R27", "R27", "R27", "R27", "R27",
"R27", "R27", "R27", "R27", "R27", "R27", "R27", "R27", "R27",
"R27", "R27", "R27", "R27", "R27", "R27", "R30", "R30", "R30",
"R30", "R33", "R33", "R33", "R33", "R33", "R33", "R34", "R34",
"R34", "R34", "R37", "R41", "R41", "R41", "R41", "R41", "R41",
"R41", "R44", "R44", "R44", "R45", "R45", "R45", "R45", "R45",
"R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45",
"R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45",
"R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45",
"R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45",
"R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45",
"R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45",
"R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45",
"R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45",
"R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45",
"R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45", "R45"
), year = c(2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2014L, 2014L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2015L, 2015L,
2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L,
2015L, 2015L, 2015L, 2013L, 2015L, 2012L, 2013L, 2014L, 2015L,
2013L, 2015L, 2013L, 2014L, 2015L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L,
2013L, 2013L, 2013L, 2013L, 2014L, 2014L, 2014L, 2015L, 2015L,
2015L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2014L, 2014L, 2014L, 2014L, 2014L, 2015L, 2015L, 2015L,
2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2013L, 2014L, 2015L,
2015L, 2013L, 2013L, 2014L, 2014L, 2015L, 2015L, 2013L, 2013L,
2014L, 2015L, 2013L, 2013L, 2013L, 2014L, 2014L, 2015L, 2015L,
2015L, 2013L, 2014L, 2015L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L
), pn_soc_informante = c("PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL",
"PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL", "PERSONA NATURAL"
)), row.names = c(NA, -250L), class = c("tidytable", "data.table",
"data.frame"), index = structure(integer(0), "`__year`" = c(67L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 65L, 68L, 71L, 73L, 76L,
77L, 78L, 79L, 80L, 81L, 82L, 98L, 99L, 100L, 101L, 108L, 109L,
110L, 111L, 112L, 113L, 114L, 115L, 116L, 131L, 135L, 136L, 141L,
142L, 145L, 146L, 147L, 153L, 156L, 157L, 158L, 159L, 160L, 161L,
162L, 163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L, 171L, 172L,
173L, 174L, 175L, 176L, 177L, 178L, 179L, 180L, 181L, 182L, 183L,
184L, 185L, 186L, 187L, 188L, 189L, 190L, 191L, 192L, 193L, 194L,
195L, 196L, 197L, 198L, 199L, 200L, 201L, 202L, 203L, 204L, 205L,
206L, 207L, 208L, 209L, 210L, 211L, 212L, 213L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 69L,
74L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 102L, 103L, 104L,
117L, 118L, 119L, 120L, 121L, 132L, 137L, 138L, 143L, 148L, 149L,
154L, 214L, 215L, 216L, 217L, 218L, 219L, 220L, 221L, 222L, 223L,
224L, 225L, 226L, 227L, 228L, 229L, 230L, 231L, 232L, 233L, 234L,
235L, 236L, 237L, 238L, 239L, 240L, 241L, 242L, 243L, 244L, 245L,
246L, 247L, 248L, 249L, 250L, 51L, 52L, 53L, 54L, 55L, 56L, 57L,
58L, 59L, 60L, 61L, 62L, 63L, 64L, 66L, 70L, 72L, 75L, 91L, 92L,
93L, 94L, 95L, 96L, 97L, 105L, 106L, 107L, 122L, 123L, 124L,
125L, 126L, 127L, 128L, 129L, 130L, 133L, 134L, 139L, 140L, 144L,
150L, 151L, 152L, 155L)))
by
上的sort
打破了行的順序 - 即它單獨重新排列year
,這將導致來自與“年份”的排序列匹配的其他列的錯誤值。 它應該在i
tt[order(year), .(count = uniqueN(ruc_pk_informante)),
by = .(year, pn_soc_informante)]
# A tidytable: 4 × 3
year pn_soc_informante count
<int> <chr> <int>
1 2012 PERSONA NATURAL 1
2 2013 PERSONA NATURAL 17
3 2014 PERSONA NATURAL 12
4 2015 PERSONA NATURAL 13
或者稍后再做訂單
tt[, .(count = uniqueN(ruc_pk_informante)),
by = .(year, pn_soc_informante)][order(year)]
# A tidytable: 4 × 3
year pn_soc_informante count
<int> <chr> <int>
1 2012 PERSONA NATURAL 1
2 2013 PERSONA NATURAL 17
3 2014 PERSONA NATURAL 12
4 2015 PERSONA NATURAL 13
從已排序和未排序中查看output
> tt[, .SD, by = .(year = year,pn_soc_informante)][year == 2013]$ruc_pk_informante
[1] "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3"
[23] "R5" "R7" "C9" "R12" "R14" "R16" "R16" "R16" "R16" "R16" "R16" "C21" "R23" "R23" "R23" "R27" "R27" "R27" "R27" "R27" "R27" "R27"
[45] "R27" "R27" "R30" "R33" "R33" "R34" "R34" "R37" "R41" "R41" "R44" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45"
[67] "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45"
[89] "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45" "R45"
[111] "R45" "R45" "R45"
> tt[, .SD, by = .(year = sort(year),pn_soc_informante)][year == 2013]$ruc_pk_informante
[1] "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3"
[23] "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3"
[45] "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R3" "R5" "R5" "R7"
[67] "R7" "R7" "R7" "C9" "C9" "R12" "R12" "R12" "R14" "R16" "R16" "R16" "R16" "R16" "R16" "R16" "R16" "R16" "R16" "R16" "R16" "R16"
[89] "R16" "R16" "R16" "R16" "R16" "R16" "R16" "R16" "C21" "R23" "R23" "R23" "R23" "R23" "R23" "R23" "R23" "R23" "R27" "R27" "R27" "R27"
[111] "R27" "R27" "R27"
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.