簡體   English   中英

如何根據包含數字的字符列對數據框進行排序

[英]How to sort a dataframe based on a character column with numbers in it

所以我有一個包含一個字符和 3 個數字列的數據框,如下所示:

     Class  Freq        a        b
1   (-1,0]   949 1.000000 2.000000
2    (0,1]  2498 1.001418 2.002836
3    (1,2]  3543 1.004991 2.009982
4  (10,11]  8830 1.077992 2.155984
5  (11,12]  6471 1.086052 2.172105
6  (12,13]  4672 1.093956 2.187912
7  (13,14]  3398 1.100472 2.200944
8  (14,15]  2128 1.107643 2.215287
9  (15,16]  1197 1.113376 2.226751
10   (2,3]  4843 1.005870 2.011739
11   (3,4]  6644 1.033653 2.067305
12   (4,5]  8654 1.038444 2.076889
13   (5,6] 10516 1.044713 2.089426
14   (6,7] 11310 1.050979 2.101958
15   (7,8] 12026 1.058072 2.116144
16   (8,9] 14065 1.066286 2.132572
17  (9,10] 12775 1.073026 2.146052

數據框是從 excel 文件中讀取的,因此我無法讓它在生成時就按正確的順序排列。 我想依照 Class 列中的數字對其進行排序,而不是像目前這樣(我認為)按字母順序排序。

在 gtools::mixedsort(df$Class) 的幫助下,我設法創建了一個有序向量:

[1] "(-1,0]"  "(0,1]"   "(1,2]"   "(2,3]"   "(3,4]"   "(4,5]"   "(5,6]"   "(6,7]"   "(7,8]"  
[10] "(8,9]"   "(9,10]"  "(10,11]" "(11,12]" "(12,13]" "(13,14]" "(14,15]" "(15,16]"

但是當將它與 dplyr::arrange() 一起使用時,我似乎每次運行都會得到 Class 的半隨機排序。

例子:

> arrange(df, mixedsort(df$Class))

     Class  Freq        a        b
1   (-1,0]   949 1.000000 2.000000
2    (0,1]  2498 1.001418 2.002836
3    (1,2]  3543 1.004991 2.009982
4  (14,15]  2128 1.107643 2.215287
5  (15,16]  1197 1.113376 2.226751
6    (2,3]  4843 1.005870 2.011739
7    (3,4]  6644 1.033653 2.067305
8    (4,5]  8654 1.038444 2.076889
9    (5,6] 10516 1.044713 2.089426
10   (6,7] 11310 1.050979 2.101958
11   (7,8] 12026 1.058072 2.116144
12   (8,9] 14065 1.066286 2.132572
13  (9,10] 12775 1.073026 2.146052
14 (10,11]  8830 1.077992 2.155984
15 (11,12]  6471 1.086052 2.172105
16 (12,13]  4672 1.093956 2.187912
17 (13,14]  3398 1.100472 2.200944

示例 2:

     Class  Freq        a        b
1   (-1,0]   949 1.000000 2.000000
2    (0,1]  2498 1.001418 2.002836
3    (1,2]  3543 1.004991 2.009982
4    (6,7] 11310 1.050979 2.101958
5    (7,8] 12026 1.058072 2.116144
6    (8,9] 14065 1.066286 2.132572
7   (9,10] 12775 1.073026 2.146052
8  (10,11]  8830 1.077992 2.155984
9  (11,12]  6471 1.086052 2.172105
10 (12,13]  4672 1.093956 2.187912
11 (13,14]  3398 1.100472 2.200944
12 (14,15]  2128 1.107643 2.215287
13 (15,16]  1197 1.113376 2.226751
14   (2,3]  4843 1.005870 2.011739
15   (3,4]  6644 1.033653 2.067305
16   (4,5]  8654 1.038444 2.076889
17   (5,6] 10516 1.044713 2.089426

我對 R 相當陌生,所以我不確定是我用錯了這個函數,還是這個函數本來就不適合這項工作。 任何幫助都不勝感激。

試試下面的 base R 代碼

> df[order(as.numeric(gsub("\\(|,.*", "", df$Class))), ]
     Class  Freq        a        b
1   (-1,0]   949 1.000000 2.000000
2    (0,1]  2498 1.001418 2.002836
3    (1,2]  3543 1.004991 2.009982
10   (2,3]  4843 1.005870 2.011739
11   (3,4]  6644 1.033653 2.067305
12   (4,5]  8654 1.038444 2.076889
13   (5,6] 10516 1.044713 2.089426
14   (6,7] 11310 1.050979 2.101958
15   (7,8] 12026 1.058072 2.116144
16   (8,9] 14065 1.066286 2.132572
17  (9,10] 12775 1.073026 2.146052
4  (10,11]  8830 1.077992 2.155984
5  (11,12]  6471 1.086052 2.172105
6  (12,13]  4672 1.093956 2.187912
7  (13,14]  3398 1.100472 2.200944
8  (14,15]  2128 1.107643 2.215287
9  (15,16]  1197 1.113376 2.226751

如果你想用 arrange 搭配 mixedsort 的方式來做

library(gtools)
library(dplyr, warn.conflicts = FALSE)

# Turn Class into an ordered factor whose levels follow the natural
# (mixed alphanumeric) order, then sort the rows by that factor.
# unique() is required because factor() rejects duplicated levels, which
# would occur as soon as a Class value appears in more than one row.
arrange(
  df,
  factor(Class, levels = mixedsort(unique(Class)), ordered = TRUE)
)
#>      Class  Freq        a        b
#> 1   (-1,0]   949 1.000000 2.000000
#> 2    (0,1]  2498 1.001418 2.002836
#> 3    (1,2]  3543 1.004991 2.009982
#> 10   (2,3]  4843 1.005870 2.011739
#> 11   (3,4]  6644 1.033653 2.067305
#> 12   (4,5]  8654 1.038444 2.076889
#> 13   (5,6] 10516 1.044713 2.089426
#> 14   (6,7] 11310 1.050979 2.101958
#> 15   (7,8] 12026 1.058072 2.116144
#> 16   (8,9] 14065 1.066286 2.132572
#> 17  (9,10] 12775 1.073026 2.146052
#> 4  (10,11]  8830 1.077992 2.155984
#> 5  (11,12]  6471 1.086052 2.172105
#> 6  (12,13]  4672 1.093956 2.187912
#> 7  (13,14]  3398 1.100472 2.200944
#> 8  (14,15]  2128 1.107643 2.215287
#> 9  (15,16]  1197 1.113376 2.226751

由 reprex 包 (v2.0.0) 於 2021 年 7 月 23 日創建

帶有parse_number的選項

library(dplyr)
# Sort by the lower bound of each interval. Everything from the comma on is
# dropped first because parse_number() ignores "," as a grouping mark under
# the default locale, so "(10,11]" would otherwise be read as 1011.
df1 %>%
    arrange(readr::parse_number(sub(",.*", "", Class)))

-輸出

     Class  Freq        a        b
1   (-1,0]   949 1.000000 2.000000
2    (0,1]  2498 1.001418 2.002836
3    (1,2]  3543 1.004991 2.009982
10   (2,3]  4843 1.005870 2.011739
11   (3,4]  6644 1.033653 2.067305
12   (4,5]  8654 1.038444 2.076889
13   (5,6] 10516 1.044713 2.089426
14   (6,7] 11310 1.050979 2.101958
15   (7,8] 12026 1.058072 2.116144
16   (8,9] 14065 1.066286 2.132572
17  (9,10] 12775 1.073026 2.146052
4  (10,11]  8830 1.077992 2.155984
5  (11,12]  6471 1.086052 2.172105
6  (12,13]  4672 1.093956 2.187912
7  (13,14]  3398 1.100472 2.200944
8  (14,15]  2128 1.107643 2.215287
9  (15,16]  1197 1.113376 2.226751

數據

# Reproducible copy of the example data: a character column Class holding
# interval labels, an integer column Freq, and two double columns a and b.
# Rows are in the same (alphabetical by Class) order as the data frame
# printed in the question; row names are the strings "1" to "17".
df1 <- structure(list(Class = c("(-1,0]", "(0,1]", "(1,2]", "(10,11]", 
"(11,12]", "(12,13]", "(13,14]", "(14,15]", "(15,16]", "(2,3]", 
"(3,4]", "(4,5]", "(5,6]", "(6,7]", "(7,8]", "(8,9]", "(9,10]"
), Freq = c(949L, 2498L, 3543L, 8830L, 6471L, 4672L, 3398L, 2128L, 
1197L, 4843L, 6644L, 8654L, 10516L, 11310L, 12026L, 14065L, 12775L
), a = c(1, 1.001418, 1.004991, 1.077992, 1.086052, 1.093956, 
1.100472, 1.107643, 1.113376, 1.00587, 1.033653, 1.038444, 1.044713, 
1.050979, 1.058072, 1.066286, 1.073026), b = c(2, 2.002836, 2.009982, 
2.155984, 2.172105, 2.187912, 2.200944, 2.215287, 2.226751, 2.011739, 
2.067305, 2.076889, 2.089426, 2.101958, 2.116144, 2.132572, 2.146052
)), class = "data.frame", row.names = c("1", "2", "3", "4", "5", 
"6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", 
"17"))

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM