简体   繁体   中英

How to sort a dataframe based on a character column with numbers in it

So I have a data frame containing a character and 3 numeric columns like so:

    Class  Freq  a        b
1   (-1,0]   949 1.000000 2.000000
2    (0,1]  2498 1.001418 2.002836
3    (1,2]  3543 1.004991 2.009982
4  (10,11]  8830 1.077992 2.155984
5  (11,12]  6471 1.086052 2.172105
6  (12,13]  4672 1.093956 2.187912
7  (13,14]  3398 1.100472 2.200944
8  (14,15]  2128 1.107643 2.215287
9  (15,16]  1197 1.113376 2.226751
10   (2,3]  4843 1.005870 2.011739
11   (3,4]  6644 1.033653 2.067305
12   (4,5]  8654 1.038444 2.076889
13   (5,6] 10516 1.044713 2.089426
14   (6,7] 11310 1.050979 2.101958
15   (7,8] 12026 1.058072 2.116144
16   (8,9] 14065 1.066286 2.132572
17  (9,10] 12775 1.073026 2.146052

The data frame is read from an Excel file, so I can't generate it in the proper order. I'm trying to sort it by 'Class' numerically, rather than alphabetically, which I believe is what is currently happening.

I've managed to create an ordered vector with the help of gtools::mixedsort(df$Class):

[1] "(-1,0]"  "(0,1]"   "(1,2]"   "(2,3]"   "(3,4]"   "(4,5]"   "(5,6]"   "(6,7]"   "(7,8]"  
[10] "(8,9]"   "(9,10]"  "(10,11]" "(11,12]" "(12,13]" "(13,14]" "(14,15]" "(15,16]"

but when using it with dplyr::arrange(), I seem to get a semi-random ordering of Class with each run.

Example:

> arrange(df, mixedsort(df$Class))

     Class  Freq a        b
1   (-1,0]   949 1.000000 2.000000
2    (0,1]  2498 1.001418 2.002836
3    (1,2]  3543 1.004991 2.009982
4  (14,15]  2128 1.107643 2.215287
5  (15,16]  1197 1.113376 2.226751
6    (2,3]  4843 1.005870 2.011739
7    (3,4]  6644 1.033653 2.067305
8    (4,5]  8654 1.038444 2.076889
9    (5,6] 10516 1.044713 2.089426
10   (6,7] 11310 1.050979 2.101958
11   (7,8] 12026 1.058072 2.116144
12   (8,9] 14065 1.066286 2.132572
13  (9,10] 12775 1.073026 2.146052
14 (10,11]  8830 1.077992 2.155984
15 (11,12]  6471 1.086052 2.172105
16 (12,13]  4672 1.093956 2.187912
17 (13,14]  3398 1.100472 2.200944

Example 2:

     Class  Freq a        b
1   (-1,0]   949 1.000000 2.000000
2    (0,1]  2498 1.001418 2.002836
3    (1,2]  3543 1.004991 2.009982
4    (6,7] 11310 1.050979 2.101958
5    (7,8] 12026 1.058072 2.116144
6    (8,9] 14065 1.066286 2.132572
7   (9,10] 12775 1.073026 2.146052
8  (10,11]  8830 1.077992 2.155984
9  (11,12]  6471 1.086052 2.172105
10 (12,13]  4672 1.093956 2.187912
11 (13,14]  3398 1.100472 2.200944
12 (14,15]  2128 1.107643 2.215287
13 (15,16]  1197 1.113376 2.226751
14   (2,3]  4843 1.005870 2.011739
15   (3,4]  6644 1.033653 2.067305
16   (4,5]  8654 1.038444 2.076889
17   (5,6] 10516 1.044713 2.089426

I'm fairly new to R so I'm not sure if I'm just using the functions wrong, or the wrong functions for the job. Any help is appreciated.

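Try the base R code below. The gsub() call strips the opening "(" and everything from the comma onward, leaving only the lower bound of each interval; as.numeric() converts it to a number, and order() then sorts the rows by that value.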

> df[order(as.numeric(gsub("\\(|,.*", "", df$Class))), ]
     Class  Freq        a        b
1   (-1,0]   949 1.000000 2.000000
2    (0,1]  2498 1.001418 2.002836
3    (1,2]  3543 1.004991 2.009982
10   (2,3]  4843 1.005870 2.011739
11   (3,4]  6644 1.033653 2.067305
12   (4,5]  8654 1.038444 2.076889
13   (5,6] 10516 1.044713 2.089426
14   (6,7] 11310 1.050979 2.101958
15   (7,8] 12026 1.058072 2.116144
16   (8,9] 14065 1.066286 2.132572
17  (9,10] 12775 1.073026 2.146052
4  (10,11]  8830 1.077992 2.155984
5  (11,12]  6471 1.086052 2.172105
6  (12,13]  4672 1.093956 2.187912
7  (13,14]  3398 1.100472 2.200944
8  (14,15]  2128 1.107643 2.215287
9  (15,16]  1197 1.113376 2.226751

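If you want to do it with arrange() and mixedsort(), convert Class to an ordered factor whose levels are given by mixedsort(), and arrange by that factor: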

library(gtools)
library(dplyr, warn.conflicts = F)

arrange(df, factor(df$Class, levels = mixedsort(df$Class), ordered = T))
#>      Class  Freq        a        b
#> 1   (-1,0]   949 1.000000 2.000000
#> 2    (0,1]  2498 1.001418 2.002836
#> 3    (1,2]  3543 1.004991 2.009982
#> 10   (2,3]  4843 1.005870 2.011739
#> 11   (3,4]  6644 1.033653 2.067305
#> 12   (4,5]  8654 1.038444 2.076889
#> 13   (5,6] 10516 1.044713 2.089426
#> 14   (6,7] 11310 1.050979 2.101958
#> 15   (7,8] 12026 1.058072 2.116144
#> 16   (8,9] 14065 1.066286 2.132572
#> 17  (9,10] 12775 1.073026 2.146052
#> 4  (10,11]  8830 1.077992 2.155984
#> 5  (11,12]  6471 1.086052 2.172105
#> 6  (12,13]  4672 1.093956 2.187912
#> 7  (13,14]  3398 1.100472 2.200944
#> 8  (14,15]  2128 1.107643 2.215287
#> 9  (15,16]  1197 1.113376 2.226751

Created on 2021-07-23 by the reprex package (v2.0.0)

An option with parse_number

library(dplyr)
df1 %>%
    arrange(readr::parse_number(Class))

-output

 Class  Freq        a        b
1   (-1,0]   949 1.000000 2.000000
2    (0,1]  2498 1.001418 2.002836
3    (1,2]  3543 1.004991 2.009982
10   (2,3]  4843 1.005870 2.011739
11   (3,4]  6644 1.033653 2.067305
12   (4,5]  8654 1.038444 2.076889
13   (5,6] 10516 1.044713 2.089426
14   (6,7] 11310 1.050979 2.101958
15   (7,8] 12026 1.058072 2.116144
16   (8,9] 14065 1.066286 2.132572
17  (9,10] 12775 1.073026 2.146052
4  (10,11]  8830 1.077992 2.155984
5  (11,12]  6471 1.086052 2.172105
6  (12,13]  4672 1.093956 2.187912
7  (13,14]  3398 1.100472 2.200944
8  (14,15]  2128 1.107643 2.215287
9  (15,16]  1197 1.113376 2.226751

data

df1 <- structure(list(Class = c("(-1,0]", "(0,1]", "(1,2]", "(10,11]", 
"(11,12]", "(12,13]", "(13,14]", "(14,15]", "(15,16]", "(2,3]", 
"(3,4]", "(4,5]", "(5,6]", "(6,7]", "(7,8]", "(8,9]", "(9,10]"
), Freq = c(949L, 2498L, 3543L, 8830L, 6471L, 4672L, 3398L, 2128L, 
1197L, 4843L, 6644L, 8654L, 10516L, 11310L, 12026L, 14065L, 12775L
), a = c(1, 1.001418, 1.004991, 1.077992, 1.086052, 1.093956, 
1.100472, 1.107643, 1.113376, 1.00587, 1.033653, 1.038444, 1.044713, 
1.050979, 1.058072, 1.066286, 1.073026), b = c(2, 2.002836, 2.009982, 
2.155984, 2.172105, 2.187912, 2.200944, 2.215287, 2.226751, 2.011739, 
2.067305, 2.076889, 2.089426, 2.101958, 2.116144, 2.132572, 2.146052
)), class = "data.frame", row.names = c("1", "2", "3", "4", "5", 
"6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", 
"17"))

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM