I have a data frame containing one character column and three numeric columns, like so:
Class Freq a b
1 (-1,0] 949 1.000000 2.000000
2 (0,1] 2498 1.001418 2.002836
3 (1,2] 3543 1.004991 2.009982
4 (10,11] 8830 1.077992 2.155984
5 (11,12] 6471 1.086052 2.172105
6 (12,13] 4672 1.093956 2.187912
7 (13,14] 3398 1.100472 2.200944
8 (14,15] 2128 1.107643 2.215287
9 (15,16] 1197 1.113376 2.226751
10 (2,3] 4843 1.005870 2.011739
11 (3,4] 6644 1.033653 2.067305
12 (4,5] 8654 1.038444 2.076889
13 (5,6] 10516 1.044713 2.089426
14 (6,7] 11310 1.050979 2.101958
15 (7,8] 12026 1.058072 2.116144
16 (8,9] 14065 1.066286 2.132572
17 (9,10] 12775 1.073026 2.146052
The data frame is read from an Excel file, so I can't generate it in the proper order in the first place. I'm trying to sort it by 'Class' numerically rather than alphabetically, which is what I believe is currently happening.
I've managed to create an ordered vector with the help of gtools::mixedsort(df$Class):
[1] "(-1,0]" "(0,1]" "(1,2]" "(2,3]" "(3,4]" "(4,5]" "(5,6]" "(6,7]" "(7,8]"
[10] "(8,9]" "(9,10]" "(10,11]" "(11,12]" "(12,13]" "(13,14]" "(14,15]" "(15,16]"
but when using it with dplyr::arrange(), I seem to get a semi-random ordering of Class with each run.
Example:
> arrange(df, mixedsort(df$Class))
Class Freq a b
1 (-1,0] 949 1.000000 2.000000
2 (0,1] 2498 1.001418 2.002836
3 (1,2] 3543 1.004991 2.009982
4 (14,15] 2128 1.107643 2.215287
5 (15,16] 1197 1.113376 2.226751
6 (2,3] 4843 1.005870 2.011739
7 (3,4] 6644 1.033653 2.067305
8 (4,5] 8654 1.038444 2.076889
9 (5,6] 10516 1.044713 2.089426
10 (6,7] 11310 1.050979 2.101958
11 (7,8] 12026 1.058072 2.116144
12 (8,9] 14065 1.066286 2.132572
13 (9,10] 12775 1.073026 2.146052
14 (10,11] 8830 1.077992 2.155984
15 (11,12] 6471 1.086052 2.172105
16 (12,13] 4672 1.093956 2.187912
17 (13,14] 3398 1.100472 2.200944
Example 2:
Class Freq a b
1 (-1,0] 949 1.000000 2.000000
2 (0,1] 2498 1.001418 2.002836
3 (1,2] 3543 1.004991 2.009982
4 (6,7] 11310 1.050979 2.101958
5 (7,8] 12026 1.058072 2.116144
6 (8,9] 14065 1.066286 2.132572
7 (9,10] 12775 1.073026 2.146052
8 (10,11] 8830 1.077992 2.155984
9 (11,12] 6471 1.086052 2.172105
10 (12,13] 4672 1.093956 2.187912
11 (13,14] 3398 1.100472 2.200944
12 (14,15] 2128 1.107643 2.215287
13 (15,16] 1197 1.113376 2.226751
14 (2,3] 4843 1.005870 2.011739
15 (3,4] 6644 1.033653 2.067305
16 (4,5] 8654 1.038444 2.076889
17 (5,6] 10516 1.044713 2.089426
I'm fairly new to R so I'm not sure if I'm just using the functions wrong, or the wrong functions for the job. Any help is appreciated.
Try the base R code below
> # Strip the opening "(" and everything from the comma onward, leaving only
> # the lower bound of each interval, then order the rows by that number.
> lower_bound <- as.numeric(gsub("\\(|,.*", "", df$Class))
> df[order(lower_bound), ]
Class Freq a b
1 (-1,0] 949 1.000000 2.000000
2 (0,1] 2498 1.001418 2.002836
3 (1,2] 3543 1.004991 2.009982
10 (2,3] 4843 1.005870 2.011739
11 (3,4] 6644 1.033653 2.067305
12 (4,5] 8654 1.038444 2.076889
13 (5,6] 10516 1.044713 2.089426
14 (6,7] 11310 1.050979 2.101958
15 (7,8] 12026 1.058072 2.116144
16 (8,9] 14065 1.066286 2.132572
17 (9,10] 12775 1.073026 2.146052
4 (10,11] 8830 1.077992 2.155984
5 (11,12] 6471 1.086052 2.172105
6 (12,13] 4672 1.093956 2.187912
7 (13,14] 3398 1.100472 2.200944
8 (14,15] 2128 1.107643 2.215287
9 (15,16] 1197 1.113376 2.226751
If you want to do it the `arrange` and `mixedsort` way:
library(gtools)
library(dplyr, warn.conflicts = FALSE)
# arrange() sorts a character column alphabetically, so convert Class to an
# ordered factor whose levels are in natural (mixedsort) order and sort by that.
# unique() guards against repeated Class values, which factor() would reject
# as duplicated levels.
arrange(df, factor(Class, levels = unique(mixedsort(Class)), ordered = TRUE))
#> Class Freq a b
#> 1 (-1,0] 949 1.000000 2.000000
#> 2 (0,1] 2498 1.001418 2.002836
#> 3 (1,2] 3543 1.004991 2.009982
#> 10 (2,3] 4843 1.005870 2.011739
#> 11 (3,4] 6644 1.033653 2.067305
#> 12 (4,5] 8654 1.038444 2.076889
#> 13 (5,6] 10516 1.044713 2.089426
#> 14 (6,7] 11310 1.050979 2.101958
#> 15 (7,8] 12026 1.058072 2.116144
#> 16 (8,9] 14065 1.066286 2.132572
#> 17 (9,10] 12775 1.073026 2.146052
#> 4 (10,11] 8830 1.077992 2.155984
#> 5 (11,12] 6471 1.086052 2.172105
#> 6 (12,13] 4672 1.093956 2.187912
#> 7 (13,14] 3398 1.100472 2.200944
#> 8 (14,15] 2128 1.107643 2.215287
#> 9 (15,16] 1197 1.113376 2.226751
Created on 2021-07-23 by the reprex package (v2.0.0)
An option with parse_number
library(dplyr)
# Order rows by the first number found in each Class label (its lower bound).
arrange(df1, readr::parse_number(Class))
-output
Class Freq a b
1 (-1,0] 949 1.000000 2.000000
2 (0,1] 2498 1.001418 2.002836
3 (1,2] 3543 1.004991 2.009982
10 (2,3] 4843 1.005870 2.011739
11 (3,4] 6644 1.033653 2.067305
12 (4,5] 8654 1.038444 2.076889
13 (5,6] 10516 1.044713 2.089426
14 (6,7] 11310 1.050979 2.101958
15 (7,8] 12026 1.058072 2.116144
16 (8,9] 14065 1.066286 2.132572
17 (9,10] 12775 1.073026 2.146052
4 (10,11] 8830 1.077992 2.155984
5 (11,12] 6471 1.086052 2.172105
6 (12,13] 4672 1.093956 2.187912
7 (13,14] 3398 1.100472 2.200944
8 (14,15] 2128 1.107643 2.215287
9 (15,16] 1197 1.113376 2.226751
# Example data: interval labels in alphabetical (not numeric) order, with their
# frequencies and two numeric columns. Row names are kept as the character
# strings "1".."17".
df1 <- data.frame(
  Class = c(
    "(-1,0]", "(0,1]", "(1,2]", "(10,11]", "(11,12]", "(12,13]",
    "(13,14]", "(14,15]", "(15,16]", "(2,3]", "(3,4]", "(4,5]",
    "(5,6]", "(6,7]", "(7,8]", "(8,9]", "(9,10]"
  ),
  Freq = c(
    949L, 2498L, 3543L, 8830L, 6471L, 4672L, 3398L, 2128L, 1197L,
    4843L, 6644L, 8654L, 10516L, 11310L, 12026L, 14065L, 12775L
  ),
  a = c(
    1, 1.001418, 1.004991, 1.077992, 1.086052, 1.093956, 1.100472,
    1.107643, 1.113376, 1.00587, 1.033653, 1.038444, 1.044713,
    1.050979, 1.058072, 1.066286, 1.073026
  ),
  b = c(
    2, 2.002836, 2.009982, 2.155984, 2.172105, 2.187912, 2.200944,
    2.215287, 2.226751, 2.011739, 2.067305, 2.076889, 2.089426,
    2.101958, 2.116144, 2.132572, 2.146052
  ),
  row.names = as.character(seq_len(17L)),
  stringsAsFactors = FALSE
)
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.