简体   繁体   中英

Arrange data frame columns by class: numeric before character

Good afternoon,

Assume we have the following dataset:

dput(head(cylinder))
structure(list(X19910108 = c("19910109", "19910104", "19910104", 
"19910111", "19910104", "19910111"), X126 = c("X266", "B7", "T133", 
"J34", "T218", "X249"), TVGUIDE = c("TVGUIDE", "MODMAT", "MASSEY", 
"KMART", "MASSEY", "ROSES"), X25503 = c(25503L, 47201L, 39039L, 
37351L, 38039L, 35751L), YES = c("YES", "YES", "YES", "NO", "YES", 
"NO"), KEY = c("KEY", "KEY", "KEY", "KEY", "KEY", "KEY"), YES.1 = c("YES", 
"YES", "YES", "YES", "YES", "YES"), BENTON = c("BENTON", "BENTON", 
"BENTON", "BENTON", "BENTON", "BENTON"), GALLATIN = c("GALLATIN", 
"GALLATIN", "GALLATIN", "GALLATIN", "GALLATIN", "GALLATIN"), 
    UNCOATED = c("UNCOATED", "UNCOATED", "UNCOATED", "UNCOATED", 
    "UNCOATED", "COATED"), UNCOATED.1 = c("UNCOATED", "COATED", 
    "UNCOATED", "COATED", "UNCOATED", "COATED"), NO = c("NO", 
    "NO", "NO", "NO", "NO", "NO"), LINE = c("LINE", "LINE", "LINE", 
    "LINE", "LINE", "LINE"), YES.2 = c("YES", "YES", "YES", "YES", 
    "YES", "YES"), Motter94 = c("Motter94", "WoodHoe70", "WoodHoe70", 
    "WoodHoe70", "WoodHoe70", "Motter94"), X821 = c(821L, 815L, 
    816L, 816L, 816L, 827L), X2 = c(2, 9, 9, 2, 2, 2), TABLOID = c("TABLOID", 
    "CATALOG", "CATALOG", "TABLOID", "CATALOG", "TABLOID"), NorthUS = c("NorthUS", 
    "NorthUS", "NorthUS", NA, "NorthUS", "CANADIAN"), X1911 = c(NA, 
    NA, 1910L, 1910L, 1910L, 1911L), X55 = c(55, 62, 52, 50, 
    50, 50), X46 = c(46L, 40L, 40L, 46L, 40L, 46L), X0.2 = c("0.3", 
    "0.433", "0.3", "0.3", "0.267", "0.3"), X17 = c(15, 16, 16, 
    17, 16.8, 16.5), X78 = c(80L, 80L, 75L, 80L, 76L, 75L), X0.75 = c(0.75, 
    NA, 0.3125, 0.75, 0.4375, 0.75), X20 = c(20L, 30L, 30L, 30L, 
    28L, 30L), X13.1 = c(6.6, 6.5, 5.6, 0, 8.6, 0), X1700 = c(1900L, 
    1850L, 1467L, 2100L, 1467L, 2600L), X50.5 = c(54.9, 53.8, 
    55.6, 57.5, 53.8, 62.5), X36.4 = c(38.5, 39.8, 38.8, 42.5, 
    37.6, 37.5), X0 = c(0, 0, 0, 5, 5, 6), X0.1 = c(0, 0, 0, 
    0, 0, 0), X2.5 = c(2.5, 2.8, 2.5, 2.3, 2.5, 2.5), X1 = c(0.7, 
    0.9, 1.3, 0.6, 0.8, 0.6), X34 = c(34, 40, 40, 35, 40, 30), 
    X40 = c(40L, 40L, 40L, 40L, 40L, 40L), X105 = c(105, 103.87, 
    108.06, 106.67, 103.87, 106.67), X100 = c(100L, 100L, 100L, 
    100L, 100L, 100L), band = c("noband", "noband", "noband", 
    "noband", "noband", "noband")), row.names = c(NA, 6L), class = "data.frame")

The column types are:

sapply(cylinder,class)

  X19910108        X126     TVGUIDE      X25503         YES         KEY       YES.1      BENTON    GALLATIN 
"character" "character" "character"   "integer" "character" "character" "character" "character" "character" 
   UNCOATED  UNCOATED.1          NO        LINE       YES.2    Motter94        X821          X2     TABLOID 
"character" "character" "character" "character" "character" "character"   "integer"   "numeric" "character" 
    NorthUS       X1911         X55         X46        X0.2         X17         X78       X0.75         X20 
"character"   "integer"   "numeric"   "integer" "character"   "numeric"   "integer"   "numeric"   "integer" 
      X13.1       X1700       X50.5       X36.4          X0        X0.1        X2.5          X1         X34 
  "numeric"   "integer"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
        X40        X105        X100        band 
  "integer"   "numeric"   "integer" "character" 

I want to reorder the dataset columns so that the "numeric" columns (both numeric and integer) come first, on the left, and the "character" columns come last, on the right.

Thank you for help !

We could also use a single where with an | logical operator that combines the two expressions with a lambda call

library(dplyr)
# Keep every column that is either character or numeric, using one where()
# call with a lambda predicate.
# NOTE(review): a single where() predicate appears to return the matching
# columns in their original order, so this may not move the numeric columns
# to the left — confirm before relying on it for reordering.
df %>% 
    select(where(~ is.character(.)|is.numeric(.)))

Something along those lines would work

library(dplyr)
# Numeric (double and integer) columns first, then character columns.
# everything() appends any remaining columns (e.g. logical, factor, Date) so
# that select() does not silently drop them.
df %>%
  select(where(is.numeric), where(is.character), everything())

Here is one option

# Reorder columns by class: doubles ("numeric") first, then integers, then
# characters. Columns of any other class get rank NA and are placed last.
# Only the first class of each column is used, so columns with several classes
# (e.g. POSIXct) cannot turn the class lookup into a list.
cylinder[
  order(
    match(
      vapply(cylinder, function(col) class(col)[1L], character(1)),
      c("numeric", "integer", "character")
    )
  )
]

Compact with collapse functions:

library(collapse)
# Put the numeric columns first; nv() is the short form of num_vars().
colorderv(d, nv(d, 2))

# same but more explicit
colorderv(d, num_vars(d, return = "names"))

Using nv / num_vars together with data.table::setcolorder , we can update column order by reference:

# Move the numeric columns to the front of d by reference (no copy is made).
# Both calls are namespace-qualified so the snippet runs without library().
data.table::setcolorder(d, collapse::nv(d, 2))

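I found a possible solution; I'm asking whether someone could suggest a better one. (Note that, as the output below shows, this particular `my.order` places the character columns first; put "integer" and "numeric" ahead of "character" in `my.order` to get the numeric columns on the left.)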

unique(sapply(cylinder,class))
[1] "character" "integer"   "numeric" 

> my.order <-unique(sapply(cylinder,class))

> head(cylinder %>% 
+          select(sapply(., class) %>% .[order(match(., my.order))] %>% names))

  X19910108 X126 TVGUIDE YES KEY YES.1 BENTON GALLATIN UNCOATED UNCOATED.1 NO LINE YES.2  Motter94 TABLOID  NorthUS
1  19910109 X266 TVGUIDE YES KEY   YES BENTON GALLATIN UNCOATED   UNCOATED NO LINE   YES  Motter94 TABLOID  NorthUS
2  19910104   B7  MODMAT YES KEY   YES BENTON GALLATIN UNCOATED     COATED NO LINE   YES WoodHoe70 CATALOG  NorthUS
3  19910104 T133  MASSEY YES KEY   YES BENTON GALLATIN UNCOATED   UNCOATED NO LINE   YES WoodHoe70 CATALOG  NorthUS
4  19910111  J34   KMART  NO KEY   YES BENTON GALLATIN UNCOATED     COATED NO LINE   YES WoodHoe70 TABLOID     <NA>
5  19910104 T218  MASSEY YES KEY   YES BENTON GALLATIN UNCOATED   UNCOATED NO LINE   YES WoodHoe70 CATALOG  NorthUS
6  19910111 X249   ROSES  NO KEY   YES BENTON GALLATIN   COATED     COATED NO LINE   YES  Motter94 TABLOID CANADIAN
   X0.2   band X25503 X821 X1911 X46 X78 X20 X1700 X40 X100 X2 X55  X17  X0.75 X13.1 X50.5 X36.4 X0 X0.1 X2.5  X1
1   0.3 noband  25503  821    NA  46  80  20  1900  40  100  2  55 15.0 0.7500   6.6  54.9  38.5  0    0  2.5 0.7
2 0.433 noband  47201  815    NA  40  80  30  1850  40  100  9  62 16.0     NA   6.5  53.8  39.8  0    0  2.8 0.9
3   0.3 noband  39039  816  1910  40  75  30  1467  40  100  9  52 16.0 0.3125   5.6  55.6  38.8  0    0  2.5 1.3
4   0.3 noband  37351  816  1910  46  80  30  2100  40  100  2  50 17.0 0.7500   0.0  57.5  42.5  5    0  2.3 0.6
5 0.267 noband  38039  816  1910  40  76  28  1467  40  100  2  50 16.8 0.4375   8.6  53.8  37.6  5    0  2.5 0.8
6   0.3 noband  35751  827  1911  46  75  30  2600  40  100  2  50 16.5 0.7500   0.0  62.5  37.5  6    0  2.5 0.6
  X34   X105
1  34 105.00
2  40 103.87
3  40 108.06
4  35 106.67
5  40 103.87
6  30 106.67

By luck, I found a built-in function for this purpose:

# Move the numeric (double and integer) columns in front of the first
# character column, so that the numeric columns end up on the left.
df %>% relocate(where(is.numeric), .before = where(is.character))

https://dplyr.tidyverse.org/reference/relocate.html

This solution requires that the columns have names. For example:

# Strip the column names to reproduce the failure described here.
colnames(df)<-NULL
# With no column names, the relocate() call below is reported to fail with
# "Can't select within an unnamed vector."
df %>% relocate(where(is.numeric), .after = where(is.character))

Will give the following error:

Error: Can't select within an unnamed vector.
Run `rlang::last_error()` to see where the error occurred.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM