簡體   English   中英

按類別(class)排列數據框的列:數值列在字符列之前

[英]Arrange data frame columns by class: numeric before character

下午好,

假設我們有以下數據集:

dput(head(cylinder))
structure(list(X19910108 = c("19910109", "19910104", "19910104", 
"19910111", "19910104", "19910111"), X126 = c("X266", "B7", "T133", 
"J34", "T218", "X249"), TVGUIDE = c("TVGUIDE", "MODMAT", "MASSEY", 
"KMART", "MASSEY", "ROSES"), X25503 = c(25503L, 47201L, 39039L, 
37351L, 38039L, 35751L), YES = c("YES", "YES", "YES", "NO", "YES", 
"NO"), KEY = c("KEY", "KEY", "KEY", "KEY", "KEY", "KEY"), YES.1 = c("YES", 
"YES", "YES", "YES", "YES", "YES"), BENTON = c("BENTON", "BENTON", 
"BENTON", "BENTON", "BENTON", "BENTON"), GALLATIN = c("GALLATIN", 
"GALLATIN", "GALLATIN", "GALLATIN", "GALLATIN", "GALLATIN"), 
    UNCOATED = c("UNCOATED", "UNCOATED", "UNCOATED", "UNCOATED", 
    "UNCOATED", "COATED"), UNCOATED.1 = c("UNCOATED", "COATED", 
    "UNCOATED", "COATED", "UNCOATED", "COATED"), NO = c("NO", 
    "NO", "NO", "NO", "NO", "NO"), LINE = c("LINE", "LINE", "LINE", 
    "LINE", "LINE", "LINE"), YES.2 = c("YES", "YES", "YES", "YES", 
    "YES", "YES"), Motter94 = c("Motter94", "WoodHoe70", "WoodHoe70", 
    "WoodHoe70", "WoodHoe70", "Motter94"), X821 = c(821L, 815L, 
    816L, 816L, 816L, 827L), X2 = c(2, 9, 9, 2, 2, 2), TABLOID = c("TABLOID", 
    "CATALOG", "CATALOG", "TABLOID", "CATALOG", "TABLOID"), NorthUS = c("NorthUS", 
    "NorthUS", "NorthUS", NA, "NorthUS", "CANADIAN"), X1911 = c(NA, 
    NA, 1910L, 1910L, 1910L, 1911L), X55 = c(55, 62, 52, 50, 
    50, 50), X46 = c(46L, 40L, 40L, 46L, 40L, 46L), X0.2 = c("0.3", 
    "0.433", "0.3", "0.3", "0.267", "0.3"), X17 = c(15, 16, 16, 
    17, 16.8, 16.5), X78 = c(80L, 80L, 75L, 80L, 76L, 75L), X0.75 = c(0.75, 
    NA, 0.3125, 0.75, 0.4375, 0.75), X20 = c(20L, 30L, 30L, 30L, 
    28L, 30L), X13.1 = c(6.6, 6.5, 5.6, 0, 8.6, 0), X1700 = c(1900L, 
    1850L, 1467L, 2100L, 1467L, 2600L), X50.5 = c(54.9, 53.8, 
    55.6, 57.5, 53.8, 62.5), X36.4 = c(38.5, 39.8, 38.8, 42.5, 
    37.6, 37.5), X0 = c(0, 0, 0, 5, 5, 6), X0.1 = c(0, 0, 0, 
    0, 0, 0), X2.5 = c(2.5, 2.8, 2.5, 2.3, 2.5, 2.5), X1 = c(0.7, 
    0.9, 1.3, 0.6, 0.8, 0.6), X34 = c(34, 40, 40, 35, 40, 30), 
    X40 = c(40L, 40L, 40L, 40L, 40L, 40L), X105 = c(105, 103.87, 
    108.06, 106.67, 103.87, 106.67), X100 = c(100L, 100L, 100L, 
    100L, 100L, 100L), band = c("noband", "noband", "noband", 
    "noband", "noband", "noband")), row.names = c(NA, 6L), class = "data.frame")

列類型為:

sapply(cylinder,class)

  X19910108        X126     TVGUIDE      X25503         YES         KEY       YES.1      BENTON    GALLATIN 
"character" "character" "character"   "integer" "character" "character" "character" "character" "character" 
   UNCOATED  UNCOATED.1          NO        LINE       YES.2    Motter94        X821          X2     TABLOID 
"character" "character" "character" "character" "character" "character"   "integer"   "numeric" "character" 
    NorthUS       X1911         X55         X46        X0.2         X17         X78       X0.75         X20 
"character"   "integer"   "numeric"   "integer" "character"   "numeric"   "integer"   "numeric"   "integer" 
      X13.1       X1700       X50.5       X36.4          X0        X0.1        X2.5          X1         X34 
  "numeric"   "integer"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
        X40        X105        X100        band 
  "integer"   "numeric"   "integer" "character" 

我想重新排序數據集的列,使數值列("numeric" 和 "integer")位於左側,"character" 列位於右側!

謝謝你的幫助 !

我們也可以只使用一個 where,在 lambda 表達式中用邏輯運算符 | 將兩個條件組合起來:

library(dplyr)
df %>% 
    select(where(~ is.character(.)|is.numeric(.)))

類似下面這樣的寫法可以解決問題:

library(dplyr)
df %>% select(where(is.numeric), where(is.character))

這是一種選擇

cylinder[
  order(
    as.integer(
      factor(sapply(cylinder, class),
        levels = c("numeric", "integer", "character")
      )
    )
  )
]

使用 collapse 包函數的簡潔寫法:

library(collapse)
colorderv(d, nv(d, 2))

# same but more explicit
colorderv(d, num_vars(d, return = "names")

將 nv / num_vars 與 data.table::setcolorder 結合使用,我們可以通過引用(原地)更新列順序:

setcolorder(d, nv(d, 2))

我找到了一個可能的解決方案,想請教是否有人能提出更好的方法:

unique(sapply(cylinder,class))
[1] "character" "integer"   "numeric" 

> my.order <-unique(sapply(cylinder,class))

> head(cylinder %>% 
+          select(sapply(., class) %>% .[order(match(., my.order))] %>% names))

  X19910108 X126 TVGUIDE YES KEY YES.1 BENTON GALLATIN UNCOATED UNCOATED.1 NO LINE YES.2  Motter94 TABLOID  NorthUS
1  19910109 X266 TVGUIDE YES KEY   YES BENTON GALLATIN UNCOATED   UNCOATED NO LINE   YES  Motter94 TABLOID  NorthUS
2  19910104   B7  MODMAT YES KEY   YES BENTON GALLATIN UNCOATED     COATED NO LINE   YES WoodHoe70 CATALOG  NorthUS
3  19910104 T133  MASSEY YES KEY   YES BENTON GALLATIN UNCOATED   UNCOATED NO LINE   YES WoodHoe70 CATALOG  NorthUS
4  19910111  J34   KMART  NO KEY   YES BENTON GALLATIN UNCOATED     COATED NO LINE   YES WoodHoe70 TABLOID     <NA>
5  19910104 T218  MASSEY YES KEY   YES BENTON GALLATIN UNCOATED   UNCOATED NO LINE   YES WoodHoe70 CATALOG  NorthUS
6  19910111 X249   ROSES  NO KEY   YES BENTON GALLATIN   COATED     COATED NO LINE   YES  Motter94 TABLOID CANADIAN
   X0.2   band X25503 X821 X1911 X46 X78 X20 X1700 X40 X100 X2 X55  X17  X0.75 X13.1 X50.5 X36.4 X0 X0.1 X2.5  X1
1   0.3 noband  25503  821    NA  46  80  20  1900  40  100  2  55 15.0 0.7500   6.6  54.9  38.5  0    0  2.5 0.7
2 0.433 noband  47201  815    NA  40  80  30  1850  40  100  9  62 16.0     NA   6.5  53.8  39.8  0    0  2.8 0.9
3   0.3 noband  39039  816  1910  40  75  30  1467  40  100  9  52 16.0 0.3125   5.6  55.6  38.8  0    0  2.5 1.3
4   0.3 noband  37351  816  1910  46  80  30  2100  40  100  2  50 17.0 0.7500   0.0  57.5  42.5  5    0  2.3 0.6
5 0.267 noband  38039  816  1910  40  76  28  1467  40  100  2  50 16.8 0.4375   8.6  53.8  37.6  5    0  2.5 0.8
6   0.3 noband  35751  827  1911  46  75  30  2600  40  100  2  50 16.5 0.7500   0.0  62.5  37.5  6    0  2.5 0.6
  X34   X105
1  34 105.00
2  40 103.87
3  40 108.06
4  35 106.67
5  40 103.87
6  30 106.67

幸運的是,我找到了一個專門用於此目的的 dplyr 函數:

df %>% relocate(where(is.numeric), .after = where(is.character))

https://dplyr.tidyverse.org/reference/relocate.html

此解決方案要求列必須具有名稱。 例如,以下代碼:

colnames(df)<-NULL
df %>% relocate(where(is.numeric), .after = where(is.character))

會報以下錯誤:

Error: Can't select within an unnamed vector.
Run `rlang::last_error()` to see where the error occurred.

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM