[英]Arrange data frame columns by class: numeric before character
下午好,
假設我們有以下數據集:
dput(head(cylinder))
structure(list(X19910108 = c("19910109", "19910104", "19910104",
"19910111", "19910104", "19910111"), X126 = c("X266", "B7", "T133",
"J34", "T218", "X249"), TVGUIDE = c("TVGUIDE", "MODMAT", "MASSEY",
"KMART", "MASSEY", "ROSES"), X25503 = c(25503L, 47201L, 39039L,
37351L, 38039L, 35751L), YES = c("YES", "YES", "YES", "NO", "YES",
"NO"), KEY = c("KEY", "KEY", "KEY", "KEY", "KEY", "KEY"), YES.1 = c("YES",
"YES", "YES", "YES", "YES", "YES"), BENTON = c("BENTON", "BENTON",
"BENTON", "BENTON", "BENTON", "BENTON"), GALLATIN = c("GALLATIN",
"GALLATIN", "GALLATIN", "GALLATIN", "GALLATIN", "GALLATIN"),
UNCOATED = c("UNCOATED", "UNCOATED", "UNCOATED", "UNCOATED",
"UNCOATED", "COATED"), UNCOATED.1 = c("UNCOATED", "COATED",
"UNCOATED", "COATED", "UNCOATED", "COATED"), NO = c("NO",
"NO", "NO", "NO", "NO", "NO"), LINE = c("LINE", "LINE", "LINE",
"LINE", "LINE", "LINE"), YES.2 = c("YES", "YES", "YES", "YES",
"YES", "YES"), Motter94 = c("Motter94", "WoodHoe70", "WoodHoe70",
"WoodHoe70", "WoodHoe70", "Motter94"), X821 = c(821L, 815L,
816L, 816L, 816L, 827L), X2 = c(2, 9, 9, 2, 2, 2), TABLOID = c("TABLOID",
"CATALOG", "CATALOG", "TABLOID", "CATALOG", "TABLOID"), NorthUS = c("NorthUS",
"NorthUS", "NorthUS", NA, "NorthUS", "CANADIAN"), X1911 = c(NA,
NA, 1910L, 1910L, 1910L, 1911L), X55 = c(55, 62, 52, 50,
50, 50), X46 = c(46L, 40L, 40L, 46L, 40L, 46L), X0.2 = c("0.3",
"0.433", "0.3", "0.3", "0.267", "0.3"), X17 = c(15, 16, 16,
17, 16.8, 16.5), X78 = c(80L, 80L, 75L, 80L, 76L, 75L), X0.75 = c(0.75,
NA, 0.3125, 0.75, 0.4375, 0.75), X20 = c(20L, 30L, 30L, 30L,
28L, 30L), X13.1 = c(6.6, 6.5, 5.6, 0, 8.6, 0), X1700 = c(1900L,
1850L, 1467L, 2100L, 1467L, 2600L), X50.5 = c(54.9, 53.8,
55.6, 57.5, 53.8, 62.5), X36.4 = c(38.5, 39.8, 38.8, 42.5,
37.6, 37.5), X0 = c(0, 0, 0, 5, 5, 6), X0.1 = c(0, 0, 0,
0, 0, 0), X2.5 = c(2.5, 2.8, 2.5, 2.3, 2.5, 2.5), X1 = c(0.7,
0.9, 1.3, 0.6, 0.8, 0.6), X34 = c(34, 40, 40, 35, 40, 30),
X40 = c(40L, 40L, 40L, 40L, 40L, 40L), X105 = c(105, 103.87,
108.06, 106.67, 103.87, 106.67), X100 = c(100L, 100L, 100L,
100L, 100L, 100L), band = c("noband", "noband", "noband",
"noband", "noband", "noband")), row.names = c(NA, 6L), class = "data.frame")
列類型為:
sapply(cylinder,class)
X19910108 X126 TVGUIDE X25503 YES KEY YES.1 BENTON GALLATIN
"character" "character" "character" "integer" "character" "character" "character" "character" "character"
UNCOATED UNCOATED.1 NO LINE YES.2 Motter94 X821 X2 TABLOID
"character" "character" "character" "character" "character" "character" "integer" "numeric" "character"
NorthUS X1911 X55 X46 X0.2 X17 X78 X0.75 X20
"character" "integer" "numeric" "integer" "character" "numeric" "integer" "numeric" "integer"
X13.1 X1700 X50.5 X36.4 X0 X0.1 X2.5 X1 X34
"numeric" "integer" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
X40 X105 X100 band
"integer" "numeric" "integer" "character"
我想重新排列數據集的列,使 "numeric" 列(包括 numeric 與 integer 類型)位於左側,而 "character" 列位於右側!
謝謝你的幫助 !
我們也可以只使用一個 where,在 lambda 表達式中用邏輯運算符 | 把兩個條件組合起來(注意:這種寫法只會選出屬於這兩種類型的列,並保持原有的列順序,不會重新排序):
library(dplyr)
df %>%
select(where(~ is.character(.)|is.numeric(.)))
類似下面這樣的寫法應該可行:
library(dplyr)
df %>% select(where(is.numeric), where(is.character))
這是另一種做法(僅使用 base R):
cylinder[
order(
as.integer(
factor(sapply(cylinder, class),
levels = c("numeric", "integer", "character")
)
)
)
]
使用 collapse 套件的簡潔寫法:
library(collapse)
colorderv(d, nv(d, 2))
# same but more explicit
colorderv(d, num_vars(d, return = "names"))
使用 nv / num_vars 搭配 data.table::setcolorder,我們可以按引用(原地)更新列順序:
setcolorder(d, nv(d, 2))
我找到了一個可行的解決方案,但想請教是否有人能提出更好的做法:
unique(sapply(cylinder,class))
[1] "character" "integer" "numeric"
> my.order <-unique(sapply(cylinder,class))
> head(cylinder %>%
+ select(sapply(., class) %>% .[order(match(., my.order))] %>% names))
X19910108 X126 TVGUIDE YES KEY YES.1 BENTON GALLATIN UNCOATED UNCOATED.1 NO LINE YES.2 Motter94 TABLOID NorthUS
1 19910109 X266 TVGUIDE YES KEY YES BENTON GALLATIN UNCOATED UNCOATED NO LINE YES Motter94 TABLOID NorthUS
2 19910104 B7 MODMAT YES KEY YES BENTON GALLATIN UNCOATED COATED NO LINE YES WoodHoe70 CATALOG NorthUS
3 19910104 T133 MASSEY YES KEY YES BENTON GALLATIN UNCOATED UNCOATED NO LINE YES WoodHoe70 CATALOG NorthUS
4 19910111 J34 KMART NO KEY YES BENTON GALLATIN UNCOATED COATED NO LINE YES WoodHoe70 TABLOID <NA>
5 19910104 T218 MASSEY YES KEY YES BENTON GALLATIN UNCOATED UNCOATED NO LINE YES WoodHoe70 CATALOG NorthUS
6 19910111 X249 ROSES NO KEY YES BENTON GALLATIN COATED COATED NO LINE YES Motter94 TABLOID CANADIAN
X0.2 band X25503 X821 X1911 X46 X78 X20 X1700 X40 X100 X2 X55 X17 X0.75 X13.1 X50.5 X36.4 X0 X0.1 X2.5 X1
1 0.3 noband 25503 821 NA 46 80 20 1900 40 100 2 55 15.0 0.7500 6.6 54.9 38.5 0 0 2.5 0.7
2 0.433 noband 47201 815 NA 40 80 30 1850 40 100 9 62 16.0 NA 6.5 53.8 39.8 0 0 2.8 0.9
3 0.3 noband 39039 816 1910 40 75 30 1467 40 100 9 52 16.0 0.3125 5.6 55.6 38.8 0 0 2.5 1.3
4 0.3 noband 37351 816 1910 46 80 30 2100 40 100 2 50 17.0 0.7500 0.0 57.5 42.5 5 0 2.3 0.6
5 0.267 noband 38039 816 1910 40 76 28 1467 40 100 2 50 16.8 0.4375 8.6 53.8 37.6 5 0 2.5 0.8
6 0.3 noband 35751 827 1911 46 75 30 2600 40 100 2 50 16.5 0.7500 0.0 62.5 37.5 6 0 2.5 0.6
X34 X105
1 34 105.00
2 40 103.87
3 40 108.06
4 35 106.67
5 40 103.87
6 30 106.67
幸運的是,我找到了一個專門用於此目的的現成函數 dplyr::relocate(注意:下面的寫法會把數值列移到字符列之後,即數值列在右側;若要讓數值列位於左側,可直接使用 relocate(where(is.numeric))):
df %>% relocate(where(is.numeric), .after = where(is.character))
https://dplyr.tidyverse.org/reference/relocate.html
此解決方案要求數據框的列必須有名稱。例如:
colnames(df)<-NULL
df %>% relocate(where(is.numeric), .after = where(is.character))
會報以下錯誤:
Error: Can't select within an unnamed vector.
Run `rlang::last_error()` to see where the error occurred.
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.