簡體   English   中英

如何在 R 中將所有寬格式的列熔化為長格式

[英]How to melt all columns in wide to long format in R

假設我有一個三列數據框

# Example input: a 6-row data frame with three factor columns
# (extra.sd.3.sq., extra.sd.4.sq., extra.sd.5.sq.), written in what appears
# to be dput() output form. Each column stores integer codes that index into
# its own .Label vector, so the three columns carry different level sets.
# The last column has values only in its first two rows; the other four
# entries are NA.
df <- structure(list(extra.sd.3.sq. = structure(c(38L, 40L, 39L, 49L, 
47L, 36L), .Label = c("0_62433_1", "0_62433_49", "0_62433_54", 
"0_62534_1", "0_62534_17", "0_62771_56", "10R_R22_4", "10R_R22_5", 
"10R_R22_9", "11_14_14.211", "11_14_14.222", "11_14_14.223", 
"17_14_1", "19_L0022_L0022A", "19_L0031_L0031A", "19_L0031_L0031D", 
"21_ND121_3", "21_ND165_1", "21_ND165_3", "25_8_151", "26_BCR_BCR18846", 
"26_EGE_EGE63828", "27_104_62571", "27_104_84750", "27_176_85817", 
"4_557_10", "4_557_21", "4_557_99", "4_637_99", "4_650_5", "8_64021_11", 
"8_64034_15", "A-LOAD-LD003528", "A-LOAD-LD006880", "A-LOAD-LD006898", 
"A-LOAD-LD006901", "A-LOAD-LD006905", "A-LOAD-LD007218", "A-LOAD-LD007230", 
"A-LOAD-LD007250", "A-MIA-UM000661", "A-MIA-UM000663", "A-MIA-UM001974", 
"A-MIA-UM001975", "A-NCRD-NC002972", "A-NCRD-NC003203", "A-UPN-UP000022", 
"A-UPN-UP000131", "A-UPN-UP000175", "MAP_86633"), class = "factor"), 
    extra.sd.4.sq. = structure(c(9L, 10L, 7L, 6L, 8L, 2L), .Label = c("10R_R22_5", 
    "19_L0031_L0031D", "26_LDD_LDD83801", "27_176_85817", "4_557_99", 
    "4H_328_3", "4H_328_4", "4H_328_99.WGS", "A-LOAD-LD006898", 
    "A-LOAD-LD006905"), class = "factor"), extra.sd.5.sq. = structure(c(2L, 
    1L, NA, NA, NA, NA), .Label = c("4_557_99", "4H_328_4"), class = "factor")), row.names = c(NA, 
6L), class = "data.frame")

我想將所有列合併為長格式,並得到如下表格:

Samples         SD
A-LOAD-LD007218 extra.sd.3.sq.
A-LOAD-LD007250 extra.sd.3.sq.
A-LOAD-LD007230 extra.sd.3.sq.
A-UPN-UP000175 extra.sd.3.sq.
A-UPN-UP000022 extra.sd.3.sq.
A-LOAD-LD006901 extra.sd.3.sq.
A-LOAD-LD006898 extra.sd.4.sq.
A-LOAD-LD006905 extra.sd.4.sq.
4H_328_4 extra.sd.4.sq.
4H_328_3 extra.sd.4.sq.
4H_328_99.WGS extra.sd.4.sq.
19_L0031_L0031D extra.sd.4.sq.
4H_328_4 extra.sd.5.sq.
4_557_99 extra.sd.5.sq.

我原本想用類似下面的寫法,但我的數據框中並沒有“ID”或“var”這樣的列: melt(setDT(df), id.vars = "ID", variable.name = "var")

我們可以使用 tidyr 包中的 pivot_longer:

library(tidyr)
library(dplyr)

# Stack every column of `df` into name/value pairs: the former column name
# goes to `SD`, the cell value to `Samples`, then sort the rows by `Samples`.
df %>%
  pivot_longer(
    cols = everything(),
    names_to = "SD",
    values_to = "Samples",
    # Drop the NA padding of the shorter columns during the pivot itself,
    # rather than with a separate na.omit() pass over the result.
    values_drop_na = TRUE
  ) %>%
  arrange(Samples)
   SD             Samples        
   <chr>          <fct>          
 1 extra.sd.4.sq. 19_L0031_L0031D
 2 extra.sd.5.sq. 4_557_99       
 3 extra.sd.4.sq. A-LOAD-LD006898
 4 extra.sd.3.sq. A-LOAD-LD006901
 5 extra.sd.4.sq. A-LOAD-LD006905
 6 extra.sd.3.sq. A-LOAD-LD007218
 7 extra.sd.3.sq. A-LOAD-LD007230
 8 extra.sd.3.sq. A-LOAD-LD007250
 9 extra.sd.3.sq. A-UPN-UP000022 
10 extra.sd.3.sq. A-UPN-UP000175 
11 extra.sd.4.sq. 4H_328_3       
12 extra.sd.5.sq. 4H_328_4       
13 extra.sd.4.sq. 4H_328_4       
14 extra.sd.4.sq. 4H_328_99.WGS 

在這種情況下,有時先添加一個稍後會丟棄的列是有幫助的,這樣 melt 就有一個“id”列可用。

首先,讓我們將各個 factor 列轉換為 character,否則它們不太可能很好地協同工作。

# Convert every column to character first; the factor columns of `df` have
# different level sets, which do not combine cleanly when stacked.
df[] <- lapply(df, as.character)
# Temporary row-number column so melt() has an id variable; it is dropped
# again below. seq_len() is used instead of seq() because seq(0) would
# yield c(1, 0) for a zero-row data frame.
df$rn <- seq_len(nrow(df))
# Melt all remaining columns into SD/Samples pairs, discarding NA values,
# then remove the helper id column from the result.
transform(
  reshape2::melt(
    df,
    id.vars = "rn",
    variable.name = "SD",
    value.name = "Samples",
    na.rm = TRUE
  ),
  rn = NULL
)
#                SD         Samples
# 1  extra.sd.3.sq. A-LOAD-LD007218
# 2  extra.sd.3.sq. A-LOAD-LD007250
# 3  extra.sd.3.sq. A-LOAD-LD007230
# 4  extra.sd.3.sq.  A-UPN-UP000175
# 5  extra.sd.3.sq.  A-UPN-UP000022
# 6  extra.sd.3.sq. A-LOAD-LD006901
# 7  extra.sd.4.sq. A-LOAD-LD006898
# 8  extra.sd.4.sq. A-LOAD-LD006905
# 9  extra.sd.4.sq.        4H_328_4
# 10 extra.sd.4.sq.        4H_328_3
# 11 extra.sd.4.sq.   4H_328_99.WGS
# 12 extra.sd.4.sq. 19_L0031_L0031D
# 13 extra.sd.5.sq.        4H_328_4
# 14 extra.sd.5.sq.        4_557_99

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM