简体   繁体   English

将数据框转换为 R 中的布尔值

[英]Converting a data frame to boolean values in R

I have a list of samples scored for mutations in signalling pathways, as shown below: 1 if mutated and 0 if not mutated. 我有一个针对信号通路突变的样本列表,如果突变则为 1,如果未突变则为 0

  > dput(pathway)
structure(list(sample = c("LP6005334.DNA_H01", "LP6005334.DNA_H01", 
"LP6005334.DNA_H01", "LP6005334.DNA_H01", "LP6005334.DNA_H01", 
"LP6005334.DNA_H01", "LP6005334.DNA_H01", "LP6005334.DNA_H01", 
"LP6005334.DNA_H01", "LP6005334.DNA_H01", "LP6005334.DNA_H01", 
"LP6005334.DNA_H01", "LP6005500.DNA_D03", "LP6005500.DNA_D03", 
"LP6005500.DNA_D03", "LP6005500.DNA_D03", "LP6005500.DNA_D03", 
"LP6005500.DNA_D03", "LP6005500.DNA_D03", "LP6005500.DNA_D03", 
"LP6005500.DNA_D03", "LP6005500.DNA_D03", "LP6005500.DNA_D03", 
"LP6005500.DNA_D03", "LP6005500.DNA_D03", "LP6007600", "LP6007600", 
"LP6007600", "LP6007600", "LP6007600", "LP6007600", "LP6007600", 
"LP6007600", "LP6007600", "LP6007600", "LP6007600", "LP6007600", 
"LP6008202.DNA_B03", "LP6008202.DNA_B03", "LP6008202.DNA_B03", 
"LP6008202.DNA_B03", "LP6008202.DNA_B03", "LP6008202.DNA_B03", 
"LP6008202.DNA_B03", "LP6008202.DNA_B03", "LP6008202.DNA_B03", 
"LP6008202.DNA_B03", "LP6008202.DNA_B03", "LP6008202.DNA_B03", 
"LP6008334.DNA_A03", "LP6008334.DNA_A03", "LP6008334.DNA_A03", 
"LP6008334.DNA_A03", "LP6008334.DNA_A03", "LP6008334.DNA_A03", 
"LP6008334.DNA_A03", "LP6008334.DNA_A03", "LP6008334.DNA_A03", 
"LP6008334.DNA_A03", "LP6008334.DNA_A03", "LP6008334.DNA_A03", 
"LP6008334.DNA_A04", "LP6008334.DNA_A04", "LP6008334.DNA_A04", 
"LP6008334.DNA_A04", "LP6008334.DNA_A04", "LP6008334.DNA_A04", 
"LP6008334.DNA_A04", "LP6008334.DNA_A04", "LP6008334.DNA_A04", 
"LP6008334.DNA_A04", "LP6008334.DNA_A04", "LP6008334.DNA_A04", 
"LP6008334.DNA_B02", "LP6008334.DNA_B02", "LP6008334.DNA_B02", 
"LP6008334.DNA_B02", "LP6008334.DNA_B02", "LP6008334.DNA_B02", 
"LP6008334.DNA_B02", "LP6008334.DNA_B02", "LP6008334.DNA_B02", 
"LP6008334.DNA_B02", "LP6008334.DNA_B02", "LP6008334.DNA_B02", 
"LP6008334.DNA_C02", "LP6008334.DNA_C02", "LP6008334.DNA_C02", 
"LP6008334.DNA_C02", "LP6008334.DNA_C02", "LP6008334.DNA_C02", 
"LP6008334.DNA_C02", "LP6008334.DNA_C02", "LP6008334.DNA_C02", 
"LP6008334.DNA_C02", "LP6008334.DNA_C02", "LP6008334.DNA_C02", 
"LP6008334.DNA_D02", "LP6008334.DNA_D02", "LP6008334.DNA_D02", 
"LP6008334.DNA_D02", "LP6008334.DNA_D02", "LP6008334.DNA_D02", 
"LP6008334.DNA_D02", "LP6008334.DNA_D02", "LP6008334.DNA_D02", 
"LP6008334.DNA_D02", "LP6008334.DNA_D02", "LP6008334.DNA_D02", 
"LP6008336.DNA_F02", "LP6008336.DNA_F02", "LP6008336.DNA_F02", 
"LP6008336.DNA_F02", "LP6008336.DNA_F02", "LP6008336.DNA_F02", 
"LP6008336.DNA_F02", "LP6008336.DNA_F02", "LP6008336.DNA_F02", 
"LP6008336.DNA_F02", "LP6008336.DNA_F02", "LP6008336.DNA_F02", 
"LP6008336.DNA_G01", "LP6008336.DNA_G01", "LP6008336.DNA_G01", 
"LP6008336.DNA_G01", "LP6008336.DNA_G01", "LP6008336.DNA_G01", 
"LP6008336.DNA_G01", "LP6008336.DNA_G01", "LP6008336.DNA_G01", 
"LP6008336.DNA_G01", "LP6008336.DNA_G01", "LP6008336.DNA_G01", 
"LP6008336.DNA_H01", "LP6008336.DNA_H01", "LP6008336.DNA_H01", 
"LP6008336.DNA_H01", "LP6008336.DNA_H01", "LP6008336.DNA_H01", 
"LP6008336.DNA_H01", "LP6008336.DNA_H01", "LP6008336.DNA_H01", 
"LP6008336.DNA_H01", "LP6008336.DNA_H01", "LP6008336.DNA_H01", 
"LP6008337.DNA_A07", "LP6008337.DNA_A07", "LP6008337.DNA_A07", 
"LP6008337.DNA_A07", "LP6008337.DNA_A07", "LP6008337.DNA_A07", 
"LP6008337.DNA_A07", "LP6008337.DNA_A07", "LP6008337.DNA_A07", 
"LP6008337.DNA_A07", "LP6008337.DNA_A07", "LP6008337.DNA_A07", 
"LP6008337.DNA_H06", "LP6008337.DNA_H06", "LP6008337.DNA_H06", 
"LP6008337.DNA_H06", "LP6008337.DNA_H06", "LP6008337.DNA_H06", 
"LP6008337.DNA_H06", "LP6008337.DNA_H06", "LP6008337.DNA_H06", 
"LP6008337.DNA_H06", "LP6008337.DNA_H06", "LP6008337.DNA_H06", 
"LP6008460.DNA_A04", "LP6008460.DNA_A04", "LP6008460.DNA_A04", 
"LP6008460.DNA_A04", "LP6008460.DNA_A04", "LP6008460.DNA_A04", 
"LP6008460.DNA_A04", "LP6008460.DNA_A04", "LP6008460.DNA_A04", 
"LP6008460.DNA_A04", "LP6008460.DNA_A04", "LP6008460.DNA_A04", 
"LP6008460.DNA_D01", "LP6008460.DNA_D01", "LP6008460.DNA_D01", 
"LP6008460.DNA_D01", "LP6008460.DNA_D01", "LP6008460.DNA_D01", 
"LP6008460.DNA_D01", "LP6008460.DNA_D01", "LP6008460.DNA_D01", 
"LP6008460.DNA_D01", "LP6008460.DNA_D01", "LP6008460.DNA_D01", 
"LP6008460.DNA_F02", "LP6008460.DNA_F02", "LP6008460.DNA_F02", 
"LP6008460.DNA_F02", "LP6008460.DNA_F02", "LP6008460.DNA_F02", 
"LP6008460.DNA_F02", "LP6008460.DNA_F02", "LP6008460.DNA_F02", 
"LP6008460.DNA_F02", "LP6008460.DNA_F02", "LP6008460.DNA_F02", 
"LP6008460.DNA_G03", "LP6008460.DNA_G03", "LP6008460.DNA_G03", 
"LP6008460.DNA_G03", "LP6008460.DNA_G03", "LP6008460.DNA_G03", 
"LP6008460.DNA_G03", "LP6008460.DNA_G03", "LP6008460.DNA_G03", 
"LP6008460.DNA_G03", "LP6008460.DNA_G03", "LP6008460.DNA_G03", 
"s15", "s15", "s15", "s15", "s15", "s15", "s15", "s15", "s15", 
"s15", "s15", "s15", "s15", "s15", "s18", "s18", "s18", "s18", 
"s18", "s18", "s18", "s18", "s18", "s18", "s18", "s18", "s18", 
"s18", "s18", "s18", "s24", "s24", "s24", "s24", "s24", "s24", 
"s24", "s24", "s24", "s24", "s24", "s24", "s24", "s24", "s24", 
"s30", "s30", "s30", "s30", "s30", "s30", "s30", "s30", "s30", 
"s30", "s30", "s30", "s30", "s30", "s59", "s59", "s59", "s59", 
"s59", "s59", "s59", "s59", "s59", "s59", "s59", "s59", "s67", 
"s67", "s67", "s67", "s67", "s67", "s67", "s67", "s67", "s67", 
"s67", "s67", "s67", "s67", "s80", "s80", "s80", "s80", "s80", 
"s80", "s80", "s80", "s80", "s80", "s80", "s80", "s80", "s80", 
"s80", "s80", "s86", "s86", "s86", "s86", "s86", "s86", "s86", 
"s86", "s86", "s86", "s86", "s86", "s86", "s86", "s94", "s94", 
"s94", "s94", "s94", "s94", "s94", "s94", "s94", "s94", "s94", 
"s94", "s94", "s94"), Pathway = c("PI3K", "Cell_Cycle", "RTK-RAS", 
"WNT", "TGF-Beta", "CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", 
"NRF2", "PI3K", "Cell_Cycle", "WNT", "TGF-Beta", "RTK-RAS", "WNT", 
"CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", "TGF-Beta", 
"PI3K", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH", 
"Hippo", "MYC", "NRF2", "PI3K", "TGF-Beta", "RTK-RAS", "WNT", 
"CR", "CF", "TP53", "Cell_Cycle", "NOTCH", "Hippo", "MYC", "NRF2", 
"Cell_Cycle", "TGF-Beta", "PI3K", "RTK-RAS", "WNT", "CR", "CF", 
"TP53", "NOTCH", "Hippo", "MYC", "NRF2", "RTK-RAS", "TGF-Beta", 
"PI3K", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH", "Hippo", 
"MYC", "NRF2", "Cell_Cycle", "TGF-Beta", "PI3K", "RTK-RAS", "WNT", 
"CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", "PI3K", 
"Cell_Cycle", "RTK-RAS", "TGF-Beta", "WNT", "CR", "CF", "TP53", 
"NOTCH", "Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", "TGF-Beta", 
"RTK-RAS", "WNT", "CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", 
"NRF2", "Cell_Cycle", "TGF-Beta", "PI3K", "RTK-RAS", "WNT", "CR", 
"CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", "TGF-Beta", "PI3K", 
"RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH", 
"Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", "WNT", "TGF-Beta", 
"RTK-RAS", "CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", 
"PI3K", "TGF-Beta", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", 
"NOTCH", "Hippo", "MYC", "NRF2", "Cell_Cycle", "RTK-RAS", "TGF-Beta", 
"PI3K", "WNT", "CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", 
"Cell_Cycle", "TGF-Beta", "PI3K", "RTK-RAS", "WNT", "CR", "CF", 
"TP53", "NOTCH", "Hippo", "MYC", "NRF2", "Cell_Cycle", "CF", 
"CR", "Hippo", "MYC", "NOTCH", "NRF2", "PI3K", "RTK-RAS", "TGF-Beta", 
"TP53", "WNT", "Cell_Cycle", "RTK-RAS", "TGF-Beta", "PI3K", "WNT", 
"CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", "RTK-RAS", 
"TGF-Beta", "PI3K", "WNT", "CR", "CF", "TP53", "Cell_Cycle", 
"NOTCH", "Hippo", "MYC", "NRF2", "Cell_Cycle", "RTK-RAS", "TGF-Beta", 
"PI3K", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH", 
"Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", "RTK-RAS", "WNT", 
"TGF-Beta", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", 
"NOTCH", "Hippo", "MYC", "NRF2", "PI3K", "PI3K", "Cell_Cycle", 
"RTK-RAS", "TGF-Beta", "RTK-RAS", "WNT", "CR", "CF", "TP53", 
"Cell_Cycle", "NOTCH", "Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", 
"WNT", "TGF-Beta", "PI3K", "RTK-RAS", "WNT", "CR", "CF", "TP53", 
"Cell_Cycle", "NOTCH", "Hippo", "MYC", "NRF2", "TGF-Beta", "PI3K", 
"RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH", 
"Hippo", "MYC", "NRF2", "Cell_Cycle", "WNT", "TGF-Beta", "PI3K", 
"RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH", 
"Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", "RTK-RAS", "WNT", 
"TGF-Beta", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", 
"NOTCH", "Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", "WNT", 
"TGF-Beta", "PI3K", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", 
"NOTCH", "Hippo", "MYC", "NRF2", "Cell_Cycle", "RTK-RAS", "TGF-Beta", 
"PI3K", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH", 
"Hippo", "MYC", "NRF2"), value = c(1L, 1L, 1L, 1L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L)), class = "data.frame", row.names = c(NA, -346L))

I want to convert this to a boolean matrix (wide format) in which pathways are in columns and samples are in rows我想将其转换为布尔矩阵(宽格式),其中路径在列中,样本在行中

A sample gets 1 if it is mutated for a pathway and 0 if it is not, like this: 样本在通路发生突变时获得 1,如果通路未发生突变,则获得 0,例如

> head(p)
             sample value Cell_Cycle Hippo MYC NOTCH NRF2 PI3K TGF-Beta RTK-RAS TP53
1 LP6008334.DNA_C02     0          0     0   0     0    0    0        0       0    0
2 LP6008334.DNA_A03     0          0     0   0     0    0    0        0       0    0
3 LP6005334.DNA_H01     0          0     0   0     0    0    0        0       0    0
4 LP6008337.DNA_H06     0          0     0   0     0    0    0        0       0    0
5               s15     0          0     0   0     0    0    0        0       0    0
6 LP6008460.DNA_D01     0          0     0   0     0    0    0        0       0    0
  WNT CF CR
1   0  0  0
2   0  0  0
3   0  0  0
4   0  0  0
5   0  0  0
6   0  0  0
>

But all I get is zeros. 但我得到的都是零

I have tried this我试过这个

# Append one column to `p` for every pathway name that is not already one of
# its column names. Each new column is built from array(0, nrow(p)), i.e. all
# zeros, and nothing in the loop ever writes a 1, so the added columns stay 0.
# NOTE(review): the loop variable is named `pathway`, the same name as the data
# frame printed by dput(pathway); if both live in one session, the loop
# overwrites that object with a character string -- confirm before reusing it.
for (pathway in setdiff(unique(p$Pathway), colnames(p))) {
  p <- cbind(p,array(0,nrow(p)))  # add a zero-filled column of length nrow(p)
  colnames(p)[ncol(p)] <- pathway  # name the newly added last column
}
 

This gives a matrix that is all 0. 这给出了所有矩阵 0

I have also tried: 我也一直在努力

# Long-to-wide reshape: `sample` identifies the rows and each distinct
# `Pathway` becomes its own column.
reshape(
  pathway,
  direction = "wide",
  timevar = "Pathway",
  idvar = "sample"
)

But this gives the wrong 0s and 1s. 但是给出了错误的 0 和 1

Please help me请帮我

我们可以使用table

# Build a sample x Pathway 0/1 indicator matrix from the long data frame `p`
# (columns `sample`, `Pathway`, `value`).
#
# Cross-tabulating only `sample` and `Pathway` counts rows, not mutations: a
# pair recorded with value 0 would still come out as 1. So `value` is added as
# a third dimension and only the value == 1 slice is kept.
# factor(..., levels = c(0, 1)) guarantees the "1" slice exists even when no
# row is mutated. Samples and pathways with no mutation stay in the result as
# all-zero rows/columns, and repeated (sample, Pathway) rows collapse to 1 if
# any of them has value 1.
+(table(
  sample = p$sample,
  Pathway = p$Pathway,
  value = factor(p$value, levels = c(0, 1))
)[, , "1"] > 0)

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM