[英]Converting a data frame to boolean values in R
I have a list of samples with their mutation status for several signalling pathways, as shown below: the value is 1 if the sample is mutated for the pathway and 0 if it is not.我有一个针对信号通路突变的样本列表,如果突变则为 1,如果未突变则为 0
> dput(pathway)
structure(list(sample = c("LP6005334.DNA_H01", "LP6005334.DNA_H01",
"LP6005334.DNA_H01", "LP6005334.DNA_H01", "LP6005334.DNA_H01",
"LP6005334.DNA_H01", "LP6005334.DNA_H01", "LP6005334.DNA_H01",
"LP6005334.DNA_H01", "LP6005334.DNA_H01", "LP6005334.DNA_H01",
"LP6005334.DNA_H01", "LP6005500.DNA_D03", "LP6005500.DNA_D03",
"LP6005500.DNA_D03", "LP6005500.DNA_D03", "LP6005500.DNA_D03",
"LP6005500.DNA_D03", "LP6005500.DNA_D03", "LP6005500.DNA_D03",
"LP6005500.DNA_D03", "LP6005500.DNA_D03", "LP6005500.DNA_D03",
"LP6005500.DNA_D03", "LP6005500.DNA_D03", "LP6007600", "LP6007600",
"LP6007600", "LP6007600", "LP6007600", "LP6007600", "LP6007600",
"LP6007600", "LP6007600", "LP6007600", "LP6007600", "LP6007600",
"LP6008202.DNA_B03", "LP6008202.DNA_B03", "LP6008202.DNA_B03",
"LP6008202.DNA_B03", "LP6008202.DNA_B03", "LP6008202.DNA_B03",
"LP6008202.DNA_B03", "LP6008202.DNA_B03", "LP6008202.DNA_B03",
"LP6008202.DNA_B03", "LP6008202.DNA_B03", "LP6008202.DNA_B03",
"LP6008334.DNA_A03", "LP6008334.DNA_A03", "LP6008334.DNA_A03",
"LP6008334.DNA_A03", "LP6008334.DNA_A03", "LP6008334.DNA_A03",
"LP6008334.DNA_A03", "LP6008334.DNA_A03", "LP6008334.DNA_A03",
"LP6008334.DNA_A03", "LP6008334.DNA_A03", "LP6008334.DNA_A03",
"LP6008334.DNA_A04", "LP6008334.DNA_A04", "LP6008334.DNA_A04",
"LP6008334.DNA_A04", "LP6008334.DNA_A04", "LP6008334.DNA_A04",
"LP6008334.DNA_A04", "LP6008334.DNA_A04", "LP6008334.DNA_A04",
"LP6008334.DNA_A04", "LP6008334.DNA_A04", "LP6008334.DNA_A04",
"LP6008334.DNA_B02", "LP6008334.DNA_B02", "LP6008334.DNA_B02",
"LP6008334.DNA_B02", "LP6008334.DNA_B02", "LP6008334.DNA_B02",
"LP6008334.DNA_B02", "LP6008334.DNA_B02", "LP6008334.DNA_B02",
"LP6008334.DNA_B02", "LP6008334.DNA_B02", "LP6008334.DNA_B02",
"LP6008334.DNA_C02", "LP6008334.DNA_C02", "LP6008334.DNA_C02",
"LP6008334.DNA_C02", "LP6008334.DNA_C02", "LP6008334.DNA_C02",
"LP6008334.DNA_C02", "LP6008334.DNA_C02", "LP6008334.DNA_C02",
"LP6008334.DNA_C02", "LP6008334.DNA_C02", "LP6008334.DNA_C02",
"LP6008334.DNA_D02", "LP6008334.DNA_D02", "LP6008334.DNA_D02",
"LP6008334.DNA_D02", "LP6008334.DNA_D02", "LP6008334.DNA_D02",
"LP6008334.DNA_D02", "LP6008334.DNA_D02", "LP6008334.DNA_D02",
"LP6008334.DNA_D02", "LP6008334.DNA_D02", "LP6008334.DNA_D02",
"LP6008336.DNA_F02", "LP6008336.DNA_F02", "LP6008336.DNA_F02",
"LP6008336.DNA_F02", "LP6008336.DNA_F02", "LP6008336.DNA_F02",
"LP6008336.DNA_F02", "LP6008336.DNA_F02", "LP6008336.DNA_F02",
"LP6008336.DNA_F02", "LP6008336.DNA_F02", "LP6008336.DNA_F02",
"LP6008336.DNA_G01", "LP6008336.DNA_G01", "LP6008336.DNA_G01",
"LP6008336.DNA_G01", "LP6008336.DNA_G01", "LP6008336.DNA_G01",
"LP6008336.DNA_G01", "LP6008336.DNA_G01", "LP6008336.DNA_G01",
"LP6008336.DNA_G01", "LP6008336.DNA_G01", "LP6008336.DNA_G01",
"LP6008336.DNA_H01", "LP6008336.DNA_H01", "LP6008336.DNA_H01",
"LP6008336.DNA_H01", "LP6008336.DNA_H01", "LP6008336.DNA_H01",
"LP6008336.DNA_H01", "LP6008336.DNA_H01", "LP6008336.DNA_H01",
"LP6008336.DNA_H01", "LP6008336.DNA_H01", "LP6008336.DNA_H01",
"LP6008337.DNA_A07", "LP6008337.DNA_A07", "LP6008337.DNA_A07",
"LP6008337.DNA_A07", "LP6008337.DNA_A07", "LP6008337.DNA_A07",
"LP6008337.DNA_A07", "LP6008337.DNA_A07", "LP6008337.DNA_A07",
"LP6008337.DNA_A07", "LP6008337.DNA_A07", "LP6008337.DNA_A07",
"LP6008337.DNA_H06", "LP6008337.DNA_H06", "LP6008337.DNA_H06",
"LP6008337.DNA_H06", "LP6008337.DNA_H06", "LP6008337.DNA_H06",
"LP6008337.DNA_H06", "LP6008337.DNA_H06", "LP6008337.DNA_H06",
"LP6008337.DNA_H06", "LP6008337.DNA_H06", "LP6008337.DNA_H06",
"LP6008460.DNA_A04", "LP6008460.DNA_A04", "LP6008460.DNA_A04",
"LP6008460.DNA_A04", "LP6008460.DNA_A04", "LP6008460.DNA_A04",
"LP6008460.DNA_A04", "LP6008460.DNA_A04", "LP6008460.DNA_A04",
"LP6008460.DNA_A04", "LP6008460.DNA_A04", "LP6008460.DNA_A04",
"LP6008460.DNA_D01", "LP6008460.DNA_D01", "LP6008460.DNA_D01",
"LP6008460.DNA_D01", "LP6008460.DNA_D01", "LP6008460.DNA_D01",
"LP6008460.DNA_D01", "LP6008460.DNA_D01", "LP6008460.DNA_D01",
"LP6008460.DNA_D01", "LP6008460.DNA_D01", "LP6008460.DNA_D01",
"LP6008460.DNA_F02", "LP6008460.DNA_F02", "LP6008460.DNA_F02",
"LP6008460.DNA_F02", "LP6008460.DNA_F02", "LP6008460.DNA_F02",
"LP6008460.DNA_F02", "LP6008460.DNA_F02", "LP6008460.DNA_F02",
"LP6008460.DNA_F02", "LP6008460.DNA_F02", "LP6008460.DNA_F02",
"LP6008460.DNA_G03", "LP6008460.DNA_G03", "LP6008460.DNA_G03",
"LP6008460.DNA_G03", "LP6008460.DNA_G03", "LP6008460.DNA_G03",
"LP6008460.DNA_G03", "LP6008460.DNA_G03", "LP6008460.DNA_G03",
"LP6008460.DNA_G03", "LP6008460.DNA_G03", "LP6008460.DNA_G03",
"s15", "s15", "s15", "s15", "s15", "s15", "s15", "s15", "s15",
"s15", "s15", "s15", "s15", "s15", "s18", "s18", "s18", "s18",
"s18", "s18", "s18", "s18", "s18", "s18", "s18", "s18", "s18",
"s18", "s18", "s18", "s24", "s24", "s24", "s24", "s24", "s24",
"s24", "s24", "s24", "s24", "s24", "s24", "s24", "s24", "s24",
"s30", "s30", "s30", "s30", "s30", "s30", "s30", "s30", "s30",
"s30", "s30", "s30", "s30", "s30", "s59", "s59", "s59", "s59",
"s59", "s59", "s59", "s59", "s59", "s59", "s59", "s59", "s67",
"s67", "s67", "s67", "s67", "s67", "s67", "s67", "s67", "s67",
"s67", "s67", "s67", "s67", "s80", "s80", "s80", "s80", "s80",
"s80", "s80", "s80", "s80", "s80", "s80", "s80", "s80", "s80",
"s80", "s80", "s86", "s86", "s86", "s86", "s86", "s86", "s86",
"s86", "s86", "s86", "s86", "s86", "s86", "s86", "s94", "s94",
"s94", "s94", "s94", "s94", "s94", "s94", "s94", "s94", "s94",
"s94", "s94", "s94"), Pathway = c("PI3K", "Cell_Cycle", "RTK-RAS",
"WNT", "TGF-Beta", "CR", "CF", "TP53", "NOTCH", "Hippo", "MYC",
"NRF2", "PI3K", "Cell_Cycle", "WNT", "TGF-Beta", "RTK-RAS", "WNT",
"CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", "TGF-Beta",
"PI3K", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH",
"Hippo", "MYC", "NRF2", "PI3K", "TGF-Beta", "RTK-RAS", "WNT",
"CR", "CF", "TP53", "Cell_Cycle", "NOTCH", "Hippo", "MYC", "NRF2",
"Cell_Cycle", "TGF-Beta", "PI3K", "RTK-RAS", "WNT", "CR", "CF",
"TP53", "NOTCH", "Hippo", "MYC", "NRF2", "RTK-RAS", "TGF-Beta",
"PI3K", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH", "Hippo",
"MYC", "NRF2", "Cell_Cycle", "TGF-Beta", "PI3K", "RTK-RAS", "WNT",
"CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", "PI3K",
"Cell_Cycle", "RTK-RAS", "TGF-Beta", "WNT", "CR", "CF", "TP53",
"NOTCH", "Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", "TGF-Beta",
"RTK-RAS", "WNT", "CR", "CF", "TP53", "NOTCH", "Hippo", "MYC",
"NRF2", "Cell_Cycle", "TGF-Beta", "PI3K", "RTK-RAS", "WNT", "CR",
"CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", "TGF-Beta", "PI3K",
"RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH",
"Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", "WNT", "TGF-Beta",
"RTK-RAS", "CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2",
"PI3K", "TGF-Beta", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle",
"NOTCH", "Hippo", "MYC", "NRF2", "Cell_Cycle", "RTK-RAS", "TGF-Beta",
"PI3K", "WNT", "CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2",
"Cell_Cycle", "TGF-Beta", "PI3K", "RTK-RAS", "WNT", "CR", "CF",
"TP53", "NOTCH", "Hippo", "MYC", "NRF2", "Cell_Cycle", "CF",
"CR", "Hippo", "MYC", "NOTCH", "NRF2", "PI3K", "RTK-RAS", "TGF-Beta",
"TP53", "WNT", "Cell_Cycle", "RTK-RAS", "TGF-Beta", "PI3K", "WNT",
"CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", "RTK-RAS",
"TGF-Beta", "PI3K", "WNT", "CR", "CF", "TP53", "Cell_Cycle",
"NOTCH", "Hippo", "MYC", "NRF2", "Cell_Cycle", "RTK-RAS", "TGF-Beta",
"PI3K", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH",
"Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", "RTK-RAS", "WNT",
"TGF-Beta", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle",
"NOTCH", "Hippo", "MYC", "NRF2", "PI3K", "PI3K", "Cell_Cycle",
"RTK-RAS", "TGF-Beta", "RTK-RAS", "WNT", "CR", "CF", "TP53",
"Cell_Cycle", "NOTCH", "Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle",
"WNT", "TGF-Beta", "PI3K", "RTK-RAS", "WNT", "CR", "CF", "TP53",
"Cell_Cycle", "NOTCH", "Hippo", "MYC", "NRF2", "TGF-Beta", "PI3K",
"RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH",
"Hippo", "MYC", "NRF2", "Cell_Cycle", "WNT", "TGF-Beta", "PI3K",
"RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH",
"Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", "RTK-RAS", "WNT",
"TGF-Beta", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle",
"NOTCH", "Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", "WNT",
"TGF-Beta", "PI3K", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle",
"NOTCH", "Hippo", "MYC", "NRF2", "Cell_Cycle", "RTK-RAS", "TGF-Beta",
"PI3K", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH",
"Hippo", "MYC", "NRF2"), value = c(1L, 1L, 1L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L)), class = "data.frame", row.names = c(NA, -346L))
I want to convert this to a boolean matrix (wide format) in which pathways are in columns and samples are in rows我想将其转换为布尔矩阵(宽格式),其中路径在列中,样本在行中
A sample gets 1 if it is mutated for a pathway and 0 if it is not, like this:样本在通路发生突变时获得 1,如果通路未发生突变,则获得 0,例如
> head(p)
sample value Cell_Cycle Hippo MYC NOTCH NRF2 PI3K TGF-Beta RTK-RAS TP53
1 LP6008334.DNA_C02 0 0 0 0 0 0 0 0 0 0
2 LP6008334.DNA_A03 0 0 0 0 0 0 0 0 0 0
3 LP6005334.DNA_H01 0 0 0 0 0 0 0 0 0 0
4 LP6008337.DNA_H06 0 0 0 0 0 0 0 0 0 0
5 s15 0 0 0 0 0 0 0 0 0 0
6 LP6008460.DNA_D01 0 0 0 0 0 0 0 0 0 0
WNT CF CR
1 0 0 0
2 0 0 0
3 0 0 0
4 0 0 0
5 0 0 0
6 0 0 0
>
But all I get is zeros但我得到的都是零
I have tried this我试过这个
# Append one column to p for every pathway name in p$Pathway that is not
# already a column of p. Each new column is filled with 0 and nothing in the
# loop reads p$value, so no cell is ever set to 1.
# NOTE(review): the loop assigns to the name `pathway`, replacing any existing
# object of that name in this scope (e.g. the data frame passed to dput above).
for (pathway in setdiff(unique(p$Pathway), colnames(p))) {
# array(0, nrow(p)) is a zero-filled vector with one entry per row of p.
p <- cbind(p,array(0,nrow(p)))
# Name the column that was just appended after the current pathway.
colnames(p)[ncol(p)] <- pathway
}
Which gives a matrix of all zeros这给出了一个全为 0 的矩阵
I have also tried我也试过
# Reshape the long data frame to wide format: rows are identified by `sample`
# and the remaining column (`value`) is spread into one column per distinct
# `Pathway`.
# NOTE(review): a sample/pathway pair that occurs more than once presumably
# keeps only its first row here — check against the reshape() documentation.
reshape(pathway, idvar = "sample", timevar = "Pathway", direction = "wide")
But it gives the wrong 0 and 1 values但是给出了错误的 0 和 1
Please help me请帮我
我们可以使用table
# Build the sample x pathway 0/1 indicator matrix from the long-format data.
# Counting rows with table() is not enough here: non-mutated pairs are present
# as rows with value 0, so every listed pair would be counted and reported as 1.
# xtabs() builds the contingency table by summing `value` within each
# sample/pathway cell (0 for pairs that never occur). A duplicated pair can sum
# above 1, hence the `> 0`; the unary `+` turns the logical result into 0/1.
# Assumes `p` is the long-format data frame with columns sample, Pathway and
# value — confirm.
+(xtabs(value ~ sample + Pathway, data = p) > 0)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.