具有多个列的两个数据帧之间的关系或相关性

Question

我有两个数据帧，我想以图形方式（散点图）显示这两个数据帧（基因与蛋白质）的行之间的相关性，以查看每一行是否相关。我能想到两种策略：1. 在两个数据帧之间做线性回归（不知道如何实现）；2. 使用各列的均值（和标准差），计算两者之间的皮尔逊（Pearson）相关性。

有人可以帮我设计这些图表吗？

这是我的数据的例子：

# Example gene expression table: one row per gene, one column per sample.
# The whitespace-separated text is parsed into a data frame by read.table().
genes <- "gene  sample1 sample2 sample3 sample4
gene1   1863.4  1972.94 1603.96 1185.6
gene2   213.88  247.14  189.02  208.793
gene3   8.06    9.25    9.59    7.33
gene4   22.36   3.76    10.64   19.17"
# header = TRUE (not the reassignable shorthand T): the first line holds the
# column names.
genes <- read.table(text = genes, header = TRUE)

# Example protein abundance table with the same sample columns as `genes`.
protein <- "protein sample1 sample2 sample3 sample4
protein1    314.2871797 426.8856595 405.7971059 334.1369651
protein2    4747.866647 3070.916824 2780.352062 2990.085431
protein3    1621.566329 1290.470104 1554.27426  1601.357345
pretein4    8832.210499 7796.675008 8461.733171 9500.429355"
protein <- read.table(text = protein, header = TRUE)

谢谢

Answer 1

对于一组列：

# Pair the first sample column of each table (column 2; column 1 holds the
# gene / protein identifiers).
data <- data.frame(genes[, 2], protein[, 2])
names(data) <- c("genes_sample1", "protein_sample1")

# plot() on a two-column data frame draws column 1 (genes_sample1) on the
# x axis and column 2 (protein_sample1) on the y axis.
plot(data)
# The overlaid line must therefore model y on x, i.e. protein ~ genes;
# fitting genes ~ protein would draw a line that does not match these axes.
abline(lm(protein_sample1 ~ genes_sample1, data = data))

# Correlation matrix of the two columns.
cor(data)
                genes_sample1 protein_sample1
genes_sample1       1.0000000      -0.6247528
protein_sample1    -0.6247528       1.0000000

Answer 2

# Label each row with its identifier so the transposed tables get
# meaningful column names.
row.names(genes) <- genes[["gene"]]
row.names(protein) <- protein[["protein"]]

# Transpose the numeric part (everything but the identifier column) so that
# samples become rows and genes / proteins become columns.
genes_t <- as.data.frame(t(genes[, -1]))
protein_t <- as.data.frame(t(protein[, -1]))

# Pearson correlation of every protein column against every gene column.
cor(x = protein_t, y = genes_t, method = "pearson")

# Scatter plots of all protein and gene columns side by side.
plot(cbind(protein_t, genes_t))

至于回归，我想你需要的是针对每一种蛋白质、把它的丰度与所有基因的表达量关联起来的回归，大致如下：

# For every protein (one column of protein_t), regress its values on all
# gene columns of genes_t. The result is a list of lm fits, one per protein.
lapply(
  X = protein_t,
  FUN = function(protein) {
    lm(formula = protein ~ ., data = cbind(genes_t, protein = protein))
  }
)

Answer 3

非常感谢大家给出的回答，它们帮助我按如下方式解决了问题：

# Work from the example data (`genes` and `protein`).

# Per-row average across the sample columns; column 1 holds the identifier.
mRNA_expression <- data.frame(
  genes = genes[[1]],
  Means = rowMeans(genes[, -1])
)
Protein_abundance <- data.frame(
  protein = protein[[1]],
  Means = rowMeans(protein[, -1])
)

# Put the two mean columns side by side for the correlation plot.
mean_corr <- data.frame(
  mRNA_expression = mRNA_expression[["Means"]],
  Protein_abundance = Protein_abundance[["Means"]]
)

# Drop rows containing NA, then move both columns to the log10 scale.
mean_corr <- log10(mean_corr[complete.cases(mean_corr), ])

library(ggplot2)

# Shared scaffold for every plot below: protein abundance on x, mRNA
# expression on y, extra spacing between each axis title and its axis, and
# the observations drawn as hollow circles. Building it once avoids repeating
# the same ggplot/labs/theme/geom_point chain for each variant.
base_plot <- ggplot(
  mean_corr,
  aes(x = Protein_abundance, y = mRNA_expression)
) +
  labs(x = "Protein abundance (log10)", y = "mRNA expression (log10)") +
  theme(
    axis.title.y = element_text(margin = margin(0, 10, 0, 0)),
    axis.title.x = element_text(margin = margin(10, 0, 0, 0))
  ) +
  geom_point(shape = 1) # Use hollow circles

# Scatter plot only, to check the distribution.
base_plot

# Different kinds of plots:

# Add a linear regression line (by default includes 95% confidence region).
base_plot + geom_smooth(method = "lm")

# Add a linear regression line without the shaded confidence region.
base_plot + geom_smooth(method = "lm", se = FALSE)

# Add a loess smoothed fit curve with confidence region.
base_plot + geom_smooth()

# Statistics

# Correlation matrix of the two columns of mean_corr.
cor(mean_corr)

# Linear regression with the same orientation as the scatter plots
# (response y = mRNA_expression, predictor x = Protein_abundance), so the
# reported intercept and slope describe the regression line drawn there.
lm(mRNA_expression ~ Protein_abundance, data = mean_corr)

具有多个列的两个数据帧之间的关系或相关性

问题描述

3 个解决方案

解决方案1
1 2016-10-13 22:11:43

解决方案2
1 2016-10-13 23:14:25

解决方案3
1 已采纳 2016-10-18 15:39:10

具有多个列的两个数据帧之间的关系或相关性

问题描述

3 个解决方案

解决方案1 1 2016-10-13 22:11:43

解决方案2 1 2016-10-13 23:14:25

解决方案3 1 已采纳 2016-10-18 15:39:10

解决方案1
1 2016-10-13 22:11:43

解决方案2
1 2016-10-13 23:14:25

解决方案3
1 已采纳 2016-10-18 15:39:10