[英]Add Spearman Correlation Coefficient and RMSE in a plot, and change the plot size
我有以下數據集:
# Example data: a 14-row data.frame with columns Count (1..14), GW and
# Pz1 ... Pz7.
# NOTE: this expression is not assigned to a name here; the code further down
# reads it as `my_data`, so assign it (my_data <- structure(...)) before running.
structure(list(Count = 1:14, GW = c(0.08, 0.04, 0.35, 0.54, 0.39,
0.94, 0.51, 0.01, 0.44, 0.63, 0.14, 0.79, 0.43, 0.73), Pz1 = c(-2.459826614,
-2.905007821, -2.241113224, -1.549264338, -1.761962438, -1.282612221,
-0.428828702, 1.659042479, 2.63518648, 3.076022461, 1.886216859,
0.124473561, -1.025720789, -1.969461882), Pz2 = c(-2.916090168,
-3.262459435, -2.455396094, -1.488106654, -0.417171756, -1.781014095,
-0.605012986, 1.037062685, 1.977265974, 2.587846362, 2.499228916,
1.0852274, -0.503736287, -1.829562138), Pz3 = c(-2.507944967,
-3.718722989, -2.812847708, -1.702389524, -0.356014073, -0.436223413,
-1.10341486, 0.860878402, 1.355286181, 1.929925855, 2.011052817,
1.698239458, 0.457017552, -1.307577636), Pz4 = c(-2.526729696,
-3.310577787, -3.269111262, -2.059841138, -0.570296943, -0.375065729,
0.241375822, 0.362476528, 1.179101897, 1.307946062, 1.35313231,
1.210063358, 1.07002961, -0.346823797), Pz5 = c(-3.284551238,
-3.329362517, -2.86096606, -2.516104692, -0.927748557, -0.5893486,
0.302533506, 1.70726721, 0.680700023, 1.131761778, 0.731152517,
0.552142851, 0.58185351, 0.266188261), Pz6 = c(-4.011896321,
-4.087184059, -2.87975079, -2.107959491, -1.38401211, -0.946800213,
0.088250636, 1.768424893, 2.025490705, 0.633359905, 0.554968233,
-0.069836942, -0.076066996, -0.221987839), Pz7 = c(-4.769878994,
-4.814529142, -3.637572331, -2.12674422, -0.975866909, -1.403063767,
-0.269200978, 1.554142023, 2.086648388, 1.978150587, 0.05656636,
-0.246021225, -0.698046789, -0.879908346)), class = "data.frame", row.names = c(NA,
-14L))
我正在運行此代碼以自動將繪圖保存在文件夾中:
library(tidyverse)
library(ggplot2)
#~~~ step 1: transform the data from 'wide' format to 'long' format ~~~
# Every column except Count and GW is stacked into two columns: `key` holds the
# original column name and `value` holds the measurement. The result is stored
# with a plain `<-` assignment instead of a `->>` super-assignment hidden inside
# a pipe step.
my_data_long <- my_data %>%
  pivot_longer(
    cols = -c(Count, GW),
    names_to = "key",
    values_to = "value"
  )
#~~~ step 2: write a function to plot the data and save the plot ~~~
# Draw the z-scored GW series (blue) and the z-scored series `i` (red) against
# Count, then save the figure as my_location/my_folder/<i>.png.
# `i` is one value of the `key` column of the global `my_data_long`.
saveplot_function <- function(i){
  # rows that belong to the requested series only
  series_i <- filter(my_data_long, key == i)

  # both lines are standardised with scale() before plotting
  my_plot <- ggplot(series_i, aes(x = Count)) +
    geom_line(aes(y = scale(GW)), color = "blue") +
    geom_line(aes(y = scale(value)), color = "red") +
    ylab(i)

  # ggsave() is called without `plot`, so it saves ggplot2's last plot
  output_path <- paste0('my_location/my_folder/', i, '.png')
  ggsave(output_path)
}
#~~~ step 3: loop through all the values of 'key' (column names in the wide format data) ~~~
# One call to saveplot_function() per distinct series name.
series_names <- unique(my_data_long$key)
for (series_name in series_names) {
  saveplot_function(series_name)
}
這段代碼可以完成這項工作,但我需要在每個圖中添加一個框(或僅添加文本),顯示 Spearman 相關系數和 RMSE(更准確地說,是針對每個圖中顯示的兩個時間序列計算的)。此外,這些圖被保存為正方形,而我希望它們是矩形的。
這樣的事情會很棒:
你能幫我相應地重寫代碼嗎?我完全不是 R 專家,這段代碼是從別人為我寫的舊代碼修改而來的(我花了很長時間才讓它滿足我的用途),所以如果你能重寫它,我將非常感激(抱歉)。
此外,從上面的代碼可以看出,我使用了 scale()。我想做的是使用 Z 分數(即 Z = (x - mean(x)) / Standard_Deviation(x))對繪制的數據進行標准化。您能否確認(或否定)scale() 是使用 Z 分數對時間序列進行標准化的?
我對您問題的某些部分不太清楚,但希望這會有所幫助。根據文檔,scale() 默認會對數值數據進行 z 分數轉換:中心化所用的值是平均值,縮放所用的值是標准偏差。
可以使用 annotate() 向圖中添加文本,只需提供文本和坐標。我使用了兩個單獨的注釋:一個用於相關性(並使用 bquote() 來顯示希臘字母 rho),另一個用於 RMSE。我使用 GW、value 和 Count 的最大值來錨定文本位置,這可能需要進一步調整。另外,RMSE 和相關性是基於未縮放的序列計算的。
下面提供了一個示例圖。建議您檢查一下,確保各項計算方式符合您的預期。
# Example data: 14 observations of Count, GW and the seven series Pz1 ... Pz7.
my_data <- structure(
  list(
    Count = 1:14,
    GW = c(
      0.08, 0.04, 0.35, 0.54, 0.39, 0.94, 0.51,
      0.01, 0.44, 0.63, 0.14, 0.79, 0.43, 0.73
    ),
    Pz1 = c(
      -2.459826614, -2.905007821, -2.241113224, -1.549264338,
      -1.761962438, -1.282612221, -0.428828702, 1.659042479,
      2.63518648, 3.076022461, 1.886216859, 0.124473561,
      -1.025720789, -1.969461882
    ),
    Pz2 = c(
      -2.916090168, -3.262459435, -2.455396094, -1.488106654,
      -0.417171756, -1.781014095, -0.605012986, 1.037062685,
      1.977265974, 2.587846362, 2.499228916, 1.0852274,
      -0.503736287, -1.829562138
    ),
    Pz3 = c(
      -2.507944967, -3.718722989, -2.812847708, -1.702389524,
      -0.356014073, -0.436223413, -1.10341486, 0.860878402,
      1.355286181, 1.929925855, 2.011052817, 1.698239458,
      0.457017552, -1.307577636
    ),
    Pz4 = c(
      -2.526729696, -3.310577787, -3.269111262, -2.059841138,
      -0.570296943, -0.375065729, 0.241375822, 0.362476528,
      1.179101897, 1.307946062, 1.35313231, 1.210063358,
      1.07002961, -0.346823797
    ),
    Pz5 = c(
      -3.284551238, -3.329362517, -2.86096606, -2.516104692,
      -0.927748557, -0.5893486, 0.302533506, 1.70726721,
      0.680700023, 1.131761778, 0.731152517, 0.552142851,
      0.58185351, 0.266188261
    ),
    Pz6 = c(
      -4.011896321, -4.087184059, -2.87975079, -2.107959491,
      -1.38401211, -0.946800213, 0.088250636, 1.768424893,
      2.025490705, 0.633359905, 0.554968233, -0.069836942,
      -0.076066996, -0.221987839
    ),
    Pz7 = c(
      -4.769878994, -4.814529142, -3.637572331, -2.12674422,
      -0.975866909, -1.403063767, -0.269200978, 1.554142023,
      2.086648388, 1.978150587, 0.05656636, -0.246021225,
      -0.698046789, -0.879908346
    )
  ),
  class = "data.frame",
  row.names = c(NA, -14L)
)
library(tidyverse)
library(ggplot2)
#~~~ step 1: transform the data from 'wide' format to 'long' format ~~~
# Every column except Count and GW is stacked into two columns: `key` holds the
# original column name and `value` holds the measurement. The result is stored
# with a plain `<-` assignment instead of a `->>` super-assignment hidden inside
# a pipe step.
my_data_long <- my_data %>%
  pivot_longer(
    cols = -c(Count, GW),
    names_to = "key",
    values_to = "value"
  )
#~~~ step 2: write a function to plot the data and save the plot ~~~
# Plot the z-scored GW series (blue) against the z-scored series `i` (red),
# annotate the plot with Spearman's rho and the RMSE of the two series, and
# save it as a 6 x 4 inch PNG named <i>.png.
#
# Arguments:
#   i        Name of the series to plot (one value of the `key` column).
#   data     Long-format data with columns Count, GW, key and value.
#            Defaults to the global `my_data_long` built in step 1.
#   out_dir  Folder the PNG is written to; it is created if it does not exist.
#
# Returns whatever ggsave() returns.
saveplot_function <- function(i, data = my_data_long,
                              out_dir = "my_location/my_folder") {
  # get data for key i
  df_temp <- data %>%
    filter(key == i)
  if (nrow(df_temp) == 0L) {
    stop("No rows found for key '", i, "'", call. = FALSE)
  }

  # z-scores are computed once; they are what the y axis shows, so the label
  # coordinates below are expressed in the same units
  df_temp <- df_temp %>%
    mutate(
      gw_z = as.numeric(scale(GW)),
      value_z = as.numeric(scale(value))
    )

  # calculate RMSE and correlation on the unscaled series
  # adjust calculation as needed
  # RMSE is the square ROOT of the mean squared error
  rmse <- round(sqrt(mean((df_temp$GW - df_temp$value)^2)), 3)
  # cor() defaults to Pearson, so Spearman has to be requested explicitly
  correl <- round(cor(df_temp$GW, df_temp$value, method = "spearman"), 3)

  # plotmath label built with bquote() and handed over as a string; with
  # parse = TRUE ggplot2 turns it back into an expression, so `rho` is drawn
  # as the Greek letter
  rho_label <- paste(
    deparse(bquote(paste(" rho (", rho, ") =", .(correl)))),
    collapse = ""
  )
  label_x <- max(df_temp$Count) - 2
  label_y <- max(df_temp$gw_z, df_temp$value_z)

  my_plot <- ggplot(df_temp, aes(x = Count)) +
    geom_line(aes(y = gw_z), color = "blue") +
    geom_line(aes(y = value_z), color = "red") +
    ylab(i) +
    annotate(
      geom = "text", x = label_x, y = label_y,
      label = rho_label, parse = TRUE, color = "black"
    ) +
    annotate(
      geom = "text", x = label_x, y = label_y - 0.5,
      label = paste0("RMSE = ", rmse), color = "black"
    )

  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE)
  }
  # pass the plot explicitly instead of relying on ggplot2's "last plot"
  ggsave(
    file.path(out_dir, paste0(i, ".png")),
    plot = my_plot,
    width = 6,
    height = 4,
    units = "in"
  )
}
#~~~ step 3: loop through all the values of 'key' (column names in the wide format data) ~~~
# One call to saveplot_function() per distinct series name.
series_names <- unique(my_data_long$key)
for (series_name in series_names) {
  saveplot_function(series_name)
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.