繁体   English   中英

r:ggplot2相关图

[英]r: ggplot2 correlation plot

我正在尝试根据数据框中的两个参数创建一个相关图。但是,我对如何操纵数据框以获取所需内容感到困惑。

这是我的数据框的结构:

# dput() dump of the asker's sample data: a 20-row tibble in long format.
# `charnam` names the measured characteristic ("Total dissolved solids" or
# "Specific conductance"), `val` holds the reading and `valunit` its unit, so
# both characteristics share the single `val` column.
structure(list(orgid = c("USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", 
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", 
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", 
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ"), locid = c("USGS-01367785", 
"USGS-01367785", "USGS-01455099", "USGS-01455099", "USGS-01440000", 
"USGS-01440000", "USGS-01380100", "USGS-01380100", "USGS-01380100", 
"USGS-01387700", "USGS-01387700", "USGS-01398000", "USGS-01398000", 
"USGS-0140940950", "USGS-01466500", "USGS-01461880", "USGS-01461880", 
"USGS-01445600", "USGS-01446400", "USGS-0140940950"), stdate = structure(c(16134, 
16134, 16133, 16133, 16135, 16135, 16133, 16133, 16133, 16127, 
16127, 16105, 16105, 16112, 15770, 15749, 15749, 15749, 15762, 
16112), class = "Date"), sttime = structure(c(45000, 45000, 39600, 
39600, 35040, 35040, 48000, 48000, 48000, 39600, 39600, 38700, 
38700, 39600, 37200, 32400, 32400, 40500, 36000, 39600), class = c("hms", 
"difftime"), units = "secs"), charnam = c("Total dissolved solids", 
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids", 
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids", 
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids", 
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids", 
"Total dissolved solids", "Specific conductance", "Total dissolved solids", 
"Specific conductance", "Specific conductance", "Specific conductance", 
"Total dissolved solids"), val = c(0.21, 154, 0.43, 333, 0.16, 
109, 12.1, 0.2, 143, 32, 0.05, 1.03, 711, 1.62, 31, 218, 391, 
384, 478, 104), valunit = c("tons/ac ft", "mg/l", "tons/ac ft", 
"mg/l", "tons/ac ft", "mg/l", "tons/day", "tons/ac ft", "mg/l", 
"mg/l", "tons/ac ft", "tons/ac ft", "mg/l", "tons/day", "uS/cm @25C", 
"mg/l", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "mg/l"), swqs = c("FW2-NT", 
"FW2-NT", "FW2-TP", "FW2-TP", "FW2-TM", "FW2-TM", "FW2-NT", "FW2-NT", 
"FW2-NT", "FW2-TP", "FW2-TP", "FW2-NT", "FW2-NT", "PL", "FW1", 
"FW2-TM", "FW2-TM", "FW2-NT", "FW2-TM", "PL"), WMA = c(2L, 2L, 
1L, 1L, 1L, 1L, 6L, 6L, 6L, 3L, 3L, 8L, 8L, 14L, 19L, 11L, 11L, 
1L, 1L, 14L), year = c(2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2013L, 
2013L, 2013L, 2013L, 2013L, 2014L)), .Names = c("orgid", "locid", 
"stdate", "sttime", "charnam", "val", "valunit", "swqs", "WMA", 
"year"), row.names = c(NA, -20L), class = c("tbl_df", "tbl", 
"data.frame"))

我想绘制总溶解固体(Total dissolved solids)与电导率(Specific conductance)的关系图。但是,这两个参数的值都存放在同一个名为 val 的列中。我是否需要重塑数据框,使“总溶解固体”的值单独成一列,“电导率”的值也单独成一列? 如果是,在参数名称存放于 charnam 列、所有参数值存放于 val 列的情况下,应该怎么做? 我尝试过对数据框取子集,但没有成功。

代码我有:

# Asker's attempt: builds a two-layer point plot from two separately filtered
# data frames and stores it in `correlation_plot1`.
# NOTE(review): each layer supplies only one numeric axis (the other is the
# constant string ""), so no point carries both a TDS and an SC value.
correlation_plot1<-ggplot() +
  # Layer 1: `val` of TDS_correlation_df on the y axis; x is the constant "".
  geom_point(data=TDS_correlation_df,aes(x="",y=val))+
  # Layer 2: `val` of SC_correlation_df on the x axis; y is the constant "".
  geom_point(data=SC_correlation_df,aes(x=val,y=""))+
  ggtitle("Statewide Total Dissolved Solids vs. Specific Conductance Correlation\n;1997-2018") +
  xlab("SC(µS/cm)") + ylab("TDS(mg/L)")+
  # NOTE(review): no layer above maps a colour aesthetic, so this manual colour
  # scale presumably has nothing to apply to — confirm.
  scale_color_manual("",
                     values = c("red"),
                     labels=c("Freshwater Aquatic Life Criteria for TDS = 500 mg/L"))+

  # `correlation_theme` is not defined in this snippet; it has to exist in the
  # session before this expression is evaluated.
  correlation_theme+

  theme(legend.position ="bottom")

TDS_correlation_df 和 SC_correlation_df 是原始数据集的子集,分别经过筛选,只保留了对应的参数(总溶解固体、电导率)。

好的,这很笨拙,但是我相信它可以满足您的需求。 正如评论所讨论的,问题不在于您的ggplot代码,而是与您的数据有关:

# Recreate the asker's sample data (dput() output): a 20-row tibble in long
# format where `charnam` names the characteristic and `val` holds its reading,
# with the unit of each reading in `valunit`.
data <- structure(list(orgid = c("USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", 
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", 
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", 
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ"), locid = c("USGS-01367785", 
"USGS-01367785", "USGS-01455099", "USGS-01455099", "USGS-01440000", 
"USGS-01440000", "USGS-01380100", "USGS-01380100", "USGS-01380100", 
"USGS-01387700", "USGS-01387700", "USGS-01398000", "USGS-01398000", 
"USGS-0140940950", "USGS-01466500", "USGS-01461880", "USGS-01461880", 
"USGS-01445600", "USGS-01446400", "USGS-0140940950"), stdate = structure(c(16134, 
16134, 16133, 16133, 16135, 16135, 16133, 16133, 16133, 16127, 
16127, 16105, 16105, 16112, 15770, 15749, 15749, 15749, 15762, 
16112), class = "Date"), sttime = structure(c(45000, 45000, 39600, 
39600, 35040, 35040, 48000, 48000, 48000, 39600, 39600, 38700, 
38700, 39600, 37200, 32400, 32400, 40500, 36000, 39600), class = c("hms", 
"difftime"), units = "secs"), charnam = c("Total dissolved solids", 
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids", 
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids", 
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids", 
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids", 
"Total dissolved solids", "Specific conductance", "Total dissolved solids", 
"Specific conductance", "Specific conductance", "Specific conductance", 
"Total dissolved solids"), val = c(0.21, 154, 0.43, 333, 0.16, 
109, 12.1, 0.2, 143, 32, 0.05, 1.03, 711, 1.62, 31, 218, 391, 
384, 478, 104), valunit = c("tons/ac ft", "mg/l", "tons/ac ft", 
"mg/l", "tons/ac ft", "mg/l", "tons/day", "tons/ac ft", "mg/l", 
"mg/l", "tons/ac ft", "tons/ac ft", "mg/l", "tons/day", "uS/cm @25C", 
"mg/l", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "mg/l"), swqs = c("FW2-NT", 
"FW2-NT", "FW2-TP", "FW2-TP", "FW2-TM", "FW2-TM", "FW2-NT", "FW2-NT", 
"FW2-NT", "FW2-TP", "FW2-TP", "FW2-NT", "FW2-NT", "PL", "FW1", 
"FW2-TM", "FW2-TM", "FW2-NT", "FW2-TM", "PL"), WMA = c(2L, 2L, 
1L, 1L, 1L, 1L, 6L, 6L, 6L, 3L, 3L, 8L, 8L, 14L, 19L, 11L, 11L, 
1L, 1L, 14L), year = c(2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2013L, 
2013L, 2013L, 2013L, 2013L, 2014L)), .Names = c("orgid", "locid", 
"stdate", "sttime", "charnam", "val", "valunit", "swqs", "WMA", 
"year"), row.names = c(NA, -20L), class = c("tbl_df", "tbl", 
"data.frame"))

library(tidyverse)

# Reshape to wide form: every distinct `charnam` becomes its own column holding
# the matching `val`; rows without a reading for a characteristic get NA there.
# NOTE(review): spread() is superseded by pivot_wider(), but pivot_wider() may
# return rows in a different order, which would change the positional pairing
# below — confirm before switching.
data_tidy <- data %>%
  spread(charnam, val)

# Total dissolved solids readings, numbered 1..n so they can be joined by row
# position.
# NOTE(review): readings of every unit in `valunit` are kept, not only mg/l —
# filter on `valunit` first if a single unit is wanted.
total_dissolved_solids <- data_tidy %>%
  select(`Total dissolved solids`) %>%
  filter(!is.na(`Total dissolved solids`)) %>%
  rowid_to_column()

# Specific conductance readings as a plain vector.
sc_values <- data_tidy %>%
  filter(!is.na(`Specific conductance`)) %>%
  pull(`Specific conductance`)

# Recycle the conductance readings to the number of TDS readings. This replaces
# the former hard-coded `rep(times = 4)` / `gather(1:4)` round trip, which only
# lined up when there were exactly four times as many TDS rows as SC rows.
specific_conductance <- tibble(
  `Specific conductance` = rep(
    sc_values,
    length.out = nrow(total_dissolved_solids)
  )
) %>%
  rowid_to_column()

# Pair the two series by row position. The pairing is purely positional: a TDS
# reading is not matched to a conductance reading from the same sample.
combined <- total_dissolved_solids %>%
  left_join(specific_conductance, by = "rowid")

ggplot(
  combined,
  aes(x = `Specific conductance`, y = `Total dissolved solids`)
) +
  geom_point()

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM