[英]Plot multiple linear regressions from tibble of intercepts and slopes using ggplot
[英]Multiple linear regressions in a single plot
提前抱歉,有什么問題或令人困惑,我只是一個有一些基本 R 的生物學家。
首先,我將對腳本背后的想法給出一些見解。 我做了一些實驗,讓蒼蠅暴露在溫度升高的不同坡度下,為此使用了一個自制的設備。 需要知道設備是否正確升高溫度,我必須為每個斜坡制作校准曲線。 使用溫度傳感器隨時間測量校准曲線。
我的第一個dput
datos
(西班牙語數據)的輸出是 3 個不同斜坡(0.06 °C/分鍾、0.12 °C/分鍾、0.25°C/分鍾)測量的溫度的恢復
structure(list(date2 = c("03/02/2021", "03/02/2021", "03/02/2021",
"03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021",
"03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021",
"03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021",
"03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021",
"03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021",
"03/02/2021", "03/02/2021"), time2 = c("14:56:32", "14:56:42",
"14:56:52", "14:57:02", "14:57:12", "14:57:22", "14:57:32", "14:57:42",
"14:57:52", "14:58:02", "14:58:12", "14:58:22", "14:58:32", "14:58:42",
"14:58:52", "14:59:02", "14:59:12", "14:59:22", "14:59:32", "14:59:42",
"14:59:52", "15:00:02", "15:00:12", "15:00:22", "15:00:32", "15:00:42",
"15:00:52", "15:01:02", "15:01:12", "15:01:22"), temperature = c(25.2,
25.8, 25.6, 25.6, 25.6, 25.6, 25.6, 25.6, 25.7, 25.5, 25.6, 25.5,
25.6, 25.6, 25.6, 25.6, 25.6, 25.6, 25.6, 25.6, 25.6, 25.6, 25.6,
25.6, 25.6, 25.8, 25.8, 25.8, 25.8, 25.8), grupo = c("voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft"), segundos = c(1L,
11L, 21L, 31L, 41L, 51L, 61L, 71L, 81L, 91L, 101L, 111L, 121L,
131L, 141L, 151L, 161L, 171L, 181L, 191L, 201L, 211L, 221L, 231L,
241L, 251L, 261L, 271L, 281L, 291L), id3 = 1:30, curva = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("a",
"b", "c", "d", "e", "f", "g", "h", "i", "j"), class = "factor"),
modo = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "h", class = "factor"), sujeto = c("agus",
"agus", "agus", "agus", "agus", "agus", "agus", "agus", "agus",
"agus", "agus", "agus", "agus", "agus", "agus", "agus", "agus",
"agus", "agus", "agus", "agus", "agus", "agus", "agus", "agus",
"agus", "agus", "agus", "agus", "agus"), tamb = c(26.7, 26.7,
26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7,
26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7,
26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7), tratamiento = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0.06",
"0.125", "0.25"), class = "factor"), datetime = structure(c(1612364192,
1612364202, 1612364212, 1612364222, 1612364232, 1612364242,
1612364252, 1612364262, 1612364272, 1612364282, 1612364292,
1612364302, 1612364312, 1612364322, 1612364332, 1612364342,
1612364352, 1612364362, 1612364372, 1612364382, 1612364392,
1612364402, 1612364412, 1612364422, 1612364432, 1612364442,
1612364452, 1612364462, 1612364472, 1612364482), class = c("POSIXct",
"POSIXt"), tzone = "GMT")), row.names = c(NA, 30L), class = "data.frame")
df dput
的datos2
與校准曲線的恢復相同,但已針對良好測量的曲線(具有盡可能小的變化的曲線)進行了過濾。
structure(list(date2 = c("01/03/2021", "01/03/2021", "01/03/2021",
"01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021",
"01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021",
"01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021",
"01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021",
"01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021",
"01/03/2021", "01/03/2021"), time2 = c("10:24:28", "10:24:38",
"10:24:48", "10:24:58", "10:25:08", "10:25:18", "10:25:28", "10:25:38",
"10:25:48", "10:25:58", "10:26:08", "10:26:18", "10:26:28", "10:26:38",
"10:26:48", "10:26:58", "10:27:08", "10:27:18", "10:27:28", "10:27:38",
"10:27:48", "10:27:58", "10:28:08", "10:28:18", "10:28:28", "10:28:38",
"10:28:48", "10:28:58", "10:29:08", "10:29:18"), temperature = c(26.2,
26.8, 26.6, 26.7, 26.7, 26.7, 26.7, 26.8, 26.8, 26.8, 26.9, 26.8,
26.8, 26.7, 26.6, 26.6, 26.7, 26.6, 26.7, 26.7, 26.6, 26.6, 26.6,
26.7, 26.6, 26.9, 26.8, 26.8, 26.8, 26.8), grupo = c("voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft"), segundos = c(1L,
11L, 21L, 31L, 41L, 51L, 61L, 71L, 81L, 91L, 101L, 111L, 121L,
131L, 141L, 151L, 161L, 171L, 181L, 191L, 201L, 211L, 221L, 231L,
241L, 251L, 261L, 271L, 281L, 291L), id3 = 5315:5344, curva = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("a",
"b", "c", "d", "e", "f", "g", "h", "i", "j"), class = "factor"),
modo = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "h", class = "factor"), sujeto = c("agus",
"agus", "agus", "agus", "agus", "agus", "agus", "agus", "agus",
"agus", "agus", "agus", "agus", "agus", "agus", "agus", "agus",
"agus", "agus", "agus", "agus", "agus", "agus", "agus", "agus",
"agus", "agus", "agus", "agus", "agus"), tamb = c(28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28), tratamiento = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0.06",
"0.125", "0.25"), class = "factor"), datetime = structure(c(1614594268,
1614594278, 1614594288, 1614594298, 1614594308, 1614594318,
1614594328, 1614594338, 1614594348, 1614594358, 1614594368,
1614594378, 1614594388, 1614594398, 1614594408, 1614594418,
1614594428, 1614594438, 1614594448, 1614594458, 1614594468,
1614594478, 1614594488, 1614594498, 1614594508, 1614594518,
1614594528, 1614594538, 1614594548, 1614594558), class = c("POSIXct",
"POSIXt"), tzone = "GMT")), row.names = c(NA, 30L), class = "data.frame")
基本上是相同的 df 只是過濾如下
datos2=datos %>% filter((tratamiento == "0.125" & segundos < 4001 & (curva ==
"a" | curva == "b"|curva == "c" | curva == "d" | curva == "f")) | (tratamiento
== "0.25" & segundos < 4001 & (curva == "a" | curva == "e" | curva == "j")) |
(tratamiento == "0.06" & segundos < 4001 & (curva == "d" | curva == "e"| curva == "f")) )
3 個不同斜坡的datos2
圖(這將需要作為最終圖的想法)
接下來,我使用了一個線性模型(也測試了假設),因此我可以獲得實際系數以及與實際升高溫度相關的每條曲線的“實際”斜率。 對於我使用的這個塊
modelo <- lm(datos2$temperature~datos2$segundos*datos2$tratamiento)
通過線性模型獲得的系數(以分鍾為單位的斜率):
> data.frame(modelo$coefficients)
modelo.coefficients
(Intercept) 26.3091784959
datos2$segundos 0.0006260104
datos2$tratamiento0.125 -1.0105069266
datos2$tratamiento0.25 -1.2511820668
datos2$segundos:datos2$tratamiento0.125 0.0015812593
datos2$segundos:datos2$tratamiento0.25 0.0047528818
#0.06
modelo$coefficients[1]=25 #intercept
round(modelo$coefficients[2]*60,3) #slope
#0.125
modelo$coefficients[1]+modelo$coefficients[3]
round((modelo$coefficients[2]+modelo$coefficients[5])*60,3)
#0.25
modelo$coefficients[1]+modelo$coefficients[4]
round((modelo$coefficients[2]+modelo$coefficients[6])*60,3)
總結
0.06 <- 0.038
0.125 <- 0.132
0.25 <- 0.323
最后,我嘗試將所有這些合並到一個圖中。 為此,我使用每個處理(每條曲線)的估計斜坡方程生成了一個預測斜坡。 我為此使用了一個小向量(10800 作為最后一個數字,因此所有曲線同時與 X 相交)。 最后我將預測作為一個函數來應用估計方程來計算應該預期的 Y。 這是我遇到各種我不理解和無法解決的錯誤的地方。
new_datos <- data.frame(segundos = rep(seq(0,10800,length.out=100),3),tratamiento = c(rep("0.06",200),rep("0.125",200),rep("0.25",200)))
dput
的new_datos
structure(list(segundos = c(0, 109.090909090909, 218.181818181818,
327.272727272727, 436.363636363636, 545.454545454545, 654.545454545455,
763.636363636364, 872.727272727273, 981.818181818182, 1090.90909090909,
1200, 1309.09090909091, 1418.18181818182, 1527.27272727273, 1636.36363636364,
1745.45454545455, 1854.54545454545, 1963.63636363636, 2072.72727272727,
2181.81818181818, 2290.90909090909, 2400, 2509.09090909091, 2618.18181818182,
2727.27272727273, 2836.36363636364, 2945.45454545455, 3054.54545454545,
3163.63636363636), tratamiento = c("0.06", "0.06", "0.06", "0.06",
"0.06", "0.06", "0.06", "0.06", "0.06", "0.06", "0.06", "0.06",
"0.06", "0.06", "0.06", "0.06", "0.06", "0.06", "0.06", "0.06",
"0.06", "0.06", "0.06", "0.06", "0.06", "0.06", "0.06", "0.06",
"0.06", "0.06")), row.names = c(NA, 30L), class = "data.frame")
線性模型的其余腳本:
b <- data.frame(predict.lm(modelo,data=new_datos,interval = "prediction"))
nrow(b)
new_datos$temperature <- b[,1]
new_datos$lwr <- b[,2]
new_datos$upr <- b[,3]
new_datos[new_datos$segundos==0]
levels(new_datos$tratamiento)[levels(datos$tratamiento)=="0.06"] <- "0.038 ºC/min"
levels(new_datos$tratamiento)[levels(datos$tratamiento)=="0.125"] <- "0.132 ºC/min"
levels(new_datos$tratamiento)[levels(datos$tratamiento)=="0.25"] <- "0.323 ºC/min"
# Split datos into Training and Testing
sample_size = floor(0.2*nrow(datos2))
# randomly split datos
picked = sample(seq_len(nrow(datos2)),size = sample_size)
development =datos2[picked,]
最后一部分的錯誤
> b <- data.frame(predict.lm(modelo,data=new_datos,interval = "prediction"))
Warning message:
In predict.lm(modelo, data = new_datos, interval = "prediction") :
predictions on current data refer to _future_ responses
> new_datos$temperature <- b[,1]
Error in `$<-.data.frame`(`*tmp*`, temperature, value = c(25.0006260103883, :
replacement has 7452 rows, data has 600
> new_datos$lwr <- b[,2]
Error in `$<-.data.frame`(`*tmp*`, lwr, value = c(22.6325633882165, 22.638854566693, :
replacement has 7452 rows, data has 600
> levels(new_datos$tratamiento)[levels(datos$tratamiento)=="0.06"] <- "0.038 ºC/min"
Error in `levels<-`(`*tmp*`, value = c("0.038 ºC/min", NA, NA)) :
factor level [3] is duplicated
> levels(new_datos$tratamiento)[levels(datos$tratamiento)=="0.125"] <- "0.132 ºC/min"
Error in `levels<-`(`*tmp*`, value = c(NA, "0.132 ºC/min", NA)) :
factor level [3] is duplicated
問題是:有沒有辦法獲得 Sup.3 中的每條曲線(后面可能是灰色的)。 在單個圖中,另外還有 3 條曲線計算疊加在曲線上的每個系數(斜率)(背面為灰色)?
謝謝!
編輯:
最小可重現示例:
temperature segundos curva tratamiento
1 25.2 1 a 0.06
2 25.8 11 a 0.06
3 25.6 21 a 0.06
4 25.6 31 a 0.06
5 25.6 41 a 0.06
6 25.6 51 a 0.06
7 25.6 61 a 0.06
8 25.6 71 a 0.06
9 25.7 81 a 0.06
10 25.5 91 a 0.06
11 25.6 101 a 0.06
12 25.5 111 a 0.06
13 25.6 121 a 0.06
14 25.6 131 a 0.06
15 25.6 141 a 0.06
16 25.6 151 a 0.06
17 25.6 161 a 0.06
18 25.6 171 a 0.06
19 25.6 181 a 0.06
20 25.6 191 a 0.06
21 25.6 201 a 0.06
22 25.6 211 a 0.06
23 25.6 221 a 0.06
24 25.6 231 a 0.06
25 25.6 241 a 0.06
26 25.8 251 a 0.06
27 25.8 261 a 0.06
28 25.8 271 a 0.06
29 25.8 281 a 0.06
30 25.8 291 a 0.06
31 25.7 301 a 0.06
32 25.8 311 a 0.06
33 25.7 321 a 0.06
34 25.7 331 a 0.06
35 25.8 341 a 0.06
36 25.8 351 a 0.06
37 25.7 361 a 0.06
38 25.7 371 a 0.06
39 25.7 381 a 0.06
40 25.9 391 a 0.06
41 25.9 401 a 0.06
42 25.9 411 a 0.06
43 25.9 421 a 0.06
44 25.9 431 a 0.06
45 25.9 441 a 0.06
46 25.9 451 a 0.06
47 25.9 461 a 0.06
48 25.9 471 a 0.06
49 25.9 481 a 0.06
50 25.9 491 a 0.06
51 25.9 501 a 0.06
52 25.9 511 a 0.06
53 25.9 521 a 0.06
54 25.9 531 a 0.06
55 25.9 541 a 0.06
56 25.9 551 a 0.06
57 25.9 561 a 0.06
58 25.9 571 a 0.06
59 25.9 581 a 0.06
60 25.9 591 a 0.06
61 25.8 601 a 0.06
62 25.9 611 a 0.06
63 26.0 621 a 0.06
64 25.9 631 a 0.06
65 26.2 641 a 0.06
66 26.1 651 a 0.06
67 26.1 661 a 0.06
68 26.1 671 a 0.06
69 26.1 681 a 0.06
70 26.1 691 a 0.06
71 26.1 701 a 0.06
72 26.1 711 a 0.06
73 26.2 721 a 0.06
74 26.1 731 a 0.06
75 26.1 741 a 0.06
76 26.1 751 a 0.06
77 26.1 761 a 0.06
78 26.1 771 a 0.06
79 26.2 781 a 0.06
80 26.3 791 a 0.06
81 26.1 801 a 0.06
82 26.3 811 a 0.06
83 26.1 821 a 0.06
84 26.3 831 a 0.06
85 26.1 841 a 0.06
86 26.3 851 a 0.06
87 26.4 861 a 0.06
88 26.3 871 a 0.06
89 26.3 881 a 0.06
90 26.3 891 a 0.06
91 26.3 901 a 0.06
92 26.3 911 a 0.06
93 26.3 921 a 0.06
94 26.3 931 a 0.06
95 26.3 941 a 0.06
96 26.2 951 a 0.06
97 26.3 961 a 0.06
98 26.3 971 a 0.06
99 26.3 981 a 0.06
100 26.3 991 a 0.06
101 26.5 1001 a 0.06
102 26.3 1011 a 0.06
103 26.5 1021 a 0.06
104 26.5 1031 a 0.06
105 26.5 1041 a 0.06
106 26.5 1051 a 0.06
107 26.5 1061 a 0.06
108 26.5 1071 a 0.06
109 26.6 1081 a 0.06
110 26.5 1091 a 0.06
111 26.5 1101 a 0.06
112 26.5 1111 a 0.06
113 26.6 1121 a 0.06
114 26.5 1131 a 0.06
115 26.5 1141 a 0.06
116 26.5 1151 a 0.06
117 26.5 1161 a 0.06
118 26.5 1171 a 0.06
119 26.6 1181 a 0.06
120 26.5 1191 a 0.06
121 26.5 1201 a 0.06
122 26.5 1211 a 0.06
123 26.5 1221 a 0.06
124 26.6 1231 a 0.06
125 26.5 1241 a 0.06
126 26.6 1251 a 0.06
127 26.7 1261 a 0.06
128 26.7 1271 a 0.06
129 26.8 1281 a 0.06
130 26.5 1291 a 0.06
131 26.7 1301 a 0.06
132 26.7 1311 a 0.06
133 26.7 1321 a 0.06
134 26.8 1331 a 0.06
135 26.7 1341 a 0.06
136 26.5 1351 a 0.06
137 26.7 1361 a 0.06
138 26.7 1371 a 0.06
139 26.8 1381 a 0.06
140 26.7 1391 a 0.06
141 26.8 1401 a 0.06
142 26.7 1411 a 0.06
143 26.8 1421 a 0.06
144 26.8 1431 a 0.06
145 26.7 1441 a 0.06
146 26.7 1451 a 0.06
147 26.8 1461 a 0.06
148 26.8 1471 a 0.06
149 26.7 1481 a 0.06
150 26.8 1491 a 0.06
151 26.7 1501 a 0.06
152 26.7 1511 a 0.06
153 26.7 1521 a 0.06
154 26.7 1531 a 0.06
155 26.8 1541 a 0.06
156 26.8 1551 a 0.06
157 26.7 1561 a 0.06
158 26.8 1571 a 0.06
159 26.7 1581 a 0.06
160 26.8 1591 a 0.06
161 27.0 1601 a 0.06
162 27.0 1611 a 0.06
163 27.0 1621 a 0.06
164 27.0 1631 a 0.06
165 27.0 1641 a 0.06
166 27.0 1651 a 0.06
167 27.0 1661 a 0.06
168 27.0 1671 a 0.06
169 27.0 1681 a 0.06
170 27.0 1691 a 0.06
171 27.0 1701 a 0.06
172 27.0 1711 a 0.06
173 27.0 1721 a 0.06
174 27.0 1731 a 0.06
175 27.0 1741 a 0.06
176 26.9 1751 a 0.06
177 27.0 1761 a 0.06
178 27.0 1771 a 0.06
179 27.0 1781 a 0.06
180 27.0 1791 a 0.06
181 27.0 1801 a 0.06
182 27.0 1811 a 0.06
183 27.3 1821 a 0.06
184 27.2 1831 a 0.06
185 27.2 1841 a 0.06
186 27.2 1851 a 0.06
187 27.2 1861 a 0.06
188 27.3 1871 a 0.06
189 27.2 1881 a 0.06
190 27.2 1891 a 0.06
191 27.2 1901 a 0.06
192 27.2 1911 a 0.06
193 27.2 1921 a 0.06
194 27.2 1931 a 0.06
195 27.2 1941 a 0.06
196 27.4 1951 a 0.06
197 27.4 1961 a 0.06
198 27.5 1971 a 0.06
199 27.4 1981 a 0.06
200 27.3 1991 a 0.06
草圖:
這是一個想法,但是在一個圖中有 3 種處理方法(如下圖所示)
這個想法是繪制估計斜坡的調整曲線(以任何顏色組合)和獲得的真實曲線(灰色,因此曲線的斜率被“突出顯示”)
EDIT2:提供建議的情節:
然而,這是沒有訓練和測試數據的,因此它不是測量的“真實”斜率。 當我嘗試使用b
繪制繪圖時:
#solved errors making a bigger vector to fit my data
new_datos <- data.frame(segundos = rep(seq(0,5000,length.out=2484),1),tratamiento = c(rep("0.06",2484),rep("0.125",2484),rep("0.25",2484)))
因為我從來沒有提供關於我如何嘗試繪制的正確方法:
ggplot(new_datos,aes(x=segundos,y=temperature,group = tratamiento,colour=tratamiento)) +
geom_point(data=development,aes(x=segundos,y=temperature,group = curva),size = 1,alpha=0.8,color="grey50",show.legend = FALSE) +
geom_point(size = 3,show.legend = FALSE) + labs(x="Tiempo (s)", y="Temperatura (ºC)") +
geom_text(aes(x = segundos, y = temperature+1, label = c("0,038 °C/min", "0.132 ºC/min","0.323 ºC/min")),color = "black",data = new_datos[c(200,250,500),]) +
scale_color_manual(values = c("#3B9AB2", "#E8C520", "#F21A00","grey"))
我正在生成一些測試數據並使用它來演示如何實現我認為你所要求的,因為你提供的數據仍然不夠完整以運行你的代碼(這不是一個大問題 - 我認為這個樣本數據很好地說明了這些概念):
# How many 'curves' and 'treatments' do we want?
n_curve <- 6
n_treat <- 3
n <- n_curve * n_treat
# Take the cars dataset and repeat it n times
dataDF <- do.call("rbind", replicate(n, cars, simplify = FALSE))
# Assign a curve ID
dataDF$curve <- as.factor(rep(letters[1:n_curve], each=nrow(cars)))
# Assign a treatment ID
dataDF$treatment <- as.factor(rep(LETTERS[1:n_treat], each=nrow(cars)))
# Adjust the data by a small, random number so that our curves aren't identical
dataDF$speed2 <- dataDF$speed + dataDF$speed * (sample(c(-6:6), size=nrow(dataDF), replace=T) / 100)
dataDF$dist2 <- dataDF$dist + dataDF$dist * (sample(c(-6:6), size=nrow(dataDF), replace=T) / 100)
head(dataDF)
# Make sure that data differ between treatments
dataDF$dist2 <- dataDF$dist2 * (as.numeric(dataDF$treatment)^2)
# The 'real' data are plotted in grey
# I'm using geom_smooth() to build a linear model of the data and
# then plotting that in red. You can instead build a separate lm
# and use the predict() function to generate new data, and then
# plot that here by adding another line along the lines of:
# geom_line(data=new_datos, aes(x=segundos, y=temperature))
ggplot(dataDF, aes(x=speed2, y=dist2)) +
geom_line(col='grey50') +
geom_smooth(method='lm', col='red', fill='red') +
facet_grid(cols=vars(treatment))
這為我們提供了一個圖,其中每個處理都有單獨的子圖(“方面”)。 基於真實數據的原始曲線以灰色繪制。 來自線性模型的線(此處由 ggplot 塊內的geom_smooth()
生成)以紅色繪制在頂部。
您也可以在一個圖上執行此操作並為曲線線着色,使它們不全是灰色:
# We can also do this in one plot by removing the facet
# and colouring the geom_smooth() lines by treatment:
ggplot(dataDF, aes(x=speed2, y=dist2, col=treatment, fill=treatment)) +
geom_line(alpha=0.5) +
geom_smooth(method='lm')
我不完全確定你的目標是什么,但希望這能給你一些開始。 如果您需要其他信息,請隨時對此答案發表評論。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.