[英]Parameters estimation with multiple data sets using Gekko
Following @John Hedengren ideas I modified my code ( Parameters estimation and least squares minimization - Python and Gekko ) and added another set of data following the explanations from here: How to set up GEKKO for parameter estimation from multiple independent sets of data?按照@John Hedengren 的想法,我修改了我的代码( 参数估计和最小二乘法最小化 - Python 和 Gekko )并按照此处的说明添加了另一组数据: 如何设置 GEKKO 以从多个独立的数据集进行参数估计? However, I am not reaching a good solution, particularly for measurement 4. Have you got any ideas about how can I adjust it better?
但是,我没有找到一个好的解决方案,特别是对于测量 4。您对如何更好地调整它有任何想法吗? Thank you for your time and help.
感谢您的时间和帮助。
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
import math as math
import pandas as pd
#data set 1
t_data1 = [0.08333,0.5,1,4,22.61667]
x_data1 = [7.77e-5,7e-5,7e-5,6.22e-5,3.89e-5]
x_data1mgl = [4,3.6,3.6,3.2,2]
#data set 2
t_data4 = [0.08333,0.5,1,4,22.61667]
x_data4 = [2.92e-4,2.72e-4,2.92e-4,2.72e-4,2.14e-4]
x_data4mgl = [15,14,15,14,11]
#combine data and time in the same dataframe
data1 = pd.DataFrame({'time':t_data1,'x1':x_data1})
data4 = pd.DataFrame({'time':t_data4,'x4':x_data4})
data1.set_index('time', inplace=True)
data4.set_index('time', inplace=True)
# merge dataframes
data = data1.join(data4, how='outer')
# simulation time points
dftp = pd.DataFrame({'time':np.linspace(0,25,51)})
dftp.set_index('time', inplace=True)
# merge dataframes
df = data.join(dftp,how='outer')
# get True (1) or False (0) for measurement
#df['meas'] = (df['x'].values==df['x'].values).astype(int)
z1 = (df['x1']==df['x1']).astype(int)
z4 = (df['x4']==df['x4']).astype(int)
# replace NaN with zeros
df0 = df.fillna(value=0)
#Estimator Model
m = GEKKO(remote=True)
#m = GEKKO(remote=False) # remote=False to produce local folder with results
m.time = df.index.values
# measurements - Gekko arrays to store measured data
xm = m.Array(m.Param,2)
xm[0].value = df0['x1'].values
xm[1].value = df0['x4'].values
cl=m.Array(m.Var,2)
cl[0].value= 4e-5
cl[1].value= 1.33e-4
TOC=m.Array(m.Var,2)
TOC[0].value= 11.85
TOC[1].value= 11.85
C0=m.Array(m.Var,2)
C1=m.Array(m.Var,2)
cC1=m.Array(m.Var,2)
C2=m.Array(m.Var,2)
C2m=m.Array(m.Var,2)
cC2=m.Array(m.Var,2)
C3=m.Array(m.Var,2)
C4=m.Array(m.Var,2)
C5=m.Array(m.Var,2)
C6=m.Array(m.Var,2)
C7=m.Array(m.Var,2)
C8=m.Array(m.Var,2)
C9=m.Array(m.Var,2)
C10=m.Array(m.Var,2)
C11=m.Array(m.Var,2)
C12=m.Array(m.Var,2)
r1=m.Array(m.Var,2)
r2=m.Array(m.Var,2)
r3=m.Array(m.Var,2)
r4=m.Array(m.Var,2)
r5=m.Array(m.Var,2)
r6=m.Array(m.Var,2)
r7=m.Array(m.Var,2)
r8=m.Array(m.Var,2)
r9=m.Array(m.Var,2)
r10=m.Array(m.Var,2)
r11=m.Array(m.Var,2)
r12=m.Array(m.Var,2)
k5=m.Array(m.Var,2)
#Define GEKKO variables that determine if time point contains data to be used in regression
#index for objective (0=not measured, 1=measured)
zm = m.Array(m.Param,2)
zm[0].value=z1
zm[1].value=z4
# fit to measurement
x=m.Array(m.Var,2)
x[0].value=x_data1[0]
x[1].value=x_data4[0]
#adjustable parameters
kdoc1 = 0 #m-1h-1
kdoc2 = m.FV(1,lb=0.01,ub=10) #m-1h-1
S1 = 0
S2 = m.FV(1,lb=0.01,ub=10)
#Define Variables
for i in range(2):
#State variables
# M (mol/L)
#C0 = m.Var(cl[i])
C0[i] = m.Var(value=cl[i],lb=0,ub=1e-3)
C1[i] = m.Var(value=6.9e-6,lb=0,ub=2e-3)
C2[i] = m.Var(value=x[i],lb=0, ub=1e-3)
C2m[i] = m.Param(xm[i],lb=0)
meas = m.Param(zm[i])
m.Minimize(meas*((C2[i]-C2m[i]))**2)
C3[i] = m.Var(value=0)
C4[i] = m.Var(value=3.16e-8)
C5[i] = m.Var(value=3.83e-7)
C6[i] = m.Var(value=0)
C7[i]= m.Var(value=0)
C8[i] = m.Var(value=0)
C9[i] = m.Var(value=0)
C10[i] = m.Var(value=0)
C11[i] = m.Var(value=0)
C12[i] = m.Var(value=3.21e-3)
TOC[i] = m.Var(value=5.93)#mg-C/L
cC1[i] = m.Var(value=0)
cC2[i] = m.Var(value=0)
#temperature ºC
T = 20
#Rate constants
k1 = 2040000000*math.exp(-1887/(273.15+T))*3600
k2 = 138000000*math.exp(-8800/(273.15+T))*3600
k3 = 300000*math.exp(-2010/(273.15+T))*3600
k4 = 0.00000065*3600
k6 = 26700*3600
k7 = 167*3600
k8 = 27700*3600
k9 = 8300*3600
k10 = 0
k5[i] = 37800000000*math.exp(-2169/(273.15+T))*C4[i]\
+2.52e25*math.exp(-16860/(273.15+T))*C10[i]\
+0.87*math.exp(-503/(273.15+T))*C9[i]
r1[i] = k1 * C0[i] * C1[i]
r2[i] = k2 * C2[i]
r3[i] = k3 * C0[i] * C2[i]
r4[i] = k4 * C3[i]
r5[i] = k5[i] * C2[i] * C2[i]
r6[i] = k6 * C3[i] * C1[i]* C4[i]
r7[i] = k7 * C3[i] * C5[i]
r8[i] = k8 * C6[i] * C3[i]
r9[i] = k9 * C6[i] * C2[i]
r10[i] = k10 * C2[i] * C3[i]
r11[i] = kdoc1*S1*TOC[i]*C2[i]/12000
r12[i] = kdoc2*S2*TOC[i]*C0[i]/12000
t = m.Param(value=m.time)
m.Equation(C0[i].dt()== -r1[i] + r2[i] - r3[i] + r4[i] + r8[i] - r12[i])
m.Equation(C1[i].dt()== -r1[i] + r2[i] + r5[i] - r6[i] + r11[i])
m.Equation(C2[i].dt()== r1[i] - r2[i] - r3[i] + r4[i] - r5[i] + r6[i] - r9[i] - r10[i] - r11[i])
m.Equation(C3[i].dt()== r3[i] - r4[i] + r5[i] - r6[i] - r7[i] - r8[i] - r10[i])
m.Equation(C4[i].dt()== 0)
m.Equation(C5[i] == 1e-14/C4[i])
m.Equation(C6[i].dt()== r7[i] - r8[i] - r9[i])
m.Equation(C7[i] == (3.16e-8*C0[i])/C4[i])
m.Equation(C1[i] == (5.01e-10*C8[i])/C4[i])
m.Equation(C11[i] == (5.01e-11*C10[i])/C4[i])
m.Equation(C9[i] == (5.01e-7*C9[i])/C4[i])
m.Equation(C10[i] == C12[i] - 2*C11[i] - C5[i] + C4[i])
m.Equation(C12[i].dt()== 0)
m.Equation(TOC[i].dt()== 0)
m.Equation(cC1[i] == 17000*C1[i])
m.Equation(cC2[i] == 51500*C2[i])
m.Equation(S1+S2<1)
#Application options
m.options.SOLVER = 3 #IPOPT solver
m.options.IMODE = 5 #Dynamic Simultaneous - estimation = MHE
m.options.EV_TYPE = 2 #absolute error
m.options.NODES = 3 #collocation nodes (2,5)
if True:
#kdoc1.STATUS=1
kdoc2.STATUS=1
#S1.STATUS=1
S2.STATUS=1
#Solve
#m.open_folder() # open folder if remote=False to see infeasibilities.txt
m.solve(disp=True)
print('Final SSE Objective: ' + str(m.options.objfcnval))
print('Solution')
#print('kdoc1 = ' + str(kdoc1.value[0]))
print('kdoc2 = ' + str(kdoc2.value[0]))
#print('S1 = ' + str(S1.value[0]))
print('S2 = ' + str(S2.value[0]))
plt.figure(1,figsize=(8,5))
plt.subplot(2,1,1)
plt.plot(m.time,C2[0],'b.--',label='Predicted 1')
plt.plot(m.time,C2[1],'r.--',label='Predicted 4')
plt.plot(t_data1,x_data1,'bx',label='Meas 1')
plt.plot(t_data4,x_data4,'rx',label='Meas 4')
plt.legend(loc='best')
plt.subplot(2,1,2)
plt.plot(m.time,cC2[0].value,'b',label ='C2_M1')
plt.plot(m.time,cC2[1].value,'r',label ='C2_M4')
plt.plot(t_data1,x_data1mgl,'bx',label='Meas 1')
plt.plot(t_data4,x_data4mgl,'rx',label='Meas 4')
#plt.plot(m.time,cC1.value,label ='C1')
plt.legend(loc='best')
plt.xlabel('time (h)')
plt.figure(2,figsize=(12,8))
plt.subplot(4,3,1)
plt.plot(m.time,C0[i].value,label ='C0')
plt.legend()
plt.subplot(4,3,2)
plt.plot(m.time,C1[i].value,label ='C1')
plt.legend()
plt.subplot(4,3,3)
plt.plot(m.time,C3[i].value,label ='C3')
plt.legend()
plt.subplot(4,3,4)
plt.plot(m.time,C4[i].value,label ='C4')
plt.legend()
plt.subplot(4,3,5)
plt.plot(m.time,C5[i].value,label ='C5')
plt.legend()
plt.subplot(4,3,6)
plt.plot(m.time,C6[i].value,label ='C6')
plt.legend()
plt.subplot(4,3,7)
plt.plot(m.time,C7[i].value,label ='C7')
plt.legend()
plt.subplot(4,3,8)
plt.plot(m.time,C8[i].value,label ='C8')
plt.legend()
plt.subplot(4,3,9)
plt.plot(m.time,C9[i].value,label ='C9')
plt.legend()
plt.xlabel('time (h)')
plt.subplot(4,3,10)
plt.plot(m.time,C10[i].value,label ='C10')
plt.legend()
plt.xlabel('time (h)')
plt.subplot(4,3,11)
plt.plot(m.time,C11[i].value,label ='C11')
plt.legend()
plt.xlabel('time (h)')
plt.subplot(4,3,12)
plt.plot(m.time,C12[i].value,label ='C12')
plt.legend()
plt.xlabel('time (h)')
plt.show()
Here are the results from your script:以下是脚本的结果:
There are a couple issues that may improve the fit:有几个问题可能会改善合身性:
m.free_initial(x)
for any initial concentration that you'd like to calculate, substituting x
for the name of the variable.对于您想要计算的任何初始浓度,用
x
代替变量的名称。 Unfortunately, I don't have a lot more to offer on this problem because it would require more knowledge about the experiment and the equations used to model the dynamics.不幸的是,我在这个问题上没有更多的东西可以提供,因为它需要更多关于实验和用于 model 动力学的方程的知识。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.