使用 Gekko 使用多个数据集进行参数估计

Question

Following @John Hedengren ideas I modified my code ( Parameters estimation and least squares minimization - Python and Gekko ) and added another set of data following the explanations from here: How to set up GEKKO for parameter estimation from multiple independent sets of data?按照@John Hedengren 的想法，我修改了我的代码（参数估计和最小二乘法最小化 - Python 和 Gekko ）并按照此处的说明添加了另一组数据：如何设置 GEKKO 以从多个独立的数据集进行参数估计？ However, I am not reaching a good solution, particularly for measurement 4. Have you got any ideas about how can I adjust it better?但是，我没有找到一个好的解决方案，特别是对于测量 4。您对如何更好地调整它有任何想法吗？ Thank you for your time and help.感谢您的时间和帮助。

from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt 
import math as math
import pandas as pd

#data set 1
t_data1 = [0.08333,0.5,1,4,22.61667]
x_data1 = [7.77e-5,7e-5,7e-5,6.22e-5,3.89e-5]
x_data1mgl = [4,3.6,3.6,3.2,2]

#data set 2
t_data4 = [0.08333,0.5,1,4,22.61667]
x_data4 = [2.92e-4,2.72e-4,2.92e-4,2.72e-4,2.14e-4]
x_data4mgl = [15,14,15,14,11]

#combine data and time in the same dataframe
data1 = pd.DataFrame({'time':t_data1,'x1':x_data1})
data4 = pd.DataFrame({'time':t_data4,'x4':x_data4})
data1.set_index('time', inplace=True)
data4.set_index('time', inplace=True)


# merge dataframes
data = data1.join(data4, how='outer')

# simulation time points
dftp = pd.DataFrame({'time':np.linspace(0,25,51)})
dftp.set_index('time', inplace=True)

# merge dataframes
df = data.join(dftp,how='outer')

# get True (1) or False (0) for measurement
#df['meas'] = (df['x'].values==df['x'].values).astype(int)
z1 = (df['x1']==df['x1']).astype(int)
z4 = (df['x4']==df['x4']).astype(int)


# replace NaN with zeros
df0 = df.fillna(value=0)

#Estimator Model
m = GEKKO(remote=True)
#m = GEKKO(remote=False) # remote=False to produce local folder with results

m.time = df.index.values

# measurements - Gekko arrays to store measured data
xm = m.Array(m.Param,2)
xm[0].value = df0['x1'].values
xm[1].value = df0['x4'].values


cl=m.Array(m.Var,2)
cl[0].value= 4e-5
cl[1].value= 1.33e-4


TOC=m.Array(m.Var,2)
TOC[0].value= 11.85
TOC[1].value= 11.85


C0=m.Array(m.Var,2)
C1=m.Array(m.Var,2)
cC1=m.Array(m.Var,2)
C2=m.Array(m.Var,2)
C2m=m.Array(m.Var,2)
cC2=m.Array(m.Var,2)
C3=m.Array(m.Var,2)
C4=m.Array(m.Var,2)
C5=m.Array(m.Var,2)
C6=m.Array(m.Var,2)
C7=m.Array(m.Var,2)
C8=m.Array(m.Var,2)
C9=m.Array(m.Var,2)
C10=m.Array(m.Var,2)
C11=m.Array(m.Var,2)
C12=m.Array(m.Var,2)

r1=m.Array(m.Var,2)
r2=m.Array(m.Var,2)
r3=m.Array(m.Var,2)
r4=m.Array(m.Var,2)
r5=m.Array(m.Var,2)
r6=m.Array(m.Var,2)
r7=m.Array(m.Var,2)
r8=m.Array(m.Var,2)
r9=m.Array(m.Var,2)
r10=m.Array(m.Var,2)
r11=m.Array(m.Var,2)
r12=m.Array(m.Var,2)

k5=m.Array(m.Var,2)

#Define GEKKO variables that determine if time point contains data to be used in regression
#index for objective (0=not measured, 1=measured)
zm = m.Array(m.Param,2)
zm[0].value=z1
zm[1].value=z4


# fit to measurement
x=m.Array(m.Var,2)                         
x[0].value=x_data1[0]
x[1].value=x_data4[0]


#adjustable parameters
kdoc1 = 0 #m-1h-1
kdoc2 = m.FV(1,lb=0.01,ub=10) #m-1h-1
S1 = 0
S2 = m.FV(1,lb=0.01,ub=10)

#Define Variables
for i in range(2):

#State variables
# M (mol/L)
    #C0 = m.Var(cl[i])
    C0[i] = m.Var(value=cl[i],lb=0,ub=1e-3)
    C1[i] = m.Var(value=6.9e-6,lb=0,ub=2e-3)
    C2[i] = m.Var(value=x[i],lb=0, ub=1e-3)
    C2m[i] = m.Param(xm[i],lb=0)
    meas = m.Param(zm[i])
    m.Minimize(meas*((C2[i]-C2m[i]))**2)
    C3[i] = m.Var(value=0)
    C4[i] = m.Var(value=3.16e-8)
    C5[i] = m.Var(value=3.83e-7)
    C6[i] = m.Var(value=0)
    C7[i]= m.Var(value=0)
    C8[i] = m.Var(value=0)
    C9[i] = m.Var(value=0)
    C10[i] = m.Var(value=0)
    C11[i] = m.Var(value=0)
    C12[i] = m.Var(value=3.21e-3)
    TOC[i] = m.Var(value=5.93)#mg-C/L 
    cC1[i] = m.Var(value=0)
    cC2[i] = m.Var(value=0)

#temperature ºC
    T = 20 

#Rate constants
    k1 = 2040000000*math.exp(-1887/(273.15+T))*3600
    k2 = 138000000*math.exp(-8800/(273.15+T))*3600
    k3 = 300000*math.exp(-2010/(273.15+T))*3600
    k4 = 0.00000065*3600
    k6 = 26700*3600
    k7 = 167*3600
    k8 = 27700*3600
    k9 = 8300*3600
    k10 = 0
    k5[i] = 37800000000*math.exp(-2169/(273.15+T))*C4[i]\
         +2.52e25*math.exp(-16860/(273.15+T))*C10[i]\
         +0.87*math.exp(-503/(273.15+T))*C9[i]
 
    r1[i] = k1 * C0[i] * C1[i]
    r2[i] = k2 * C2[i]
    r3[i] = k3 * C0[i] * C2[i]
    r4[i] = k4 * C3[i]
    r5[i] = k5[i] * C2[i] * C2[i]
    r6[i] = k6 * C3[i] * C1[i]* C4[i]
    r7[i] = k7 * C3[i] * C5[i]
    r8[i] = k8 * C6[i] * C3[i]
    r9[i] = k9 * C6[i] * C2[i]
    r10[i] = k10 * C2[i] * C3[i]
    r11[i] = kdoc1*S1*TOC[i]*C2[i]/12000
    r12[i] = kdoc2*S2*TOC[i]*C0[i]/12000

    t = m.Param(value=m.time)
    m.Equation(C0[i].dt()== -r1[i] + r2[i] - r3[i] + r4[i] + r8[i] - r12[i])
    m.Equation(C1[i].dt()== -r1[i] + r2[i] + r5[i] - r6[i] + r11[i])
    m.Equation(C2[i].dt()== r1[i] - r2[i] - r3[i] + r4[i] - r5[i] + r6[i] - r9[i] - r10[i] - r11[i])
    m.Equation(C3[i].dt()== r3[i] - r4[i] + r5[i] - r6[i] - r7[i] - r8[i] - r10[i])
    m.Equation(C4[i].dt()== 0)
    m.Equation(C5[i] == 1e-14/C4[i])
    m.Equation(C6[i].dt()== r7[i] - r8[i] - r9[i])
    m.Equation(C7[i] == (3.16e-8*C0[i])/C4[i])
    m.Equation(C1[i] == (5.01e-10*C8[i])/C4[i])
    m.Equation(C11[i] == (5.01e-11*C10[i])/C4[i])
    m.Equation(C9[i] == (5.01e-7*C9[i])/C4[i])
    m.Equation(C10[i] == C12[i] - 2*C11[i] - C5[i] + C4[i])
    m.Equation(C12[i].dt()== 0)
    m.Equation(TOC[i].dt()== 0)
    m.Equation(cC1[i] == 17000*C1[i])
    m.Equation(cC2[i] == 51500*C2[i])
    m.Equation(S1+S2<1)

#Application options
m.options.SOLVER = 3 #IPOPT solver
m.options.IMODE = 5 #Dynamic Simultaneous - estimation = MHE
m.options.EV_TYPE = 2 #absolute error
m.options.NODES = 3 #collocation nodes (2,5)

if True:
    #kdoc1.STATUS=1
    kdoc2.STATUS=1
    #S1.STATUS=1
    S2.STATUS=1

#Solve
#m.open_folder() # open folder if remote=False to see infeasibilities.txt
m.solve(disp=True)


print('Final SSE Objective: ' + str(m.options.objfcnval))

print('Solution')
#print('kdoc1 = ' + str(kdoc1.value[0]))
print('kdoc2 = ' + str(kdoc2.value[0]))
#print('S1 = ' + str(S1.value[0]))
print('S2 = ' + str(S2.value[0]))

plt.figure(1,figsize=(8,5))
plt.subplot(2,1,1)
plt.plot(m.time,C2[0],'b.--',label='Predicted 1')
plt.plot(m.time,C2[1],'r.--',label='Predicted 4')


plt.plot(t_data1,x_data1,'bx',label='Meas 1')
plt.plot(t_data4,x_data4,'rx',label='Meas 4')
plt.legend(loc='best')

plt.subplot(2,1,2)
plt.plot(m.time,cC2[0].value,'b',label ='C2_M1')
plt.plot(m.time,cC2[1].value,'r',label ='C2_M4')
plt.plot(t_data1,x_data1mgl,'bx',label='Meas 1')
plt.plot(t_data4,x_data4mgl,'rx',label='Meas 4')


#plt.plot(m.time,cC1.value,label ='C1')
plt.legend(loc='best')
plt.xlabel('time (h)')

plt.figure(2,figsize=(12,8))
plt.subplot(4,3,1)
plt.plot(m.time,C0[i].value,label ='C0')
plt.legend()

plt.subplot(4,3,2)
plt.plot(m.time,C1[i].value,label ='C1')
plt.legend()

plt.subplot(4,3,3)
plt.plot(m.time,C3[i].value,label ='C3')
plt.legend()

plt.subplot(4,3,4)
plt.plot(m.time,C4[i].value,label ='C4')
plt.legend()

plt.subplot(4,3,5)
plt.plot(m.time,C5[i].value,label ='C5')
plt.legend()

plt.subplot(4,3,6)
plt.plot(m.time,C6[i].value,label ='C6')
plt.legend()

plt.subplot(4,3,7)
plt.plot(m.time,C7[i].value,label ='C7')
plt.legend()

plt.subplot(4,3,8)
plt.plot(m.time,C8[i].value,label ='C8')
plt.legend()

plt.subplot(4,3,9)
plt.plot(m.time,C9[i].value,label ='C9')
plt.legend()
plt.xlabel('time (h)')

plt.subplot(4,3,10)
plt.plot(m.time,C10[i].value,label ='C10')
plt.legend()
plt.xlabel('time (h)')

plt.subplot(4,3,11)
plt.plot(m.time,C11[i].value,label ='C11')
plt.legend()
plt.xlabel('time (h)')

plt.subplot(4,3,12)
plt.plot(m.time,C12[i].value,label ='C12')
plt.legend()
plt.xlabel('time (h)')

plt.show()

Answer 1

Here are the results from your script:以下是脚本的结果：

There are a couple issues that may improve the fit:有几个问题可能会改善合身性：

There are very fast dynamics in the first time step.第一个时间步有非常快的动态。 The concentrations of certain starting species has a big influence on how much these values change.某些起始物种的浓度对这些值的变化程度有很大影响。 You will likely need to use your knowledge of the experiment to estimate the concentrations of the initial conditions for the chemical compositions.您可能需要使用您对实验的了解来估计化学成分的初始条件的浓度。
If you don't know the initial conditions or want the solver to estimate those then use如果您不知道初始条件或希望求解器估计这些条件，请使用

m.free_initial(x)

for any initial concentration that you'd like to calculate, substituting x for the name of the variable.对于您想要计算的任何初始浓度，用x代替变量的名称。 Unfortunately, I don't have a lot more to offer on this problem because it would require more knowledge about the experiment and the equations used to model the dynamics.不幸的是，我在这个问题上没有更多的东西可以提供，因为它需要更多关于实验和用于 model 动力学的方程的知识。

使用 Gekko 使用多个数据集进行参数估计

问题描述

1 个解决方案

解决方案1
0 2021-01-23 14:47:34

使用 Gekko 使用多个数据集进行参数估计

问题描述

1 个解决方案

解决方案1 0 2021-01-23 14:47:34

解决方案1
0 2021-01-23 14:47:34