繁体   English   中英

Python Scipy GEV适合与分布不匹配

[英]Python scipy GEV fit does not match distribution

我有一个240个每月最大潮汐的数组,正在尝试将GEV曲线拟合到该数据以进行回报期计算。 但是,拟合的GEV曲线与输入到GEV函数的潮汐直方图不同。

import numpy as np
import matplotlib.pyplot as plt 
from scipy.stats import genextreme as gev

tides = np.array([204.25, 184.87, 164.15, 158.54, 194.47, 206.31, 212.04,
209.24, 186.28, 176.27, 181.72, 199.49, 205.97, 198.42, 187.2, 170.42, 188.22, 
193.66, 206.12, 204.03, 187.64, 188.66, 190.92, 191.3, 196.25, 191.91, 166.42, 
188.73, 192.57, 199.81, 193.57, 193.28, 198.45, 192.17, 200.9, 212.57, 205.65,
188.84, 175.5, 180.52, 199.2, 202.07, 209.27, 202.07, 187.95, 199.11, 206.81, 
235.44, 204.04, 195.15, 173.85, 163.17, 191.7, 201.87, 212.38, 207.92, 171.61,
186.32, 201.58, 222.89, 206.96, 200.68, 178.82, 183.91, 198.82, 209.23, 
224.03, 230.06, 199.87, 201.07, 205.59, 211.58, 210.78, 205.9, 182.66, 199.49, 
195.04, 196.12, 197.82, 203.91, 188.28, 196.81, 200.88, 201.25, 212.27, 
178.33, 173.86, 185.71, 191.83, 202.56, 195.54, 189.08, 184.48, 199.92,
206.66, 198.95, 188.12, 176.24, 161.95, 172.67, 196.1, 207.34, 208.96, 209.65,
178.95, 188.49, 211.91, 218.64, 201.82, 193.37, 170.33, 185.98, 201.05, 
212.28, 213.93, 204.78, 195.17, 196.68, 210.0, 211.09, 208.75, 191.5, 201.17,
190.19, 195.78, 197.68, 209.58, 205.62, 190.79, 198.04, 206.89, 210.84,
202.58, 180.44, 178.58, 191.25, 209.43, 205.74, 194.24, 192.74, 193.11, 
209.92, 214.03, 220.04, 187.46, 191.46, 161.37, 180.56, 192.58, 205.59, 208.1,
192.8, 180.27, 195.74, 201.17, 209.86, 201.87, 179.38, 167.11, 179.99, 208.07,
212.23, 205.14, 201.21, 180.63, 176.36, 190.89, 206.73, 205.34, 188.07, 
169.57, 176.18, 191.82, 194.07, 205.99, 204.98, 200.29, 190.52, 189.14, 
194.65, 188.97, 198.19, 178.03, 182.65, 194.29, 196.0, 193.19, 194.43, 179.63,
197.73, 204.24, 199.32, 209.48, 204.62, 193.44, 181.99, 196.02, 204.84, 209.4,
194.12, 175.39, 194.88, 208.65, 205.94, 197.69, 184.47, 172.59, 183.86,
199.14, 213.82, 206.46, 194.48, 175.3, 176.1, 194.91, 208.59, 209.01, 190.92,
191.17, 175.59, 195.32, 206.8, 217.82, 212.64, 195.08, 180.13, 190.87, 203.0, 
196.91, 189.42, 170.31, 170.07, 181.7, 187.96, 194.01, 207.64, 194.11, 192.11,
202.95, 197.85])

gev_fit = gev.fit(tides)

x = np.linspace(np.min(tides)-10,np.max(tides)+10,1000)
gev_pdf = gev.pdf(x, gev_fit[0], gev_fit[1], gev_fit[2])

plt.subplot(1,2,1)
plt.hist(tides, normed=True, alpha=0.2, label='Data')  
plt.xlabel('Tides (cm)')

plt.subplot(1,2,2)
plt.hist(tides, normed=True, alpha=0.2, label='Data')
plt.plot(x, gev_pdf, 'r--', label='GEV Fit')
plt.xlabel('Tides (cm)')
plt.legend(loc='upper left')
plt.show()

适合的形象

如您所见,GEV拟合完全与原始数据的分布不匹配。 您对改善贴身度有什么建议吗?

我承认:经过几个小时的汗水劳累,这才是我所拥有的。 这是最大间距估计的一个粗略示例,似乎确实收敛于合理的估计向量。

import numpy as np
import matplotlib.pyplot as plt 
from scipy.stats import genextreme 
from scipy.optimize import minimize
from collections import Counter

tides = [204.25, 184.87, 164.15, 158.54, 194.47, 206.31, 212.04, 209.24, 186.28, 176.27, 181.72, 199.50, 205.97, 198.42, 187.2, 170.42, 188.22,  193.66, 206.12, 204.03, 187.64, 188.66, 190.92, 191.3, 196.25, 191.91, 166.42,  188.73, 192.57, 199.81, 193.57, 193.28, 198.45, 192.17, 200.9, 212.57, 205.65, 188.84, 175.5, 180.52, 199.2, 202.08, 209.27, 202.07, 187.95, 199.11, 206.81,  235.44, 204.04, 195.15, 173.85, 163.17, 191.7, 201.88, 212.38, 207.92, 171.61, 186.32, 201.58, 222.89, 206.96, 200.68, 178.82, 183.91, 198.82, 209.23,  224.03, 230.06, 199.87, 201.07, 205.60, 211.58, 210.78, 205.9, 182.66, 199.49,  195.04, 196.12, 197.82, 203.91, 188.28, 196.81, 200.88, 201.25, 212.27,  178.33, 173.86, 185.71, 191.83, 202.56, 195.54, 189.08, 184.48, 199.92, 206.66, 198.95, 188.12, 176.24, 161.95, 172.67, 196.1, 207.34, 208.96, 209.65, 178.95, 188.49, 211.91, 218.64, 201.82, 193.37, 170.33, 185.98, 201.05,  212.28, 213.93, 204.78, 195.17, 196.68, 210.0, 211.09, 208.75, 191.5, 201.17, 190.19, 195.78, 197.68, 209.58, 205.62, 190.79, 198.04, 206.89, 210.84, 202.58, 180.44, 178.58, 191.25, 209.43, 205.74, 194.24, 192.74, 193.11,  209.92, 214.03, 220.04, 187.46, 191.46, 161.37, 180.56, 192.58, 205.59, 208.1, 192.8, 180.27, 195.74, 201.18, 209.86, 201.87, 179.38, 167.11, 179.99, 208.07, 212.23, 205.14, 201.21, 180.63, 176.36, 190.89, 206.73, 205.34, 188.07,  169.57, 176.18, 191.82, 194.07, 205.99, 204.98, 200.29, 190.52, 189.14,  194.65, 188.97, 198.19, 178.03, 182.65, 194.29, 196.0, 193.19, 194.43, 179.63, 197.73, 204.24, 199.32, 209.48, 204.62, 193.44, 181.99, 196.02, 204.84, 209.4, 194.12, 175.39, 194.88, 208.65, 205.94, 197.69, 184.47, 172.59, 183.86, 199.14, 213.82, 206.46, 194.48, 175.3, 176.1, 194.91, 208.59, 209.01, 190.93, 191.17, 175.59, 195.32, 206.8, 217.82, 212.64, 195.08, 180.13, 190.87, 203.0,  196.91, 189.42, 170.31, 170.07, 181.7, 187.96, 194.01, 207.64, 194.11, 192.11, 202.95, 197.85] 
tides.sort()

#~ counts = Counter(tides)
#~ print (counts)

def fun(params,tides):
    F = lambda x: genextreme.cdf(x,*params)
    result = -sum([np.log(d) for d in np.diff([0]+[F(_) for _ in tides]+[1])])
    return result

shape = 0.5
location = 234
scale = 50
x0 = (shape, location, scale)

#~ fun(x0,tides)

result = minimize(fun, x0, args=tides, method='Nelder-Mead')
print (result)

x = np.linspace(np.min(tides)-10,np.max(tides)+10,1000)
#~ gev_pdf = gev.pdf(x, gev_fit[0], gev_fit[1], gev_fit[2])
f = lambda x: genextreme.pdf(x,*result.x)

plt.subplot(1,2,1)
plt.hist(tides, normed=True, alpha=0.2, label='Data')  
plt.xlabel('Tides (cm)')

plt.subplot(1,2,2)
plt.hist(tides, normed=True, alpha=0.2, label='Data')
plt.plot(x, [f(_) for _ in x], 'r--', label='GEV Fit')
plt.xlabel('Tides (cm)')
plt.legend(loc='upper left')
plt.show()

代码中的碎片(例如Counter)代表我为识别重复数据而进行的工作。 它们导致零的项目间间距,这导致尝试取零的对数。 我要做的是稍微扰动物品。 例如,像201.17这样的一项变成了201.18。

这是结果图。

在此处输入图片说明

无论如何,我很高兴你问了这个。 有趣的问题。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM