[英]Converting a mixture of gaussians to PyMC3
我正在嘗試學習PyMC3,我想舉一個簡單的混合高斯的例子。 我找到了這個示例,想將其轉換為pymc3,但是在嘗試繪制traceplot時出現錯誤。
n1 = 500
n2 = 200
n = n1+n2
mean1 = 21.8
mean2 = 42.0
precision = 0.1
sigma = np.sqrt(1 / precision)
# precision = 1/sigma^2
print "sigma1: %s" % sigma1
print "sigma2: %s" % sigma2
data1 = np.random.normal(mean1,sigma,n1)
data2 = np.random.normal(mean2,sigma,n2)
data = np.concatenate([data1 , data2])
#np.random.shuffle(data)
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(111, xlabel='x', ylabel='y', title='mixture of 2 guassians')
ax.plot(range(0,n1+n2), data, 'x', label='data')
plt.legend(loc=0)
with pm.Model() as model:
#priors
p = pm.Uniform( "p", 0 , 1) #this is the fraction that come from mean1 vs mean2
ber = pm.Bernoulli( "ber", p = p) # produces 1 with proportion p.
precision = pm.Gamma('precision', alpha=0.1, beta=0.1)
mean1 = pm.Normal( "mean1", 0, 0.01 ) #better to use normals versus Uniforms (unless you are certain the value is truncated at 0 and 200
mean2 = pm.Normal( "mean2", 0, 0.01 )
mean = pm.Deterministic('mean', ber*mean1 + (1-ber)*mean2)
process = pm.Normal('process', mu=mean, tau=precision, observed=data)
# inference
step = pm.Metropolis()
trace = pm.sample(10000, step)
pm.traceplot(trace)
錯誤:
sigma1: 3.16227766017
sigma2: 1.69030850946
[-----------------100%-----------------] 10000 of 10000 complete in 4.4 sec
---------------------------------------------------------------------------
LinAlgError Traceback (most recent call last)
<ipython-input-10-eb728824de83> in <module>()
44 step = pm.Metropolis()
45 trace = pm.sample(10000, step)
---> 46 pm.traceplot(trace)
/usr/lib/python2.7/site-packages/pymc-3.0-py2.7.egg/pymc/plots.pyc in traceplot(trace, vars, figsize, lines, combined, grid)
70 ax[i, 0].set_xlim(mind - .5, maxd + .5)
71 else:
---> 72 kdeplot_op(ax[i, 0], d)
73 ax[i, 0].set_title(str(v))
74 ax[i, 0].grid(grid)
/usr/lib/python2.7/site-packages/pymc-3.0-py2.7.egg/pymc/plots.pyc in kdeplot_op(ax, data)
94 for i in range(data.shape[1]):
95 d = data[:, i]
---> 96 density = kde.gaussian_kde(d)
97 l = np.min(d)
98 u = np.max(d)
/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in __init__(self, dataset, bw_method)
186
187 self.d, self.n = self.dataset.shape
--> 188 self.set_bandwidth(bw_method=bw_method)
189
190 def evaluate(self, points):
/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in set_bandwidth(self, bw_method)
496 raise ValueError(msg)
497
--> 498 self._compute_covariance()
499
500 def _compute_covariance(self):
/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in _compute_covariance(self)
507 self._data_covariance = atleast_2d(np.cov(self.dataset, rowvar=1,
508 bias=False))
--> 509 self._data_inv_cov = linalg.inv(self._data_covariance)
510
511 self.covariance = self._data_covariance * self.factor**2
/usr/lib64/python2.7/site-packages/scipy/linalg/basic.pyc in inv(a, overwrite_a, check_finite)
381 inv_a, info = getri(lu, piv, lwork=lwork, overwrite_lu=1)
382 if info > 0:
--> 383 raise LinAlgError("singular matrix")
384 if info < 0:
385 raise ValueError('illegal value in %d-th argument of internal '
LinAlgError: singular matrix
感謝Fonnesbeck在github問題跟蹤器上回答了這個問題:
https://github.com/pymc-devs/pymc3/issues/452
這是更新的代碼:
with pm.Model() as model:
#priors
p = pm.Uniform( "p", 0 , 1) #this is the fraction that come from mean1 vs mean2
ber = pm.Bernoulli( "ber", p = p, shape=len(data)) # produces 1 with proportion p.
sigma = pm.Uniform('sigma', 0, 100)
precision = sigma**-2
mean = pm.Normal( "mean", 0, 0.01, shape=2 )
mu = pm.Deterministic('mu', mean[ber])
process = pm.Normal('process', mu=mu, tau=precision, observed=data)
with model:
step1 = pm.Metropolis([p, sigma, mean])
step2 = pm.BinaryMetropolis([ber])
trace = pm.sample(10000, [step1, step2])
推斷伯努利隨機變量時需要使用BinaryMetropolis
更加簡單快捷的版本如下:
with pm.Model() as model2:
p = pm.Beta( "p", 1., 1.)
means = pm.Uniform('mean', 15, 60, shape=2)
sigma = pm.Uniform('sigma', 0, 20, testval=5)
process = pm.NormalMixture('obs', tt.stack([p, 1-p]), means, sd=sigma, observed=data)
with model2:
step = pm.Metropolis()
trace = pm.sample(10000, step=step)
我知道這個問題已經很久了,但是我正在嘗試使用不同的PyMC3使用示例,以習慣於在PyMC3中進行建模。 上面給出的答案在PyMC3的當前版本1.0中不起作用(它不能正確區分這兩種方法)。 為了使其正常工作,我要做的最小更改如下:
1)
# mean = pm.Normal("mean", 0, 0.01, shape=2 )
mean = pm.Uniform('mean', 15, 60, shape=2)
2)
# step2 = pm.BinaryMetropolis([ber])
step2 = pm.ElemwiseCategorical(vars=[ber], values=[0, 1])
以防萬一其他人有類似的問題。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.