拟合对数正态分布（Python 图）

Question

I am trying to fit a log-normal distribution to the histogram data.我正在尝试将对数正态分布拟合到直方图数据。 I've tried to follow examples of other questions here on the Stack Exchange but I'm not getting the fit, because in this case I have a broken axis.我尝试在 Stack Exchange 上关注其他问题的示例，但我没有得到合适的结果，因为在这种情况下我的轴坏了。 I already put the broken axis on that plot, I tried to prevent the numbers from overlapping on the axes, I removed the numbers from the repeated axes, I reduced the size of the second subplot, but I'm not able to fit the log-normal.我已经将断轴放在 plot 上，我试图防止数字在轴上重叠，我从重复轴上删除了数字，我减小了第二个子图的大小，但我无法适应日志-普通的。 How can I fit the log-normal distribution for this data set?如何拟合该数据集的对数正态分布？

Code:代码：

#amostra 17B (menor intervalo)
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import lognorm
import matplotlib.ticker as tkr
import scipy, pylab
import locale
import matplotlib.gridspec as gridspec
from scipy.stats import lognorm
locale.setlocale(locale.LC_NUMERIC, "de_DE")
plt.rcParams['axes.formatter.use_locale'] = True

frequencia_relativa=[0.000, 0.000, 0.038, 0.097, 0.091, 0.118, 0.070, 0.124, 0.097, 0.059, 0.059, 0.048, 0.054, 0.043, 
                     0.032, 0.005, 0.027, 0.016, 0.005, 0.000, 0.005, 0.000, 0.005, 0.000, 0.000, 0.000, 0.000, 0.000, 
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.005, 0.000, 0.000]
x=[0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1.00, 1.10, 1.20, 1.30, 1.40, 
   1.50, 1.60, 1.70, 1.80, 1.90, 2.00, 2.10, 2.20, 2.30, 2.40, 2.50, 2.60, 2.70, 2.80, 
   2.90, 3.00, 3.10, 3.20, 3.30, 3.40, 3.50, 3.60, 3.70, 3.80, 3.90, 4.00, 4.10, 4.20,
   4.30, 4.40, 4.50, 4.60, 4.70, 4.80, 4.90, 5.00, 5.10, 5.20, 5.30, 5.40, 5.50, 5.60,
   5.70, 5.80, 5.90, 6.00, 6.10, 6.20, 6.30, 6.40, 6.50, 6.60, 6.70, 6.80, 6.90, 7.00,
   7.10, 7.20, 7.30, 7.40, 7.50, 7.60, 7.70, 7.80, 7.90, 8.00]
plt.rcParams["figure.figsize"] = [20,8]
f, (ax,ax2) = plt.subplots(1,2, sharex=True, sharey=True, facecolor='w')
axes = f.add_subplot(111, frameon=False)
ax.spines['top'].set_color('none')
ax2.spines['top'].set_color('none')
gs = gridspec.GridSpec(1,2,width_ratios=[3,1])
ax = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1])
ax.yaxis.tick_left()
ax.xaxis.tick_bottom()
ax2.xaxis.tick_bottom()
ax.tick_params(labeltop='off') # don't put tick labels at the top
ax2.yaxis.tick_right()
ax.bar(x, height=frequencia_relativa, alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")
ax2.bar(x, height=frequencia_relativa, alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")
ax.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax.tick_params(axis = 'both', which = 'minor', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'minor', labelsize = 18)
ax2.xaxis.set_ticks(np.arange(7.0, 8.5, 0.5))
ax2.xaxis.set_major_formatter(tkr.FormatStrFormatter('%0.1f'))
plt.subplots_adjust(wspace=0.04)
ax.set_xlim(0,2.5)
ax.set_ylim(0,0.14)
ax2.set_xlim(7.0,8.0)
def func(x, pos):  # formatter function takes tick label and tick position
    s = str(x)
    ind = s.index('.')
    return s[:ind] + ',' + s[ind+1:]   # change dot to comma
x_format = tkr.FuncFormatter(func)
ax.xaxis.set_major_formatter(x_format)
ax2.xaxis.set_major_formatter(x_format)
# hide the spines between ax and ax2
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)
# This looks pretty good, and was fairly painless, but you can get that
# cut-out diagonal lines look with just a bit more work. The important
# thing to know here is that in axes coordinates, which are always
# between 0-1, spine endpoints are at these locations (0,0), (0,1),
# (1,0), and (1,1).  Thus, we just need to put the diagonals in the
# appropriate corners of each of our axes, and so long as we use the
# right transform and disable clipping.

d = .015 # how big to make the diagonal lines in axes coordinates
# arguments to pass plot, just so we don't keep repeating them
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
ax.plot((1-d/3,1+d/3), (-d,+d), **kwargs)
ax.plot((1-d/3,1+d/3),(1-d,1+d), **kwargs)

kwargs.update(transform=ax2.transAxes)  # switch to the bottom axes
ax2.plot((-d,+d), (1-d,1+d), **kwargs)
ax2.plot((-d,+d), (-d,+d), **kwargs)
ax2.tick_params(labelright=False)
ax.tick_params(labeltop=False)
ax.tick_params(axis='x', which='major', pad=15)
ax2.tick_params(axis='x', which='major', pad=15)
ax2.set_yticks([])
f.text(0.5, -0.04, 'Tamanho lateral do triângulo ($\mu m$)', ha='center', fontsize=22)
f.text(-0.02, 0.5, 'Frequência relativa', va='center', rotation='vertical', fontsize=22)
#ax.set_xlabel('Tamanho lateral do triângulo ($\mu m$)', fontsize=22)
#ax.set_ylabel('Frequência relativa', fontsize=22)
#x_axis = np.arange(0, 29, 0.001)
#ax.plot(x_axis, norm.pdf(x_axis,2.232,1.888), linewidth=3)
f.tight_layout()
plt.show()
#plt.savefig('output.png', dpi=500, bbox_inches='tight')

Attempt with curve_fit:尝试使用curve_fit：

#amostra 17B (menor intervalo)
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import lognorm
import matplotlib.ticker as tkr
import scipy, pylab
import locale
import matplotlib.gridspec as gridspec
from scipy.stats import lognorm
locale.setlocale(locale.LC_NUMERIC, "de_DE")
plt.rcParams['axes.formatter.use_locale'] = True
from scipy.optimize import *

frequencia_relativa=[0.000, 0.000, 0.038, 0.097, 0.091, 0.118, 0.070, 0.124, 0.097, 0.059, 0.059, 0.048, 0.054, 0.043, 
                     0.032, 0.005, 0.027, 0.016, 0.005, 0.000, 0.005, 0.000, 0.005, 0.000, 0.000, 0.000, 0.000, 0.000, 
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.005, 0.000, 0.000]
x=[0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1.00, 1.10, 1.20, 1.30, 1.40, 
   1.50, 1.60, 1.70, 1.80, 1.90, 2.00, 2.10, 2.20, 2.30, 2.40, 2.50, 2.60, 2.70, 2.80, 
   2.90, 3.00, 3.10, 3.20, 3.30, 3.40, 3.50, 3.60, 3.70, 3.80, 3.90, 4.00, 4.10, 4.20,
   4.30, 4.40, 4.50, 4.60, 4.70, 4.80, 4.90, 5.00, 5.10, 5.20, 5.30, 5.40, 5.50, 5.60,
   5.70, 5.80, 5.90, 6.00, 6.10, 6.20, 6.30, 6.40, 6.50, 6.60, 6.70, 6.80, 6.90, 7.00,
   7.10, 7.20, 7.30, 7.40, 7.50, 7.60, 7.70, 7.80, 7.90, 8.00]
plt.rcParams["figure.figsize"] = [20,8]
f, (ax,ax2) = plt.subplots(1,2, sharex=True, sharey=True, facecolor='w')
axes = f.add_subplot(111, frameon=False)
ax.spines['top'].set_color('none')
ax2.spines['top'].set_color('none')
gs = gridspec.GridSpec(1,2,width_ratios=[3,1])
ax = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1])
def f(x, mu, sigma) :
   return 1/(np.sqrt(2*np.pi)*sigma*x)*np.exp(-((np.log(x)- 
   mu)**2)/(2*sigma**2))
params, extras = curve_fit(f, x, frequencia_relativa)
plt.plot(x, f(x ,params[0], params[1]))
print("mu=%g, sigma=%g" % (params[0], params[1]))

plt.subplots_adjust(wspace=0.04)
# hide the spines between ax and ax2
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)

d = .015 # how big to make the diagonal lines in axes coordinates
# arguments to pass plot, just so we don't keep repeating them
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
ax.plot((1-d/3,1+d/3), (-d,+d), **kwargs)
ax.plot((1-d/3,1+d/3),(1-d,1+d), **kwargs)

kwargs.update(transform=ax2.transAxes)  # switch to the bottom axes
ax2.plot((-d,+d), (1-d,1+d), **kwargs)
ax2.plot((-d,+d), (-d,+d), **kwargs)
f.tight_layout()
plt.show()
#plt.savefig('output.png', dpi=500, bbox_inches='tight')

Error:错误：

import matplotlib.ticker as tkr
import scipy, pylab
import locale
import matplotlib.gridspec as gridspec
#from scipy.stats import lognorm
locale.setlocale(locale.LC_NUMERIC, "de_DE")
plt.rcParams['axes.formatter.use_locale'] = True

from scipy.optimize import curve_fit

x=np.asarray([0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1.00, 1.10, 1.20, 1.30, 1.40,
   1.50, 1.60, 1.70, 1.80, 1.90, 2.00, 2.10, 2.20, 2.30, 2.40, 2.50, 2.60, 2.70, 2.80,
   2.90, 3.00, 3.10, 3.20, 3.30, 3.40, 3.50, 3.60, 3.70, 3.80, 3.90, 4.00, 4.10, 4.20,
   4.30, 4.40, 4.50, 4.60, 4.70, 4.80, 4.90, 5.00, 5.10, 5.20, 5.30, 5.40, 5.50, 5.60,
   5.70, 5.80, 5.90, 6.00, 6.10, 6.20, 6.30, 6.40, 6.50, 6.60, 6.70, 6.80, 6.90, 7.00,
   7.10, 7.20, 7.30, 7.40, 7.50, 7.60, 7.70, 7.80, 7.90, 8.00], dtype=np.float64)

frequencia_relativa=np.asarray([0.000, 0.000, 0.038, 0.097, 0.091, 0.118, 0.070, 0.124, 0.097, 0.059, 0.059, 0.048, 0.054, 0.043,
                     0.032, 0.005, 0.027, 0.016, 0.005, 0.000, 0.005, 0.000, 0.005, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.005, 0.000, 0.000], dtype=np.float64)

f, (ax,ax2) = plt.subplots(1,2, sharex=True, sharey=True, facecolor='w')

def fun(y, mu, sigma):
    return 1.0/(np.sqrt(2.0*np.pi)*sigma*y)*np.exp(-(np.log(y)-mu)**2/(2.0*sigma*sigma))

step = 0.1

xx = x

nrm = np.sum(frequencia_relativa*step) # normalization integral
print(nrm)

frequencia_relativa /= nrm # normalize frequences histogram

print(np.sum(frequencia_relativa*step)) # check normalizatio

params, extras = curve_fit(fun, xx, frequencia_relativa)

print(params[0])
print(params[1])

axes = f.add_subplot(111, frameon=False)

axes.plot(x, fun(x, params[0], params[1]), "b-", linewidth=3)


ax.spines['top'].set_color('none')
ax2.spines['top'].set_color('none')
gs = gridspec.GridSpec(1,2,width_ratios=[3,1])
ax = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1])
ax.axvspan(0.190, 1.616, label='Média $\pm$ desvio padrão', ymin=0.0, ymax=1.0, alpha=0.2, color='Plum')
ax.yaxis.tick_left()
ax.xaxis.tick_bottom()
ax2.xaxis.tick_bottom()
ax.tick_params(labeltop='off') # don't put tick labels at the top
ax2.yaxis.tick_right()
ax.bar(xx, height=frequencia_relativa, label='Frequência relativa do tamanho lateral triangular', alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")
ax2.bar(xx, height=frequencia_relativa, alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")
#plt.plot(xx, frequencia_relativa, "ro")
ax.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax.tick_params(axis = 'both', which = 'minor', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'minor', labelsize = 18)
ax2.xaxis.set_ticks(np.arange(7.0, 8.5, 0.5))
ax2.xaxis.set_major_formatter(tkr.FormatStrFormatter('%0.1f'))

plt.subplots_adjust(wspace=0.04)
ax.set_xlim(0,2.5)
ax.set_ylim(0,1.4)
ax2.set_xlim(7.0,8.0)
def func(x, pos):  # formatter function takes tick label and tick position
    s = str(x)
    ind = s.index('.')
    return s[:ind] + ',' + s[ind+1:]   # change dot to comma
x_format = tkr.FuncFormatter(func)
ax.xaxis.set_major_formatter(x_format)
ax2.xaxis.set_major_formatter(x_format)
# hide the spines between ax and ax2
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)

d = .015 # how big to make the diagonal lines in axes coordinates
# arguments to pass plot, just so we don't keep repeating them
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
ax.plot((1-d/3,1+d/3), (-d,+d), **kwargs)
ax.plot((1-d/3,1+d/3),(1-d,1+d), **kwargs)

kwargs.update(transform=ax2.transAxes)  # switch to the bottom axes
ax2.plot((-d,+d), (1-d,1+d), **kwargs)
ax2.plot((-d,+d), (-d,+d), **kwargs)
ax2.tick_params(labelright=False)
ax.tick_params(labeltop=False)
ax.tick_params(axis='x', which='major', pad=15)
ax2.tick_params(axis='x', which='major', pad=15)
ax2.set_yticks([])
f.text(0.5, -0.04, 'Tamanho lateral do triângulo ($\mu m$)', ha='center', fontsize=22)
f.text(-0.02, 0.5, 'Frequência relativa', va='center', rotation='vertical', fontsize=22)
#ax.set_xlabel('Tamanho lateral do triângulo ($\mu m$)', fontsize=22)
#ax.set_ylabel('Frequência relativa', fontsize=22)
#x_axis = np.arange(0, 29, 0.001)
#ax.plot(x_axis, norm.pdf(x_axis,2.232,1.888), linewidth=3)

ax.axvline(0.903, color='k', linestyle='-', linewidth=1.3)
ax.axvline(0.190, color='k', linestyle='--', linewidth=1)
ax.axvline(1.616, color='k', linestyle='--', linewidth=1)
f.legend(loc=9, 
          bbox_to_anchor=(.79,.99),
          labelspacing=1.5,
          numpoints=1,
          columnspacing=0.2,
          ncol=1, fontsize=18)
ax.text(0.903*0.70, 1.4*0.92, '$\mu$ = (0,90 $\pm$ 0,71) $\mu m$', fontsize=20)
f.tight_layout()

plt.show()

Answer 1

You're trying at the same time to do fancy graphs and fit.您正在尝试同时制作精美的图形和拟合。 you help you with fit, graphs are secondary problem.你帮助你适应，图表是次要问题。

First, use NumPy arrays for data, helps a lot.首先，使用 NumPy arrays 作为数据，帮助很大。 Second, your histogram function is denormalized.其次，您的直方图 function 是非规范化的。

So if in the first of your programs I'll normalize freqs array因此，如果在您的第一个程序中，我将规范化 freqs 数组

x=np.asarray([0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1.00, 1.10, 1.20, 1.30, 1.40,
   1.50, 1.60, 1.70, 1.80, 1.90, 2.00, 2.10, 2.20, 2.30, 2.40, 2.50, 2.60, 2.70, 2.80,
   2.90, 3.00, 3.10, 3.20, 3.30, 3.40, 3.50, 3.60, 3.70, 3.80, 3.90, 4.00, 4.10, 4.20,
   4.30, 4.40, 4.50, 4.60, 4.70, 4.80, 4.90, 5.00, 5.10, 5.20, 5.30, 5.40, 5.50, 5.60,
   5.70, 5.80, 5.90, 6.00, 6.10, 6.20, 6.30, 6.40, 6.50, 6.60, 6.70, 6.80, 6.90, 7.00,
   7.10, 7.20, 7.30, 7.40, 7.50, 7.60, 7.70, 7.80, 7.90, 8.00], dtype=np.float64)

frequencia_relativa=np.asarray([0.000, 0.000, 0.038, 0.097, 0.091, 0.118, 0.070, 0.124, 0.097, 0.059, 0.059, 0.048, 0.054, 0.043,
                     0.032, 0.005, 0.027, 0.016, 0.005, 0.000, 0.005, 0.000, 0.005, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.005, 0.000, 0.000], dtype=np.float64)

step = 0.1

nrm = np.sum(frequencia_relativa*step) # normalization integral
print(nrm)

frequencia_relativa /= nrm

print(np.sum(frequencia_relativa*step))

and set Y limit to 1.4, I'll get graph below并将 Y 限制设置为 1.4，我将在下面得到图表

Then, in the fitting part I'll do similar transformation, and shift X axis by half the step size, so that histogram value is in the middle of the bin, fit is starting to work, code, Python 3.9.1 Win 10 x64.然后，在拟合部分我会做类似的变换，将X轴移动步长的一半，使直方图值在bin的中间，拟合开始工作，代码，Python 3.9.1 Win 10 x64 . I removed everything not related to fit, just so it works for you, and plotted fitted function vs input data.我删除了与适合无关的所有内容，以便它适合您，并绘制适合的 function 与输入数据。

I also didn't quite understand the part of normalizing the integral (the sum of all the bars in the histogram gives 1 because it's the relative frequency) and I didn't understand the choice of step and shift.我也不太了解归一化积分的部分（直方图中所有条形的总和为 1，因为它是相对频率）并且我不了解步进和移位的选择。 Could you explain this part better, please?你能更好地解释这部分吗？

Your function to fit is two-parameters PDF of log-norm distribution.您要拟合的 function 是对数范数分布的两个参数 PDF。 It conditioned such that the ₀ ∫ ^∞ PDF(x,μ,σ)=1.它的条件是₀ ∫ ^∞ PDF(x,μ,σ)=1。 You have to condition your input data in the same way.您必须以相同的方式调整输入数据。 For histogram, integral is the sum of bins multiplied by steps.对于直方图，积分是 bin 的总和乘以步长。 Step is obviously 0.1, so I compute this sum, check it is not 1, and then divide frequencies by normalization value, such that integral is equal to 1. You could try to fit not 2-parametric, but 3-parametric curve, third parameter being normalization value, but more parameters in non-linear fit means more problems you could get. Step 显然是 0.1，所以我计算这个总和，检查它不是 1，然后将频率除以归一化值，使得积分等于 1。您可以尝试拟合的不是 2 参数，而是 3 参数曲线，第三参数是归一化值，但非线性拟合中的更多参数意味着您可能会遇到更多问题。

Wrt shift, one has to make an assumption, what value of the bin describes. Wrt shift，必须做出一个假设，即 bin 描述的值。 I assumed that value of the bin should be the value in the middle of the bin.我假设 bin 的值应该是 bin 中间的值。 Again, this is an assumption, I don't know how your data were made, maybe histogram value is really value at the left side of the bin.同样，这是一个假设，我不知道您的数据是如何制作的，也许直方图值实际上是 bin 左侧的值。 It that is so, you just remove the shift and rerun the code.就是这样，您只需删除班次并重新运行代码。

import numpy as np
import matplotlib.pyplot as plt

from scipy.optimize import curve_fit

x=np.asarray([0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1.00, 1.10, 1.20, 1.30, 1.40,
   1.50, 1.60, 1.70, 1.80, 1.90, 2.00, 2.10, 2.20, 2.30, 2.40, 2.50, 2.60, 2.70, 2.80,
   2.90, 3.00, 3.10, 3.20, 3.30, 3.40, 3.50, 3.60, 3.70, 3.80, 3.90, 4.00, 4.10, 4.20,
   4.30, 4.40, 4.50, 4.60, 4.70, 4.80, 4.90, 5.00, 5.10, 5.20, 5.30, 5.40, 5.50, 5.60,
   5.70, 5.80, 5.90, 6.00, 6.10, 6.20, 6.30, 6.40, 6.50, 6.60, 6.70, 6.80, 6.90, 7.00,
   7.10, 7.20, 7.30, 7.40, 7.50, 7.60, 7.70, 7.80, 7.90, 8.00], dtype=np.float64)

frequencia_relativa=np.asarray([0.000, 0.000, 0.038, 0.097, 0.091, 0.118, 0.070, 0.124, 0.097, 0.059, 0.059, 0.048, 0.054, 0.043,
                     0.032, 0.005, 0.027, 0.016, 0.005, 0.000, 0.005, 0.000, 0.005, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.005, 0.000, 0.000], dtype=np.float64)

def f(y, mu, sigma):
    return 1/(np.sqrt(2.0*np.pi)*sigma*y)*np.exp(-(np.log(y)-mu)**2/(2.0*sigma*sigma))

step = 0.1

nrm = np.sum(frequencia_relativa*step)

frequencia_relativa /= nrm

xx = x - 0.5*step

params, extras = curve_fit(f, xx, frequencia_relativa)

mu = params[0]
sigma = params[1]

print((mu,sigma))

# calculate mean value, https://en.wikipedia.org/wiki/Log-normal_distribution
print(np.exp(mu + sigma*sigma/2.0))

# calculate stddev as sq.root of variance
z=np.sqrt((np.exp(sigma*sigma)-1)*np.exp(mu+mu+sigma*sigma))
print(z)

xxx=np.linspace(0.001,8,1000)
plt.plot(xxx, f(xxx, mu, sigma), "b-")
plt.plot(xx, frequencia_relativa, "ro")

plt.show()

and I'm getting lognorm curve which looks ok wrt input data.我得到的 lognorm 曲线看起来不错 wrt 输入数据。 Both curves have majority of data in the [0...2] interval with peak value at ~(0.8, 1.2).两条曲线的大部分数据都在 [0...2] 区间，峰值在 ~(0.8, 1.2)。 Here is simplest graph which overlaps fitted curve (blue) with centers of the frequency histogram bins (red dots).这是最简单的图表，它将拟合曲线（蓝色）与频率直方图箱（红点）的中心重叠。 Now you could try to put it into your fancy graphs, good luck.现在您可以尝试将其放入您的精美图表中，祝您好运。

And just for reference, code which fits 3-parameters log-norm curve to apply to denormalized data.仅供参考，适合 3 参数对数范数曲线的代码适用于非规范化数据。 Seems to work as well似乎也有效

import numpy as np
import matplotlib.pyplot as plt

from scipy.optimize import curve_fit

x=np.asarray([0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1.00, 1.10, 1.20, 1.30, 1.40,
   1.50, 1.60, 1.70, 1.80, 1.90, 2.00, 2.10, 2.20, 2.30, 2.40, 2.50, 2.60, 2.70, 2.80,
   2.90, 3.00, 3.10, 3.20, 3.30, 3.40, 3.50, 3.60, 3.70, 3.80, 3.90, 4.00, 4.10, 4.20,
   4.30, 4.40, 4.50, 4.60, 4.70, 4.80, 4.90, 5.00, 5.10, 5.20, 5.30, 5.40, 5.50, 5.60,
   5.70, 5.80, 5.90, 6.00, 6.10, 6.20, 6.30, 6.40, 6.50, 6.60, 6.70, 6.80, 6.90, 7.00,
   7.10, 7.20, 7.30, 7.40, 7.50, 7.60, 7.70, 7.80, 7.90, 8.00], dtype=np.float64)

frequencia_relativa=np.asarray([0.000, 0.000, 0.038, 0.097, 0.091, 0.118, 0.070, 0.124, 0.097, 0.059, 0.059, 0.048, 0.054, 0.043,
                     0.032, 0.005, 0.027, 0.016, 0.005, 0.000, 0.005, 0.000, 0.005, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.005, 0.000, 0.000], dtype=np.float64)

def f(y, mu, sigma, N):
    return N/(np.sqrt(2.0*np.pi)*sigma*y)*np.exp(-(np.log(y)-mu)**2/(2.0*sigma*sigma))

step = 0.1

xx = x - 0.5*step

params, extras = curve_fit(f, xx, frequencia_relativa)

print(params)

plt.plot(xx, f(xx, params[0], params[1], params[2]), "b-")
plt.plot(xx, frequencia_relativa, "ro")

plt.show()

拟合对数正态分布（Python 图）

问题描述

1 个解决方案

解决方案1
1 已采纳 2021-02-20 22:17:48

拟合对数正态分布（Python 图）

问题描述

1 个解决方案

解决方案1 1 已采纳 2021-02-20 22:17:48

解决方案1
1 已采纳 2021-02-20 22:17:48