[英]Plot Additional Pandas DF
我的代码如下所示,我试图在 7 月至 12 月显示的 plot 上为 df“ltyc”中的数据画一条黑线。我的错误在 plt.legend 行之前的最后.
import warnings
import itertools
import numpy as np
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")
plt.style.use('fivethirtyeight')
import pandas as pd
import statsmodels.api as sm
import matplotlib
matplotlib.rcParams['axes.labelsize'] = 14
matplotlib.rcParams['xtick.labelsize'] = 12
matplotlib.rcParams['ytick.labelsize'] = 12
matplotlib.rcParams['text.color'] = 'k'
from sys import exit
df = pd.read_excel("MOSDailyWindSpeed.xlsx")
wspdBH1 = df.groupby('Date')[' Simulated WS BH1PI'].sum().reset_index()
wspdHOO = df.groupby('Date')[' Simulated WS HOO801'].sum().reset_index()
wspdBH1 = wspdBH1.set_index('Date')
wspdHOO = wspdHOO.set_index('Date')
wspdBH1.index
wspdHOO.index
y = wspdHOO[' Simulated WS HOO801'].resample('MS').mean()#monthly mean -->
change site here 'MS' is month start for 'Date' col
y['2017':]#look at daily data starting 2017 -view data
y.plot(figsize=(15, 6))
plt.show()
from pylab import rcParams
rcParams['figure.figsize'] = 18, 8
decomposition = sm.tsa.seasonal_decompose(y, model='additive')
fig = decomposition.plot()
plt.show()
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p,
d, q))]
print('Examples of parameter combinations for Seasonal ARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))
for param in pdq:
for param_seasonal in seasonal_pdq:
try:
mod = sm.tsa.statespace.SARIMAX(y,
order=param,
seasonal_order=param_seasonal,
enforce_stationarity=False,
enforce_invertibility=False)
results = mod.fit()
print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal,
results.aic))
except:
continue
mod = sm.tsa.statespace.SARIMAX(y,
order=(1, 1, 1),
seasonal_order=(1, 1, 0, 12),
enforce_stationarity=False,
enforce_invertibility=False)
results = mod.fit()
print(results.summary().tables[1])
results.plot_diagnostics(figsize=(16, 8))
plt.show()
# VALIDATE THE FORECAST - PLOT FORECAST VS ACTUAL
pred = results.get_prediction(start=pd.to_datetime('2019-01-01'),
dynamic=False)
pred_ci = pred.conf_int()
ax = y['2019':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7,
figsize=(14, 7))
ax.fill_between(pred_ci.index,
pred_ci.iloc[:, 0],
pred_ci.iloc[:, 1], color='k', alpha=.2)
ax.set_xlabel('Date')
ax.set_ylabel('Furniture Sales')
plt.legend()
plt.show()
y_forecasted = pred.predicted_mean
y_truth = y['2019-01-01':]
# Compute the mean square error
mse = ((y_forecasted - y_truth) ** 2).mean()
print('The Mean Squared Error of our forecasts is {}'.format(round(mse,
2)))
print('The Root Mean Squared Error of our forecasts is
{}'.format(round(np.sqrt(mse), 2)))
#PRODUCE AND VISUALIZE FORECAST
pred_uc = results.get_forecast(steps=6)
pred_ci = pred_uc.conf_int()
ax = y['2019':].plot(label='observed', figsize=(14, 7))
pred_uc.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci.index,
pred_ci.iloc[:, 0],
pred_ci.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Date')
ax.set_ylabel('MOS Wind Speed')
#add the LT monthly average to plot
from datetime import date
cm = date.today().month
lty = y.groupby(y.index.month).mean()
lty = lty.to_frame()
ltyc = lty.iloc[cm-1:12].reset_index() # extract curr month to end of LT
mean monthly wind speed
ltyc.plot(label='LT Mean',ax=ax, color='k')
plt.legend()
plt.show()
ltyc df 看起来像这样,我正在尝试使用axis = ax 在下图中的plot。 我想我需要更改下面的“日期”列,因为当前轴不会将 7、8、9、10、11、12 解释为月份,但我不知道该怎么做。
Date Simulated WS HOO801
0 7 5.491916
1 8 5.596823
2 9 5.793934
3 10 7.501096
4 11 8.152358
5 12 8.426322
最后,我的错误如下所示:
File
"C:\Users\U321103\AppData\Local\Continuum\anaconda3\envs\Stats\lib\site-
packages\pandas\plotting\_matplotlib\timeseries.py", line 309, in
format_dateaxis
raise TypeError("index type not supported")
TypeError: index type not supported
我用它来将 integer 日期转换为 DF 中的 yyyy-mm-dd 格式。 ltyc['Date'] = pd.to_datetime(ltyc["Date"], format='%m').apply(lambda dt: dt.replace(year=2020))
然后,我将 DF (ltyc) 转换为这样的系列: ltycs = pd.Series(ltyc['LT Mean'].values, index=ltyc['Date'])#convert to Series 因为其他地块在系列格式
ltycs.plot(label='LT Mean',ax=ax,color='k')
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.