简体   繁体   中英

Plot Additional Pandas DF

I have the code below, and I'm trying to draw a black line on the plot for Jul - Dec using the data in the DataFrame "ltyc". The error occurs at the very end, on the ltyc.plot(...) call just before the plt.legend() line.

import warnings
import itertools
import numpy as np
import matplotlib.pyplot as plt
# Suppress every warning raised from this point on (including those
# emitted while the imports below are loading).
warnings.filterwarnings("ignore")
# Base plot style; the rcParams overrides further down are applied on top.
plt.style.use('fivethirtyeight')
import pandas as pd
import statsmodels.api as sm
import matplotlib

# Font sizes for axis/tick labels and a black default text colour.
matplotlib.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'text.color': 'k',
})
from sys import exit

# Load the daily wind-speed table. The code below reads a 'Date' column and
# the site columns ' Simulated WS BH1PI' / ' Simulated WS HOO801' from it
# (note the leading space in those column names).
df = pd.read_excel("MOSDailyWindSpeed.xlsx")

# Sum the readings per date for each site, then index each result by 'Date'.
wspdBH1 = df.groupby('Date')[' Simulated WS BH1PI'].sum().reset_index()
wspdHOO = df.groupby('Date')[' Simulated WS HOO801'].sum().reset_index()
wspdBH1 = wspdBH1.set_index('Date')
wspdHOO = wspdHOO.set_index('Date')

# Monthly mean of the selected site; 'MS' is the month-start frequency
# applied to the 'Date' index. Change the site here to analyse another one.
y = wspdHOO[' Simulated WS HOO801'].resample('MS').mean()

# Quick look at the full monthly series.
y.plot(figsize=(15, 6))
plt.show()

from pylab import rcParams

# Default figure size for the figures drawn from here on.
rcParams['figure.figsize'] = (18, 8)

# Additive seasonal decomposition of the monthly series, plotted with the
# decomposition result's own plot() method.
decomposition = sm.tsa.seasonal_decompose(y, model='additive')
fig = decomposition.plot()
plt.show()

# Candidate non-seasonal (p, d, q) orders: every combination of 0 and 1.
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
# The same combinations with a seasonal period of 12 appended.
seasonal_pdq = [order + (12,) for order in pdq]

print('Examples of parameter combinations for Seasonal ARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))

# Fit every (order, seasonal_order) pair and print its AIC so the best
# combination can be picked by eye. A combination that fails to fit is
# reported and skipped rather than aborting the whole search.
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(y,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)
            results = mod.fit()
            aic = results.aic
        except Exception as err:
            # Catch Exception (not a bare except) so Ctrl-C still works.
            print('ARIMA{}x{}12 - skipped: {}'.format(param, param_seasonal,
                                                      err))
            continue

        print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, aic))

# Fit the model with the chosen orders: (1, 1, 1) x (1, 1, 0, 12).
mod = sm.tsa.statespace.SARIMAX(
    y,
    order=(1, 1, 1),
    seasonal_order=(1, 1, 0, 12),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
results = mod.fit()

# Print the second table of the fit summary.
coef_table = results.summary().tables[1]
print(coef_table)

# Diagnostic plots of the fitted model.
results.plot_diagnostics(figsize=(16, 8))
plt.show()

# VALIDATE THE FORECAST - PLOT FORECAST VS ACTUAL
# Non-dynamic predictions from 2019-01-01 onward, with their confidence
# interval.
pred = results.get_prediction(start=pd.to_datetime('2019-01-01'),
                              dynamic=False)
pred_ci = pred.conf_int()

# Observed values from 2019 on, with the predictions drawn on the same axes.
ax = y['2019':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7,
                         figsize=(14, 7))

# Shade the band between the first and second confidence-interval columns.
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.2)

ax.set_xlabel('Date')
# The series is wind speed; use the same label as the forecast plot.
ax.set_ylabel('MOS Wind Speed')
plt.legend()

plt.show()

# Compare the predictions with the observed values from 2019-01-01 onward.
y_forecasted = pred.predicted_mean
y_truth = y['2019-01-01':]

# Compute the mean square error (the subtraction aligns on the date index).
mse = ((y_forecasted - y_truth) ** 2).mean()
print('The Mean Squared Error of our forecasts is {}'.format(round(mse, 2)))
# Each message is kept inside a single string literal; a literal split
# across two physical lines is a syntax error.
print('The Root Mean Squared Error of our forecasts is {}'.format(
    round(np.sqrt(mse), 2)))

# PRODUCE AND VISUALIZE FORECAST
# Forecast 6 steps past the end of the fitted series, with its confidence
# interval.
pred_uc = results.get_forecast(steps=6)
pred_ci = pred_uc.conf_int()

ax = y['2019':].plot(label='observed', figsize=(14, 7))
pred_uc.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Date')
ax.set_ylabel('MOS Wind Speed')

# Add the long-term (LT) monthly average to the plot.
from datetime import date

today = date.today()
cm = today.month
# Mean of the monthly series per calendar month; the index holds the month
# numbers present in y.
lty = y.groupby(y.index.month).mean()
lty = lty.to_frame()
# Current month through the end of the LT mean monthly wind speed.
# NOTE(review): the positional slice assumes all 12 calendar months are
# present in y - confirm for short records.
lt_values = lty.iloc[cm - 1:12, 0]
# The axis created above is a date axis, so integer month numbers cannot be
# drawn on it ("TypeError: index type not supported"). Re-index the values
# with month-start dates of the current year, starting at the current month.
lt_dates = pd.date_range(start=pd.Timestamp(today.replace(day=1)),
                         periods=len(lt_values), freq='MS')
ltyc = pd.Series(lt_values.values, index=lt_dates, name='LT Mean')
ltyc.plot(label='LT Mean', ax=ax, color='k')

plt.legend()
plt.show()

The ltyc DataFrame, which I am trying to plot on the existing axis (ax=ax) in the image below, looks like this. I think I need to change the "Date" column, since the current axis doesn't interpret 7, 8, 9, 10, 11, 12 as months, but I'm not sure how to do this.

   Date   Simulated WS HOO801
0     7              5.491916
1     8              5.596823
2     9              5.793934
3    10              7.501096
4    11              8.152358
5    12              8.426322 

Finally, my error looks like this:

File 
"C:\Users\U321103\AppData\Local\Continuum\anaconda3\envs\Stats\lib\site- 
packages\pandas\plotting\_matplotlib\timeseries.py", line 309, in 
format_dateaxis
raise TypeError("index type not supported")

TypeError: index type not supported

在此处输入图像描述

I used this to convert the integer month numbers into yyyy-mm-dd dates in the DataFrame: ltyc['Date'] = pd.to_datetime(ltyc["Date"], format='%m').apply(lambda dt: dt.replace(year=2020))

Then I converted the DataFrame (ltyc) into a Series like this (this assumes the value column has been renamed to 'LT Mean'): ltycs = pd.Series(ltyc['LT Mean'].values, index=ltyc['Date'])#convert to Series since the other plots are in series format

ltycs.plot(label='LT Mean',ax=ax,color='k')

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM