简体   繁体   中英

Issue getting data for model training and predicting cryptocurrency prices

I am trying to use Facebook's Prophet model with cryptocurrency price data from Yahoo Finance to predict future prices. I have imported all the libraries and defined a function to get historical data from Yahoo Finance. However, after getting the data and training the model, running the code that visualizes the data raises the following error: `ValueError: All arguments should have the same length. The length of argument y is 4, whereas the length of previous arguments ['ds'] is 398`. The entire code is below. Please help me.

from tqdm import tqdm
import pandas as pd
from prophet import Prophet
import yfinance as yf
from datetime import datetime, timedelta
import plotly.express as px
import numpy as np

def getData(ticker, window, ma_period, decimals=2):
    """
    Download daily prices for a ticker and format them for FB Prophet.

    Requests the price history from `window` days ago up to now, fills missing
    calendar days by carrying the previous row forward, and adds a moving
    average of the 'Open' price.

    Args:
        ticker: Yahoo Finance symbol, e.g. "SHIB-USD".
        window: Number of days of history to request, counted back from now.
        ma_period: Number of rows in the moving-average window.
        decimals: Decimal places the moving average is rounded to. Defaults to
            2, the previous hard-coded value. Pass None to skip rounding; for
            assets priced well below one cent, rounding to 2 places turns
            every value into 0.0.

    Returns:
        DataFrame holding the downloaded price columns plus 'ds' (the date
        taken from the index) and 'y' (the moving average). Rows at the start
        that have no complete moving-average window are dropped.

    Raises:
        ValueError: If no price history is returned for the ticker.
    """
    # Time window: from `window` days ago up to now
    now = datetime.now()
    ago = now - timedelta(days=window)

    # Designating the ticker and getting its daily price history
    crypto = yf.Ticker(ticker)
    df = crypto.history(
        start=ago.strftime("%Y-%m-%d"),
        end=now.strftime("%Y-%m-%d"),
        interval="1d",
    )

    if df.empty:
        raise ValueError(f"No price history returned for {ticker!r}")

    # Prophet does not accept timezone-aware 'ds' values, so strip any
    # timezone the data source attached to the index (wall-clock time is kept).
    if getattr(df.index, "tz", None) is not None:
        df.index = df.index.tz_localize(None)

    # Handling missing data from Yahoo Finance: span the full first-to-last
    # date range so gaps are filled rather than the tail being cut off.
    full_range = pd.date_range(df.index.min(), df.index.max(), freq="D")
    df = df.reindex(full_range).ffill()

    # Getting the N-day moving average, optionally rounded
    moving_avg = df["Open"].rolling(window=ma_period).mean()
    df["MA"] = moving_avg if decimals is None else moving_avg.round(decimals)

    # Dropping the leading rows that have no complete moving-average window
    df = df.dropna(subset=["MA"])

    # Formatted for FB Prophet: date index -> 'ds', moving average -> 'y'
    return df.rename_axis("ds").reset_index().rename(columns={"MA": "y"})



def fbpTrainPredict(df, forecast_period):
    """
    Fit an FB Prophet model on the 'ds'/'y' columns of `df` and predict ahead.

    Args:
        df: DataFrame containing at least the columns 'ds' and 'y'.
        forecast_period: Number of periods passed to Prophet's
            make_future_dataframe.

    Returns:
        The prediction frame restricted to the columns 'ds', 'yhat',
        'yhat_lower' and 'yhat_upper'.
    """
    # All three seasonality components are switched on explicitly
    seasonality_flags = dict(
        daily_seasonality=True,
        yearly_seasonality=True,
        weekly_seasonality=True,
    )
    model = Prophet(**seasonality_flags)

    # Train on the Prophet-formatted columns only
    training_frame = df[['ds', 'y']]
    model.fit(training_frame)

    # Build the frame of dates to predict on, then run the prediction
    horizon = model.make_future_dataframe(periods=forecast_period)
    predictions = model.predict(horizon)

    # Keep just the date, the point estimate and its bounds
    output_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
    return predictions[output_columns]

def visFBP(df, forecast):
    """
    Chart the actual opening prices together with the forecast values.

    Args:
        df: Pre-training DataFrame; its 'ds' and 'Open' columns are plotted.
        forecast: Forecast DataFrame with the columns 'ds', 'yhat',
            'yhat_lower' and 'yhat_upper'.

    Returns:
        The result of fig.show(); the figure is displayed as a side effect.

    NOTE(review): passing a list of column names as `y` relies on Plotly
    Express wide-form input, which I believe arrived in plotly 4.8. Confirm
    the installed version if px.line reports arguments of differing lengths.
    """
    # Visual DF: stack actual rows on top of forecast rows. DataFrame.append
    # was removed in pandas 2.0, so pd.concat is used; ignore_index avoids
    # duplicate index labels in the combined frame.
    vis_df = pd.concat([df[['ds', 'Open']], forecast], ignore_index=True).rename(
        columns={'yhat': 'Prediction',
                 'yhat_upper': "Predicted High",
                 'yhat_lower': "Predicted Low"}
    )

    # Visualizing results: one line per column listed in `y`
    fig = px.line(
        vis_df,
        x='ds',
        y=['Open', 'Prediction', 'Predicted High', 'Predicted Low'],
        title='Crypto Forecast',
        labels={'value': 'Price',
                'ds': 'Date'}
    )

    # Adding range-selector buttons to the x axis
    fig.update_xaxes(
        rangeselector=dict(
            buttons=[
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=3, label="3m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(step="all"),
            ]
        )
    )

    return fig.show()

 
# Fetch 730 days of SHIB-USD prices with a 5-row moving average
df = getData(ticker="SHIB-USD", window=730, ma_period=5)
# Fit the model and predict 90 periods ahead
forecast = fbpTrainPredict(df=df, forecast_period=90)
# Chart the actual prices together with the forecast
visFBP(df=df, forecast=forecast)

I used Plotly's graph objects to add each of the graph lines individually. Make sure you import graph_objects first: import plotly.graph_objects as go

And then instead of:

# Visualizing results: the Plotly Express call from visFBP that is being replaced.
# `y` is given as a list of four column names of vis_df.
fig = px.line(
    vis_df,
    x='ds',
    y=['Open', 'Prediction', 'Predicted High', 'Predicted Low'],
    title='Crypto Forecast',
    labels={'value':'Price',
            'ds': 'Date'}
)

You create the empty figure and add each data column as a 'scatter':

# Visualizing results: start from an empty figure and add one line trace per
# column, each trace named after the column it plots
fig = go.Figure()
for column in ("Open", "Prediction", "Predicted Low", "Predicted High"):
    fig.add_scatter(x=vis_df['ds'], y=vis_df[column], mode='lines', name=column)

# Title and axis labels
fig.update_layout(
    title="Crypto Forecast",
    xaxis_title="Date",
    yaxis_title="Price",
)

There still seems to be a problem with your prediction, but I'm going to have to leave that to someone else.

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please credit the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM