[英]How can I implement the input of multiple regression in LSTM using keras?
這是我的代碼
def create_dataset(signal_data, look_back=1):
    """Build sliding-window samples from the first column of a 2-D series.

    Args:
        signal_data: 2-D array indexed as ``[time_step, column]``; only
            column 0 is read.
        look_back: Number of consecutive time steps in each input window.

    Returns:
        A ``(dataX, dataY)`` tuple of NumPy arrays. ``dataX[i]`` holds the
        ``look_back`` values of column 0 starting at step ``i`` and
        ``dataY[i]`` is the column-0 value immediately after that window.
        Both arrays are empty when the series has no more than
        ``look_back`` rows.
    """
    # One sample per position that still has a "next" value to predict.
    n_windows = len(signal_data) - look_back
    data_x = [signal_data[i:i + look_back, 0] for i in range(n_windows)]
    data_y = [signal_data[i + look_back, 0] for i in range(n_windows)]
    return np.array(data_x), np.array(data_y)
# Window length passed to create_dataset(); it was referenced but never
# defined in the original snippet. 1 mirrors create_dataset's default.
look_back = 1

# Load the series and keep only the closing price as a single-column matrix.
df = pd.read_csv('time_series.csv')
signal_data = df.Close.values.astype('float32')
signal_data = signal_data.reshape(len(df), 1)

# Scale the values into [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
signal_data = scaler.fit_transform(signal_data)

# Chronological split: first 80% for training, the remainder for testing.
train_size = int(len(signal_data) * 0.80)
test_size = len(signal_data) - train_size
# val_size = len(signal_data) - train_size - test_size
train = signal_data[0:train_size]
# val = signal_data[train_size:train_size+val_size]
# The validation split is disabled, so the test set starts right after the
# training rows (the original offset used the undefined val_size).
test = signal_data[train_size:len(signal_data)]

x_train, y_train = create_dataset(train, look_back)
# x_val, y_val = create_dataset(val, look_back)
x_test, y_test = create_dataset(test, look_back)

# Add a trailing feature axis: (samples, look_back) -> (samples, look_back, 1).
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
# x_val = np.reshape(x_val, (x_val.shape[0], x_val.shape[1], 1))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
現在我想再加入 df.Open、df.High、df.Low 和 df.Volume。我該如何修改這段代碼來實現?
我應該把這些列加入 signal_data 嗎?我想知道如何加入這些數據,以便用 signal_data 中的多個特徵(features)來訓練模型。
我不知道在哪里以及如何實現它。 我需要你的幫助。
您的寶貴意見和想法將不勝感激。
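我對您的代碼做了一些修改(完整代碼見下方),這樣應該可以運行。修改摘要如下:
- signal_data 現在包含 Open、Low、High、Volume、Close 五列,Close 放在最後一列;
- create_dataset 的輸入窗口取所有列(signal_data[i:(i + look_back), :]),目標值取最後一列(signal_data[i + look_back, -1]);
- 明確定義了 look_back = 20,並啟用了驗證集(val)的切分;
- LSTM 的 input_shape 改為 (None, 5),以對應 5 個特徵。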
我的一般建議:
- 我不會使用 MinMaxScaler,這很危險,因為單個異常值就會扭曲整個分布。建議改用 StandardScaler。更多信息: http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
- 我會在生成 train_x、test_x 以及它們對應的 y 之後再縮放數據。原因是您目前同時使用訓練集和測試集(也就是未來的信息)來計算縮放所需的統計量。這與實際運行代碼時的情況完全不同:在實際情況下,您只能根據過去數據的統計量來縮放新數據。測試集最好盡可能接近實際情況。
- 在 metrics 中您選擇了 accuracy,這是一個分類指標。我會根據問題類型(回歸)選擇指標,例如「平均絕對誤差」(mean absolute error)。

希望能幫到您 :D
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Conv2D, Reshape, TimeDistributed, Flatten, Conv1D,ConvLSTM2D, MaxPooling1D
from keras.layers.core import Dense, Activation, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
import matplotlib.pyplot as plt
# Create a TensorFlow session whose GPU options have allow_growth enabled.
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options)
sess = tf.Session(config=config)
def create_dataset(signal_data, look_back=1):
    """Build multi-feature sliding-window samples from a 2-D series.

    Args:
        signal_data: 2-D array indexed as ``[time_step, feature]``. The last
            column is used as the prediction target.
        look_back: Number of consecutive time steps in each input window.

    Returns:
        A ``(dataX, dataY)`` tuple of NumPy arrays. ``dataX[i]`` holds all
        columns of the ``look_back`` rows starting at step ``i`` and
        ``dataY[i]`` is the last-column value of the row immediately after
        that window. Both arrays are empty when the series has no more than
        ``look_back`` rows.
    """
    # One sample per position that still has a "next" row to predict.
    n_windows = len(signal_data) - look_back
    data_x = [signal_data[i:i + look_back, :] for i in range(n_windows)]
    data_y = [signal_data[i + look_back, -1] for i in range(n_windows)]
    return np.array(data_x), np.array(data_y)
# Number of past time steps fed to the network for each prediction.
look_back = 20

df = pd.read_csv('kospi.csv')
# "Close" is placed last because create_dataset() takes the last column as
# the prediction target.
signal_data = df[["Open", "Low", "High", "Volume", "Close"]].values.astype('float32')
n_features = signal_data.shape[1]

# Chronological split sizes: 80% train, then validation, then test.
train_size = int(len(signal_data) * 0.80)
test_size = len(signal_data) - train_size - int(len(signal_data) * 0.05)
val_size = len(signal_data) - train_size - test_size

# Fit the scaler on the training rows only, then apply it to the later
# splits, so no validation/test statistics leak into training.
scaler = MinMaxScaler(feature_range=(0, 1))
train = scaler.fit_transform(signal_data[0:train_size])
val = scaler.transform(signal_data[train_size:train_size + val_size])
test = scaler.transform(signal_data[train_size + val_size:len(signal_data)])

x_train, y_train = create_dataset(train, look_back)
x_val, y_val = create_dataset(val, look_back)
x_test, y_test = create_dataset(test, look_back)

# Two stacked LSTM layers followed by a dense regression head.
model = Sequential()
model.add(LSTM(128, input_shape=(None, n_features), return_sequences=True))
model.add(Dropout(0.3))
# Only the first layer needs input_shape; later layers infer it.
model.add(LSTM(128))
model.add(Dropout(0.3))
model.add(Dense(128))
model.add(Dropout(0.3))
model.add(Dense(1))
# This is a regression problem, so report mean absolute error instead of
# the classification metric 'accuracy'.
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_absolute_error'])
model.summary()

hist = model.fit(x_train, y_train, epochs=20, batch_size=32, verbose=2, validation_data=(x_val, y_val))

# No layer is created with stateful=True, so no reset_states() call is
# needed between evaluations.
trainScore = model.evaluate(x_train, y_train, verbose=0)
print('Train Score: ', trainScore)
valScore = model.evaluate(x_val, y_val, verbose=0)
print('Validataion Score: ', valScore)
testScore = model.evaluate(x_test, y_test, verbose=0)
print('Test Score: ', testScore)

# Flatten the (samples, 1) prediction so it lines up with y_test.
p = model.predict(x_test).ravel()
print(mean_squared_error(y_test, p))

# Plot the scaled targets against the predictions on the test split.
plt.plot(y_test)
plt.plot(p)
plt.legend(['testY', 'p'], loc='upper right')
plt.show()
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.