[英]How can I implement the input of multiple regression in LSTM using keras?
这是我的代码
def create_dataset(signal_data, look_back=1):
dataX, dataY = [], []
for i in range(len(signal_data) - look_back):
dataX.append(signal_data[i:(i + look_back), 0])
dataY.append(signal_data[i + look_back, 0])
return np.array(dataX), np.array(dataY)
df = pd.read_csv('time_series.csv')
signal_data = df.Close.values.astype('float32')
signal_data = signal_data.reshape(len(df), 1)
scaler = MinMaxScaler(feature_range=(0, 1))
signal_data = scaler.fit_transform(signal_data)
train_size = int(len(signal_data) * 0.80)
test_size = len(signal_data) - train_size)
# val_size = len(signal_data) - train_size - test_size
train = signal_data[0:train_size]
# val = signal_data[train_size:train_size+val_size]
test = signal_data[train_size+val_size:len(signal_data)]
x_train, y_train = create_dataset(train, look_back)
# x_val, y_val = create_dataset(val, look_back)
x_test, y_test = create_dataset(test, look_back)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
# x_val = np.reshape(x_val, (x_val.shape[0], x_val.shape[1], 1))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
现在我想添加df.Open
和df.High
和df.Low
和df.Volume
如何实现此代码?
我应该添加信号数据吗? 我想知道如何添加数据,以便可以训练信号数据中的多个功能。
我不知道在哪里以及如何实现它。 我需要你的帮助。
您的宝贵意见和想法将不胜感激。
我对您的代码进行了一些修改。 这应该工作。 综上所述:
我的一般建议:
MinMaxScaler
,这很危险,因为单个异常值会干扰您的所有分布。 而是使用StandardScaler
。 此处提供更多信息: http : //scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html train_x
, test_x
和它们的y
对应副本之后,我将按比例缩放数据。 原因是因为您正在计算统计信息,以便使用训练和测试集(即将来的信息)来缩放数据。 这与在实际情况下尝试运行代码时所发现的方式完全不同。 也就是说,您将不得不根据过去的统计数据来扩展新数据。 最好建立一个尽可能接近实际情况的测试集。 metrics
您选择accuracy
,这是一个分类指标。 我将根据我的问题类型(回归)使用一种方法:例如“均值绝对错误”。 希望我能帮助您:D
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Conv2D, Reshape, TimeDistributed, Flatten, Conv1D,ConvLSTM2D, MaxPooling1D
from keras.layers.core import Dense, Activation, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
import matplotlib.pyplot as plt
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config=config)
def create_dataset(signal_data, look_back=1):
dataX, dataY = [], []
for i in range(len(signal_data) - look_back):
dataX.append(signal_data[i:(i + look_back), :])
dataY.append(signal_data[i + look_back, -1])
return np.array(dataX), np.array(dataY)
look_back = 20
df = pd.read_csv('kospi.csv')
signal_data = df[["Open", "Low", "High", "Volume", "Close"]].values.astype('float32')
scaler = MinMaxScaler(feature_range=(0, 1))
signal_data = scaler.fit_transform(signal_data)
train_size = int(len(signal_data) * 0.80)
test_size = len(signal_data) - train_size - int(len(signal_data) * 0.05)
val_size = len(signal_data) - train_size - test_size
train = signal_data[0:train_size]
val = signal_data[train_size:train_size+val_size]
test = signal_data[train_size+val_size:len(signal_data)]
x_train, y_train = create_dataset(train, look_back)
x_val, y_val = create_dataset(val, look_back)
x_test, y_test = create_dataset(test, look_back)
model = Sequential()
model.add(LSTM(128, input_shape=(None, 5),return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(128, input_shape=(None, 5)))
model.add(Dropout(0.3))
model.add(Dense(128))
model.add(Dropout(0.3))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
model.summary()
hist = model.fit(x_train, y_train, epochs=20, batch_size=32, verbose=2, validation_data=(x_val, y_val))
trainScore = model.evaluate(x_train, y_train, verbose=0)
model.reset_states()
print('Train Score: ', trainScore)
valScore = model.evaluate(x_val, y_val, verbose=0)
model.reset_states()
print('Validataion Score: ', valScore)
testScore = model.evaluate(x_test, y_test, verbose=0)
model.reset_states()
print('Test Score: ', testScore)
p = model.predict(x_test)
print(mean_squared_error(y_test, p))
import matplotlib.pyplot as pplt
pplt.plot(y_test)
pplt.plot(p)
pplt.legend(['testY', 'p'], loc='upper right')
pplt.show()
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.