In the following minimal working example, which uses fake data with the same shape as my real input, I illustrate how I create my autoencoder network
and the problem I'm facing with NumPy ndarrays when making predictions.
import numpy as np
import pandas as pd
import random
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import mean_squared_error
class SingleEncoder:
    """Dense autoencoder for fixed-length segments of shape (1, 100, 4).

    The instance stores the training and test arrays; ``buildModel``
    builds, compiles and fits the network on the training array.
    """

    def __init__(self, train, test):
        """Store the data and the expected per-sample input shape.

        Args:
            train: Samples used both as input and as reconstruction target.
            test: Held-out samples; stored but not used by ``buildModel``.
        """
        self.x_train = train
        self.x_test = test
        self.first_dim = 1
        self.second_dim = 100
        self.channels = 4
        self.input_dim = (self.first_dim, self.second_dim,
                          self.channels)  # (1, 100, 4)

    def buildModel(self):
        """Build, compile and fit the autoencoder, then return it.

        Side effects: prints the model summary and writes the best
        weights seen during training to ``weights.hdf5``.

        Returns:
            The fitted Keras ``Sequential`` model.
        """
        autoencoder = Sequential()
        # encoder
        autoencoder.add(Dense(200, activation='relu',
                              input_shape=self.input_dim))
        autoencoder.add(Dense(100, activation='relu'))
        autoencoder.add(Dense(80, activation='linear'))
        # decoder
        autoencoder.add(Dense(80, activation='linear'))
        autoencoder.add(Dense(100, activation='relu'))
        autoencoder.add(Dense(200, activation='relu'))
        # NOTE(review): a ReLU output cannot produce negative values; if the
        # inputs can be negative, a 'linear' output layer may be preferable.
        autoencoder.add(Dense(self.channels, activation='relu'))
        autoencoder.compile(optimizer='adam', loss='mae',
                            metrics=['mean_squared_error'])
        autoencoder.summary()

        filepath = "weights.hdf5"
        # The monitored quantity is a loss, so the best epoch is the one
        # with the LOWEST value: mode must be 'min' (it was 'max', which
        # checkpointed the worst weights instead of the best ones).
        checkpoint = ModelCheckpoint(filepath, monitor='loss',
                                     verbose=1, save_best_only=True,
                                     mode='min')
        callbacks_list = [checkpoint]
        # Autoencoder training: the input is also the target.
        autoencoder.fit(self.x_train, self.x_train, epochs=10, batch_size=32,
                        shuffle=True, callbacks=callbacks_list)
        return autoencoder
#
Network loss:
def LosScore2(x_pred, x_test):
    """Return one mean-squared-error value per sample of ``x_test``.

    Each pair ``x_pred[i]`` / ``x_test[i]`` is wrapped in a
    ``pandas.DataFrame`` before being handed to sklearn's
    ``mean_squared_error``.

    Raises:
        ValueError: "Must pass 2-d input" when a single sample has more
            than two dimensions, because ``pd.DataFrame`` rejects such
            arrays.
    """
    return [
        mean_squared_error(pd.DataFrame(x_pred[idx]),
                           pd.DataFrame(x_test[idx]))
        for idx in range(len(x_test))
    ]
Generating fake data:
# Generate sample data with the shape of the expected input.
# X: ND-array containing fixed-length segments of shape (1, 100, 4)
from sklearn.model_selection import train_test_split

X = np.random.randn(2000, 1, 100, 4)
# Three classes with 700, 700 and 600 samples respectively.
a, b, c = np.repeat(0, 700), np.repeat(1, 700), np.repeat(2, 600)
y = np.hstack((a, b, c))
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=7)
# np.unique returns the distinct labels already sorted, so the label order
# (and therefore the order of the per-class models) is deterministic; a
# plain set() gives no ordering guarantee.
LABELS = np.unique(y_train).tolist()
Train the network:
# Train one autoencoder per class label; models[k] belongs to LABELS[k].
models = []
for label in LABELS:
    print(label)
    # Select rows by the label VALUE rather than by its position in LABELS,
    # so this keeps working when the labels are not exactly 0..n-1.
    sub_train = x_train[y_train == label]
    sub_test = x_test[y_test == label]
    encoder = SingleEncoder(sub_train, sub_test)
    autoencoder = encoder.buildModel()
    models.append(autoencoder)
This MWE
trains without problems, but when I try to evaluate on the test set I encounter an error related to the input shape, as shown below:
print("Evaluating on test set -> ")
# Prediction: run every trained model on the complete test set.
x_pred = [model.predict(x_test) for model in models]
# Per-sample reconstruction error of each model against the test data.
scored0 = LosScore2(x_pred[0], x_test)
scored1 = LosScore2(x_pred[1], x_test)
scored2 = LosScore2(x_pred[2], x_test)
Here goes the error:
Evaluating on test set ->
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-11-989c507cfd63> in <module>()
9 #scored0 = (LosScore2(x_pred[0][np.newaxis], x_test))
10
---> 11 scored0 = (LosScore2(x_pred[0], x_test))
12 scored1 = (LosScore2(x_pred[1], x_test))
13 scored2 = (LosScore2(x_pred[2], x_test))
3 frames
<ipython-input-9-356737ea1f97> in LosScore2(x_pred, x_test)
2 mse = []
3 for i in range(len(x_test)):
----> 4 mse.append(mean_squared_error(pd.DataFrame(x_pred[i]), pd.DataFrame(x_test[i])))
5 return mse
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
462 mgr = init_dict({data.name: data}, index, columns, dtype=dtype)
463 else:
--> 464 mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
465
466 # For data is list-like, or Iterable (will consume into list)
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/construction.py in init_ndarray(values, index, columns, dtype, copy)
167 # by definition an array here
168 # the dtypes will be coerced to a single dtype
--> 169 values = prep_ndarray(values, copy=copy)
170
171 if dtype is not None:
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/construction.py in prep_ndarray(values, copy)
293 values = values.reshape((values.shape[0], 1))
294 elif values.ndim != 2:
--> 295 raise ValueError("Must pass 2-d input")
296
297 return values
ValueError: Must pass 2-d input
I understand this has to do with test data shape, but I can't figure out how to fix it.
You are using sklearn's mean_squared_error on 4-D input and casting each sample to a pandas DataFrame, which only accepts 2-D data; this is what raises the error. I suggest staying in plain NumPy and computing the MSE for every single sample:
def LosScore2(x_pred, x_test):
    """Return the mean squared error of every sample as a list.

    For each index ``i`` in ``range(len(x_test))`` the squared difference
    between ``x_test[i]`` and ``x_pred[i]`` is averaged over all of its
    elements with NumPy, so samples of any dimensionality are accepted.
    """
    n_samples = len(x_test)
    return [
        np.mean(np.square(x_test[idx] - x_pred[idx]))
        for idx in range(n_samples)
    ]
here the running notebook: https://colab.research.google.com/drive/1OWdTYuIVeIWyMp477DoNNNKJ3ZXSoDji?usp=sharing
Have you tried:
scored0 = (LosScore2(x_pred, x_test))
Instead of:
scored0 = (LosScore2(x_pred[0], x_test))
As you can see, in the following lines:
usr/local/lib/python3.6/dist-packages/pandas/core/internals/construction.py in prep_ndarray(values, copy)
293 values = values.reshape((values.shape[0], 1))
294 elif values.ndim != 2:
--> 295 raise ValueError("Must pass 2-d input")
296
297 return values
A reshaping is already being done, and it is choosing the first part of the values.
I think it might be that.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.