numpy ndarray to pandas dataframe

Question

In the following minimal working example, I use fake data with the same shape as my real input to illustrate how I create my autoencoder network and the problem I'm facing when passing the NumPy ndarray predictions to pandas.

import numpy as np
import pandas as pd
import random
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import mean_squared_error
class SingleEncoder:
    """Dense autoencoder that is fitted to reconstruct its own training data.

    The per-sample input shape is fixed to ``(first_dim, second_dim, channels)``,
    i.e. ``(1, 100, 4)``.
    """

    def __init__(self, train, test):
        """Store the data splits and the fixed per-sample input shape.

        Args:
            train: Training samples; used as both input and target by
                ``buildModel``.
            test: Held-out samples; stored on the instance but not used by
                this class.
        """
        self.x_train = train
        self.x_test = test
        self.first_dim = 1
        self.second_dim = 100
        self.channels = 4
        self.input_dim = (self.first_dim, self.second_dim,
                          self.channels)  # (1, 100, 4)

    def buildModel(self):
        """Build, compile and fit the autoencoder, then return the fitted model.

        Side effects: prints the model summary and writes the best weights
        seen during training to ``weights.hdf5``.

        Returns:
            The fitted Keras ``Sequential`` model.
        """
        autoencoder = Sequential()

        # encoder
        autoencoder.add(Dense(200, activation='relu', input_shape=self.input_dim))
        autoencoder.add(Dense(100, activation='relu'))
        autoencoder.add(Dense(80, activation='linear'))

        # decoder
        autoencoder.add(Dense(80, activation='linear'))
        autoencoder.add(Dense(100, activation='relu'))
        autoencoder.add(Dense(200, activation='relu'))
        # NOTE(review): relu on the output layer cannot emit negative values;
        # confirm that this matches the value range of the training data.
        autoencoder.add(Dense(self.channels, activation='relu'))

        autoencoder.compile(optimizer='adam', loss='mae',
                            metrics=['mean_squared_error'])
        autoencoder.summary()

        # NOTE: every instance writes to this same path, so a later call
        # overwrites the checkpoint saved by an earlier one.
        filepath = "weights.hdf5"
        # The monitored quantity is a loss, so "best" means lowest: mode must
        # be 'min'. With mode='max' the checkpoint kept the worst weights.
        checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1,
                                     save_best_only=True, mode='min')
        callbacks_list = [checkpoint]

        # Autoencoder training: the input is also the reconstruction target.
        autoencoder.fit(self.x_train, self.x_train, epochs=10, batch_size=32,
                        shuffle=True, callbacks=callbacks_list)

        return autoencoder

#

Network loss:

def LosScore2(x_pred, x_test):
    """Collect one mean-squared-error value per test sample into a list.

    Each ``x_pred[i]`` / ``x_test[i]`` is wrapped in a ``pd.DataFrame`` before
    being passed to ``mean_squared_error``.
    """
    mse = []
    for i in range(len(x_test)):
        # pd.DataFrame rejects arrays that are not 2-D. With samples of shape
        # (1, 100, 4), as generated in this example, each x_test[i] is 3-D, so
        # this line raises "ValueError: Must pass 2-d input".
        mse.append(mean_squared_error(pd.DataFrame(x_pred[i]), pd.DataFrame(x_test[i])))
    return mse

Generating fake data:

from sklearn.model_selection import train_test_split

# Generate sample data with the shape of the expected input.
# X: ND-array containing 2000 fixed-length segments of shape (1, 100, 4).
X = np.random.randn(2000, 1, 100, 4)
# y: one integer class label per segment (700 zeros, 700 ones, 600 twos).
a, b, c = np.repeat(0, 700), np.repeat(1, 700), np.repeat(2, 600)
y = np.hstack((a, b, c))

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=7)

# np.unique returns the distinct labels in sorted order. Iterating a set gives
# no ordering guarantee, and the per-label models are later indexed by their
# position in LABELS, so the order has to be deterministic.
LABELS = np.unique(y_train).tolist()

Train the network:

# Train one autoencoder per class label; models[k] belongs to LABELS[k].
models = []
for label in LABELS:
    print(label)
    # Select samples by the label value itself rather than by its position in
    # LABELS, so the subsets stay correct even when the labels are not exactly
    # 0..n-1 in order.
    sub_train = x_train[y_train == label]
    sub_test = x_test[y_test == label]

    autoencoder = SingleEncoder(sub_train, sub_test).buildModel()
    models.append(autoencoder)

Up to this point the MWE runs without problems, but when I try to evaluate on the test set I encounter an error caused by the input shape, as shown below:

print("Evaluating on test set -> ")
# One prediction array per trained model, in the same order as `models`.
x_pred = [model.predict(x_test) for model in models]

# Per-sample reconstruction scores of each model on the full test set.
scored0 = LosScore2(x_pred[0], x_test)
scored1 = LosScore2(x_pred[1], x_test)
scored2 = LosScore2(x_pred[2], x_test)

Here goes the error:

Evaluating on test set -> 

---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-11-989c507cfd63> in <module>()
      9 #scored0 = (LosScore2(x_pred[0][np.newaxis], x_test))
     10 
---> 11 scored0 = (LosScore2(x_pred[0], x_test))
     12 scored1 = (LosScore2(x_pred[1], x_test))
     13 scored2 = (LosScore2(x_pred[2], x_test))

3 frames

<ipython-input-9-356737ea1f97> in LosScore2(x_pred, x_test)
      2     mse = []
      3     for i in range(len(x_test)):
----> 4         mse.append(mean_squared_error(pd.DataFrame(x_pred[i]), pd.DataFrame(x_test[i])))
      5     return mse

/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
    462                 mgr = init_dict({data.name: data}, index, columns, dtype=dtype)
    463             else:
--> 464                 mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
    465 
    466         # For data is list-like, or Iterable (will consume into list)

/usr/local/lib/python3.6/dist-packages/pandas/core/internals/construction.py in init_ndarray(values, index, columns, dtype, copy)
    167     # by definition an array here
    168     # the dtypes will be coerced to a single dtype
--> 169     values = prep_ndarray(values, copy=copy)
    170 
    171     if dtype is not None:

/usr/local/lib/python3.6/dist-packages/pandas/core/internals/construction.py in prep_ndarray(values, copy)
    293         values = values.reshape((values.shape[0], 1))
    294     elif values.ndim != 2:
--> 295         raise ValueError("Must pass 2-d input")
    296 
    297     return values

ValueError: Must pass 2-d input

I understand this has to do with test data shape, but I can't figure out how to fix it.

Answer 1

You are passing 4-D input to sklearn's MSE after casting each sample to a pandas DataFrame, which cannot hold data with more than two dimensions; this is what raises the error. I suggest staying in plain NumPy and computing the MSE for every single sample:

def LosScore2(x_pred, x_test):
    """Return the mean squared error of every sample as a list.

    Entry ``i`` is the mean, over all elements of sample ``i``, of the squared
    difference between ``x_test[i]`` and ``x_pred[i]``. The computation stays
    in NumPy, so samples may have any number of dimensions.
    """
    return [
        np.mean(np.square(x_test[idx] - x_pred[idx]))
        for idx in range(len(x_test))
    ]

here the running notebook: https://colab.research.google.com/drive/1OWdTYuIVeIWyMp477DoNNNKJ3ZXSoDji?usp=sharing

Answer 2

Have you tried:

scored0 = (LosScore2(x_pred, x_test))

Instead of:

scored0 = (LosScore2(x_pred[0], x_test))

As you can see, in the following lines:

usr/local/lib/python3.6/dist-packages/pandas/core/internals/construction.py in prep_ndarray(values, copy)
    293         values = values.reshape((values.shape[0], 1))
    294     elif values.ndim != 2:
--> 295         raise ValueError("Must pass 2-d input")
    296 
    297     return values

A reshape is already being performed there, and it uses only the first dimension of the values (`values.shape[0]`).

I think it might be that.

numpy ndarray to pandas dataframe

Question

2 answers

solution1
2 ACCEPTED 2020-08-05 08:11:30

solution2
0 2020-08-05 08:00:09

numpy ndarray to pandas dataframe

Question

2 answers

solution1 2 ACCEPTED 2020-08-05 08:11:30

solution2 0 2020-08-05 08:00:09

solution1
2 ACCEPTED 2020-08-05 08:11:30

solution2
0 2020-08-05 08:00:09