
Tensorflow - NaN loss and constant accuracy when training

As the title says, I am trying to train a neural network to predict outcomes, but I can't figure out what is wrong with my model. I keep getting the exact same accuracy on every epoch, and the loss is NaN. I have looked at other similar questions and still can't get it working. My code for the model and training is below:

import numpy as np
import pandas as pd
import tensorflow as tf
import urllib.request as request
import matplotlib.pyplot as plt
from FlowersCustom import get_MY_data

def get_data():
    IRIS_TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
    IRIS_TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

    names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'species']
    train = pd.read_csv(IRIS_TRAIN_URL, names=names, skiprows=1)
    test = pd.read_csv(IRIS_TEST_URL, names=names, skiprows=1)

    # Train and test input data
    Xtrain = train.drop("species", axis=1)
    Xtest = test.drop("species", axis=1)

    # Encode target values into binary ('one-hot' style) representation
    ytrain = pd.get_dummies(train.species)
    ytest = pd.get_dummies(test.species)

    return Xtrain, Xtest, ytrain, ytest


def create_graph(hidden_nodes):
    # Reset the graph
    tf.reset_default_graph()

    # Placeholders for input and output data
    X = tf.placeholder(shape=Xtrain.shape, dtype=tf.float64, name='X')
    y = tf.placeholder(shape=ytrain.shape, dtype=tf.float64, name='y')

    # Variables for the two groups of weights between the three layers of the network
    print(Xtrain.shape, ytrain.shape)
    W1 = tf.Variable(np.random.rand(Xtrain.shape[1], hidden_nodes), dtype=tf.float64)
    W2 = tf.Variable(np.random.rand(hidden_nodes, ytrain.shape[1]), dtype=tf.float64)

    # Create the neural net graph
    A1 = tf.sigmoid(tf.matmul(X, W1))
    y_est = tf.sigmoid(tf.matmul(A1, W2))

    # Define a loss function
    deltas = tf.square(y_est - y)
    loss = tf.reduce_sum(deltas)

    # Define a train operation to minimize the loss
    # optimizer = tf.train.GradientDescentOptimizer(0.005)
    optimizer = tf.train.AdamOptimizer(0.001)
    opt = optimizer.minimize(loss)

    return opt, X, y, loss, W1, W2, y_est


def train_model(hidden_nodes, num_iters, opt, X, y, loss, W1, W2, y_est):
    # Initialize variables and run session
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    losses = []

    # Go through num_iters iterations
    for i in range(num_iters):
        sess.run(opt, feed_dict={X: Xtrain, y: ytrain})
        local_loss = sess.run(loss, feed_dict={X: Xtrain.values, y: ytrain.values})
        losses.append(local_loss)
        weights1 = sess.run(W1)
        weights2 = sess.run(W2)

        y_est_np = sess.run(y_est, feed_dict={X: Xtrain.values, y: ytrain.values})
        correct = [estimate.argmax(axis=0) == target.argmax(axis=0)
                   for estimate, target in zip(y_est_np, ytrain.values)]
        acc = 100 * sum(correct) / len(correct)

        if i % 10 == 0:
            print('Epoch: %d, Accuracy: %.2f, Loss: %.2f' % (i, acc, local_loss))

    print("loss (hidden nodes: %d, iterations: %d): %.2f" % (hidden_nodes, num_iters, losses[-1]))
    sess.close()
    return weights1, weights2


def test_accuracy(weights1, weights2):
    X = tf.placeholder(shape=Xtest.shape, dtype=tf.float64, name='X')
    y = tf.placeholder(shape=ytest.shape, dtype=tf.float64, name='y')
    W1 = tf.Variable(weights1)
    W2 = tf.Variable(weights2)
    A1 = tf.sigmoid(tf.matmul(X, W1))
    y_est = tf.sigmoid(tf.matmul(A1, W2))

    # Calculate the predicted outputs
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        y_est_np = sess.run(y_est, feed_dict={X: Xtest, y: ytest})

    # Calculate the prediction accuracy
    correct = [estimate.argmax(axis=0) == target.argmax(axis=0)
               for estimate, target in zip(y_est_np, ytest.values)]
    accuracy = 100 * sum(correct) / len(correct)
    print('final accuracy: %.2f%%' % accuracy)


def get_inputs_and_outputs(train, test, output_column_name):
    Xtrain = train.drop(output_column_name, axis=1)
    Xtest = test.drop(output_column_name, axis=1)
    ytrain = pd.get_dummies(getattr(train, output_column_name))
    ytest = pd.get_dummies(getattr(test, output_column_name))

    return Xtrain, Xtest, ytrain, ytest




if __name__ == '__main__':

    train, test = get_MY_data('output')

    Xtrain, Xtest, ytrain, ytest = get_inputs_and_outputs(train, test, 'output')
    # Xtrain, Xtest, ytrain, ytest = get_data()

    hidden_layers = 10
    num_epochs = 500


    opt, X, y, loss, W1, W2, y_est = create_graph(hidden_layers)
    w1, w2 = train_model(hidden_layers, num_epochs, opt, X, y, loss, W1, W2, y_est)
    # test_accuracy(w1, w2)

Here is a screenshot of what the training is printing out: [screenshot]

And this is a screenshot of the Pandas DataFrame that I am using for the input data (5 columns of floats): [screenshot]

And finally, here is the Pandas DataFrame that I am using for the expected outputs (1 column of either -1 or 1): [screenshot]

This is almost always a problem with the input data.

I would suggest inspecting in detail the values you are feeding into the model to make sure the model is receiving what you think it is.
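
For this code, a single NaN or inf anywhere in Xtrain will propagate through both matmuls, making the loss NaN from the very first step, and argmax over a row containing NaN returns a fixed index, so the accuracy freezes at a constant value. That matches exactly the symptoms shown above.

Here is a minimal sketch of that kind of check, assuming the same Xtrain and ytrain DataFrames built in the question's code (sanity_check is a hypothetical helper name, not part of the question's code):

import numpy as np

def sanity_check(name, df):
    # Cast to the same float64 array that feed_dict would receive;
    # a non-numeric column will raise here, which is itself diagnostic
    arr = np.asarray(df.values, dtype=np.float64)
    print('%s: shape=%s' % (name, arr.shape))
    # Any nonzero count here is enough to make the loss NaN immediately
    print('  NaNs: %d  infs: %d' % (np.isnan(arr).sum(), np.isinf(arr).sum()))
    # Finite but wildly scaled features are worth normalizing too
    print('  min: %.4f  max: %.4f' % (np.nanmin(arr), np.nanmax(arr)))

sanity_check('Xtrain', Xtrain)
sanity_check('ytrain', ytrain)

If the NaN/inf counts are nonzero, something like Xtrain.dropna() or Xtrain.fillna(Xtrain.mean()) before building the graph is the usual fix; if the values are finite but the columns sit on very different scales, normalizing them (e.g. (Xtrain - Xtrain.mean()) / Xtrain.std()) also helps keep the sigmoid units out of saturation.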
