Python - Low accuracy with low loss with tensorflow
I'm building a simple neural network that takes 3 values and gives 2 outputs.
I'm getting an accuracy of 67.5% and an average cost of 0.05.
I have a training dataset of 1000 examples and 500 testing examples. I plan on making a larger dataset in the near future.
A little while ago I managed to get an accuracy of about 82%, and sometimes a bit higher, but the cost was quite high. I've been experimenting with adding another layer, which is currently in the model, and that is how I got the loss under 1.0.
I'm not sure what is going wrong. I'm new to TensorFlow and NNs in general.
Here is my code:
import tensorflow as tf
import numpy as np
import sys
sys.path.insert(0, '.../Dataset/Testing/')
sys.path.insert(0, '.../Dataset/Training/')
#other files
from TestDataNormaliser import *
from TrainDataNormaliser import *
learning_rate = 0.01
trainingIteration = 10
batchSize = 100
displayStep = 1
x = tf.placeholder("float", [None, 3])
y = tf.placeholder("float", [None, 2])
#layer 1
w1 = tf.Variable(tf.truncated_normal([3, 4], stddev=0.1))
b1 = tf.Variable(tf.zeros([4]))
y1 = tf.matmul(x, w1) + b1
#layer 2
w2 = tf.Variable(tf.truncated_normal([4, 4], stddev=0.1))
b2 = tf.Variable(tf.zeros([4]))
#y2 = tf.nn.sigmoid(tf.matmul(y1, w2) + b2)
y2 = tf.matmul(y1, w2) + b2
w3 = tf.Variable(tf.truncated_normal([4, 2], stddev=0.1))
b3 = tf.Variable(tf.zeros([2]))
y3 = tf.nn.sigmoid(tf.matmul(y2, w3) + b3) #sigmoid
#output
#wO = tf.Variable(tf.truncated_normal([2, 2], stddev=0.1))
#bO = tf.Variable(tf.zeros([2]))
a = y3 #tf.nn.softmax(tf.matmul(y2, wO) + bO) #y2
a_ = tf.placeholder("float", [None, 2])
#cost function
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(a)))
#cross_entropy = -tf.reduce_sum(y*tf.log(a))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)
#training
init = tf.global_variables_initializer() #initialises tensorflow
with tf.Session() as sess:
    sess.run(init)  # runs the initialiser
    writer = tf.summary.FileWriter(".../Logs")
    writer.add_graph(sess.graph)
    merged_summary = tf.summary.merge_all()

    for iteration in range(trainingIteration):
        avg_cost = 0
        totalBatch = int(len(trainArrayValues)/batchSize)  # 1000/100
        #totalBatch = 10
        for i in range(batchSize):
            start = i
            end = i + batchSize  # 100
            xBatch = trainArrayValues[start:end]
            yBatch = trainArrayLabels[start:end]
            # feeding training data
            sess.run(optimizer, feed_dict={x: xBatch, y: yBatch})
            i += batchSize
            avg_cost += sess.run(cross_entropy, feed_dict={x: xBatch, y: yBatch})/totalBatch
        if iteration % displayStep == 0:
            print("Iteration:", '%04d' % (iteration + 1), "cost=", "{:.9f}".format(avg_cost))

    print("Training complete")

    predictions = tf.equal(tf.argmax(a, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(predictions, "float"))
    print("Accuracy:", accuracy.eval({x: testArrayValues, y: testArrayLabels}))
A few important notes (most are spelled out in the docstrings and comments of the code below):
When it comes to writing clean, maintainable code, I'd also encourage you to consider the following.
For graph construction:
def get_logits(features):
    """tf.layers API is cleaner and has better default values."""
    # #layer 1
    # w1 = tf.Variable(tf.truncated_normal([3, 4], stddev=0.1))
    # b1 = tf.Variable(tf.zeros([4]))
    # y1 = tf.matmul(x, w1) + b1
    x = tf.layers.dense(features, 4, activation=tf.nn.relu)
    # #layer 2
    # w2 = tf.Variable(tf.truncated_normal([4, 4], stddev=0.1))
    # b2 = tf.Variable(tf.zeros([4]))
    # y2 = tf.matmul(y1, w2) + b2
    x = tf.layers.dense(x, 4, activation=tf.nn.relu)
    # w3 = tf.Variable(tf.truncated_normal([4, 2], stddev=0.1))
    # b3 = tf.Variable(tf.zeros([2]))
    # y3 = tf.nn.sigmoid(tf.matmul(y2, w3) + b3) #sigmoid
    # N.B. Don't take a non-linearity here.
    logits = tf.layers.dense(x, 1, activation=None)
    # remove unnecessary final dimension, batch_size * 1 -> batch_size
    logits = tf.squeeze(logits, axis=-1)
    return logits
def get_loss(logits, labels):
    """tf.nn.sigmoid_cross_entropy_with_logits is numerically stable."""
    # #cost function
    # cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(a)))
    # average the per-example losses down to a scalar
    return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=logits, labels=labels))
def get_train_op(loss):
    """There are better options than standard SGD. Try the following."""
    learning_rate = 1e-3
    # optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)  # momentum is a required argument
    # optimizer = tf.train.AdamOptimizer(learning_rate)
    return optimizer.minimize(loss)
def get_inputs(feature_data, label_data, batch_size, n_epochs=None,
               shuffle=True):
    """
    Get features and labels for training/evaluation.

    Args:
        feature_data: numpy array of feature data.
        label_data: numpy array of label data
        batch_size: size of batch to be returned
        n_epochs: number of epochs to train for. None will result in repeating
            forever/until stopped
        shuffle: bool flag indicating whether or not to shuffle.
    """
    dataset = tf.data.Dataset.from_tensor_slices(
        (feature_data, label_data))
    dataset = dataset.repeat(n_epochs)
    if shuffle:
        dataset = dataset.shuffle(len(feature_data))
    dataset = dataset.batch(batch_size)
    features, labels = dataset.make_one_shot_iterator().get_next()
    return features, labels
For session running you could use this like you have (what I'd call 'the hard way')...
features, labels = get_inputs(
    trainArrayValues, trainArrayLabels, batchSize, n_epochs, shuffle=True)
logits = get_logits(features)
loss = get_loss(logits, labels)
train_op = get_train_op(loss)
init = tf.global_variables_initializer()

# monitored sessions have the `should_stop` method, which works with datasets
with tf.train.MonitoredSession() as sess:
    sess.run(init)
    while not sess.should_stop():
        # get both loss and optimizer step in the same session run
        loss_val, _ = sess.run([loss, train_op])
        print(loss_val)
# save variables etc, do evaluation in another graph with different inputs?
but I think you're better off using a tf.estimator.Estimator, though some people prefer tf.keras.Models.
def model_fn(features, labels, mode):
    logits = get_logits(features)
    loss = get_loss(logits, labels)
    train_op = get_train_op(loss)
    predictions = tf.greater(logits, 0)
    accuracy = tf.metrics.accuracy(labels, predictions)
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, train_op=train_op,
        eval_metric_ops={'accuracy': accuracy}, predictions=predictions)
def train_input_fn():
    return get_inputs(trainArrayValues, trainArrayLabels, batchSize)

def eval_input_fn():
    return get_inputs(
        testArrayValues, testArrayLabels, batchSize, n_epochs=1, shuffle=False)
# Where variables and summaries will be saved to
model_dir = './model'
estimator = tf.estimator.Estimator(model_fn, model_dir)
estimator.train(train_input_fn, max_steps=max_steps)
estimator.evaluate(eval_input_fn)
Note if you use estimators the variables will be saved after training, so you won't need to re-train each time. If you want to reset, just delete the model_dir.
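If you'd rather go the tf.keras route, a minimal sketch of the same model might look like the following. This is my own sketch, not code from the question; it assumes your labels are scalar 0/1 values rather than one-hot pairs, to match the single-logit output above.

# minimal tf.keras sketch (assumes scalar 0/1 labels, not one-hot pairs)
model = tf.keras.Sequential([
    tf.keras.layers.Dense(4, activation='relu', input_shape=(3,)),
    tf.keras.layers.Dense(4, activation='relu'),
    # sigmoid output pairs with the 'binary_crossentropy' loss below
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])
model.fit(trainArrayValues, trainArrayLabels, batch_size=batchSize, epochs=10)
print(model.evaluate(testArrayValues, testArrayLabels))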
I see that you are using a softmax loss with sigmoidal activation functions in the last layer. Now let me explain the difference between softmax activations and sigmoidal ones.
You are now allowing the output of the network to be y=(0, 1), y=(1, 0), y=(0, 0) and y=(1, 1). This is because your sigmoidal activations "squish" each element in y between 0 and 1. Your loss function, however, assumes that your y vector sums to one.
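To make that concrete, here is a tiny numpy illustration (my own example values, not from the question) showing that element-wise sigmoids don't form a probability distribution, while a softmax does:

import numpy as np

z = np.array([2.0, 0.5])  # example final-layer pre-activations (logits)

sigmoid = 1 / (1 + np.exp(-z))  # squishes each element into (0, 1) independently
print(sigmoid, sigmoid.sum())   # [0.8808 0.6225] -> sums to ~1.50, not 1

softmax = np.exp(z) / np.exp(z).sum()  # normalises across the output vector
print(softmax, softmax.sum())          # [0.8176 0.1824] -> sums to exactly 1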
What you need to do here is either to use the penalised sigmoidal cross entropy function, which looks like this:
-tf.reduce_sum(y*tf.log(a))-tf.reduce_sum((1-y)*tf.log(1-a))
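This is the same quantity that tf.nn.sigmoid_cross_entropy_with_logits from the answer above computes, only from the pre-sigmoid values and in a numerically stable way. A rough sketch of the correspondence, where z3 is a hypothetical name I'm introducing for the last layer's pre-activation:

z3 = tf.matmul(y2, w3) + b3  # pre-sigmoid logits; "z3" is a hypothetical name
a = tf.nn.sigmoid(z3)
# numerically stable equivalent of the hand-written expression above:
cross_entropy = tf.reduce_sum(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=z3))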
Or, if you want a to sum to one, you need to use softmax activations in your final layer (to get your a's) instead of sigmoids, which is implemented like this:
exp_out = tf.exp(y3)
a = exp_out / tf.reduce_sum(exp_out, axis=1, keepdims=True)
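In practice the built-in does this for you, and also subtracts the max logit before exponentiating for numerical stability:

a = tf.nn.softmax(y3)  # softmax over the last axis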
P.S. I'm using my phone on a train, so please excuse any typos.