Hi, I have this piece of code which I'm currently debugging. It's a multi-task model that performs two tasks given an MNIST handwritten digit: it acts as a simple autoencoder neural network, and it classifies the digit encoded in the thought vector of the autoencoder.
The error I'm getting is:
ValueError: Dimensions must be equal, but are 784 and 256 for 'MatMul' (op: 'MatMul') with input shapes: [?,784], [256,784].
Whereas in reality the two are supposed to be of equal shape, i.e. both [256,784].
This is my code:
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import numpy as np
import matplotlib.pyplot as plt
import tsne
import timeit
start_time = timeit.default_timer()
###################################
# Training set. Every line of the file is "<label>\t<pixel string>"; the pixel
# string is read one character per pixel after any '-' characters are removed.
# The training labels are parsed but not used.
with open('dataset.txt', 'r', encoding='utf-8') as f:
    data = [row.strip().split('\t') for row in f.read().strip().split('\n')]
data_imgs = np.array(
    [list(map(float, pixels.replace('-', ''))) for (_, pixels) in data],
    np.float32,
)
train_imgs = data_imgs

# Test set: same file layout, but here the labels are kept as well so that
# individual digits can be looked up later.
with open('test.txt', 'r', encoding='utf-8') as f:
    data = [row.strip().split('\t') for row in f.read().strip().split('\n')]
test_imgs = np.array(
    [list(map(float, pixels.replace('-', ''))) for (_, pixels) in data],
    np.float32,
)
test_lbls = np.array([int(label) for (label, _) in data], np.int32)
###################################
class Model(object):
    """Single-hidden-layer autoencoder with tied weights (TensorFlow 1.x graph mode).

    The encoder maps a flattened 28x28 image to a 256-dimensional "thought
    vector" through a tanh layer; the decoder reuses the transposed encoder
    weight matrix, adds its own bias and applies a sigmoid to reconstruct the
    image. Training minimises the mean sigmoid cross-entropy between the
    reconstruction logits and the input image using momentum gradient descent.
    """

    def __init__(self):
        """Build the computation graph and open a session bound to it."""
        # Set model hyperparameters here
        input_size = 28*28
        gen_output_size = input_size
        thought_vector_size = 256
        learning_rate = 0.75
        momentum = 0.65

        self.graph = tf.Graph()
        with self.graph.as_default():
            self.images = tf.placeholder(tf.float32, [None, gen_output_size], 'images')

            with tf.variable_scope('hidden'):
                W = tf.get_variable('W', [input_size, thought_vector_size], tf.float32,
                                    tf.random_normal_initializer(stddev=0.1))
                b = tf.get_variable('b', [thought_vector_size], tf.float32, tf.zeros_initializer())
                self.thought_vectors = tf.tanh(tf.matmul(self.images, W) + b)  # The thought vector

            with tf.variable_scope('output'):
                # Tied weights: the decoder has no weight matrix of its own, it
                # multiplies by the transpose of the encoder's W and only adds
                # a separate bias.
                b = tf.get_variable('b', [gen_output_size], tf.float32, tf.zeros_initializer())
                logits = tf.matmul(self.thought_vectors, tf.transpose(W)) + b
                self.out_images = tf.sigmoid(logits)  # The output image

            # Every trainable variable created above, so that get_params()
            # returns the real parameters instead of an empty list.
            self.params = tf.trainable_variables()

            self.error = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.images))
            self.optimiser_step = tf.train.MomentumOptimizer(learning_rate, momentum).minimize(self.error)

            self.init = tf.global_variables_initializer()
            self.graph.finalize()

        # Bind the session to this model's graph explicitly rather than relying
        # on whichever graph happens to be the default at this point.
        self.sess = tf.Session(graph=self.graph)

    def initialise(self):
        """Run the variable initialiser."""
        self.sess.run([self.init], {})

    def close(self):
        """Close the underlying session."""
        self.sess.close()

    def optimisation_step(self, images):
        """Run one optimiser update on the given batch of flattened images."""
        return self.sess.run([self.optimiser_step], {self.images: images})

    def get_params(self):
        """Return the current values of the model's trainable variables as a list."""
        return self.sess.run(self.params, {})

    def get_error(self, images):
        """Return the mean reconstruction cross-entropy for the given images."""
        return self.sess.run([self.error], {self.images: images})[0]

    def get_thoughtvectors(self, images):
        """Return the hidden-layer activations (thought vectors) for the given images."""
        return self.sess.run([self.thought_vectors], {self.images: images})[0]

    def predict(self, images):
        """Return the sigmoid reconstructions for the given images."""
        return self.sess.run([self.out_images], {self.images: images})[0]
###################################
# Set training hyperparameters here
max_epochs = 1250

# Figure holding the training-error curve; the line starts empty and is
# filled in while training runs.
(fig, ax) = plt.subplots(1, 1)
[train_error_plot] = ax.plot([], [], color='red', linestyle='-', linewidth=1, label='train')
ax.set_xlim(0, max_epochs)
ax.set_xlabel('epoch')
ax.set_ylim(0.0, 1.5)
ax.set_ylabel('Error')
ax.grid(True)
ax.set_title('Error progress')
ax.legend()
# The window title is set through the figure manager: the canvas-level
# set_window_title was deprecated in Matplotlib 3.4 and later removed.
fig.canvas.manager.set_window_title('Training progress')
fig.tight_layout()
fig.show()
###################################
# Build the model and initialise its variables before training starts.
model = Model()
model.initialise()

train_errors = []
print('epoch', 'train error', sep='\t')
for epoch in range(1, max_epochs + 1):
    # The error is measured before this epoch's update is applied.
    train_error = model.get_error(train_imgs)
    train_errors.append(train_error)

    # Every 100th epoch: log the current error and redraw the live curve.
    if epoch % 100 == 0:
        print(epoch, train_error, sep='\t')

        epochs_so_far = np.arange(len(train_errors))
        train_error_plot.set_data(epochs_so_far, train_errors)
        plt.draw()
        fig.canvas.flush_events()

    # Optimisation per epoch here: one update on the whole training set.
    model.optimisation_step(train_imgs)
print()
# 4x5 grid: rows 0 and 2 show original test digits, rows 1 and 3 show the
# model's reconstruction directly underneath each of them.
(fig, axs) = plt.subplots(4, 5)

# Fraction of test pixels whose rounded reconstruction equals the input pixel.
accuracy = np.sum(np.round(model.predict(test_imgs)) == test_imgs)/test_imgs.size
duration = round((timeit.default_timer() - start_time)/60, 1)
num_params = sum(p.size for p in model.get_params())

for digit in range(10):
    # Digits 0-4 go in the top pair of rows, digits 5-9 in the bottom pair.
    row = 2*(digit//5)
    col = digit % 5

    # First test image carrying this label, and its reconstruction.
    img = test_imgs[test_lbls == digit][0]
    [out_img] = model.predict([img])

    axs[row, col].set_axis_off()
    axs[row, col].matshow(np.reshape(img, [28, 28]), vmin=0.0, vmax=1.0, cmap='bwr')
    axs[row+1, col].set_axis_off()
    axs[row+1, col].matshow(np.reshape(out_img, [28, 28]), vmin=0.0, vmax=1.0, cmap='bwr')

# Summary text anchored to the right-hand edge of the axes at row 1, column 4.
axs[1, 4].text(1.0, 0.5, 'Accuracy: {:.2%}\nDuration: {}min\nParams: {}'.format(accuracy, duration, num_params), dict(fontsize=10, ha='left', va='center', transform=axs[1, 4].transAxes))
# The window title is set through the figure manager: the canvas-level
# set_window_title was deprecated in Matplotlib 3.4 and later removed.
fig.canvas.manager.set_window_title('Generated images')
fig.tight_layout()
fig.show()
# Scatter plot of the test-set thought vectors after projection with the
# local tsne module, one marker series per digit label.
(fig, ax) = plt.subplots(1, 1)
thought_vectors = model.get_thoughtvectors(test_imgs)
points_2d = tsne.tsne(thought_vectors)
for digit in range(0, 9+1):
    # Boolean mask selecting the test samples labelled with this digit.
    is_digit = test_lbls == digit
    ax.plot(points_2d[is_digit, 0], points_2d[is_digit, 1], linestyle='', marker='o', markersize=5, label=str(digit))
ax.legend()
# The window title is set through the figure manager: the canvas-level
# set_window_title was deprecated in Matplotlib 3.4 and later removed.
fig.canvas.manager.set_window_title('Thought vectors')
fig.tight_layout()
fig.show()

# Release the TensorFlow session now that all outputs have been produced.
model.close()
Matrix multiplication requires that the inner dimensions match. It appears you are transposing your weight vector in your second tf.matmul to ensure this but not in the first?
self.thought_vectors = tf.tanh(tf.matmul(self.images, W) + b) # The thought vector
and
logits = tf.matmul(self.thought_vectors, tf.transpose(W)) + b
To simplify your code, note that the tf.matmul function (I assume you're using a TF version prior to 2.0) accepts arguments that specify whether either of the matrices should be transposed. Try changing your matmul calls to:
self.thought_vectors = tf.tanh(tf.matmul(self.images, W, transpose_b=True) + b) # The thought vector
and
logits = tf.matmul(self.thought_vectors, W, transpose_b=True) + b
Also this code won't work if you don't uncomment the second W definition.
The technical post webpages of this site follow the CC BY-SA 4.0 license. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.