Dimensions must be equal Tensorflow 'MatMul'

Hi i got this piece of code which I'm currently debugging. It's a multi-task model that performs two tasks given an MNIST handwritten digits. It acts as a simple autoencoder neural network and classifies the digit encoded in the thought vector of the autoencoder.

The issue I'm having is stating:

ValueError: Dimensions must be equal, but are 784 and 256 for 'MatMul' (op: 'MatMul') with input shapes: [?,784], [256,784].

Were in reality it is suppose to be of equal shape. ie both [256,784].

This is my code:

import warnings
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tsne
import timeit

start_time = timeit.default_timer()


with open('dataset.txt', 'r', encoding='utf-8') as f:
    data = [ line.strip().split('\t') for line in f.read().strip().split('\n') ]
data_imgs = np.array([ [ float(px) for px in img.replace('-', '') ] for (lbl, img) in data ], np.float32)

train_imgs = data_imgs

with open('test.txt', 'r', encoding='utf-8') as f:
    data = [ line.strip().split('\t') for line in f.read().strip().split('\n') ]
test_imgs = np.array([ [ float(px) for px in img.replace('-', '') ] for (lbl, img) in data ], np.float32)
test_lbls = np.array([ int(lbl) for (lbl,img) in data ], np.int32)

class Model(object):

    def __init__(self):
        #Set model hyperparameters here
        input_size = 28*28
        gen_output_size = input_size
        thought_vector_size = 256
        learning_rate = 0.75
        momentum = 0.65

        self.graph = tf.Graph()
        with self.graph.as_default():
            self.images = tf.placeholder(tf.float32, [None, gen_output_size], 'images')

            with tf.variable_scope('hidden'):
                W = tf.get_variable('W', [28 * 28, thought_vector_size], tf.float32,
                b = tf.get_variable('b', [thought_vector_size], tf.float32, tf.zeros_initializer())
                self.thought_vectors = tf.tanh(tf.matmul(self.images, W) + b)  # The thought vector

            with tf.variable_scope('output'):
                # W = tf.get_variable('W', [thought_vector_size, 28*28], tf.float32, tf.random_normal_initializer(stddev=0.1))
                b = tf.get_variable('b', [28 * 28], tf.float32, tf.zeros_initializer())
                logits = tf.matmul(self.thought_vectors, tf.transpose(W)) + b
                self.out_images =  tf.sigmoid(logits)  # The output image

            self.params = []

            #Define model here
            self.error = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.images))
            self.optimiser_step = tf.train.MomentumOptimizer(learning_rate, momentum).minimize(self.error)

            self.init = tf.global_variables_initializer()


            self.sess = tf.Session()

    def initialise(self):
        self.sess.run([ self.init ], { })

    def close(self):

    def optimisation_step(self, images):
        return self.sess.run([ self.optimiser_step ], { self.images: images })

    def get_params(self):
        return self.sess.run(self.params, { })

    def get_error(self, images):
        return self.sess.run([ self.error ], { self.images: images })[0]

    def get_thoughtvectors(self, images):
        return self.sess.run([ self.thought_vectors ], { self.images: images })[0]

    def predict(self, images):
        return self.sess.run([ self.out_images ], { self.images: images })[0]


#Set training hyperparameters here
max_epochs = 1250

(fig, ax) = plt.subplots(1, 1)

[ train_error_plot ] = ax.plot([], [], color='red', linestyle='-', linewidth=1, label='train')
ax.set_xlim(0, max_epochs)
ax.set_ylim(0.0, 1.5)
ax.set_title('Error progress')

fig.canvas.set_window_title('Training progress')


model = Model()

train_errors = list()
print('epoch', 'train error', sep='\t')
for epoch in range(1, max_epochs+1):
    train_error = model.get_error(train_imgs)

    if epoch%100 == 0:
        print(epoch, train_error, sep='\t')

        train_error_plot.set_data(np.arange(len(train_errors)), train_errors)
    #Optimisation per epoch here

(fig, axs) = plt.subplots(4, 5)

accuracy = np.sum(np.round(model.predict(test_imgs)) == test_imgs)/test_imgs.size
duration = round((timeit.default_timer() - start_time)/60, 1)
num_params = sum(p.size for p in model.get_params())

digit = 0
row = 0
for _ in range(2):
    for col in range(5):
        img = test_imgs[test_lbls == digit][0]
        [ out_img ] = model.predict([ img ])

        axs[row,col].matshow(np.reshape(img, [28, 28]), vmin=0.0, vmax=1.0, cmap='bwr')

        axs[row+1,col].matshow(np.reshape(out_img, [28, 28]), vmin=0.0, vmax=1.0, cmap='bwr')

        digit += 1
        if digit == 5:
            row += 2
axs[1,4].text(1.0, 0.5, 'Accuracy: {:.2%}\nDuration: {}min\nParams: {}'.format(accuracy, duration, num_params), dict(fontsize=10, ha='left', va='center', transform=axs[1,4].transAxes))

fig.canvas.set_window_title('Generated images')

(fig, ax) = plt.subplots(1, 1)

thought_vectors = model.get_thoughtvectors(test_imgs)
points_2d = tsne.tsne(thought_vectors)
for digit in range(0, 9+1):
    ax.plot(points_2d[test_lbls==digit, 0], points_2d[test_lbls==digit, 1], linestyle='', marker='o', markersize=5, label=str(digit))

fig.canvas.set_window_title('Thought vectors')


Matrix multiplication requires that the inner dimensions match. It appears you are transposing your weight vector in your second tf.matmul to ensure this but not in the first?

self.thought_vectors = tf.tanh(tf.matmul(self.images, W) + b)  # The thought vector


logits = tf.matmul(self.thought_vectors, tf.transpose(W)) + b

To simplify your code, it helps that one of the arguments of the tf.matmul function (I assume you're using a tf version prior to 2.0) defines if one of the matrices should be transposed. Try changing your matmul functions to:

self.thought_vectors = tf.tanh(tf.matmul(self.images, W,tranpose_b=True) + b)  # The thought vector


logits = tf.matmul(self.thought_vectors, W,tranpose_b=True) + b

Also this code won't work if you don't uncomment the second W definition.

