繁体   English   中英

尺寸必须相等 Tensorflow 'MatMul'

[英]Dimensions must be equal Tensorflow 'MatMul'

嗨,我正在调试下面这段代码。它是一个多任务模型,以 MNIST 手写数字为输入执行两项任务:一方面充当一个简单的自编码器神经网络,另一方面根据自编码器的思想向量(thought vector)中编码的信息对数字进行分类。

我遇到的问题是:

ValueError: Dimensions must be equal, but are 784 and 256 for 'MatMul' (op: 'MatMul') with input shapes: [?,784], [256,784].

而实际上,两者的形状应该是相同的,即都是 [256,784]。

这是我的代码:

import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import numpy as np
import matplotlib.pyplot as plt
import tsne
import timeit

# Wall-clock reference used later to report the total run duration.
start_time = timeit.default_timer()

###################################

# Training set: every line is "<label>\t<pixels>".  Only the pixel field is
# used here; '-' characters are dropped and each remaining character is
# converted to one float value.
with open('dataset.txt', 'r', encoding='utf-8') as f:
    data = []
    for line in f.read().strip().split('\n'):
        data.append(line.strip().split('\t'))
data_imgs = np.array(
    [list(map(float, img.replace('-', ''))) for (_, img) in data],
    np.float32,
)

train_imgs = data_imgs

# Test set: same record layout; here the labels are kept as well.
with open('test.txt', 'r', encoding='utf-8') as f:
    data = []
    for line in f.read().strip().split('\n'):
        data.append(line.strip().split('\t'))
test_imgs = np.array(
    [list(map(float, img.replace('-', ''))) for (_, img) in data],
    np.float32,
)
test_lbls = np.array([int(label) for (label, _) in data], np.int32)

###################################
class Model(object):
    """Tied-weight autoencoder over flattened 28x28 images (TF1 graph mode).

    The encoder maps each image to a 256-unit tanh "thought vector"; the
    decoder multiplies that vector by the transpose of the same weight
    matrix to produce the reconstruction logits.  Training minimises the
    mean sigmoid cross-entropy between the logits and the input images
    with a momentum optimiser.

    The graph is built and finalised in ``__init__``.  Call ``initialise()``
    before any other method and ``close()`` when finished.
    """

    def __init__(self):
        """Build the graph, the training op and the session."""
        #Set model hyperparameters here
        input_size = 28*28
        gen_output_size = input_size
        thought_vector_size = 256
        learning_rate = 0.75
        momentum = 0.65

        self.graph = tf.Graph()
        with self.graph.as_default():
            self.images = tf.placeholder(tf.float32, [None, gen_output_size], 'images')

            with tf.variable_scope('hidden'):
                W = tf.get_variable('W', [input_size, thought_vector_size], tf.float32,
                                    tf.random_normal_initializer(stddev=0.1))
                b = tf.get_variable('b', [thought_vector_size], tf.float32, tf.zeros_initializer())
                self.thought_vectors = tf.tanh(tf.matmul(self.images, W) + b)  # The thought vector

            # Trainable variables, collected as they are created so that
            # get_params() reports them (previously this list stayed empty
            # and the parameter count was always 0).
            self.params = [W, b]

            with tf.variable_scope('output'):
                # The decoder reuses the encoder matrix (tied weights), hence
                # the transpose below.  Untied alternative, kept for reference:
                # W = tf.get_variable('W', [thought_vector_size, 28*28], tf.float32, tf.random_normal_initializer(stddev=0.1))
                b = tf.get_variable('b', [gen_output_size], tf.float32, tf.zeros_initializer())
                logits = tf.matmul(self.thought_vectors, tf.transpose(W)) + b
                self.out_images =  tf.sigmoid(logits)  # The output image

            self.params.append(b)

            #Define model here
            self.error = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.images))
            self.optimiser_step = tf.train.MomentumOptimizer(learning_rate, momentum).minimize(self.error)

            self.init = tf.global_variables_initializer()

            self.graph.finalize()

            # Bind the session to this model's graph explicitly rather than
            # relying on the surrounding default-graph context.
            self.sess = tf.Session(graph=self.graph)

    def initialise(self):
        """Run the variable initialiser op."""
        self.sess.run([ self.init ], { })

    def close(self):
        """Close the underlying session."""
        self.sess.close()

    def optimisation_step(self, images):
        """Run one optimiser update on ``images``; return the raw run result."""
        return self.sess.run([ self.optimiser_step ], { self.images: images })

    def get_params(self):
        """Return the current values of the variables listed in ``self.params``."""
        return self.sess.run(self.params, { })

    def get_error(self, images):
        """Return the reconstruction error evaluated on ``images``."""
        return self.sess.run([ self.error ], { self.images: images })[0]

    def get_thoughtvectors(self, images):
        """Return the encoder output (thought vectors) for ``images``."""
        return self.sess.run([ self.thought_vectors ], { self.images: images })[0]

    def predict(self, images):
        """Return the reconstructed (sigmoid) images for ``images``."""
        return self.sess.run([ self.out_images ], { self.images: images })[0]

###################################

#Set training hyperparameters here
max_epochs = 1250

# Figure that shows the training error curve; the line starts empty and is
# filled in while training runs.
(fig, ax) = plt.subplots(1, 1)

[ train_error_plot ] = ax.plot([], [], color='red', linestyle='-', linewidth=1, label='train')
ax.set_xlim(0, max_epochs)
ax.set_xlabel('epoch')
ax.set_ylim(0.0, 1.5)
ax.set_ylabel('Error')
ax.grid(True)
ax.set_title('Error progress')
ax.legend()

# FigureCanvasBase.set_window_title was deprecated in Matplotlib 3.4 and
# later removed; the manager method is available on old and new releases.
fig.canvas.manager.set_window_title('Training progress')
fig.tight_layout()
fig.show()

###################################

# Build the network and initialise its variables.
model = Model()
model.initialise()

train_errors = []
print('epoch', 'train error', sep='\t')
for epoch in range(1, max_epochs + 1):
    # The error is measured before this epoch's update is applied.
    train_error = model.get_error(train_imgs)
    train_errors.append(train_error)

    # Every 100th epoch: log the error and redraw the progress curve.
    report_due = (epoch % 100 == 0)
    if report_due:
        print(epoch, train_error, sep='\t')

        xs = np.arange(len(train_errors))
        train_error_plot.set_data(xs, train_errors)
        plt.draw()
        fig.canvas.flush_events()

    #Optimisation per epoch here
    model.optimisation_step(train_imgs)
print()

# 4x5 grid: rows 0 and 2 show original test digits, rows 1 and 3 show the
# corresponding reconstructions.
(fig, axs) = plt.subplots(4, 5)

# Fraction of pixels whose rounded reconstruction equals the input pixel.
accuracy = np.sum(np.round(model.predict(test_imgs)) == test_imgs)/test_imgs.size
# Elapsed time since start_time, in minutes.
duration = round((timeit.default_timer() - start_time)/60, 1)
num_params = sum(p.size for p in model.get_params())

for digit in range(10):
    # Digits 0-4 occupy rows 0/1, digits 5-9 occupy rows 2/3.
    row = 2 * (digit // 5)
    col = digit % 5

    # First test image carrying this label.
    img = test_imgs[test_lbls == digit][0]
    [ out_img ] = model.predict([ img ])

    axs[row,col].set_axis_off()
    axs[row,col].matshow(np.reshape(img, [28, 28]), vmin=0.0, vmax=1.0, cmap='bwr')

    axs[row+1,col].set_axis_off()
    axs[row+1,col].matshow(np.reshape(out_img, [28, 28]), vmin=0.0, vmax=1.0, cmap='bwr')
axs[1,4].text(1.0, 0.5, 'Accuracy: {:.2%}\nDuration: {}min\nParams: {}'.format(accuracy, duration, num_params), dict(fontsize=10, ha='left', va='center', transform=axs[1,4].transAxes))

# FigureCanvasBase.set_window_title was deprecated in Matplotlib 3.4 and
# later removed; the manager method is available on old and new releases.
fig.canvas.manager.set_window_title('Generated images')
fig.tight_layout()
fig.show()

# Scatter plot of the test-set thought vectors projected to 2-D, one series
# per digit label.
(fig, ax) = plt.subplots(1, 1)

thought_vectors = model.get_thoughtvectors(test_imgs)
# NOTE(review): tsne is a project-local module; the result is indexed below
# as one row per test image with at least two columns - confirm.
points_2d = tsne.tsne(thought_vectors)
for digit in range(0, 9+1):
    mask = test_lbls == digit
    ax.plot(points_2d[mask, 0], points_2d[mask, 1], linestyle='', marker='o', markersize=5, label=str(digit))
ax.legend()

# FigureCanvasBase.set_window_title was deprecated in Matplotlib 3.4 and
# later removed; the manager method is available on old and new releases.
fig.canvas.manager.set_window_title('Thought vectors')
fig.tight_layout()
fig.show()

model.close()

矩阵乘法要求内部维度匹配。看起来您在第二个 tf.matmul 中对权重矩阵做了转置以保证这一点,但在第一个中没有这样做?

self.thought_vectors = tf.tanh(tf.matmul(self.images, W) + b)  # The thought vector

logits = tf.matmul(self.thought_vectors, tf.transpose(W)) + b

为了简化代码,可以利用 tf.matmul 函数的一个参数(我假设您使用的是 2.0 之前的 TF 版本),它用来指定是否先对其中一个矩阵进行转置。请尝试将您的 matmul 调用改为:

self.thought_vectors = tf.tanh(tf.matmul(self.images, W,transpose_b=True) + b)  # The thought vector

logits = tf.matmul(self.thought_vectors, W,transpose_b=True) + b

如果您不取消注释第二个W定义,此代码将不起作用。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM