[英]Tensorflow throws “Dimensions must be equal, but are 100 and 0 for 'MatMul' (op: 'MatMul') with input shapes: [0,100], [0,100].”
[英]Dimensions must be equal Tensorflow 'MatMul'
嗨,我目前正在调试下面这段代码。它是一个多任务模型(model),针对 MNIST 手写数字执行两项任务:既充当一个简单的自动编码器神经网络,又对自动编码器的思想向量(thought vector)中编码的数字进行分类。
我遇到的问题是:
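ValueError: Dimensions must be equal, but are 784 and 256 for 'MatMul' (op: 'MatMul') with input shapes: [?,784], [256,784].(即:维度必须相等,但 'MatMul' 的两个输入形状分别为 [?,784] 和 [256,784],对应的维度是 784 和 256。)
而实际上,两者的形状应该是相同的,即都是 [256,784]。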
这是我的代码:
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import numpy as np
import matplotlib.pyplot as plt
import tsne
import timeit
# Wall-clock reference point; the elapsed time is reported at the end of the run.
start_time = timeit.default_timer()

###################################

# Training set. Each line of 'dataset.txt' is split on a tab into a
# (label, image) pair; '-' characters are dropped from the image field and
# every remaining character is converted to one float pixel value.
with open('dataset.txt', 'r', encoding='utf-8') as f:
    data = [row.strip().split('\t') for row in f.read().strip().split('\n')]
data_imgs = np.array(
    [list(map(float, pixels.replace('-', ''))) for (_, pixels) in data],
    np.float32,
)
train_imgs = data_imgs

# Test set, parsed the same way; here the label column is kept as well.
with open('test.txt', 'r', encoding='utf-8') as f:
    data = [row.strip().split('\t') for row in f.read().strip().split('\n')]
test_imgs = np.array(
    [list(map(float, pixels.replace('-', ''))) for (_, pixels) in data],
    np.float32,
)
test_lbls = np.array([int(label) for (label, _) in data], np.int32)
###################################
class Model(object):
    """Single-hidden-layer autoencoder with tied weights, built as a TF1 graph.

    The encoder maps a batch of flattened images (``[None, 28*28]``) to a
    ``tanh`` "thought vector" of size 256; the decoder multiplies the thought
    vector by the transpose of the same weight matrix and adds its own bias.
    Training minimises the mean sigmoid cross-entropy between the decoder
    logits and the input images using momentum SGD.

    The model owns a private ``tf.Graph`` and a ``tf.Session`` bound to it;
    call ``initialise()`` before use and ``close()`` when done.
    """

    def __init__(self):
        """Build the graph, the training op and the session."""
        # Set model hyperparameters here
        input_size = 28 * 28
        gen_output_size = input_size
        thought_vector_size = 256
        learning_rate = 0.75
        momentum = 0.65

        self.graph = tf.Graph()
        with self.graph.as_default():
            self.images = tf.placeholder(tf.float32, [None, gen_output_size], 'images')

            with tf.variable_scope('hidden'):
                W = tf.get_variable('W', [input_size, thought_vector_size], tf.float32,
                                    tf.random_normal_initializer(stddev=0.1))
                b_hidden = tf.get_variable('b', [thought_vector_size], tf.float32,
                                           tf.zeros_initializer())
                # The thought vector
                self.thought_vectors = tf.tanh(tf.matmul(self.images, W) + b_hidden)

            with tf.variable_scope('output'):
                # The decoder deliberately reuses the encoder matrix (transposed)
                # instead of the separate weight matrix sketched below.
                # W = tf.get_variable('W', [thought_vector_size, 28*28], tf.float32, tf.random_normal_initializer(stddev=0.1))
                b_out = tf.get_variable('b', [gen_output_size], tf.float32,
                                        tf.zeros_initializer())
                logits = tf.matmul(self.thought_vectors, tf.transpose(W)) + b_out
                # The output image
                self.out_images = tf.sigmoid(logits)

            # Every trainable variable of the model, so get_params() (and the
            # parameter count derived from it) reflects the real model.
            self.params = [W, b_hidden, b_out]

            self.error = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.images))
            self.optimiser_step = tf.train.MomentumOptimizer(
                learning_rate, momentum).minimize(self.error)

            self.init = tf.global_variables_initializer()

        self.graph.finalize()

        # Bind the session to this model's graph explicitly; a bare
        # tf.Session() would attach to the default graph instead.
        self.sess = tf.Session(graph=self.graph)

    def initialise(self):
        """Run the variable initialiser in the model's session."""
        self.sess.run([self.init], {})

    def close(self):
        """Close the model's session."""
        self.sess.close()

    def optimisation_step(self, images):
        """Run one optimiser update on ``images`` and return the session result."""
        return self.sess.run([self.optimiser_step], {self.images: images})

    def get_params(self):
        """Return the current values of the variables listed in ``self.params``."""
        return self.sess.run(self.params, {})

    def get_error(self, images):
        """Return the mean reconstruction cross-entropy for ``images``."""
        return self.sess.run([self.error], {self.images: images})[0]

    def get_thoughtvectors(self, images):
        """Return the thought vectors (hidden activations) for ``images``."""
        return self.sess.run([self.thought_vectors], {self.images: images})[0]

    def predict(self, images):
        """Return the reconstructed images (sigmoid of the logits) for ``images``."""
        return self.sess.run([self.out_images], {self.images: images})[0]
###################################
# Set training hyperparameters here
max_epochs = 1250

# Live figure showing the training error per epoch; the (initially empty)
# line is updated from the training loop.
(fig, ax) = plt.subplots(1, 1)
[train_error_plot] = ax.plot([], [], color='red', linestyle='-', linewidth=1, label='train')
ax.set_xlim(0, max_epochs)
ax.set_xlabel('epoch')
ax.set_ylim(0.0, 1.5)
ax.set_ylabel('Error')
ax.grid(True)
ax.set_title('Error progress')
ax.legend()
# The window title is set through the figure manager: the canvas-level
# set_window_title() was deprecated in Matplotlib 3.4 and removed in 3.6.
fig.canvas.manager.set_window_title('Training progress')
fig.tight_layout()
fig.show()
###################################
model = Model()
model.initialise()

train_errors = []
print('epoch', 'train error', sep='\t')
for epoch in range(1, max_epochs + 1):
    # The error is measured before this epoch's update is applied.
    train_error = model.get_error(train_imgs)
    train_errors.append(train_error)

    # NOTE(review): the source lost its indentation; the progress report and
    # plot refresh are assumed to happen together every 100th epoch, with the
    # optimisation step running on every epoch - confirm against the original.
    if epoch % 100 == 0:
        print(epoch, train_error, sep='\t')
        train_error_plot.set_data(np.arange(len(train_errors)), train_errors)
        plt.draw()
        fig.canvas.flush_events()

    # Optimisation per epoch here
    model.optimisation_step(train_imgs)
print()
# Summary figure: for each digit 0-9, the first matching test image is shown
# above its reconstruction (digits 0-4 in rows 0/1, digits 5-9 in rows 2/3).
(fig, axs) = plt.subplots(4, 5)

# Fraction of pixels whose rounded reconstruction equals the test image pixel.
accuracy = np.sum(np.round(model.predict(test_imgs)) == test_imgs) / test_imgs.size
# Minutes elapsed since start_time, to one decimal place.
duration = round((timeit.default_timer() - start_time) / 60, 1)
num_params = sum(p.size for p in model.get_params())

digit = 0
row = 0
for _ in range(2):
    for col in range(5):
        img = test_imgs[test_lbls == digit][0]
        [out_img] = model.predict([img])

        axs[row, col].set_axis_off()
        axs[row, col].matshow(np.reshape(img, [28, 28]), vmin=0.0, vmax=1.0, cmap='bwr')

        axs[row + 1, col].set_axis_off()
        axs[row + 1, col].matshow(np.reshape(out_img, [28, 28]), vmin=0.0, vmax=1.0, cmap='bwr')

        digit += 1
    # After the first five digits, move down to the second pair of rows.
    if digit == 5:
        row += 2

axs[1,4].text(1.0, 0.5, 'Accuracy: {:.2%}\nDuration: {}min\nParams: {}'.format(accuracy, duration, num_params), dict(fontsize=10, ha='left', va='center', transform=axs[1,4].transAxes))

# The window title is set through the figure manager: the canvas-level
# set_window_title() was deprecated in Matplotlib 3.4 and removed in 3.6.
fig.canvas.manager.set_window_title('Generated images')
fig.tight_layout()
fig.show()
# Scatter plot of the test-set thought vectors after projection with
# tsne.tsne, one series per digit label.
(fig, ax) = plt.subplots(1, 1)

thought_vectors = model.get_thoughtvectors(test_imgs)
points_2d = tsne.tsne(thought_vectors)

for digit in range(0, 9 + 1):
    ax.plot(points_2d[test_lbls==digit, 0], points_2d[test_lbls==digit, 1], linestyle='', marker='o', markersize=5, label=str(digit))
ax.legend()

# The window title is set through the figure manager: the canvas-level
# set_window_title() was deprecated in Matplotlib 3.4 and removed in 3.6.
fig.canvas.manager.set_window_title('Thought vectors')
fig.tight_layout()
fig.show()

model.close()
矩阵乘法要求内部维度匹配。看起来您在第二个 tf.matmul 中转置了权重矩阵以保证这一点,但在第一个 tf.matmul 中却没有这样做?
self.thought_vectors = tf.tanh(tf.matmul(self.images, W) + b) # The thought vector
和
logits = tf.matmul(self.thought_vectors, tf.transpose(W)) + b
为了简化您的代码,可以利用 tf.matmul 函数的一个参数(我假设您使用的是 2.0 之前的 tf 版本)来指定是否应该转置其中一个矩阵。尝试将您的 matmul 调用更改为:
self.thought_vectors = tf.tanh(tf.matmul(self.images, W, transpose_b=True) + b) # The thought vector
和
logits = tf.matmul(self.thought_vectors, W, transpose_b=True) + b
如果您不取消注释第二个W定义,此代码也将不起作用。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.