簡體   English   中英

如何運行定義Tensorflow圖,所有變量都在float16中而不是float32中

[英]How to run define Tensorflow graph were all variables are in float16 instead instead of float32

默認情況下,變量Tensorflow在float32中。 為了節省內存,我試圖在float16中運行。 在我的圖形中,我可以在每個可以將數據類型定義為float16的地方進行操作。 但是,運行代碼時出現錯誤

這是我的下面的代碼。

import math
import numpy as np
import tensorflow as tf

vocabulary_size = 10
batch_size = 64 
embedding_size = 100 
num_inputs =4
num_sampled = 128 

graph = tf.Graph()

with graph.as_default(): #took out " , tf.device('/cpu:0')"


    train_dataset = tf.placeholder(tf.int32, shape=[batch_size, num_inputs ])
    train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])

    embeddings = tf.get_variable( 'embeddings', dtype=tf.float16,
        initializer= tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0, dtype=tf.float16) )

    softmax_weights = tf.get_variable( 'softmax_weights', dtype=tf.float16,
        initializer= tf.truncated_normal([vocabulary_size, embedding_size],
                             stddev=1.0 / math.sqrt(embedding_size), dtype=tf.float16 ) )

    softmax_biases = tf.get_variable('softmax_biases', dtype=tf.float16,
        initializer= tf.zeros([vocabulary_size], dtype=tf.float16),  trainable=False )

    embed = tf.nn.embedding_lookup(embeddings, train_dataset) #train data set is

    embed_reshaped = tf.reshape( embed, [batch_size*num_inputs, embedding_size] )

    segments= np.arange(batch_size).repeat(num_inputs)

    averaged_embeds = tf.segment_mean(embed_reshaped, segments, name=None)

    sam_sof_los = tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases, inputs=averaged_embeds,
                                   labels=train_labels, num_sampled=num_sampled, num_classes=vocabulary_size)

    loss = tf.reduce_mean( sam_sof_los )

    optimizer = tf.train.AdagradOptimizer(1.0).minimize(loss) 

    saver = tf.train.Saver()

這是錯誤消息

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    509                 as_ref=input_arg.is_ref,
--> 510                 preferred_dtype=default_dtype)
    511           except TypeError as err:

/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx)
   1143     if ret is None:
-> 1144       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
   1145 

/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in _TensorTensorConversionFunction(t, dtype, name, as_ref)
    980         "Tensor conversion requested dtype %s for Tensor with dtype %s: %r" %
--> 981         (dtype.name, t.dtype.name, str(t)))
    982   return t

ValueError: Tensor conversion requested dtype float16 for Tensor with dtype float32: 'Tensor("sampled_softmax_loss/Log:0", shape=(64, 1), dtype=float32)'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-2-12d508b9e5d7> in <module>()
     46 
     47     sam_sof_los = tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases, inputs=averaged_embeds,
---> 48                                    labels=train_labels, num_sampled=num_sampled, num_classes=vocabulary_size)
     49 
     50     loss = tf.reduce_mean( sam_sof_los )

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in sampled_softmax_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, remove_accidental_hits, partition_strategy, name, seed)
   1347       partition_strategy=partition_strategy,
   1348       name=name,
-> 1349       seed=seed)
   1350   labels = array_ops.stop_gradient(labels, name="labels_stop_gradient")
   1351   sampled_losses = nn_ops.softmax_cross_entropy_with_logits_v2(

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in _compute_sampled_logits(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, subtract_log_q, remove_accidental_hits, partition_strategy, name, seed)
   1126     if subtract_log_q:
   1127       # Subtract log of Q(l), prior probability that l appears in sampled.
-> 1128       true_logits -= math_ops.log(true_expected_count)
   1129       sampled_logits -= math_ops.log(sampled_expected_count)
   1130 

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py in binary_op_wrapper(x, y)
    860     with ops.name_scope(None, op_name, [x, y]) as name:
    861       if isinstance(x, ops.Tensor) and isinstance(y, ops.Tensor):
--> 862         return func(x, y, name=name)
    863       elif not isinstance(y, sparse_tensor.SparseTensor):
    864         try:

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py in sub(x, y, name)
   8316   if _ctx is None or not _ctx._eager_context.is_eager:
   8317     _, _, _op = _op_def_lib._apply_op_helper(
-> 8318         "Sub", x=x, y=y, name=name)
   8319     _result = _op.outputs[:]
   8320     _inputs_flat = _op.inputs

/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    544                   "%s type %s of argument '%s'." %
    545                   (prefix, dtypes.as_dtype(attrs[input_arg.type_attr]).name,
--> 546                    inferred_from[input_arg.type_attr]))
    547 
    548           types = [values.dtype]

TypeError: Input 'y' of 'Sub' Op has type float32 that does not match type float16 of argument 'x'.

錯誤來自tf.nn.sampled_softmax_loss行。

起初我以為tf.segment_mean可能會將輸出強制轉換為float32,所以我嘗試將average_embeds強制轉換為float16,但仍然遇到相同的錯誤。

根據文檔,似乎沒有一種方法可以在sampled_softmax_loss中定義任何數據類型

https://www.tensorflow.org/api_docs/python/tf/nn/sampled_softmax_loss

據我所知,您只能使用hack來做到這一點。

問題出在以下方面:

  if sampled_values is None:
      sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes,
          seed=seed)

輸出以下類型的對象:

LogUniformCandidateSampler(
    sampled_candidates=<tf.Tensor 'LogUniformCandidateSampler:0' shape=(128,) dtype=int64>,
    true_expected_count=<tf.Tensor 'LogUniformCandidateSampler:1' shape=(64, 1) dtype=float32>,
    sampled_expected_count=<tf.Tensor 'LogUniformCandidateSampler:2' shape=(128,) dtype=float32>
)

黑客將自己生成LogUniformCandidateSampler ,將其結果轉換為tf.float16並將其傳遞給tf.nn.sampled_softmax_loss

# Redefine it as the tensorflow one is not exposed.
LogUniformCandidateSampler = namedtuple("namedtuple", ["sampled_candidates", "true_expected_count", "sampled_expected_count"]) 
sampled_values = tf.nn.log_uniform_candidate_sampler(
      true_classes=tf.cast(train_labels, tf.int64), num_sampled=num_sampled,
      num_true=1,
      unique=True,
      range_max=vocabulary_size,
      seed=None)

sampled_value_16 = LogUniformCandidateSampler(
    sampled_values.sampled_candidates,
    tf.cast(sampled_values.true_expected_count, tf.float16),
    tf.cast(sampled_values.sampled_expected_count, tf.float16))

sam_sof_los = tf.nn.sampled_softmax_loss(
    weights=softmax_weights,
    biases=softmax_biases,
    inputs=averaged_embeds,
    labels=train_labels, num_sampled=num_sampled, num_classes=vocabulary_size, 
    sampled_values=sampled_value_16)

但這確實是一種黑客行為,並且可能會帶來意想不到的后果(可以預料的是, tf.cast操作不可區分)。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM