如何在 Keras 中使用自定義損失函數加快模型的訓練速度？

Question

我在 Keras 中為深度度量學習定義了一個自定義損失函數，盡管問題很簡單，但我在使用 GPU 加速器的 Colab 中獲得了糟糕的性能。 我可以做些什么來加快訓練速度？

示例代碼：代碼被編寫為在 Colab/Jupyter 中運行，但我想它也可以作為腳本運行（我沒有嘗試，但我沒有看到它不應該工作的任何明顯原因）。

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = (X_train / 255.0).astype('float32')
X_test = (X_test / 255.0).astype('float32')

X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
input_shape = (28, 28, 1)

tf.keras.backend.clear_session()

embedding_size = 3
model = keras.Sequential()
model.add(Flatten(input_shape = input_shape))
model.add(Dense(128, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(embedding_size))

model.summary()

margin = 0.2
def contrastive_loss(y_true, embeddings):
  loss = 0.0

  b = embeddings.shape[0]
  
  for i in range(0,b):
    yi = y_true[i]
    xi = embeddings[i]

    for j in range(i+1,b):
      yj = y_true[j]
      xj = embeddings[j]

      yij = tf.minimum(1.0, tf.abs(tf.cast(yi-yj, dtype = tf.float32)))
      distance = tf.norm(xi-xj)
      loss = loss + (1-yij)*distance**2 + yij*tf.maximum(0.0, margin-distance)**2
    
  loss = 0.5 * loss 
  return loss

model.compile(keras.optimizers.Adam(learning_rate=1e-4), loss=contrastive_loss)
history = model.fit(X_train, y_train, batch_size=32, epochs=10, shuffle=True)

print(X_test.shape)
emb = model.predict(X_test)
print(emb.shape, "emb")
print(y_test.shape, "y_test")
plt.scatter(emb[:,0], emb[:,1], c=y_test, cmap='tab10')

在帶有 GPU 加速器的 Colab 上訓練非常慢。 關於如何加快速度的任何提示？ 現在我得到 > 200 ms/step，即每個 epoch ≈ 380 s。

編輯：最初我編寫了實際代碼的精簡版本，但是，根據一些評論，加快代碼速度的方式可能取決於我在損失函數中實際計算的內容，所以我現在放置了實際代碼。 對不起，如果這會使代碼變慢......

EDIT2 ：我試圖在傑夫的回答中實施解決方案，但我遇到了一個我不明白其原因的錯誤。 這是新代碼：

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = (X_train / 255.0).astype('float32')
X_test = (X_test / 255.0).astype('float32')

X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
input_shape = (28, 28, 1)
tf.keras.backend.clear_session()

embedding_size = 3
model = keras.Sequential()
model.add(Flatten(input_shape = input_shape))
model.add(Dense(128, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(embedding_size))

model.summary() 

margin = 0.2
def contrastive_loss(y_true, embeddings):

  # it's easier to work with a flattened array
  flat_y_true = tf.reshape(y_true, (-1)) 

  # matrix of absolute differences, clipped to 1 (if yi!=yj then yij =1)
  yijs = abs(np.subtract.outer(flat_y_true, flat_y_true)).clip(max=1) 

  # we only need the upper triangular part of the matrix
  yijs = yijs[np.triu_indices_from(yijs)] 

  # first compute row differences of the embeddings matrix, the compute norms for
  # each row with axis=2
  distances = np.linalg.norm(embeddings[:,None] - embeddings[None], axis=2)

  # we only need the upper triangular part, again
  distances = distances[np.triu_indices_from(distances)]

  loss = ((1-yijs)*(distances**2) + (yijs*((margin-distances).clip(min=0)**2))).sum()*0.5

  return loss

model.compile(keras.optimizers.Adam(learning_rate=1e-4), loss=contrastive_loss)
history = model.fit(X_train, y_train, batch_size=32, epochs=10, shuffle=True)

我收到以下錯誤：

Epoch 1/10
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-5-1ae713ccf38c> in <module>()
      4 
      5 # When setting batch size, remember we are *quadratically* expanding it in our loss.
----> 6 history = model.fit(X_train, y_train, batch_size=32, epochs=10, shuffle=True)

10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
    971           except Exception as e:  # pylint:disable=broad-except
    972             if hasattr(e, "ag_error_metadata"):
--> 973               raise e.ag_error_metadata.to_exception(e)
    974             else:
    975               raise

ValueError: in user code:

    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:806 train_function  *
        return step_function(self, iterator)
    <ipython-input-4-6c3b8535eb43>:5 contrastive_loss  *
        flat_y_true = tf.reshape(y_true, (-1))
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper  **
        return target(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/array_ops.py:195 reshape
        result = gen_array_ops.reshape(tensor, shape, name)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_array_ops.py:8234 reshape
        "Reshape", tensor=tensor, shape=shape, name=name)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py:744 _apply_op_helper
        attrs=attr_protos, op_def=op_def)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py:593 _create_op_internal
        compute_device)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py:3485 _create_op_internal
        op_def=op_def)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py:1975 __init__
        control_input_ops, op_def)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py:1815 _create_c_op
        raise ValueError(str(e))

    ValueError: Shape must be rank 1 but is rank 0 for '{{node contrastive_loss/Reshape}} = Reshape[T=DT_UINT8, Tshape=DT_INT32](ExpandDims, contrastive_loss/Reshape/shape)' with input shapes: [32,1], [].

Answer 1

如果這不是您想要的，我們將嘗試自己實施。

由於損失函數中的 python for 循環可能是問題所在，因此我們將重點放在那里。

編輯：我想我需要tensorflow因為我可以發誓我之前有numpy函數可以在自定義tensorflow量tensorflow損失函數中工作。 將所有操作切換到tensorflow操作，因為實際上並非如此。

編輯完整功能：

def contrastive_loss(y_true, embeddings):
    
    yijs = tf.cast(tf.reshape((y_true[:,None]!=y_true),(y_true.get_shape()[0],y_true.get_shape()[0])),tf.dtypes.float32)
    distances = tf.norm(tf.cast(embeddings[:,None]-embeddings,tf.dtypes.float32),axis=2)
    dist_len = distances.get_shape()[0]
    return tf.math.reduce_sum((1-yijs)*distances**2 + yijs*(tf.maximum(0.2-distances,tf.zeros((dist_len,dist_len)))**2))*0.25

逐行編輯演練：

def contrastive_loss(y_true, embeddings):
    y_true_len = y_true.get_shape()[0] #shape (None,1)
    yijs = tf.cast(
        tf.reshape(
            (
                #Max clipping abs diff of every value and all other values 
                #to 1 is essentially checking whether 
                #the other values are equal to itself or not
                #this would be a much faster method of doing so
                y_true[:,None]!=y_true
            ),
            #resize to square matrix of shape (batch_size,batch_size)
            (y_true_len,y_true_len) 
        ),
        tf.dtypes.float32 
    )
    
    distances = tf.norm(
        tf.cast(
            #find difference between each row and all other rows
            embeddings[:,None]-embeddings,
            tf.dtypes.float32
        ),
        #normalize every row to find 
        #magnitude of every row vector
        axis=2 
    )
    
    #get len to setup equivalent square matrix tensor of 0's later
    dist_len = distances.get_shape()[0] 
    #Sums all values in tensor
    loss = tf.math.reduce_sum(
        (1-yijs)*(distances**2) 
        + yijs*(
            #clip all values to be above 0
            tf.maximum(
                0.2-distances,
                #declare tensor of same dimension as 0.2-distances
                #for element wise comparison so tf.maximum can do its thing
                tf.zeros((dist_len,dist_len))
            )**2
        )
    )*0.25 #since we didn't drop lower triangle, need to multiply by 0.25 instead of 0.5
    
    return loss

如何在 Keras 中使用自定義損失函數加快模型的訓練速度？

問題描述

1 個解決方案

解決方案1
1 已采納 2020-09-11 03:00:42

如何在 Keras 中使用自定義損失函數加快模型的訓練速度？

問題描述

1 個解決方案

解決方案1 1 已采納 2020-09-11 03:00:42

解決方案1
1 已采納 2020-09-11 03:00:42