I have tried and failed to make Keras model.fit() work on my multi-output model with a custom loss that uses all outputs' targets and predictions (specifically for 2 outputs) in TF 2.
When I tried to do this on a model made with the Keras functional API, I get the error: "SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found ..." meaning I can't use my loss function because it returns an eager tensor to a Keras DAG that works with symbolic tensors (functional API model). To get around this, I used model.add_loss() instead of passing my loss function into model.compile(), but I believe this hogged GPU memory and caused OOM errors.
I've tried workarounds, where I put my functional API model inside a Keras subclassed model or make a completely new Keras subclassed model.
Workaround 1 is shown in the code below. It runs, but training produces NaN losses across the epochs for a variety of gradient-clipping values, and the model's outputs are all zeros.
Workaround 2 raises an error inside the overridden call() method because the inputs parameter has different shapes at model compile time and at run time. This happens because my model (in a quirky way) has 3 inputs: 1 is the actual input to the DLNN, and the other 2 are the targets for that input sample. I do this so that the targets for each sample can reach the loss function.
from scipy.io import wavfile
import scipy.signal as sg
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Lambda, TimeDistributed, Layer, LSTM, Bidirectional, BatchNormalization, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.activations import relu
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np
import datetime
import numpy as np
import math
import random
import json
import os
import sys
# Loss function
def discriminative_loss(piano_true, noise_true, piano_pred, noise_pred, loss_const):
    """Compute the per-sample discriminative source-separation loss.

    For every sample the value is::

        mse(noise_pred, noise_true) - loss_const * mse(noise_pred, piano_true)
        + mse(piano_pred, piano_true) - loss_const * mse(piano_pred, noise_true)

    where ``mse`` is the squared error averaged over axes 1 and 2. Because
    the cross terms are subtracted, the result can be negative.

    Args:
        piano_true: Target tensor for the piano source.
        noise_true: Target tensor for the noise source.
        piano_pred: Predicted piano tensor.
        noise_pred: Predicted noise tensor.
        loss_const: Weight applied to the two subtracted cross terms.

    Returns:
        A tensor with one loss value per entry along axis 0.

    NOTE(review): all four tensors are assumed to be rank 3 with identical
    shapes, presumably (batch, time, features) -- confirm against callers.
    """

    def _mse(pred, true):
        # Averaging over axes 1 and 2 gives the same value as flattening
        # those axes and averaging the last one, but does not require the
        # static shape to be fully known.
        return tf.math.reduce_mean((pred - true) ** 2, axis=[1, 2])

    return (
        _mse(noise_pred, noise_true)
        - (loss_const * _mse(noise_pred, piano_true))
        + _mse(piano_pred, piano_true)
        - (loss_const * _mse(piano_pred, noise_true))
    )
def make_model(features, sequences, name='Model'):
    """Assemble the functional three-input, two-output separation network.

    The mixed signal passes through one ReLU ``SimpleRNN`` with
    ``features // 2`` units and then through two parallel time-distributed
    dense heads. The two target inputs are declared as model inputs but are
    not wired to any layer here; they are only carried alongside the mixed
    input.

    Args:
        features: Size of the last axis of every input and output.
        sequences: Size of the time axis of every input.
        name: Accepted but not used by this function.

    Returns:
        An uncompiled ``Model`` whose inputs are
        ``[piano_noise_mixed, piano_true, noise_true]`` and whose outputs are
        ``[piano_hat, noise_hat]``.
    """
    io_shape = (sequences, features)

    mixed_in = Input(shape=io_shape, dtype='float32', name='piano_noise_mixed')
    piano_target_in = Input(shape=io_shape, dtype='float32', name='piano_true')
    noise_target_in = Input(shape=io_shape, dtype='float32', name='noise_true')

    # Shared recurrent trunk.
    hidden = SimpleRNN(features // 2,
                       activation='relu',
                       return_sequences=True)(mixed_in)

    # One dense head per source, applied independently at each time step.
    piano_hat = TimeDistributed(Dense(features), name='piano_hat')(hidden)
    noise_hat = TimeDistributed(Dense(features), name='noise_hat')(hidden)

    return Model(inputs=[mixed_in, piano_target_in, noise_target_in],
                 outputs=[piano_hat, noise_hat])
# Model "wrapper" for many-input loss function
class RestorationModel2(Model):
    """Wrapper that trains an inner model with a loss over both outputs.

    The wrapped model is called with ``(x, piano_true, noise_true)`` and must
    return ``(piano_pred, noise_pred)``. The loss callable given to
    ``compile`` receives
    ``(piano_true, noise_true, piano_pred, noise_pred, loss_const)``.
    """

    def __init__(self, model, loss_const):
        """Store the wrapped model and the loss weighting constant."""
        super().__init__()
        self.model = model
        self.loss_const = loss_const

    def call(self, inputs, training=None):
        """Forward ``inputs`` (and the training flag) to the wrapped model."""
        return self.model(inputs, training=training)

    def compile(self, optimizer, loss):
        """Register the optimizer and the five-argument loss callable."""
        super().compile()
        self.optimizer = optimizer
        self.loss = loss

    def _batch_loss(self, data, training):
        """Run the wrapped model on one batch and return a scalar loss."""
        # Unpack data - what the generator yields.
        x, piano_true, noise_true = data
        piano_pred, noise_pred = self.model((x, piano_true, noise_true),
                                            training=training)
        loss = self.loss(piano_true, noise_true, piano_pred, noise_pred,
                         self.loss_const)
        # The loss callable may return one value per sample. Reduce it to a
        # scalar so the logged 'loss'/'val_loss' can be compared by callbacks
        # such as EarlyStopping.
        return tf.math.reduce_mean(loss)

    def train_step(self, data):
        """Apply one optimizer update and report the batch-mean loss."""
        with tf.GradientTape() as tape:
            loss = self._batch_loss(data, training=True)
        trainable_vars = self.model.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        return {'loss': loss}

    def test_step(self, data):
        """Report the batch-mean loss without updating any weights."""
        return {'loss': self._batch_loss(data, training=False)}
def make_imp_model(features, sequences, loss_const=0.05,
                   optimizer=None,
                   name='Restoration Model', epsilon=10 ** (-10)):
    """Build and compile the wrapper model around a fresh functional model.

    Args:
        features: Feature dimension passed to ``make_model``.
        sequences: Sequence length passed to ``make_model``.
        loss_const: Weighting constant stored on the wrapper and handed to
            the loss.
        optimizer: Optimizer instance to train with. When ``None`` a new
            ``RMSprop(clipvalue=0.7)`` is created for this call, so separate
            models never share one optimizer object.
        name: Accepted but not used by this function.
        epsilon: Accepted but not used by this function.

    Returns:
        A compiled ``RestorationModel2``.
    """
    if optimizer is None:
        # Build the default per call: a default created in the signature
        # would be a single object shared by every model made here.
        optimizer = tf.keras.optimizers.RMSprop(clipvalue=0.7)

    # NEW Semi-imperative model
    model = RestorationModel2(make_model(features, sequences, name='Training Model'),
                              loss_const=loss_const)
    model.compile(optimizer=optimizer, loss=discriminative_loss)
    return model
# MODEL TRAIN & EVAL FUNCTION
def evaluate_source_sep(train_generator, validation_generator,
                        num_train, num_val, n_feat, n_seq, batch_size,
                        loss_const, epochs=20,
                        optimizer=None,
                        patience=10, epsilon=10 ** (-10)):
    """Build the wrapper model, fit it on the generators and print a summary.

    Args:
        train_generator: Generator yielding training batches.
        validation_generator: Generator yielding validation batches.
        num_train: Number of training samples, used to derive steps per epoch.
        num_val: Number of validation samples, used to derive validation steps.
        n_feat: Feature dimension of the model.
        n_seq: Sequence length of the model.
        batch_size: Batch size the generators produce.
        loss_const: Weighting constant forwarded to the model.
        epochs: Maximum number of epochs.
        optimizer: Optimizer instance. When ``None`` a new
            ``RMSprop(clipvalue=0.75)`` is created for this call.
        patience: ``EarlyStopping`` patience on ``val_loss``.
        epsilon: Forwarded to ``make_imp_model``.

    Returns:
        The object returned by ``model.fit``.
    """
    if optimizer is None:
        # Build the default per call rather than once in the signature, so
        # repeated runs never reuse the same optimizer object.
        optimizer = tf.keras.optimizers.RMSprop(clipvalue=0.75)

    print('Making model...')  # IMPERATIVE MODEL - Customize Fit
    model = make_imp_model(n_feat, n_seq, loss_const=loss_const,
                           optimizer=optimizer, epsilon=epsilon)

    print('Going into training now...')
    hist = model.fit(train_generator,
                     steps_per_epoch=math.ceil(num_train / batch_size),
                     epochs=epochs,
                     validation_data=validation_generator,
                     validation_steps=math.ceil(num_val / batch_size),
                     callbacks=[EarlyStopping('val_loss', patience=patience, mode='min')])
    # summary() prints by itself and returns None, so it is not wrapped in
    # print().
    model.summary()
    return hist
# NEURAL NETWORK DATA GENERATOR
def my_dummy_generator(num_samples, batch_size, train_seq, train_feat):
    """Yield an endless stream of random placeholder batches.

    One pass over ``range(0, num_samples, batch_size)`` is one epoch. Every
    batch has exactly ``batch_size`` rows, even when ``num_samples`` is not a
    multiple of ``batch_size``.

    Args:
        num_samples: Number of samples per epoch.
        batch_size: Number of rows in each yielded array.
        train_seq: Size of the second axis of each array.
        train_feat: Size of the last axis of each array.

    Yields:
        A tuple ``(x, y1, y2)`` of arrays shaped
        ``(batch_size, train_seq, train_feat)`` with values in ``[0, 1)``.
    """
    batch_shape = (batch_size, train_seq, train_feat)
    while True:
        for _ in range(0, num_samples, batch_size):
            # Use real random values: np.empty returns uninitialised memory,
            # which can contain NaN/inf and poison the loss.
            x = np.random.rand(*batch_shape)
            y1 = np.random.rand(*batch_shape)
            y2 = np.random.rand(*batch_shape)
            yield (x, y1, y2)
def main():
    """Train the wrapper model on dummy data with hard-coded settings."""
    # Hyper-parameters.
    epsilon = 10 ** (-10)
    train_batch_size = 5
    loss_const = 0.05
    epochs = 10
    val_split = 0.25
    optimizer = tf.keras.optimizers.RMSprop(clipvalue=0.9)

    # Data dimensions.
    train_seq = 1847
    train_feat = 2049
    total_samples = 60

    # Validation & training split: the first ceil(total * split) samples are
    # counted as validation, the rest as training.
    num_val = math.ceil(total_samples * val_split)
    num_train = total_samples - num_val

    print('Train Input Stats:')
    print('N Feat:', train_feat, 'Seq Len:', train_seq, 'Batch Size:', train_batch_size)

    # Create data generators and evaluate the model with them.
    generator_kwargs = dict(batch_size=train_batch_size,
                            train_seq=train_seq,
                            train_feat=train_feat)
    train_generator = my_dummy_generator(num_train, **generator_kwargs)
    validation_generator = my_dummy_generator(num_val, **generator_kwargs)

    evaluate_source_sep(train_generator, validation_generator,
                        num_train, num_val,
                        n_feat=train_feat, n_seq=train_seq,
                        batch_size=train_batch_size,
                        loss_const=loss_const, epochs=epochs,
                        optimizer=optimizer, epsilon=epsilon)


if __name__ == '__main__':
    main()
Thanks for the help!
Solution: don't pass your loss into model.add_loss(). Instead, concatenate your outputs together, which lets you pass your custom loss into model.compile(). Then split the concatenated outputs apart again inside the custom loss function.
class TimeFreqMasking(Layer):
    """Apply a soft magnitude-ratio mask to the mixed input.

    Called with ``(y_hat_self, y_hat_other, x_mixed)``, it returns
    ``x_mixed * |y_hat_self| / (|y_hat_self| + |y_hat_other| + epsilon)``.
    """

    # Init is for input-independent variables
    def __init__(self, epsilon, **kwargs):
        """Store ``epsilon``, the constant added to the mask denominator."""
        super().__init__(**kwargs)
        self.epsilon = epsilon

    # No build method, b/c passing in multiple inputs to layer (no single shape)
    def call(self, inputs):
        """Return the masked mixture for the "self" source."""
        y_hat_self, y_hat_other, x_mixed = inputs
        self_magnitude = tf.abs(y_hat_self)
        mask = self_magnitude / (self_magnitude + tf.abs(y_hat_other) + self.epsilon)
        return mask * x_mixed

    def get_config(self):
        """Return the layer config including ``epsilon``.

        Without this, the layer cannot be re-created from its config because
        ``epsilon`` is a required constructor argument.
        """
        config = super().get_config()
        config.update({'epsilon': self.epsilon})
        return config
def discrim_loss(y_true, y_pred):
    """Discriminative loss over a packed prediction tensor.

    ``y_true`` is split in half along its last axis into the piano and noise
    targets. ``y_pred`` is unpacked along axis 0: element ``[0, 0]`` of its
    final slice is read as the cross-term weight, and the remaining slices
    are split in half into the piano and noise predictions.

    Returns:
        A scalar: the two matching mean squared errors minus the weight times
        the two cross mean squared errors.
    """
    piano_true, noise_true = tf.split(y_true, num_or_size_splits=2, axis=-1)

    # The last axis-0 slice carries the weighting constant.
    loss_const = y_pred[-1, 0, 0]
    piano_pred, noise_pred = tf.split(y_pred[:-1], num_or_size_splits=2, axis=0)

    flat_width = piano_pred.shape[1] * piano_pred.shape[2]

    def _mean_sq_err(pred, true):
        # Flatten each sample, square the error, then average everything.
        flat_err = tf.reshape(pred - true, shape=(-1, flat_width))
        return tf.math.reduce_mean(flat_err ** 2)

    noise_match = _mean_sq_err(noise_pred, noise_true)
    noise_cross = _mean_sq_err(noise_pred, piano_true)
    piano_match = _mean_sq_err(piano_pred, piano_true)
    piano_cross = _mean_sq_err(piano_pred, noise_true)

    return (noise_match - (loss_const * noise_cross)
            + piano_match - (loss_const * piano_cross))
def make_model(features, sequences, epsilon, loss_const, optimizer=None):
    """Build and compile the single-output masking model.

    The mixed input passes through two stacked ReLU ``SimpleRNN`` layers and
    two time-distributed dense heads. Each head is turned into a masked
    prediction by ``TimeFreqMasking``. The piano prediction, the noise
    prediction and a one-row slice filled with ``loss_const`` are then
    concatenated along axis 0 into one output, which ``discrim_loss``
    unpacks again.

    Args:
        features: Size of the last axis of the input and of each prediction.
        sequences: Size of the time axis.
        epsilon: Constant passed to both ``TimeFreqMasking`` layers.
        loss_const: Cross-term weight packed into the model output.
        optimizer: Optimizer to compile with. When ``None`` a new
            ``tf.keras.optimizers.RMSprop()`` is created.

    Returns:
        The compiled ``Model``.
    """
    if optimizer is None:
        # The original body referenced an undefined ``optimizer`` name; make
        # it an optional argument with a per-call default instead.
        optimizer = tf.keras.optimizers.RMSprop()

    input_layer = Input(shape=(sequences, features), name='piano_noise_mixed')
    x = SimpleRNN(features // 2,
                  activation='relu',
                  return_sequences=True)(input_layer)
    x = SimpleRNN(features // 2,
                  activation='relu',
                  return_sequences=True)(x)
    piano_hat = TimeDistributed(Dense(features), name='piano_hat')(x)  # source 1 branch
    noise_hat = TimeDistributed(Dense(features), name='noise_hat')(x)  # source 2 branch
    piano_pred = TimeFreqMasking(epsilon=epsilon,
                                 name='piano_pred')((piano_hat, noise_hat, input_layer))
    noise_pred = TimeFreqMasking(epsilon=epsilon,
                                 name='noise_pred')((noise_hat, piano_hat, input_layer))

    # Give the constant the same dtype as the predictions: an integer
    # loss_const would otherwise produce an int32 tensor that cannot be
    # concatenated with floating-point tensors.
    loss_const_tensor = tf.broadcast_to(
        tf.constant(loss_const, dtype=piano_pred.dtype),
        [1, sequences, features])
    preds_and_gamma = Concatenate(axis=0)([piano_pred,
                                           noise_pred,
                                           loss_const_tensor])

    model = Model(inputs=input_layer, outputs=preds_and_gamma)
    model.compile(optimizer=optimizer, loss=discrim_loss)
    return model
def dummy_generator(num_samples, batch_size, num_seq, num_feat):
    """Yield an endless stream of random ``(inputs, targets)`` batches.

    One pass over ``range(0, num_samples, batch_size)`` is one epoch. The two
    random target arrays are concatenated along the last axis, so the targets
    are twice as wide as the inputs.

    Args:
        num_samples: Number of samples per epoch.
        batch_size: Number of rows in each yielded array.
        num_seq: Size of the second axis.
        num_feat: Size of the last axis of the inputs.

    Yields:
        A tuple ``(x, y)`` with ``x`` shaped ``(batch_size, num_seq, num_feat)``
        and ``y`` shaped ``(batch_size, num_seq, 2 * num_feat)``.
    """
    batch_shape = (batch_size, num_seq, num_feat)
    while True:
        for _ in range(0, num_samples, batch_size):
            x = np.random.rand(*batch_shape)
            y1 = np.random.rand(*batch_shape)
            y2 = np.random.rand(*batch_shape)
            # Yield a tuple, not a list: Keras documents generator batches as
            # (inputs, targets) tuples.
            yield (x, np.concatenate((y1, y2), axis=-1))
# Small end-to-end run of the concatenated-output model on random data.
total_samples = 6
batch_size = 2
time_steps = 3
features = 4
loss_const = 2
epochs = 10
val_split = 0.25
epsilon = 10 ** (-10)

model = make_model(features, time_steps, epsilon, loss_const)
# summary() prints by itself and returns None, so it is not wrapped in print().
model.summary()

# Split the sample count into validation and training portions. The original
# lines used the undefined names `actual_samples` and `val_samples`.
num_val = math.ceil(total_samples * val_split)
num_train = total_samples - num_val

train_dataset = dummy_generator(num_train, batch_size, time_steps, features)
val_dataset = dummy_generator(num_val, batch_size, time_steps, features)

model.fit(train_dataset,
          steps_per_epoch=math.ceil(num_train / batch_size),
          epochs=epochs,
          validation_data=val_dataset,
          validation_steps=math.ceil(num_val / batch_size))
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.