
GPU out of memory when initializing model

I am trying to build a Siamese neural network with a triplet loss function using TensorFlow. This is what it looks like:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Lambda
from tensorflow.keras.regularizers import l2
from tensorflow.keras import backend as K

def build_network(input_shape, embeddingsize):
    network = Sequential()
    network.add(Conv2D(128, (7,7), activation='relu',
                     input_shape=input_shape,
                     kernel_initializer='he_uniform',
                     kernel_regularizer=l2(2e-4)))
    network.add(MaxPooling2D())
    network.add(Conv2D(128, (3,3), activation='relu', kernel_initializer='he_uniform',
                     kernel_regularizer=l2(2e-4)))
    network.add(MaxPooling2D())
    network.add(Conv2D(256, (3,3), activation='relu', kernel_initializer='he_uniform',
                     kernel_regularizer=l2(2e-4)))
    network.add(Flatten())
    network.add(Dense(4096, activation='relu',
                   kernel_regularizer=l2(1e-3),
                   kernel_initializer='he_uniform'))

    network.add(Dense(embeddingsize, activation=None,
                   kernel_regularizer=l2(1e-3),
                   kernel_initializer='he_uniform'))

    # Force the encoding to live on the d-dimensional hypersphere
    network.add(Lambda(lambda x: K.l2_normalize(x, axis=-1)))

    return network

When I try to initialize the model with this code:

emb_dim = 64
embedding_model = build_network(X_train[1].shape, emb_dim)

embedding_model.summary()

it fails with this error:

ResourceExhaustedError                    Traceback (most recent call last)
<ipython-input-22-9a90ee998c2d> in <module>
      1 emb_dim = 64
      2 
----> 3 embedding_model = build_network(X_train[1].shape, emb_dim)
      4 
      5 # embedding_model = Sequential([

<ipython-input-19-f51afd4ad3e5> in build_network(input_shape, embeddingsize)
     21     network.add(Dense(4096, activation='relu',
     22                    kernel_regularizer=l2(1e-3),
---> 23                    kernel_initializer='he_uniform'))
     24 
     25 

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
    455     self._self_setattr_tracking = False  # pylint: disable=protected-access
    456     try:
--> 457       result = method(self, *args, **kwargs)
    458     finally:
    459       self._self_setattr_tracking = previous_value  # pylint: disable=protected-access

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\keras\engine\sequential.py in add(self, layer)
    201       # If the model is being built continuously on top of an input layer:
    202       # refresh its output.
--> 203       output_tensor = layer(self.outputs[0])
    204       if len(nest.flatten(output_tensor)) != 1:
    205         raise TypeError('All layers in a Sequential model '

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs)
    746           # Build layer if applicable (if the `build` method has been
    747           # overridden).
--> 748           self._maybe_build(inputs)
    749           cast_inputs = self._maybe_cast_inputs(inputs)
    750 

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py in _maybe_build(self, inputs)
   2114         # operations.
   2115         with tf_utils.maybe_init_scope(self):
-> 2116           self.build(input_shapes)
   2117       # We must set self.built since user defined build functions are not
   2118       # constrained to set self.built.

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\keras\layers\core.py in build(self, input_shape)
   1111         constraint=self.kernel_constraint,
   1112         dtype=self.dtype,
-> 1113         trainable=True)
   1114     if self.use_bias:
   1115       self.bias = self.add_weight(

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py in add_weight(self, name, shape, dtype, initializer, regularizer, trainable, constraint, partitioner, use_resource, synchronization, aggregation, **kwargs)
    444         synchronization=synchronization,
    445         aggregation=aggregation,
--> 446         caching_device=caching_device)
    447     backend.track_variable(variable)
    448 

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\training\tracking\base.py in _add_variable_with_custom_getter(self, name, shape, dtype, initializer, getter, overwrite, **kwargs_for_getter)
    742         dtype=dtype,
    743         initializer=initializer,
--> 744         **kwargs_for_getter)
    745 
    746     # If we set an initializer and the variable processed it, tracking will not

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\keras\engine\base_layer_utils.py in make_variable(name, shape, dtype, initializer, trainable, caching_device, validate_shape, constraint, use_resource, collections, synchronization, aggregation, partitioner)
    140       synchronization=synchronization,
    141       aggregation=aggregation,
--> 142       shape=variable_shape if variable_shape else None)
    143 
    144 

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\ops\variables.py in __call__(cls, *args, **kwargs)
    256   def __call__(cls, *args, **kwargs):
    257     if cls is VariableV1:
--> 258       return cls._variable_v1_call(*args, **kwargs)
    259     elif cls is Variable:
    260       return cls._variable_v2_call(*args, **kwargs)

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\ops\variables.py in _variable_v1_call(cls, initial_value, trainable, collections, validate_shape, caching_device, name, variable_def, dtype, expected_shape, import_scope, constraint, use_resource, synchronization, aggregation, shape)
    217         synchronization=synchronization,
    218         aggregation=aggregation,
--> 219         shape=shape)
    220 
    221   def _variable_v2_call(cls,

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\ops\variables.py in <lambda>(**kwargs)
    195                         shape=None):
    196     """Call on Variable class. Useful to force the signature."""
--> 197     previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
    198     for _, getter in ops.get_default_graph()._variable_creator_stack:  # pylint: disable=protected-access
    199       previous_getter = _make_getter(getter, previous_getter)

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\ops\variable_scope.py in default_variable_creator(next_creator, **kwargs)
   2594         synchronization=synchronization,
   2595         aggregation=aggregation,
-> 2596         shape=shape)
   2597   else:
   2598     return variables.RefVariable(

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\ops\variables.py in __call__(cls, *args, **kwargs)
    260       return cls._variable_v2_call(*args, **kwargs)
    261     else:
--> 262       return super(VariableMetaclass, cls).__call__(*args, **kwargs)
    263 
    264 

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\ops\resource_variable_ops.py in __init__(self, initial_value, trainable, collections, validate_shape, caching_device, name, dtype, variable_def, import_scope, constraint, distribute_strategy, synchronization, aggregation, shape)
   1409           aggregation=aggregation,
   1410           shape=shape,
-> 1411           distribute_strategy=distribute_strategy)
   1412 
   1413   def _init_from_args(self,

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\ops\resource_variable_ops.py in _init_from_args(self, initial_value, trainable, collections, caching_device, name, dtype, constraint, synchronization, aggregation, distribute_strategy, shape)
   1540           with ops.name_scope("Initializer"), device_context_manager(None):
   1541             initial_value = ops.convert_to_tensor(
-> 1542                 initial_value() if init_from_fn else initial_value,
   1543                 name="initial_value", dtype=dtype)
   1544           if shape is not None:

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\keras\engine\base_layer_utils.py in <lambda>()
    120           (type(init_ops.Initializer), type(init_ops_v2.Initializer))):
    121         initializer = initializer()
--> 122       init_val = lambda: initializer(shape, dtype=dtype)
    123       variable_dtype = dtype.base_dtype
    124   if use_resource is None:

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\ops\init_ops_v2.py in __call__(self, shape, dtype)
    423     else:
    424       limit = math.sqrt(3.0 * scale)
--> 425       return self._random_generator.random_uniform(shape, -limit, limit, dtype)
    426 
    427   def get_config(self):

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\ops\init_ops_v2.py in random_uniform(self, shape, minval, maxval, dtype)
    786       op = random_ops.random_uniform
    787     return op(
--> 788         shape=shape, minval=minval, maxval=maxval, dtype=dtype, seed=self.seed)
    789 
    790   def truncated_normal(self, shape, mean, stddev, dtype):

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\ops\random_ops.py in random_uniform(shape, minval, maxval, dtype, seed, name)
    271     else:
    272       rnd = gen_random_ops.random_uniform(shape, dtype, seed=seed1, seed2=seed2)
--> 273       result = math_ops.add(rnd * (maxval - minval), minval, name=name)
    274     # TODO(b/132092188): C++ shape inference inside functional ops does not
    275     # cross FuncGraph boundaries since that information is only available in

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\ops\gen_math_ops.py in add(x, y, name)
    341         raise
    342     except _core._NotOkStatusException as e:
--> 343       _ops.raise_from_not_ok_status(e, name)
    344   # Add nodes to the TensorFlow graph.
    345   try:

~\.conda\envs\py36\lib\site-packages\tensorflow_core\python\framework\ops.py in raise_from_not_ok_status(e, name)
   6604   message = e.message + (" name: " + name if name is not None else "")
   6605   # pylint: disable=protected-access
-> 6606   six.raise_from(core._status_to_exception(e.code, message), None)
   6607   # pylint: enable=protected-access
   6608 

~\.conda\envs\py36\lib\site-packages\six.py in raise_from(value, from_value)

ResourceExhaustedError: OOM when allocating tensor with shape[278784,4096] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Add] name: dense/kernel/Initializer/random_uniform/

I am using a Microsoft Azure virtual machine with an NVIDIA K80 GPU. There is one GPU available with 12 GB of memory. I checked nvidia-smi and it looks like the model is taking all of the memory:

+-----------------------------------------------------------------------------+
| NVIDIA-SMI 426.00       Driver Version: 426.00       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  Tesla K80           TCC  | 00000001:00:00.0 Off |                    0 |
| N/A   54C    P0    55W / 149W |  10889MiB / 11448MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|    0      6620      C   ...cbbivmadmin\.conda\envs\py36\python.exe 10766MiB |
+-----------------------------------------------------------------------------+

When I tried to load the same model on another machine with only a CPU, it worked:

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 144, 144, 128)     18944     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 72, 72, 128)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 70, 70, 128)       147584    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 35, 35, 128)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 33, 33, 256)       295168    
_________________________________________________________________
flatten (Flatten)            (None, 278784)            0         
_________________________________________________________________
dense (Dense)                (None, 4096)              1141903360
_________________________________________________________________
dense_1 (Dense)              (None, 64)                262208    
_________________________________________________________________
lambda (Lambda)              (None, 64)                0         
=================================================================
Total params: 1,142,627,264
Trainable params: 1,142,627,264
Non-trainable params: 0

Moreover, I am not sure why it is loading into GPU memory by default and taking all of the memory.

You seem to have the wrong size specified somewhere:

OOM when allocating tensor with shape[278784,4096] and type float
                                      ^^^^^^

That 278784 is the 33 × 33 × 256 feature map your Flatten layer produces, so the kernel of the following Dense(4096) layer is a [278784, 4096] float32 tensor: about 1.14 billion parameters and over 4 GB of memory on its own, which matches the CPU model summary above. Make sure you're using the correct sizes when defining the layers in your model; either shrink the feature map before the dense layers or make the dense layers smaller.
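For example, here is a minimal sketch of one possible fix. Replacing Flatten with GlobalAveragePooling2D is my substitution, not necessarily the embedding architecture you intended, but it shows how much the first Dense kernel shrinks:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Conv2D, MaxPooling2D,
                                     GlobalAveragePooling2D, Dense, Lambda)
from tensorflow.keras.regularizers import l2
from tensorflow.keras import backend as K

def build_network(input_shape, embeddingsize):
    network = Sequential()
    network.add(Conv2D(128, (7,7), activation='relu', input_shape=input_shape,
                       kernel_initializer='he_uniform', kernel_regularizer=l2(2e-4)))
    network.add(MaxPooling2D())
    network.add(Conv2D(128, (3,3), activation='relu',
                       kernel_initializer='he_uniform', kernel_regularizer=l2(2e-4)))
    network.add(MaxPooling2D())
    network.add(Conv2D(256, (3,3), activation='relu',
                       kernel_initializer='he_uniform', kernel_regularizer=l2(2e-4)))
    # Global pooling collapses the (33, 33, 256) feature map to (256,),
    # so the next Dense kernel is [256, 4096] (~1M parameters) instead of
    # [278784, 4096] (~1.1B parameters).
    network.add(GlobalAveragePooling2D())
    network.add(Dense(4096, activation='relu',
                      kernel_regularizer=l2(1e-3), kernel_initializer='he_uniform'))
    network.add(Dense(embeddingsize, activation=None,
                      kernel_regularizer=l2(1e-3), kernel_initializer='he_uniform'))
    # Force the encoding to live on the d-dimensional hypersphere
    network.add(Lambda(lambda x: K.l2_normalize(x, axis=-1)))
    return network

With global pooling, the first dense layer has roughly a million parameters, so the model fits comfortably in the K80's memory.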


Update:

I checked nvidia-smi and it looks like the model is taking all of the memory

Unless told otherwise, TensorFlow preallocates almost all of the GPU memory and runs its own memory-allocation strategy inside that pool, so nvidia-smi will always show the GPU's memory as nearly fully used, regardless of how much the model actually needs.
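If you want TensorFlow to allocate GPU memory on demand instead, you can turn on memory growth. A minimal sketch, assuming TF 2.x (which the tensorflow_core paths in your traceback suggest); note that this only changes the allocation strategy and will not make a model that is genuinely too large fit:

import tensorflow as tf

# Allocate GPU memory as needed instead of preallocating almost all of it.
# This must run before any operation touches the GPU.
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)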
