I've tried using Tensorflow GPU accelerator in google colab with local runtime on my machine which has the following system information
I've followed all the steps precisely on https://www.tensorflow.org/install/gpu and ran then ran this code to check if it can discover my gpu and to see the difference in speed between it and cpu:
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))
import timeit
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
print(
'\n\nThis error most likely means that this notebook is not '
'configured to use a GPU. Change this in Notebook Settings via the '
'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
raise SystemError('GPU device not found')
def cpu():
with tf.device('/cpu:0'):
random_image_cpu = tf.random.normal((100, 100, 100, 3))
net_cpu = tf.keras.layers.Conv2D(32, 7)(random_image_cpu)
return tf.math.reduce_sum(net_cpu)
def gpu():
with tf.device('/device:GPU:0'):
random_image_gpu = tf.random.normal((100, 100, 100, 3))
net_gpu = tf.keras.layers.Conv2D(32, 7)(random_image_gpu)
return tf.math.reduce_sum(net_gpu)
# We run each op once to warm up; see: https://stackoverflow.com/a/45067900
cpu()
gpu()
# Run the op several times.
print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
'(batch x height x width x channel). Sum of ten runs.')
print('CPU (s):')
cpu_time = timeit.timeit('cpu()', number=10, setup="from __main__ import cpu")
print(cpu_time)
print('GPU (s):')
gpu_time = timeit.timeit('gpu()', number=10, setup="from __main__ import gpu")
print(gpu_time)
print('GPU speedup over CPU: {}x'.format(int(cpu_time/gpu_time)))
it returned the following error:
Found GPU at: /device:GPU:0
---------------------------------------------------------------------------
UnknownError Traceback (most recent call last)
<ipython-input-1-121519b30cf2> in <module>
29 # We run each op once to warm up; see: https://stackoverflow.com/a/45067900
30 cpu()
---> 31 gpu()
32
33 # Run the op several times.
<ipython-input-1-121519b30cf2> in gpu()
24 with tf.device('/device:GPU:0'):
25 random_image_gpu = tf.random.normal((100, 100, 100, 3))
---> 26 net_gpu = tf.keras.layers.Conv2D(32, 7)(random_image_gpu)
27 return tf.math.reduce_sum(net_gpu)
28
~\anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs)
820 with base_layer_utils.autocast_context_manager(
821 self._compute_dtype):
--> 822 outputs = self.call(cast_inputs, *args, **kwargs)
823 self._handle_activity_regularization(inputs, outputs)
824 self._set_mask_metadata(inputs, outputs, input_masks)
~\anaconda3\lib\site-packages\tensorflow_core\python\keras\layers\convolutional.py in call(self, inputs)
207 inputs = array_ops.pad(inputs, self._compute_causal_padding())
208
--> 209 outputs = self._convolution_op(inputs, self.kernel)
210
211 if self.use_bias:
~\anaconda3\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in __call__(self, inp, filter)
1133 call_from_convolution=False)
1134 else:
-> 1135 return self.conv_op(inp, filter)
1136 # copybara:strip_end
1137 # copybara:insert return self.conv_op(inp, filter)
~\anaconda3\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in __call__(self, inp, filter)
638
639 def __call__(self, inp, filter): # pylint: disable=redefined-builtin
--> 640 return self.call(inp, filter)
641
642
~\anaconda3\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in __call__(self, inp, filter)
237 padding=self.padding,
238 data_format=self.data_format,
--> 239 name=self.name)
240
241
~\anaconda3\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in conv2d(input, filter, strides, padding, use_cudnn_on_gpu, data_format, dilations, name, filters)
2009 data_format=data_format,
2010 dilations=dilations,
-> 2011 name=name)
2012
2013
~\anaconda3\lib\site-packages\tensorflow_core\python\ops\gen_nn_ops.py in conv2d(input, filter, strides, padding, use_cudnn_on_gpu, explicit_paddings, data_format, dilations, name)
931 input, filter, strides=strides, use_cudnn_on_gpu=use_cudnn_on_gpu,
932 padding=padding, explicit_paddings=explicit_paddings,
--> 933 data_format=data_format, dilations=dilations, name=name, ctx=_ctx)
934 except _core._SymbolicException:
935 pass # Add nodes to the TensorFlow graph.
~\anaconda3\lib\site-packages\tensorflow_core\python\ops\gen_nn_ops.py in conv2d_eager_fallback(input, filter, strides, padding, use_cudnn_on_gpu, explicit_paddings, data_format, dilations, name, ctx)
1020 explicit_paddings, "data_format", data_format, "dilations", dilations)
1021 _result = _execute.execute(b"Conv2D", 1, inputs=_inputs_flat, attrs=_attrs,
-> 1022 ctx=ctx, name=name)
1023 if _execute.must_record_gradient():
1024 _execute.record_gradient(
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 keras_symbolic_tensors = [
~\anaconda3\lib\site-packages\six.py in raise_from(value, from_value)
UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above. [Op:Conv2D]
and this is the log from the jupyter terminal:
2020-08-09 04:37:22.168805: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
2020-08-09 04:37:24.322956: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
2020-08-09 04:37:24.329330: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library nvcuda.dll
2020-08-09 04:37:25.599803: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce RTX 2060 computeCapability: 7.5
coreClock: 1.335GHz coreCount: 30 deviceMemorySize: 6.00GiB deviceMemoryBandwidth: 312.97GiB/s
2020-08-09 04:37:25.607874: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
2020-08-09 04:37:25.616921: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cublas64_10.dll
2020-08-09 04:37:25.626584: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cufft64_10.dll
2020-08-09 04:37:25.635135: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library curand64_10.dll
2020-08-09 04:37:25.650044: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusolver64_10.dll
2020-08-09 04:37:25.659390: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusparse64_10.dll
2020-08-09 04:37:25.681098: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudnn64_7.dll
2020-08-09 04:37:25.686397: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
2020-08-09 04:37:26.217444: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-08-09 04:37:26.222044: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0
2020-08-09 04:37:26.225124: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N
2020-08-09 04:37:26.228586: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/device:GPU:0 with 4604 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2060, pci bus id: 0000:01:00.0, compute capability: 7.5)
2020-08-09 04:37:26.239786: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce RTX 2060 computeCapability: 7.5
coreClock: 1.335GHz coreCount: 30 deviceMemorySize: 6.00GiB deviceMemoryBandwidth: 312.97GiB/s
2020-08-09 04:37:26.249100: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
2020-08-09 04:37:26.254350: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cublas64_10.dll
2020-08-09 04:37:26.260971: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cufft64_10.dll
2020-08-09 04:37:26.265307: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library curand64_10.dll
2020-08-09 04:37:26.271569: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusolver64_10.dll
2020-08-09 04:37:26.276251: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusparse64_10.dll
2020-08-09 04:37:26.281798: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudnn64_7.dll
2020-08-09 04:37:26.287682: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
2020-08-09 04:37:26.291846: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-08-09 04:37:26.298235: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0
2020-08-09 04:37:26.300794: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N
2020-08-09 04:37:26.305262: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/device:GPU:0 with 4604 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2060, pci bus id: 0000:01:00.0, compute capability: 7.5)
2020-08-09 04:37:26.313775: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce RTX 2060 computeCapability: 7.5
coreClock: 1.335GHz coreCount: 30 deviceMemorySize: 6.00GiB deviceMemoryBandwidth: 312.97GiB/s
2020-08-09 04:37:26.328318: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
2020-08-09 04:37:26.339994: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cublas64_10.dll
2020-08-09 04:37:26.345874: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cufft64_10.dll
2020-08-09 04:37:26.352587: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library curand64_10.dll
2020-08-09 04:37:26.359694: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusolver64_10.dll
2020-08-09 04:37:26.365286: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusparse64_10.dll
2020-08-09 04:37:26.371099: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudnn64_7.dll
2020-08-09 04:37:26.375749: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
2020-08-09 04:37:26.380113: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce RTX 2060 computeCapability: 7.5
coreClock: 1.335GHz coreCount: 30 deviceMemorySize: 6.00GiB deviceMemoryBandwidth: 312.97GiB/s
2020-08-09 04:37:26.393424: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
2020-08-09 04:37:26.403150: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cublas64_10.dll
2020-08-09 04:37:26.408577: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cufft64_10.dll
2020-08-09 04:37:26.423141: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library curand64_10.dll
2020-08-09 04:37:26.428838: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusolver64_10.dll
2020-08-09 04:37:26.434061: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusparse64_10.dll
2020-08-09 04:37:26.438479: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudnn64_7.dll
2020-08-09 04:37:26.443288: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
2020-08-09 04:37:26.446511: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-08-09 04:37:26.453204: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0
2020-08-09 04:37:26.458931: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N
2020-08-09 04:37:26.463016: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 4604 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2060, pci bus id: 0000:01:00.0, compute capability: 7.5)
2020-08-09 04:37:26.823644: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudnn64_7.dll
2020-08-09 04:37:27.877441: E tensorflow/stream_executor/cuda/cuda_dnn.cc:329] Could not create cudnn handle: CUDNN_STATUS_ALLOC_FAILED
2020-08-09 04:37:27.882143: E tensorflow/stream_executor/cuda/cuda_dnn.cc:329] Could not create cudnn handle: CUDNN_STATUS_ALLOC_FAILED
I've tried the solutions mentioned here https://forums.developer.nvidia.com/t/could-not-create-cudnn-handle-cudnn-status-alloc-failed/108261/2 but to no avail, I hope I can someone who can assist me in this here.
If tf.config.list_physical_devices('GPU')
returns device_type='GPU'
means, there was no issue with TF_GPU installation.
So, in the next step you can try GPU memory resources management by allowing GPU memory growth.
It can be done by calling tf.config.experimental.set_memory_growth
, which attempts to allocate only as much GPU memory as needed for the runtime allocations: it starts out allocating very little memory, and as the program gets run and more GPU memory is needed, we extend the GPU memory region allocated to the TensorFlow process.
To turn on memory growth for a specific GPU, use the following code prior to allocating any tensors or executing any ops.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
try:
# Currently, memory growth needs to be the same across GPUs
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
logical_gpus = tf.config.experimental.list_logical_devices('GPU')
print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
except RuntimeError as e:
# Memory growth must be set before GPUs have been initialized
print(e)
For more details please refer TensorFlow GPU Guide.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.