![](/img/trans.png)
[英]Tensorflow-gpu cudnn_cnn_infer64_8.dll not recognised [Error code 193]
[英]Tensorflow GPU version CNN CuDNN error on Windows 10
我正在嘗試使用Python Tensorflow CNN訓練CIFAR-100,但是錯誤CUDA_ERROR_OUT_OF_MEMORY、CUDNN_STATUS_NOT_INITIALIZED和CUDNN_STATUS_BAD_PARAM一直困擾着我。我正在使用Anaconda虛擬環境中的Python,Tensorflow的版本是1.1.0。這是我的代碼:
tf_cifar_learning.py:
# Set working directory
import os
dir_model = "c:/tf_model_cifar100"
# Modules needed
import numpy as np
import tensorflow as tf
import pandas as pd
from mlxtend.preprocessing import one_hot
# Load CIFAR Data
from batch import next_batch
from read import unpickle
import time
# NOTE: the log recorded for this run reports CUDA_ERROR_OUT_OF_MEMORY while
# TensorFlow tries to reserve this share of a 2 GiB card with 1.66 GiB free;
# a smaller fraction may be needed on such a device.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.995)
# Prepare test data: load the pickled test split once and reuse it for both
# the pixel rows and the labels.
test_split = unpickle('test')
testdata = test_split[b'data'][0:5000, :].astype('float')
# The training batches are built from b'fine_labels' and the network has
# 100 outputs, so the test labels must come from the same key. Scoring
# fine-class predictions against b'coarse_labels' would make the reported
# test accuracy meaningless.
testlabel = one_hot(test_split[b'fine_labels'][0:5000], 100)
del test_split
# Scale pixel values by 1/255, one whole row per step instead of one element
# per step; the progress messages are unchanged.
for i in range(testdata.shape[0]):
    testdata[i] /= 255.0
    if i % 1000 == 0:
        print("%d of 5000 test datasets processed" % i)
# Training hyper-parameters
learning_rate = 1e-3
training_iters = 10 ** 6
batch_size = 10  # 128
display_step = 2

# Network dimensions
n_input = 32 * 32 * 3  # length of one flattened input row (32x32 image, 3 channels)
n_classes = 100  # number of output classes
dropout = 0.75  # probability of keeping a unit when dropout is applied

# Graph inputs; the leading dimension is left open so any batch size can be fed
x = tf.placeholder(tf.float32, shape=[None, n_input])
y = tf.placeholder(tf.float32, shape=[None, n_classes])
keep_prob = tf.placeholder(tf.float32)  # dropout keep probability
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    """Apply a SAME-padded 2-D convolution, add the bias, then ReLU.

    Args:
        x: input tensor passed to ``tf.nn.conv2d``.
        W: convolution kernel.
        b: bias added to the convolution output.
        strides: step used for both middle entries of the stride vector.

    Returns:
        The ReLU-activated tensor.
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, b))
def maxpool2d(x, k=2):
    """Max-pool ``x`` with a k-by-k window and a stride of k (SAME padding)."""
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
# Create model
def conv_net(x, weights, biases, dropout):
    """Build the two-convolution CNN and return the unnormalised class scores.

    Args:
        x: batch of flattened images, one row of 3072 values per image.
        weights: dict with the kernels/matrices 'wc1', 'wc2', 'wd1', 'out'.
        biases: dict with the bias vectors 'bc1', 'bc2', 'bd1', 'out'.
        dropout: keep probability handed to ``tf.nn.dropout``.

    Returns:
        The output-layer tensor (logits); no softmax is applied here.
    """
    # CIFAR python-format rows are channel-major: 1024 red values, then 1024
    # green, then 1024 blue, each plane in row-major order. Reshaping straight
    # to [-1, 32, 32, 3] would interleave unrelated pixels, so unpack as NCHW
    # first and then move the channel axis last (NHWC) for tf.nn.conv2d.
    x = tf.reshape(x, shape=[-1, 3, 32, 32])
    x = tf.transpose(x, perm=[0, 2, 3, 1])

    # First convolution + 2x2 max pooling (down-sampling)
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    conv1 = maxpool2d(conv1, k=2)

    # Second convolution + 2x2 max pooling (down-sampling)
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    conv2 = maxpool2d(conv2, k=2)

    # Fully connected layer: flatten conv2 to the row length 'wd1' expects
    fc_inputs = weights['wd1'].get_shape().as_list()[0]
    fc1 = tf.reshape(conv2, [-1, fc_inputs])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Apply dropout
    fc1 = tf.nn.dropout(fc1, dropout)

    # Output, class prediction
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out
# Store layers weight & bias
def _random_normal_variable(shape):
    """Return a tf.Variable initialised from ``tf.random_normal(shape)``."""
    return tf.Variable(tf.random_normal(shape))


weights = {
    # 5x5 conv, 3 input channels, 32 outputs
    'wc1': _random_normal_variable([5, 5, 3, 32]),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc2': _random_normal_variable([5, 5, 32, 64]),
    # fully connected, 8*8*64 inputs, 1024 outputs
    'wd1': _random_normal_variable([8*8*64, 1024]),
    # 1024 inputs, n_classes outputs (class prediction)
    'out': _random_normal_variable([1024, n_classes]),
}
biases = {
    # one bias per output of the matching entry in `weights`
    'bc1': _random_normal_variable([32]),
    'bc2': _random_normal_variable([64]),
    'bd1': _random_normal_variable([1024]),
    'out': _random_normal_variable([n_classes]),
}
# Forward pass: class scores for whatever batch is fed into `x`
pred = conv_net(x, weights, biases, keep_prob)

# Loss is the mean softmax cross-entropy over the batch, minimised with Adam
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)
cost = tf.reduce_mean(per_example_loss)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Accuracy: fraction of rows whose highest-scoring class matches the label
predicted_class = tf.argmax(pred, 1)
true_class = tf.argmax(y, 1)
correct_pred = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Op that initialises every variable created above
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(init)
    step = 1
    # Time measuring
    t1 = time.time()
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        # Prepare training batch: scale the raw pixel values by 1/255 in one
        # vectorised operation instead of a Python loop over every element.
        batch_x, batch_y = next_batch(batch_size)
        batch_x1 = (np.asarray(batch_x) / 255.0).astype("float32")
        # Run optimization op (backprop)
        sess.run(optimizer, feed_dict={x: batch_x1, y: batch_y, keep_prob: dropout})
        if step % display_step == 0:
            # Batch loss with dropout disabled. The batch accuracy used to be
            # fetched here as well but was overwritten before being used.
            loss = sess.run(cost, feed_dict={x: batch_x1, y: batch_y, keep_prob: 1.})
            # Calculate accuracy for all test samples
            acc = accuracy.eval({x: testdata, y: testlabel, keep_prob: 1.})
            # Time measuring
            t2 = time.time()
            tmp = t2-t1
            sec = tmp % 60
            m = int(tmp / 60)
            print("Iter# %8d"%(step*batch_size) + \
                  ", Minibatch Loss= %16.10f"%(loss) + \
                  ", Testing Accuracy= %8.6f"%(acc) + \
                  ", Training currently elapsed " + \
                  "{:d} mins {:f} secs".format(m, sec))
        step += 1
    print("Optimization Finished!")
    # Save the model after learning. Create the target directory first so a
    # missing folder cannot make the save fail after the whole training run.
    os.makedirs(dir_model, exist_ok=True)
    model_saver = tf.train.Saver()
    model_saver.save(sess, dir_model + "/CIFAR-100_cnn_model.chkp")
batch.py:
def next_batch(batch_size, onehot=True):
    """Return the next ``batch_size`` training rows and their fine labels.

    The training pickle is loaded on the first call and cached on the
    function object together with a read cursor, so consecutive calls walk
    through the data set and wrap around to the start when the end is
    reached. (Previously the cursor lived in a class that was re-created on
    every call, so every call returned the same first rows.)

    Args:
        batch_size: number of rows to return.
        onehot: when true, labels are returned as a ``(batch_size, 100)``
            float32 one-hot matrix; otherwise as a 1-D array of class ids.

    Returns:
        Tuple ``(data, labels)`` where ``data`` has shape
        ``(batch_size, 3072)``.

    Raises:
        ValueError: if the training split contains no samples.
    """
    import numpy as np
    from read import unpickle

    cache = getattr(next_batch, "_cache", None)
    if cache is None:
        # Read the pickle only once; re-reading it for every batch is slow.
        raw = unpickle('train')
        cache = {
            "data": np.asarray(raw[b'data']),
            "labels": np.asarray(raw[b'fine_labels']),
            "cursor": 0,
        }
        next_batch._cache = cache

    total = cache["labels"].shape[0]
    if total == 0:
        raise ValueError("training split is empty")

    # Modular indices let a batch straddle the end of the data set.
    picked = (cache["cursor"] + np.arange(batch_size)) % total
    cache["cursor"] = (cache["cursor"] + batch_size) % total

    a2 = cache["data"][picked].reshape(batch_size, 3072)
    label = cache["labels"][picked]
    if onehot:
        # Plain NumPy one-hot encoding: avoids opening a new tf.Session (and
        # adding new graph ops) on every call just to expand the labels.
        label = np.eye(100, dtype=np.float32)[label]
    return a2, label
read.py:
def unpickle(file):
    """Load and return the pickled object stored at path ``file``.

    The file is opened in binary mode and decoded with ``encoding='bytes'``.
    NOTE: ``pickle`` can execute arbitrary code while loading; only use this
    on files from a trusted source.
    """
    from pickle import load

    with open(file, 'rb') as stream:
        return load(stream, encoding='bytes')
在Windows CMD中運行:python tf_cifar_learning.py
輸出:
(tensorflow) C:\Users\Administrator\learn_tensorflow\cifar-100-python>python tf_cifar_learning.py
0 of 5000 test datasets processed
1000 of 5000 test datasets processed
2000 of 5000 test datasets processed
3000 of 5000 test datasets processed
4000 of 5000 test datasets processed
2017-05-02 17:48:46.635855: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.635975: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE2 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.637256: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE3 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.638434: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.638939: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.639456: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.641753: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.641909: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.994154: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:887] Found device 0 with properties:
name: GeForce GT 730
major: 3 minor: 5 memoryClockRate (GHz) 0.9015
pciBusID 0000:01:00.0
Total memory: 2.00GiB
Free memory: 1.66GiB
2017-05-02 17:48:46.994318: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:908] DMA: 0
2017-05-02 17:48:46.997080: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:918] 0: Y
2017-05-02 17:48:46.997985: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:977] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GT 730, pci bus id: 0000:01:00.0)
2017-05-02 17:48:46.999359: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_driver.cc:893] failed to allocate 1.99G (2136745984 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY
2017-05-02 17:48:46.999434: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_driver.cc:893] failed to allocate 1.79G (1923071488 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY
2017-05-02 17:48:47.766766: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:977] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GT 730, pci bus id: 0000:01:00.0)
2017-05-02 17:48:48.334298: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:359] could not create cudnn handle: CUDNN_STATUS_NOT_INITIALIZED
2017-05-02 17:48:48.334466: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:366] error retrieving driver version: Unimplemented: kernel reported driver version not implemented on Windows
2017-05-02 17:48:48.343454: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:326] could not destroy cudnn handle: CUDNN_STATUS_BAD_PARAM
2017-05-02 17:48:48.343558: F c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\kernels\conv_ops.cc:659] Check failed: stream->parent()->GetConvolveAlgorithms(&algorithms)
(tensorflow) C:\Users\Administrator\learn_tensorflow\cifar-100-python>
Windows 10說Python已經停止工作並立即將其殺死,有人可以告訴我問題出在哪里,然后告訴我(或者舉個例子)如何解決它嗎?
問題很可能與您的環境有關。
您只有一個GPU,可能您也在使用它進行顯示。 這就是為什么TensorFlow無法分配其所需的所有內存的原因。 您可以像這樣使用per_process_gpu_memory_fraction來控制要使用的GPU內存量:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/framework/test_util.py#L388
關於Cudnn,似乎Cudnn庫無法初始化自身“ CUDNN_STATUS_NOT_INITIALIZED”。 您確定可以在該環境中運行其他Cuda和Cudnn示例嗎?
嘗試將per_process_gpu_memory_fraction=0.995改為0.7或0.6這樣較小的值
現在我知道發生了什么事。 它實際上是一個OOM。 重新啟動並減小批處理大小即可。
使用conda安裝的TensorFlow-gpu == 1.13.1時遇到相同的錯誤。 經過幾天的努力,我用下面的代碼解決了這個問題:
# Request on-demand GPU memory growth via the allow_growth option.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
sess = tf.Session(config=config)
# `K` is not defined in this snippet; presumably the Keras backend module
# (e.g. `from keras import backend as K`). Confirm before use.
K.set_session(sess)
我認為問題是由將顯示器插入圖形卡引起的。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.