简体   繁体   English

cifar10的分段错误(核心转储)错误

[英]Segmentation fault (core dumped) error for cifar10 example tensorflow

I am trying to adjust the example cifar10 code and I am not sure why I get a Segmentation fault (core dumped) error when I run my adjusted cifar10_eval.py. 我正在尝试调整示例cifar10代码,但是我不确定为什么在运行修改后的cifar10_eval.py时出现分段错误(核心转储)错误。 It seems like this code actually works on Mac and I am not sure why it doesn't work on Linux. 看起来这段代码实际上可以在Mac上运行,但我不确定为什么它在Linux上无法运行。

Thanks for your help. 谢谢你的帮助。

----------------------- Code below ----------------------- ----------------------- 代码如下 -----------------------

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Evaluation for CIFAR-10
Accuracy:
cifar10_train.py achieves 83.0% accuracy after 100K steps (256 epochs
of data) as judged by cifar10_eval.py.
Speed:
On a single Tesla K40, cifar10_train.py processes a single batch of 128 images
in 0.25-0.35 sec (i.e. 350 - 600 images /sec). The model reaches ~86%
accuracy after 100K steps in 8 hours of training time.
Usage:
Please see the tutorial and website for how to download the CIFAR-10
data set, compile the program and train the model.
http://tensorflow.org/tutorials/deep_cnn/
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datetime import datetime
import math
import time

import numpy as np
import tensorflow as tf
import os
import StringIO
import cv
import cv2
import urllib


from PIL import Image

import matplotlib

import glob

import cifar10

# Working directory captured at import time; eval_once() writes its
# "result/" image tree underneath it.
cur_dir = os.getcwd()

FLAGS = tf.app.flags.FLAGS

# Command-line flags controlling where the evaluation reads from and writes to.
tf.app.flags.DEFINE_string(
    'eval_dir',
    '/tmp/cifar10_eval',
    """Directory where to write event logs.""")
tf.app.flags.DEFINE_string(
    'eval_data',
    'test',
    """Either 'test' or 'train_eval'.""")
tf.app.flags.DEFINE_string(
    'checkpoint_dir',
    '/tmp/cifar10_train',
    """Directory where to read model checkpoints.""")

# Command-line flags controlling how much and how often to evaluate.
tf.app.flags.DEFINE_integer(
    'eval_interval_secs',
    60 * 5,
    """How often to run the eval.""")
tf.app.flags.DEFINE_integer(
    'num_examples',
    128,
    """Number of examples to run.""")
tf.app.flags.DEFINE_boolean(
    'run_once',
    False,
    """Whether to run eval only once.""")


def _save_batch_images(batch_images, is_correct, predicted, first_index):
  """Write one evaluated batch to <cur_dir>/result/<outcome>/<class>/<n>.jpeg.

  Args:
    batch_images: Array of images, indexed by example along the first axis.
    is_correct: Per-example booleans (the evaluated top-k op output).
    predicted: Per-example predicted class index.
    first_index: Running index of the first example of this batch; used so
      that file names from later batches do not overwrite earlier ones.
  """
  for offset, image in enumerate(batch_images):
    # Stretch the image to the full 0..255 range. A constant image has no
    # range to stretch, so it is written as all zeros instead of dividing
    # by zero.
    low = image.min()
    span = image.max() - low
    if span > 0:
      scaled = (image - low) / span * 255
    else:
      scaled = np.zeros_like(image)

    # "Incorect" (sic) is kept so existing result trees keep their layout.
    outcome = "Correct" if is_correct[offset] else "Incorect"
    # float() keeps the original directory naming ("3.0" rather than "3").
    class_dir = os.path.join(cur_dir, "result", outcome,
                             str(float(predicted[offset])))
    if not os.path.isdir(class_dir):
      os.makedirs(class_dir, 0o755)

    # NOTE(review): cv2.imwrite treats the last axis as BGR; if the input
    # pipeline yields RGB images the saved colors are swapped - confirm.
    cv2.imwrite(os.path.join(class_dir, str(first_index + offset) + ".jpeg"),
                np.rint(scaled).astype(np.uint8))


def eval_once(saver, summary_writer, top_k_op, summary_op, images, labels,
              logits):
  """Run Eval once and save every evaluated image, sorted by outcome.

  Restores the latest checkpoint from FLAGS.checkpoint_dir, evaluates
  ceil(FLAGS.num_examples / FLAGS.batch_size) batches, prints precision @ 1,
  writes it to the summary writer, and stores each evaluated image under
  <cur_dir>/result/{Correct,Incorect}/<predicted class>/.

  Args:
    saver: Saver.
    summary_writer: Summary writer.
    top_k_op: Top K op.
    summary_op: Summary op.
    images: Tensor with the current batch of input images.
    labels: Tensor with the labels of the current batch. Not evaluated here;
      kept in the signature so existing callers keep working.
    logits: Tensor with the class scores for the current batch.
  """
  with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if not (ckpt and ckpt.model_checkpoint_path):
      print('No checkpoint file found')
      return

    # Restores from checkpoint
    saver.restore(sess, ckpt.model_checkpoint_path)
    # Assuming model_checkpoint_path looks something like:
    #   /my-favorite-path/cifar10_train/model.ckpt-0,
    # extract global_step from it.
    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]

    # Create the softmax op once; building it inside the loop would add a new
    # node to the graph on every step.
    softmax_op = tf.nn.softmax(logits)

    # Start the queue runners.
    coord = tf.train.Coordinator()
    threads = []
    try:
      for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
        threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
                                         start=True))

      batch_size = FLAGS.batch_size
      num_iter = int(math.ceil(FLAGS.num_examples / batch_size))
      total_sample_count = num_iter * batch_size
      true_count = 0  # Counts the number of correct predictions.
      step = 0

      while step < num_iter and not coord.should_stop():
        # Fetch everything in ONE run call. Every separate sess.run() pulls a
        # fresh batch from the input queue, so fetching these tensors
        # separately would pair images with predictions of another batch.
        correct, probabilities, batch_images = sess.run(
            [top_k_op, softmax_op, images])
        true_count += np.sum(correct)
        predicted = np.argmax(probabilities, axis=1)
        _save_batch_images(batch_images, correct, predicted,
                           step * batch_size)
        step += 1

      # Compute precision @ 1 over all evaluated batches.
      if total_sample_count:
        precision = true_count / total_sample_count
      else:
        precision = 0.0
      print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))

      summary = tf.Summary()
      summary.ParseFromString(sess.run(summary_op))
      summary.value.add(tag='Precision @ 1', simple_value=precision)
      summary_writer.add_summary(summary, global_step)
    except Exception as e:  # pylint: disable=broad-except
      # Hand the error to the coordinator so the queue threads shut down.
      coord.request_stop(e)

    coord.request_stop()
    coord.join(threads, stop_grace_period_secs=10)


def evaluate():
  """Build the CIFAR-10 eval graph and run a single evaluation pass.

  The graph reads the split selected by FLAGS.eval_data, computes the
  logits and the top-1 correctness op, and restores the moving-average
  versions of the variables. eval_once() is invoked exactly once.
  """
  with tf.Graph().as_default() as g:
    # Input pipeline: 'test' selects the eval split, anything else does not.
    use_test_split = FLAGS.eval_data == 'test'
    images, labels = cifar10.inputs(eval_data=use_test_split)

    # Inference model and per-example "label is the top prediction" op.
    logits = cifar10.inference(images)
    top_k_op = tf.nn.in_top_k(logits, labels, 1)

    # Saver that maps the moving-average variables onto the model variables.
    averages = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY)
    saver = tf.train.Saver(averages.variables_to_restore())

    # Summary op built from the TF collection of summaries, plus its writer.
    summary_op = tf.merge_all_summaries()
    summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g)

    eval_once(saver, summary_writer, top_k_op, summary_op, images, labels,
              logits)


def main(argv=None):  # pylint: disable=unused-argument
  """Fetch the dataset, reset FLAGS.eval_dir to an empty directory, evaluate."""
  cifar10.maybe_download_and_extract()

  # Start from a clean event-log directory on every run.
  eval_dir = FLAGS.eval_dir
  if tf.gfile.Exists(eval_dir):
    tf.gfile.DeleteRecursively(eval_dir)
  tf.gfile.MakeDirs(eval_dir)

  evaluate()


# Script entry point: only when executed directly (not when imported),
# hand control to tf.app.run().
if __name__ == '__main__':
 tf.app.run()

This looks like a recurring issue where the TensorFlow Python module clashes with code in the OpenCV and/or PIL libraries. 这似乎是一个反复出现的问题,其中TensorFlow Python模块与OpenCV和/或PIL库中的代码冲突。 The root cause is usually incompatible versions of libjpeg or libpng included in those libraries. 根本原因通常是这些库中包含的libjpeg或libpng版本不兼容。

This issue should be fixed in the latest nightly version of TensorFlow. 这个问题应该已在TensorFlow的最新nightly版本中修复。 As an alternative workaround, you could try moving the line: 作为替代解决方法,您可以尝试移动下面这一行:

import tensorflow as tf

...below the import statements for cv, cv2 and PIL. ...移到cv、cv2和PIL的import语句下面。

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM