Python機器學習數字識別

Question

我跟着這個網站上的代碼：

https://blog.luisfred.com.br/reconhecimento-de-escrita-manual-com-redes-neurais-convolucionais/

下面是該網站逐步講解的代碼：

from keras. datasets import mnist
from keras. models import Sequential
from keras. layers import Dense
from keras. layers import Dropout
from keras. layers import Flatten
import numpy as np
from matplotlib import pyplot as plt
from keras. layers . convolutional import Conv2D
from keras. layers . convolutional import MaxPooling2D
from keras. utils import np_utils
from keras import backend as K
# Use channels-first tensors (samples, depth, rows, cols), which is the layout
# the reshape calls and input_shape=(1, 28, 28) in this script rely on.
# set_image_data_format('channels_first') is the supported spelling of the
# legacy set_image_dim_ordering('th'), which later Keras releases removed.
K.set_image_data_format('channels_first')
import cv2
import matplotlib. pyplot as plt
#% inline matplotlib # If you are using Jupyter, it will be useful for plotting graphics or figures inside cells

# Load MNIST, already divided into training and testing subsets.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# The images are grayscale, so the depth axis has size 1.  Convert to float32
# and normalize the pixel values from the range [0, 255] to [0, 1].
X_train = X_train.reshape(-1, 1, 28, 28).astype('float32') / 255
X_test = X_test.reshape(-1, 1, 28, 28).astype('float32') / 255

# Turn the class vectors into binary class arrays (one-hot vectors).
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)

# Number of digit classes found in MNIST: 10, one for each of the digits 0-9.
num_classes = y_test.shape[1]


def deeper_cnn_model():
    """Build and compile the convolutional network for 28x28 digit images.

    The network consists of two convolution + max-pooling stages, a dropout
    layer, a flatten step, three fully connected ReLU layers (128, 64 and 32
    neurons) and a softmax output layer named ``preds``.  The width of the
    output layer is read from the module-level ``num_classes``.

    Returns:
        The compiled ``Sequential`` model, configured with categorical
        cross-entropy loss, the ``'adam'`` optimizer and the accuracy metric.
    """
    layers = [
        # Input layer: 30 feature maps of size 5x5 with ReLU activation,
        # fed with (1, 28, 28) inputs.
        Conv2D(30, (5, 5), input_shape=(1, 28, 28), activation='relu'),
        # Subsampling with a 2x2 pooling window.
        MaxPooling2D(pool_size=(2, 2)),
        # Second convolution: 15 feature maps of size 3x3 with ReLU activation.
        Conv2D(15, (3, 3), activation='relu'),
        # Second 2x2 subsampling.
        MaxPooling2D(pool_size=(2, 2)),
        # Dropout with a 20% probability (other values are worth trying).
        Dropout(0.2),
        # Flatten the convolutional output into one long feature vector so
        # that it can feed the densely connected layers that follow.
        Flatten(),
        # Fully connected layers with 128, 64 and 32 neurons.
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        # Output layer: one neuron per class, with softmax activation.
        Dense(num_classes, activation='softmax', name='preds'),
    ]
    network = Sequential(layers)
    # Configure the training process of the neural network.
    network.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network


# Build the network and print a per-layer summary.
model = deeper_cnn_model()
model.summary()
# Train for 10 epochs in batches of 200 samples; the test split is also passed
# as validation data, so the per-epoch validation figures are test-set figures.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200)
# The model is compiled with metrics=['accuracy'], so index 1 of the evaluation
# result is read as the accuracy (index 0 being the loss).
scores = model.evaluate(X_test, y_test, verbose=0)
# "\n" starts a new line; the previous "\ n" was an invalid escape sequence
# that printed a literal backslash.
print("\nacc: %.2f%%" % (scores[1] * 100))


###enhance to check multiple numbers after the training is done

# Load the hand-drawn digit as a single-channel (grayscale) image.
img_pred = cv2.imread('five.JPG', 0)
if img_pred is None:
    # cv2.imread signals an unreadable or missing file by returning None.
    raise IOError("could not read image 'five.JPG'")

plt.imshow(img_pred, cmap='gray')
# Force the image to the input dimensions used in the training data (28x28).
# ndarray.shape is a tuple, so it must be compared with a tuple: comparing it
# with the list [28, 28] is always unequal.
if img_pred.shape != (28, 28):
    img_pred = cv2.resize(img_pred, (28, 28))

# NOTE: the MNIST training digits are white on a black background; a dark digit
# drawn on a light background has to be inverted before it is predicted.

# Match the training input: shape (samples, depth=1, rows, cols), float32
# values scaled from [0, 255] down to [0, 1].
img_pred = img_pred.reshape(1, 1, 28, 28).astype('float32') / 255
# A single forward pass yields the class probabilities; the predicted class is
# the index of the largest one.
pred_proba = model.predict(img_pred)
pred = np.argmax(pred_proba, axis=1)
pred_proba = "% .2f %%" % (pred_proba[0][pred[0]] * 100)
print(pred[0], "with probability of", pred_proba)

最后，我嘗試對繪制和導入的數字進行預測（我嘗試過使用其他手繪數字，但結果同樣差）：

# Load the hand-drawn digit as a single-channel (grayscale) image.
img_pred = cv2.imread('five.JPG', 0)
if img_pred is None:
    # cv2.imread signals an unreadable or missing file by returning None.
    raise IOError("could not read image 'five.JPG'")

plt.imshow(img_pred, cmap='gray')
# Force the image to the input dimensions used in the training data (28x28).
# ndarray.shape is a tuple, so it must be compared with a tuple: comparing it
# with the list [28, 28] is always unequal.
if img_pred.shape != (28, 28):
    img_pred = cv2.resize(img_pred, (28, 28))

# NOTE: the MNIST training digits are white on a black background; a dark digit
# drawn on a light background has to be inverted before it is predicted.

# Match the training input: shape (samples, depth=1, rows, cols), float32
# values scaled from [0, 255] down to [0, 1].
img_pred = img_pred.reshape(1, 1, 28, 28).astype('float32') / 255
# A single forward pass yields the class probabilities; the predicted class is
# the index of the largest one.
pred_proba = model.predict(img_pred)
pred = np.argmax(pred_proba, axis=1)
pred_proba = "% .2f %%" % (pred_proba[0][pred[0]] * 100)
print(pred[0], "with probability of", pred_proba)

看一下Five.jpg：

手繪五個圖像

但是，當我輸入自己的數字時，模型會預測錯誤。 關於為什么會這樣的任何想法？ 我承認我是ML的新手，並且剛剛開始涉足它。 我猜想可能是圖像的居中或歸一化出了問題？ 任何幫助深表感謝！

編輯1：

MNIST測試編號如下所示：

白色數字黑色背景

Answer 1

您似乎有兩個問題，而且正如您所懷疑的，它們都與數據的預處理有關。

首先是您的圖像相對於訓練數據是反轉的：

用 img_pred = cv2.imread('five.JPG', 0) 以單通道讀取 .jpg 之後，背景像素接近白色，其值在215-238附近。
如果您查看X_train中的訓練數據，則背景像素全為零，數字為白色或接近白色（數值偏高，約210-255）。

嘗試在X_train某些選擇旁邊繪制圖像，您將看到它們被反轉了。

另一個問題是cv2.resize()中的默認插值不能保留數據的縮放比例。 調整數據大小后，最小值跳升至60，而不是0。在調整大小之前和之后，比較img_pred.min()和img_pred.max()的值。

您可以使用以下功能反轉和縮放數據，使其看起來更像MNIST輸入數據：

 def mnist_bytescale(image):
    """Invert *image* and stretch it linearly over the range 0..255.

    The smallest input value maps to 255 and the largest to 0, so a dark
    digit on a light background becomes a light digit on a dark background.
    The input array is not modified.

    Args:
        image: numpy array of pixel values.

    Returns:
        An unsigned-integer array with the same shape as *image*.  A
        constant image (all pixels equal) comes back as all 255.
    """
    # Work on a float copy so the rescaling is not done in integer arithmetic.
    img_temp = image.astype(np.float32)
    # Re-zero the data.
    img_temp -= img_temp.min()
    # After re-zeroing the minimum is 0, so the maximum is the full span.
    span = img_temp.max()
    # Skip the rescale for a constant image: dividing by a zero span would
    # produce NaN, which has no defined unsigned-integer value.
    if span > 0:
        img_temp /= span
        img_temp *= 255
    # Invert; the cast truncates the fractional part.
    return 255 - img_temp.astype('uint')

這將翻轉您的數據，並將其線性縮放到0至255的範圍，非常類似於網絡訓練時使用的數據。 但是，如果繪制mnist_bytescale(img_pred) ，則會注意到大多數像素中的背景水平仍然不是0，這是因為原始圖像的背景水平不是恆定的（可能是由於JPEG壓縮所致）。如果翻轉和縮放數據仍不能解決問題，您可以嘗試使用np.clip將背景水平歸零，看看是否有幫助。

Python機器學習數字識別

問題描述

1 個解決方案

解決方案1
2 已采納 2018-04-09 22:05:49

Python機器學習數字識別

問題描述

1 個解決方案

解決方案1 2 已采納 2018-04-09 22:05:49

解決方案1
2 已采納 2018-04-09 22:05:49