繁体   English   中英

Keras python 中的 k 折交叉验证

[英]k-Fold Cross Validation in Keras python

我使用 keras 和 tensorflow 微调卷积神经网络:

import argparse
import os
import sys
import time
from itertools import cycle

import cv2
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from imutils import paths
from keras.utils import np_utils
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

# Command-line interface: the dataset directory is mandatory, the plot and
# model output paths are optional.
ap = argparse.ArgumentParser()
ap.add_argument(
    "-d",
    "--dataset",
    required=True,
    help="path to input dataset",
)
ap.add_argument(
    "-p",
    "--plot",
    type=str,
    help="path to output loss/accuracy plot",
)
ap.add_argument(
    "-m",
    "--model",
    type=str,
    help="path to output model",
)
# expose the parsed namespace as a plain dict keyed by option name
args = vars(ap.parse_args())

# Training hyperparameters: initial learning rate, number of epochs and
# batch size.
INIT_LR, EPOCHS, BS = 1e-3, 25, 8

# Load every image found under the dataset directory together with the
# label(s) encoded in the name of its parent directory.
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))
data = []
labels = []

for imagePath in imagePaths:
    image = cv2.imread(imagePath)
    # cv2.imread signals an unreadable/corrupt file by returning None rather
    # than raising; skip such files instead of crashing inside cvtColor
    if image is None:
        print("[WARN] skipping unreadable image: {}".format(imagePath))
        continue

    # the parent directory name carries the label(s), separated by "_"
    label = imagePath.split(os.path.sep)[-2].split("_")

    # OpenCV loads images as BGR; convert to RGB and resize to the 224x224
    # input size used by the network
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))

    data.append(image)
    labels.append(label)

# fail early with a clear message instead of a confusing error further down
if not data:
    raise ValueError(
        "no readable images found under: {}".format(args["dataset"]))

# scale pixel intensities to the range [0, 1]
data = np.array(data) / 255.0

# binarize the labels; the list of label lists is passed as-is because
# converting ragged lists (different label counts per image) with np.array
# is not reliable, while MultiLabelBinarizer accepts any iterable of
# iterables and returns a 2-D indicator array
lb = MultiLabelBinarizer()
labels = lb.fit_transform(labels)

# Hold out 20% of the samples for testing and train on the remaining 80%;
# the fixed random_state makes the partition repeatable.
trainX, testX, trainY, testY = train_test_split(
    data,
    labels,
    test_size=0.20,
    random_state=42,
)

# Image generator used to augment the training batches.
trainAug = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)


# load the VGG16 network with ImageNet weights, ensuring the head FC layer
# sets are left off (include_top=False) so a new head can be attached
baseModel = VGG16(weights="imagenet", include_top=False,
    input_tensor=Input(shape=(224, 224, 3)))

# construct the new head that is placed on top of the base model
headModel = baseModel.output
headModel = AveragePooling2D(pool_size=(4, 4))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(64, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
# one output unit per class discovered by the label binarizer instead of a
# hard-coded 3, so the script keeps working when the class count changes
# NOTE(review): softmax + categorical cross-entropy assume exactly one label
# per image; if images can carry several labels at once, "sigmoid" with
# "binary_crossentropy" is the usual choice -- confirm against the dataset
headModel = Dense(len(lb.classes_), activation="softmax")(headModel)

# the model that is actually trained: base network plus the new head
model = Model(inputs=baseModel.input, outputs=headModel)

# freeze every layer of the base model so only the head is updated
for layer in baseModel.layers:
    layer.trainable = False

print("[INFO] compiling model...")
# `learning_rate` replaces the deprecated `lr` keyword
# NOTE(review): `decay` is a legacy optimizer argument -- confirm that the
# installed TensorFlow version still accepts it
opt = Adam(learning_rate=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="categorical_crossentropy", optimizer=opt,
    metrics=["accuracy"])

# train the head of the network
print("[INFO] training head...")
# Model.fit accepts the augmenting iterator directly; fit_generator is
# deprecated. validation_steps is omitted because the validation data is
# passed as in-memory arrays rather than as a generator.
H = model.fit(
    trainAug.flow(trainX, trainY, batch_size=BS),
    steps_per_epoch=len(trainX) // BS,
    validation_data=(testX, testY),
    epochs=EPOCHS)

# make predictions on the testing set
print("[INFO] evaluating network...")
predIdxs = model.predict(testX, batch_size=BS)

# for each test image keep the index of the class with the largest
# predicted probability
predIdxs = np.argmax(predIdxs, axis=1)

# ground-truth class indices, computed once and reused for both reports
trueIdxs = testY.argmax(axis=1)

# per-class precision / recall / f1 report
print(classification_report(trueIdxs, predIdxs))

# overall accuracy (accuracy_score comes from sklearn.metrics)
acc_NN = accuracy_score(trueIdxs, predIdxs)
print('Overall accuracy of Neural Network model:', acc_NN)

# serialize the model to disk; --model is optional, so only save when a
# path was actually supplied instead of failing on a None path
if args["model"]:
    print("[INFO] saving model...")
    model.save(args["model"], save_format="h5")
else:
    print("[INFO] no --model path given, skipping model save")

我想执行 k 折交叉验证。到目前为止,我看到的解决方案都是添加类似下面的代码(示例):

# split into input (X) and output (Y) variables
# (columns 0-7 of `dataset` are the inputs, column 8 is the target)
X = dataset[:,0:8]
Y = dataset[:,8]
# define 10-fold cross validation test harness
# NOTE(review): `dataset`, `seed` and `StratifiedKFold` are not defined in
# this excerpt -- they must come from the surrounding example
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
cvscores = []
for train, test in kfold.split(X, Y):
    # do: per-fold work goes here (the loop body is omitted in this excerpt);
    # presumably `train` / `test` are index arrays into X and Y

对于 `for train, test in kfold.split(X, Y):` 这一行,我不明白在我的情况下应该用什么作为 X 和 Y。谁能帮帮我?

根据文档:

X 应该是数据数组,Y 应该是标签(label)。

基于这行代码:

# the hold-out split from the question: `data` and `labels` are the arrays
# being partitioned, with 20% of the samples going to the test set
(trainX, testX, trainY, testY) = train_test_split(data, labels,
    test_size=0.20, random_state=42)

你的代码应该看起来像

# StratifiedKFold stratifies on a 1-D vector of class labels and rejects a
# 2-D binarized label matrix, so collapse `labels` to class indices with
# argmax for the split (assumes exactly one label per image) while still
# indexing the full binarized matrix for training.
for train, test in kfold.split(data, labels.argmax(axis=1)):
    # `train` / `test` are index arrays selecting this fold's samples
    trainX, testX = data[train], data[test]
    trainY, testY = labels[train], labels[test]

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM