如何使用smac進行卷積神經網絡的超參數優化？

Question

注意：長帖子。 請多多包涵

我已經在KMNIST數據集的PyTorch中實現了卷積神經網絡。 我需要使用SMAC優化CNN的學習率和隨機梯度下降動量。 我是超參數優化的新手，從smac文檔中學到的是，

SMAC通過目標算法評估器（TAE）調用算法來評估要優化的算法。
我們需要一個方案對象來配置優化過程。
Scenario對象中的run_obj參數指定SMAC應該優化的內容。

我的終極目標是獲得良好的准確性或低損失

到目前為止，這是我所做的：

卷積神經網絡

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms 
import torchvision.datasets as datasets
from torch.autograd import Variable
from datasets import *
import torch.utils.data
import torch.nn.functional as F
import matplotlib.pyplot as plt

# Create the model class

class CNN(nn.Module):
    def __init__(self):

        super(CNN, self).__init__() # to inherent the features of nn.Module

        self.cnn1 = nn.Conv2d(in_channels = 1, out_channels = 8, kernel_size = 3, stride = 1, padding =1)

        # in_channels =1 because of grey scale image
        # kernel_size = feature_size
        # padding = 1 because for same padding = [(filter_size -1)/2]
        # the output size of the 8 feature maps is [(input_size - filter_size +2(padding)/stride)+1]

        #Batch Normalization

        self.batchnorm1 = nn.BatchNorm2d(8)

        # RELU

        self.relu = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size =2)

        # After maxpooling, the output of each feature map is 28/2 =14

        self.cnn2 = nn.Conv2d(in_channels = 8, out_channels = 32, kernel_size = 5, stride = 1, padding =2)

        #Batch Normalization

        self.batchnorm2 = nn.BatchNorm2d(32)

        # RELU

        #self.relu = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size =2)

        # After maxpooling , the output of each feature map is 14/2 =7of them is of size 7x7 --> 32*7*7=1568
        # Flatten the feature maps. You have 32 feature maps, each 
        self.fc1 = nn.Linear(in_features=1568, out_features = 600)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features=600, out_features = 10)

    def forward(self,x):

        out = self.cnn1(x)
        #out = F.relu(self.cnn1(x))
        out = self.batchnorm1(out)
        out = self.relu(out)
        out = self.maxpool1(out)

        out = self.cnn2(out)
        out = self.batchnorm2(out)
        out = self.relu(out)
        out = self.maxpool2(out)

        #Now we have to flatten the output. This is where we apply the feed forward neural network as learned
        #before!

        #It will the take the shape (batch_size, 1568) = (100, 1568)

        out = out.view(-1, 1568)

        #Then we forward through our fully connected layer

        out = self.fc1(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)

        return out

def train(model, train_loader, optimizer, epoch, CUDA, loss_fn):
        model.train()
        cum_loss=0
        iter_count = 0

        for i, (images, labels) in enumerate(train_load):

            if CUDA:

               images = Variable(images.cuda())
               images = images.unsqueeze(1)
               images = images.type(torch.FloatTensor)
               images = images.cuda()

               labels = Variable(labels.cuda())
               labels = labels.type(torch.LongTensor)
               labels = labels.cuda()

            else:

               images = Variable(images)
               images = images.unsqueeze(1)
               images = images.type(torch.DoubleTensor)

               labels = Variable(labels)
               labels = labels.type(torch.DoubleTensor)

            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            cum_loss += loss


            if (i+1) % batch_size == 0:
               correct = 0
               total = 0
               acc = 0
               _, predicted = torch.max(outputs.data,1)
               total += labels.size(0)
               if CUDA:
                  correct += (predicted.cpu()==labels.cpu()).sum()
               else:
                  correct += (predicted==labels).sum()

               accuracy = 100*correct/total

            if i % len(train_load) == 0:

               iter_count += 1
               ave_loss = cum_loss/batch_size
        return ave_loss

batch_size = 100 
epochs = 5
e = range(epochs)
#print(e)

#Load datasets

variable_name=KMNIST()

train_images = variable_name.images
train_images = torch.from_numpy(train_images)

#print(train_images.shape)
#print(type(train_images))

train_labels = variable_name.labels
train_labels = torch.from_numpy(train_labels)

#print(train_labels.shape)
#print(type(train_labels))

train_dataset = torch.utils.data.TensorDataset(train_images, train_labels)

# Make the dataset iterable

train_load = torch.utils.data.DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)

print('There are {} images in the training set' .format(len(train_dataset)))
print('There are {} images in the loaded training set' .format(len(train_load)))



def net(learning_rate, Momentum):
    model = CNN()
    CUDA = torch.cuda.is_available()
    if CUDA:
        model = model.cuda()

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate,momentum = Momentum, nesterov= True)

    iteration = 0
    total_loss=[]

    for epoch in range(epochs):
        ave_loss = train(model, train_load, optimizer, epoch, CUDA, loss_fn)

        total_loss.append(ave_loss)

    return optimizer, loss_fn, model, total_loss

optimizer, loss_fn, model, total_loss = net(learning_rate= 0.01, Momentum = 0.09)

# Print model's state_dict

print("---------------")

print("Model's state_dict:")

for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

print("---------------")

#print("Optimizer's state_dict:")

#for var_name in optimizer.state_dict():
 #   print(var_name, "\t", optimizer.state_dict()[var_name])

torch.save(model.state_dict(), "kmnist_cnn.pt")

plt.plot(e, (np.array(total_loss)))
plt.xlabel("# Epoch")
plt.ylabel("Loss")
plt.show()

print('Done!')

smac超參數優化 ：

from smac.configspace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
    UniformFloatHyperparameter, UniformIntegerHyperparameter

from smac.configspace.util import convert_configurations_to_array
#from ConfigSpace.conditions import InCondition

# Import SMAC-utilities
from smac.tae.execute_func import ExecuteTAFuncDict
from smac.scenario.scenario import Scenario
from smac.facade.smac_facade import SMAC

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

# We define a few possible types of SVM-kernels and add them as "kernel" to our cs

lr = UniformFloatHyperparameter('learning_rate', 1e-4, 1e-1, default_value='1e-2')
momentum = UniformFloatHyperparameter('Momentum', 0.01, 0.1, default_value='0.09')

cs.add_hyperparameters([lr, momentum])

def kmnist_from_cfg(cfg):

    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    print('Config is', cfg)

    #optimizer, loss_fn, model, total_loss = net(**cfg)
    #optimizer, loss_fn, model, total_loss = net(learning_rate= cfg["learning_rate"], Momentum= cfg["Momentum"])

    optimizer, loss_fn, model, total_loss = net(learning_rate= 0.02, Momentum= 0.05)

    return optimizer, loss_fn, model, total_loss

# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 200,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

#def_value = kmnist_from_cfg(cs.get_default_configuration())
#print("Default Value: %.2f" % (def_value))


# Optimize, using a SMAC-object

print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario,tae_runner=kmnist_from_cfg) #rng=np.random.RandomState(42)
smac.solver.intensifier.tae_runner.use_pynisher = False

print("SMAC", smac)
incumbent = smac.optimize()


inc_value = kmnist_from_cfg(incumbent)

print("Optimized Value: %.2f" % (inc_value))

當我將損失作為run_obj參數時，會收到錯誤消息

ArgumentError：參數--run-obj /-run_obj：無效選擇：'total_loss'（從'runtime'，'quality'中選擇）

老實說，我不知道“質量”是什么意思。 無論如何，當我將品質作為run_obj參數時，我得到了錯誤消息

TypeError：輸入類型不支持ufunc'isfinite'，並且根據強制轉換規則“ safe”，不能將輸入安全地強制轉換為任何受支持的類型

如果我正確理解，則當需要int但給出str時，會獲得上述錯誤消息。 為了檢查問題是否出在配置空間上，我嘗試了

optimizer, loss_fn, model, total_loss = net(learning_rate= 0.02, Momentum= 0.05)

代替這些：

optimizer, loss_fn, model, total_loss = net(**cfg)
optimizer, loss_fn, model, total_loss = net(learning_rate= cfg["learning_rate"], Momentum= cfg["Momentum"])

錯誤保持不變。

關於如何使用smac優化CNN的超參數的任何想法，為什么我會收到此錯誤消息？ 我試圖在網上尋找類似的問題。 這篇文章有點幫助。 不幸的是，由於在NN上沒有smac的實現（至少我沒有找到它），所以我找不到解決方案。 我沒辦法了。

任何幫助，想法或有用的鏈接表示贊賞。

謝謝！

Answer 1

我相信tae_runner （ kmnist_from_cfg你的情況）必須是一個可調用，需要一個配置空間點，你提供正確，並輸出一個數字。 您輸出一個元組的東西。 也許只返回驗證集上的total_loss ？ 我將其基於smac github中svm示例，網址為https://github.com/automl/SMAC3/blob/master/examples/svm.py 。

如何使用smac進行卷積神經網絡的超參數優化？

問題描述

1 個解決方案

解決方案1
0 2019-05-23 11:47:16

如何使用smac進行卷積神經網絡的超參數優化？

問題描述

1 個解決方案

解決方案1 0 2019-05-23 11:47:16

解決方案1
0 2019-05-23 11:47:16