[英]10 fold cross validation evaluation
我有以下用於分類的 model,以及一個訓練集和一個測試集。 我在訓練集上對 model 進行了訓練:輸入是 3400 維的向量,輸出是 3 個類別(0、1、2)中的一個。 我用以下代碼訓練並保存了 model。 現在我想應用 10 折交叉驗證,在測試集上評估已保存的 model。 你能告訴我該怎么做嗎? 因為我以前從未使用過 10 折交叉驗證。
# Training fragment: build the train/test loaders, run one pass over the
# training loader, then print epoch statistics and periodically checkpoint.
#
# NOTE(review): this fragment uses names that are not defined in the visible
# snippet -- Dataset, params, device, model, criterion, optimizer, epoch,
# running_loss, epoch_loss, sum_acc, loss_values, path and name_file.
# The batch loop and the epoch summary below presumably live inside an outer
# `for epoch in range(...)` loop that also resets the accumulators; confirm
# against the full script before running.
training_set = Dataset("train_data.txt", "train_target.txt")
training_generator = torch.utils.data.DataLoader(training_set, **params)

testing_set = Dataset("test_data.txt", "testtarget.txt")
testing_generator = torch.utils.data.DataLoader(testing_set, **params)

for i, (seq_batch, stat_batch) in enumerate(training_generator):
    seq_batch, stat_batch = seq_batch.to(device), stat_batch.to(device)
    optimizer.zero_grad()

    # Model computation: append a trailing singleton dimension to the input
    # batch before the forward pass.
    seq_batch = seq_batch.unsqueeze(-1)
    outputs = model(seq_batch)

    # The loss is computed unconditionally. The original only assigned it
    # under `if CUDA:`, which left `loss` unbound on a CPU run; both tensors
    # were already moved with `.to(device)`, so no device branch is needed.
    loss = criterion(outputs, stat_batch)
    loss.backward()
    optimizer.step()

    # 1.0 where the arg-max prediction equals the target, else 0.0.
    correct = (outputs.argmax(1) == stat_batch).float()

    # Running statistics: `running_loss` feeds the periodic print, while
    # `epoch_loss` is weighted by batch size so it can be averaged per sample.
    running_loss += loss.item()
    epoch_loss += loss.item() * outputs.shape[0]
    if i % 2000 == 1999:  # print every 2000 mini-batches
        print('[%d, %5d] loss: %.3f' %
              (epoch + 1, i + 1, running_loss / 2000), "acc", correct.mean())
        running_loss = 0.0
    sum_acc += correct.sum()

# Epoch summary: per-sample accuracy and loss over the whole training set.
print("epoch", epoch + 1, "acc", sum_acc / len(training_set), "loss", epoch_loss / len(training_set))
loss_values.append(epoch_loss / len(training_set))

# Checkpoint the weights on every 20th epoch (epoch 0 included).
if epoch % 20 == 0:
    torch.save(model.state_dict(), path + name_file + "model_epoch_i_" + str(epoch) + ".cnn")
以下主題可能對您有用,其中一個答案包含自定義的交叉驗證(CV)函數:k-fold cross validation using DataLoaders in PyTorch
# define a cross validation function
def crossvalid(model=None, criterion=None, optimizer=None, dataset=None, k_fold=10):
    """Run k-fold cross validation over ``dataset``.

    The dataset indices ``[0, len(dataset))`` are cut into ``k_fold``
    contiguous validation ranges. For each fold, the samples outside the
    validation range form the training subset; ``train`` is called on it for
    one epoch and ``valid`` is then called on the validation subset.

    Args:
        model: Model handed to ``train`` and ``valid``.
        criterion: Loss function handed to ``train`` and ``valid``.
        optimizer: Optimizer handed to ``train`` and ``valid``.
        dataset: Indexable dataset with a length; wrapped in ``Subset``.
        k_fold: Number of folds.

    Returns:
        A ``(train_score, val_score)`` pair of ``pandas.Series`` indexed by
        fold number, holding whatever ``train`` and ``valid`` returned.

    Raises:
        ValueError: If ``k_fold`` exceeds the number of samples, which would
            leave folds without any validation sample.

    NOTE(review): ``train`` and ``valid`` are helpers defined outside this
    snippet; they are assumed to return an accuracy value -- confirm.
    NOTE(review): the same ``model`` and ``optimizer`` objects are reused for
    every fold. If ``train`` updates the model in place, later folds start
    from weights that have already seen their validation samples; re-create
    or reload the model per fold if independent fold scores are required.
    """
    total_size = len(dataset)
    if k_fold > total_size:
        raise ValueError(
            "k_fold (%d) cannot exceed the dataset size (%d)" % (k_fold, total_size)
        )

    # Integer division avoids the float round-off of int(total_size * (1 / k)),
    # which can land one below the intended fold size.
    seg = total_size // k_fold

    # An explicit dtype avoids the deprecation warning pandas emits for an
    # empty Series constructed without one.
    train_score = pd.Series(dtype=float)
    val_score = pd.Series(dtype=float)

    for i in range(k_fold):
        val_start = i * seg
        # The last fold absorbs the remainder so that every sample is used
        # for validation exactly once.
        val_stop = total_size if i == k_fold - 1 else val_start + seg

        # Training indices are everything to the left and to the right of the
        # validation range.
        val_indices = list(range(val_start, val_stop))
        train_indices = list(range(val_start)) + list(range(val_stop, total_size))

        train_set = torch.utils.data.Subset(dataset, train_indices)
        val_set = torch.utils.data.Subset(dataset, val_indices)

        train_loader = torch.utils.data.DataLoader(
            train_set, batch_size=50, shuffle=True, num_workers=4
        )
        val_loader = torch.utils.data.DataLoader(
            val_set, batch_size=50, shuffle=True, num_workers=4
        )

        # Use the `model` argument; the original referenced the global
        # `res_model` here and silently ignored the parameter.
        train_score.at[i] = train(model, criterion, optimizer, train_loader, epoch=1)
        val_score.at[i] = valid(model, criterion, optimizer, val_loader)

    return train_score, val_score


# Example call: 10-fold cross validation of `res_model` on `tiny_dataset`
# (both names, plus `criterion` and `optimizer`, come from the caller's script).
train_score, val_score = crossvalid(res_model, criterion, optimizer, dataset=tiny_dataset)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.