![](/img/trans.png)
[英]I want to use variable length input with dynamic RNN of tensorflow, but I don't know how to padding
[英]How to access dynamic variable from another function if I don't want use class?
我有 2 个函数是 train 函数和 logreg 函数,主要函数是在其中运行 logreg 函数的 train。
当我执行 train 函数时,它给了我错误,
NameError: name 'clf_hyper' is not defined
我认为我没有得到导致 logreg 函数的clf_hyper
变量,
对数函数,
from sklearn import model_selection
def logreg(clf,xtrain, ytrain):
# define a grid of parameter
# this can be a dictionary or a list of
# dictionaries
param_grid = {
#"solver": ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
#"penalty": ['none', 'l1', 'l2', 'elasticnet'],
"C": [100, 10]
}
# initialize grid search
model = model_selection.GridSearchCV(
estimator = clf,
param_grid = param_grid,
scoring = "accuracy",
verbose = 10,
n_jobs = 1
)
# fit the model and extract best score
model.fit(xtrain, ytrain)
best_parameters = model.best_estimator_.get_params()
for param_name in sorted(param_grid.keys()):
print(f"\t{param_name}: {best_parameters[param_name]}")
# initialize model with best_estimator_
clf_hyper = model.best_estimator_
训练功能,
import argparse
import os
import config
#import model_dispatcher
#import vectorizer_dispatcher
import dispatcher
import use_function
import hyperparameter
import pandas as pd
import joblib
from nltk.tokenize import word_tokenize
from sklearn import linear_model
from sklearn import metrics
from sklearn import model_selection
from sklearn.feature_extraction.text import CountVectorizer
def run(fold, model, vectorizer):
#read the training data
df = pd.read_csv(config.TRAINING_FILE)
# applying clean_text to Revies column
df.loc[:, 'Review'] = df.Review.apply(use_function.clean_text)
# training data is where kfold is not equal to provided fold
# also, note that we reset the index
df_train = df[df.kfold != fold].reset_index(drop=True)
# validation data is where kfold is equal to provided fold
df_test = df[df.kfold == fold].reset_index(drop=True)
# initialize CountVectorizer with NLTK,s word_tokenize
# function as tokenizer
vectorizer = dispatcher.vectorizers[vectorizer]
#fit count_vec on training data reviews
vectorizer.fit(df_train.Review)
#transform training and validation data reviews
xtrain = vectorizer.transform(df_train.Review)
xtest = vectorizer.transform(df_test.Review)
ytrain = df_train.Rating
# initialize model
clf = dispatcher.models[model]
#initialize hyperparameter if you want use
# if not just give # sign in
hyperparameter.logreg(clf,xtrain,ytrain)
#return clf value from hyperparameter function
#return clf_hyper
#fit the model on training data reviews and Rating
clf_hyper.fit(xtrain, df_train.Rating)
# make prediction on test data
# threshold for predictions is 0.5
preds = clf_hyper.predict(xtest)
#calculate accuracy
accuracy = metrics.accuracy_score(df_test.Rating, preds)
print(f"Fold={fold}")
print(f"Accuracy = {accuracy}")
print("")
# save the model
joblib.dump(clf,os.path.join(config.MODEL_OUTPUT, "dt_{fold}.bin")
)
那么如何在 logreg 函数中获取变量 clf_hyper,如果我不想使用类,我将在训练函数中使用该函数? 谢谢
如果你真的想这样做(我不推荐)你可以将 clf_hyper 定义为全局变量
def logreg(clf,xtrain, ytrain):
global clf_hyper
我会尝试(如果可能?)直接从函数 Logreg() 返回变量 clf_hyper。 然后您可以通过调用该函数来获取 clf_hyper 的值。
def logreg(clf, xtrain, ytrain):
...
return clf_hyper
def run(fold, model, vectorizer):
...
#return clf_hyper and exec the func
return hyperparameter.logreg(clf,xtrain,ytrain)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.