繁体   English   中英

输入 TfIdf 输出到 CNN

[英]Input TfIdf output to CNN

我使用 CNN 模型来解决从 Twitter 文本中预测 5 个值的 NLP 问题。该模型目前使用 Keras 嵌入层从文本创建向量。我使用的另一种方法是 TfidfVectorizer:先应用 PCA 降低其输出的维度,再将结果传递给由 Dense 层组成的常规 Keras 模型。我的想法是去掉 PCA,改用能够直接接受 TfidfVectorizer 输出的 CNN 层。有谁知道该怎么做?这是我的代码:

class TfidfModel(Embedder, ABC):
    """
    Tf-idf + PCA.

    Texts are vectorized with a ``TfidfVectorizer`` and the resulting dense
    matrix is reduced with ``PCA``. ``fit`` learns both steps on the
    training texts; ``encode`` applies the already-fitted steps to new texts.
    """

    def __init__(self, max_features=20000, n_components=9000):
        """
        Create the (unfitted) vectorizer and PCA.

        Args:
            max_features: value passed to ``TfidfVectorizer(max_features=...)``.
            n_components: value passed to ``PCA(n_components=...)``.
        """
        self.name = ''
        self.model = TfidfVectorizer(lowercase=True, max_features=max_features)
        self.pca = PCA(n_components=n_components)

    def fit(self, X):
        """
        Fit the vectorizer and the PCA on ``X`` and return the reduced matrix.

        Args:
            X: iterable of raw text documents (whatever ``TfidfVectorizer``
                accepts).

        Returns:
            The PCA-reduced tf-idf matrix of the training documents.
        """
        print('Fitting the tfidf vectorizer...')
        # toarray() always yields a 2-D ndarray. The previous
        # todense() + np.squeeze() combination dropped length-1 axes, which
        # turned a single document into a 1-D vector that PCA rejects.
        matrix = self.model.fit_transform(X).toarray()
        print('Fitting the tfidf vectorizer finished!')
        print('Dimension of original tfidf matrix: ', matrix.shape)

        print('Fit transforming the PCA on the training data...')
        # Fit and project in one step instead of fit() followed by transform().
        reduced_matrix = self.pca.fit_transform(matrix)
        print('Fit transforming of the PCA training data finished!')
        print('Dimension of reduced matrix: ', reduced_matrix.shape)
        print('Encoder fitting completed!')
        return reduced_matrix

    def encode(self, X):
        """
        Transform ``X`` with the already-fitted vectorizer and PCA.

        Args:
            X: iterable of raw text documents.

        Returns:
            The PCA-reduced tf-idf matrix, one row per document.
        """
        print('TfIdf transforming test data...')
        # Keep the matrix 2-D even for a single document (see fit()).
        matrix = self.model.transform(X).toarray()
        print('TfIdf transform finished!')
        print('PCA transforming test data...')
        reduced_matrix = self.pca.transform(matrix)
        print('PCA transformign finsihed!')
        return reduced_matrix

class NeuralNetMulti(Regressor):
    """
    Multi-output regressor backed by a fully connected Keras ``Sequential``.

    The network is built inside ``fit`` because its input and output widths
    are taken from the training data.
    """

    def __init__(self):
        self.name = 'keras-sequential'
        self.model = Sequential()

    @staticmethod
    def _plot_metric(history, metric, title, ylabel, path):
        """Plot one training/validation metric curve and save it to ``path``."""
        plt.plot(history.history[metric])
        plt.plot(history.history['val_' + metric])
        plt.title(title)
        plt.ylabel(ylabel)
        plt.xlabel('epoch')
        # NOTE: the second curve is the validation split, labelled 'test'.
        plt.legend(['train', 'test'], loc='upper left')
        plt.savefig(path)
        # Close the figure so the next plot does not draw on top of this one.
        plt.close()

    def fit(self, X, y):
        """
        Build, compile and train the network, then save the metric plots.

        Args:
            X: 2-D feature matrix; ``X.shape[1]`` sets the input width.
            y: 2-D target matrix; ``y.shape[1]`` sets the number of outputs.

        Side effects:
            Replaces ``self.model`` with a freshly built network and writes
            ``../visualization/v1.png`` (MSE) and ``../visualization/v2.png``
            (MAE).
        """
        print('Fitting into the neural net...')
        n_inputs = X.shape[1]
        n_outputs = y.shape[1]

        # Start from an empty model on every call: adding layers to the
        # instance created in __init__ would stack a second network on top of
        # the first one if fit() were called again.
        self.model = Sequential()
        layers = [
            Dense(1024, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'),
            Dense(512, activation='relu'),
            Dense(256, activation='relu'),
            Dense(128, activation='relu'),
            Dense(n_outputs, activation='sigmoid'),
        ]
        for layer in layers:
            self.model.add(layer)
        self.model.summary()
        self.model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])
        history = self.model.fit(X, y, verbose=1, epochs=100, validation_split=0.1)

        self._plot_metric(history, 'mse', 'model MSE', 'MSE', '../visualization/v1.png')
        self._plot_metric(history, 'mae', 'model MAE', 'MAE', '../visualization/v2.png')
        print('Fitting completed!')

    def predict(self, X):
        """Return the trained network's predictions for ``X``."""
        print('Predicting...')
        predictions = self.model.predict(X, verbose=1)
        print('Predicted!')
        return predictions

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM