
Seq2Seq in Python using TensorFlow and TensorLayer

I am trying to build a chatbot in Python using TensorFlow and TensorLayer. My code is the following:

from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import word_tokenize
import os
import time
import re
import tensorflow as tf
import tensorlayer as tl
import cPickle as pickle

FILE_DIR = os.path.dirname(os.path.realpath(__file__))

PAD_TOKEN = '<PAD>'
START_TOKEN = '<START>'
END_TOKEN = '<END>'
UNK_TOKEN = '<UNK>'

PAD_ID = 0
START_ID = 1
END_ID = 2
UNK_ID = 3

STARTING_VOCAB = {PAD_TOKEN: PAD_ID, START_TOKEN: START_ID, END_TOKEN: END_ID, UNK_TOKEN: UNK_ID}

_DIGIT_RE = re.compile(br"\d")


class Chatbot(object):

    def __init__(self, embedding_dim, n_layers=3):

        self.embedding_dim = embedding_dim
        self.n_layers = n_layers
        self.w2idx = dict(STARTING_VOCAB)  # copy, so the module-level dict is not mutated
        self.idx2w = {}
        self.encode_seqs = None
        self.decode_seqs = None
        self.net = None
        self.net_rnn = None
        self.y = None

    @staticmethod
    def load():
        # NOTE: `location` (the checkpoint directory) and `sess` (the tf.Session)
        # are assumed to be defined elsewhere in the script
        with open(os.path.join(location, 'object.pkl'), 'rb') as pickle_file:
            obj = pickle.load(pickle_file)
        obj.encode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="encode_seqs")
        obj.decode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="decode_seqs")
        obj.net, obj.net_rnn = Chatbot.model(obj.encode_seqs, obj.decode_seqs, obj.idx2w, obj.embedding_dim, obj.n_layers, is_train=False, reuse=True)
        obj.y = tf.nn.softmax(obj.net.outputs)
        new_saver = tf.train.import_meta_graph(os.path.join(location, 'my-model.meta'))
        new_saver.restore(sess, tf.train.latest_checkpoint(location))
        return obj

    @staticmethod
    def model(encode_seqs, decode_seqs, idx2w, embedding_dim, n_layers, is_train=True, reuse=False):

        with tf.variable_scope("model", reuse=reuse):
            # for a chatbot you can use the same embedding layer,
            # for translation you may want to use two separate embedding layers
            with tf.variable_scope("embedding") as vs:
                net_encode = tl.layers.EmbeddingInputlayer(inputs=encode_seqs,
                                                           vocabulary_size=len(idx2w),
                                                           embedding_size=embedding_dim,
                                                           name='seq_embedding')
                vs.reuse_variables()
                tl.layers.set_name_reuse(True)
                net_decode = tl.layers.EmbeddingInputlayer(inputs=decode_seqs,
                                                           vocabulary_size=len(idx2w),
                                                           embedding_size=embedding_dim,
                                                           name='seq_embedding')
            net_rnn = tl.layers.Seq2Seq(net_encode,
                                        net_decode,
                                        cell_fn=tf.contrib.rnn.BasicLSTMCell,
                                        n_hidden=embedding_dim,
                                        initializer=tf.random_uniform_initializer(-0.1, 0.1),
                                        encode_sequence_length=tl.layers.retrieve_seq_length_op2(encode_seqs),
                                        decode_sequence_length=tl.layers.retrieve_seq_length_op2(decode_seqs),
                                        initial_state_encode=None,
                                        dropout=(0.5 if is_train else None),
                                        n_layer=n_layers,
                                        return_seq_2d=True, name='seq2seq')
            net_out = tl.layers.DenseLayer(net_rnn, n_units=len(idx2w), act=tf.identity, name='output')
        return net_out, net_rnn

    def train(self, X_train, y_train, sess, batch_size, n_epochs):

        n_step = int(len(X_train) / batch_size)

        # Create vocabulary
        X_train = [re.sub(_DIGIT_RE, UNK_TOKEN, x.decode('utf-8')) for x in X_train]
        y_train = [re.sub(_DIGIT_RE, UNK_TOKEN, x.decode('utf-8')) for x in y_train]
        vectorizer = CountVectorizer(tokenizer=word_tokenize)
        all_sentences = X_train + y_train
        vectorizer.fit_transform(all_sentences)
        for k, v in vectorizer.vocabulary_.iteritems():
            vectorizer.vocabulary_[k] = v + len(self.w2idx)
        self.w2idx.update(vectorizer.vocabulary_)
        self.idx2w = dict((v, k) for k, v in self.w2idx.iteritems())

        # Transform data from sentences to sequences of ids
        for i in range(len(X_train)):
            X_train_id_seq, y_train_id_seq = [], []
            for w in word_tokenize(X_train[i]):
                if w.lower() in self.w2idx:
                    X_train_id_seq.append(self.w2idx[w.lower()])
                else:
                    X_train_id_seq.append(self.w2idx[UNK_TOKEN])
            X_train[i] = X_train_id_seq + [PAD_ID]
            for w in word_tokenize(y_train[i]):
                if w.lower() in self.w2idx:
                    y_train_id_seq.append(self.w2idx[w.lower()])
                else:
                    y_train_id_seq.append(self.w2idx[UNK_TOKEN])
            y_train[i] = y_train_id_seq + [PAD_ID]

        # model for training
        training_encode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="encode_seqs")
        training_decode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="decode_seqs")
        training_target_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_seqs")
        training_target_mask = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_mask")
        training_net_out, _ = Chatbot.model(training_encode_seqs, training_decode_seqs, self.idx2w, self.embedding_dim, self.n_layers, is_train=True, reuse=False)

        # model for inferencing
        self.encode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="encode_seqs")
        self.decode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="decode_seqs")
        self.net, self.net_rnn = Chatbot.model(self.encode_seqs, self.decode_seqs, self.idx2w, self.embedding_dim, self.n_layers, is_train=False, reuse=True)
        self.y = tf.nn.softmax(self.net.outputs)

        loss = tl.cost.cross_entropy_seq_with_mask(logits=training_net_out.outputs, target_seqs=training_target_seqs,
                                                   input_mask=training_target_mask, return_details=False, name='cost')
        lr = 0.0001
        train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)

        tl.layers.initialize_global_variables(sess)

        for epoch in range(n_epochs):
            epoch_time = time.time()

            # shuffle training data
            from sklearn.utils import shuffle
            X_train, y_train = shuffle(X_train, y_train, random_state=0)

            # train an epoch
            total_err, n_iter = 0, 0
            for X, Y in tl.iterate.minibatches(inputs=X_train, targets=y_train, batch_size=batch_size, shuffle=False):
                step_time = time.time()

                X = tl.prepro.pad_sequences(X)
                _target_seqs = tl.prepro.sequences_add_end_id(Y, end_id=END_ID)
                _target_seqs = tl.prepro.pad_sequences(_target_seqs)

                _decode_seqs = tl.prepro.sequences_add_start_id(Y, start_id=START_ID, remove_last=False)
                _decode_seqs = tl.prepro.pad_sequences(_decode_seqs)
                _target_mask = tl.prepro.sequences_get_mask(_target_seqs)

                _, err = sess.run([train_op, loss],
                                  {training_encode_seqs: X,
                                   training_decode_seqs: _decode_seqs,
                                   training_target_seqs: _target_seqs,
                                   training_target_mask: _target_mask})

                print("Epoch[%d/%d] step:[%d/%d] loss:%f took:%.5fs" % (
                    epoch, n_epochs, n_iter, n_step, err, time.time() - step_time))

                total_err += err
                n_iter += 1

            print("Epoch[%d/%d] averaged loss:%f took:%.5fs" % (epoch, n_epochs, total_err / n_iter,
                                                                time.time() - epoch_time))

    def save(self):
        if not os.path.exists(location):
            os.makedirs(location)
        saver = tf.train.Saver()
        saver.save(sess, os.path.join(location, 'my-model'))
        tf.train.write_graph(sess.graph, location, 'my-graph.pbtxt')
        self.net = None
        self.net_rnn = None
        self.y = None
        self.encode_seqs = None
        self.decode_seqs = None
        with open(os.path.join(location, 'object.pkl'), 'wb') as pickle_file:
            pickle.dump(self, pickle_file)

Training works perfectly fine: I pass a list of sentences as X_train and another as y_train. What I need help with is saving the model and then reloading it later for training or testing. I tried just pickling the object, but it gives an error. How can I save and load seq2seq models in Python using TensorFlow and TensorLayer?

Pickling the object is not the best way to save your model.

After you finish training, use

saver = tf.train.Saver()
saver.save(sess, model_name)
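
Applied to the code in the question, a minimal save-side sketch could look like the following. This is only a sketch under assumptions: `chatbot` is a trained Chatbot instance, `sess` is the session used for training, `location` is the checkpoint directory, and 'vocab.pkl' is a hypothetical file name. It pickles only the plain-Python state (vocabulary and sizes) instead of the whole object, because the TensorFlow placeholders and TensorLayer layers stored on the object are most likely what makes pickling the whole object fail.

# save the trained variables with a Saver, and only the plain-Python
# state with pickle ('vocab.pkl' is a hypothetical file name)
saver = tf.train.Saver()
saver.save(sess, os.path.join(location, 'my-model'))
with open(os.path.join(location, 'vocab.pkl'), 'wb') as f:
    pickle.dump({'w2idx': chatbot.w2idx,
                 'idx2w': chatbot.idx2w,
                 'embedding_dim': chatbot.embedding_dim,
                 'n_layers': chatbot.n_layers}, f)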

and then load it:

saver = tf.train.Saver()
sess = tf.Session()
saver.restore(sess=sess, save_path='sentiment_analysis_tsm')

In order to load it, you first have to build a model equivalent to the one you trained with.
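
For the code in the question, a rough load-side sketch (again assumption-laden, not a drop-in fix, and assuming the checkpoint and 'vocab.pkl' were written as in the save sketch above) would be: read the pickled vocabulary back, rebuild only the inference graph with Chatbot.model, and restore the checkpoint into it. reuse=False is used here because this is a fresh graph in a new process, so no "model" variable scope exists yet.

# rebuild an equivalent graph, then restore the trained weights into it
with open(os.path.join(location, 'vocab.pkl'), 'rb') as f:
    state = pickle.load(f)

chatbot = Chatbot(state['embedding_dim'], state['n_layers'])
chatbot.w2idx = state['w2idx']
chatbot.idx2w = state['idx2w']

chatbot.encode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="encode_seqs")
chatbot.decode_seqs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="decode_seqs")
chatbot.net, chatbot.net_rnn = Chatbot.model(chatbot.encode_seqs, chatbot.decode_seqs,
                                             chatbot.idx2w, chatbot.embedding_dim,
                                             chatbot.n_layers, is_train=False, reuse=False)
chatbot.y = tf.nn.softmax(chatbot.net.outputs)

sess = tf.Session()
saver = tf.train.Saver()
saver.restore(sess, tf.train.latest_checkpoint(location))

This avoids pickling any TensorFlow objects, and the variable names produced by Chatbot.model should match those in the checkpoint as long as the same scopes and layer names are used.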

To save the graph object, try using tf.train.write_graph.
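
The save() method in the question already does something close to this; as a sketch, assuming the same `sess` and `location` as above:

# write_graph stores only the graph definition (no variable values),
# so it complements the Saver checkpoint rather than replacing it
tf.train.write_graph(sess.graph_def, location, 'my-graph.pbtxt', as_text=True)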
