The predict code is mostly the same as the predict code for the simple sequence dataset, so I won't explain it in detail.
Code
The code is on GitHub as predict_ptb.py.
"""Inference/predict code for simple_sequence dataset
model must be trained before inference,
train_simple_sequence.py must be executed beforehand.
"""
from __future__ import print_function
import argparse
import os
import sys
import matplotlib
import numpy as np
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import training, iterators, serializers, optimizers, Variable, cuda
from chainer.training import extensions
sys.path.append(os.pardir)
from RNN import RNN
from RNN2 import RNN2
from RNN3 import RNN3
from RNNForLM import RNNForLM

def main():
    archs = {
        'rnn': RNN,
        'rnn2': RNN2,
        'rnn3': RNN3,
        'lstm': RNNForLM
    }

    parser = argparse.ArgumentParser(description='PTB RNN predict code')
    parser.add_argument('--arch', '-a', choices=archs.keys(),
                        default='rnn', help='Net architecture')
    #parser.add_argument('--batchsize', '-b', type=int, default=64,
    #                    help='Number of images in each mini-batch')
    parser.add_argument('--unit', '-u', type=int, default=100,
                        help='Number of LSTM units in each layer')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--primeindex', '-p', type=int, default=1,
                        help='base index data, used for sequence generation')
    parser.add_argument('--length', '-l', type=int, default=100,
                        help='length of the generated sequence')
    parser.add_argument('--modelpath', '-m', default='',
                        help='Model path to be loaded')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    #print('# Minibatch-size: {}'.format(args.batchsize))
    print('')

    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab =', n_vocab)
    print('')

    # load vocabulary
    ptb_word_id_dict = chainer.datasets.get_ptb_words_vocabulary()
    ptb_id_word_dict = dict((v, k) for k, v in ptb_word_id_dict.items())

    # Model Setup
    model = archs[args.arch](n_vocab=n_vocab, n_units=args.unit)
    classifier_model = L.Classifier(model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        classifier_model.to_gpu()                # Copy the model to the GPU
    xp = np if args.gpu < 0 else cuda.cupy

    if args.modelpath:
        serializers.load_npz(args.modelpath, model)
    else:
        serializers.load_npz('result/{}_ptb.model'.format(args.arch), model)

    # Dataset preparation
    prev_index = args.primeindex

    # Predict
    predicted_sequence = [prev_index]
    for i in range(args.length):
        prev = chainer.Variable(xp.array([prev_index], dtype=xp.int32))
        current = model(prev)
        current_index = np.argmax(cuda.to_cpu(current.data))
        predicted_sequence.append(current_index)
        prev_index = current_index

    predicted_text_list = [ptb_id_word_dict[i] for i in predicted_sequence]
    print('Predicted sequence: ', predicted_sequence)
    print('Predicted text: ', ' '.join(predicted_text_list))


if __name__ == '__main__':
    main()
Given the first word specified by its index, args.primeindex, the model predicts the following sequence as word ids.
The last three lines convert the word id sequence into a readable sentence using ptb_id_word_dict.
predicted_text_list = [ptb_id_word_dict[i] for i in predicted_sequence]
print('Predicted sequence: ', predicted_sequence)
print('Predicted text: ', ' '.join(predicted_text_list))
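If you want to start the generation from a particular word instead of a raw id, you can look up that word's id in the vocabulary and pass it as --primeindex. Here is a minimal sketch using Chainer's PTB vocabulary utility (the word 'president' is just an arbitrary example, not taken from the script):

import chainer

# Word -> id mapping provided by Chainer's PTB dataset utilities
word_to_id = chainer.datasets.get_ptb_words_vocabulary()

# Look up an example word; pass the printed id to predict_ptb.py via --primeindex
print(word_to_id.get('president'))

With the printed id in hand, you can run python predict_ptb.py -p <id>.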
Result
When I run the following (the model is the RNN model)
$ python predict_ptb.py -p 553
I got this text:
Predicted text: executive vice president and chief operating officer of <unk> <unk> & <unk> a <unk> mass. newsletter <eos> the <unk> <unk> <unk> <unk> <unk> <unk> from the <unk> <eos> the <unk> <unk> <unk> <unk> <unk> <unk> from the <unk> <eos> the <unk> <unk> <unk> <unk> <unk> <unk> from the <unk> <eos> the <unk> <unk> <unk> <unk> <unk> <unk> from the <unk> <eos> the <unk> <unk> <unk> <unk> <unk> <unk> from the <unk> <eos> the <unk> <unk> <unk> <unk> <unk> <unk> from the <unk> <eos> the <unk> <unk> <unk> <unk> <unk> <unk> from the <unk> <eos> the <unk> <unk> <unk> <unk> <unk> <unk>
It seems the model can produce a plausible first sentence, but once it reaches <unk> or <eos> it keeps emitting the same symbols in a loop, and "the" appears far more often than other words. Part of the repetition comes from the greedy decoding: the loop always takes the argmax, so generation is deterministic and repeats once the hidden state falls into a cycle.
I think the model is not trained well enough yet; you may try training it longer to get a better result!
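As a quick experiment independent of further training, you can also make the generation stochastic by sampling the next word from the softmax distribution instead of always taking the argmax. Below is a minimal sketch of such a variation of the prediction loop (my own addition, not part of predict_ptb.py; it reuses model, xp, and args from the script above):

import numpy as np
import chainer
import chainer.functions as F
from chainer import cuda

prev_index = args.primeindex
predicted_sequence = [prev_index]
for i in range(args.length):
    prev = chainer.Variable(xp.array([prev_index], dtype=xp.int32))
    current = model(prev)
    # Turn the logits into a probability distribution and sample from it,
    # instead of deterministically taking the argmax.
    prob = cuda.to_cpu(F.softmax(current).data[0]).astype(np.float64)
    prob /= prob.sum()  # renormalize to guard against rounding error
    current_index = int(np.random.choice(len(prob), p=prob))
    predicted_sequence.append(current_index)
    prev_index = current_index

Sampling avoids the deterministic loops shown above, at the cost of occasionally picking less likely words.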