Predict code for Penn Treebank (PTB) dataset

The predict code is almost the same as the predict code for the simple_sequence dataset, so I won't explain it in detail.

Code

The code is on GitHub: predict_ptb.py.

"""Inference/predict code for simple_sequence dataset

model must be trained before inference, 
train_simple_sequence.py must be executed beforehand.
"""
from __future__ import print_function

import argparse
import os
import sys

import matplotlib
import numpy as np

matplotlib.use('Agg')
import matplotlib.pyplot as plt
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import training, iterators, serializers, optimizers, Variable, cuda
from chainer.training import extensions

sys.path.append(os.pardir)
from RNN import RNN
from RNN2 import RNN2
from RNN3 import RNN3
from RNNForLM import RNNForLM


def main():
    archs = {
        'rnn': RNN,
        'rnn2': RNN2,
        'rnn3': RNN3,
        'lstm': RNNForLM
    }

    parser = argparse.ArgumentParser(description='PTB RNN predict code')
    parser.add_argument('--arch', '-a', choices=archs.keys(),
                        default='rnn', help='Net architecture')
    #parser.add_argument('--batchsize', '-b', type=int, default=64,
    #                    help='Number of images in each mini-batch')
    parser.add_argument('--unit', '-u', type=int, default=100,
                        help='Number of LSTM units in each layer')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--primeindex', '-p', type=int, default=1,
                        help='base index data, used for sequence generation')
    parser.add_argument('--length', '-l', type=int, default=100,
                        help='length of the generated sequence')
    parser.add_argument('--modelpath', '-m', default='',
                        help='Model path to be loaded')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    #print('# Minibatch-size: {}'.format(args.batchsize))
    print('')

    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab =', n_vocab)
    print('')

    # load vocabulary
    ptb_word_id_dict = chainer.datasets.get_ptb_words_vocabulary()
    ptb_id_word_dict = dict((v, k) for k, v in ptb_word_id_dict.items())

    # Model Setup
    model = archs[args.arch](n_vocab=n_vocab, n_units=args.unit)
    classifier_model = L.Classifier(model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        classifier_model.to_gpu()  # Copy the model to the GPU
    xp = np if args.gpu < 0 else cuda.cupy

    if args.modelpath:
        serializers.load_npz(args.modelpath, model)
    else:
        serializers.load_npz('result/{}_ptb.model'.format(args.arch), model)

    # Dataset preparation
    prev_index = args.primeindex

    # Predict
    predicted_sequence = [prev_index]
    for i in range(args.length):
        prev = chainer.Variable(xp.array([prev_index], dtype=xp.int32))
        current = model(prev)
        current_index = np.argmax(cuda.to_cpu(current.data))
        predicted_sequence.append(current_index)
        prev_index = current_index

    predicted_text_list = [ptb_id_word_dict[i] for i in predicted_sequence]
    print('Predicted sequence: ', predicted_sequence)
    print('Predicted text: ', ' '.join(predicted_text_list))

if __name__ == '__main__':
    main()

Given the first word specified by the index args.primeindex, the model predicts the following sequence as word ids.
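If you want to start from a specific word instead of a raw id, the same vocabulary dictionary that the script loads can be used to look the id up. Here is a minimal sketch (not part of predict_ptb.py; the word "executive" is just an example word):

import chainer

# word -> id dictionary, the same one predict_ptb.py loads
word_to_id = chainer.datasets.get_ptb_words_vocabulary()
# Look up the id of an example word; pass the printed number as -p/--primeindex
print(word_to_id.get('executive'))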

The last three lines convert the word id sequence into a readable sentence using ptb_id_word_dict.

    predicted_text_list = [ptb_id_word_dict[i] for i in predicted_sequence]
    print('Predicted sequence: ', predicted_sequence)
    print('Predicted text: ', ' '.join(predicted_text_list))

Result

When I run the following command (the model is the RNN model):

$ python predict_ptb.py -p 553

I got the following text:

Predicted text: executive vice president and chief operating officer of <unk> <unk> & <unk> a <unk> mass. newsletter <eos> the <unk> <unk> <unk> <unk> <unk> <unk> from the <unk> <eos> the <unk> <unk> <unk> <unk> <unk> <unk> from the <unk> <eos> the <unk> <unk> <unk> <unk> <unk> <unk> from the <unk> <eos> the <unk> <unk> <unk> <unk> <unk> <unk> from the <unk> <eos> the <unk> <unk> <unk> <unk> <unk> <unk> from the <unk> <eos> the <unk> <unk> <unk> <unk> <unk> <unk> from the <unk> <eos> the <unk> <unk> <unk> <unk> <unk> <unk> from the <unk> <eos> the <unk> <unk> <unk> <unk> <unk> <unk>

It seems the model can predict a reasonable first sentence, but once it reaches <unk> or <eos> it keeps returning the same symbols. Also, "the" appears much more often than other words.

I think the model is not trained well enough yet; you may try training it longer to get better results!
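Besides training longer, one small change worth experimenting with (my own sketch, not part of the original script) is to sample the next word from the softmax distribution instead of always taking the argmax; this tends to break the repetitive <unk>/<eos> loops. It assumes model, xp, prev_index and args are already set up as in predict_ptb.py.

import numpy as np
import chainer
import chainer.functions as F
from chainer import cuda

# Same prediction loop as in predict_ptb.py, but sampling instead of argmax.
predicted_sequence = [prev_index]
for i in range(args.length):
    prev = chainer.Variable(xp.array([prev_index], dtype=xp.int32))
    current = model(prev)
    # Convert the model output to a probability distribution over the vocabulary
    prob = cuda.to_cpu(F.softmax(current).data[0]).astype(np.float64)
    prob /= prob.sum()  # renormalize to guard against floating point error
    current_index = int(np.random.choice(len(prob), p=prob))
    predicted_sequence.append(current_index)
    prev_index = current_index

The output becomes non-deterministic, but it usually avoids getting stuck on the most frequent tokens.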
