Sunday, June 26, 2016

Theano class returns 'Bad input argument to theano function with name "TRNN.py:80"': Class instance has no attribute '__trunc__'


While working with the Theano GPU machine-learning setup shown here, I encounter this error:

    AttributeError: ('Bad input argument to theano function with name "TRNN.py:80"  at index 0(0-based)', "TheanoRNN instance has no attribute '__trunc__'")

I can't trace the error back to a specific line of my code. Can someone please show me how to fix this?
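The one thing I have managed to reproduce in isolation: calling a compiled Theano function with a plain class instance where an array input is expected fails the same way. This is only my guess at the error class (the ivector input below mirrors x in TRNN.py), but on my Python 2 setup this snippet dies with the same AttributeError:

    import numpy as np
    import theano
    import theano.tensor as T

    x = T.ivector('x')
    f = theano.function([x], x.sum())

    print(f(np.array([1, 2, 3], dtype='int32')))  # fine: prints 6

    class Dummy():  # old-style class under Python 2, like TheanoRNN
        pass

    f(Dummy())  # AttributeError: ... "Dummy instance has no attribute '__trunc__'"

So somewhere I must be handing one of the compiled functions an object that NumPy cannot convert, but I can't see which call does it.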

Here are the files:

TRNN.py:

import operator

import theano
import theano.tensor as T

import numpy as np


class TheanoRNN():
    """Theano version of the RNN"""

    def __init__(self, worddim, hiddendim=100, bptt_truncate=4):
        self.worddim = worddim
        self.hiddendim = hiddendim
        self.bptt_truncate = bptt_truncate

        U = np.random.uniform(-np.sqrt(1. / worddim),
                              np.sqrt(1. / worddim),
                              (hiddendim, worddim))

        V = np.random.uniform(-np.sqrt(1. / hiddendim),
                              np.sqrt(1. / hiddendim),
                              (worddim, hiddendim))

        W = np.random.uniform(-np.sqrt(1. / hiddendim),
                              np.sqrt(1. / hiddendim),
                              (hiddendim, hiddendim))

        self.U = theano.shared(name='U', value=U.astype(theano.config.floatX))
        self.V = theano.shared(name='V', value=V.astype(theano.config.floatX))
        self.W = theano.shared(name='W', value=W.astype(theano.config.floatX))

        self.theano = {}
        self.__theano_build__()

    def __theano_build__(self):
        """No clue what this does yet."""
        U = self.U
        V = self.V
        W = self.W

        x = T.ivector('x')
        y = T.ivector('y')

        def forward_prop(x_t, s_t_prev, U, V, W):
            s_t = T.tanh(U[:, x_t] + W.dot(s_t_prev))
            o_t = T.nnet.softmax(V.dot(s_t))
            return [o_t[0], s_t]

        [o, s], updates = theano.scan(
            forward_prop,
            sequences=x,
            outputs_info=[None, dict(initial=T.zeros(self.hiddendim))],
            non_sequences=[U, V, W],
            truncate_gradient=self.bptt_truncate,
            strict=True)

        prediction = T.argmax(o, axis=1)
        o_error = T.sum(T.nnet.categorical_crossentropy(o, y))

        dU = T.grad(o_error, U)
        dV = T.grad(o_error, V)
        dW = T.grad(o_error, W)

        self.forward_propagation = theano.function([x], o)
        self.predict = theano.function([x], prediction)
        self.ce_error = theano.function([x, y], o_error)
        self.bptt = theano.function([x, y], [dU, dV, dW])

        learning_rate = T.scalar('learning_rate')

        self.sgd_step = theano.function(
            [x, y, learning_rate], [],
            updates=[(self.U, self.U - learning_rate * dU),
                     (self.V, self.V - learning_rate * dV),
                     (self.W, self.W - learning_rate * dW)])

    def calc_total_loss(self, X, Y):
        return np.sum([self.ce_error(x, y) for x, y in zip(X, Y)])

    def calc_loss(self, X, Y):
        num_words = np.sum([len(y) for y in Y])
        return self.calc_total_loss(X, Y) / float(num_words)


def gradient_check_theano(model, x, y, h=0.001, error_threshold=0.01):

    model.bptt_truncate = 1000

    bptt_gradients = model.bptt(x, y)

    model_parameters = ['U', 'V', 'W']  # Possibly change to tuple

    for pidx, pname in enumerate(model_parameters):
        parameter_T = operator.attrgetter(pname)(model)
        parameter = parameter_T.get_value()

        print("Performing check for parameter %s with size %d" % (
            pname, np.prod(parameter.shape)))

        it = np.nditer(parameter, flags=['multi_index'], op_flags=['readwrite'])

        while not it.finished:
            ix = it.multi_index

            original_value = parameter[ix]
            parameter[ix] = original_value + h
            parameter_T.set_value(parameter)

            gradplus = model.calc_total_loss([x], [y])

            parameter[ix] = original_value - h
            parameter_T.set_value(parameter)

            gradminus = model.calc_total_loss([x], [y])

            estimated_gradient = (gradplus - gradminus) / (2 * h)

            parameter[ix] = original_value
            parameter_T.set_value(parameter)

            backprop_gradient = bptt_gradients[pidx][ix]

            relative_error = np.abs(backprop_gradient - estimated_gradient) / (
                np.abs(backprop_gradient) + np.abs(estimated_gradient))

            if relative_error > error_threshold:
                print("Gradient check: parameter %s | ix %s" % (pname, ix))
                print("+h loss: %f" % gradplus)
                print("-h loss: %f" % gradminus)
                print("Estimated gradient: %f" % estimated_gradient)
                print("Backpropagation gradient: %f" % backprop_gradient)
                print("Relative error: %f" % relative_error)
                return

            it.iternext()

        print("Gradient check for parameter %s passed" % pname)

rnn.py:

import os
import sys
import csv
import time
import nltk
import pickle
import itertools

import numpy as np
import theano

from TRNN import TheanoRNN

_VOCABULARY_SIZE = int(os.environ.get('VOCAB_SIZE', '8000'))
_HIDDEN_DIM = int(os.environ.get('HIDDEN_DIM', '80'))
_LEARNING_RATE = float(os.environ.get('LEARNING_RATE', '0.005'))
_NEPOCH = int(os.environ.get('NEPOCH', '100'))
_MODEL_FILE = (os.environ.get('MODEL_FILE'))

vocab_size = 8000

unknown = "UNKNOWN_TOKEN"
start = "START_TOKEN"
end = "END_TOKEN"

print("Reading CSV File...")

with open("reddit.csv", "rb") as f:
reader = csv.reader(f, skipinitialspace=True)
reader.next()

sentences = itertools.chain(*[nltk.sent_tokenize(
                            x[0].decode('utf-8').lower()) for x in reader])

# Slow part of code #1

sentences = ["%s %s %s" % (start, x, end) for x in sentences]

print ("Parsed %s sentences" % len(sentences))

print("Tokenizing...")
tokenized_sentences = [nltk.word_tokenize(sent) for sent in sentences]

print("Getting frequencies...")  # slow part of code #2 (Choke)
freq = nltk.FreqDist(itertools.chain(*tokenized_sentences))
print("Found %s unique" % (len(freq.items())))

vocab = freq.most_common(vocab_size - 1)

wordindex = [x[0] for x in vocab]
wordindex.append(unknown)

word_to_index = dict([w, i] for i, w in enumerate(wordindex))

for i, sent in enumerate(tokenized_sentences):
    tokenized_sentences[i] = [w if w in word_to_index else unknown
                              for w in sent]

# Create training matrices

x_train = np.asarray([[word_to_index[w] for w in sent[:-1]]
                      for sent in tokenized_sentences])

y_train = np.asarray([[word_to_index[w] for w in sent[1:]]
                      for sent in tokenized_sentences])

with open("rnntrain.pkl", 'wb') as train:
    pickle.dump([
        x_train,y_train,
        _VOCABULARY_SIZE, _HIDDEN_DIM,
        _LEARNING_RATE, _NEPOCH, _MODEL_FILE
    ], train)
    train.close()
print("dumped the variables")

"""model = TheanoRNN(_VOCABULARY_SIZE, hiddendim=_HIDDEN_DIM)

print("Starting the thingy")
t1 = time.time()
print("Got time")
model.sgd_step(model, x_train[10], y_train[10], _LEARNING_RATE)
t2 = time.time()
print("SDG step time : %f milliseconds" % ((t2 - t1) * 1000.))"""
def main():
    pass

if __name__ == "__main__":
    main()

train_rnn.py:

import sys
import time
import cPickle as pickle

from utils import *
from TRNN import TheanoRNN

def train_with_sgd(model, X_train, Y_train,
                   learning_rate=0.005, nepoch=1, eval_loss=5):

    losses = []
    examples_seen = 0

    for epoch in range(nepoch):  # Possibly change to 'xrange'

        if (epoch % eval_loss) == 0:
            loss = model.calc_loss(X_train, Y_train)
            losses.append((examples_seen, loss))

            if (len(losses) > 1) and (losses[-1][1] > losses[-2][1]):
                learning_rate *= 0.5
            sys.stdout.flush()

            save_model_parameters_theano("./data/rnn-theano-%d-%d-%s.npz" % (
                model.hiddendim, model.worddim,
                time.strftime("%Y-%m-%d-%H-%M-%S")), model)

        for i in range(len(Y_train)):  # Change into xrange

            model.sgd_step(X_train[i], Y_train[i], learning_rate)
            examples_seen += 1

with open("rnntrain.pkl", 'rb') as f:

    (
        x_train, y_train,
        _VOCABULARY_SIZE, _HIDDEN_DIM,
        _LEARNING_RATE, _NEPOCH,
        _MODEL_FILE
    )  = pickle.load(f)

model = TheanoRNN(_VOCABULARY_SIZE, hiddendim=_HIDDEN_DIM)

t1 = time.time()
model.sgd_step(model, x_train[10], y_train[10], _LEARNING_RATE)
t2 = time.time()

print("SGD Step : %f milliseconds" % ((t2 - t1) * 1000))

And here is the full traceback:

Traceback (most recent call last):
  File "/home/usr/miniconda2/lib/python2.7/runpy.py", line 162, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/home/usr/miniconda2/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/home/usr/train_rnn.py", line 43, in <module>
    model.sgd_step(model, x_train[10], y_train[10], _LEARNING_RATE)
  File "/home/usr/miniconda2/lib/python2.7/site-packages/theano/compile/function_module.py", line 786, in __call__
    allow_downcast=s.allow_downcast)
  File "/home/usr/miniconda2/lib/python2.7/site-packages/theano/tensor/type.py", line 149, in filter
    converted_data = theano._asarray(data, self.dtype)
  File "/home/usr/miniconda2/lib/python2.7/site-packages/theano/misc/safe_asarray.py", line 33, in _asarray
    rval = numpy.asarray(a, dtype=dtype, order=order)
  File "/home/usr/miniconda2/lib/python2.7/site-packages/numpy/core/numeric.py", line 482, in asarray
    return array(a, dtype, copy=False, order=order)
AttributeError: ('Bad input argument to theano function with name "TRNN.py:80"  at index 0(0-based)', "TheanoRNN instance has no attribute '__trunc__'")
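The only clue in the message itself is the name "TRNN.py:80", which, as far as I understand Theano's defaults, is the file and line where theano.function was called to compile the function, not where my code invoked it. Assuming I am reading the theano.function docs right, passing an explicit name should at least make future errors identify the function directly:

    # Hypothetical tweak to TRNN.py, using theano.function's name= parameter:
    self.sgd_step = theano.function(
        [x, y, learning_rate], [],
        updates=[(self.U, self.U - learning_rate * dU),
                 (self.V, self.V - learning_rate * dV),
                 (self.W, self.W - learning_rate * dW)],
        name='sgd_step')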
