While working with the Theano GPU machine-learning code shown here, I encounter this error:
AttributeError: ('Bad input argument to theano function with name "TRNN.py:80" at index 0(0-based)', "TheanoRNN instance has no attribute '__trunc__'")
And I can't trace the error to a specific line of code. Can someone please show me how to fix this?
Here are the files:
TRNN.py:
import operator
import theano
import theano.tensor as T
import numpy as np
class TheanoRNN():
    """Theano version of the RNN."""

    def __init__(self, worddim, hiddendim=100, bptt_truncate=4):
        self.worddim = worddim
        self.hiddendim = hiddendim
        self.bptt_truncate = bptt_truncate
        U = np.random.uniform(-np.sqrt(1. / worddim),
                              np.sqrt(1. / worddim),
                              (hiddendim, worddim))
        V = np.random.uniform(-np.sqrt(1. / hiddendim),
                              np.sqrt(1. / hiddendim),
                              (worddim, hiddendim))
        W = np.random.uniform(-np.sqrt(1. / hiddendim),
                              np.sqrt(1. / hiddendim),
                              (hiddendim, hiddendim))
        self.U = theano.shared(name='U', value=U.astype(theano.config.floatX))
        self.V = theano.shared(name='V', value=V.astype(theano.config.floatX))
        self.W = theano.shared(name='W', value=W.astype(theano.config.floatX))
        self.theano = {}
        self.__theano_build__()

    def __theano_build__(self):
        """No clue what this does yet (builds the symbolic graph and compiles the Theano functions)."""
        U = self.U
        V = self.V
        W = self.W
        x = T.ivector('x')
        y = T.ivector('y')

        def forward_prop(x_t, s_t_prev, U, V, W):
            s_t = T.tanh(U[:, x_t] + W.dot(s_t_prev))
            o_t = T.nnet.softmax(V.dot(s_t))
            return [o_t[0], s_t]

        [o, s], updates = theano.scan(
            forward_prop,
            sequences=x,
            outputs_info=[None, dict(initial=T.zeros(self.hiddendim))],
            non_sequences=[U, V, W],
            truncate_gradient=self.bptt_truncate,
            strict=True)
        prediction = T.argmax(o, axis=1)
        o_error = T.sum(T.nnet.categorical_crossentropy(o, y))
        dU = T.grad(o_error, U)
        dV = T.grad(o_error, V)
        dW = T.grad(o_error, W)
        self.forward_propagation = theano.function([x], o)
        self.predict = theano.function([x], prediction)
        self.ce_error = theano.function([x, y], o_error)
        self.bptt = theano.function([x, y], [dU, dV, dW])
        learning_rate = T.scalar('learning_rate')
        self.sgd_step = theano.function([x, y, learning_rate], [],
                                        updates=[
                                            (self.U, self.U - learning_rate * dU),
                                            (self.V, self.V - learning_rate * dV),
                                            (self.W, self.W - learning_rate * dW)
                                        ])

    def calc_total_loss(self, X, Y):
        return np.sum([self.ce_error(x, y) for x, y in zip(X, Y)])

    def calc_loss(self, X, Y):
        num_words = np.sum([len(y) for y in Y])
        return self.calc_total_loss(X, Y) / float(num_words)
def gradient_check_theano(model, x, y, h=0.001, error_threshold=0.01):
    # Compare the BPTT gradients against numerical estimates.
    model.bptt_truncate = 1000
    bptt_gradients = model.bptt(x, y)
    model_parameters = ['U', 'V', 'W']  # Possibly change to tuple
    for pidx, pname in enumerate(model_parameters):
        parameter_T = operator.attrgetter(pname)(model)
        parameter = parameter_T.get_value()
        print("Performing check for parameter %s with size %d" % (
            pname, np.prod(parameter.shape)))
        it = np.nditer(parameter, flags=['multi_index'], op_flags=['readwrite'])
        while not it.finished:
            ix = it.multi_index
            original_value = parameter[ix]
            # Numerical estimate: (J(p + h) - J(p - h)) / (2h)
            parameter[ix] = original_value + h
            parameter_T.set_value(parameter)
            gradplus = model.calc_total_loss([x], [y])
            parameter[ix] = original_value - h
            parameter_T.set_value(parameter)
            gradminus = model.calc_total_loss([x], [y])
            estimated_gradient = (gradplus - gradminus) / (2 * h)
            parameter[ix] = original_value
            parameter_T.set_value(parameter)
            backprop_gradients = bptt_gradients[pidx][ix]
            relative_error = np.abs(backprop_gradients - estimated_gradient) / (
                np.abs(backprop_gradients) + np.abs(estimated_gradient))
            if relative_error > error_threshold:
                print("Gradient Check: params %s | ix %s" % (pname, ix))
                print("+h loss: %f" % gradplus)
                print("-h loss: %f" % gradminus)
                print("Estimated gradient: %f" % estimated_gradient)
                print("Backpropagation gradient: %f" % backprop_gradients)
                print("Relative error: %f" % relative_error)
                return
            it.iternext()
        print("Gradient check for parameter %s passed" % pname)
rnn.py:
import os
import sys
import csv
import time
import nltk
import pickle
import itertools
import numpy as np
import theano
from TRNN import TheanoRNN
_VOCABULARY_SIZE = int(os.environ.get('VOCAB_SIZE', '8000'))
_HIDDEN_DIM = int(os.environ.get('HIDDEN_DIM', '80'))
_LEARNING_RATE = float(os.environ.get('LEARNING_RATE', '0.005'))
_NEPOCH = int(os.environ.get('NEPOCH', '100'))
_MODEL_FILE = (os.environ.get('MODEL_FILE'))
vocab_size = 8000
unknown = "UNKNOWN_TOKEN"
start = "START_TOKEN"
end = "END_TOKEN"
print("Reading CSV File...")
with open("reddit.csv", "rb") as f:
reader = csv.reader(f, skipinitialspace=True)
reader.next()
sentences = itertools.chain(*[nltk.sent_tokenize(
x[0].decode('utf-8').lower()) for x in reader])
# Slow part of code #1
sentences = ["%s %s %s" % (start, x, end) for x in sentences]
print ("Parsed %s sentences" % len(sentences))
print("Tokenizing...")
tokenized_sentences = [nltk.word_tokenize(sent) for sent in sentences]
print("Getting frequencies...") # slow part of code #2 (Choke)
freq = nltk.FreqDist(itertools.chain(*tokenized_sentences))
print("Found %s unique" % (len(freq.items())))
vocab = freq.most_common(vocab_size - 1)
wordindex = [x[0] for words in vocab]
wordindex.append(unknown)
word_to_index = dict([w, i] for i, w in enumerate(wordindex))
for i, sent in enumerate(tokenized_sentences):
tokenized_sentences[i] = [
w if w in word_to_index else unknown
for w in sent
]
#Create training matrixes
x_train = np.asarray([[word_to_index[w]
for w in sent[:-1]]
for sent in tokenized_sentences])
y_train = np.asarray([[word_to_index[w]
for w in sent[1:]]
for sent in tokenized_sentences])
with open("rnntrain.pkl", 'wb') as train:
pickle.dump([
x_train,y_train,
_VOCABULARY_SIZE, _HIDDEN_DIM,
_LEARNING_RATE, _NEPOCH, _MODEL_FILE
], train)
train.close()
print("dumped the variables")
"""model = TheanoRNN(_VOCABULARY_SIZE, hiddendim=_HIDDEN_DIM)
print("Starting the thingy")
t1 = time.time()
print("Got time")
model.sgd_step(model, x_train[10], y_train[10], _LEARNING_RATE)
t2 = time.time()
print("SDG step time : %f milliseconds" % ((t2 - t1) * 1000.))"""
def main():
    pass

if __name__ == "__main__":
    main()
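(To show the data format I am aiming for, here is a toy version of what I believe the script above produces; the sentence and vocabulary are invented:)

tokenized = [["START_TOKEN", "the", "cat", "sat", "END_TOKEN"]]
wordindex = ["START_TOKEN", "the", "cat", "sat", "END_TOKEN", "UNKNOWN_TOKEN"]
word_to_index = dict([w, i] for i, w in enumerate(wordindex))

x_example = [word_to_index[w] for w in tokenized[0][:-1]]  # [0, 1, 2, 3]
y_example = [word_to_index[w] for w in tokenized[0][1:]]   # [1, 2, 3, 4]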
train_rnn.py:
import sys
import time
import cPickle as pickle

from utils import *
from TRNN import TheanoRNN


def train_with_sgd(model, X_train, Y_train, learning_rate=0.005, nepoch=1, eval_loss=5):
    losses = []
    examples_seen = 0
    for epoch in range(nepoch):  # Possibly change to 'xrange'
        if (epoch % eval_loss) == 0:
            loss = model.calc_loss(X_train, Y_train)
            losses.append((examples_seen, loss))
            # Halve the learning rate whenever the loss increased since the last check
            if (len(losses) > 1) and (losses[-1][1] > losses[-2][1]):
                learning_rate *= 0.5
            sys.stdout.flush()
            # NOTE: `time` here is the module, so its repr ends up in the filename
            save_model_parameters_theano("./data/rnn-theano-%d-%d-%s.npz" % (
                model.hiddendim, model.worddim, time
            ), model)
        for i in range(len(Y_train)):  # Change into xrange
            model.sgd_step(X_train[i], Y_train[i], learning_rate)
            examples_seen += 1


with open("rnntrain.pkl", 'rb') as f:
    (
        x_train, y_train,
        _VOCABULARY_SIZE, _HIDDEN_DIM,
        _LEARNING_RATE, _NEPOCH,
        _MODEL_FILE
    ) = pickle.load(f)

model = TheanoRNN(_VOCABULARY_SIZE, hiddendim=_HIDDEN_DIM)
t1 = time.time()
model.sgd_step(model, x_train[10], y_train[10], _LEARNING_RATE)
t2 = time.time()
print("SGD Step : %f milliseconds" % ((t2 - t1) * 1000))
And here is the full traceback:
Traceback (most recent call last):
  File "/home/usr/miniconda2/lib/python2.7/runpy.py", line 162, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/home/usr/miniconda2/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/home/usr/train_rnn.py", line 43, in <module>
    model.sgd_step(model, x_train[10], y_train[10], _LEARNING_RATE)
  File "/home/usr/miniconda2/lib/python2.7/site-packages/theano/compile/function_module.py", line 786, in __call__
    allow_downcast=s.allow_downcast)
  File "/home/usr/miniconda2/lib/python2.7/site-packages/theano/tensor/type.py", line 149, in filter
    converted_data = theano._asarray(data, self.dtype)
  File "/home/usr/miniconda2/lib/python2.7/site-packages/theano/misc/safe_asarray.py", line 33, in _asarray
    rval = numpy.asarray(a, dtype=dtype, order=order)
  File "/home/usr/miniconda2/lib/python2.7/site-packages/numpy/core/numeric.py", line 482, in asarray
    return array(a, dtype, copy=False, order=order)
AttributeError: ('Bad input argument to theano function with name "TRNN.py:80" at index 0(0-based)', "TheanoRNN instance has no attribute '__trunc__'")
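For what it's worth, I can get a very similar "Bad input argument" failure from a tiny standalone Theano function when the first argument is something NumPy cannot convert to an integer array, so I assume the conversion of input 0 is where things go wrong. This snippet is only my guess and is not taken from the code above:

import numpy as np
import theano
import theano.tensor as T

x = T.ivector('x')
f = theano.function([x], x.sum())

print(f(np.array([1, 2, 3], dtype='int32')))  # works as expected, prints 6

class Dummy:
    pass

# I would expect this call to raise a "Bad input argument to theano function ...
# at index 0" error much like the one above (the exact message may vary with the
# NumPy version), because the instance cannot be converted to an int32 array.
f(Dummy())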