Commit

Permalink
Added log statements
batzner committed Aug 25, 2017
1 parent a60de54 commit a82ced2
Showing 6 changed files with 33 additions and 3 deletions.
6 changes: 5 additions & 1 deletion .gitignore
@@ -138,4 +138,8 @@ results
 .DS_Store
 resources
 models
-venv-gpu
+venv-gpu
+
+
+
+!.keep
13 changes: 13 additions & 0 deletions aws-clipboard.txt
@@ -0,0 +1,13 @@
git clone https://github.com/batzner/tensorlm
cd tensorlm
mkdir datasets
mkdir datasets/sherlock

sudo pip3 install -r requirements.txt

# Copy the datasets
scp -i ~/dev/pems/tensorflow.pem ~/dev/python-hacks/tensorlm/datasets/sherlock/train.txt ubuntu@54.149.27.213:~/tensorlm/datasets/sherlock/train.txt
scp -i ~/dev/pems/tensorflow.pem ~/dev/python-hacks/tensorlm/datasets/sherlock/valid.txt ubuntu@54.149.27.213:~/tensorlm/datasets/sherlock/valid.txt

# Train
python3 -m tensorlm.run --train=True --level=word --max_vocab_size=10000 --neurons_per_layer=250 --num_layers=3 --max_batch_size=100 --num_timesteps=160 --save_dir=out/model --evaluate_text_path=datasets/sherlock/valid.txt --train_text_path=datasets/sherlock/train.txt --max_epochs=30 --save_interval_hours=1
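
Note: the hyperparameters in this command match the constants in num_params.py below (max_vocab_size=10000, neurons_per_layer=250, num_layers=3), and with max_batch_size=100 and num_timesteps=160 each training step covers at most 100 * 160 = 16,000 tokens of the training text.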
11 changes: 11 additions & 0 deletions tensorlm/common/num_params.py
@@ -0,0 +1,11 @@
vocabulary_size = 10000
num_layers = 3
num_neurons = 250

num_first_layer = 4 * (num_neurons * (vocabulary_size + num_neurons) + num_neurons)
num_other_layer = 4 * (num_neurons * 2 * num_neurons + num_neurons)
num_softmax = vocabulary_size * num_neurons + vocabulary_size

total = num_first_layer + (num_layers - 1) * num_other_layer + num_softmax

print("{} MM model parameters".format(total / pow(10, 6)))
2 changes: 2 additions & 0 deletions tensorlm/dataset.py
@@ -194,6 +194,8 @@ def load_from_dir(save_dir, level="char"):
 
     @staticmethod
     def create_from_text(text_path, max_vocab_size, level="char"):
+        LOGGER.info("Creating vocabulary from %s", text_path)
+
         # Get the most common tokens from the text
         token_counter = Counter()
         for tokens in TextIterator(text_path, level, bytes_in_memory=1000000):
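
The added log call uses the lazy %-style formatting of Python's standard logging module, where the argument is passed separately and only substituted if the record is actually emitted. A minimal, self-contained sketch of that pattern, assuming LOGGER is an ordinary logging.Logger (tensorlm's own logger setup may differ):

import logging

logging.basicConfig(level=logging.INFO)
LOGGER = logging.getLogger("tensorlm.dataset")

# The %s placeholder is filled in by the logging module itself, so the
# string is never formatted for records below the configured level.
LOGGER.info("Creating vocabulary from %s", "datasets/sherlock/train.txt")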
2 changes: 1 addition & 1 deletion tensorlm/model.py
@@ -17,7 +17,7 @@
 
 class GeneratingLSTM:
     def __init__(self, vocab_size, neurons_per_layer, num_layers, max_batch_size,
-                 output_keep_prob=1.0, max_gradient_norm=5,
+                 output_keep_prob=0.5, max_gradient_norm=5,
                  initial_learning_rate=0.001, forward_only=False):
        self.neurons_per_layer = neurons_per_layer
        self.num_layers = num_layers
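
Lowering the default output_keep_prob from 1.0 to 0.5 enables dropout on each LSTM layer's outputs during training. As an illustration only (not the actual GeneratingLSTM cell construction), this is how such a keep probability is commonly applied to a stacked LSTM in TensorFlow 1.x with DropoutWrapper:

import tensorflow as tf

def build_stacked_cell(neurons_per_layer, num_layers, output_keep_prob):
    # One LSTM cell per layer; dropout is applied to each layer's outputs.
    cells = []
    for _ in range(num_layers):
        cell = tf.contrib.rnn.BasicLSTMCell(neurons_per_layer)
        if output_keep_prob < 1.0:
            cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=output_keep_prob)
        cells.append(cell)
    return tf.contrib.rnn.MultiRNNCell(cells)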
2 changes: 1 addition & 1 deletion tensorlm/run.py
@@ -83,7 +83,7 @@ def main(_):
     flags_path = os.path.join(FLAGS.save_dir, "flags.json")
     if not os.path.exists(flags_path):
         with open(flags_path, "w") as f:
-            json.dump(FLAGS["__flags"].__dict__, f)
+            json.dump(FLAGS.__dict__["__flags"], f)
 
     model.train(session, max_epochs=FLAGS.max_epochs,
                 max_steps=FLAGS.max_steps,
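
Both variants reach into the flags object's internals to get a plain dict that json.dump can serialize; the new form reads the parsed-flag dictionary stored on the FlagValues instance. On newer TensorFlow releases, where tf.flags is backed by absl.flags, the same result is available without private attributes via flag_values_dict() (a sketch for those versions, not the TensorFlow this commit targets):

import json

def dump_flags(flags, path):
    # flag_values_dict() returns a plain {flag_name: value} mapping.
    with open(path, "w") as f:
        json.dump(flags.flag_values_dict(), f)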
