From ba0808fa867fd68283cc053f3f934bd51ad12779 Mon Sep 17 00:00:00 2001
From: tomogwen
Date: Wed, 28 Feb 2024 20:21:58 +0000
Subject: [PATCH 1/2] add todo

---
 src/litgpt/model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/litgpt/model.py b/src/litgpt/model.py
index 8d6cca1..e72ed5c 100644
--- a/src/litgpt/model.py
+++ b/src/litgpt/model.py
@@ -8,6 +8,7 @@ from torch.nn import functional as F
 
 
 # global hyperparams
+# TODO: add these to an args/hparam object
 VOCAB_SIZE = 65
 N_EMBD = 384  # dimension of token embeddings
 N_HEADS = 6  # number of self-attention heads
@@ -51,7 +52,7 @@ def forward(self, x):
 
 
 class MultiHeadAttention(nn.Module):
-    """ " Multiple heads of self-attention in parallel"""
+    """Multiple heads of self-attention in parallel"""
 
     def __init__(self, num_heads, head_size):
         super().__init__()

From 527b5e447a3db5d62e49be63c178218c29b5b5a7 Mon Sep 17 00:00:00 2001
From: tomogwen
Date: Thu, 29 Feb 2024 13:47:10 +0000
Subject: [PATCH 2/2] update readme

---
 README.md | 35 +++++++++++++++--------------------
 1 file changed, 15 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index d387ca6..7e30aa9 100644
--- a/README.md
+++ b/README.md
@@ -3,41 +3,36 @@
 
 This repo contains my efforts to learn how to create a (better than research code, aspiring to production quality) deep learning repository. It trains an implementation of Karpathy's [minGPT](https://github.com/karpathy/minGPT) in PyTorch Lightning.
 
-## Goal
+This repo grew out of a minimal example of multi-node, multi-GPU training with PyTorch Lightning on a slurm cluster - if you're interested in that, please see the slurmformer branch.
 
-Some things that I'd like to learn whilst creating this repo are:
+## Goal
 
-Software Development:
-- [X] Setup github actions.
-- [X] Writing tests.
-- [X] Setup pre-commit checks.
-- [X] 'Packagify' the code.
-- [X] Good repo structure.
+A non-exhaustive list of skills I'd like to learn about via this repo is below.
 
-Deep Learning:
-- [ ] Deal with hyperparams nicely
+Machine Learning Engineering:
+- [ ] Dealing with hyperparams nicely
   - Config files + CLI
   - Use an args object or pass around many hparams?
-- [ ] Deal with different accelerators nicely
+- [ ] Dealing with different accelerators nicely
   - should run easily on CPU, MPS, or (multi-)GPU.
-- [ ] Tune hyperparams
-  - Understand the effect of tuning different hparams
 
-I will hopefully add more to this as I go!
+Software development:
+- [ ] Docstrings and type hints
+- [X] Setting up github actions.
+- [X] Writing tests.
+- [X] Setting up pre-commit checks.
+- [X] 'Packagify'-ing code.
+- [X] Having good repo structure.
 
 ## Installation
 
-To install dependencies:
+To install dependencies and activate the conda environment:
 ```
 > conda env create -f env.yml
-```
-
-Activate the conda environment:
-```
 > conda activate litgpt
 ```
 
-To install pre-commit checks:
+If developing, install pre-commit checks:
 ```
 > pre-commit install
 ```
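A side note on the TODO introduced in the first patch ("add these to an args/hparam object") and the README question about using an args object: a minimal sketch of what such an object could look like is below. The field names mirror the globals in `src/litgpt/model.py`, but the `HParams` class itself is hypothetical and not part of either patch.

```python
# Hypothetical sketch only - not taken from the repo.
# Gathers the module-level globals from src/litgpt/model.py into one object
# that can be built from a config file or CLI args and passed around.
from dataclasses import dataclass


@dataclass
class HParams:
    vocab_size: int = 65
    n_embd: int = 384  # dimension of token embeddings
    n_heads: int = 6   # number of self-attention heads


hparams = HParams()
head_size = hparams.n_embd // hparams.n_heads  # derive per-head size once
```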
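Similarly, for the README bullet about running easily on CPU, MPS, or (multi-)GPU, one common approach is to let PyTorch Lightning choose the accelerator. A sketch is below, assuming the `lightning` package is installed; `model` and `datamodule` are placeholders, not names from this repo.

```python
# Hypothetical sketch only - lets Lightning pick whatever hardware is present.
import lightning as L

trainer = L.Trainer(
    accelerator="auto",  # resolves to CPU, MPS, or CUDA depending on the machine
    devices="auto",      # uses all visible devices, e.g. every GPU on the node
    max_epochs=1,
)
# trainer.fit(model, datamodule)  # placeholders for the repo's LightningModule/DataModule
```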