Skip to content

Commit

Permalink
Minor readability improvement in dataloader.ipynb (#461)
Browse files Browse the repository at this point in the history
* Minor readability improvement in dataloader.ipynb

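- The root-level `tokenizer` and `encoded_text` variables are unused, so they are removed.
- The default parameters of `create_dataloader_v1` are confusing, in particular the default `batch_size=4`, which happens to equal the `max_length` value (4) used in the notebook. `batch_size`, `max_length`, and `stride` are therefore now required arguments that the caller passes explicitly.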

* readability improvements

---------

Co-authored-by: rasbt <mail@sebastianraschka.com>
  • Loading branch information
tao-qian and rasbt authored Jan 4, 2025
1 parent 1b635f7 commit cec445f
Showing 1 changed file with 10 additions and 7 deletions.
17 changes: 10 additions & 7 deletions ch02/01_main-chapter-code/dataloader.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@
" return self.input_ids[idx], self.target_ids[idx]\n",
"\n",
"\n",
"def create_dataloader_v1(txt, batch_size=4, max_length=256, \n",
" stride=128, shuffle=True, drop_last=True, num_workers=0):\n",
"def create_dataloader_v1(txt, batch_size, max_length, stride,\n",
" shuffle=True, drop_last=True, num_workers=0):\n",
" # Initialize the tokenizer\n",
" tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
"\n",
Expand All @@ -121,9 +121,6 @@
"with open(\"the-verdict.txt\", \"r\", encoding=\"utf-8\") as f:\n",
" raw_text = f.read()\n",
"\n",
"tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
"encoded_text = tokenizer.encode(raw_text)\n",
"\n",
"vocab_size = 50257\n",
"output_dim = 256\n",
"context_length = 1024\n",
Expand All @@ -132,8 +129,14 @@
"token_embedding_layer = torch.nn.Embedding(vocab_size, output_dim)\n",
"pos_embedding_layer = torch.nn.Embedding(context_length, output_dim)\n",
"\n",
"batch_size = 8\n",
"max_length = 4\n",
"dataloader = create_dataloader_v1(raw_text, batch_size=8, max_length=max_length, stride=max_length)"
"dataloader = create_dataloader_v1(\n",
" raw_text,\n",
" batch_size=batch_size,\n",
" max_length=max_length,\n",
" stride=max_length\n",
")"
]
},
{
Expand Down Expand Up @@ -189,7 +192,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.11.4"
}
},
"nbformat": 4,
Expand Down

0 comments on commit cec445f

Please sign in to comment.