Skip to content

Commit

Permalink
minor typo fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
rasbt committed Mar 28, 2024
1 parent 5bb5c19 commit 784a4d2
Showing 1 changed file with 91 additions and 75 deletions.
166 changes: 91 additions & 75 deletions ch05/01_main-chapter-code/ch05.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "92b989e9-da36-4159-b212-799184764dd9",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -119,7 +119,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "86000d74-624a-48f0-86da-f41926cb9e04",
"metadata": {
"colab": {
Expand Down Expand Up @@ -180,7 +180,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "5e062b82-3540-48ce-8eb4-009686d0d16c",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -260,7 +260,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "6b5402f8-ec0c-4a44-9892-18a97779ee4f",
"metadata": {
"colab": {
Expand Down Expand Up @@ -290,7 +290,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "e7b6ec51-6f8c-49bd-a349-95ba38b46fb6",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -345,7 +345,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "34ebd76a-16ec-4c17-8958-8a135735cc1c",
"metadata": {
"colab": {
Expand Down Expand Up @@ -385,7 +385,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"id": "c990ead6-53cd-49a7-a6d1-14d8c1518249",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -430,7 +430,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "54aef09c-d6e3-4238-8653-b3a1b0a1077a",
"metadata": {
"colab": {
Expand Down Expand Up @@ -470,7 +470,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "31402a67-a16e-4aeb-977e-70abb9c9949b",
"metadata": {
"colab": {
Expand Down Expand Up @@ -504,7 +504,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"id": "9b003797-161b-4d98-81dc-e68320e09fec",
"metadata": {
"colab": {
Expand Down Expand Up @@ -548,7 +548,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"id": "176ddf35-1c5f-4d7c-bf17-70f3e7069bd4",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -591,7 +591,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"id": "695d6f64-5084-4c23-aea4-105c9e38cfe4",
"metadata": {
"colab": {
Expand Down Expand Up @@ -628,7 +628,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 14,
"id": "0e17e027-ab9f-4fb5-ac9b-a009b831c122",
"metadata": {
"colab": {
Expand Down Expand Up @@ -666,7 +666,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 15,
"id": "62d0816e-b29a-4c8f-a9a5-a167562de978",
"metadata": {
"colab": {
Expand Down Expand Up @@ -700,7 +700,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 16,
"id": "168952a1-b964-4aa7-8e49-966fa26add54",
"metadata": {
"colab": {
Expand Down Expand Up @@ -764,7 +764,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 17,
"id": "654fde37-b2a9-4a20-a8d3-0206c056e2ff",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -795,7 +795,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 18,
"id": "6kgJbe4ehI4q",
"metadata": {
"colab": {
Expand All @@ -821,7 +821,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 19,
"id": "j2XPde_ThM_e",
"metadata": {
"colab": {
Expand All @@ -847,7 +847,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 20,
"id": "6b46a952-d50a-4837-af09-4095698f7fd1",
"metadata": {
"colab": {
Expand Down Expand Up @@ -903,7 +903,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 21,
"id": "0959c855-f860-4358-8b98-bc654f047578",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -940,7 +940,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 22,
"id": "f37b3eb0-854e-4895-9898-fa7d1e67566e",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -977,7 +977,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 23,
"id": "ca0116d0-d229-472c-9fbf-ebc229331c3e",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -1021,7 +1021,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 24,
"id": "eb860488-5453-41d7-9870-23b723f742a0",
"metadata": {
"colab": {
Expand Down Expand Up @@ -1066,7 +1066,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 25,
"id": "7b9de31e-4096-47b3-976d-b6d2fdce04bc",
"metadata": {
"id": "7b9de31e-4096-47b3-976d-b6d2fdce04bc"
Expand Down Expand Up @@ -1110,7 +1110,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 26,
"id": "56f5b0c9-1065-4d67-98b9-010e42fc1e2a",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -1178,8 +1178,7 @@
" eval_freq, eval_iter, start_context):\n",
" # Initialize lists to track losses and tokens seen\n",
" train_losses, val_losses, track_tokens_seen = [], [], []\n",
" tokens_seen = 0\n",
" global_step = -1\n",
" tokens_seen, global_step = 0, -1\n",
"\n",
" # Main training loop\n",
" for epoch in range(num_epochs):\n",
Expand Down Expand Up @@ -1408,7 +1407,7 @@
"metadata": {},
"source": [
"- Inference is relatively cheap with a relatively small LLM such as the GPT model we trained above, so there's no need to use a GPU for it in case you used a GPU for training it above\n",
"- Using the `generate_text_simple method` (from the previous chapter) that we used earlier inside the simple training function, we can generate new text one word (or token) at a time\n",
"- Using the `generate_text_simple` function (from the previous chapter) that we used earlier inside the simple training function, we can generate new text one word (or token) at a time\n",
"- As explained in section 5.1.2, the next generated token is the token corresponding to the largest probability score among all tokens in the vocabulary"
]
},
Expand Down Expand Up @@ -1498,8 +1497,6 @@
}
],
"source": [
"# Assume some logits from a neural network output for 7 vocabulary tokens\n",
"\n",
"vocab = { \n",
" \"closer\": 0,\n",
" \"every\": 1, \n",
Expand Down Expand Up @@ -1527,12 +1524,74 @@
"print(inverse_vocab[next_token_id])"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "6400572f-b3c8-49e2-95bc-433e55c5b3a1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"forward\n"
]
}
],
"source": [
"torch.manual_seed(123)\n",
"next_token_id = torch.multinomial(probas, num_samples=1).item()\n",
"print(inverse_vocab[next_token_id])"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "b23b863e-252a-403c-b5b1-62bc0a42319f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"73 x closer\n",
"0 x every\n",
"0 x effort\n",
"582 x forward\n",
"2 x inches\n",
"0 x moves\n",
"0 x pizza\n",
"343 x toward\n"
]
}
],
"source": [
"def print_sampled_tokens(probas):\n",
" torch.manual_seed(123) # Manual seed for reproducibility\n",
" sample = [torch.multinomial(probas, num_samples=1).item() for i in range(1_000)]\n",
" sampled_ids = torch.bincount(torch.tensor(sample))\n",
" for i, freq in enumerate(sampled_ids):\n",
" print(f\"{freq} x {inverse_vocab[i]}\")\n",
"\n",
"print_sampled_tokens(probas)"
]
},
{
"cell_type": "markdown",
"id": "c63d0a27-830b-42b5-9986-6d1a7de04dd9",
"metadata": {},
"source": [
"- Instead of determining the most likely token via `torch.argmax`, we use `torch.multinomial(probas, num_samples=1)` to determine the next token by sampling from the softmax distribution\n",
"- For illustration purposes, let's see what happens when we sample the next token 1,000 times using the original softmax probabilities:"
]
},
{
"cell_type": "markdown",
"id": "32e7d9cf-a26d-4d9a-8664-4af1efa73832",
"metadata": {},
"source": [
"- \"Temperature scaling\" is just a fancy word for diving the logits by a number greater than 0\n",
"- We can control the distribution and selection process via a concept called temperature scaling\n",
"- \"Temperature scaling\" is just a fancy word for dividing the logits by a number greater than 0\n",
"- Temperatures greater than 1 will result in more uniformly distributed token probabilities after applying the softmax\n",
"- Temperatures smaller than 1 will result in more confident (sharper or more peaky) distributions after applying the softmax"
]
Expand All @@ -1549,7 +1608,7 @@
" return torch.softmax(scaled_logits, dim=0)\n",
"\n",
"# Temperature values\n",
"temperatures = [1, 0.1, 5] # Original, higher confidence, and\n",
"temperatures = [1, 0.1, 5] # Original, higher confidence, and lower confidence\n",
"\n",
"# Calculate scaled probabilities\n",
"scaled_probas = [softmax_with_temperature(next_token_logits, T) for T in temperatures]"
Expand Down Expand Up @@ -1591,49 +1650,6 @@
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "c63d0a27-830b-42b5-9986-6d1a7de04dd9",
"metadata": {},
"source": [
"- Instead of determining the most likely token via `torch.argmax`, we use `torch.multinomial(probas, num_samples=1)` to determine the next token by sampling from the softmax distribution\n",
"- For illustration purposes, let's see what happens when we sample the next token 1,000 times using the original softmax probabilities:"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "b23b863e-252a-403c-b5b1-62bc0a42319f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"73 x closer\n",
"0 x every\n",
"0 x effort\n",
"582 x forward\n",
"2 x inches\n",
"0 x moves\n",
"0 x pizza\n",
"343 x toward\n"
]
}
],
"source": [
"# Original probas\n",
"\n",
"def print_sampled_tokens(probas):\n",
" torch.manual_seed(123)\n",
" sample = [torch.multinomial(probas, num_samples=1).item() for i in range(1_000)]\n",
" sampled_ids = torch.bincount(torch.tensor(sample))\n",
" for i, freq in enumerate(sampled_ids):\n",
" print(f\"{freq} x {inverse_vocab[i]}\")\n",
"\n",
"print_sampled_tokens(probas)"
]
},
{
"cell_type": "markdown",
"id": "d750e989-842a-4cfa-a44b-cf44d6e49163",
Expand Down

0 comments on commit 784a4d2

Please sign in to comment.