From eefe4bf12b274c9890250f86a479c08c91cd07c7 Mon Sep 17 00:00:00 2001
From: Daniel Kleine <53251018+d-kleine@users.noreply.github.com>
Date: Wed, 18 Sep 2024 17:08:50 +0200
Subject: [PATCH] Chainlit bonus material fixes (#361)

* fix cmd

* moved idx to device

* improved code with clone().detach()

* fixed path

* fix: added extra line for pep8

* updated .gitignore

* Update ch05/06_user_interface/app_orig.py

* Update ch05/06_user_interface/app_own.py

* Apply suggestions from code review

---------

Co-authored-by: Sebastian Raschka
---
 .gitignore                         |  1 +
 ch05/06_user_interface/README.md   |  2 +-
 ch05/06_user_interface/app_orig.py |  8 ++++----
 ch05/06_user_interface/app_own.py  | 10 +++++-----
 4 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/.gitignore b/.gitignore
index 08536279..1e7a7898 100644
--- a/.gitignore
+++ b/.gitignore
@@ -92,6 +92,7 @@ ch07/04_preference-tuning-with-dpo/loss-plot.pdf
 # Other
 ch05/06_user_interface/chainlit.md
 ch05/06_user_interface/.chainlit
+ch05/06_user_interface/.files
 
 # Temporary OS-related files
 .DS_Store
diff --git a/ch05/06_user_interface/README.md b/ch05/06_user_interface/README.md
index 07565897..5d60dc1f 100644
--- a/ch05/06_user_interface/README.md
+++ b/ch05/06_user_interface/README.md
@@ -17,7 +17,7 @@ To implement this user interface, we use the open-source [Chainlit Python packag
 
 First, we install the `chainlit` package via
 
-```python
+```bash
 pip install chainlit
 ```
 
diff --git a/ch05/06_user_interface/app_orig.py b/ch05/06_user_interface/app_orig.py
index be0f25b8..54427734 100644
--- a/ch05/06_user_interface/app_orig.py
+++ b/ch05/06_user_interface/app_orig.py
@@ -16,6 +16,8 @@
     token_ids_to_text,
 )
 
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
 
 def get_model_and_tokenizer():
     """
@@ -44,8 +46,6 @@ def get_model_and_tokenizer():
 
     BASE_CONFIG.update(model_configs[CHOOSE_MODEL])
 
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
     settings, params = download_and_load_gpt2(model_size=model_size, models_dir="gpt2")
 
     gpt = GPTModel(BASE_CONFIG)
@@ -67,9 +67,9 @@ async def main(message: chainlit.Message):
     """
     The main Chainlit function.
     """
-    token_ids = generate(
+    token_ids = generate(  # function uses `with torch.no_grad()` internally already
         model=model,
-        idx=text_to_token_ids(message.content, tokenizer),  # The user text is provided via as `message.content`
+        idx=text_to_token_ids(message.content, tokenizer).to(device),  # The user text is provided via `message.content`
         max_new_tokens=50,
         context_size=model_config["context_length"],
         top_k=1,
diff --git a/ch05/06_user_interface/app_own.py b/ch05/06_user_interface/app_own.py
index ae03e974..08530faf 100644
--- a/ch05/06_user_interface/app_own.py
+++ b/ch05/06_user_interface/app_own.py
@@ -17,6 +17,8 @@
     token_ids_to_text,
 )
 
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
 
 def get_model_and_tokenizer():
     """
@@ -34,8 +36,6 @@ def get_model_and_tokenizer():
         "qkv_bias": False     # Query-key-value bias
     }
 
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
     tokenizer = tiktoken.get_encoding("gpt2")
 
     model_path = Path("..") / "01_main-chapter-code" / "model.pth"
@@ -43,7 +43,7 @@ def get_model_and_tokenizer():
         print(f"Could not find the {model_path} file. Please run the chapter 5 code (ch05.ipynb) to generate the model.pth file.")
         sys.exit()
 
-    checkpoint = torch.load("model.pth", weights_only=True)
+    checkpoint = torch.load(model_path, weights_only=True)
     model = GPTModel(GPT_CONFIG_124M)
     model.load_state_dict(checkpoint)
     model.to(device)
@@ -60,9 +60,9 @@ async def main(message: chainlit.Message):
     """
     The main Chainlit function.
     """
-    token_ids = generate(
+    token_ids = generate(  # function uses `with torch.no_grad()` internally already
        model=model,
-        idx=text_to_token_ids(message.content, tokenizer),  # The user text is provided via as `message.content`
+        idx=text_to_token_ids(message.content, tokenizer).to(device),  # The user text is provided via `message.content`
         max_new_tokens=50,
         context_size=model_config["context_length"],
         top_k=1,
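
For reference, the pattern behind the `moved idx to device` and `fixed path` fixes above is sketched in isolation below. This is a minimal illustration, not code from the repository: `load_checkpoint` and `run_inference` are hypothetical stand-ins, and only `torch.device`, `.to(device)`, and `torch.load(..., weights_only=True)` are used as they appear in the patch.

```python
from pathlib import Path

import torch

# Resolve the device once at module scope, as the patch does, so that
# model loading and inference share the same placement.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def load_checkpoint(model: torch.nn.Module, model_path: Path) -> torch.nn.Module:
    # Load from the resolved Path object rather than a hard-coded
    # "model.pth" string (the `fixed path` commit); weights_only=True
    # restricts unpickling to plain tensors and containers.
    checkpoint = torch.load(model_path, weights_only=True)
    model.load_state_dict(checkpoint)
    return model.to(device)


def run_inference(model: torch.nn.Module, idx: torch.Tensor) -> torch.Tensor:
    # The `moved idx to device` fix: token IDs produced on the CPU
    # (e.g., by a tokenizer) must be moved to the model's device, or
    # PyTorch raises a device-mismatch RuntimeError on CUDA machines.
    with torch.no_grad():  # the repo's `generate` does this internally
        return model(idx.to(device))
```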