Skip to content

Commit

Permalink
tokenizing example
Browse files Browse the repository at this point in the history
  • Loading branch information
rasbt committed May 6, 2024
1 parent 258dcad commit cddcbc8
Showing 1 changed file with 8 additions and 10 deletions.
18 changes: 8 additions & 10 deletions ch06/01_main-chapter-code/ch06.ipynb
Original file line number | Diff line number | Diff line change
Expand Up @@ -172,7 +172,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
-"File downloaded and saved as sms_spam_collection/SMSSpamCollection.tsv\n"
+"sms_spam_collection/SMSSpamCollection.tsv already exists. Skipping download and extraction.\n"
]
}
],
Expand Down Expand Up @@ -557,18 +557,16 @@
"metadata": {},
"outputs": [
{
-"data": {
-"text/plain": [
-"[42, 13, 314, 481, 1908, 340, 757]"
-]
-},
-"execution_count": 9,
-"metadata": {},
-"output_type": "execute_result"
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"[1212, 318, 262, 717, 2420, 3275]\n"
+]
}
],
"source": [
-"tokenizer.encode(\"K. I will sent it again\")"
+"token_ids = tokenizer.encode(\"This is the first text message\")\n",
+"print(token_ids)"
]
},
{
Expand Down

0 comments on commit cddcbc8

Please sign in to comment.