From ffd23912afaa0ba7674c6f0e16229bb1eeced06a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 30 Jan 2025 16:36:07 -0800 Subject: [PATCH 1/6] prefill, hide_prefill and stop_sequences options Refs #2 --- .github/workflows/test.yml | 3 ++ README.md | 105 +++++++++++++++++++++++++++++++++++++ llm_anthropic.py | 62 ++++++++++++++++++++-- pyproject.toml | 2 +- 4 files changed, 167 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e64c5fb..fd33032 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,3 +25,6 @@ jobs: - name: Run tests run: | pytest + - name: Check if cog needs to be run + run: | + cog --check README.md diff --git a/README.md b/README.md index 99bbc13..ad32e6a 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,111 @@ The plugin sets up `claude-3.5-sonnet` and similar as aliases, usable like this: llm -m claude-3.5-sonnet 'Fun facts about pelicans' ``` +## Model options + +The following options can be passed using `-o name value` on the CLI or as `keyword=value` arguments to the Python `model.prompt()` method: + + +- **max_tokens**: `int` + + The maximum number of tokens to generate before stopping + +- **temperature**: `float` + + Amount of randomness injected into the response. Defaults to 1.0. Ranges from 0.0 to 1.0. Use temperature closer to 0.0 for analytical / multiple choice, and closer to 1.0 for creative and generative tasks. Note that even with temperature of 0.0, the results will not be fully deterministic. + +- **top_p**: `float` + + Use nucleus sampling. In nucleus sampling, we compute the cumulative distribution over all the options for each subsequent token in decreasing probability order and cut it off once it reaches a particular probability specified by top_p. You should either alter temperature or top_p, but not both. Recommended for advanced use cases only. You usually only need to use temperature. + +- **top_k**: `int` + + Only sample from the top K options for each subsequent token. Used to remove 'long tail' low probability responses. Recommended for advanced use cases only. You usually only need to use temperature. + +- **user_id**: `str` + + An external identifier for the user who is associated with the request + +- **prefill**: `str` + + A prefill to use for the response + +- **hide_prefill**: `boolean` + + Do not repeat the prefill value at the start of the response + +- **stop_sequences**: `array, str` + + Custom text sequences that will cause the model to stop generating - pass either a list of strings or a single string + + + +The `prefill` option can be used to set the first part of the response. To increase the chance of returning JSON, set that to `{`: + +```bash +llm -m claude-3.5-sonnet 'Fun data about pelicans' \ + -o prefill '{' +``` +If you do not want the prefill token to be echoed in the response, set `hide_prefill` to `true`: + +```bash +llm -m claude-3.5-haiku 'Short python function describing a pelican' \ + -o prefill '```python' \ + -o hide_prefill true \ + -o stop_sequences '```' +``` +This example sets `` ``` `` as the stop sequence, so the response will be a Python function without the wrapping Markdown code block. 
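+
+The same options are accepted as keyword arguments to `model.prompt()` in the Python API. Here is a minimal sketch mirroring the CLI example above (it assumes the plugin is installed and an Anthropic API key has already been configured):
+
+```python
+import llm
+
+model = llm.get_model("claude-3.5-haiku")
+response = model.prompt(
+    "Short python function describing a pelican",
+    prefill="```python",     # start the reply inside a Python code block
+    hide_prefill=True,       # do not echo the prefill in the output
+    stop_sequences=["```"],  # stop generating before the closing fence
+)
+print(response.text())
+```
+Under the hood the prefill is sent as a trailing assistant message, so Claude continues from it rather than starting a fresh reply.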
+ +To pass a single stop sequence, send a string: +```bash +llm -m claude-3.5-sonnet 'Fun facts about pelicans' \ + -o stop-sequences "beak" +``` +For multiple stop sequences, pass a JSON array: + +```bash +llm -m claude-3.5-sonnet 'Fun facts about pelicans' \ + -o stop-sequences '["beak", "feathers"]' +``` + +When using the Python API, pass a string or an array of strings: + +```python +response = llm.query( + model="claude-3.5-sonnet", + query="Fun facts about pelicans", + stop_sequences = ["beak", "feathers"], +) +``` + ## Development To set up this plugin locally, first checkout the code. Then create a new virtual environment: diff --git a/llm_anthropic.py b/llm_anthropic.py index a923266..7a9fadc 100644 --- a/llm_anthropic.py +++ b/llm_anthropic.py @@ -1,7 +1,8 @@ from anthropic import Anthropic, AsyncAnthropic import llm +import json from pydantic import Field, field_validator, model_validator -from typing import Optional, List +from typing import Optional, List, Union @llm.hookimpl @@ -73,6 +74,44 @@ class ClaudeOptions(llm.Options): default=None, ) + prefill: Optional[str] = Field( + description="A prefill to use for the response", + default=None, + ) + + hide_prefill: Optional[bool] = Field( + description="Do not repeat the prefill value at the start of the response", + default=None, + ) + + stop_sequences: Optional[Union[list, str]] = Field( + description=( + "Custom text sequences that will cause the model to stop generating - " + "pass either a list of strings or a single string" + ), + default=None, + ) + + @field_validator("stop_sequences") + def validate_stop_sequences(cls, stop_sequences): + error_msg = "stop_sequences must be a list of strings or a single string" + if isinstance(stop_sequences, str): + try: + stop_sequences = json.loads(stop_sequences) + if not isinstance(stop_sequences, list) or not all( + isinstance(seq, str) for seq in stop_sequences + ): + raise ValueError(error_msg) + return stop_sequences + except json.JSONDecodeError: + return [stop_sequences] + elif isinstance(stop_sequences, list): + if not all(isinstance(seq, str) for seq in stop_sequences): + raise ValueError(error_msg) + return stop_sequences + else: + raise ValueError(error_msg) + @field_validator("max_tokens") @classmethod def validate_max_tokens(cls, max_tokens): @@ -129,7 +168,7 @@ def __init__( supports_images=True, supports_pdf=False, ): - self.model_id = 'anthropic/' + model_id + self.model_id = "anthropic/" + model_id self.claude_model_id = claude_model_id or model_id self.attachment_types = set() if supports_images: @@ -201,6 +240,8 @@ def build_messages(self, prompt, conversation) -> List[dict]: ) else: messages.append({"role": "user", "content": prompt.prompt}) + if prompt.options.prefill: + messages.append({"role": "assistant", "content": prompt.options.prefill}) return messages def build_kwargs(self, prompt, conversation): @@ -223,6 +264,9 @@ def build_kwargs(self, prompt, conversation): if prompt.system: kwargs["system"] = prompt.system + if prompt.options.stop_sequences: + kwargs["stop_sequences"] = prompt.options.stop_sequences + return kwargs def set_usage(self, response): @@ -243,13 +287,18 @@ def execute(self, prompt, stream, response, conversation): kwargs = self.build_kwargs(prompt, conversation) if stream: with client.messages.stream(**kwargs) as stream: + if prompt.options.prefill and not prompt.options.hide_prefill: + yield prompt.options.prefill for text in stream.text_stream: yield text # This records usage and other data: response.response_json = 
stream.get_final_message().model_dump() else: completion = client.messages.create(**kwargs) - yield completion.content[0].text + text = completion.content[0].text + if prompt.options.prefill and not prompt.options.hide_prefill: + text = prompt.options.prefill + text + yield text response.response_json = completion.model_dump() self.set_usage(response) @@ -265,12 +314,17 @@ async def execute(self, prompt, stream, response, conversation): kwargs = self.build_kwargs(prompt, conversation) if stream: async with client.messages.stream(**kwargs) as stream_obj: + if prompt.options.prefill and not prompt.options.hide_prefill: + yield prompt.options.prefill async for text in stream_obj.text_stream: yield text response.response_json = (await stream_obj.get_final_message()).model_dump() else: completion = await client.messages.create(**kwargs) - yield completion.content[0].text + text = completion.content[0].text + if prompt.options.prefill and not prompt.options.hide_prefill: + text = prompt.options.prefill + text + yield text response.response_json = completion.model_dump() self.set_usage(response) diff --git a/pyproject.toml b/pyproject.toml index 5e44222..3dbd71a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,4 +23,4 @@ CI = "https://github.com/simonw/llm-anthropic/actions" anthropic = "llm_anthropic" [project.optional-dependencies] -test = ["pytest", "pytest-recording", "pytest-asyncio"] +test = ["pytest", "pytest-recording", "pytest-asyncio", "cogapp"] From 24430fe00aa9cd38bd0580653260eebf55d7f4f4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 30 Jan 2025 16:37:13 -0800 Subject: [PATCH 2/6] Tidy up code example --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ad32e6a..c05b942 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,7 @@ When using the Python API, pass a string or an array of strings: response = llm.query( model="claude-3.5-sonnet", query="Fun facts about pelicans", - stop_sequences = ["beak", "feathers"], + stop_sequences=["beak", "feathers"], ) ``` From d1cd80e382561aab3bdebca35e20f9735aedb785 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 30 Jan 2025 16:42:47 -0800 Subject: [PATCH 3/6] Test for prefill, stop_sequences, hide_prefill - refs #2 --- ...rompt_with_prefill_and_stop_sequences.yaml | 454 ++++++++++++++++++ tests/test_claude_3.py | 29 ++ 2 files changed, 483 insertions(+) create mode 100644 tests/cassettes/test_claude_3/test_prompt_with_prefill_and_stop_sequences.yaml diff --git a/tests/cassettes/test_claude_3/test_prompt_with_prefill_and_stop_sequences.yaml b/tests/cassettes/test_claude_3/test_prompt_with_prefill_and_stop_sequences.yaml new file mode 100644 index 0000000..778a5d0 --- /dev/null +++ b/tests/cassettes/test_claude_3/test_prompt_with_prefill_and_stop_sequences.yaml @@ -0,0 +1,454 @@ +interactions: +- request: + body: '{"max_tokens":8192,"messages":[{"role":"user","content":"Python function + describing a pelican"},{"role":"assistant","content":"```python"}],"model":"claude-3-5-haiku-latest","stop_sequences":["```"],"temperature":1.0,"stream":true}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '231' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - Anthropic/Python 0.41.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + 
x-stainless-package-version: + - 0.41.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.0 + x-stainless-stream-helper: + - messages + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: 'event: message_start + + data: {"type":"message_start","message":{"id":"msg_01Hc6vd1tdGFuu8g31Dtwocw","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":15,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":3}} } + + + event: content_block_start + + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + + event: ping + + data: {"type": "ping"} + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\ndef + describe"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"_pelican():\n \"\"\"\n A"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + function describing the characteristics of a pelican."} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n \n Returns:\n "} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"A + dictionary with various details about pel"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"icans.\n \"\"\"\n pel"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"ican_details + = {"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n \"species\": + \"Pelec"}} + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"anus\",\n \"habitat\": + \"Coastal"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + areas, lakes, and rivers\","}} + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n \"physical_characteristics\": + {"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n \"size\": + {"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n \"length\": + \"1.2"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + - 1.8 "} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"meters\",\n \"wingspan\":"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + \"2 - 3."} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"5 + meters\""}} + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n },\n \""} } + + + event: content_block_delta + + data: 
{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"weight\": + \"4 - 15"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + kg\",\n \"distinctive"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"_features\": + [\n \"Large"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + throat pouch\",\n \"Long"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + beak\",\n \""} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"White + or gray plumage\""} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n ]\n },\n \"diet"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\": + [\n \"Fish\","} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n \"Smaller + water"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + birds\",\n \""} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Crustaceans\",\n \"Amp"}} + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"hibians\"\n ],\n \"behavior"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\": + {\n \"hunting_style"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\": + \"Diving and sco"}} + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"oping + fish with throat pouch\",\n \""}} + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"social_nature\": + \"Often foun"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"d + in large groups\",\n \"breeding\":"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + \"Colonial nesters\"\n },"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n \"interesting"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"_facts\": + ["} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n \"Can + hold up to 3"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + gallons of water in their"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + pouch\",\n \""} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Excellent + fish"} } + + + event: content_block_delta + + data: 
{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"ers\",\n \"Foun"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"d + on every continent except Antarctica\""} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n ]\n }"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n \n return + pelican_"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"details\n\n#"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + Example usage\npel"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"ican_info"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + = describe_pelican"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"()\nprint(\""} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Pelican"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + Habitat:\", pelican_info"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"[\"habitat\"])\nprint"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"(\"Distinctive"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + Features:\", pelican_"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"info[\"physical_characteristics\"][\"distinctive_"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"features\"])\n"} } + + + event: content_block_stop + + data: {"type":"content_block_stop","index":0 } + + + event: message_delta + + data: {"type":"message_delta","delta":{"stop_reason":"stop_sequence","stop_sequence":"```"},"usage":{"output_tokens":386} } + + + event: message_stop + + data: {"type":"message_stop"} + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 90a5a4742d35f7a5-LAX + Cache-Control: + - no-cache + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Fri, 31 Jan 2025 00:39:03 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-ratelimit-input-tokens-limit: + - '400000' + anthropic-ratelimit-input-tokens-remaining: + - '400000' + anthropic-ratelimit-input-tokens-reset: + - '2025-01-31T00:39:03Z' + anthropic-ratelimit-output-tokens-limit: + - '80000' + anthropic-ratelimit-output-tokens-remaining: + - '76000' + anthropic-ratelimit-output-tokens-reset: + - '2025-01-31T00:39:06Z' + anthropic-ratelimit-requests-limit: + - '4000' + anthropic-ratelimit-requests-remaining: + - '3999' + anthropic-ratelimit-requests-reset: + - '2025-01-31T00:39:03Z' + anthropic-ratelimit-tokens-limit: + - '480000' + anthropic-ratelimit-tokens-remaining: + - '476000' + 
anthropic-ratelimit-tokens-reset: + - '2025-01-31T00:39:03Z' + request-id: + - req_014Jc1PrkzF3jzDiWV8f4d1e + via: + - 1.1 google + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_claude_3.py b/tests/test_claude_3.py index 258a006..fdf213c 100644 --- a/tests/test_claude_3.py +++ b/tests/test_claude_3.py @@ -94,3 +94,32 @@ def test_image_prompt(): assert response.input_tokens == 76 assert response.output_tokens == 75 assert response.token_details is None + + +@pytest.mark.vcr +def test_prompt_with_prefill_and_stop_sequences(): + model = llm.get_model("claude-3.5-haiku") + model.key = model.key or ANTHROPIC_API_KEY + response = model.prompt( + "Very short function describing a pelican", + prefill="```python", + stop_sequences=["```"], + hide_prefill=True, + ) + text = response.text() + assert text.startswith( + "\ndef describe_pelican():\n" + ' """\n' + " A function describing the characteristics of a pelican.\n" + " \n" + " Returns:\n" + " A dictionary with various details about pelicans.\n" + ' """\n' + " pelican_details = {\n" + ' "species": "Pelecanus",\n' + ' "habitat": "Coastal areas, lakes, and rivers",\n' + ) + assert text.endswith( + 'print("Distinctive Features:", ' + 'pelican_info["physical_characteristics"]["distinctive_features"])\n' + ) From 402473cfe1b0a86d022c907d9d19641acf1cd15f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 30 Jan 2025 16:49:18 -0800 Subject: [PATCH 4/6] Fix tests by upgrading Anthropic client, refs #2 Also refs citations feature in #1 --- pyproject.toml | 2 +- tests/test_claude_3.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3dbd71a..4227c63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ classifiers = [ ] dependencies = [ "llm>=0.19", - "anthropic>=0.39.0", + "anthropic>=0.45.2", ] [project.urls] diff --git a/tests/test_claude_3.py b/tests/test_claude_3.py index fdf213c..099d787 100644 --- a/tests/test_claude_3.py +++ b/tests/test_claude_3.py @@ -24,7 +24,7 @@ def test_prompt(): response_dict = dict(response.response_json) response_dict.pop("id") # differs between requests assert response_dict == { - "content": [{"text": "1. Pelly\n2. Beaky", "type": "text"}], + "content": [{"citations": None, "text": "1. Pelly\n2. Beaky", "type": "text"}], "model": "claude-3-opus-20240229", "role": "assistant", "stop_reason": "end_turn", @@ -46,7 +46,7 @@ async def test_async_prompt(): response_dict = dict(response.response_json) response_dict.pop("id") # differs between requests assert response_dict == { - "content": [{"text": "1. Pelly\n2. Beaky", "type": "text"}], + "content": [{"citations": None, "text": "1. Pelly\n2. 
Beaky", "type": "text"}], "model": "claude-3-opus-20240229", "role": "assistant", "stop_reason": "end_turn", From 16735760c58dbbccf0ddf0b174ecc71fe711dcfc Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 30 Jan 2025 16:51:36 -0800 Subject: [PATCH 5/6] Fixed one more test, refs #2 --- tests/test_claude_3.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/test_claude_3.py b/tests/test_claude_3.py index 099d787..8997248 100644 --- a/tests/test_claude_3.py +++ b/tests/test_claude_3.py @@ -79,18 +79,14 @@ def test_image_prompt(): response_dict = response.response_json response_dict.pop("id") # differs between requests assert response_dict == { - "content": [ - { - "text": EXPECTED_IMAGE_TEXT, - "type": "text", - } - ], + "content": [{"citations": None, "text": EXPECTED_IMAGE_TEXT, "type": "text"}], "model": "claude-3-5-sonnet-20241022", "role": "assistant", "stop_reason": "end_turn", "stop_sequence": None, "type": "message", } + assert response.input_tokens == 76 assert response.output_tokens == 75 assert response.token_details is None From d8ea765a96ca1a8b8bf988e15304e04a1267b2c8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 30 Jan 2025 17:05:37 -0800 Subject: [PATCH 6/6] Refactored prefill text output logic --- llm_anthropic.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/llm_anthropic.py b/llm_anthropic.py index 7a9fadc..e06e418 100644 --- a/llm_anthropic.py +++ b/llm_anthropic.py @@ -183,6 +183,11 @@ def __init__( if supports_pdf: self.attachment_types.add("application/pdf") + def prefill_text(self, prompt): + if prompt.options.prefill and not prompt.options.hide_prefill: + return prompt.options.prefill + return "" + def build_messages(self, prompt, conversation) -> List[dict]: messages = [] if conversation: @@ -285,10 +290,11 @@ class ClaudeMessages(_Shared, llm.Model): def execute(self, prompt, stream, response, conversation): client = Anthropic(api_key=self.get_key()) kwargs = self.build_kwargs(prompt, conversation) + prefill_text = self.prefill_text(prompt) if stream: with client.messages.stream(**kwargs) as stream: - if prompt.options.prefill and not prompt.options.hide_prefill: - yield prompt.options.prefill + if prefill_text: + yield prefill_text for text in stream.text_stream: yield text # This records usage and other data: @@ -296,9 +302,7 @@ def execute(self, prompt, stream, response, conversation): else: completion = client.messages.create(**kwargs) text = completion.content[0].text - if prompt.options.prefill and not prompt.options.hide_prefill: - text = prompt.options.prefill + text - yield text + yield prefill_text + text response.response_json = completion.model_dump() self.set_usage(response) @@ -312,19 +316,18 @@ class AsyncClaudeMessages(_Shared, llm.AsyncModel): async def execute(self, prompt, stream, response, conversation): client = AsyncAnthropic(api_key=self.get_key()) kwargs = self.build_kwargs(prompt, conversation) + prefill_text = self.prefill_text(prompt) if stream: async with client.messages.stream(**kwargs) as stream_obj: - if prompt.options.prefill and not prompt.options.hide_prefill: - yield prompt.options.prefill + if prefill_text: + yield prefill_text async for text in stream_obj.text_stream: yield text response.response_json = (await stream_obj.get_final_message()).model_dump() else: completion = await client.messages.create(**kwargs) text = completion.content[0].text - if prompt.options.prefill and not prompt.options.hide_prefill: - text = 
prompt.options.prefill + text - yield text + yield prefill_text + text response.response_json = completion.model_dump() self.set_usage(response)