Merge branch 'main' into llm-azure

fabge authored Feb 13, 2025
2 parents 3890709 + 9a1374b commit 9c29f96
Showing 46 changed files with 3,706 additions and 644 deletions.
48 changes: 48 additions & 0 deletions .github/workflows/cog.yml
@@ -0,0 +1,48 @@
```yaml
name: Run Cog

on:
  pull_request:
    types: [opened, synchronize]

permissions:
  contents: write
  pull-requests: write

jobs:
  run-cog:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          ref: ${{ github.head_ref }}

      - name: Set up Python 3.11
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: pip install -e '.[test]'

      - name: Run cog
        run: |
          cog -r -p "import sys, os; sys._called_from_test=True; os.environ['LLM_USER_PATH'] = '/tmp'" docs/**/*.md docs/*.md

      - name: Check for changes
        id: check-changes
        run: |
          if [ -n "$(git diff)" ]; then
            echo "changes=true" >> $GITHUB_OUTPUT
          else
            echo "changes=false" >> $GITHUB_OUTPUT
          fi

      - name: Commit and push if changed
        if: steps.check-changes.outputs.changes == 'true'
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add -A
          git commit -m "Ran cog"
          git push
```
18 changes: 9 additions & 9 deletions .github/workflows/publish.yml
```diff
@@ -12,7 +12,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
     steps:
     - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
@@ -29,23 +29,23 @@ jobs:
         pytest
   deploy:
     runs-on: ubuntu-latest
+    environment: release
+    permissions:
+      id-token: write
     needs: [test]
     steps:
     - uses: actions/checkout@v4
     - name: Set up Python
       uses: actions/setup-python@v5
       with:
-        python-version: "3.12"
+        python-version: '3.13'
         cache: pip
         cache-dependency-path: setup.py
     - name: Install dependencies
       run: |
-        pip install setuptools wheel twine build
-    - name: Publish
-      env:
-        TWINE_USERNAME: __token__
-        TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
+        pip install setuptools wheel build
+    - name: Build
       run: |
         python -m build
-        twine upload dist/*
+    - name: Publish
+      uses: pypa/gh-action-pypi-publish@release/v1
```
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
```diff
@@ -11,7 +11,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
         pydantic: ["==1.10.2", ">=2.0.0"]
     steps:
     - uses: actions/checkout@v4
@@ -45,7 +45,7 @@ jobs:
     - name: Run ruff
       if: matrix.os != 'windows-latest'
       run: |
-        ruff .
+        ruff check .
     - name: Run test-llm-load-plugins.sh
       if: matrix.os != 'windows-latest'
       run: |
```
4 changes: 2 additions & 2 deletions Justfile
```diff
@@ -21,7 +21,7 @@
     echo "  mypy"
     pipenv run mypy llm
     echo "  ruff"
-    pipenv run ruff .
+    pipenv run ruff check .
 
 # Run mypy
 @mypy:
@@ -42,7 +42,7 @@
 
 # Run automatic fixes
 @fix: cog
-    pipenv run ruff . --fix
+    pipenv run ruff check . --fix
     pipenv run black .
 
 # Push commit if tests pass
```
4 changes: 3 additions & 1 deletion README.md
````diff
@@ -12,6 +12,8 @@
 
 [Run prompts from the command-line](https://llm.datasette.io/en/stable/usage.html#executing-a-prompt), [store the results in SQLite](https://llm.datasette.io/en/stable/logging.html), [generate embeddings](https://llm.datasette.io/en/stable/embeddings/index.html) and more.
 
+Consult the **[LLM plugins directory](https://llm.datasette.io/en/stable/plugins/directory.html)** for plugins that provide access to remote and local models.
+
 Full documentation: **[llm.datasette.io](https://llm.datasette.io/)**
 
 Background on this project:
@@ -101,7 +103,7 @@ Type '!multi' to enter multiple lines, then '!end' to finish
 
 You can use the `-s/--system` option to set a system prompt, providing instructions for processing other input to the tool.
 
-To describe how the code a file works, try this:
+To describe how the code in a file works, try this:
 
 ```bash
 cat mycode.py | llm -s "Explain this code"
````
34 changes: 21 additions & 13 deletions docs/aliases.md
````diff
@@ -19,19 +19,27 @@ result = CliRunner().invoke(cli, ["aliases", "list"])
 cog.out("```\n{}```".format(result.output))
 ]]] -->
 ```
-3.5               : gpt-3.5-turbo
-chatgpt           : gpt-3.5-turbo
-chatgpt-16k       : gpt-3.5-turbo-16k
-3.5-16k           : gpt-3.5-turbo-16k
-4                 : gpt-4
-gpt4              : gpt-4
-4-32k             : gpt-4-32k
-gpt-4-turbo       : gpt-4-turbo-preview
-4-turbo           : gpt-4-turbo-preview
-4t                : gpt-4-turbo-preview
-3.5-instruct      : gpt-3.5-turbo-instruct
-chatgpt-instruct  : gpt-3.5-turbo-instruct
-ada               : ada-002 (embedding)
+4o                  : gpt-4o
+4o-mini             : gpt-4o-mini
+3.5                 : gpt-3.5-turbo
+chatgpt             : gpt-3.5-turbo
+chatgpt-16k         : gpt-3.5-turbo-16k
+3.5-16k             : gpt-3.5-turbo-16k
+4                   : gpt-4
+gpt4                : gpt-4
+4-32k               : gpt-4-32k
+gpt-4-turbo-preview : gpt-4-turbo
+4-turbo             : gpt-4-turbo
+4t                  : gpt-4-turbo
+3.5-instruct        : gpt-3.5-turbo-instruct
+chatgpt-instruct    : gpt-3.5-turbo-instruct
+ada                 : text-embedding-ada-002 (embedding)
+ada-002             : text-embedding-ada-002 (embedding)
+3-small             : text-embedding-3-small (embedding)
+3-large             : text-embedding-3-large (embedding)
+3-small-512         : text-embedding-3-small-512 (embedding)
+3-large-256         : text-embedding-3-large-256 (embedding)
+3-large-1024        : text-embedding-3-large-1024 (embedding)
 ```
 <!-- [[[end]]] -->
````
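
The listing above is generated by cog: the snippet in the hidden comment invokes LLM's command-line interface in-process and pastes its output back into the page. Stripped of the cog markers, it amounts to this sketch:

```python
from click.testing import CliRunner

from llm.cli import cli

# Invoke the CLI in-process and capture the alias table that cog inserts
result = CliRunner().invoke(cli, ["aliases", "list"])
print(result.output)
```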

166 changes: 165 additions & 1 deletion docs/changelog.md
@@ -1,5 +1,169 @@
# Changelog

(v0_21)=
## 0.21 (2025-01-31)

- New model: `o3-mini`. [#728](https://github.com/simonw/llm/issues/728)
- The `o3-mini` and `o1` models now support a `reasoning_effort` option which can be set to `low`, `medium` or `high` (see the sketch after this list).
- `llm prompt` and `llm logs` now have a `--xl/--extract-last` option for extracting the last fenced code block in the response - a complement to the existing `-x/--extract` option. [#717](https://github.com/simonw/llm/issues/717)
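
The `reasoning_effort` option can also be set from the Python API, since model options are passed as keyword arguments to `prompt()`. A minimal sketch (assumes an OpenAI API key has already been configured):

```python
import llm

model = llm.get_model("o3-mini")
# reasoning_effort accepts "low", "medium" or "high"
response = model.prompt(
    "Explain quicksort in one paragraph",
    reasoning_effort="low",
)
print(response.text())
```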

(v0_20)=
## 0.20 (2025-01-22)

- New model, `o1`. This model does not yet support streaming. [#676](https://github.com/simonw/llm/issues/676)
- `o1-preview` and `o1-mini` models now support streaming.
- New models, `gpt-4o-audio-preview` and `gpt-4o-mini-audio-preview`. [#677](https://github.com/simonw/llm/issues/677)
- `llm prompt -x/--extract` option, which returns just the content of the first fenced code block in the response. Try `llm prompt -x 'Python function to reverse a string'`. [#681](https://github.com/simonw/llm/issues/681)
- Creating a template using `llm ... --save x` now supports the `-x/--extract` option, which is saved to the template. YAML templates can set this option using `extract: true`.
- New `llm logs -x/--extract` option extracts the first fenced code block from matching logged responses.
- New `llm models -q 'search'` option returning models that case-insensitively match the search query (a rough Python equivalent is sketched after this list). [#700](https://github.com/simonw/llm/issues/700)
- Installation documentation now also includes `uv`. Thanks, [Ariel Marcus](https://github.com/ajmarcus). [#690](https://github.com/simonw/llm/pull/690) and [#702](https://github.com/simonw/llm/issues/702)
- `llm models` command now shows the current default model at the bottom of the listing. Thanks, [Amjith Ramanujam](https://github.com/amjith). [#688](https://github.com/simonw/llm/pull/688)
- {ref}`Plugin directory <plugin-directory>` now includes `llm-venice`, `llm-bedrock`, `llm-deepseek` and `llm-cmd-comp`.
- Fixed bug where some dependency version combinations could cause a `Client.__init__() got an unexpected keyword argument 'proxies'` error. [#709](https://github.com/simonw/llm/issues/709)
- OpenAI embedding models are now available using their full names of `text-embedding-ada-002`, `text-embedding-3-small` and `text-embedding-3-large` - the previous names are still supported as aliases. Thanks, [web-sst](https://github.com/web-sst). [#654](https://github.com/simonw/llm/pull/654)
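
There is no single Python function behind the new `-q` search, but a rough client-side equivalent using `llm.get_models()` looks like this (a sketch; the CLI's matching rules may differ):

```python
import llm

query = "mini"
for model in llm.get_models():
    # Case-insensitive substring match against each model ID
    if query.lower() in str(model.model_id).lower():
        print(model.model_id)
```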

(v0_19_1)=
## 0.19.1 (2024-12-05)

- Fixed bug where `llm.get_models()` and `llm.get_async_models()` returned the same model multiple times. [#667](https://github.com/simonw/llm/issues/667)

(v0_19)=
## 0.19 (2024-12-01)

- Tokens used by a response are now logged to new `input_tokens` and `output_tokens` integer columns and a `token_details` JSON string column, for the default OpenAI models and models from other plugins that {ref}`implement this feature <advanced-model-plugins-usage>`. [#610](https://github.com/simonw/llm/issues/610)
- `llm prompt` now takes a `-u/--usage` flag to display token usage at the end of the response.
- `llm logs -u/--usage` shows token usage information for logged responses.
- `llm prompt ... --async` responses are now logged to the database. [#641](https://github.com/simonw/llm/issues/641)
- `llm.get_models()` and `llm.get_async_models()` functions, {ref}`documented here <python-api-listing-models>`. [#640](https://github.com/simonw/llm/issues/640)
- `response.usage()` and async response `await response.usage()` methods, returning a `Usage(input=2, output=1, details=None)` dataclass (see the sketch after this list). [#644](https://github.com/simonw/llm/issues/644)
- `response.on_done(callback)` and `await response.on_done(callback)` methods for specifying a callback to be executed when a response has completed, {ref}`documented here <python-api-response-on-done>`. [#653](https://github.com/simonw/llm/issues/653)
- Fix for bug running `llm chat` on Windows 11. Thanks, [Sukhbinder Singh](https://github.com/sukhbinder). [#495](https://github.com/simonw/llm/issues/495)
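
The new token accounting is also visible from Python. A short sketch using the `response.usage()` method described above (exact numbers will vary):

```python
import llm

model = llm.get_model("gpt-4o-mini")
response = model.prompt("Say hello in exactly three words")
print(response.text())
print(response.usage())  # e.g. Usage(input=12, output=5, details=None)
```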

(v0_19a2)=
## 0.19a2 (2024-11-20)

- `llm.get_models()` and `llm.get_async_models()` functions, {ref}`documented here <python-api-listing-models>`. [#640](https://github.com/simonw/llm/issues/640)

(v0_19a1)=
## 0.19a1 (2024-11-19)

- `response.usage()` and async response `await response.usage()` methods, returning a `Usage(input=2, output=1, details=None)` dataclass. [#644](https://github.com/simonw/llm/issues/644)

(v0_19a0)=
## 0.19a0 (2024-11-19)

- Tokens used by a response are now logged to new `input_tokens` and `output_tokens` integer columns and a `token_details` JSON string column, for the default OpenAI models and models from other plugins that {ref}`implement this feature <advanced-model-plugins-usage>`. [#610](https://github.com/simonw/llm/issues/610)
- `llm prompt` now takes a `-u/--usage` flag to display token usage at the end of the response.
- `llm logs -u/--usage` shows token usage information for logged responses.
- `llm prompt ... --async` responses are now logged to the database. [#641](https://github.com/simonw/llm/issues/641)

(v0_18)=
## 0.18 (2024-11-17)

- Initial support for async models. Plugins can now provide an `AsyncModel` subclass that can be accessed in the Python API using the new `llm.get_async_model(model_id)` method (sketched after this list). See {ref}`async models in the Python API docs<python-api-async>` and {ref}`implementing async models in plugins <advanced-model-plugins-async>`. [#507](https://github.com/simonw/llm/issues/507)
- OpenAI models all now include async models, so function calls such as `llm.get_async_model("gpt-4o-mini")` will return an async model.
- `gpt-4o-audio-preview` model can be used to send audio attachments to the GPT-4o audio model. [#608](https://github.com/simonw/llm/issues/608)
- Attachments can now be sent without requiring a prompt. [#611](https://github.com/simonw/llm/issues/611)
- `llm models --options` now includes information on whether a model supports attachments. [#612](https://github.com/simonw/llm/issues/612)
- `llm models --async` shows available async models.
- Custom OpenAI-compatible models can now be marked as `can_stream: false` in the YAML if they do not support streaming. Thanks, [Chris Mungall](https://github.com/cmungall). [#600](https://github.com/simonw/llm/pull/600)
- Fixed bug where OpenAI usage data was incorrectly serialized to JSON. [#614](https://github.com/simonw/llm/issues/614)
- Standardized on `audio/wav` MIME type for audio attachments rather than `audio/wave`. [#603](https://github.com/simonw/llm/issues/603)
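
A minimal async sketch based on the API described above (assumes an OpenAI API key is configured):

```python
import asyncio

import llm


async def main():
    model = llm.get_async_model("gpt-4o-mini")
    response = model.prompt("A two-line poem about pelicans")
    print(await response.text())


asyncio.run(main())
```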

(v0_18a1)=
## 0.18a1 (2024-11-14)

- Fixed bug where conversations did not work for async OpenAI models. [#632](https://github.com/simonw/llm/issues/632)
- `__repr__` methods for `Response` and `AsyncResponse`.

(v0_18a0)=
## 0.18a0 (2024-11-13)

Alpha support for **async models**. [#507](https://github.com/simonw/llm/issues/507)

Multiple [smaller changes](https://github.com/simonw/llm/compare/0.17.1...0.18a0).

(v0_17)=
## 0.17 (2024-10-29)

Support for **attachments**, allowing multi-modal models to accept images, audio, video and other formats. [#578](https://github.com/simonw/llm/issues/578)

The default OpenAI `gpt-4o` and `gpt-4o-mini` models can both now be prompted with JPEG, GIF, PNG and WEBP images.

Attachments {ref}`in the CLI <usage-attachments>` can be URLs:

```bash
llm -m gpt-4o "describe this image" \
  -a https://static.simonwillison.net/static/2024/pelicans.jpg
```
Or file paths:
```bash
llm -m gpt-4o-mini "extract text" -a image1.jpg -a image2.jpg
```
Or binary data, which may need to use `--attachment-type` to specify the MIME type:
```bash
cat image | llm -m gpt-4o-mini "extract text" --attachment-type - image/jpeg
```

Attachments are also available {ref}`in the Python API <python-api-attachments>`:

```python
model = llm.get_model("gpt-4o-mini")
response = model.prompt(
    "Describe these images",
    attachments=[
        llm.Attachment(path="pelican.jpg"),
        llm.Attachment(url="https://static.simonwillison.net/static/2024/pelicans.jpg"),
    ]
)
```
Plugins that provide alternative models can support attachments, see {ref}`advanced-model-plugins-attachments` for details.

The latest **[llm-claude-3](https://github.com/simonw/llm-claude-3)** plugin now supports attachments for Anthropic's Claude 3 and 3.5 models. The **[llm-gemini](https://github.com/simonw/llm-gemini)** plugin supports attachments for Google's Gemini 1.5 models.

Also in this release: OpenAI models now record their `"usage"` data in the database even when the response was streamed. These records can be viewed using `llm logs --json`. [#591](https://github.com/simonw/llm/issues/591)

(v0_17a0)=
## 0.17a0 (2024-10-28)

Alpha support for **attachments**. [#578](https://github.com/simonw/llm/issues/578)

(v0_16)=
## 0.16 (2024-09-12)

- OpenAI models now use the internal `self.get_key()` mechanism, which means they can be used from Python code in a way that will pick up keys that have been configured using `llm keys set` or the `OPENAI_API_KEY` environment variable. [#552](https://github.com/simonw/llm/issues/552). This code now works correctly:
```python
import llm
print(llm.get_model("gpt-4o-mini").prompt("hi"))
```
- New documented API methods: `llm.get_default_model()`, `llm.set_default_model(alias)`, `llm.get_default_embedding_model()`, `llm.set_default_embedding_model(alias)` (sketched after this list). [#553](https://github.com/simonw/llm/issues/553)
- Support for OpenAI's new [o1 family](https://openai.com/o1/) of preview models, `llm -m o1-preview "prompt"` and `llm -m o1-mini "prompt"`. These models are currently only available to [tier 5](https://platform.openai.com/docs/guides/rate-limits/usage-tiers?context=tier-five) OpenAI API users, though this may change in the future. [#570](https://github.com/simonw/llm/issues/570)
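
A quick sketch of those default-model helpers (aliases are accepted wherever model IDs are; note that the getters return whatever string was stored):

```python
import llm

llm.set_default_model("gpt-4o-mini")
print(llm.get_default_model())  # gpt-4o-mini

llm.set_default_embedding_model("3-small")
print(llm.get_default_embedding_model())  # 3-small
```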

(v0_15)=
## 0.15 (2024-07-18)

- Support for OpenAI's [new GPT-4o mini](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) model: `llm -m gpt-4o-mini 'rave about pelicans in French'` [#536](https://github.com/simonw/llm/issues/536)
- `gpt-4o-mini` is now the default model if you do not {ref}`specify your own default <setup-default-model>`, replacing GPT-3.5 Turbo. GPT-4o mini is both cheaper and better than GPT-3.5 Turbo.
- Fixed a bug where `llm logs -q 'flourish' -m haiku` could not combine both the `-q` search query and the `-m` model specifier. [#515](https://github.com/simonw/llm/issues/515)

(v0_14)=
## 0.14 (2024-05-13)

- Support for OpenAI's [new GPT-4o](https://openai.com/index/hello-gpt-4o/) model: `llm -m gpt-4o 'say hi in Spanish'` [#490](https://github.com/simonw/llm/issues/490)
- The `gpt-4-turbo` alias is now a model ID, which indicates the latest version of OpenAI's GPT-4 Turbo text and image model. Your existing `logs.db` database may contain records under the previous model ID of `gpt-4-turbo-preview`. [#493](https://github.com/simonw/llm/issues/493)
- New `llm logs -r/--response` option for outputting just the last captured response, without wrapping it in Markdown and accompanying it with the prompt. [#431](https://github.com/simonw/llm/issues/431)
- Nine new {ref}`plugins <plugin-directory>` since version 0.13:
- **[llm-claude-3](https://github.com/simonw/llm-claude-3)** supporting Anthropic's [Claude 3 family](https://www.anthropic.com/news/claude-3-family) of models.
- **[llm-command-r](https://github.com/simonw/llm-command-r)** supporting Cohere's Command R and [Command R Plus](https://txt.cohere.com/command-r-plus-microsoft-azure/) API models.
- **[llm-reka](https://github.com/simonw/llm-reka)** supports the [Reka](https://www.reka.ai/) family of models via their API.
- **[llm-perplexity](https://github.com/hex/llm-perplexity)** by Alexandru Geana supporting the [Perplexity Labs](https://docs.perplexity.ai/) API models, including `llama-3-sonar-large-32k-online` which can search for things online and `llama-3-70b-instruct`.
- **[llm-groq](https://github.com/angerman/llm-groq)** by Moritz Angermann providing access to fast models hosted by [Groq](https://console.groq.com/docs/models).
- **[llm-fireworks](https://github.com/simonw/llm-fireworks)** supporting models hosted by [Fireworks AI](https://fireworks.ai/).
- **[llm-together](https://github.com/wearedevx/llm-together)** adds support for the [Together AI](https://www.together.ai/) extensive family of hosted openly licensed models.
- **[llm-embed-onnx](https://github.com/simonw/llm-embed-onnx)** provides seven embedding models that can be executed using the ONNX model framework.
- **[llm-cmd](https://github.com/simonw/llm-cmd)** accepts a prompt for a shell command, runs that prompt and populates the result in your shell so you can review it, edit it and then hit `<enter>` to execute or `ctrl+c` to cancel, see [this post for details](https://simonwillison.net/2024/Mar/26/llm-cmd/).

(v0_13_1)=
## 0.13.1 (2024-01-26)

````diff
@@ -153,7 +317,7 @@ To create embeddings for every JPEG in a directory stored in a `photos` collection
 llm install llm-clip
 llm embed-multi photos --files photos/ '*.jpg' --binary -m clip
 ```
-Now you can search for photos of racoons using:
+Now you can search for photos of raccoons using:
 ```
 llm similar photos -c 'raccoon'
 ```
````
3 changes: 3 additions & 0 deletions docs/embeddings/cli.md
@@ -148,8 +148,11 @@ All three mechanisms support these options:
- `-d database.db` to specify a different database file to store the embeddings in
- `--store` to store the original content in the embeddings table in addition to the embedding vector
- `--prefix` to prepend a prefix to the stored ID of each item
- `--prepend` to prepend a string to the content before embedding
- `--batch-size SIZE` to process embeddings in batches of the specified size

The `--prepend` option is useful for embedding models that require a special prefix on the content before it is embedded. [nomic-embed-text-v2-moe](https://huggingface.co/nomic-ai/nomic-embed-text-v2-moe), for example, requires documents to be prefixed with `'search_document: '` and search queries with `'search_query: '`.
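
In the Python API the same effect is just a matter of adding the prefix yourself before calling `embed()`. A minimal sketch (the model ID is illustrative; any embedding model works):

```python
import llm

model = llm.get_embedding_model("3-small")
# Equivalent of --prepend: prefix the content by hand before embedding
vector = model.embed("search_document: " + "The pelican is a large water bird.")
print(len(vector))  # dimensionality of the embedding vector
```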

(embeddings-cli-embed-multi-csv-etc)=
### Embedding data from a CSV, TSV or JSON file
