From 20ff9274139b100ba8f021438a68b5c6f7e6b03f Mon Sep 17 00:00:00 2001
From: Yeray Diaz <6739793+yeraydiazdiaz@users.noreply.github.com>
Date: Sat, 6 Jul 2024 13:33:58 +0100
Subject: [PATCH 1/7] Fix black failing in CI

Also move GH Actions to Python 3.11
---
 lunr/query_parser.py |  8 +++++---
 tox.ini              | 12 ++++++------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/lunr/query_parser.py b/lunr/query_parser.py
index e1cdd0b..ec318bc 100644
--- a/lunr/query_parser.py
+++ b/lunr/query_parser.py
@@ -52,9 +52,11 @@ def parse_clause(cls, parser):
             raise QueryParseError(
                 "Expected either a field or a term, found {}{}".format(
                     lexeme["type"],
-                    'with value "' + lexeme["string"] + '"'
-                    if len(lexeme["string"])
-                    else "",
+                    (
+                        'with value "' + lexeme["string"] + '"'
+                        if len(lexeme["string"])
+                        else ""
+                    ),
                 )
             )
 
diff --git a/tox.ini b/tox.ini
index ea340c7..bb409d8 100644
--- a/tox.ini
+++ b/tox.ini
@@ -10,24 +10,24 @@ commands =
     pytest -m "acceptance"
 
 [testenv:black]
-basepython = python3.10
+basepython = python3.11
 deps=
     black
 commands={envbindir}/black --check lunr tests
 
 [testenv:flake8]
-basepython = python3.10
+basepython = python3.11
 deps=
     flake8
 commands={envbindir}/flake8 lunr tests
 
 [testenv:docs]
-basepython = python3.10
+basepython = python3.11
 extras = docs
 commands={envbindir}/sphinx-build docs docs/_build/html
 
 [testenv:mypy]
-basepython = python3.10
+basepython = python3.11
 deps = mypy
 commands={envbindir}/mypy lunr
 
@@ -45,6 +45,6 @@ python =
     3.7: py37
     3.8: py38
     3.9: py39
-    3.10: py310,flake8,black,docs,mypy
-    3.11: py311
+    3.10: py310
+    3.11: py311,flake8,black,docs,mypy
     pypy3: pypy3

From 64668d28657458502c259660f7916c25ba1ba5eb Mon Sep 17 00:00:00 2001
From: Yeray Diaz <6739793+yeraydiazdiaz@users.noreply.github.com>
Date: Sat, 6 Jul 2024 13:44:31 +0100
Subject: [PATCH 2/7] Upgrade Codecov configuration

---
 .github/workflows/test-suite.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml
index 54c7f77..9ac1668 100644
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@@ -12,7 +12,7 @@ jobs:
     name: "Python ${{ matrix.python-version }}"
     runs-on: "ubuntu-latest"
     env:
-      USING_COVERAGE: "3.10"
+      USING_COVERAGE: "3.11"
 
     strategy:
       matrix:
@@ -46,6 +46,7 @@ jobs:
 
       - name: "Upload coverage to Codecov"
         if: "contains(env.USING_COVERAGE, matrix.python-version)"
-        uses: "codecov/codecov-action@v3"
+        uses: "codecov/codecov-action@v4.0.1"
         with:
           fail_ci_if_error: true
+          token: ${{ secrets.CODECOV_TOKEN }}

From 8b0232d41d7e2f998c4514b658695ace6f8fd382 Mon Sep 17 00:00:00 2001
From: David Huggins-Daines <dhd@ecolingui.ca>
Date: Thu, 4 Jul 2024 11:23:58 -0400
Subject: [PATCH 3/7] docs: how to skip pipeline steps with language support

---
 docs/customisation.md | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/docs/customisation.md b/docs/customisation.md
index 71987e1..6dfe318 100644
--- a/docs/customisation.md
+++ b/docs/customisation.md
@@ -43,8 +43,11 @@ token list, and the token list itself.
 
 ## Skip a pipeline function for specific field names
 
-The `Pipeline.skip()` method allows you to skip a pipeline function for specific field names.
-This example skips the `stop_word_filter` pipeline function for the field `fullName`.
+The `Pipeline.skip()` method allows you to skip a pipeline function
+for specific field names.  It takes the function itself (not its name
+or its registered name) and a list of field names to skip it for. This
+example skips the `stop_word_filter` pipeline function for the field
+`fullName`.
 
 ```python
 from lunr import lunr, get_default_builder, stop_word_filter
@@ -58,6 +61,37 @@ builder.pipeline.skip(stop_word_filter.stop_word_filter, ["fullName"])
 idx = lunr(ref="id", fields=("fullName", "body"), documents=documents, builder=builder)
 ```
 
+Importantly, if you are using language support, the above code will
+not work, since there is a separate builder for each language, and the
+pipeline functions are generated dynamically and so cannot be
+imported.  Instead, you can access them by name.  For instance, to skip
+the stop word filter and stemmer for French for the field `titre`, you
+could do this:
+
+```python
+from lunr import lunr, get_default_builder
+
+documents = [...]
+
+builder = get_default_builder("fr")
+
+for funcname in "stopWordFilter-fr", "stemmer-fr":
+    builder.pipeline.skip(
+        builder.pipeline.registered_functions[funcname], ["titre"]
+    )
+
+idx = lunr(ref="id", fields=("titre", "texte"), documents=documents, builder=builder)
+```
+
+The current language support registers the functions
+`lunr-multi-trimmer-{lang}`, `stopWordFilter-{lang}` and
+`stemmer-{lang}`, but these names are a convention only.  You can
+access the full list through the `registered_functions` attribute of
+the pipeline, but this is not necessarily the list of actual pipeline
+steps, which is kept in a private field (though you can see the steps
+in the string representation of the pipeline).
+
+
 ## Token meta-data
 
 Lunr.py `Token` instances include meta-data information which can be used in

From 98a2e56f7006758489b1d9bbf43616e8df06caac Mon Sep 17 00:00:00 2001
From: David Huggins-Daines <dhd@ecolingui.ca>
Date: Thu, 4 Jul 2024 12:17:31 -0400
Subject: [PATCH 4/7] docs: unicode folding for fun and profit

---
 docs/languages.md | 70 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/docs/languages.md b/docs/languages.md
index 15394df..e18ce14 100644
--- a/docs/languages.md
+++ b/docs/languages.md
@@ -72,6 +72,76 @@ If you have documents in multiple language pass a list of language codes:
 [{'ref': 'c', 'score': 1.106, 'match_data': <MatchData "english">}]
 ```
 
+## Folding to ASCII
+
+It is often useful to allow for transliterated or unaccented
+characters when indexing and searching.  This is not implemented in
+the language support but can be done by adding a pipeline stage which
+"folds" the tokens to ASCII.  There are
+[various](https://pypi.org/project/text-unidecode/)
+[libraries](https://pypi.org/project/Unidecode/) to do this in Python
+as well as in [JavaScript](https://www.npmjs.com/package/unidecode).
+
+On the Python side, for example, to fold accents in French text using
+`text-unidecode` or `unidecode` (depending on your licensing
+preferences):
+
+```python
+import json
+from lunr import lunr, get_default_builder
+from lunr.pipeline import Pipeline
+from text_unidecode import unidecode
+
+def unifold(token, _idx=None, _tokens=None):
+    def wrap_unidecode(text, _metadata):
+        return unidecode(text)
+    return token.update(wrap_unidecode)
+
+Pipeline.register_function(unifold, "unifold")
+builder = get_default_builder("fr")
+builder.pipeline.add(unifold)
+builder.search_pipeline.add(unifold)
+index = lunr(
+    ref="id",
+    fields=["titre", "texte"],
+    documents=[
+        {"id": "1314-2023-DEM", "titre": "Règlement de démolition", "texte": "Texte"}
+    ],
+    languages="fr",
+    builder=builder,
+)
+print(index.search("reglement de demolition"))
+# [{'ref': '1314-2023-DEM', 'score': 0.4072935059634513, 'match_data': <MatchData "demolit,regl">}]
+print(index.search("règlement de démolition"))
+# [{'ref': '1314-2023-DEM', 'score': 0.4072935059634513, 'match_data': <MatchData "demolit,regl">}]
+with open("index.json", "wt") as outfh:
+    json.dump(index.serialize(), outfh)
+```
+
+Note that it is important to do folding on both the indexing and
+search pipelines to ensure that users who have the right keyboard and
+can remember which accents go where will still get the expected
+results.
+
+On the JavaScript side [the
+API](https://lunrjs.com/docs/lunr.Pipeline.html) is of course quite
+similar:
+
+```js
+const lunr = require("lunr");
+const fs = require("fs");
+const unidecode = require("unidecode");
+require("lunr-languages/lunr.stemmer.support.js")(lunr);
+require("lunr-languages/lunr.fr.js")(lunr);
+
+lunr.Pipeline.registerFunction(token => token.update(unidecode), "unifold")
+const index = lunr.Index.load(JSON.parse(fs.readFileSync("index.json", "utf8")));
+console.log(JSON.stringify(index.search("reglement de demolition")));
+# [{"ref":"1314-2023-DEM","score":0.4072935059634513,"matchData":{"metadata":{"regl":{"titre":{}},"demolit":{"titre":{}}}}}]
+console.log(JSON.stringify(index.search("règlement de démolition")));
+# [{"ref":"1314-2023-DEM","score":0.4072935059634513,"matchData":{"metadata":{"regl":{"titre":{}},"demolit":{"titre":{}}}}}]
+```
+
 ## Notes on language support
 
 - Using multiple languages means the terms will be stemmed once per language. This can yield unexpected results.

From f3a95f1767024b922297e94e998ae9ebffde841f Mon Sep 17 00:00:00 2001
From: David Huggins-Daines <dhd@ecolingui.ca>
Date: Thu, 4 Jul 2024 12:20:46 -0400
Subject: [PATCH 5/7] fix(docs): add a note about lunr-folding

Not to use it (even though I "maintain" it) because it is not good
---
 docs/languages.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/languages.md b/docs/languages.md
index e18ce14..40c7179 100644
--- a/docs/languages.md
+++ b/docs/languages.md
@@ -142,6 +142,10 @@ console.log(JSON.stringify(index.search("règlement de démolition")));
 # [{"ref":"1314-2023-DEM","score":0.4072935059634513,"matchData":{"metadata":{"regl":{"titre":{}},"demolit":{"titre":{}}}}}]
 ```
 
+There is also `lunr-folding` for JavaScript, but its folding is not
+the same as `unidecode` and it may not be fully compatible with
+language support, so it is recommended to use the above method.
+
 ## Notes on language support
 
 - Using multiple languages means the terms will be stemmed once per language. This can yield unexpected results.

From a5f64b4bd59dfe12fe25d6a50f70d078f0cb0df7 Mon Sep 17 00:00:00 2001
From: David Huggins-Daines <dhd@ecolingui.ca>
Date: Thu, 4 Jul 2024 12:21:20 -0400
Subject: [PATCH 6/7] fix(docs): add the url to lunr-folding

---
 docs/languages.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/languages.md b/docs/languages.md
index 40c7179..a321632 100644
--- a/docs/languages.md
+++ b/docs/languages.md
@@ -142,9 +142,11 @@ console.log(JSON.stringify(index.search("règlement de démolition")));
 # [{"ref":"1314-2023-DEM","score":0.4072935059634513,"matchData":{"metadata":{"regl":{"titre":{}},"demolit":{"titre":{}}}}}]
 ```
 
-There is also `lunr-folding` for JavaScript, but its folding is not
-the same as `unidecode` and it may not be fully compatible with
-language support, so it is recommended to use the above method.
+There is also
+[lunr-folding](https://www.npmjs.com/package/lunr-folding) for
+JavaScript, but its folding is not the same as `unidecode` and it may
+not be fully compatible with language support, so it is recommended to
+use the above method.
 
 ## Notes on language support
 

From d07b60f8d91466364bcad268805ee6d53a36829e Mon Sep 17 00:00:00 2001
From: Yeray Diaz <6739793+yeraydiazdiaz@users.noreply.github.com>
Date: Sun, 8 Sep 2024 12:25:29 +0100
Subject: [PATCH 7/7] Bump codecov-action to 4.5.0

Hopefully fixing its configuration as well.
---
 .github/workflows/test-suite.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml
index 9ac1668..4cfbaf5 100644
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@@ -46,7 +46,7 @@ jobs:
 
       - name: "Upload coverage to Codecov"
         if: "contains(env.USING_COVERAGE, matrix.python-version)"
-        uses: "codecov/codecov-action@v4.0.1"
+        uses: "codecov/codecov-action@v4.5.0"
         with:
           fail_ci_if_error: true
           token: ${{ secrets.CODECOV_TOKEN }}