diff --git a/.gitmodules b/.gitmodules
index f25c178..246fd7f 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -99,3 +99,6 @@
[submodule "german/de_deepspeech-aashishag"]
path = german/de_deepspeech-aashishag
url = https://github.com/synesthesiam/de_deepspeech-aashishag.git
+[submodule "korean/ko-kr_kaldi-montreal"]
+ path = korean/ko-kr_kaldi-montreal
+ url = https://github.com/synesthesiam/ko-kr_kaldi-montreal.git
diff --git a/PROFILES b/PROFILES
index 64e02e3..89ab54e 100644
--- a/PROFILES
+++ b/PROFILES
@@ -6,6 +6,7 @@ english/en-us_deepspeech-mozilla
english/en-us_julius-github
english/en-us_kaldi-zamia
english/en-us_pocketsphinx-cmu
+french/fr_kaldi-guyot
french/fr_pocketsphinx-cmu
german/de_deepspeech-aashishag
german/de_kaldi-zamia
@@ -14,7 +15,7 @@ greek/el-gr_pocketsphinx-cmu
hindi/hi_pocketsphinx-cmu
italian/it_pocketsphinx-cmu
kazakh/kz_pocketsphinx-cmu
-korean/ko-KR_kaldi-montreal
+korean/ko-kr_kaldi-montreal
mandarin/zh-cn_pocketsphinx-cmu
polish/pl_julius-github
portuguese/pt-br_pocketsphinx-cmu
diff --git a/README.md b/README.md
index f0d2587..e56ccc0 100644
--- a/README.md
+++ b/README.md
@@ -41,181 +41,277 @@ Untested profiles (highlighted below) *may* work, but I don't have the necessary
-
- Download |
+
+
+ Download
+ |
Catalan |
ca-es |
pocketsphinx |
- UNTESTED |
- UNTESTED |
+ UNTESTED |
+
+ UNTESTED
+ |
- Download |
- Dutch (Nederlands) |
+
+ Download
+ |
+ Dutch (Nederlands) |
nl |
- pocketsphinx |
- ★ ★ ★ (36x) |
- ☹ (6x) |
+ kaldi |
+ ★ ★ ★ ★ ★ (2x) |
+ ☹ (1x) |
- Download |
- Dutch (Nederlands) |
+
+ Download
+ |
+ Dutch (Nederlands) |
nl |
- kaldi |
- ★ ★ ★ ★ ★ (17x) |
- ☹ ☹ ☹ ☹ ☹ (8x) |
+ pocketsphinx |
+ ★ ★ ★ ★ (18x) |
+ ☹ (3x) |
- Download |
+
+ Download
+ |
English |
- en-us |
- kaldi |
- ★ ★ ★ ★ ★ (3x) |
- ★ ★ ★ ★ ★ (4x) |
+ en-in |
+ pocketsphinx |
+ ☹ (4x) |
+ ☹ (4x) |
- Download |
+
+ Download
+ |
English |
en-us |
- pocketsphinx |
- ★ ★ ★ ★ ★ (17x) |
- ★ ★ (2x) |
+ deepspeech |
+ ★ ★ ★ ★ ★ (1x) |
+ ★ ★ ★ ★ (1x) |
- Download |
+
+ Download
+ |
English |
en-us |
julius |
- ★ ★ ★ ★ ★ (2x) |
- ☹ (1x) |
+ ★ ★ ★ ★ (1x) |
+
+ UNTESTED
+ |
-
- Download |
- Indian English |
- en-in |
+
+
+ Download
+ |
+ English |
+ en-us |
+ kaldi |
+ ★ ★ ★ ★ ★ (3x) |
+ ★ ★ ★ ★ (1x) |
+
+
+
+ Download
+ |
+ English |
+ en-us |
pocketsphinx |
- UNTESTED |
- UNTESTED |
+ ★ ★ ★ ★ ★ (9x) |
+ ★ ★ ★ ★ (2x) |
- Download |
+
+ Download
+ |
+ French (Français) |
+ fr |
+ kaldi |
+ ★ ★ ★ ★ (4x) |
+ ★ ★ ★ ★ (1x) |
+
+
+
+ Download
+ |
French (Français) |
fr |
pocketsphinx |
- ★ ★ ★ (49x) |
- ☹ (4x) |
+ ★ ★ ★ ★ (23x) |
+ ☹ (3x) |
- Download |
+
+ Download
+ |
German |
de |
- kaldi |
- ★ ★ ★ ★ ★ (3x) |
+ pocketsphinx |
+ ★ ★ ★ ★ ★ (17x) |
★ ★ ★ ★ ★ (3x) |
- Download |
+
+ Download
+ |
German |
- de |
- pocketsphinx |
- ★ ★ ★ ★ ★ (29x) |
- ★ ★ ★ ★ ★ (5x) |
+ de-DE |
+ deepspeech |
+ ★ ★ ★ ★ ★ (1x) |
+ ★ ★ ★ ★ (1x) |
+
+
+
+ Download
+ |
+ German |
+ de-DE |
+ kaldi |
+ ★ ★ ★ ★ ★ (4x) |
+ ★ ★ ★ ★ (1x) |
- Download |
- Greek (Ελληνικά) |
+
+ Download
+ |
+ Greek (Ελληνικά) |
el-gr |
pocketsphinx |
- ★ ★ (17x) |
+ ★ ★ ★ ★ ★ (15x) |
☹ (1x) |
-
- Download |
+
+
+ Download
+ |
Hindi (Devanagari) |
hi |
pocketsphinx |
- UNTESTED |
- UNTESTED |
+ UNTESTED |
+
+ UNTESTED
+ |
- Download |
+
+ Download
+ |
Italian (Italiano) |
it |
pocketsphinx |
- ★ ★ ★ ★ ★ (39x) |
- ★ ★ ★ ★ ★ (14x) |
+ ★ ★ ★ ★ ★ (21x) |
+ ★ ★ ★ ★ ★ (7x) |
-
- Download |
+
+
+ Download
+ |
Kazakh (қазақша) |
kz |
pocketsphinx |
- UNTESTED |
- UNTESTED |
+ UNTESTED |
+
+ UNTESTED
+ |
-
- Download |
- Mandarin (中文) |
+
+
+ Download
+ |
+ Korean |
+ ko-kr |
+ kaldi |
+ ☹ (4x) |
+ ☹ (4x) |
+
+
+
+ Download
+ |
+ Mandarin |
zh-cn |
pocketsphinx |
- UNTESTED |
- UNTESTED |
+ UNTESTED |
+
+ UNTESTED
+ |
- Download |
+
+ Download
+ |
Polish (polski) |
pl |
julius |
- ★ (1x) |
- UNTESTED |
+ UNTESTED |
+
+ UNTESTED
+ |
- Download |
- Portugese (Português) |
+
+ Download
+ |
+ Portuguese (Português) |
pt-br |
pocketsphinx |
- ★ ★ (77x) |
- ☹ (20x) |
+ ★ ★ ★ ★ (51x) |
+ ☹ (11x) |
- Download |
+
+ Download
+ |
Russian (Русский) |
ru |
pocketsphinx |
- ★ ★ ★ ★ ★ (21x) |
+ ★ ★ ★ ★ ★ (17x) |
☹ (1x) |
- Download |
+
+ Download
+ |
Spanish (Español) |
es |
pocketsphinx |
- ★ ★ ★ ★ (35x) |
- ★ ★ ★ (22x) |
+ ★ ★ ★ ★ (25x) |
+ ★ ★ ★ ★ (15x) |
-
- Download |
- Mexican Spanish |
+
+
+ Download
+ |
+ Spanish |
es-mexican |
pocketsphinx |
- UNTESTED |
- UNTESTED |
+ ★ ★ ★ ★ ★ (9x) |
+ ★ ★ ★ ★ (2x) |
- Download |
+
+ Download
+ |
Swedish (svenska) |
sv |
kaldi |
- ★ (13x) |
+ ★ ★ ★ ★ (3x) |
☹ (1x) |
- Download |
+
+ Download
+ |
Vietnamese (Tiếng Việt) |
vi |
kaldi |
- ★ ★ ★ ★ ★ (10x) |
- ☹ (0.15x) |
+ ★ ★ ★ ★ ★ (4x) |
+ ☹ (1x) |
diff --git a/bin/generate-reports.sh b/bin/generate-reports.sh
index a2df449..11bfb42 100644
--- a/bin/generate-reports.sh
+++ b/bin/generate-reports.sh
@@ -110,7 +110,7 @@ for profile in "${profiles[@]}"; do
voice2json -p "${dest_dir}" --debug test-examples --open --directory "${open_dir}" | \
jq . > "${open_dir}/${report_name}"
- cp "${open_dir}/report.json" "${src_dir}/${profile}/test/open/"
+ cp "${open_dir}/${report_name}" "${src_dir}/${profile}/test/open/"
else
echo "${open_dir}" does not exist
fi
diff --git a/bin/make_performance_table.py b/bin/make_performance_table.py
new file mode 100644
index 0000000..0644e74
--- /dev/null
+++ b/bin/make_performance_table.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+import math
+import json
+import os
+import sys
+from pathlib import Path
+
+import yaml
+from yattag import Doc, indent
+
+NATIVE = {  # locale code -> the language's own name, shown as "Language (native)" in the table
+ "nl": "Nederlands",
+ "fr": "Français",
+ "el-gr": "Ελληνικά",
+ "hi": "Devanagari",
+ "it": "Italiano",
+ "kz": "қазақша",
+ "zh": "中文",  # NOTE(review): the Mandarin profile's locale is "zh-cn" — confirm this key is ever matched
+ "pl": "polski",
+ "pt-br": "Português",
+ "ru": "Русский",
+ "es": "Español",
+ "sv": "svenska",
+ "vi": "Tiếng Việt",
+}
+
+STAR = "★"  # repeated once per star in the rating built by to_stars()
+SAD_FACE = "☹"  # returned by to_stars() for accuracy below 0.75
+
+# -----------------------------------------------------------------------------
+
+
+def main():
+    """Read profile directories from stdin and print an HTML performance table.
+
+    Each stdin line names a profile directory relative to the repository root
+    (the parent of this script's directory).  profile.yml is required for every
+    profile; test/closed/report.json and test/open/report.json (or
+    report_open.json) are optional.  Rows are sorted by (language, locale) and
+    the indented HTML goes to stdout; progress and warnings go to stderr.
+    """
+    yaml.SafeLoader.add_constructor("!env", env_constructor)
+    base_dir = Path(__file__).parent.parent
+
+    closed_reports = {}
+    open_reports = {}
+    profile_yml = {}
+
+    for line in sys.stdin:
+        line = line.strip()
+        if not line:
+            # A blank line would resolve to base_dir itself, which has no profile.yml
+            continue
+
+        profile_dir = base_dir / line
+        profile_name = profile_dir.name
+
+        with open(profile_dir / "profile.yml", "r", encoding="utf-8") as yaml_file:
+            profile_yml[profile_name] = yaml.safe_load(yaml_file)
+
+        closed_path = profile_dir / "test" / "closed" / "report.json"
+        open_path = profile_dir / "test" / "open" / "report.json"
+
+        if not open_path.is_file():
+            # Try alternative name
+            open_path = profile_dir / "test" / "open" / "report_open.json"
+
+        # Load the two reports independently so a missing one neither hides the
+        # other nor is reported under the wrong path.
+        for path, reports in ((closed_path, closed_reports), (open_path, open_reports)):
+            if path.is_file():
+                with open(path, "r", encoding="utf-8") as report_file:
+                    reports[profile_name] = json.load(report_file)
+            else:
+                print("Missing", path, file=sys.stderr)
+
+    rows = []
+    for profile_name, profile in profile_yml.items():
+        print("Processing", profile_name, file=sys.stderr)
+        closed_report = closed_reports.get(profile_name)
+        open_report = open_reports.get(profile_name)
+
+        row = {
+            "name": profile_name,
+            "version": profile["version"],
+            "language": profile["language"]["name"],
+            "locale": profile["language"]["code"],
+            "system": profile["speech-to-text"]["acoustic-model-type"],
+        }
+
+        if closed_report:
+            row["closed_accuracy"] = closed_report["transcription_accuracy"]
+            row["closed_speedup"] = closed_report["average_transcription_speedup"]
+
+        if open_report:
+            row["open_accuracy"] = open_report["transcription_accuracy"]
+            row["open_speedup"] = open_report["average_transcription_speedup"]
+
+        rows.append(row)
+
+    # Convert to HTML
+    rows.sort(key=lambda r: (r["language"], r["locale"]))
+    doc, tag, text = Doc().tagtext()
+
+    with tag("table"):
+        # Header; the first column (download link) has no title
+        with tag("thead"):
+            with tag("tr"):
+                for title in ("", "Language", "Locale", "System", "Closed", "Open"):
+                    with tag("th"):
+                        if title:
+                            text(title)
+
+        # Body
+        with tag("tbody"):
+            for row in rows:
+                with tag("tr"):
+                    # Download
+                    with tag("td"):
+                        with tag(
+                            "a",
+                            href=f'https://github.com/synesthesiam/{row["name"]}/archive/v{row["version"]}.tar.gz',
+                        ):
+                            text("Download")
+
+                    # Language
+                    with tag("td"):
+                        text(_display_language(row["language"], row["locale"]))
+
+                    # Locale
+                    with tag("td"):
+                        text(row["locale"])
+
+                    # System
+                    with tag("td"):
+                        text(row["system"])
+
+                    # Closed, then Open
+                    for kind in ("closed", "open"):
+                        with tag("td"):
+                            _render_result(
+                                doc, tag, text,
+                                row.get(f"{kind}_accuracy"),
+                                row.get(f"{kind}_speedup"),
+                            )
+
+    print(indent(doc.getvalue()))
+
+
+def _display_language(name, locale):
+    """Return the capitalized language name, plus its native name when known."""
+    # Slicing (not name[0]) keeps an empty name from raising IndexError
+    label = name[:1].upper() + name[1:]
+
+    # Exact locale first, then the bare language subtag ("zh-cn" -> "zh")
+    native = NATIVE.get(locale) or NATIVE.get(str(locale).split("-")[0])
+    if native:
+        label = f"{label} ({native})"
+
+    return label
+
+
+def _render_result(doc, tag, text, accuracy, speedup):
+    """Write one result cell into the currently open <td>.
+
+    Emits the star rating from to_stars(), followed by "(Nx)" with the speedup
+    rounded up when one is given.  A missing (falsy) accuracy is rendered as a
+    highlighted UNTESTED, identically for the Closed and Open columns.
+    """
+    if not accuracy:
+        # The README describes untested profiles as highlighted
+        with tag("strong"):
+            text("UNTESTED")
+        return
+
+    result = to_stars(accuracy)
+    if speedup:
+        result = f"{result} ({int(math.ceil(float(speedup)))}x)"
+
+    doc.asis(result)
+
+
+# -----------------------------------------------------------------------------
+
+
+def to_stars(accuracy):
+    """Map an accuracy in [0, 1] to a rating: SAD_FACE below 0.75, else 1-5 stars in 0.05 steps."""
+    accuracy = float(accuracy)
+    if accuracy < 0.75:
+        return SAD_FACE
+
+    # Must be an elif chain: with independent ifs the last matching test wins,
+    # so every accuracy in [0.75, 0.95) was rated four stars.
+    if accuracy < 0.8:
+        num_stars = 1
+    elif accuracy < 0.85:
+        num_stars = 2
+    elif accuracy < 0.90:
+        num_stars = 3
+    elif accuracy < 0.95:
+        num_stars = 4
+    else:
+        num_stars = 5
+
+    return " ".join([STAR] * num_stars)
+
+
+def env_constructor(loader, node):
+    """YAML constructor for the !env tag: return the node's value with environment variables expanded."""
+    raw_value = node.value
+    return os.path.expandvars(raw_value)
+
+# -----------------------------------------------------------------------------
+if __name__ == "__main__":  # build the table only when run as a script, not on import
+ main()
diff --git a/english/en-us_julius-github b/english/en-us_julius-github
index d415ce3..2086c33 160000
--- a/english/en-us_julius-github
+++ b/english/en-us_julius-github
@@ -1 +1 @@
-Subproject commit d415ce3e91e01cc837bf65fb34cd976962dda392
+Subproject commit 2086c33f6e6ef17b551e5d02b48fc33a602cd20b
diff --git a/korean/ko-KR_kaldi-montreal b/korean/ko-KR_kaldi-montreal
deleted file mode 160000
index 8a5cc6e..0000000
--- a/korean/ko-KR_kaldi-montreal
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 8a5cc6e496690c8c88ac0ebebba9755e5a731216
diff --git a/korean/ko-kr_kaldi-montreal b/korean/ko-kr_kaldi-montreal
new file mode 160000
index 0000000..83b3d74
--- /dev/null
+++ b/korean/ko-kr_kaldi-montreal
@@ -0,0 +1 @@
+Subproject commit 83b3d74de5ee9ed5c32a82828b32d211db0ac0a7
diff --git a/polish/pl_julius-github b/polish/pl_julius-github
index 428638d..e7b770b 160000
--- a/polish/pl_julius-github
+++ b/polish/pl_julius-github
@@ -1 +1 @@
-Subproject commit 428638d6e292abe054a8e77df455944836701b45
+Subproject commit e7b770b1368ae8dcab104ceae62a45ad61099c46
diff --git a/requirements.txt b/requirements.txt
index fd53361..78b8384 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,8 @@
-bs4
-html5lib
-requests
-pyyaml
-pydash
-conllu
+beautifulsoup4==4.7.1
+conllu==1.3.1
+html5lib==1.0.1
+pydash==4.7.4
+pyyaml==5.1.2
quart==0.6.15
+requests==2.21.0
+yattag==1.13.2