Skip to content

Commit

Permalink
Add Italian Kaldi model
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Hansen committed Oct 17, 2020
1 parent b3345c5 commit 7228a57
Show file tree
Hide file tree
Showing 9 changed files with 56 additions and 7 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,6 @@
[submodule "czech/cs_kaldi-rhasspy"]
path = czech/cs_kaldi-rhasspy
url = https://github.com/rhasspy/cs_kaldi-rhasspy.git
[submodule "italian/it_kaldi-rhasspy"]
path = italian/it_kaldi-rhasspy
url = https://github.com/rhasspy/it_kaldi-rhasspy.git
2 changes: 2 additions & 0 deletions PROFILES
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
catalan/ca-es_pocketsphinx-cmu
czech/cs_kaldi-rhasspy
dutch/nl_kaldi-cgn
dutch/nl_pocketsphinx-cmu
english/en-in_pocketsphinx-cmu
Expand All @@ -13,6 +14,7 @@ german/de_kaldi-zamia
german/de_pocketsphinx-cmu
greek/el-gr_pocketsphinx-cmu
hindi/hi_pocketsphinx-cmu
italian/it_kaldi-rhasspy
italian/it_pocketsphinx-cmu
kazakh/kz_pocketsphinx-cmu
korean/ko-kr_kaldi-montreal
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,16 @@ Untested profiles (highlighted below) *may* work, but I don't have the necessary
<td>&#9733; &#9733; &#9733; &#9733; &#9733; (21x)</td>
<td>&#9733; &#9733; &#9733; &#9733; &#9733; (7x)</td>
</tr>
<tr>
<td>
<a href="https://github.com/synesthesiam/it_kaldi-rhasspy/archive/v1.0.tar.gz">Download</a>
</td>
<td>Italian (Italiano)</td>
<td>it</td>
<td>kaldi</td>
<td>&#9733; &#9733; &#9733; &#9733; &#9733; (1x)</td>
<td>&#9733; &#9733; &#9733; &#9733; &#9733; (1x)</td>
</tr>
<tr>
<td>
<a href="https://github.com/synesthesiam/kz_pocketsphinx-cmu/archive/v1.1.tar.gz">Download</a>
Expand Down
4 changes: 4 additions & 0 deletions __main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
@app.route("/<path:path>")
async def download_raw(path: str) -> Response:
components = path.split("/")

if components[0] in ("synesthesiam", "rhasspy"):
components = components[1:]

profile = components[0]

if components[1] == "raw":
Expand Down
8 changes: 8 additions & 0 deletions bin/make_g2p.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ trap finish EXIT

# -----------------------------------------------------------------------------

if [[ "$1" == '-' ]]; then
# Read from stdin into temporary file
dict_path="${temp_dir}/unformatted.dict"
cat > "${dict_path}"
else
dict_path="$(realpath "$1")"
fi

# Format dictionary for phonetisaurus
cd "${temp_dir}"
perl -pe 's/\([0-9]+\)//;
Expand Down
31 changes: 26 additions & 5 deletions bin/print_phonemes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import re
import argparse
from collections import Counter


def main():
Expand All @@ -10,21 +11,41 @@ def main():
description="Prints the unique set of phonemes from a pronunciation dictionary",
)
parser.add_argument("dictionary", help="Path to pronunciation dictionary")
parser.add_argument(
"--min-count",
type=int,
help="Minimum use count before printing phoneme in list",
)
parser.add_argument(
"--counts", action="store_true", help="Print counts with list"
)
args = parser.parse_args()

phonemes = set()
with open(args.dictionary, "r") as dict_file:
if args.dictionary == "-":
dict_file = sys.stdin
else:
dict_file = open(args.dictionary, "r")

phonemes = Counter()
with dict_file:
for line in dict_file:
line = line.strip()
if len(line) == 0:
continue

# Split on literal spaces/tabs only; \s would also match the non-breaking space (0xA0)
parts = re.split(r"[ \t]+", line)
phonemes.update(parts[1:])

for phoneme in sorted(phonemes):
print(phoneme)
for phoneme in parts[1:]:
phonemes[phoneme] += 1

for phoneme in sorted(phonemes):
count = phonemes[phoneme]
if args.min_count is None or (count >= args.min_count):
if args.counts:
print(phoneme, count)
else:
print(phoneme)


# -----------------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions italian/it_kaldi-rhasspy
Submodule it_kaldi-rhasspy added at 7784d5
2 changes: 1 addition & 1 deletion italian/it_pocketsphinx-cmu

0 comments on commit 7228a57

Please sign in to comment.