Skip to content

Commit

Permalink
Add more scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Hansen committed May 1, 2020
1 parent 081f7b3 commit c9ca91d
Show file tree
Hide file tree
Showing 18 changed files with 163 additions and 14 deletions.
2 changes: 2 additions & 0 deletions PROFILES
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ catalan/ca-es_pocketsphinx-cmu
dutch/nl_kaldi-cgn
dutch/nl_pocketsphinx-cmu
english/en-in_pocketsphinx-cmu
english/en-us_deepspeech-mozilla
english/en-us_julius-github
english/en-us_kaldi-zamia
english/en-us_pocketsphinx-cmu
french/fr_pocketsphinx-cmu
german/de_deepspeech-aashishag
german/de_kaldi-zamia
german/de_pocketsphinx-cmu
greek/el-gr_pocketsphinx-cmu
Expand Down
2 changes: 1 addition & 1 deletion __main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@ async def download_raw(path: str) -> Response:
# -----------------------------------------------------------------------------

if __name__ == "__main__":
app.run()
app.run(host="0.0.0.0")
46 changes: 46 additions & 0 deletions bin/check_files_yml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/env python3
"""Verifies sizes and sha256 sums for files.yml files."""
import os
import subprocess
import sys
from pathlib import Path

import yaml


def _sha256_hex(path):
    """Return the lowercase hexadecimal sha256 digest of the file at ``path``."""
    # Local import: keeps this change self-contained and removes the
    # dependency on an external ``sha256sum`` executable.
    import hashlib

    digest = hashlib.sha256()
    with open(path, "rb") as data_file:
        # Read in fixed-size chunks so the whole file is never held in memory.
        for chunk in iter(lambda: data_file.read(1024 * 1024), b""):
            digest.update(chunk)

    return digest.hexdigest()


def main():
    """Verify the sizes and sha256 sums recorded in files.yml files.

    Every command-line argument is the path to a files.yml file. The file is
    expected to be a mapping whose values are themselves mappings from a file
    path (relative to the directory containing files.yml) to a dict with
    ``bytes`` and ``sha256`` entries. Each listed file is checked against both
    entries, and one summary line (directory name, number of files, "OK") is
    printed per files.yml.

    Raises:
        AssertionError: if a file's size or sha256 sum does not match. The
            error is raised explicitly rather than via ``assert`` so the
            checks still run under ``python -O``.
    """
    for files_yaml_path in sys.argv[1:]:
        profile_root = Path(files_yaml_path).parent
        with open(files_yaml_path, "r", encoding="utf-8") as files_yaml_file:
            files_yaml = yaml.safe_load(files_yaml_file)

        file_count = 0
        # The top-level keys are not needed for verification, only the
        # per-file entries beneath them.
        for files in files_yaml.values():
            for file_path, file_info in files.items():
                full_path = profile_root / file_path

                # Check byte size
                expected_bytes = int(file_info["bytes"])
                actual_bytes = os.path.getsize(full_path)
                if actual_bytes != expected_bytes:
                    raise AssertionError(
                        f"Expected size of {full_path} to be {expected_bytes}, got {actual_bytes}"
                    )

                # Check sha256 sum
                expected_sum = str(file_info["sha256"]).strip()
                actual_sum = _sha256_hex(full_path)
                if actual_sum != expected_sum:
                    raise AssertionError(
                        f"Expected sha256 sum of {full_path} to be {expected_sum}, got {actual_sum}"
                    )

                file_count += 1

        print(profile_root.name, file_count, "OK")


if __name__ == "__main__":
main()
69 changes: 69 additions & 0 deletions bin/generate-reports.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env bash
#
# Trains each profile in a temporary copy and regenerates its test reports.
#
# Usage: generate-reports.sh [PROFILE ...]
#
# PROFILE is a profile directory relative to the parent directory of this
# script. With no arguments, every profile listed in the PROFILES file of
# that directory is processed.
#
# For each profile, report.json is written back into the profile's
# test/closed and/or test/open directory when that directory exists.
#
# Requires voice2json, jq and realpath on PATH.

# pipefail is required in addition to -e: without it a failing voice2json in
# "voice2json ... | jq ." is masked by jq's exit status and an empty
# report.json would be copied over the existing report.
set -eo pipefail

this_dir="$( cd "$( dirname "$0" )" && pwd )"
src_dir="$(realpath "${this_dir}/..")"

# -----------------------------------------------------------------------------

# Create a temporary directory for testing
temp_dir="$(mktemp -d)"

# Remove the temporary directory on any exit, including failures under set -e
function cleanup {
    rm -rf "${temp_dir}"
}

trap cleanup EXIT

# -----------------------------------------------------------------------------

# Profiles come from the command line (up to the first empty argument)
profiles=()
while [[ -n "$1" ]]; do
    profiles+=("$1")
    shift
done

# Fall back to the PROFILES file when no profiles were given
if [[ "${#profiles[@]}" -eq 0 ]]; then
    # "|| [[ -n ... ]]" keeps a final line that has no trailing newline
    while read -r profile || [[ -n "${profile}" ]]; do
        # Skip blank lines: an empty profile would make dest_dir the temporary
        # root itself and copy the entire source tree into it.
        if [[ -z "${profile}" ]]; then
            continue
        fi

        profiles+=("${profile}")
    done < "${src_dir}/PROFILES"
fi

# -----------------------------------------------------------------------------

for profile in "${profiles[@]}"; do
    echo "${profile}"

    # Copy to temporary directory so training does not modify the source tree
    dest_dir="${temp_dir}/${profile}"
    rm -rf "${dest_dir}"
    mkdir -p "${dest_dir}"

    echo 'Copying...'
    cp -aR "${src_dir}/${profile}"/* "${dest_dir}/"

    echo 'Training...'
    voice2json -p "${dest_dir}" --debug train-profile

    closed_dir="${dest_dir}/test/closed"
    if [[ -d "${closed_dir}" ]]; then
        echo 'Testing (closed)...'
        voice2json -p "${dest_dir}" --debug test-examples --directory "${closed_dir}" | \
            jq . > "${closed_dir}/report.json"

        # Publish the report back into the source profile
        cp "${closed_dir}/report.json" "${src_dir}/${profile}/test/closed/"
    fi

    open_dir="${dest_dir}/test/open"
    if [[ -d "${open_dir}" ]]; then
        echo 'Testing (open)...'
        voice2json -p "${dest_dir}" --debug test-examples --open --directory "${open_dir}" | \
            jq . > "${open_dir}/report.json"

        # Publish the report back into the source profile
        cp "${open_dir}/report.json" "${src_dir}/${profile}/test/open/"
    fi

    echo 'Done'
    echo '----------'
    echo ''
done
2 changes: 1 addition & 1 deletion bin/make_download_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def main():
"bytes_expected": os.path.getsize(file_name),
}
)
file_details["parts"] = parts
file_details["parts"] = sorted(parts, key=lambda p: p["fragment"])
elif unzip:
file_details["url"] += ".gz"

Expand Down
32 changes: 32 additions & 0 deletions bin/update_files_yml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env python3
"""Update sizes and sha256 sums for files.yml files."""
import os
import subprocess
import sys
from pathlib import Path

import yaml


def _sha256_hex(path):
    """Return the lowercase hexadecimal sha256 digest of the file at ``path``."""
    # Local import: keeps this change self-contained and removes the
    # dependency on an external ``sha256sum`` executable.
    import hashlib

    digest = hashlib.sha256()
    with open(path, "rb") as data_file:
        # Read in fixed-size chunks so the whole file is never held in memory.
        for chunk in iter(lambda: data_file.read(1024 * 1024), b""):
            digest.update(chunk)

    return digest.hexdigest()


def main():
    """Recompute sizes and sha256 sums for one files.yml and print the result.

    The first command-line argument is the path to a files.yml file; any
    further arguments are ignored. The file is expected to be a mapping whose
    values are themselves mappings from a file path (relative to the directory
    containing files.yml) to a dict. Each dict's ``bytes`` and ``sha256``
    entries are overwritten with values measured from the file on disk, and
    the updated document is dumped as YAML to standard output. The files.yml
    file itself is not modified.

    Raises:
        AssertionError: if no files.yml path was given. The error is raised
            explicitly rather than via ``assert`` so the check still runs
            under ``python -O``.
    """
    if len(sys.argv) < 2:
        raise AssertionError("Usage: update_files_yml.py <files.yml>")

    files_yaml_path = sys.argv[1]
    profile_root = Path(files_yaml_path).parent
    with open(files_yaml_path, "r", encoding="utf-8") as files_yaml_file:
        files_yaml = yaml.safe_load(files_yaml_file)

    # The top-level keys are left untouched; only the per-file entries
    # beneath them are refreshed in place.
    for files in files_yaml.values():
        for file_path, file_info in files.items():
            full_path = profile_root / file_path

            file_info["bytes"] = os.path.getsize(full_path)
            file_info["sha256"] = _sha256_hex(full_path)

    print(yaml.dump(files_yaml))


if __name__ == "__main__":
main()
2 changes: 1 addition & 1 deletion catalan/ca-es_pocketsphinx-cmu
2 changes: 1 addition & 1 deletion english/en-us_julius-github
2 changes: 1 addition & 1 deletion english/en-us_kaldi-zamia
2 changes: 1 addition & 1 deletion english/en-us_pocketsphinx-cmu
2 changes: 1 addition & 1 deletion french/fr_pocketsphinx-cmu
2 changes: 1 addition & 1 deletion german/de_pocketsphinx-cmu
2 changes: 1 addition & 1 deletion portuguese/pt-br_pocketsphinx-cmu
2 changes: 1 addition & 1 deletion spanish/es_pocketsphinx-cmu
2 changes: 1 addition & 1 deletion vietnamese/vi_kaldi-montreal
Submodule vi_kaldi-montreal updated 27 files
+ acoustic_model/model/graph/HCLG.fst.gz.part-00
+ acoustic_model/model/graph/HCLG.fst.gz.part-01
+ acoustic_model/model/graph/HCLG.fst.gz.part-02
+ acoustic_model/model/graph/HCLG.fst.gz.part-03
+ acoustic_model/model/graph/HCLG.fst.gz.part-04
+ acoustic_model/model/graph/HCLG.fst.gz.part-05
+ acoustic_model/model/graph/HCLG.fst.gz.part-06
+ acoustic_model/model/graph/HCLG.fst.gz.part-07
+ acoustic_model/model/graph/HCLG.fst.gz.part-08
+ acoustic_model/model/graph/HCLG.fst.gz.part-09
+ acoustic_model/model/graph/HCLG.fst.gz.part-10
+ acoustic_model/model/graph/HCLG.fst.gz.part-11
+ acoustic_model/model/graph/HCLG.fst.gz.part-12
+6 −0 acoustic_model/model/graph/disambig_tid.int
+1 −0 acoustic_model/model/graph/num_pdfs
+905 −0 acoustic_model/model/graph/phones.txt
+29,782 −0 acoustic_model/model/graph/phones/align_lexicon.int
+29,782 −0 acoustic_model/model/graph/phones/align_lexicon.txt
+5 −0 acoustic_model/model/graph/phones/disambig.int
+5 −0 acoustic_model/model/graph/phones/disambig.txt
+1 −0 acoustic_model/model/graph/phones/optional_silence.csl
+1 −0 acoustic_model/model/graph/phones/optional_silence.int
+1 −0 acoustic_model/model/graph/phones/optional_silence.txt
+1 −0 acoustic_model/model/graph/phones/silence.csl
+899 −0 acoustic_model/model/graph/phones/word_boundary.int
+899 −0 acoustic_model/model/graph/phones/word_boundary.txt
+29,785 −0 acoustic_model/model/graph/words.txt

0 comments on commit c9ca91d

Please sign in to comment.