Skip to content

Commit

Permalink
Update state due to minimum length filtering in SWC
Browse files Browse the repository at this point in the history
  • Loading branch information
ynop committed Dec 9, 2019
1 parent 366660a commit 4d98b3d
Show file tree
Hide file tree
Showing 8 changed files with 14 additions and 14 deletions.
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,12 @@ Checkout [https://github.com/ynop/audiomate](https://github.com/ynop/audiomate)

## Corpus Statistics

| Part | h | Speakers |
| ----------|--------| ----------------------------------------------------|
| full | 1039 | x (not known due to the absence of info in M-AILABS)|
| train | 486 | x (not known due to the absence of info in M-AILABS)|
| dev | 54 | 1259 |
| test | 54 | 2101 |
| Part | h | Speakers |
| -----------| -------| ----------------------------------------------------|
| unfiltered | 1021 | x (not known due to the absence of info in M-AILABS)|
| train | 474 | x (not known due to the absence of info in M-AILABS)|
| dev | 51 | 1251 |
| test | 51 | 2112 |

## Corpus sources

Expand Down
4 changes: 2 additions & 2 deletions create.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ val_path=$out_path/validation
python scripts/validate.py $dl_path $val_path


The results from the validation step (invalid utterances)
have to be incorporated to audiomate manually.
# The results from the validation step (invalid utterances)
# have to be incorporated into audiomate manually.


echo "##############################################################"
Expand Down
2 changes: 1 addition & 1 deletion data/corpus_stats.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"duration": 3737726.5508176847, "num_utterances": 892680, "num_issuers": 32374, "subviews": {"train": {"duration": 1749156.3571310113, "num_utterances": 431550, "num_issuers": 27619}, "dev_tuda": {"duration": 8692.799999999996, "num_utterances": 1079, "num_issuers": 16}, "dev": {"duration": 194324.76762499986, "num_utterances": 69805, "num_issuers": 1257}, "test_common_voice": {"duration": 27754.056000000062, "num_utterances": 5632, "num_issuers": 1901}, "full_mailabs": {"duration": 841330.2593808444, "num_utterances": 118521, "num_issuers": 26442}, "full_voxforge": {"duration": 114093.00250000002, "num_utterances": 24088, "num_issuers": 328}, "full_tuda": {"duration": 660216.7129375074, "num_utterances": 79110, "num_issuers": 179}, "test": {"duration": 194918.75512500064, "num_utterances": 67340, "num_issuers": 2105}, "full_common_voice": {"duration": 1167084.4559999288, "num_utterances": 281112, "num_issuers": 4852}, "train_tuda": {"duration": 120898.59999999966, "num_utterances": 14357, "num_issuers": 146}, "dev_common_voice": {"duration": 25343.375999999935, "num_utterances": 5631, "num_issuers": 1010}, "test_tuda": {"duration": 8559.810000000003, "num_utterances": 1020, "num_issuers": 17}, "test_swc": {"duration": 142265.19000000018, "num_utterances": 57461, "num_issuers": 91}, "dev_voxforge": {"duration": 16514.461625000033, "num_utterances": 3242, "num_issuers": 122}, "train_common_voice": {"duration": 36725.85599999993, "num_utterances": 8518, "num_issuers": 552}, "full_swc": {"duration": 955002.1200000154, "num_utterances": 389849, "num_issuers": 569}, "dev_swc": {"duration": 143774.1300000008, "num_utterances": 59853, "num_issuers": 109}, "train_swc": {"duration": 668962.8000000144, "num_utterances": 272535, "num_issuers": 369}, "test_voxforge": {"duration": 16339.699125000045, "num_utterances": 3227, "num_issuers": 96}, "train_voxforge": {"duration": 81238.84175000005, "num_utterances": 17619, "num_issuers": 110}}}
{"duration": 3677249.640817725, "num_utterances": 797797, "num_issuers": 32374, "subviews": {"train": {"duration": 1706886.6371310302, "num_utterances": 365258, "num_issuers": 27618}, "dev_tuda": {"duration": 8692.799999999996, "num_utterances": 1079, "num_issuers": 16}, "dev": {"duration": 184975.0676249986, "num_utterances": 54978, "num_issuers": 1251}, "test_common_voice": {"duration": 27754.056000000062, "num_utterances": 5632, "num_issuers": 1901}, "full_mailabs": {"duration": 841330.2593808444, "num_utterances": 118521, "num_issuers": 26442}, "full_voxforge": {"duration": 114093.00250000002, "num_utterances": 24088, "num_issuers": 328}, "full_tuda": {"duration": 660216.7129375074, "num_utterances": 79110, "num_issuers": 179}, "test": {"duration": 186061.26512499977, "num_utterances": 53576, "num_issuers": 2112}, "full_common_voice": {"duration": 1167084.4559999288, "num_utterances": 281112, "num_issuers": 4852}, "train_tuda": {"duration": 120898.59999999966, "num_utterances": 14357, "num_issuers": 146}, "dev_common_voice": {"duration": 25343.375999999935, "num_utterances": 5631, "num_issuers": 1010}, "test_tuda": {"duration": 8559.810000000003, "num_utterances": 1020, "num_issuers": 17}, "test_swc": {"duration": 133407.6999999993, "num_utterances": 43697, "num_issuers": 98}, "dev_voxforge": {"duration": 16514.461625000033, "num_utterances": 3242, "num_issuers": 122}, "train_common_voice": {"duration": 36725.85599999993, "num_utterances": 8518, "num_issuers": 552}, "full_swc": {"duration": 894525.210000059, "num_utterances": 294966, "num_issuers": 569}, "dev_swc": {"duration": 134424.42999999953, "num_utterances": 45026, "num_issuers": 103}, "train_swc": {"duration": 626693.0800000381, "num_utterances": 206243, "num_issuers": 368}, "test_voxforge": {"duration": 16339.699125000045, "num_utterances": 3227, "num_issuers": 96}, "train_voxforge": {"duration": 81238.84175000005, "num_utterances": 17619, "num_issuers": 110}}}
2 changes: 1 addition & 1 deletion data/state.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"meta_files": {"subview_train.txt": "16ed34c6980a491c2e687c93175ff860", "files.txt": "a6e1fa4571465e25bf2a5c22ae6a1a95", "subview_dev_tuda.txt": "ab23c25d2b9fa294e43c35416b73eb01", "subview_dev.txt": "b0d8aa1be944b64ebcc2d56333baaebf", "subview_test_common_voice.txt": "6082d635e4cf89a1599e54c99fa8830b", "subview_full_mailabs.txt": "966b8cb5d8789619082c3d8b39118d93", "subview_full_voxforge.txt": "7852af57463b057692d6b06616cbda16", "subview_full_tuda.txt": "84b2e3b0016909c068a5cbcf69e1dcce", "labels_word-transcript-raw.txt": "d3971c85ba4676c3192af3259360f11e", "utterances.txt": "12cc9297e143c3eeedf69b57cbd4758b", "subview_test.txt": "2d94b30c4cceb975c7dc7a927735c92e", "subview_full_common_voice.txt": "811134413761723187b6ef82e54f41e3", "features.txt": "d41d8cd98f00b204e9800998ecf8427e", "subview_train_tuda.txt": "18b799305b077499ef328b2467fd6384", "subview_dev_common_voice.txt": "d58e188a69000e89be364c146d332125", "subview_test_tuda.txt": "790b4324e01ab7385f90d5dc01f23bf2", "issuers.json": "8c54bd69a25fa1e5e9bff27dbece50b2", "subview_test_swc.txt": "dd92a601e0e38f88818260248d54ac80", "subview_dev_voxforge.txt": "752119ed6a53b3ef81720ed0a07a776b", "subview_train_common_voice.txt": "9578c8f4f12f609179a00d2785e6c7c6", "subview_full_swc.txt": "3e897b775fc6b8e9a3cdbf3c9fec9dbb", "subview_dev_swc.txt": "735b17fe126ed95a1e03907dde00c348", "labels_word-transcript.txt": "f9a26951fbb661caa38eb3399847d8df", "utt_issuers.txt": "5ff2e1ddf1efdf8633f4086ff62c30c1", "subview_train_swc.txt": "12ecc837212903254bd14ebe3d3b655f", "subview_test_voxforge.txt": "41eb18c1a1bc9c329f41ddac1c6e0424", "subview_train_voxforge.txt": "44b558b944176ba5e970e7f6b080ebf9", "audio.txt": "d41d8cd98f00b204e9800998ecf8427e"}, "audio_files": "487036b9a52b028929c8aacbe1e632a7"}
{"meta_files": {"subview_train.txt": "1c57131f164252f39bf458e24a4e4948", "files.txt": "a6e1fa4571465e25bf2a5c22ae6a1a95", "subview_dev_tuda.txt": "ab23c25d2b9fa294e43c35416b73eb01", "subview_dev.txt": "bfab6eef4fc51b7d939961f4eaba8c4b", "subview_test_common_voice.txt": "6082d635e4cf89a1599e54c99fa8830b", "subview_full_mailabs.txt": "966b8cb5d8789619082c3d8b39118d93", "subview_full_voxforge.txt": "7852af57463b057692d6b06616cbda16", "subview_full_tuda.txt": "84b2e3b0016909c068a5cbcf69e1dcce", "labels_word-transcript-raw.txt": "d3971c85ba4676c3192af3259360f11e", "utterances.txt": "fd9f63025dd661bcbe67b8601a20dd14", "subview_test.txt": "465b23d6db313fe3399873d94be426f2", "subview_full_common_voice.txt": "811134413761723187b6ef82e54f41e3", "features.txt": "d41d8cd98f00b204e9800998ecf8427e", "subview_train_tuda.txt": "18b799305b077499ef328b2467fd6384", "subview_dev_common_voice.txt": "d58e188a69000e89be364c146d332125", "subview_test_tuda.txt": "790b4324e01ab7385f90d5dc01f23bf2", "issuers.json": "8c54bd69a25fa1e5e9bff27dbece50b2", "subview_test_swc.txt": "65479ddcf5c92f029028f2a7f42dd00c", "subview_dev_voxforge.txt": "752119ed6a53b3ef81720ed0a07a776b", "subview_train_common_voice.txt": "9578c8f4f12f609179a00d2785e6c7c6", "subview_full_swc.txt": "002dba993b69fa6873fd2018e3aee121", "subview_dev_swc.txt": "d3495881903223a0a42791bfb4c516e7", "labels_word-transcript.txt": "38fad7ccb24f44a7e7f87cd717f21b67", "utt_issuers.txt": "bfe91ead5941378bb4462ef680f0e0c8", "subview_train_swc.txt": "ef057bb9a8f34e6c5ea23b313e5bfa26", "subview_test_voxforge.txt": "41eb18c1a1bc9c329f41ddac1c6e0424", "subview_train_voxforge.txt": "44b558b944176ba5e970e7f6b080ebf9", "audio.txt": "d41d8cd98f00b204e9800998ecf8427e"}, "audio_files": "487036b9a52b028929c8aacbe1e632a7"}
2 changes: 1 addition & 1 deletion data/validation/swc/invalid_all.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion data/validation/swc/invalid_character_ratio.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion data/validation/swc/invalid_transcripts.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
click==7.0
tqdm==4.39.0
git+git://github.com/ynop/audiomate.git@30488ce#egg=audiomate
git+git://github.com/ynop/audiomate.git@2e5a1aa#egg=audiomate
git+git://github.com/ynop/spoteno.git@7700d53#egg=spoteno

0 comments on commit 4d98b3d

Please sign in to comment.