Skip to content

Commit

Permalink
Make tts deploy notebook more robust
Browse files Browse the repository at this point in the history
Signed-off-by: Jason <jasoli@nvidia.com>
  • Loading branch information
blisc committed Jan 30, 2023
1 parent d3f3acd commit 0385ebe
Showing 1 changed file with 39 additions and 25 deletions.
64 changes: 39 additions & 25 deletions tts-python-deploy.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@
"Update the parameters in the following code block:\n",
"- `machine_type`: Type of machine the tutorial is being run on. Acceptable values are `AMD64`, `ARM64_linux`, `ARM64_l4t`. Defaults to `AMD64`. \n",
"- `target_machine`: Type of machine the RMIR will be deployed on. Acceptable values are `AMD64`, `ARM64_linux`, `ARM64_l4t`. Defaults to `AMD64`. \n",
"- `acoustic_model`: Full path for acoustic model `.riva` file. Defaults to `$pwd/speechsynthesis_en_us_fastpitch_ipa_vdeployable_v1.0/FastPitch_44k_EnglishUS_IPA.riva` which is the default download path for the NGC example model. This can be replaced with a custom acoustic model `.riva` checkpoint. \n",
"- `vocoder`: Full path for vocoder `.riva` file. Defaults to `$pwd/speechsynthesis_en_us_hifigan_ipa_vdeployable_v1.0/HifiGAN_44k_EnglishUS_IPA.riva` which is the default download path for the NGC example model. This can be replaced with a custom vocoder `.riva` checkpoint. \n",
"- `acoustic_model`: Full path for acoustic model `.riva` file. Defaults to `None`. This can be replaced with a custom acoustic model `.riva` checkpoint. \n",
"- `vocoder`: Full path for vocoder `.riva` file. Defaults to `None`. This can be replaced with a custom vocoder `.riva` checkpoint. \n",
"- `out_dir`: Directory to put the `TTS.rmir` file. The RMIR will be placed in `${out_dir}/RMIR/RMIR_NAME.rmir`. Defaults to `$pwd/out`. \n",
"- `voice`: Set the voice name of the model. Default to `\"test\"`. \n",
"- `key`: This is the encryption key used in `nemo2riva`. The same key will be used to deploy the RMIR generated in this tutorial. Defaults to `tlt_encode`. \n",
Expand All @@ -89,8 +89,8 @@
"\n",
"machine_type=\"AMD64\" #Change this to `ARM64_linux` or `ARM64_l4t` in case of an ARM64 machine.\n",
"target_machine=\"AMD64\" #Change this to `ARM64_linux` or `ARM64_l4t` in case of an ARM64 machine.\n",
"acoustic_model = pathlib.Path.cwd() / \"speechsynthesis_en_us_fastpitch_ipa_vdeployable_v1.0/FastPitch_44k_EnglishUS_IPA.riva\" ##acoustic_model .riva location\n",
"vocoder = pathlib.Path.cwd() / \"speechsynthesis_en_us_hifigan_ipa_vdeployable_v1.0/HifiGAN_44k_EnglishUS_IPA.riva\" ##vocoder .riva location\n",
"acoustic_model = None ##acoustic_model .riva location\n",
"vocoder = None ##vocoder .riva location\n",
"out_dir = pathlib.Path.cwd() / \"out\" ##Output directory to store the generated RMIR. The RMIR will be placed in `${out_dir}/RMIR/RMIR_NAME.rmir`.\n",
"voice = \"test\" ##Voice name\n",
"key = \"tlt_encode\" ##Encryption key used during nemo2riva\n",
Expand All @@ -99,8 +99,8 @@
"sample_rate = 44100 ##Sample rate of the audios\n",
"num_speakers = 2 ## Number of speakers\n",
"\n",
"riva_aux_files=pathlib.Path.cwd() / \"speechsynthesis_en_us_auxiliary_files_vdeployable_v1.3\" ##Riva model repo path. In the case of a custom model repo, change this to the full path of the custom Riva model repo.\n",
"riva_tn_files=pathlib.Path.cwd() / \"normalization_en_us_files_vdeployable_v1.1\" ##Riva model repo path. In the case of a custom model repo, change this to the full path of the custom Riva model repo.\n",
"riva_aux_files = None ##Riva model repo path. In the case of a custom model repo, change this to the full path of the custom Riva model repo.\n",
"riva_tn_files = None ##Riva model repo path. In the case of a custom model repo, change this to the full path of the custom Riva model repo.\n",
"\n",
"## Riva NGC, servicemaker image config.\n",
"riva_ngc_image_version = \"2.9.0\"\n",
Expand All @@ -110,16 +110,25 @@
" riva_init_image = f\"nvcr.io/nvidia/riva/riva-speech:{riva_ngc_image_version}-servicemaker-l4t-aarch64\"\n",
"rmir_dir = out_dir / \"rmir\"\n",
"\n",
"am_dir = acoustic_model.parent\n",
"voc_dir = vocoder.parent\n",
"\n",
"am_name = acoustic_model.name\n",
"voc_name = vocoder.name\n",
"\n",
"if not out_dir.exists():\n",
" out_dir.mkdir()\n",
"if not rmir_dir.exists():\n",
" rmir_dir.mkdir()"
" rmir_dir.mkdir()\n",
"\n",
"def get_ngc_download_dir(ngc_output, var, var_name):\n",
" output = None\n",
" for line in ngc_output:\n",
" if \"Downloaded local path\" in line:\n",
" output = pathlib.Path(line.split(\"path: \")[-1])\n",
" break\n",
" riva_files_in_dir = list(output.glob(\"*.riva\"))\n",
" if len(riva_files_in_dir) > 0:\n",
" output = riva_files_in_dir[0]\n",
" if output is not None and var is not None:\n",
" warnings.warn(\n",
" f\"`{var_name}` had a non-default value of `{var}`. `{var_name}` will be updated to `{var}`\"\n",
" )\n",
" return output"
]
},
{
Expand All @@ -141,12 +150,13 @@
"metadata": {},
"outputs": [],
"source": [
"!ngc registry model download-version \"nvidia/tao/speechsynthesis_en_us_fastpitch_ipa:deployable_v1.0\"\n",
"!ngc registry model download-version \"nvidia/tao/speechsynthesis_en_us_hifigan_ipa:deployable_v1.0\""
"fastpitch_output = !ngc registry model download-version \"nvidia/tao/speechsynthesis_en_us_fastpitch_ipa:deployable_v1.0\"\n",
"hifigan_output = !ngc registry model download-version \"nvidia/tao/speechsynthesis_en_us_hifigan_ipa:deployable_v1.0\"\n",
"acoustic_model = get_ngc_download_dir(fastpitch_output, acoustic_model, \"acoustic_model\")\n",
"vocoder = get_ngc_download_dir(hifigan_output, vocoder, \"vocoder\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "b7d8e550",
"metadata": {},
Expand All @@ -167,8 +177,10 @@
"metadata": {},
"outputs": [],
"source": [
"!ngc registry model download-version \"nvidia/tao/speechsynthesis_en_us_auxiliary_files:deployable_v1.3\"\n",
"!ngc registry model download-version \"nvidia/tao/normalization_en_us:deployable_v1.1\""
"aux_output = !ngc registry model download-version \"nvidia/tao/speechsynthesis_en_us_auxiliary_files:deployable_v1.3\"\n",
"tn_output = !ngc registry model download-version \"nvidia/tao/normalization_en_us:deployable_v1.1\"\n",
"riva_aux_files = get_ngc_download_dir(aux_output, riva_aux_files, \"riva_aux_files\")\n",
"riva_tn_files = get_ngc_download_dir(tn_output, riva_tn_files, \"riva_tn_files\")"
]
},
{
Expand All @@ -190,7 +202,7 @@
"source": [
"##Run the riva servicemaker.\n",
"!docker stop riva_rmir_gen &> /dev/null\n",
"!set -x && docker run -td --gpus all --rm -v {str(riva_aux_files)}:/riva_aux -v {str(am_dir)}/:/synt -v {str(voc_dir)}:/voc -v {str(riva_tn_files)}:/riva_tn \\\n",
"!set -x && docker run -td --gpus all --rm -v {str(riva_aux_files)}:/riva_aux -v {str(acoustic_model.parent)}/:/synt -v {str(vocoder.parent)}:/voc -v {str(riva_tn_files)}:/riva_tn \\\n",
" -v {str(rmir_dir.resolve())}:/data --name riva_rmir_gen --entrypoint=\"/bin/bash\" {riva_init_image}"
]
},
Expand All @@ -212,10 +224,12 @@
"outputs": [],
"source": [
"warnings.warn(\"Using --force in riva-build will replace any existing RMIR.\")\n",
"riva_build=f\"\"\"riva-build speech_synthesis --force --voice_name={voice} --language_code={lang} \\\n",
" --sample_rate={sample_rate} /data/FastPitch_HifiGan.rmir:{key} /synt/{am_name}:{key} \\\n",
" /voc/{voc_name}:{key} --abbreviations_file=/riva_aux/abbr.txt \\\n",
" --wfst_tokenizer_model=/riva_tn/tokenize_and_classify.far --wfst_verbalizer_model=riva_tn/verbalize.far\"\"\"\n",
"riva_build=(\n",
" f\"riva-build speech_synthesis --force --voice_name={voice} --language_code={lang} \"\n",
" f\"--sample_rate={sample_rate} /data/FastPitch_HifiGan.rmir:{key} /synt/{str(acoustic_model.name)}:{key} \"\n",
" f\"/voc/{str(vocoder.name)}:{key} --abbreviations_file=/riva_aux/abbr.txt \"\n",
" f\"--wfst_tokenizer_model=/riva_tn/tokenize_and_classify.far --wfst_verbalizer_model=riva_tn/verbalize.far\"\n",
")\n",
"if target_machine==\"arm\":\n",
" riva_build += \"\"\"--max_batch_size 1 --denoiser.max_batch_size 1 --preprocessor.max_batch_size 1 \\\n",
" --encoderFastPitch.max_batch_size 1 --chunkerFastPitch.max_batch_size 1 --hifigan.max_batch_size 1\"\"\"\n",
Expand All @@ -225,7 +239,7 @@
" riva_build+=\" --phone_set=arpabet --phone_dictionary_file=/riva_repo/cmudict-0.7b_nv22.08\"\n",
"if num_speakers > 1:\n",
" riva_build+=f\" --num_speakers={num_speakers}\"\n",
" riva_build+=\"--subvoices \" + \",\".join([f\"{i}:{i}\" for i in range(num_speakers)])\n",
" riva_build+=\" --subvoices \" + \",\".join([f\"{i}:{i}\" for i in range(num_speakers)])\n",
"print(riva_build)"
]
},
Expand Down Expand Up @@ -512,7 +526,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.8.10"
},
"vscode": {
"interpreter": {
Expand Down

0 comments on commit 0385ebe

Please sign in to comment.