Make tts deploy notebook more robust

Signed-off-by: Jason <jasoli@nvidia.com>
nvidia-riva · Jan 30, 2023 · 0385ebe · 0385ebe
1 parent d3f3acd
commit 0385ebe
Showing 1 changed file with 39 additions and 25 deletions.
diff --git a/tts-python-deploy.ipynb b/tts-python-deploy.ipynb
@@ -66,8 +66,8 @@
     "Update the parameters in the following code block:\n",
     "- `machine_type`: Type of machine the tutorial is being run on. Acceptable values are `AMD64`, `ARM64_linux`, `ARM64_l4t`. Defaults to `AMD64`.  \n",
     "- `target_machine`: Type of machine the RMIR will be deployed on. Acceptable values are `AMD64`, `ARM64_linux`, `ARM64_l4t`. Defaults to `AMD64`.  \n",
-    "- `acoustic_model`: Full path for acoustic model `.riva` file. Defaults to `$pwd/speechsynthesis_en_us_fastpitch_ipa_vdeployable_v1.0/FastPitch_44k_EnglishUS_IPA.riva` which is the default download path for the NGC example model. This can be replaced with a custom acoustic model `.riva` checkpoint.  \n",
-    "- `vocoder`: Full path for vocoder `.riva` file. Defaults to `$pwd/speechsynthesis_en_us_hifigan_ipa_vdeployable_v1.0/HifiGAN_44k_EnglishUS_IPA.riva` which is the default download path for the NGC example model. This can be replaced with a custom vocoder `.riva` checkpoint.  \n",
+    "- `acoustic_model`: Full path for acoustic model `.riva` file. Defaults to `None`. This can be replaced with a custom acoustic model `.riva` checkpoint.  \n",
+    "- `vocoder`: Full path for vocoder `.riva` file. Defaults to `None`. This can be replaced with a custom vocoder `.riva` checkpoint.  \n",
     "- `out_dir`: Directory to put the `TTS.rmir` file. The RMIR will be placed in `${out_dir}/RMIR/RMIR_NAME.rmir`. Defaults to `$pwd/out`.  \n",
     "- `voice`: Set the voice name of the model. Defaults to `\"test\"`.  \n",
     "- `key`: This is the encryption key used in `nemo2riva`. The same key will be used to deploy the RMIR generated in this tutorial. Defaults to `tlt_encode`.  \n",
@@ -89,8 +89,8 @@
     "\n",
     "machine_type=\"AMD64\" #Change this to `ARM64_linux` or `ARM64_l4t` in case of an ARM64 machine.\n",
     "target_machine=\"AMD64\" #Change this to `ARM64_linux` or `ARM64_l4t` in case of an ARM64 machine.\n",
-    "acoustic_model = pathlib.Path.cwd() / \"speechsynthesis_en_us_fastpitch_ipa_vdeployable_v1.0/FastPitch_44k_EnglishUS_IPA.riva\" ##acoustic_model .riva location\n",
-    "vocoder = pathlib.Path.cwd() / \"speechsynthesis_en_us_hifigan_ipa_vdeployable_v1.0/HifiGAN_44k_EnglishUS_IPA.riva\" ##vocoder .riva location\n",
+    "acoustic_model = None ##acoustic_model .riva location\n",
+    "vocoder = None ##vocoder .riva location\n",
     "out_dir = pathlib.Path.cwd() / \"out\" ##Output directory to store the generated RMIR. The RMIR will be placed in `${out_dir}/RMIR/RMIR_NAME.rmir`.\n",
     "voice = \"test\" ##Voice name\n",
     "key = \"tlt_encode\" ##Encryption key used during nemo2riva\n",
@@ -99,8 +99,8 @@
     "sample_rate = 44100 ##Sample rate of the audios\n",
     "num_speakers = 2 ## Number of speakers\n",
     "\n",
-    "riva_aux_files=pathlib.Path.cwd() / \"speechsynthesis_en_us_auxiliary_files_vdeployable_v1.3\" ##Riva model repo path. In the case of a custom model repo, change this to the full path of the custom Riva model repo.\n",
-    "riva_tn_files=pathlib.Path.cwd() / \"normalization_en_us_files_vdeployable_v1.1\" ##Riva model repo path. In the case of a custom model repo, change this to the full path of the custom Riva model repo.\n",
+    "riva_aux_files = None ##Path to the Riva auxiliary files directory (contains abbr.txt). In the case of custom auxiliary files, change this to their full path.\n",
+    "riva_tn_files = None ##Path to the Riva text normalization files directory (contains tokenize_and_classify.far and verbalize.far). In the case of custom normalization files, change this to their full path.\n",
     "\n",
     "## Riva NGC, servicemaker image config.\n",
     "riva_ngc_image_version = \"2.9.0\"\n",
@@ -110,16 +110,25 @@
     "    riva_init_image = f\"nvcr.io/nvidia/riva/riva-speech:{riva_ngc_image_version}-servicemaker-l4t-aarch64\"\n",
     "rmir_dir = out_dir / \"rmir\"\n",
     "\n",
-    "am_dir = acoustic_model.parent\n",
-    "voc_dir = vocoder.parent\n",
-    "\n",
-    "am_name = acoustic_model.name\n",
-    "voc_name = vocoder.name\n",
-    "\n",
     "if not out_dir.exists():\n",
     "    out_dir.mkdir()\n",
     "if not rmir_dir.exists():\n",
-    "    rmir_dir.mkdir()"
+    "    rmir_dir.mkdir()\n",
+    "\n",
+    "def get_ngc_download_dir(ngc_output, var, var_name):\n",
+    "    \"\"\"Return the path of an NGC download parsed from captured `ngc` CLI output.\n",
+    "\n",
+    "    ngc_output: iterable of output lines from `ngc registry model download-version`.\n",
+    "    var: current value of the notebook variable being replaced, or None.\n",
+    "    var_name: name of that variable, used only in messages.\n",
+    "\n",
+    "    Returns the first `.riva` file (sorted by name) in the download directory,\n",
+    "    or the directory itself if it holds no `.riva` file. If no download path is\n",
+    "    found in the output, falls back to `var`; raises RuntimeError if `var` is None.\n",
+    "    \"\"\"\n",
+    "    output = None\n",
+    "    for line in ngc_output:\n",
+    "        if \"Downloaded local path\" in line:\n",
+    "            output = pathlib.Path(line.split(\"path: \")[-1].strip())\n",
+    "            break\n",
+    "    if output is None:\n",
+    "        # No download path in the output: fail clearly instead of calling `.glob` on None.\n",
+    "        if var is None:\n",
+    "            raise RuntimeError(\n",
+    "                f\"Could not find a download path for `{var_name}` in the ngc output. \"\n",
+    "                f\"Check the NGC CLI setup or set `{var_name}` manually.\"\n",
+    "            )\n",
+    "        warnings.warn(f\"No download path found for `{var_name}`; keeping `{var}`.\")\n",
+    "        return pathlib.Path(var)\n",
+    "    # Sort so the chosen file does not depend on filesystem listing order.\n",
+    "    riva_files_in_dir = sorted(output.glob(\"*.riva\"))\n",
+    "    if riva_files_in_dir:\n",
+    "        output = riva_files_in_dir[0]\n",
+    "    if var is not None:\n",
+    "        warnings.warn(\n",
+    "            f\"`{var_name}` had a non-default value of `{var}`. `{var_name}` will be updated to `{output}`\"\n",
+    "        )\n",
+    "    return output"
    ]
   },
   {
@@ -141,12 +150,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!ngc registry model download-version \"nvidia/tao/speechsynthesis_en_us_fastpitch_ipa:deployable_v1.0\"\n",
-    "!ngc registry model download-version \"nvidia/tao/speechsynthesis_en_us_hifigan_ipa:deployable_v1.0\""
+    "fastpitch_output = !ngc registry model download-version \"nvidia/tao/speechsynthesis_en_us_fastpitch_ipa:deployable_v1.0\"\n",
+    "hifigan_output = !ngc registry model download-version \"nvidia/tao/speechsynthesis_en_us_hifigan_ipa:deployable_v1.0\"\n",
+    "acoustic_model = get_ngc_download_dir(fastpitch_output, acoustic_model, \"acoustic_model\")\n",
+    "vocoder = get_ngc_download_dir(hifigan_output, vocoder, \"vocoder\")"
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "b7d8e550",
    "metadata": {},
@@ -167,8 +177,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!ngc registry model download-version \"nvidia/tao/speechsynthesis_en_us_auxiliary_files:deployable_v1.3\"\n",
-    "!ngc registry model download-version \"nvidia/tao/normalization_en_us:deployable_v1.1\""
+    "aux_output = !ngc registry model download-version \"nvidia/tao/speechsynthesis_en_us_auxiliary_files:deployable_v1.3\"\n",
+    "tn_output = !ngc registry model download-version \"nvidia/tao/normalization_en_us:deployable_v1.1\"\n",
+    "riva_aux_files = get_ngc_download_dir(aux_output, riva_aux_files, \"riva_aux_files\")\n",
+    "riva_tn_files = get_ngc_download_dir(tn_output, riva_tn_files, \"riva_tn_files\")"
    ]
   },
   {
@@ -190,7 +202,7 @@
    "source": [
     "##Run the riva servicemaker.\n",
     "!docker stop riva_rmir_gen &> /dev/null\n",
-    "!set -x && docker run -td --gpus all --rm -v {str(riva_aux_files)}:/riva_aux -v {str(am_dir)}/:/synt -v {str(voc_dir)}:/voc -v {str(riva_tn_files)}:/riva_tn \\\n",
+    "!set -x && docker run -td --gpus all --rm -v {str(riva_aux_files)}:/riva_aux -v {str(acoustic_model.parent)}/:/synt -v {str(vocoder.parent)}:/voc -v {str(riva_tn_files)}:/riva_tn \\\n",
     "            -v {str(rmir_dir.resolve())}:/data --name riva_rmir_gen --entrypoint=\"/bin/bash\" {riva_init_image}"
    ]
   },
@@ -212,10 +224,12 @@
    "outputs": [],
    "source": [
     "warnings.warn(\"Using --force in riva-build will replace any existing RMIR.\")\n",
-    "riva_build=f\"\"\"riva-build speech_synthesis --force --voice_name={voice}  --language_code={lang} \\\n",
-    "                --sample_rate={sample_rate} /data/FastPitch_HifiGan.rmir:{key} /synt/{am_name}:{key} \\\n",
-    "                /voc/{voc_name}:{key}  --abbreviations_file=/riva_aux/abbr.txt \\\n",
-    "                --wfst_tokenizer_model=/riva_tn/tokenize_and_classify.far --wfst_verbalizer_model=riva_tn/verbalize.far\"\"\"\n",
+    "# Every path below is absolute inside the servicemaker container (see the `docker run -v` mounts).\n",
+    "riva_build=(\n",
+    "    f\"riva-build speech_synthesis --force --voice_name={voice} --language_code={lang} \"\n",
+    "    f\"--sample_rate={sample_rate} /data/FastPitch_HifiGan.rmir:{key} /synt/{acoustic_model.name}:{key} \"\n",
+    "    f\"/voc/{vocoder.name}:{key} --abbreviations_file=/riva_aux/abbr.txt \"\n",
+    "    f\"--wfst_tokenizer_model=/riva_tn/tokenize_and_classify.far --wfst_verbalizer_model=/riva_tn/verbalize.far\"\n",
+    ")\n",
     "if target_machine==\"arm\":\n",
     "    riva_build += \"\"\"--max_batch_size 1 --denoiser.max_batch_size 1 --preprocessor.max_batch_size 1 \\\n",
     "                --encoderFastPitch.max_batch_size 1 --chunkerFastPitch.max_batch_size 1 --hifigan.max_batch_size 1\"\"\"\n",
@@ -225,7 +239,7 @@
     "    riva_build+=\" --phone_set=arpabet --phone_dictionary_file=/riva_repo/cmudict-0.7b_nv22.08\"\n",
     "if num_speakers > 1:\n",
     "    riva_build+=f\" --num_speakers={num_speakers}\"\n",
-    "    riva_build+=\"--subvoices \" + \",\".join([f\"{i}:{i}\" for i in range(num_speakers)])\n",
+    "    riva_build+=\" --subvoices \" + \",\".join([f\"{i}:{i}\" for i in range(num_speakers)])\n",
     "print(riva_build)"
    ]
   },
@@ -512,7 +526,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.16"
+   "version": "3.8.10"
   },
   "vscode": {
    "interpreter": {