guardian · philmcmahon · Feb 4, 2025 · Jan 21, 2025 · Jan 23, 2025 · Jan 23, 2025
@@ -0,0 +1,2 @@
+---
+model_script_stage: PROD
@@ -1,6 +1,7 @@
 ---
 dependencies:
   - pip3
+  - aws-tools
   - role: packages
     packages:
       - ffmpeg
@@ -9,13 +9,26 @@
   pip:
     name:
      - torch==2.0.0
-     - torchvision==0.15.1
      - torchaudio==2.0.1
     executable: pip3
     extra_args: "--index-url https://download.pytorch.org/whl/cu118"
 
 - name: Install whisperx
   pip:
-    name: whisperx
+    name: "{{ whisperx_package | default('whisperx') }}"
     executable: pip3
 
+# We use a python script to fetch the models which is owned by the transcription service. See the below PRs for details:
+#  -  https://github.com/guardian/amigo/pull/1607
+#  - https://github.com/guardian/transcription-service/pull/130
+- name: Download models script
+  shell: |
+    aws --quiet s3 cp s3://amigo-data-{{ model_script_stage.lower() }}/deploy/{{ model_script_stage }}/whisperx-model-fetch/download_whisperx_models.py /tmp/download_whisperx_models.py
+    exit 0
+
+# The script lives here https://github.com/guardian/transcription-service/blob/main/whisperx-model-fetch/download_whisperx_models.py
+# If you are changing these parameters it may be helpful to run it locally to test the changes.
+- name: Download whisperx models
+  command: "python3 /tmp/download_whisperx_models.py --whisper-models --diarization-models --torch-align-models --huggingface-token {{ huggingface_token }}"
+  become: yes
+  become_user: ubuntu