Merge branch 'master' into zh-tw
* master: (61 commits)
  deps: use pypi provided silero vad, upgrade to latest
  fix: remove public key validation (jitsi#123)
  fix: downgrade vllm (jitsi#122)
  feat: add fallback folder when looking up public keys (jitsi#119)
  fix: add ffmpeg dependency for pytorch
  ref: bypass queueing jobs with invalid payload (jitsi#121)
  fix: replace exemplar usage with label for app_id
  feat: add instrumentation for app_id (jitsi#118)
  fix: re-enable vLLM multiprocessing (jitsi#116)
  fix: update incorrect prompt example
  fix: healthchecks failing due to missing internal id (jitsi#115)
  feat(openai-api): use Ollama for local development
  feat: expose openai api endpoints from vllm (jitsi#112)
  feat: update text hint type prompting (jitsi#111)
  feat: add meeting hint type and use it as default (jitsi#110)
  feat: enable requests batching (jitsi#109)
  metrics: add full duration metric
  metrics: add a skipped job status which will not count towards duration metrics
  fix: catch exceptions when echoing fails
  feat: add support for echoing requests (jitsi#107)
  ...

# Conflicts:
#	Dockerfile
#	Makefile
#	requirements.txt
shooding committed Nov 25, 2024
2 parents f7bf944 + 4d17ab9 commit c9a5c0c
Showing 44 changed files with 5,075 additions and 2,410 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -4,3 +4,5 @@ models
 .DS_Store
 .env
 .idea
+llama.log
+dump.rdb
Empty file added: .gitmodules
87 changes: 71 additions & 16 deletions Dockerfile
@@ -1,8 +1,7 @@
-ARG BASE_IMAGE_BUILD=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04
-ARG BASE_IMAGE_RUN=nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu20.04
+ARG BASE_IMAGE_BUILD=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04
+ARG BASE_IMAGE_RUN=nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
 
-## Base Image
 ##
 
 FROM ${BASE_IMAGE_BUILD} AS builder
 
@@ -13,40 +12,93 @@ RUN \
 COPY docker/rootfs/ /
 
 RUN \
-  apt-dpkg-wrap apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F23C5A6CF475977595C89F51BA6932366A755776 && \
+  apt-dpkg-wrap apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F23C5A6CF475977595C89F51BA6932366A755776 && \
   apt-dpkg-wrap apt-get update && \
-  apt-dpkg-wrap apt-get install -y build-essential python3.11 python3.11-venv && \
+  apt-dpkg-wrap apt-get install -y build-essential libcurl4-openssl-dev python3.11 python3.11-venv && \
   apt-cleanup
 
 COPY requirements.txt /app/
 
 WORKDIR /app
 
-ENV \
-  CMAKE_ARGS="-DLLAMA_CUBLAS=ON -DLLAMA_NATIVE=OFF" \
-  FORCE_CMAKE=1 \
-  PIP_DISABLE_PIP_VERSION_CHECK=on
+ENV PIP_DISABLE_PIP_VERSION_CHECK=on
 
 RUN \
   python3.11 -m venv .venv && \
   . .venv/bin/activate && \
   pip install -vvv -r requirements.txt
 
+## Build ffmpeg
+
+FROM ${BASE_IMAGE_RUN} AS ffmpeg_install
+
+COPY docker/rootfs/ /
+
+# ffmpeg build dependencies
+RUN \
+  apt-dpkg-wrap apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F23C5A6CF475977595C89F51BA6932366A755776 && \
+  apt-dpkg-wrap apt-get update && \
+  apt-dpkg-wrap apt-get install -y \
+    autoconf \
+    automake \
+    build-essential \
+    cmake \
+    libopus-dev \
+    libopus0 \
+    libtool \
+    pkg-config \
+    texinfo \
+    wget \
+    yasm \
+    zlib1g \
+    zlib1g-dev && \
+  apt-cleanup
+
+# Build ffmpeg6 (required for pytorch which only supports ffmpeg < v7)
+RUN \
+  mkdir -p /opt/ffmpeg && \
+  cd /opt/ && \
+  wget -q https://www.ffmpeg.org/releases/ffmpeg-6.1.2.tar.gz && \
+  tar -xzf ffmpeg-6.1.2.tar.gz -C /opt/ffmpeg --strip-components 1 && \
+  rm ffmpeg-6.1.2.tar.gz && \
+  cd /opt/ffmpeg/ && \
+  ./configure \
+    --enable-shared \
+    --enable-gpl \
+    --enable-libopus && \
+  make && \
+  make install && \
+  ldconfig
+
+RUN \
+  apt-dpkg-wrap apt-get autoremove -y \
+    autoconf \
+    automake \
+    build-essential \
+    cmake \
+    libopus-dev \
+    libtool \
+    pkg-config \
+    texinfo \
+    wget \
+    yasm \
+    zlib1g-dev
+
 ## Production Image
 ##
 
-FROM ${BASE_IMAGE_RUN}
+FROM ffmpeg_install
 
 RUN \
   apt-get update && \
-  apt-get install -y apt-transport-https ca-certificates gnupg ffmpeg
+  apt-get install -y apt-transport-https ca-certificates gnupg
 
 COPY docker/rootfs/ /
 COPY --chown=jitsi:jitsi docker/run-skynet.sh /opt/
 
 RUN \
-  apt-dpkg-wrap apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F23C5A6CF475977595C89F51BA6932366A755776 && \
+  apt-dpkg-wrap apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F23C5A6CF475977595C89F51BA6932366A755776 && \
   apt-dpkg-wrap apt-get update && \
-  apt-dpkg-wrap apt-get install -y python3.11 python3.11-venv tini libgomp1 && \
+  apt-dpkg-wrap apt-get install -y python3.11 python3.11-venv tini libgomp1 strace gdb && \
   apt-cleanup
 
 # Principle of least privilege: create a new user for running the application
@@ -66,7 +118,10 @@ ENV \
   # https://docs.python.org/3/using/cmdline.html#envvar-PYTHONDONTWRITEBYTECODE
   PYTHONDONTWRITEBYTECODE=1 \
   PYTHONPATH=/app \
-  LLAMA_PATH="/models/llama-2-7b-chat.Q4_K_M.gguf"
+  OUTLINES_CACHE_DIR=/app/vllm/outlines \
+  VLLM_CONFIG_ROOT=/app/vllm/config \
+  HF_HOME=/app/hf \
+  LLAMA_PATH="/models/Llama-3.1-8B-Instruct-Q8_0.gguf"
 
 VOLUME [ "/models" ]
 
@@ -76,7 +131,7 @@ RUN chown jitsi:jitsi ${PYTHONPATH}
 # Document the exposed port
 EXPOSE 8000
 
-# Use the unpriviledged user to run the application
+# Use the unprivileged user to run the application
 USER 1001
 
 # Use tini as our PID 1
23 changes: 5 additions & 18 deletions Makefile
@@ -4,29 +4,16 @@ endif
 
 GIT_HASH ?= $(shell git rev-parse --short HEAD)
 PLATFORMS ?= linux/amd64
-CACHE_DIR ?= /tmp/docker-cache
 
 _login:
 	${DOCKER_LOGIN_CMD}
 
-build-summaries : _login
+build : _login
 	docker buildx build \
-		--build-arg="BASE_IMAGE_BUILD=nvidia/cuda:12.3.0-devel-ubuntu20.04" \
-		--build-arg="BASE_IMAGE_RUN=nvidia/cuda:12.3.0-runtime-ubuntu20.04" \
		--progress plain \
		--push \
		--platform ${PLATFORMS} \
-		--cache-from type=local,src=${CACHE_DIR} \
-		--cache-to type=local,dest=${CACHE_DIR},mode=max \
-		-t ${IMAGE_REGISTRY}/skynet:summaries-${GIT_HASH} .
-
-build-whisper : _login
-	docker buildx build \
-		--build-arg="BASE_IMAGE_BUILD=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04" \
-		--build-arg="BASE_IMAGE_RUN=nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu20.04" \
-		--progress plain \
-		--platform ${PLATFORMS} \
-		--push \
-		--cache-from type=local,src=${CACHE_DIR} \
-		--cache-to type=local,dest=${CACHE_DIR},mode=max \
-		-t ${IMAGE_REGISTRY}/skynet:whisper-${GIT_HASH} .
+		-t ${IMAGE_REGISTRY}/skynet:summaries-${GIT_HASH} \
+		-t ${IMAGE_REGISTRY}/skynet:whisper-${GIT_HASH} \
+		-t ${IMAGE_REGISTRY}/skynet:${GIT_HASH} \
+		-t ${IMAGE_REGISTRY}/skynet:latest .
28 changes: 19 additions & 9 deletions README.md
@@ -4,7 +4,7 @@ Skynet is an API server for AI services wrapping several apps and models.
 
 It is comprised of specialized modules which can be enabled or disabled as needed.
 
-- **Summary and Action Items** with llama.cpp (enabled by default)
+- **Summary and Action Items** with vllm (or llama.cpp)
 - **Live Transcriptions** with Faster Whisper via websockets
 - 🚧 _More to follow_
 
@@ -16,19 +16,15 @@ It is comprised of specialized modules which can be enabled or disabled as needed.
 ## Summaries Quickstart
 
 ```bash
-# Download the preferred GGUF llama model
-mkdir "$HOME/models"
+# if VLLM cannot be used, make sure to have Ollama started. In that case LLAMA_PATH should be the model name, like "llama3.1".
+export LLAMA_PATH="$HOME/models/Llama-3.1-8B-Instruct-Q8_0.gguf"
 
-wget -q --show-progress "https://huggingface.co/jitsi/Llama-3-8B-Instruct-GGUF/resolve/main/llama-3-8b-instruct-Q4_K_M.gguf?download=true" -O "$HOME/models/llama-3-8b-instruct.Q4_K_M.gguf"
-
-export LLAMA_PATH="$HOME/models/llama-3-8b-instruct.Q4_K_M.gguf"
+# disable authorization (for testing)
+export BYPASS_AUTHORIZATION=1
 
 # start Redis
 docker run -d --rm -p 6379:6379 redis
 
-# disable authorization (for testing)
-export BYPASS_AUTHORIZATION=1
-
 poetry install
 ./run.sh
@@ -37,6 +33,9 @@ poetry install
 
 ## Live Transcriptions Quickstart
 
+> **Note**: Make sure to have ffmpeg < 7 installed and to update the `DYLD_LIBRARY_PATH` with the path to the ffmpeg
+> libraries, e.g. `export DYLD_LIBRARY_PATH=/Users/MyUser/ffmpeg/6.1.2/lib:$DYLD_LIBRARY_PATH`.
+
 ```bash
 mkdir -p "$HOME/models/streaming-whisper"
 export WHISPER_MODEL_NAME="tiny.en"
@@ -47,6 +46,17 @@ export WHISPER_MODEL_PATH="$HOME/models/streaming-whisper"
 poetry install
 ./run.sh
 ```
+
+## Testing docker changes
+```bash
+docker compose -f compose-dev.yaml up --build
+docker cp $HOME/models/Llama-3.1-8B-Instruct-Q8_0.gguf skynet-web-1:/models
+docker restart skynet-web-1
+
+# localhost:8000 for Skynet APIs
+# localhost:8001/metrics for Prometheus metrics
+```
+
 ### Test it from Github Pages
 Go to [Streaming Whisper Demo](https://jitsi.github.io/skynet/) to test your deployment from a browser

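Once the dev stack from the README's "Testing docker changes" section is up, the summaries API can also be smoke-tested from Node. A minimal sketch, assuming BYPASS_AUTHORIZATION=true (as set in compose-dev.yaml) and Node >= 18 for the global fetch; the request shape follows demos/js-client/skynet.mjs below, and the file name and transcript text are illustrative:

```js
// smoke-test.mjs (hypothetical file name) - submit one summary job.
// No Authorization header is sent because auth is bypassed in compose-dev.yaml.
const res = await fetch('http://localhost:8000/summaries/v1/summary', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
        hint: 'text', // same default hint the demo client uses
        text: 'Alice: the release looks good. Bob: then let us ship it on Friday.' // illustrative
    })
});

const { id } = await res.json(); // job id; poll /summaries/v1/job/<id> for the result
console.log('job id:', id);
```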
14 changes: 14 additions & 0 deletions compose-dev.yaml
@@ -0,0 +1,14 @@
+services:
+  web:
+    build: .
+    environment:
+      - BYPASS_AUTHORIZATION=true
+      - REDIS_HOST=redis
+    platform: linux/amd64
+    ports:
+      - "8000:8000"
+      - "8001:8001"
+      - "8003:8003"
+  redis:
+    image: "redis:alpine"
+    platform: linux/amd64
20 changes: 17 additions & 3 deletions credentials.yaml.sample
@@ -1,4 +1,18 @@
 customer_credentials:
-  test-customer_id:
-    api_key: sample-api-key
-    model_name: gpt-3.5-turbo
+  testCustomerId:
+    credentialsMap:
+      AZURE_OPENAI:
+        customerId: testCustomerId
+        enabled: true
+        metadata:
+          deploymentName: gpt-4o
+          endpoint: https://myinstance.openai.azure.com/
+        secret: test_secret
+        type: AZURE_OPENAI
+      OPENAI:
+        customerId: testCustomerId
+        enabled: false
+        metadata:
+          model: gpt-3
+        secret: test_secret
+        type: OPENAI
84 changes: 84 additions & 0 deletions demos/js-client/skynet.mjs
@@ -0,0 +1,84 @@
+export class SkynetClient {
+    constructor(options = {}) {
+        this._baseUrl = options?.baseUrl ?? 'http://localhost:8000';
+        this._token = options?.token;
+    }
+
+    async summary(text, options) {
+        return this._fetchAndPoll(`${this._baseUrl}/summaries/v1/summary`, text, options)
+    }
+
+    async actionItems(text, options) {
+        return this._fetchAndPoll(`${this._baseUrl}/summaries/v1/action-items`, text, options)
+    }
+
+    async _fetchAndPoll(url, text, options = {}) {
+        // Submit the job.
+        const headers = {
+            'Content-Type': 'application/json'
+        };
+
+        if (this._token) {
+            headers['Authorization'] = `Bearer ${this._token}`;
+        }
+
+        const r = await fetch(url, {
+            method: 'POST',
+            headers,
+            body: JSON.stringify({
+                hint: options?.hint ?? 'text',
+                text
+            })
+        });
+        const data = await r.json();
+        const jobId = data.id;
+
+        if (!jobId) {
+            throw new Error('Could not create job');
+        }
+
+        const d = createDeferred();
+
+        // Poll for it.
+        const pHeaders = {};
+
+        if (this._token) {
+            pHeaders['Authorization'] = `Bearer ${this._token}`;
+        }
+
+        const int = setInterval(async () => {
+            try {
+                const r = await fetch(`${this._baseUrl}/summaries/v1/job/${jobId}`, {
+                    headers: pHeaders
+                });
+                const data = await r.json();
+
+                if (data.status === 'success') {
+                    clearInterval(int);
+                    d.resolve(data.result);
+                } else if (data.status === 'error') {
+                    clearInterval(int);
+                    d.reject(new Error(data.result));
+                }
+            } catch(_) {}
+        }, 5 * 1000);
+
+        return d.promise;
+    }
+}
+
+
+function createDeferred() {
+    if (Promise.withResolvers) {
+        return Promise.withResolvers();
+    }
+
+    const d = {};
+
+    d.promise = new Promise((resolve, reject) => {
+        d.resolve = resolve;
+        d.reject = reject;
+    })
+
+    return d;
+}
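For reference, a minimal usage sketch of the demo client added above, assuming a local Skynet instance with authorization bypassed (so no token option is needed); the file name and meeting text are illustrative:

```js
// usage.mjs (hypothetical file name), run next to skynet.mjs with: node usage.mjs
import { SkynetClient } from './skynet.mjs';

const client = new SkynetClient({ baseUrl: 'http://localhost:8000' });

// summary() submits the job, then polls /summaries/v1/job/<id> every 5 seconds
// and resolves with the result once the job status is 'success'.
const summary = await client.summary(
    'Alice: the rollout went fine. Bob: then we can close the incident.',
    { hint: 'meeting' } // the meeting hint type added in jitsi#110
);

console.log(summary);
```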
3 changes: 1 addition & 2 deletions docker/rootfs/etc/apt/sources.list.d/python-ppa.list
@@ -1,2 +1 @@
-deb https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu focal main
-deb-src https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu focal main
+deb https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy main