Merge branch 'master' into zh-tw
* master: (61 commits)
  deps: use pypi provided silero vad, upgrade to latest
  fix: remove public key validation (jitsi#123)
  fix: downgrade vllm (jitsi#122)
  feat: add fallback folder when looking up public keys (jitsi#119)
  fix: add ffmpeg dependency for pytorch
  ref: bypass queueing jobs with invalid payload (jitsi#121)
  fix: replace exemplar usage with label for app_id
  feat: add instrumentation for app_id (jitsi#118)
  fix: re-enable vLLM multiprocessing (jitsi#116)
  fix: update incorrect prompt example
  fix: healthchecks failing due to missing internal id (jitsi#115)
  feat(openai-api): use Ollama for local development
  feat: expose openai api endpoints from vllm (jitsi#112)
  feat: update text hint type prompting (jitsi#111)
  feat: add meeting hint type and use it as default (jitsi#110)
  feat: enable requests batching (jitsi#109)
  metrics: add full duration metric
  metrics: add a skipped job status which will not count towards duration metrics
  fix: catch exceptions when echoing fails
  feat: add support for echoing requests (jitsi#107)
  ...

# Conflicts:
#	Dockerfile
#	Makefile
#	requirements.txt
shooding committed Nov 25, 2024
2 parents f7bf944 + 4d17ab9 commit c9a5c0c
Showing 44 changed files with 5,075 additions and 2,410 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -4,3 +4,5 @@ models
 .DS_Store
 .env
 .idea
+llama.log
+dump.rdb
Empty file added: .gitmodules
87 changes: 71 additions & 16 deletions Dockerfile
@@ -1,8 +1,7 @@
-ARG BASE_IMAGE_BUILD=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04
-ARG BASE_IMAGE_RUN=nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu20.04
+ARG BASE_IMAGE_BUILD=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04
+ARG BASE_IMAGE_RUN=nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
 
-## Base Image
 ##
 
 FROM ${BASE_IMAGE_BUILD} AS builder
 
@@ -13,40 +12,93 @@ RUN \
 COPY docker/rootfs/ /
 
 RUN \
-  apt-dpkg-wrap apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F23C5A6CF475977595C89F51BA6932366A755776 && \
+  apt-dpkg-wrap apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F23C5A6CF475977595C89F51BA6932366A755776 && \
   apt-dpkg-wrap apt-get update && \
-  apt-dpkg-wrap apt-get install -y build-essential python3.11 python3.11-venv && \
+  apt-dpkg-wrap apt-get install -y build-essential libcurl4-openssl-dev python3.11 python3.11-venv && \
   apt-cleanup
 
 COPY requirements.txt /app/
 
 WORKDIR /app
 
-ENV \
-  CMAKE_ARGS="-DLLAMA_CUBLAS=ON -DLLAMA_NATIVE=OFF" \
-  FORCE_CMAKE=1 \
-  PIP_DISABLE_PIP_VERSION_CHECK=on
+ENV PIP_DISABLE_PIP_VERSION_CHECK=on
 
 RUN \
   python3.11 -m venv .venv && \
   . .venv/bin/activate && \
   pip install -vvv -r requirements.txt
 
+## Build ffmpeg
+
+FROM ${BASE_IMAGE_RUN} AS ffmpeg_install
+
+COPY docker/rootfs/ /
+
+# ffmpeg build dependencies
+RUN \
+  apt-dpkg-wrap apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F23C5A6CF475977595C89F51BA6932366A755776 && \
+  apt-dpkg-wrap apt-get update && \
+  apt-dpkg-wrap apt-get install -y \
+    autoconf \
+    automake \
+    build-essential \
+    cmake \
+    libopus-dev \
+    libopus0 \
+    libtool \
+    pkg-config \
+    texinfo \
+    wget \
+    yasm \
+    zlib1g \
+    zlib1g-dev && \
+  apt-cleanup
+
+# Build ffmpeg6 (required for pytorch which only supports ffmpeg < v7)
+RUN \
+  mkdir -p /opt/ffmpeg && \
+  cd /opt/ && \
+  wget -q https://www.ffmpeg.org/releases/ffmpeg-6.1.2.tar.gz && \
+  tar -xzf ffmpeg-6.1.2.tar.gz -C /opt/ffmpeg --strip-components 1 && \
+  rm ffmpeg-6.1.2.tar.gz && \
+  cd /opt/ffmpeg/ && \
+  ./configure \
+    --enable-shared \
+    --enable-gpl \
+    --enable-libopus && \
+  make && \
+  make install && \
+  ldconfig
+
+RUN \
+  apt-dpkg-wrap apt-get autoremove -y \
+    autoconf \
+    automake \
+    build-essential \
+    cmake \
+    libopus-dev \
+    libtool \
+    pkg-config \
+    texinfo \
+    wget \
+    yasm \
+    zlib1g-dev
+
 ## Production Image
 ##
 
-FROM ${BASE_IMAGE_RUN}
+FROM ffmpeg_install
 
 RUN \
   apt-get update && \
-  apt-get install -y apt-transport-https ca-certificates gnupg ffmpeg
+  apt-get install -y apt-transport-https ca-certificates gnupg
 
 COPY docker/rootfs/ /
 COPY --chown=jitsi:jitsi docker/run-skynet.sh /opt/
 
 RUN \
-  apt-dpkg-wrap apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F23C5A6CF475977595C89F51BA6932366A755776 && \
+  apt-dpkg-wrap apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F23C5A6CF475977595C89F51BA6932366A755776 && \
   apt-dpkg-wrap apt-get update && \
-  apt-dpkg-wrap apt-get install -y python3.11 python3.11-venv tini libgomp1 && \
+  apt-dpkg-wrap apt-get install -y python3.11 python3.11-venv tini libgomp1 strace gdb && \
   apt-cleanup
 
 # Principle of least privilege: create a new user for running the application
@@ -66,7 +118,10 @@ ENV \
   # https://docs.python.org/3/using/cmdline.html#envvar-PYTHONDONTWRITEBYTECODE
   PYTHONDONTWRITEBYTECODE=1 \
   PYTHONPATH=/app \
-  LLAMA_PATH="/models/llama-2-7b-chat.Q4_K_M.gguf"
+  OUTLINES_CACHE_DIR=/app/vllm/outlines \
+  VLLM_CONFIG_ROOT=/app/vllm/config \
+  HF_HOME=/app/hf \
+  LLAMA_PATH="/models/Llama-3.1-8B-Instruct-Q8_0.gguf"
 
 VOLUME [ "/models" ]
 
@@ -76,7 +131,7 @@ RUN chown jitsi:jitsi ${PYTHONPATH}
 # Document the exposed port
 EXPOSE 8000
 
-# Use the unpriviledged user to run the application
+# Use the unprivileged user to run the application
 USER 1001
 
 # Use tini as our PID 1
23 changes: 5 additions & 18 deletions Makefile
@@ -4,29 +4,16 @@ endif
 
 GIT_HASH ?= $(shell git rev-parse --short HEAD)
 PLATFORMS ?= linux/amd64
-CACHE_DIR ?= /tmp/docker-cache
 
 _login:
 	${DOCKER_LOGIN_CMD}
 
-build-summaries : _login
+build : _login
 	docker buildx build \
-		--build-arg="BASE_IMAGE_BUILD=nvidia/cuda:12.3.0-devel-ubuntu20.04" \
-		--build-arg="BASE_IMAGE_RUN=nvidia/cuda:12.3.0-runtime-ubuntu20.04" \
		--progress plain \
		--push \
		--platform ${PLATFORMS} \
-		--cache-from type=local,src=${CACHE_DIR} \
-		--cache-to type=local,dest=${CACHE_DIR},mode=max \
-		-t ${IMAGE_REGISTRY}/skynet:summaries-${GIT_HASH} .
-
-build-whisper : _login
-	docker buildx build \
-		--build-arg="BASE_IMAGE_BUILD=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04" \
-		--build-arg="BASE_IMAGE_RUN=nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu20.04" \
-		--progress plain \
-		--platform ${PLATFORMS} \
-		--push \
-		--cache-from type=local,src=${CACHE_DIR} \
-		--cache-to type=local,dest=${CACHE_DIR},mode=max \
-		-t ${IMAGE_REGISTRY}/skynet:whisper-${GIT_HASH} .
+		-t ${IMAGE_REGISTRY}/skynet:summaries-${GIT_HASH} \
+		-t ${IMAGE_REGISTRY}/skynet:whisper-${GIT_HASH} \
+		-t ${IMAGE_REGISTRY}/skynet:${GIT_HASH} \
+		-t ${IMAGE_REGISTRY}/skynet:latest .
28 changes: 19 additions & 9 deletions README.md
@@ -4,7 +4,7 @@ Skynet is an API server for AI services wrapping several apps and models.
 
 It is comprised of specialized modules which can be enabled or disabled as needed.
 
-- **Summary and Action Items** with llama.cpp (enabled by default)
+- **Summary and Action Items** with vllm (or llama.cpp)
 - **Live Transcriptions** with Faster Whisper via websockets
 - 🚧 _More to follow_
 
@@ -16,19 +16,15 @@ It is comprised of specialized modules which can be enabled or disabled as needed.
 ## Summaries Quickstart
 
 ```bash
-# Download the preferred GGUF llama model
-mkdir "$HOME/models"
+# if VLLM cannot be used, make sure to have Ollama started. In that case LLAMA_PATH should be the model name, like "llama3.1".
+export LLAMA_PATH="$HOME/models/Llama-3.1-8B-Instruct-Q8_0.gguf"
 
-wget -q --show-progress "https://huggingface.co/jitsi/Llama-3-8B-Instruct-GGUF/resolve/main/llama-3-8b-instruct-Q4_K_M.gguf?download=true" -O "$HOME/models/llama-3-8b-instruct.Q4_K_M.gguf"
-
-export LLAMA_PATH="$HOME/models/llama-3-8b-instruct.Q4_K_M.gguf"
+# disable authorization (for testing)
+export BYPASS_AUTHORIZATION=1
 
 # start Redis
 docker run -d --rm -p 6379:6379 redis
 
-# disable authorization (for testing)
-export BYPASS_AUTHORIZATION=1
-
 poetry install
 ./run.sh
@@ -37,6 +33,9 @@ poetry install
 
 ## Live Transcriptions Quickstart
 
+> **Note**: Make sure to have ffmpeg < 7 installed and to update the `DYLD_LIBRARY_PATH` with the path to the ffmpeg
+> libraries, e.g. `export DYLD_LIBRARY_PATH=/Users/MyUser/ffmpeg/6.1.2/lib:$DYLD_LIBRARY_PATH`.
+
 ```bash
 mkdir -p "$HOME/models/streaming-whisper"
 export WHISPER_MODEL_NAME="tiny.en"
@@ -47,6 +46,17 @@ export WHISPER_MODEL_PATH="$HOME/models/streaming-whisper"
 poetry install
 ./run.sh
 ```
+
+## Testing docker changes
+```bash
+docker compose -f compose-dev.yaml up --build
+docker cp $HOME/models/Llama-3.1-8B-Instruct-Q8_0.gguf skynet-web-1:/models
+docker restart skynet-web-1
+
+# localhost:8000 for Skynet APIs
+# localhost:8001/metrics for Prometheus metrics
+```
+
 ### Test it from Github Pages
 Go to [Streaming Whisper Demo](https://jitsi.github.io/skynet/) to test your deployment from a browser

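Once the dev stack from the README's "Testing docker changes" section is up, the summaries API can also be smoke-tested from Node. A minimal sketch, assuming BYPASS_AUTHORIZATION=true (as set in compose-dev.yaml) and Node >= 18 for the global fetch; the request shape follows demos/js-client/skynet.mjs below, and the file name and transcript text are illustrative:

```js
// smoke-test.mjs (hypothetical file name) - submit one summary job.
// No Authorization header is sent because auth is bypassed in compose-dev.yaml.
const res = await fetch('http://localhost:8000/summaries/v1/summary', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
        hint: 'text', // same default hint the demo client uses
        text: 'Alice: the release looks good. Bob: then let us ship it on Friday.' // illustrative
    })
});

const { id } = await res.json(); // job id; poll /summaries/v1/job/<id> for the result
console.log('job id:', id);
```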
14 changes: 14 additions & 0 deletions compose-dev.yaml
@@ -0,0 +1,14 @@
+services:
+  web:
+    build: .
+    environment:
+      - BYPASS_AUTHORIZATION=true
+      - REDIS_HOST=redis
+    platform: linux/amd64
+    ports:
+      - "8000:8000"
+      - "8001:8001"
+      - "8003:8003"
+  redis:
+    image: "redis:alpine"
+    platform: linux/amd64
20 changes: 17 additions & 3 deletions credentials.yaml.sample
@@ -1,4 +1,18 @@
 customer_credentials:
-  test-customer_id:
-    api_key: sample-api-key
-    model_name: gpt-3.5-turbo
+  testCustomerId:
+    credentialsMap:
+      AZURE_OPENAI:
+        customerId: testCustomerId
+        enabled: true
+        metadata:
+          deploymentName: gpt-4o
+          endpoint: https://myinstance.openai.azure.com/
+        secret: test_secret
+        type: AZURE_OPENAI
+      OPENAI:
+        customerId: testCustomerId
+        enabled: false
+        metadata:
+          model: gpt-3
+        secret: test_secret
+        type: OPENAI
84 changes: 84 additions & 0 deletions demos/js-client/skynet.mjs
@@ -0,0 +1,84 @@
+export class SkynetClient {
+    constructor(options = {}) {
+        this._baseUrl = options?.baseUrl ?? 'http://localhost:8000';
+        this._token = options?.token;
+    }
+
+    async summary(text, options) {
+        return this._fetchAndPoll(`${this._baseUrl}/summaries/v1/summary`, text, options)
+    }
+
+    async actionItems(text, options) {
+        return this._fetchAndPoll(`${this._baseUrl}/summaries/v1/action-items`, text, options)
+    }
+
+    async _fetchAndPoll(url, text, options = {}) {
+        // Submit the job.
+        const headers = {
+            'Content-Type': 'application/json'
+        };
+
+        if (this._token) {
+            headers['Authorization'] = `Bearer ${this._token}`;
+        }
+
+        const r = await fetch(url, {
+            method: 'POST',
+            headers,
+            body: JSON.stringify({
+                hint: options?.hint ?? 'text',
+                text
+            })
+        });
+        const data = await r.json();
+        const jobId = data.id;
+
+        if (!jobId) {
+            throw new Error('Could not create job');
+        }
+
+        const d = createDeferred();
+
+        // Poll for it.
+        const pHeaders = {};
+
+        if (this._token) {
+            pHeaders['Authorization'] = `Bearer ${this._token}`;
+        }
+
+        const int = setInterval(async () => {
+            try {
+                const r = await fetch(`${this._baseUrl}/summaries/v1/job/${jobId}`, {
+                    headers: pHeaders
+                });
+                const data = await r.json();
+
+                if (data.status === 'success') {
+                    clearInterval(int);
+                    d.resolve(data.result);
+                } else if (data.status === 'error') {
+                    clearInterval(int);
+                    d.reject(new Error(data.result));
+                }
+            } catch(_) {}
+        }, 5 * 1000);
+
+        return d.promise;
+    }
+}
+
+
+function createDeferred() {
+    if (Promise.withResolvers) {
+        return Promise.withResolvers();
+    }
+
+    const d = {};
+
+    d.promise = new Promise((resolve, reject) => {
+        d.resolve = resolve;
+        d.reject = reject;
+    })
+
+    return d;
+}
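For reference, a minimal usage sketch of the demo client added above, assuming a local Skynet instance with authorization bypassed (so no token option is needed); the file name and meeting text are illustrative:

```js
// usage.mjs (hypothetical file name), run next to skynet.mjs with: node usage.mjs
import { SkynetClient } from './skynet.mjs';

const client = new SkynetClient({ baseUrl: 'http://localhost:8000' });

// summary() submits the job, then polls /summaries/v1/job/<id> every 5 seconds
// and resolves with the result once the job status is 'success'.
const summary = await client.summary(
    'Alice: the rollout went fine. Bob: then we can close the incident.',
    { hint: 'meeting' } // the meeting hint type added in jitsi#110
);

console.log(summary);
```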
3 changes: 1 addition & 2 deletions docker/rootfs/etc/apt/sources.list.d/python-ppa.list
@@ -1,2 +1 @@
-deb https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu focal main
-deb-src https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu focal main
+deb https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy main