-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathservice.yaml
57 lines (46 loc) · 1.68 KB
/
service.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# service.yaml
service:
readiness_probe:
path: /v1/models
initial_delay_seconds: 5 # be sure to keep this low, or you will burn compute doing nothing
replicas: 2
# Fields below describe each replica.
resources:
ports: 8080
accelerators: [T4:1, L4:8, A10g:8, A100:1, A10g:1, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8]
envs:
AWS_ACCESS_KEY_ID:
AWS_SECRET_ACCESS_KEY:
AWS_REGION: auto
AWS_ENDPOINT_URL_S3: https://fly.storage.tigris.dev
# customize these in .env
BUCKET_NAME: mybucket
DATASET_NAME: mlabonne/FineTome-100k
MODEL_NAME: Qwen/Qwen2.5-0.5B
setup: |
conda create -n vllm python=3.9 -y
conda activate vllm
pip install vllm
wget -O geesefs https://github.com/yandex-cloud/geesefs/releases/download/v0.42.1/geesefs-linux-amd64
chmod +x geesefs && sudo mv geesefs /usr/local/bin/geesefs || chmod +x geesefs && mv geesefs /usr/local/bin/geesefs
mkdir -p ~/.aws
cat << EOF > ~/.aws/config
[default]
region = auto
output = json
endpoint_url = https://fly.storage.tigris.dev
EOF
cat << EOF > ~/.aws/credentials
[default]
aws_access_key_id = ${AWS_ACCESS_KEY_ID}
aws_secret_access_key = ${AWS_SECRET_ACCESS_KEY}
EOF
mkdir -p ~/.local/share/systemd/user
mkdir -p ~/tigris
run: |
/usr/local/bin/geesefs --endpoint https://fly.storage.tigris.dev --memory-limit 8192 --read-ahead-large 65536 --read-ahead-parallel 4096 --max-flushers 32 --max-parallel-parts 32 --part-sizes 32 ${BUCKET_NAME} ${HOME}/tigris
conda activate vllm
python -m vllm.entrypoints.openai.api_server \
--tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE \
--host 0.0.0.0 --port 8080 \
--model ${HOME}/tigris/${MODEL_NAME}/${DATASET_NAME}/fused