-
Notifications
You must be signed in to change notification settings - Fork 77
/
Copy pathcompose.yaml
119 lines (111 loc) · 2.92 KB
/
compose.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
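# Environment variables required by this Compose file:
#   NGC_API_KEY - passed into the nv-embedqa-e5-v5, nv-rerankqa-mistral-4b-v3
#                 and llm-nim containers.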
services:
  # Embedding service. Started only when one of the listed profiles is active.
  nv-embedqa-e5-v5:
    image: "nvcr.io/nim/nvidia/nv-embedqa-e5-v5:1.0.1"
    profiles: ["Local LLM + Embedding", "Local LLM + Embedding + Reranking"]
    deploy:
      resources:
        reservations:
          # Reserve one NVIDIA GPU for this container.
          devices:
            - driver: nvidia
              capabilities: ["gpu"]
              count: 1
    ipc: host
    environment:
      - NGC_API_KEY=${NGC_API_KEY}
    volumes:
      # Named volume shared by all three NIM services in this file.
      - nim-cache:/opt/nim/.cache
    healthcheck:
      # Healthy once GET /v1/health/ready on port 8000 succeeds;
      # raise_for_status() makes the probe exit non-zero on an HTTP error.
      test:
        - "CMD"
        - "python3"
        - "-c"
        - "import requests; resp = requests.get('http://localhost:8000/v1/health/ready'); resp.raise_for_status()"
      interval: 30s
      # Probe failures during the first 10 minutes do not count against
      # `retries` (presumably to cover model download/load; confirm).
      start_period: 600s
      timeout: 20s
      retries: 3
    networks:
      - default

  # Reranking service. Started only with the full
  # "Local LLM + Embedding + Reranking" profile.
  nv-rerankqa-mistral-4b-v3:
    image: "nvcr.io/nim/nvidia/nv-rerankqa-mistral-4b-v3:1.0.2"
    profiles: ["Local LLM + Embedding + Reranking"]
    # NOTE(review): this is the only NIM service that sets `runtime`; the other
    # two rely on the device reservation alone. Confirm this is intentional.
    runtime: "nvidia"
    deploy:
      resources:
        reservations:
          # Reserve one NVIDIA GPU for this container.
          devices:
            - driver: nvidia
              capabilities: ["gpu"]
              count: 1
    ipc: host
    environment:
      - NGC_API_KEY=${NGC_API_KEY}
    volumes:
      - nim-cache:/opt/nim/.cache
    healthcheck:
      # Same readiness probe as the embedding service.
      test:
        - "CMD"
        - "python3"
        - "-c"
        - "import requests; resp = requests.get('http://localhost:8000/v1/health/ready'); resp.raise_for_status()"
      interval: 30s
      start_period: 600s
      timeout: 20s
      retries: 3
    networks:
      - default

  # LLM service. Part of every "Local LLM..." profile.
  llm-nim:
    image: "nvcr.io/nim/meta/llama3-8b-instruct:1"
    profiles: ["Local LLM", "Local LLM + Embedding", "Local LLM + Embedding + Reranking"]
    deploy:
      resources:
        reservations:
          # Reserve one NVIDIA GPU for this container.
          devices:
            - driver: nvidia
              capabilities: ["gpu"]
              count: 1
    ipc: host
    environment:
      - NGC_API_KEY=${NGC_API_KEY}
    volumes:
      - nim-cache:/opt/nim/.cache
    healthcheck:
      # NOTE(review): this probe invokes `python`, while the other NIM services
      # invoke `python3`. Presumably this matches the interpreter name inside
      # this image; verify before unifying.
      test:
        - "CMD"
        - "python"
        - "-c"
        - "import requests; resp = requests.get('http://localhost:8000/v1/health/ready'); resp.raise_for_status()"
      interval: 30s
      start_period: 600s
      timeout: 20s
      retries: 3
    networks:
      - default

  # Milvus, run via `milvus run standalone`. It has no `profiles` entry, so it
  # is not gated behind any profile.
  milvus:
    image: "milvusdb/milvus:v2.4.6"
    security_opt:
      - seccomp:unconfined
    environment:
      - ETCD_USE_EMBED=true
      - ETCD_DATA_DIR=/var/lib/milvus/etcd
      - COMMON_STORAGETYPE=local
    volumes:
      # ETCD_DATA_DIR above lives under this mount, so etcd data lands on the
      # named volume too.
      - milvus:/var/lib/milvus
    healthcheck:
      # `curl -f` exits non-zero on an HTTP error response.
      test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
      interval: 30s
      start_period: 90s
      timeout: 20s
      retries: 3
    command: "milvus run standalone"
    networks:
      - default

  # Redis. It has no `profiles` entry, so it is not gated behind any profile.
  redis:
    image: "redis:7"
    volumes:
      - redis:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 30s
      start_period: 30s
      timeout: 5s
      retries: 5
    # `--save 20 1`: snapshot to disk every 20 seconds if at least one key
    # changed; only warnings and above are logged.
    command: "redis-server --save 20 1 --loglevel warning"
    networks:
      - default

# Single network that every service above joins; no custom settings.
networks:
  default: {}

# Named volumes referenced by the services above; no custom settings.
volumes:
  milvus: {}
  redis: {}
  nim-cache: {}