# triton-2.x.yaml
# Copyright 2021 IBM Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ClusterServingRuntime describing the Triton 2.x model server container and
# the built-in adapter settings used to manage it.
apiVersion: serving.kserve.io/v1alpha1
kind: ClusterServingRuntime
metadata:
  name: triton-2.x
  labels:
    name: modelmesh-serving-triton-2.x-SR
  annotations:
    # Quoted on purpose: annotation values must be strings.
    maxLoadingConcurrency: "2"
spec:
  # Model formats this runtime declares support for. `version` is the major
  # version as a quoted string; the trailing comment records the exact
  # framework version noted by the original authors.
  supportedModelFormats:
    - name: keras
      version: "2"  # 2.6.0
      autoSelect: true
    - name: onnx
      version: "1"  # 1.5.3
      autoSelect: true
    - name: pytorch
      version: "1"  # 1.8.0a0+17f8c32
      autoSelect: true
    - name: tensorflow
      version: "1"  # 1.15.4
      autoSelect: true
    - name: tensorflow
      version: "2"  # 2.3.1
      autoSelect: true
    - name: tensorrt
      version: "7"  # 7.2.1
      autoSelect: true
    - name: sklearn
      version: "0"  # v0.23.1
      autoSelect: false
    - name: xgboost
      version: "1"  # v1.1.1
      autoSelect: false
    - name: lightgbm
      version: "3"  # v3.2.1
      autoSelect: false

  protocolVersions:
    - grpc-v2
  multiModel: true

  # Quoted because the values contain a colon.
  grpcEndpoint: "port:8085"
  grpcDataEndpoint: "port:8001"

  containers:
    - name: triton
      image: tritonserver-2:replace
      command: [/bin/sh]
      args:
        - -c
        # One single-quoted scalar spread over several lines: YAML folds the
        # line breaks into single spaces, so `sh -c` receives the whole
        # script as one command line. It creates the model repository
        # directory, makes it world-writable, then replaces the shell with
        # tritonserver via `exec`.
        - 'mkdir -p /models/_triton_models;
          chmod 777 /models/_triton_models;
          exec tritonserver
          "--model-repository=/models/_triton_models"
          "--model-control-mode=explicit"
          "--strict-model-config=false"
          "--strict-readiness=false"
          "--allow-http=true"
          "--allow-sagemaker=false"
          '
      resources:
        requests:
          cpu: 500m
          memory: 1Gi
        limits:
          cpu: "5"
          memory: 1Gi
      livenessProbe:
        # The server listens only on 127.0.0.1, so an httpGet probe sent from
        # the kubelet running on the node cannot connect to it (not even with
        # the Host header or the host field). Instead, exec a curl call so
        # that the request originates from localhost inside the container.
        exec:
          command:
            - curl
            - --fail
            - --silent
            - --show-error
            # curl's own limit (9s) is one second below timeoutSeconds (10).
            - --max-time
            - "9"
            - http://localhost:8000/v2/health/live
        initialDelaySeconds: 5
        periodSeconds: 30
        timeoutSeconds: 10

  builtInAdapter:
    serverType: triton
    # Same port number as grpcDataEndpoint above.
    runtimeManagementPort: 8001
    # 134217728 = 128 * 1024 * 1024
    memBufferBytes: 134217728
    # 90000 ms = 90 s
    modelLoadingTimeoutMillis: 90000