diff --git a/addons/hf-llm-models/templates/deployment.yaml b/addons/hf-llm-models/templates/deployment.yaml
index df9bd94d4..b6a4f0643 100644
--- a/addons/hf-llm-models/templates/deployment.yaml
+++ b/addons/hf-llm-models/templates/deployment.yaml
@@ -69,12 +69,19 @@ spec:
         - --tensor-parallel-size={{ .Values.tensorParallelSize }}
         {{- if .Values.maxModelLen }}
         - --max-model-len={{ .Values.maxModelLen }}
+        {{- end }}
+        {{- if .Values.extraArgs }}
+        {{- range .Values.extraArgs }}
+        - {{ . | quote }}
         {{- end }}
         image: {{ .Values.vllmImage }}
         imagePullPolicy: IfNotPresent
         env:
         - name: HF_TOKEN
           value: {{ .Values.huggingFaceToken }}
+        - name: NCCL_DEBUG
+          value: INFO
         ports:
         - containerPort: 8000
           protocol: TCP
@@ -105,6 +112,8 @@ spec:
             nvidia.com/gpu: {{ .Values.resources.limits.nvidiaGpu }}
         {{- end }}
         volumeMounts:
+        - name: dshm
+          mountPath: /dev/shm
         - name: model-volume
           mountPath: {{ .Values.modelDir }}
         name: vllm
@@ -112,6 +121,10 @@ spec:
           allowPrivilegeEscalation: false
       terminationGracePeriodSeconds: 10
       volumes:
+      - name: dshm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 4Gi
      - name: model-volume
        persistentVolumeClaim:
          claimName: "{{ .Release.Name }}-hf-llm"