From 89300ef9476a3a182b1b0d2fe4c02c481a743f23 Mon Sep 17 00:00:00 2001 From: Stefan McShane Date: Thu, 30 Jan 2025 05:14:25 -0500 Subject: [PATCH 1/2] hf llm additional values --- .../hf-llm-models/templates/deployment.yaml | 11 +++ test.yaml | 96 +++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 test.yaml diff --git a/addons/hf-llm-models/templates/deployment.yaml b/addons/hf-llm-models/templates/deployment.yaml index df9bd94d4..6f12485e8 100644 --- a/addons/hf-llm-models/templates/deployment.yaml +++ b/addons/hf-llm-models/templates/deployment.yaml @@ -69,12 +69,19 @@ spec: - --tensor-parallel-size={{ .Values.tensorParallelSize }} {{- if .Values.maxModelLen }} - --max-model-len={{ .Values.maxModelLen }} + {{- end }} + {{- if .Values.extraArgs }} + {{- range .Values.extraArgs }} + - {{ . }} + {{- end }} {{- end }} image: {{ .Values.vllmImage }} imagePullPolicy: IfNotPresent env: - name: HF_TOKEN value: {{ .Values.huggingFaceToken }} + - name: NCCL_DEBUG + value: INFO ports: - containerPort: 8000 protocol: TCP @@ -105,6 +112,10 @@ spec: nvidia.com/gpu: {{ .Values.resources.limits.nvidiaGpu }} {{- end }} volumeMounts: + - name: dshm + emptyDir: + medium: Memory + sizeLimit: 4Gi - name: model-volume mountPath: {{ .Values.modelDir }} name: vllm diff --git a/test.yaml b/test.yaml new file mode 100644 index 000000000..01a4c31c5 --- /dev/null +++ b/test.yaml @@ -0,0 +1,96 @@ +fromaws1-test: + autoscaling: + enabled: false + gpu: false + maxReplicas: 10 + minReplicas: 1 + targetCPUUtilizationPercentage: 50 + targetGPUMemoryUtilizationRatio: 1 + targetGPUUtilizationPercentage: 100 + targetMemoryUtilizationPercentage: 50 + cloudsql: + connectionName: "" + dbPort: 0 + enabled: false + serviceAccountJSONSecret: "" + configMapRefs: + - 6-134344.1 + container: + command: "" + env: + normal: null + port: 80 + deploymentStrategy: + blueGreen: {} + fileSecretMounts: + enabled: false + mounts: null + fullnameOverride: fromaws1-test + health: + livenessProbe: + command: "" + enabled: false + initialDelaySeconds: 15 + path: /healthz + timeoutSeconds: 1 + readinessProbe: + command: "" + enabled: false + initialDelaySeconds: 15 + path: /healthz + timeoutSeconds: 1 + ingress: + enabled: false + hosts: null + tls: true + wildcard: false + keda: + enabled: false + labels: + porter.run/absolute-name: fromaws1-test + porter.run/app-id: "6" + porter.run/app-instance-id: 2e3d0bd8-5f93-4472-8568-249d518de27b + porter.run/app-name: fromaws1 + porter.run/app-revision-id: 1e467633-7149-4345-b039-c0a64fccf2d0 + porter.run/deployment-target-id: 13434427-532a-400f-9a34-dcf133c47a8e + porter.run/image-repository-hash: 394d932930b463a4c36e253205279356f106d9dee7beb2b811f732088797721 + porter.run/image-tag: latest + porter.run/porter-application: "true" + porter.run/project-id: "10" + porter.run/service-name: test + porter.run/service-type: web + porter.run/tailscale-svc: "true" + nodeGroups: null + podAnnotations: {} + podDisruptionBudget: + enabled: false + podLabels: + porter.run/absolute-name: fromaws1-test + porter.run/app-id: "6" + porter.run/app-instance-id: 2e3d0bd8-5f93-4472-8568-249d518de27b + porter.run/app-name: fromaws1 + porter.run/app-revision-id: 1e467633-7149-4345-b039-c0a64fccf2d0 + porter.run/deployment-target-id: 13434427-532a-400f-9a34-dcf133c47a8e + porter.run/image-repository-hash: 394d932930b463a4c36e253205279356f106d9dee7beb2b811f732088797721 + porter.run/image-tag: latest + porter.run/porter-application: "true" + porter.run/project-id: "10" + porter.run/service-name: test + porter.run/service-type: web + porter.run/tailscale-svc: "true" + replicaCount: 1 + resources: + limits: {} + requests: + cpu: "0.2" + memory: 400M + secretRefs: + - 6-134344.1 + service: + port: 80 + terminationGracePeriodSeconds: 30 +global: + image: + imagePullSecret: porter-image-pull-secret + repository: 026281491146.dkr.ecr.us-east-2.amazonaws.com/stefan/test + tag: latest From a75650aff7573d542857c2e08659c938983e40e5 Mon Sep 17 00:00:00 2001 From: Stefan McShane Date: Thu, 30 Jan 2025 13:07:40 -0500 Subject: [PATCH 2/2] removing file --- .../hf-llm-models/templates/deployment.yaml | 8 +- test.yaml | 96 ------------------- 2 files changed, 5 insertions(+), 99 deletions(-) delete mode 100644 test.yaml diff --git a/addons/hf-llm-models/templates/deployment.yaml b/addons/hf-llm-models/templates/deployment.yaml index 6f12485e8..b6a4f0643 100644 --- a/addons/hf-llm-models/templates/deployment.yaml +++ b/addons/hf-llm-models/templates/deployment.yaml @@ -113,9 +113,7 @@ spec: {{- end }} volumeMounts: - name: dshm - emptyDir: - medium: Memory - sizeLimit: 4Gi + mountPath: /dev/shm - name: model-volume mountPath: {{ .Values.modelDir }} name: vllm @@ -123,6 +121,10 @@ spec: allowPrivilegeEscalation: false terminationGracePeriodSeconds: 10 volumes: + - name: dshm + emptyDir: + medium: Memory + sizeLimit: 4Gi - name: model-volume persistentVolumeClaim: claimName: "{{ .Release.Name }}-hf-llm" diff --git a/test.yaml b/test.yaml deleted file mode 100644 index 01a4c31c5..000000000 --- a/test.yaml +++ /dev/null @@ -1,96 +0,0 @@ -fromaws1-test: - autoscaling: - enabled: false - gpu: false - maxReplicas: 10 - minReplicas: 1 - targetCPUUtilizationPercentage: 50 - targetGPUMemoryUtilizationRatio: 1 - targetGPUUtilizationPercentage: 100 - targetMemoryUtilizationPercentage: 50 - cloudsql: - connectionName: "" - dbPort: 0 - enabled: false - serviceAccountJSONSecret: "" - configMapRefs: - - 6-134344.1 - container: - command: "" - env: - normal: null - port: 80 - deploymentStrategy: - blueGreen: {} - fileSecretMounts: - enabled: false - mounts: null - fullnameOverride: fromaws1-test - health: - livenessProbe: - command: "" - enabled: false - initialDelaySeconds: 15 - path: /healthz - timeoutSeconds: 1 - readinessProbe: - command: "" - enabled: false - initialDelaySeconds: 15 - path: /healthz - timeoutSeconds: 1 - ingress: - enabled: false - hosts: null - tls: true - wildcard: false - keda: - enabled: false - labels: - porter.run/absolute-name: fromaws1-test - porter.run/app-id: "6" - porter.run/app-instance-id: 2e3d0bd8-5f93-4472-8568-249d518de27b - porter.run/app-name: fromaws1 - porter.run/app-revision-id: 1e467633-7149-4345-b039-c0a64fccf2d0 - porter.run/deployment-target-id: 13434427-532a-400f-9a34-dcf133c47a8e - porter.run/image-repository-hash: 394d932930b463a4c36e253205279356f106d9dee7beb2b811f732088797721 - porter.run/image-tag: latest - porter.run/porter-application: "true" - porter.run/project-id: "10" - porter.run/service-name: test - porter.run/service-type: web - porter.run/tailscale-svc: "true" - nodeGroups: null - podAnnotations: {} - podDisruptionBudget: - enabled: false - podLabels: - porter.run/absolute-name: fromaws1-test - porter.run/app-id: "6" - porter.run/app-instance-id: 2e3d0bd8-5f93-4472-8568-249d518de27b - porter.run/app-name: fromaws1 - porter.run/app-revision-id: 1e467633-7149-4345-b039-c0a64fccf2d0 - porter.run/deployment-target-id: 13434427-532a-400f-9a34-dcf133c47a8e - porter.run/image-repository-hash: 394d932930b463a4c36e253205279356f106d9dee7beb2b811f732088797721 - porter.run/image-tag: latest - porter.run/porter-application: "true" - porter.run/project-id: "10" - porter.run/service-name: test - porter.run/service-type: web - porter.run/tailscale-svc: "true" - replicaCount: 1 - resources: - limits: {} - requests: - cpu: "0.2" - memory: 400M - secretRefs: - - 6-134344.1 - service: - port: 80 - terminationGracePeriodSeconds: 30 -global: - image: - imagePullSecret: porter-image-pull-secret - repository: 026281491146.dkr.ecr.us-east-2.amazonaws.com/stefan/test - tag: latest