adding chart text-generation-inference-0.2.1

intel · Oct 18, 2024 · c3d19f8 · c3d19f8
1 parent 9d7a13f
commit c3d19f8
Show file tree

Hide file tree

Showing 13 changed files with 529 additions and 0 deletions.
diff --git a/charts/text-generation-inference-0.2.1/.helmignore b/charts/text-generation-inference-0.2.1/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/charts/text-generation-inference-0.2.1/Chart.yaml b/charts/text-generation-inference-0.2.1/Chart.yaml
@@ -0,0 +1,11 @@
+# Helm chart metadata for text-generation-inference on Intel GPUs.
+apiVersion: v2
+name: text-generation-inference
+description: >-
+  A Rust, Python and gRPC server for text generation inference by huggingface
+  on Intel GPUs.
+type: application
+# Chart version, followed by the appVersion reported for the packaged server.
+version: "0.2.1"
+appVersion: "1.16.0"
+maintainers:
+  - name: tylertitsworth
+    email: tyler.titsworth@intel.com
+    url: https://github.com/tylertitsworth
diff --git a/charts/text-generation-inference-0.2.1/README.md b/charts/text-generation-inference-0.2.1/README.md
@@ -0,0 +1,32 @@
+# Text Generation Inference on Intel GPU
+
+A Rust, Python and gRPC server for text generation inference by huggingface on Intel GPUs.
+
+For more information about how to use Huggingface text-generation-inference with Intel optimizations, check out [huggingface's documentation](https://huggingface.co/docs/text-generation-inference/installation_intel).
+
+> [!TIP]
+> For Gaudi-related documentation, check out [tgi-gaudi](https://github.com/huggingface/tgi-gaudi).
+
+![Version: 0.2.1](https://img.shields.io/badge/Version-0.2.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.16.0](https://img.shields.io/badge/AppVersion-1.16.0-informational?style=flat-square)
+
+## Values
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| deploy.configMap | object | `{"enabled":true,"name":"tgi-config"}` | ConfigMap of Environment Variables |
+| deploy.image | string | `"ghcr.io/huggingface/text-generation-inference:latest-intel"` | Intel TGI Image |
+| deploy.replicaCount | int | `1` | Number of pods |
+| deploy.resources | object | `{"limits":{"cpu":"4000m","gpu.intel.com/i915":1},"requests":{"cpu":"1000m","memory":"1Gi"}}` | Resource configuration |
+| deploy.resources.limits."gpu.intel.com/i915" | int | `1` | Intel GPU Device Configuration |
+| fullnameOverride | string | `""` | Overrides the fully qualified name used for the chart's resources |
+| ingress | object | `{"annotations":{},"className":"","enabled":false,"hosts":[{"host":"chart-example.local","paths":[{"path":"/","pathType":"ImplementationSpecific"}]}],"tls":[]}` | Ingress configuration |
+| nameOverride | string | `""` | Name of the serving service |
+| pvc | object | `{"create":true,"name":"model-server-cache","size":"15Gi","storageClassName":"nil"}` | Configure Storage Volume |
+| pvc.create | bool | `true` | Create a new PVC |
+| pvc.name | string | `"model-server-cache"` | Specify the name of either an existing or new PVC |
+| secret.encodedToken | string | `""` | Base64 Encoded Huggingface Hub API Token |
+| securityContext | object | `{}` | Security Context Configuration |
+| service | object | `{"port":80,"type":"NodePort"}` | Service configuration |
+
+----------------------------------------------
+Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2)
diff --git a/charts/text-generation-inference-0.2.1/README.md.gotmpl b/charts/text-generation-inference-0.2.1/README.md.gotmpl
@@ -0,0 +1,16 @@
+# Text Generation Inference on Intel GPU
+
+{{ template "chart.description" . }}
+
+For more information about how to use Huggingface text-generation-inference with Intel optimizations, check out [huggingface's documentation](https://huggingface.co/docs/text-generation-inference/installation_intel).
+
+> [!TIP]
+> For Gaudi-related documentation, check out [tgi-gaudi](https://github.com/huggingface/tgi-gaudi).
+
+{{ template "chart.versionBadge" . }}{{ template "chart.typeBadge" . }}{{ template "chart.appVersionBadge" . }}
+
+{{ template "chart.requirementsSection" . }}
+
+{{ template "chart.valuesSection" . }}
+
+{{ template "helm-docs.versionFooter" . }}
diff --git a/charts/text-generation-inference-0.2.1/templates/NOTES.txt b/charts/text-generation-inference-0.2.1/templates/NOTES.txt
@@ -0,0 +1,21 @@
+{{- /*
+Post-install usage notes. At most one branch below is rendered: the ingress
+URLs when .Values.ingress.enabled is set, otherwise kubectl commands selected
+by .Values.service.type (NodePort, LoadBalancer or ClusterIP).
+*/ -}}
+1. Get the application URL by running these commands:
+{{- if .Values.ingress.enabled }}
+{{- range $host := .Values.ingress.hosts }}
+  {{- range .paths }}
+  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
+  {{- end }}
+{{- end }}
+{{- else if contains "NodePort" .Values.service.type }}
+  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "tgi.fullname" . }})
+  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+  echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.service.type }}
+     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "tgi.fullname" . }}'
+  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "tgi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
+  echo http://$SERVICE_IP:{{ .Values.service.port }}
+{{- else if contains "ClusterIP" .Values.service.type }}
+  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "tgi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
+  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
+  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
+{{- end }}
diff --git a/charts/text-generation-inference-0.2.1/templates/_helpers.tpl b/charts/text-generation-inference-0.2.1/templates/_helpers.tpl
@@ -0,0 +1,76 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+{{/*
+Short chart name: .Values.nameOverride when set, otherwise .Chart.Name.
+The result is cut to 63 characters and a trailing "-" is removed.
+*/}}
+{{- define "tgi.name" -}}
+{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified app name, limited to 63 characters because some Kubernetes
+name fields are restricted to that length by the DNS naming spec.
+Resolution order:
+  1. .Values.fullnameOverride, when set.
+  2. The release name alone, when it already contains the chart name
+     (or .Values.nameOverride).
+  3. "<release name>-<chart name>" otherwise.
+A trailing "-" left over after truncation is removed in every case.
+*/}}
+{{- define "tgi.fullname" -}}
+{{- with .Values.fullnameOverride }}
+{{- . | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $base := .Values.nameOverride | default .Chart.Name }}
+{{- $release := .Release.Name }}
+{{- if contains $base $release }}
+{{- $release | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" $release $base | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Value of the helm.sh/chart label: "<chart name>-<chart version>" with every
+"+" replaced by "_", cut to 63 characters, trailing "-" removed.
+*/}}
+{{- define "tgi.chart" -}}
+{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
+{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: the chart label, the selector labels, the app version (only
+when the chart declares one) and the managing service.
+*/}}
+{{- define "tgi.labels" -}}
+helm.sh/chart: {{ include "tgi.chart" . }}
+{{ include "tgi.selectorLabels" . }}
+{{- with .Chart.AppVersion }}
+app.kubernetes.io/version: {{ quote . }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels: the chart name (see "tgi.name") and the release name.
+These two labels are also embedded in the common labels emitted by "tgi.labels".
+*/}}
+{{- define "tgi.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "tgi.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Name of the service account to use.
+When .Values.serviceAccount.create is set, this is .Values.serviceAccount.name
+or, if that is empty, the chart's full name. Otherwise it is
+.Values.serviceAccount.name or, if that is empty, "default".
+The serviceAccount values block is optional: a missing block is treated as an
+empty map so the lookups below do not fail with a nil-pointer template error.
+*/}}
+{{- define "tgi.serviceAccountName" -}}
+{{- $sa := default (dict) .Values.serviceAccount }}
+{{- if $sa.create }}
+{{- default (include "tgi.fullname" .) $sa.name }}
+{{- else }}
+{{- default "default" $sa.name }}
+{{- end }}
+{{- end }}
diff --git a/charts/text-generation-inference-0.2.1/templates/deploy.yaml b/charts/text-generation-inference-0.2.1/templates/deploy.yaml
@@ -0,0 +1,89 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Deployment for the text-generation-inference container.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "tgi.fullname" . }}
+  labels:
+    {{- include "tgi.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.deploy.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "tgi.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      labels:
+        {{- include "tgi.selectorLabels" . | nindent 8 }}
+    spec:
+      hostIPC: true
+      containers:
+        - name: {{ .Chart.Name }}
+          # The server is told to listen on the same port that the container
+          # exposes as "http" below.
+          args:
+            - '-p'
+            - {{ .Values.service.port | quote }}
+            - '--cuda-graphs=0'
+          # Environment comes from the optional ConfigMap and from the
+          # release-scoped "-hf-token" Secret.
+          envFrom:
+            {{- if eq .Values.deploy.configMap.enabled true }}
+            - configMapRef:
+                name: {{ .Values.deploy.configMap.name }}
+            {{- end }}
+            - secretRef:
+                name: {{ .Release.Name }}-hf-token
+          # env:
+          #   - name: NUMBA_CACHE_DIR # https://github.com/huggingface/text-generation-inference/pull/2443
+          #     value: /data/numba_cache
+          image: {{ .Values.deploy.image }}
+          # All three probes open a TCP connection to the named "http" port.
+          livenessProbe:
+            failureThreshold: 10
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            tcpSocket:
+              port: http
+          readinessProbe:
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            tcpSocket:
+              port: http
+          startupProbe:
+            failureThreshold: 120
+            initialDelaySeconds: 20
+            periodSeconds: 5
+            tcpSocket:
+              port: http
+          ports:
+            - name: http
+              containerPort: {{ .Values.service.port }}
+              protocol: TCP
+          resources:
+            {{- toYaml .Values.deploy.resources | nindent 12 }}
+          securityContext:
+            {{- /* Left-trimmed like the resources value above so no whitespace-only line is emitted after the key. */}}
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          volumeMounts:
+            - mountPath: /dev/shm
+              name: dshm
+            - mountPath: /data
+              name: hf-data
+            - mountPath: /tmp
+              name: tmp
+      volumes:
+        # Memory-backed emptyDir mounted at /dev/shm.
+        - name: dshm
+          emptyDir:
+            medium: Memory
+        # Claim named by .Values.pvc.name, mounted at /data.
+        - name: hf-data
+          persistentVolumeClaim:
+            claimName: {{ .Values.pvc.name }}
+        - name: tmp
+          emptyDir: {}
diff --git a/charts/text-generation-inference-0.2.1/templates/ingress.yaml b/charts/text-generation-inference-0.2.1/templates/ingress.yaml
@@ -0,0 +1,76 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+{{- if .Values.ingress.enabled -}}
+{{- /*
+Ingress rendered only when .Values.ingress.enabled is set. Every path of every
+host in .Values.ingress.hosts is routed to the service named by "tgi.fullname"
+on .Values.service.port. The apiVersion and the backend/pathType/class fields
+are chosen from the cluster's Kubernetes version.
+*/}}
+{{- $fullName := include "tgi.fullname" . -}}
+{{- $svcPort := .Values.service.port -}}
+{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
+  {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
+  {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
+  {{- end }}
+{{- end }}
+{{- /*
+The apiVersion branches below must not right-trim. Everything above is
+left-trimmed, so also trimming the newline in front of "apiVersion" would
+append it to the preceding comment line and the manifest would render without
+an apiVersion.
+*/}}
+{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
+apiVersion: networking.k8s.io/v1
+{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion }}
+apiVersion: networking.k8s.io/v1beta1
+{{- else }}
+apiVersion: extensions/v1beta1
+{{- end }}
+kind: Ingress
+metadata:
+  name: {{ $fullName }}
+  labels:
+    {{- include "tgi.labels" . | nindent 4 }}
+  {{- with .Values.ingress.annotations }}
+  annotations:
+    kubernetes.io/ingress.allow-http: "false"
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
+  ingressClassName: {{ .Values.ingress.className }}
+  {{- end }}
+  {{- if .Values.ingress.tls }}
+  tls:
+    {{- range .Values.ingress.tls }}
+    - hosts:
+        {{- range .hosts }}
+        - {{ . | quote }}
+        {{- end }}
+      secretName: {{ .secretName }}
+    {{- end }}
+  {{- end }}
+  rules:
+    {{- range .Values.ingress.hosts }}
+    - host: {{ .host | quote }}
+      http:
+        paths:
+          {{- range .paths }}
+          - path: {{ .path }}
+            {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
+            pathType: {{ .pathType }}
+            {{- end }}
+            backend:
+              {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
+              service:
+                name: {{ $fullName }}
+                port:
+                  number: {{ $svcPort }}
+              {{- else }}
+              serviceName: {{ $fullName }}
+              servicePort: {{ $svcPort }}
+              {{- end }}
+          {{- end }}
+    {{- end }}
+{{- end }}
diff --git a/charts/text-generation-inference-0.2.1/templates/pvc.yaml b/charts/text-generation-inference-0.2.1/templates/pvc.yaml
@@ -0,0 +1,32 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+{{- /*
+PersistentVolumeClaim named by .Values.pvc.name, rendered only when
+.Values.pvc.create is set. The guard below is not right-trimmed so the "---"
+document separator stays on its own line instead of being appended to the
+preceding comment line.
+*/}}
+{{- if .Values.pvc.create }}
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ .Values.pvc.name }}
+  labels:
+    {{- include "tgi.labels" . | nindent 4 }}
+spec:
+  {{- /* storageClassName is emitted only when a non-empty value is configured. */}}
+  {{- if .Values.pvc.storageClassName }}
+  storageClassName: {{ .Values.pvc.storageClassName }}
+  {{- end }}
+  accessModes:
+    - ReadWriteMany
+  resources:
+    requests:
+      storage: {{ .Values.pvc.size }}
+{{- end }}