Skip to content

Commit

Permalink
feat: Add Phi-3 Manifests and Custom E2E Run Flag (#491)
Browse files Browse the repository at this point in the history
**Reason for Change**:
Adds Phi-3 manifests and adds a workflow flag that allows running the E2E tests for just the Phi-3 models
  • Loading branch information
ishaansehgal99 authored Jul 2, 2024
1 parent 21d7768 commit 421bd5f
Show file tree
Hide file tree
Showing 10 changed files with 281 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/e2e-preset-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,16 @@ on:
type: boolean
default: false
description: "Test all models for E2E"
force-run-all-phi-models:
type: boolean
default: false
description: "Test all Phi models for E2E"

env:
GO_VERSION: "1.22"
BRANCH_NAME: ${{ github.head_ref || github.ref_name}}
FORCE_RUN_ALL: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all == 'true' }}
FORCE_RUN_ALL_PHI: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all-phi-models== 'true' }}

permissions:
id-token: write
Expand Down Expand Up @@ -43,6 +48,7 @@ jobs:
run: |
PR_BRANCH=${{ env.BRANCH_NAME }} \
FORCE_RUN_ALL=${{ env.FORCE_RUN_ALL }} \
FORCE_RUN_ALL_PHI=${{ env.FORCE_RUN_ALL_PHI }} \
python3 .github/workflows/kind-cluster/determine_models.py
- name: Print Determined Models
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/kind-cluster/determine_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,13 @@ def check_modified_models(pr_branch):
def main():
pr_branch = os.environ.get("PR_BRANCH", "main") # If not specified default to 'main'
force_run_all = os.environ.get("FORCE_RUN_ALL", "false") # If not specified default to False
force_run_all_phi = os.environ.get("FORCE_RUN_ALL_PHI", "false") # If not specified default to False

affected_models = []
if force_run_all != "false":
affected_models = [model['name'] for model in YAML_PR['models']]
elif force_run_all_phi != "false":
affected_models = [model['name'] for model in YAML_PR['models'] if 'phi-3' in model['name']]
else:
# Logic to determine affected models
# Example: affected_models = ['model1', 'model2', 'model3']
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Service exposing the phi-3-medium-128k-instruct inference deployment
# via an external LoadBalancer. Traffic on port 80 is forwarded to the
# inference server listening on port 5000 inside the pod.
apiVersion: v1
kind: Service
metadata:
  name: phi-3-medium-128k-instruct
spec:
  selector:
    app: phi-3-medium-128k-instruct
  ports:
    - protocol: TCP
      port: 80          # externally exposed port
      targetPort: 5000  # inference_api.py server port
  type: LoadBalancer
  # Register endpoints before the pod passes its readiness probe so the
  # E2E harness can reach the service while the model is still loading.
  publishNotReadyAddresses: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Single-replica Deployment running the phi-3-medium-128k-instruct preset
# image on one GPU. Image registry/tag placeholders (REPO_HERE/TAG_HERE)
# are substituted by the E2E workflow before apply.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: phi-3-medium-128k-instruct
spec:
  replicas: 1
  selector:
    matchLabels:
      app: phi-3-medium-128k-instruct
  template:
    metadata:
      labels:
        app: phi-3-medium-128k-instruct
    spec:
      containers:
        - name: phi-3-medium-128k-instruct-container
          image: REPO_HERE.azurecr.io/phi-3-medium-128k-instruct:TAG_HERE
          command:
            - /bin/sh
            - -c
            - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
          resources:
            requests:
              nvidia.com/gpu: 1
            limits:
              nvidia.com/gpu: 1 # Requesting 1 GPU
          livenessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 600 # 10 Min — allow time for model weights to load
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 30
            periodSeconds: 10
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
      volumes:
        # Memory-backed /dev/shm; default shm size is too small for
        # multi-process inference tooling.
        - name: dshm
          emptyDir:
            medium: Memory
      tolerations:
        - effect: NoSchedule
          key: sku
          operator: Equal
          value: gpu
        - effect: NoSchedule
          key: nvidia.com/gpu
          operator: Exists
      nodeSelector:
        # NOTE(review): pool name appears truncated to 12 chars — presumably
        # the AKS agent-pool name limit; confirm it matches the created pool.
        pool: phi3medium12
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Service exposing the phi-3-medium-4k-instruct inference deployment
# via an external LoadBalancer. Traffic on port 80 is forwarded to the
# inference server listening on port 5000 inside the pod.
apiVersion: v1
kind: Service
metadata:
  name: phi-3-medium-4k-instruct
spec:
  selector:
    app: phi-3-medium-4k-instruct
  ports:
    - protocol: TCP
      port: 80          # externally exposed port
      targetPort: 5000  # inference_api.py server port
  type: LoadBalancer
  # Register endpoints before the pod passes its readiness probe so the
  # E2E harness can reach the service while the model is still loading.
  publishNotReadyAddresses: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Single-replica Deployment running the phi-3-medium-4k-instruct preset
# image on one GPU. Image registry/tag placeholders (REPO_HERE/TAG_HERE)
# are substituted by the E2E workflow before apply.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: phi-3-medium-4k-instruct
spec:
  replicas: 1
  selector:
    matchLabels:
      app: phi-3-medium-4k-instruct
  template:
    metadata:
      labels:
        app: phi-3-medium-4k-instruct
    spec:
      containers:
        - name: phi-3-medium-4k-instruct-container
          image: REPO_HERE.azurecr.io/phi-3-medium-4k-instruct:TAG_HERE
          command:
            - /bin/sh
            - -c
            - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
          resources:
            requests:
              nvidia.com/gpu: 1
            limits:
              nvidia.com/gpu: 1 # Requesting 1 GPU
          livenessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 600 # 10 Min — allow time for model weights to load
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 30
            periodSeconds: 10
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
      volumes:
        # Memory-backed /dev/shm; default shm size is too small for
        # multi-process inference tooling.
        - name: dshm
          emptyDir:
            medium: Memory
      tolerations:
        - effect: NoSchedule
          key: sku
          operator: Equal
          value: gpu
        - effect: NoSchedule
          key: nvidia.com/gpu
          operator: Exists
      nodeSelector:
        # NOTE(review): 12-char pool name — presumably the AKS agent-pool
        # name limit; confirm it matches the created pool.
        pool: phi3medium4k
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Service exposing the phi-3-small-128k-instruct inference deployment
# via an external LoadBalancer. Traffic on port 80 is forwarded to the
# inference server listening on port 5000 inside the pod.
apiVersion: v1
kind: Service
metadata:
  name: phi-3-small-128k-instruct
spec:
  selector:
    app: phi-3-small-128k-instruct
  ports:
    - protocol: TCP
      port: 80          # externally exposed port
      targetPort: 5000  # inference_api.py server port
  type: LoadBalancer
  # Register endpoints before the pod passes its readiness probe so the
  # E2E harness can reach the service while the model is still loading.
  publishNotReadyAddresses: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Single-replica Deployment running the phi-3-small-128k-instruct preset
# image on one GPU. Image registry/tag placeholders (REPO_HERE/TAG_HERE)
# are substituted by the E2E workflow before apply.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: phi-3-small-128k-instruct
spec:
  replicas: 1
  selector:
    matchLabels:
      app: phi-3-small-128k-instruct
  template:
    metadata:
      labels:
        app: phi-3-small-128k-instruct
    spec:
      containers:
        - name: phi-3-small-128k-instruct-container
          image: REPO_HERE.azurecr.io/phi-3-small-128k-instruct:TAG_HERE
          command:
            - /bin/sh
            - -c
            - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
          resources:
            requests:
              nvidia.com/gpu: 1
            limits:
              nvidia.com/gpu: 1 # Requesting 1 GPU
          livenessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 600 # 10 Min — allow time for model weights to load
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 30
            periodSeconds: 10
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
      volumes:
        # Memory-backed /dev/shm; default shm size is too small for
        # multi-process inference tooling.
        - name: dshm
          emptyDir:
            medium: Memory
      tolerations:
        - effect: NoSchedule
          key: sku
          operator: Equal
          value: gpu
        - effect: NoSchedule
          key: nvidia.com/gpu
          operator: Exists
      nodeSelector:
        # NOTE(review): pool name appears truncated to 12 chars — presumably
        # the AKS agent-pool name limit; confirm it matches the created pool.
        pool: phi3small128
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Service exposing the phi-3-small-8k-instruct inference deployment
# via an external LoadBalancer. Traffic on port 80 is forwarded to the
# inference server listening on port 5000 inside the pod.
apiVersion: v1
kind: Service
metadata:
  name: phi-3-small-8k-instruct
spec:
  selector:
    app: phi-3-small-8k-instruct
  ports:
    - protocol: TCP
      port: 80          # externally exposed port
      targetPort: 5000  # inference_api.py server port
  type: LoadBalancer
  # Register endpoints before the pod passes its readiness probe so the
  # E2E harness can reach the service while the model is still loading.
  publishNotReadyAddresses: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: phi-3-small-8k-instruct
spec:
replicas: 1
selector:
matchLabels:
app: phi-3-small-8k-instruct
template:
metadata:
labels:
app: phi-3-small-8k-instruct
spec:
containers:
- name: phi-3-small-8k-instruct-container
image: REPO_HERE.azurecr.io/phi-3-small-8k-instruct:TAG_HERE
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
resources:
requests:
nvidia.com/gpu: 1
limits:
nvidia.com/gpu: 1 # Requesting 1 GPU
livenessProbe:
httpGet:
path: /healthz
port: 5000
initialDelaySeconds: 600 # 10 Min
periodSeconds: 10
readinessProbe:
httpGet:
path: /healthz
port: 5000
initialDelaySeconds: 30
periodSeconds: 10
volumeMounts:
- name: dshm
mountPath: /dev/shm
volumes:
- name: dshm
emptyDir:
medium: Memory
tolerations:
- effect: NoSchedule
key: sku
operator: Equal
value: gpu
- effect: NoSchedule
key: nvidia.com/gpu
operator: Exists
nodeSelector:
pool: phi3small8ki

0 comments on commit 421bd5f

Please sign in to comment.