Skip to content

Commit

Permalink
feat: Add Adapter Loading Test for E2E Image Preset Workflow (#567)
Browse files Browse the repository at this point in the history
**Reason for Change**:
This PR adds a test that loads a mental health adapter onto the
falcon-7b model and verifies, via the `inference_api.py` logs, that
the adapter was loaded onto the model successfully.
  • Loading branch information
ishaansehgal99 authored Aug 19, 2024
1 parent 8b7aabf commit 1f06382
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 16 deletions.
54 changes: 39 additions & 15 deletions .github/e2e-preset-configs.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,105 +6,129 @@
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "falcon-7b-adapter",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": true,
"loads_adapter": true,
"expected_adapter": "amod-mental-health"
},
{
"name": "falcon-7b-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "falcon-40b",
"node-count": 1,
"node-vm-size": "Standard_NC24s_v3",
"node-osdisk-size": 400,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "falcon-40b-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC24s_v3",
"node-osdisk-size": 400,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "mistral-7b",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "mistral-7b-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "phi-2",
"node-count": 1,
"node-vm-size": "Standard_NC6s_v3",
"node-osdisk-size": 50,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "phi-3-mini-4k-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC6s_v3",
"node-osdisk-size": 50,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "phi-3-mini-128k-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC6s_v3",
"node-osdisk-size": 50,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "phi-3-medium-4k-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "phi-3-medium-128k-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "llama-2-7b",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": false
"OSS": false,
"loads_adapter": false
},
{
"name": "llama-2-7b-chat",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": false
"OSS": false,
"loads_adapter": false
},
{
"name": "llama-2-13b",
"node-count": 2,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 150,
"OSS": false
"OSS": false,
"loads_adapter": false
},
{
"name": "llama-2-13b-chat",
"node-count": 2,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 150,
"OSS": false
"OSS": false,
"loads_adapter": false
}
]
}
Expand Down
8 changes: 7 additions & 1 deletion .github/workflows/e2e-preset-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,13 @@ jobs:
- name: Wait for Resource to be ready
run: |
kubectl rollout status ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} --timeout=1800s
- name: Check Adapter Loading from Logs
if: matrix.model.loads_adapter == true
run: |
POD_NAME=$(kubectl get pods -l app=${{ matrix.model.name }} -o jsonpath="{.items[0].metadata.name}")
kubectl logs $POD_NAME | grep "Adapter added:" | grep "${{ matrix.model.expected_adapter }}" || (echo "Adapter not loaded or incorrect adapter loaded" && exit 1)
- name: Test home endpoint
run: |
curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Service exposing the falcon-7b-adapter deployment: external port 80 is
# forwarded to the model container's port 5000 via an Azure LoadBalancer.
apiVersion: v1
kind: Service
metadata:
  name: falcon-7b-adapter
spec:
  selector:
    # NOTE(review): this selects ANY pod labeled app=falcon — it would also
    # match pods from a plain falcon-7b deployment in the same namespace.
    # The e2e workflow additionally looks up pods with
    # `-l app=falcon-7b-adapter` (the model name), which this label does NOT
    # match — confirm the intended label value against the Deployment.
    app: falcon
  ports:
    - protocol: TCP
      port: 80          # external service port hit by the workflow's curl checks
      targetPort: 5000  # inference_api.py port (matches the Deployment's probes)
  type: LoadBalancer
  # Publish the endpoint before pods are Ready so CI can obtain the service IP
  # while the slow-starting model container is still loading.
  publishNotReadyAddresses: true
71 changes: 71 additions & 0 deletions presets/test/manifests/falcon-7b-adapter/falcon-7b-adapter.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Deployment for the e2e adapter-loading test: an init container copies the
# pre-baked adapter files into a shared emptyDir volume, then the falcon-7b
# model container serves inference_api.py (health-checked on port 5000).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: falcon-7b-adapter
spec:
  replicas: 1
  selector:
    matchLabels:
      # NOTE(review): the e2e workflow queries pods with
      # `-l app=falcon-7b-adapter` (it uses the model name); this label would
      # not match that selector, and it collides with any other deployment
      # labeled app=falcon — confirm which value is intended. Changing it
      # requires updating the companion Service selector in lockstep.
      app: falcon
  template:
    metadata:
      labels:
        app: falcon
    spec:
      initContainers:
        # Copies the adapter weights baked into this busybox image (under
        # /data) into the shared adapter-volume before the model starts.
        - name: adapter-init-container
          image: REPO_HERE.azurecr.io/adapter-falcon-7b-amod-mental-health-busybox:0.0.2
          command:
            - /bin/sh
            - -c
            - |
              mkdir -p /mnt/adapter && cp -r /data/* /mnt/adapter/
          volumeMounts:
            - name: adapter-volume
              mountPath: /mnt/adapter
          imagePullPolicy: Always
      containers:
        - name: falcon-container
          # REPO_HERE / TAG_HERE are placeholders substituted by the CI
          # workflow before `kubectl apply` — presumably via sed; confirm.
          image: REPO_HERE.azurecr.io/falcon-7b:TAG_HERE
          command:
            - /bin/sh
            - -c
            # NOTE(review): the launch command never references /mnt/adapter;
            # presumably inference_api.py discovers the adapter at a default
            # path or via the image's config — TODO confirm, otherwise the
            # "Adapter added:" log line the workflow greps for won't appear.
            - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --pipeline text-generation --torch_dtype bfloat16
          resources:
            requests:
              nvidia.com/gpu: 2
            limits:
              nvidia.com/gpu: 2 # Requesting 2 GPUs
          livenessProbe:
            httpGet:
              path: /healthz
              port: 5000
            # Generous initial delay: model download/load can take minutes.
            initialDelaySeconds: 600 # 10 Min
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 30
            periodSeconds: 10
          volumeMounts:
            # Enlarged shared memory for the inference runtime.
            - name: dshm
              mountPath: /dev/shm
            # Adapter files staged here by the init container above.
            - name: adapter-volume
              mountPath: /mnt/adapter
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
        - name: adapter-volume
          emptyDir: {} # emptyDir volume shared between adapter-init container and main container
      tolerations:
        # Allow scheduling onto GPU nodes tainted with sku=gpu or nvidia.com/gpu.
        - effect: NoSchedule
          key: sku
          operator: Equal
          value: gpu
        - effect: NoSchedule
          key: nvidia.com/gpu
          operator: Exists
      nodeSelector:
        pool: falcon7b

0 comments on commit 1f06382

Please sign in to comment.