Skip to content

Commit

Permalink
feat: Add Adapter Loading Test for E2E Image Preset Workflow (#567)
Browse files Browse the repository at this point in the history
**Reason for Change**:
This PR adds a test that loads a mental health adapter onto the
falcon-7b model and verifies, via the `inference_api.py` logs, that
the adapter was loaded onto the model successfully.
  • Loading branch information
ishaansehgal99 authored Aug 19, 2024
1 parent 8b7aabf commit 1f06382
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 16 deletions.
54 changes: 39 additions & 15 deletions .github/e2e-preset-configs.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,105 +6,129 @@
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "falcon-7b-adapter",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": true,
"loads_adapter": true,
"expected_adapter": "amod-mental-health"
},
{
"name": "falcon-7b-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "falcon-40b",
"node-count": 1,
"node-vm-size": "Standard_NC24s_v3",
"node-osdisk-size": 400,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "falcon-40b-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC24s_v3",
"node-osdisk-size": 400,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "mistral-7b",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "mistral-7b-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "phi-2",
"node-count": 1,
"node-vm-size": "Standard_NC6s_v3",
"node-osdisk-size": 50,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "phi-3-mini-4k-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC6s_v3",
"node-osdisk-size": 50,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "phi-3-mini-128k-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC6s_v3",
"node-osdisk-size": 50,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "phi-3-medium-4k-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "phi-3-medium-128k-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": true
"OSS": true,
"loads_adapter": false
},
{
"name": "llama-2-7b",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": false
"OSS": false,
"loads_adapter": false
},
{
"name": "llama-2-7b-chat",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": false
"OSS": false,
"loads_adapter": false
},
{
"name": "llama-2-13b",
"node-count": 2,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 150,
"OSS": false
"OSS": false,
"loads_adapter": false
},
{
"name": "llama-2-13b-chat",
"node-count": 2,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 150,
"OSS": false
"OSS": false,
"loads_adapter": false
}
]
}
Expand Down
8 changes: 7 additions & 1 deletion .github/workflows/e2e-preset-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,13 @@ jobs:
- name: Wait for Resource to be ready
run: |
kubectl rollout status ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} --timeout=1800s
- name: Check Adapter Loading from Logs
if: matrix.model.loads_adapter == true
run: |
POD_NAME=$(kubectl get pods -l app=${{ matrix.model.name }} -o jsonpath="{.items[0].metadata.name}")
kubectl logs $POD_NAME | grep "Adapter added:" | grep "${{ matrix.model.expected_adapter }}" || (echo "Adapter not loaded or incorrect adapter loaded" && exit 1)
- name: Test home endpoint
run: |
curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Service exposing the falcon-7b-adapter deployment: external port 80 is
# forwarded to the model container's port 5000 via an Azure LoadBalancer.
apiVersion: v1
kind: Service
metadata:
  name: falcon-7b-adapter
spec:
  selector:
    # NOTE(review): this selects ANY pod labeled app=falcon — it would also
    # match pods from a plain falcon-7b deployment in the same namespace.
    # The e2e workflow additionally looks up pods with
    # `-l app=falcon-7b-adapter` (the model name), which this label does NOT
    # match — confirm the intended label value against the Deployment.
    app: falcon
  ports:
    - protocol: TCP
      port: 80          # external service port hit by the workflow's curl checks
      targetPort: 5000  # inference_api.py port (matches the Deployment's probes)
  type: LoadBalancer
  # Publish the endpoint before pods are Ready so CI can obtain the service IP
  # while the slow-starting model container is still loading.
  publishNotReadyAddresses: true
71 changes: 71 additions & 0 deletions presets/test/manifests/falcon-7b-adapter/falcon-7b-adapter.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Deployment for the e2e adapter-loading test: an init container copies the
# pre-baked adapter files into a shared emptyDir volume, then the falcon-7b
# model container serves inference_api.py (health-checked on port 5000).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: falcon-7b-adapter
spec:
  replicas: 1
  selector:
    matchLabels:
      # NOTE(review): the e2e workflow queries pods with
      # `-l app=falcon-7b-adapter` (it uses the model name); this label would
      # not match that selector, and it collides with any other deployment
      # labeled app=falcon — confirm which value is intended. Changing it
      # requires updating the companion Service selector in lockstep.
      app: falcon
  template:
    metadata:
      labels:
        app: falcon
    spec:
      initContainers:
        # Copies the adapter weights baked into this busybox image (under
        # /data) into the shared adapter-volume before the model starts.
        - name: adapter-init-container
          image: REPO_HERE.azurecr.io/adapter-falcon-7b-amod-mental-health-busybox:0.0.2
          command:
            - /bin/sh
            - -c
            - |
              mkdir -p /mnt/adapter && cp -r /data/* /mnt/adapter/
          volumeMounts:
            - name: adapter-volume
              mountPath: /mnt/adapter
          imagePullPolicy: Always
      containers:
        - name: falcon-container
          # REPO_HERE / TAG_HERE are placeholders substituted by the CI
          # workflow before `kubectl apply` — presumably via sed; confirm.
          image: REPO_HERE.azurecr.io/falcon-7b:TAG_HERE
          command:
            - /bin/sh
            - -c
            # NOTE(review): the launch command never references /mnt/adapter;
            # presumably inference_api.py discovers the adapter at a default
            # path or via the image's config — TODO confirm, otherwise the
            # "Adapter added:" log line the workflow greps for won't appear.
            - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --pipeline text-generation --torch_dtype bfloat16
          resources:
            requests:
              nvidia.com/gpu: 2
            limits:
              nvidia.com/gpu: 2 # Requesting 2 GPUs
          livenessProbe:
            httpGet:
              path: /healthz
              port: 5000
            # Generous initial delay: model download/load can take minutes.
            initialDelaySeconds: 600 # 10 Min
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 30
            periodSeconds: 10
          volumeMounts:
            # Enlarged shared memory for the inference runtime.
            - name: dshm
              mountPath: /dev/shm
            # Adapter files staged here by the init container above.
            - name: adapter-volume
              mountPath: /mnt/adapter
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
        - name: adapter-volume
          emptyDir: {} # emptyDir volume shared between adapter-init container and main container
      tolerations:
        # Allow scheduling onto GPU nodes tainted with sku=gpu or nvidia.com/gpu.
        - effect: NoSchedule
          key: sku
          operator: Equal
          value: gpu
        - effect: NoSchedule
          key: nvidia.com/gpu
          operator: Exists
      nodeSelector:
        pool: falcon7b

0 comments on commit 1f06382

Please sign in to comment.