Update llamaindex ut #10338

Merged: 12 commits, Mar 7, 2024
27 changes: 26 additions & 1 deletion .github/workflows/llm_unit_tests.yml
@@ -102,6 +102,7 @@ jobs:
echo "ORIGINAL_REPLIT_CODE_PATH=${ORIGIN_DIR}/replit-code-v1-3b" >> "$GITHUB_ENV"
echo "ORIGINAL_WHISPER_TINY_PATH=${ORIGIN_DIR}/whisper-tiny" >> "$GITHUB_ENV"
echo "MISTRAL_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-v0.1" >> "$GITHUB_ENV"
echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"

echo "LLAMA_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_llama_7b_q4_0.bin" >> "$GITHUB_ENV"
echo "GPTNEOX_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_redpajama_7b_q4_0.bin" >> "$GITHUB_ENV"
@@ -190,6 +191,10 @@ jobs:
echo "wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/datasets/common_voice -P $DATASET_DIR"
wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/datasets/common_voice -P $DATASET_DIR
fi
if [ ! -d $LLAMA2_7B_ORIGIN_PATH ]; then
echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..."
wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR
fi

- name: Run LLM cli test (Linux)
if: runner.os == 'Linux'
@@ -209,6 +214,12 @@ jobs:
pip install -U chromadb==0.3.25
pip install -U pandas==2.0.3
bash python/llm/test/run-llm-langchain-tests.sh
- name: Run LLM llamaindex test
shell: bash
run: |
pip install llama-index-readers-file llama-index-vector-stores-postgres llama-index-embeddings-huggingface
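# transformers is pinned below; assumed to match the version bigdl-llm's Llama-2 support expects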
pip install transformers==4.31.0
bash python/llm/test/run-llm-llamaindex-tests.sh
llm-unit-test-on-arc:
needs: [setup-python-version, llm-cpp-build]
strategy:
@@ -347,7 +358,7 @@ jobs:
fi
bash python/llm/test/run-llm-example-tests-gpu.sh

- name: Run LLM langchain test
- name: Run LLM langchain GPU test
shell: bash
run: |
pip install -U langchain==0.0.184
@@ -360,3 +371,17 @@ jobs:
source /home/arda/intel/oneapi/setvars.sh
fi
bash python/llm/test/run-llm-langchain-tests-gpu.sh

- name: Run LLM llamaindex GPU test
shell: bash
run: |
pip install llama-index-readers-file llama-index-vector-stores-postgres llama-index-embeddings-huggingface
# oneAPI is installed in different locations on the Arc UT test machines
if [[ '${{ matrix.pytorch-version }}' == '2.1' ]]; then
pip install --pre --upgrade bigdl-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu
source /opt/intel/oneapi/setvars.sh
elif [[ '${{ matrix.pytorch-version }}' == '2.0' ]]; then
pip install --pre --upgrade bigdl-llm[xpu_2.0] -f https://developer.intel.com/ipex-whl-stable-xpu
source /home/arda/intel/oneapi/setvars.sh
fi
bash python/llm/test/run-llm-llamaindex-tests-gpu.sh
21 changes: 1 addition & 20 deletions python/llm/test/llamaindex/test_llamaindex.py
@@ -14,33 +14,14 @@
# limitations under the License.
#

from bigdl.llm.langchain.llms import TransformersLLM, TransformersPipelineLLM, \
LlamaLLM, BloomLLM
from bigdl.llm.langchain.embeddings import TransformersEmbeddings, LlamaEmbeddings, \
BloomEmbeddings


from langchain.document_loaders import WebBaseLoader
from langchain.indexes import VectorstoreIndexCreator


from langchain.chains.question_answering import load_qa_chain
from langchain.chains.chat_vector_db.prompts import (CONDENSE_QUESTION_PROMPT,
QA_PROMPT)
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

import pytest
from unittest import TestCase
import os
from bigdl.llm.llamaindex.llms import BigdlLLM

class Test_LlamaIndex_Transformers_API(TestCase):
    def setUp(self):
        self.auto_model_path = os.environ.get('ORIGINAL_CHATGLM2_6B_PATH')
        self.auto_causal_model_path = os.environ.get('ORIGINAL_REPLIT_CODE_PATH')
        self.llama_model_path = os.environ.get('LLAMA_ORIGIN_PATH')
        self.bloom_model_path = os.environ.get('BLOOM_ORIGIN_PATH')
        self.llama_model_path = os.environ.get('LLAMA2_7B_ORIGIN_PATH')
        thread_num = os.environ.get('THREAD_NUM')
        if thread_num is not None:
            self.n_threads = int(thread_num)
70 changes: 70 additions & 0 deletions python/llm/test/llamaindex_gpu/test_llamaindex.py
@@ -0,0 +1,70 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import torch
import pytest
from unittest import TestCase
import os
from bigdl.llm.llamaindex.llms import BigdlLLM

class Test_LlamaIndex_Transformers_API(TestCase):
    def setUp(self):
        self.llama_model_path = os.environ.get('LLAMA2_7B_ORIGIN_PATH')
        thread_num = os.environ.get('THREAD_NUM')
        if thread_num is not None:
            self.n_threads = int(thread_num)
        else:
            self.n_threads = 2

    @staticmethod
    def completion_to_prompt(completion):
        return f"<|system|>\n</s>\n<|user|>\n{completion}</s>\n<|assistant|>\n"

    @staticmethod
    def messages_to_prompt(messages):
        prompt = ""
        for message in messages:
            if message.role == "system":
                prompt += f"<|system|>\n{message.content}</s>\n"
            elif message.role == "user":
                prompt += f"<|user|>\n{message.content}</s>\n"
            elif message.role == "assistant":
                prompt += f"<|assistant|>\n{message.content}</s>\n"

        # ensure we start with a system prompt, insert blank if needed
        if not prompt.startswith("<|system|>\n"):
            prompt = "<|system|>\n</s>\n" + prompt

        # add final assistant prompt
        prompt = prompt + "<|assistant|>\n"
        return prompt

    def test_bigdl_llm(self):
        # load the Llama-2 chat model through BigdlLLM on the XPU device and run a short completion
        llm = BigdlLLM(
            model_name=self.llama_model_path,
            tokenizer_name=self.llama_model_path,
            context_window=512,
            max_new_tokens=32,
            model_kwargs={},
            generate_kwargs={"temperature": 0.7, "do_sample": False},
            messages_to_prompt=self.messages_to_prompt,
            completion_to_prompt=self.completion_to_prompt,
            device_map="xpu",
        )
        res = llm.complete("What is AI?")
        assert res is not None


if __name__ == '__main__':
    pytest.main([__file__])
21 changes: 21 additions & 0 deletions python/llm/test/run-llm-llamaindex-tests-gpu.sh
@@ -0,0 +1,21 @@
#!/bin/bash

export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/llamaindex_gpu

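# Intel GPU runtime settings for the XPU tests: USE_XETLA=OFF disables XeTLA kernels;
# SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 enables Level Zero immediate command lists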
export USE_XETLA=OFF
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1

set -e

echo "# Start testing inference"
start=$(date "+%s")

python -m pytest -s ${LLM_INFERENCE_TEST_DIR}

now=$(date "+%s")
time=$((now-start))

echo "Bigdl-llm llamaindex gpu tests finished"
echo "Time used:$time seconds"
18 changes: 18 additions & 0 deletions python/llm/test/run-llm-llamaindex-tests.sh
@@ -0,0 +1,18 @@
#!/bin/bash

export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/llamaindex

set -e

echo "# Start testing inference"
start=$(date "+%s")

python -m pytest -s ${LLM_INFERENCE_TEST_DIR}

now=$(date "+%s")
time=$((now-start))

echo "Bigdl-llm llamaindex tests finished"
echo "Time used:$time seconds"