Update llamaindex ut #10338

Merged: 12 commits, Mar 7, 2024
27 changes: 26 additions & 1 deletion .github/workflows/llm_unit_tests.yml
@@ -102,6 +102,7 @@ jobs:
echo "ORIGINAL_REPLIT_CODE_PATH=${ORIGIN_DIR}/replit-code-v1-3b" >> "$GITHUB_ENV"
echo "ORIGINAL_WHISPER_TINY_PATH=${ORIGIN_DIR}/whisper-tiny" >> "$GITHUB_ENV"
echo "MISTRAL_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-v0.1" >> "$GITHUB_ENV"
echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"

echo "LLAMA_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_llama_7b_q4_0.bin" >> "$GITHUB_ENV"
echo "GPTNEOX_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_redpajama_7b_q4_0.bin" >> "$GITHUB_ENV"
@@ -190,6 +191,10 @@ jobs:
echo "wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/datasets/common_voice -P $DATASET_DIR"
wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/datasets/common_voice -P $DATASET_DIR
fi
if [ ! -d $LLAMA2_7B_ORIGIN_PATH ]; then
echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..."
wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR
fi

- name: Run LLM cli test (Linux)
if: runner.os == 'Linux'
@@ -209,6 +214,12 @@ jobs:
pip install -U chromadb==0.3.25
pip install -U pandas==2.0.3
bash python/llm/test/run-llm-langchain-tests.sh
- name: Run LLM llamaindex test
shell: bash
run: |
pip install llama-index-readers-file llama-index-vector-stores-postgres llama-index-embeddings-huggingface
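# transformers is pinned below; assumed to match the version bigdl-llm's Llama-2 support expects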
pip install transformers==4.31.0
bash python/llm/test/run-llm-llamaindex-tests.sh
llm-unit-test-on-arc:
needs: [setup-python-version, llm-cpp-build]
strategy:
@@ -347,7 +358,7 @@ jobs:
fi
bash python/llm/test/run-llm-example-tests-gpu.sh

- name: Run LLM langchain test
- name: Run LLM langchain GPU test
shell: bash
run: |
pip install -U langchain==0.0.184
@@ -360,3 +371,17 @@ jobs:
source /home/arda/intel/oneapi/setvars.sh
fi
bash python/llm/test/run-llm-langchain-tests-gpu.sh

- name: Run LLM llamaindex GPU test
shell: bash
run: |
pip install llama-index-readers-file llama-index-vector-stores-postgres llama-index-embeddings-huggingface
# oneAPI is installed in different locations on the Arc UT test machines
if [[ '${{ matrix.pytorch-version }}' == '2.1' ]]; then
pip install --pre --upgrade bigdl-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu
source /opt/intel/oneapi/setvars.sh
elif [[ '${{ matrix.pytorch-version }}' == '2.0' ]]; then
pip install --pre --upgrade bigdl-llm[xpu_2.0] -f https://developer.intel.com/ipex-whl-stable-xpu
source /home/arda/intel/oneapi/setvars.sh
fi
bash python/llm/test/run-llm-llamaindex-tests-gpu.sh
21 changes: 1 addition & 20 deletions python/llm/test/llamaindex/test_llamaindex.py
@@ -14,33 +14,14 @@
# limitations under the License.
#

from bigdl.llm.langchain.llms import TransformersLLM, TransformersPipelineLLM, \
LlamaLLM, BloomLLM
from bigdl.llm.langchain.embeddings import TransformersEmbeddings, LlamaEmbeddings, \
BloomEmbeddings


from langchain.document_loaders import WebBaseLoader
from langchain.indexes import VectorstoreIndexCreator


from langchain.chains.question_answering import load_qa_chain
from langchain.chains.chat_vector_db.prompts import (CONDENSE_QUESTION_PROMPT,
QA_PROMPT)
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

import pytest
from unittest import TestCase
import os
from bigdl.llm.llamaindex.llms import BigdlLLM

class Test_LlamaIndex_Transformers_API(TestCase):
    def setUp(self):
        self.auto_model_path = os.environ.get('ORIGINAL_CHATGLM2_6B_PATH')
        self.auto_causal_model_path = os.environ.get('ORIGINAL_REPLIT_CODE_PATH')
        self.llama_model_path = os.environ.get('LLAMA_ORIGIN_PATH')
        self.bloom_model_path = os.environ.get('BLOOM_ORIGIN_PATH')
        self.llama_model_path = os.environ.get('LLAMA2_7B_ORIGIN_PATH')
        thread_num = os.environ.get('THREAD_NUM')
        if thread_num is not None:
            self.n_threads = int(thread_num)
70 changes: 70 additions & 0 deletions python/llm/test/llamaindex_gpu/test_llamaindex.py
@@ -0,0 +1,70 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import torch
import pytest
from unittest import TestCase
import os
from bigdl.llm.llamaindex.llms import BigdlLLM

class Test_LlamaIndex_Transformers_API(TestCase):
    def setUp(self):
        self.llama_model_path = os.environ.get('LLAMA2_7B_ORIGIN_PATH')
        thread_num = os.environ.get('THREAD_NUM')
        if thread_num is not None:
            self.n_threads = int(thread_num)
        else:
            self.n_threads = 2

    @staticmethod
    def completion_to_prompt(completion):
        return f"<|system|>\n</s>\n<|user|>\n{completion}</s>\n<|assistant|>\n"

    @staticmethod
    def messages_to_prompt(messages):
        prompt = ""
        for message in messages:
            if message.role == "system":
                prompt += f"<|system|>\n{message.content}</s>\n"
            elif message.role == "user":
                prompt += f"<|user|>\n{message.content}</s>\n"
            elif message.role == "assistant":
                prompt += f"<|assistant|>\n{message.content}</s>\n"

        # ensure we start with a system prompt, insert blank if needed
        if not prompt.startswith("<|system|>\n"):
            prompt = "<|system|>\n</s>\n" + prompt

        # add final assistant prompt
        prompt = prompt + "<|assistant|>\n"
        return prompt

    def test_bigdl_llm(self):
        # load the Llama-2 chat model through BigdlLLM on the XPU device and run a short completion
        llm = BigdlLLM(
            model_name=self.llama_model_path,
            tokenizer_name=self.llama_model_path,
            context_window=512,
            max_new_tokens=32,
            model_kwargs={},
            generate_kwargs={"temperature": 0.7, "do_sample": False},
            messages_to_prompt=self.messages_to_prompt,
            completion_to_prompt=self.completion_to_prompt,
            device_map="xpu",
        )
        res = llm.complete("What is AI?")
        assert res is not None


if __name__ == '__main__':
    pytest.main([__file__])
21 changes: 21 additions & 0 deletions python/llm/test/run-llm-llamaindex-tests-gpu.sh
@@ -0,0 +1,21 @@
#!/bin/bash

export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/llamaindex_gpu

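# Intel GPU runtime settings for the XPU tests: USE_XETLA=OFF disables XeTLA kernels;
# SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 enables Level Zero immediate command lists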
export USE_XETLA=OFF
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1

set -e

echo "# Start testing inference"
start=$(date "+%s")

python -m pytest -s ${LLM_INFERENCE_TEST_DIR}

now=$(date "+%s")
time=$((now-start))

echo "Bigdl-llm llamaindex gpu tests finished"
echo "Time used:$time seconds"
18 changes: 18 additions & 0 deletions python/llm/test/run-llm-llamaindex-tests.sh
@@ -0,0 +1,18 @@
#!/bin/bash

export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/llamaindex

set -e

echo "# Start testing inference"
start=$(date "+%s")

python -m pytest -s ${LLM_INFERENCE_TEST_DIR}

now=$(date "+%s")
time=$((now-start))

echo "Bigdl-llm llamaindex tests finished"
echo "Time used:$time seconds"