Make Python support unconditional and drop the FF_USE_PYTHON switch.

What this patch changes, file by file:

  CMakeLists.txt
    * Removes option(FF_USE_PYTHON) and every `if (FF_USE_PYTHON)` guard, so the
      Python lookup, the flexflow_cffi_header.py generation step and the Python
      install rules always run.
    * Marks the Python lookups REQUIRED: ${Python_EXECUTABLE} is now used
      unconditionally, so a missing interpreter must stop the configure step.
    * When FF_BUILD_FROM_PYPI is off, attaches two more steps to the flexflow
      target (Legion's bindings/python/setup.py build as POST_BUILD, and
      flexflow_python_build.py as PRE_BUILD) and installs
      ${CMAKE_BINARY_DIR}/flexflow_python into "bin".
      The environment for setup.py is passed with `${CMAKE_COMMAND} -E env`,
      the same mechanism cmake/pip_install/CMakeLists.txt uses, instead of a
      shell-style VAR=value prefix.

  cmake/legion.cmake
    * Legion_USE_Python and Legion_BUILD_BINDINGS are always set to ON in the
      "build Legion from source" branch.

  cmake/pip_install/CMakeLists.txt
    * Removes the outer `if (FF_USE_PYTHON)` guard; the guarded commands are
      otherwise unchanged.

  config/config.inc, config/config.linux
    * Remove the FF_USE_PYTHON default, the SET_PYTHON flag and their entries
      in CMAKE_FLAGS and BUILD_CONFIGS.

  inference/python/incr_decoding.py
    * The default "llm_model" becomes meta-llama/Llama-3.2-1B-Instruct.

  tests/python_interface_test.sh
    * check_python_interface now runs inference/python/incr_decoding.py instead
      of the keras seq_mnist_mlp.py example; both call sites keep going through
      the helper, which no longer takes an argument.

Note: leading indentation in this copy is normalised. Apply with
`git apply --ignore-whitespace`, or regenerate the patch from the working tree.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c13e00942..2780b580d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -37,11 +37,8 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
     STRING "Choose the type of build." FORCE)
 endif()
 
-# option for using Python
-option(FF_USE_PYTHON "Enable Python" ON)
-if (FF_USE_PYTHON)
-  find_package(Python3 COMPONENTS Interpreter Development)
-endif()
+# Python
+find_package(Python3 REQUIRED COMPONENTS Interpreter Development)
 
 if(INSTALL_DIR)
   message(STATUS "INSTALL_DIR: ${INSTALL_DIR}")
@@ -398,15 +395,29 @@ elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin")
 endif()
 
 # python related
-if (FF_USE_PYTHON)
-  find_package(Python COMPONENTS Interpreter Development)
-  # create flexflow_cffi_header.py
+find_package(Python REQUIRED COMPONENTS Interpreter Development)
+# create flexflow_cffi_header.py
+add_custom_command(TARGET flexflow
+  PRE_BUILD
+  COMMAND ${FLEXFLOW_ROOT}/python/flexflow_cffi_build.py --ffhome-dir ${FLEXFLOW_ROOT} --output-dir ${FLEXFLOW_ROOT}/python/flexflow/core
+  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+  COMMENT "Creating flexflow_cffi_header.py..."
+)
+if (NOT FF_BUILD_FROM_PYPI)
+  # generate the Legion Python bindings library. When building from pip, we need to do this post-install to prevent Legion from overwriting the path to the Legion shared library
+  add_custom_command(TARGET flexflow
+    POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E env CMAKE_BUILD_DIR=${Legion_BINARY_DIR}/runtime CMAKE_INSTALL_PREFIX=${Legion_BINARY_DIR} ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/deps/legion/bindings/python/setup.py build --build-lib=${Legion_BINARY_DIR}/bindings/python ${Legion_PYTHON_EXTRA_INSTALL_ARGS}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/deps/legion/bindings/python
+  )
+  # create flexflow_python interpreter. When building from pip, we install the FF_HOME/python/flexflow_python script instead.
   add_custom_command(TARGET flexflow
     PRE_BUILD
-    COMMAND ${FLEXFLOW_ROOT}/python/flexflow_cffi_build.py --ffhome-dir ${FLEXFLOW_ROOT} --output-dir ${FLEXFLOW_ROOT}/python/flexflow/core
+    COMMAND ${Python_EXECUTABLE} ${FLEXFLOW_ROOT}/python/flexflow_python_build.py --build-dir ${CMAKE_BINARY_DIR}
     WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-    COMMENT "Creating flexflow_cffi_header.py..."
+    COMMENT "Creating flexflow_python interpreter..."
   )
+  install(PROGRAMS ${CMAKE_BINARY_DIR}/flexflow_python DESTINATION "bin")
 endif()
 
 if (INFERENCE_TESTS)
@@ -452,24 +463,24 @@ set(LIB_DEST "lib")
 install(FILES ${FLEXFLOW_HDR} DESTINATION ${INCLUDE_DEST})
 install(TARGETS flexflow DESTINATION ${LIB_DEST})
 # install python
-if (FF_USE_PYTHON)
-  find_package(Python COMPONENTS Interpreter Development)
-  execute_process(COMMAND ${Python_EXECUTABLE} -c "import site, os; print([pkg for func in (site.getsitepackages(), site.getusersitepackages()) for pkg in ([func] if isinstance(func, str) else func) if os.access(pkg, os.W_OK)][0])" OUTPUT_VARIABLE PY_DEST OUTPUT_STRIP_TRAILING_WHITESPACE)
-  if (NOT FF_BUILD_FROM_PYPI)
-    install(
-      DIRECTORY ${FLEXFLOW_ROOT}/python/flexflow/
-      DESTINATION ${PY_DEST}/flexflow
-      FILES_MATCHING
-      PATTERN "*.py")
-  else()
-    # pip automatically installs all *.py files in the python/flexflow folder, but because flexflow_cffi_header.py is generated at build time, we have to install it manually.
-    install(
-      PROGRAMS ${FLEXFLOW_ROOT}/python/flexflow/core/flexflow_cffi_header.py
-      DESTINATION ${PY_DEST}/flexflow/core
-    )
-    # Use setup.py script to re-install the Python bindings library with the right library paths.
-    # Need to put the instructions in a subfolder because of issue below:
-    # https://stackoverflow.com/questions/43875499/do-post-processing-after-make-install-in-cmake
-    add_subdirectory(cmake/pip_install)
-  endif()
+
+find_package(Python REQUIRED COMPONENTS Interpreter Development)
+execute_process(COMMAND ${Python_EXECUTABLE} -c "import site, os; print([pkg for func in (site.getsitepackages(), site.getusersitepackages()) for pkg in ([func] if isinstance(func, str) else func) if os.access(pkg, os.W_OK)][0])" OUTPUT_VARIABLE PY_DEST OUTPUT_STRIP_TRAILING_WHITESPACE)
+if (NOT FF_BUILD_FROM_PYPI)
+  install(
+    DIRECTORY ${FLEXFLOW_ROOT}/python/flexflow/
+    DESTINATION ${PY_DEST}/flexflow
+    FILES_MATCHING
+    PATTERN "*.py")
+else()
+  # pip automatically installs all *.py files in the python/flexflow folder, but because flexflow_cffi_header.py is generated at build time, we have to install it manually.
+  install(
+    PROGRAMS ${FLEXFLOW_ROOT}/python/flexflow/core/flexflow_cffi_header.py
+    DESTINATION ${PY_DEST}/flexflow/core
+  )
+  # Use setup.py script to re-install the Python bindings library with the right library paths.
+  # Need to put the instructions in a subfolder because of issue below:
+  # https://stackoverflow.com/questions/43875499/do-post-processing-after-make-install-in-cmake
+  add_subdirectory(cmake/pip_install)
 endif()
+
diff --git a/cmake/legion.cmake b/cmake/legion.cmake
index 8bce17888..ee88a91ea 100644
--- a/cmake/legion.cmake
+++ b/cmake/legion.cmake
@@ -18,10 +18,8 @@ if(FF_USE_EXTERNAL_LEGION)
 else()
   # Build Legion from source
   message(STATUS "Building Legion from source")
-  if(FF_USE_PYTHON)
-    set(Legion_USE_Python ON CACHE BOOL "enable Legion_USE_Python")
-    set(Legion_BUILD_BINDINGS ON CACHE BOOL "build legion_python")
-  endif()
+  set(Legion_USE_Python ON CACHE BOOL "enable Legion_USE_Python")
+  set(Legion_BUILD_BINDINGS ON CACHE BOOL "build legion_python")
   if("${FF_LEGION_NETWORKS}" STREQUAL "gasnet")
     set(Legion_EMBED_GASNet ON CACHE BOOL "Use embed GASNet")
     set(Legion_EMBED_GASNet_VERSION "GASNet-2022.3.0" CACHE STRING "GASNet version")
diff --git a/cmake/pip_install/CMakeLists.txt b/cmake/pip_install/CMakeLists.txt
index 217d7e14f..055a45908 100644
--- a/cmake/pip_install/CMakeLists.txt
+++ b/cmake/pip_install/CMakeLists.txt
@@ -1,26 +1,24 @@
 # Use setup.py script to re-install the Python bindings library with the right library paths
-if (FF_USE_PYTHON)
-  execute_process(COMMAND ${Python_EXECUTABLE} -c "import site, os; print([pkg for func in (site.getsitepackages(), site.getusersitepackages()) for pkg in ([func] if isinstance(func, str) else func) if os.access(pkg, os.W_OK)][0])" OUTPUT_VARIABLE PY_DEST OUTPUT_STRIP_TRAILING_WHITESPACE)
-  if(FF_BUILD_FROM_PYPI)
-    cmake_path(SET CMAKE_SOURCE_DIR_ NORMALIZE ${CMAKE_CURRENT_SOURCE_DIR}/../../deps/legion)
-    cmake_path(SET CMAKE_BUILD_DIR_ NORMALIZE ${Legion_BINARY_DIR}/runtime)
-    cmake_path(SET CMAKE_INSTALL_PREFIX_ NORMALIZE ${PY_DEST}/../../..)
-    cmake_path(SET WORKING_DIRECTORY_ NORMALIZE ${CMAKE_CURRENT_SOURCE_DIR}/../../deps/legion/bindings/python/)
-    # CMAKE_CURRENT_SOURCE_DIR=/usr/FlexFlow/cmake/pip_install
-    # Legion_BINARY_DIR=/usr/FlexFlow/build//deps/legion
-    # CMAKE_SOURCE_DIR_=/usr/FlexFlow/deps/legion
-    # CMAKE_BUILD_DIR_: /usr/FlexFlow/build//deps/legion/runtime
-    # CMAKE_INSTALL_PREFIX_: /opt/conda/ or /usr/local
-    # WORKING_DIRECTORY_: /usr/FlexFlow/deps/legion/bindings/python/
-    # PY_DEST: /python3.11/site-packages
-    message(STATUS "CMAKE_CURRENT_SOURCE_DIR: ${CMAKE_CURRENT_SOURCE_DIR}")
-    message(STATUS "Legion_BINARY_DIR: ${Legion_BINARY_DIR}")
-    message(STATUS "CMAKE_SOURCE_DIR_: ${CMAKE_SOURCE_DIR_}")
-    message(STATUS "CMAKE_BUILD_DIR_: ${CMAKE_BUILD_DIR_}")
-    message(STATUS "CMAKE_INSTALL_PREFIX_: ${CMAKE_INSTALL_PREFIX_}")
-    message(STATUS "WORKING_DIRECTORY_: ${WORKING_DIRECTORY_}")
-    message(STATUS "PY_DEST: ${PY_DEST}")
-    install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E echo \"Editing path to Legion library using path: ${CMAKE_INSTALL_PREFIX_} \")")
-    install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E env CMAKE_SOURCE_DIR=${CMAKE_SOURCE_DIR_} CMAKE_BUILD_DIR=${CMAKE_BUILD_DIR_} CMAKE_INSTALL_PREFIX=${PY_DEST}/flexflow ${Python3_EXECUTABLE} setup.py install --prefix ${CMAKE_INSTALL_PREFIX_} ${Legion_PYTHON_EXTRA_INSTALL_ARGS} WORKING_DIRECTORY ${WORKING_DIRECTORY_} COMMAND_ECHO STDOUT COMMAND_ERROR_IS_FATAL ANY)")
-  endif()
+execute_process(COMMAND ${Python_EXECUTABLE} -c "import site, os; print([pkg for func in (site.getsitepackages(), site.getusersitepackages()) for pkg in ([func] if isinstance(func, str) else func) if os.access(pkg, os.W_OK)][0])" OUTPUT_VARIABLE PY_DEST OUTPUT_STRIP_TRAILING_WHITESPACE)
+if(FF_BUILD_FROM_PYPI)
+  cmake_path(SET CMAKE_SOURCE_DIR_ NORMALIZE ${CMAKE_CURRENT_SOURCE_DIR}/../../deps/legion)
+  cmake_path(SET CMAKE_BUILD_DIR_ NORMALIZE ${Legion_BINARY_DIR}/runtime)
+  cmake_path(SET CMAKE_INSTALL_PREFIX_ NORMALIZE ${PY_DEST}/../../..)
+  cmake_path(SET WORKING_DIRECTORY_ NORMALIZE ${CMAKE_CURRENT_SOURCE_DIR}/../../deps/legion/bindings/python/)
+  # CMAKE_CURRENT_SOURCE_DIR=/usr/FlexFlow/cmake/pip_install
+  # Legion_BINARY_DIR=/usr/FlexFlow/build//deps/legion
+  # CMAKE_SOURCE_DIR_=/usr/FlexFlow/deps/legion
+  # CMAKE_BUILD_DIR_: /usr/FlexFlow/build//deps/legion/runtime
+  # CMAKE_INSTALL_PREFIX_: /opt/conda/ or /usr/local
+  # WORKING_DIRECTORY_: /usr/FlexFlow/deps/legion/bindings/python/
+  # PY_DEST: /python3.11/site-packages
+  message(STATUS "CMAKE_CURRENT_SOURCE_DIR: ${CMAKE_CURRENT_SOURCE_DIR}")
+  message(STATUS "Legion_BINARY_DIR: ${Legion_BINARY_DIR}")
+  message(STATUS "CMAKE_SOURCE_DIR_: ${CMAKE_SOURCE_DIR_}")
+  message(STATUS "CMAKE_BUILD_DIR_: ${CMAKE_BUILD_DIR_}")
+  message(STATUS "CMAKE_INSTALL_PREFIX_: ${CMAKE_INSTALL_PREFIX_}")
+  message(STATUS "WORKING_DIRECTORY_: ${WORKING_DIRECTORY_}")
+  message(STATUS "PY_DEST: ${PY_DEST}")
+  install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E echo \"Editing path to Legion library using path: ${CMAKE_INSTALL_PREFIX_} \")")
+  install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E env CMAKE_SOURCE_DIR=${CMAKE_SOURCE_DIR_} CMAKE_BUILD_DIR=${CMAKE_BUILD_DIR_} CMAKE_INSTALL_PREFIX=${PY_DEST}/flexflow ${Python3_EXECUTABLE} setup.py install --prefix ${CMAKE_INSTALL_PREFIX_} ${Legion_PYTHON_EXTRA_INSTALL_ARGS} WORKING_DIRECTORY ${WORKING_DIRECTORY_} COMMAND_ECHO STDOUT COMMAND_ERROR_IS_FATAL ANY)")
 endif()
diff --git a/config/config.inc b/config/config.inc
index 0d8e394a3..7bd29bcaf 100644
--- a/config/config.inc
+++ b/config/config.inc
@@ -77,15 +77,6 @@ if [ -n "$CUDNN_DIR" ]; then
   SET_CUDNN="-DCUDNN_PATH=${CUDNN_DIR}"
 fi
 
-# enable Python
-if [ "$FF_USE_PYTHON" = "ON" ]; then
-  SET_PYTHON="-DFF_USE_PYTHON=ON"
-elif [ "$FF_USE_PYTHON" = "OFF" ]; then
-  SET_PYTHON="-DFF_USE_PYTHON=OFF"
-else
-  SET_PYTHON="-DFF_USE_PYTHON=ON"
-fi
-
 # enable NCCL
 if [ "$FF_USE_NCCL" = "ON" ]; then
   SET_NCCL="-DFF_USE_NCCL=ON"
@@ -190,7 +181,7 @@ if [ -n "$FF_GPU_BACKEND" ]; then
   fi
 fi
 
-CMAKE_FLAGS="-DCUDA_USE_STATIC_CUDA_RUNTIME=OFF -DLegion_HIJACK_CUDART=OFF ${SET_CC} ${SET_CXX} ${SET_INSTALL_DIR} ${SET_INFERENCE_TESTS} ${SET_LIBTORCH_PATH} ${SET_BUILD} ${SET_CUDA_ARCH} ${SET_CUDA} ${SET_CUBLAS} ${SET_CURAND} ${SET_CUDNN} ${SET_HIP_ARCH} ${SET_PYTHON} ${SET_NCCL} ${SET_NCCL_DIR} ${SET_LEGION_NETWORKS} ${SET_UCX} ${SET_EXAMPLES} ${SET_INFERENCE_EXAMPLES} ${SET_BUILD_UNIT_TESTS} ${SET_AVX2} ${SET_MAX_DIM} ${SET_LEGION_MAX_RETURN_SIZE} ${SET_ROCM_PATH} ${SET_FF_GPU_BACKEND}"
+CMAKE_FLAGS="-DCUDA_USE_STATIC_CUDA_RUNTIME=OFF -DLegion_HIJACK_CUDART=OFF ${SET_CC} ${SET_CXX} ${SET_INSTALL_DIR} ${SET_INFERENCE_TESTS} ${SET_LIBTORCH_PATH} ${SET_BUILD} ${SET_CUDA_ARCH} ${SET_CUDA} ${SET_CUBLAS} ${SET_CURAND} ${SET_CUDNN} ${SET_HIP_ARCH} ${SET_NCCL} ${SET_NCCL_DIR} ${SET_LEGION_NETWORKS} ${SET_UCX} ${SET_EXAMPLES} ${SET_INFERENCE_EXAMPLES} ${SET_BUILD_UNIT_TESTS} ${SET_AVX2} ${SET_MAX_DIM} ${SET_LEGION_MAX_RETURN_SIZE} ${SET_ROCM_PATH} ${SET_FF_GPU_BACKEND}"
 
 function run_cmake() {
 SRC_LOCATION=${SRC_LOCATION:=`dirname $0`/../}
diff --git a/config/config.linux b/config/config.linux
index c6e75cb12..aacc6d357 100755
--- a/config/config.linux
+++ b/config/config.linux
@@ -52,9 +52,6 @@ CUDNN_DIR=${CUDNN_DIR:-"/usr/local/cuda"}
 # otherwise, we will build nccl from source
 NCCL_DIR=${NCCL_DIR:-"/usr/local/cuda"}
 
-# enable Python
-FF_USE_PYTHON=${FF_USE_PYTHON:-ON}
-
 # set Legion networks
 FF_LEGION_NETWORKS=${FF_LEGION_NETWORKS:-}
 
@@ -68,7 +65,6 @@ FF_GASNET_SYSTEM=${FF_GASNET_SYSTEM:-slingshot11}
 
 UCX_DIR=${UCX_DIR:-""}
 
-
 # enable avx2
 FF_USE_AVX2=${FF_USE_AVX2:-OFF}
 
@@ -95,7 +91,7 @@ fi
 
 function get_build_configs() {
   # Create a string with the values of the variables set in this script
-  BUILD_CONFIGS="FF_CUDA_ARCH=${FF_CUDA_ARCH} FF_HIP_ARCH=${FF_HIP_ARCH} CUDA_DIR=${CUDA_DIR} CUDNN_DIR=${CUDNN_DIR} CUBLAS_DIR=${CUBLAS_DIR} CURAND_DIR=${CURAND_DIR} NCCL_DIR=${NCCL_DIR} FF_USE_PYTHON=${FF_USE_PYTHON} FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT} UCX_DIR=${UCX_DIR} FF_LEGION_NETWORKS=${FF_LEGION_NETWORKS} FF_USE_AVX2=${FF_USE_AVX2} FF_MAX_DIM=${FF_MAX_DIM} ROCM_PATH=${ROCM_PATH} FF_GPU_BACKEND=${FF_GPU_BACKEND} INSTALL_DIR=${INSTALL_DIR}"
+  BUILD_CONFIGS="FF_CUDA_ARCH=${FF_CUDA_ARCH} FF_HIP_ARCH=${FF_HIP_ARCH} CUDA_DIR=${CUDA_DIR} CUDNN_DIR=${CUDNN_DIR} CUBLAS_DIR=${CUBLAS_DIR} CURAND_DIR=${CURAND_DIR} NCCL_DIR=${NCCL_DIR} FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT} UCX_DIR=${UCX_DIR} FF_LEGION_NETWORKS=${FF_LEGION_NETWORKS} FF_USE_AVX2=${FF_USE_AVX2} FF_MAX_DIM=${FF_MAX_DIM} ROCM_PATH=${ROCM_PATH} FF_GPU_BACKEND=${FF_GPU_BACKEND} INSTALL_DIR=${INSTALL_DIR}"
 }
 
 if [[ -n "$1" && ( "$1" == "CMAKE_FLAGS" || "$1" == "CUDA_PATH" ) ]]; then
diff --git a/inference/python/incr_decoding.py b/inference/python/incr_decoding.py
index 4bb6892a6..41da995e9 100644
--- a/inference/python/incr_decoding.py
+++ b/inference/python/incr_decoding.py
@@ -63,7 +63,7 @@ def get_configs():
         }
         llm_configs = {
             # required parameters
-            "llm_model": "tiiuae/falcon-7b",
+            "llm_model": "meta-llama/Llama-3.2-1B-Instruct",
             # optional parameters
             "cache_path": os.environ.get("FF_CACHE_PATH", ""),
             "refresh_cache": False,
diff --git a/tests/python_interface_test.sh b/tests/python_interface_test.sh
index 2c1cf7e60..766b02f43 100755
--- a/tests/python_interface_test.sh
+++ b/tests/python_interface_test.sh
@@ -4,21 +4,7 @@ set -e
 
 check_python_interface() {
-  # Usage: check_python_interface {before-installation, after-installation}
-  GPUS=1
-  BATCHSIZE=$((GPUS * 64))
-  FSIZE=14048
-  ZSIZE=12192
-  ONLY_DATA_PARALLEL=true
-  installation_status=${1:-"before-installation"}
-
-  # Generate configs JSON files
-  test_params=$(jq -n --arg num_gpus "$GPUS" --arg memory_per_gpu "$FSIZE" --arg zero_copy_memory_per_node "$ZSIZE" --arg batch_size "$BATCHSIZE" --arg only_data_parallel "$ONLY_DATA_PARALLEL" '{"num_gpus":$num_gpus,"memory_per_gpu":$memory_per_gpu,"zero_copy_memory_per_node":$zero_copy_memory_per_node,"batch_size":$batch_size,"only_data_parallel":$only_data_parallel}')
-  mkdir -p /tmp/flexflow/training_tests
-  echo "$test_params" > /tmp/flexflow/training_tests/test_params.json
-
-  EXE="python"
-  echo "Running a single-GPU Python test to check the Python interface (native python interpreter)"
-  $EXE "$FF_HOME"/examples/python/keras/seq_mnist_mlp.py -config-file /tmp/flexflow/training_tests/test_params.json
+  # Usage: check_python_interface   (takes no arguments; runs the Python incremental-decoding example)
+  python "$FF_HOME"/inference/python/incr_decoding.py
 }
 
 
@@ -37,17 +23,17 @@ if [[ "$installation_status" == "before-installation" ]]; then
   python -c "import flexflow.core; import flexflow.serve as ff; exit()"
   unset PYTHONPATH
   unset LD_LIBRARY_PATH
-  # Run a single-gpu test using the native python interpreter
+  # Run simple python inference test
   export LD_LIBRARY_PATH="${BUILD_FOLDER}:${BUILD_FOLDER}/deps/legion/lib:${LD_LIBRARY_PATH}"
   export PYTHONPATH="${FF_HOME}/python:${BUILD_FOLDER}/deps/legion/bindings/python:${PYTHONPATH}"
-  check_python_interface python
+  check_python_interface
   unset PYTHONPATH
   unset LD_LIBRARY_PATH
 elif [[ "$installation_status" == "after-installation" ]]; then
   # Check availability of flexflow modules in Python
   python -c "import flexflow.core; import flexflow.serve as ff; exit()"
-  # Run a single-gpu test using the native python interpreter
-  check_python_interface python
+  # Run simple python inference test
+  check_python_interface
 else
   echo "Invalid installation status!"
   echo "Usage: $0 {before-installation, after-installation}"