diff --git a/poetry.lock b/poetry.lock index d03b5ea..6896bf4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "aiofiles" @@ -644,6 +644,26 @@ humanfriendly = ">=9.1" [package.extras] cron = ["capturer (>=2.4)"] +[[package]] +name = "compressed-tensors" +version = "0.6.0" +description = "Library for utilization of compressed safetensors of neural network models" +optional = false +python-versions = "*" +files = [ + {file = "compressed-tensors-0.6.0.tar.gz", hash = "sha256:639ca97afc852602be0d3666b236ad6a96880de45af87851f515047eff700927"}, + {file = "compressed_tensors-0.6.0-py3-none-any.whl", hash = "sha256:1be9c466e38b992b1d462e577f7e1b2bfad5d1aa0e25e9c95ab1ee458b9e92a2"}, +] + +[package.dependencies] +pydantic = ">=2.0" +torch = ">=1.7.0" +transformers = "*" + +[package.extras] +accelerate = ["accelerate"] +dev = ["black (==22.12.0)", "flake8 (>=3.8.3)", "isort (==5.8.0)", "nbconvert (>=7.16.3)", "pytest (>=6.0.0)", "wheel (>=0.36.2)"] + [[package]] name = "crashtest" version = "0.4.1" @@ -1966,6 +1986,7 @@ files = [ [package.dependencies] jsonschema = ">=4.21.1,<5.0.0" numpy = {version = ">=1.25", markers = "python_version >= \"3.9\""} +opencv-python-headless = {version = ">=4.0.0,<5.0.0", optional = true, markers = "extra == \"opencv\""} pillow = ">=10.3.0,<11.0.0" pydantic = ">=2.6.1,<3.0.0" requests = ">=2.0.0,<3.0.0" @@ -2627,6 +2648,25 @@ typing-extensions = ">=4.11,<5" [package.extras] datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] +[[package]] +name = "opencv-python-headless" +version = "4.10.0.84" +description = "Wrapper package for OpenCV python bindings." +optional = false +python-versions = ">=3.6" +files = [ + {file = "opencv-python-headless-4.10.0.84.tar.gz", hash = "sha256:f2017c6101d7c2ef8d7bc3b414c37ff7f54d64413a1847d89970b6b7069b4e1a"}, + {file = "opencv_python_headless-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:a4f4bcb07d8f8a7704d9c8564c224c8b064c63f430e95b61ac0bffaa374d330e"}, + {file = "opencv_python_headless-4.10.0.84-cp37-abi3-macosx_12_0_x86_64.whl", hash = "sha256:5ae454ebac0eb0a0b932e3406370aaf4212e6a3fdb5038cc86c7aea15a6851da"}, + {file = "opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46071015ff9ab40fccd8a163da0ee14ce9846349f06c6c8c0f2870856ffa45db"}, + {file = "opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:377d08a7e48a1405b5e84afcbe4798464ce7ee17081c1c23619c8b398ff18295"}, + {file = "opencv_python_headless-4.10.0.84-cp37-abi3-win32.whl", hash = "sha256:9092404b65458ed87ce932f613ffbb1106ed2c843577501e5768912360fc50ec"}, + {file = "opencv_python_headless-4.10.0.84-cp37-abi3-win_amd64.whl", hash = "sha256:afcf28bd1209dd58810d33defb622b325d3cbe49dcd7a43a902982c33e5fad05"}, +] + +[package.dependencies] +numpy = {version = ">=1.23.5", markers = "python_version >= \"3.11\""} + [[package]] name = "orjson" version = "3.10.7" @@ -5076,13 +5116,13 @@ docs = ["sphinx (==6.1.3)", "sphinx-mdinclude (==0.5.3)"] [[package]] name = "transformers" -version = "4.45.1" +version = "4.46.0" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = false python-versions = ">=3.8.0" files = [ - {file = "transformers-4.45.1-py3-none-any.whl", hash = "sha256:21e3f47aa7256dbbfb5215937a3168a984c94432ce3a16b7908265807d62aee8"}, - {file = "transformers-4.45.1.tar.gz", hash = "sha256:9cace11072172df05ca6a694fcd1f5064a55b63285e492bd88f0ad1cec270f02"}, + {file = "transformers-4.46.0-py3-none-any.whl", hash = "sha256:e161268ae8bee315eb9e9b4c0b27f1bd6980f91e0fc292d75249193d339704c0"}, + {file = "transformers-4.46.0.tar.gz", hash = "sha256:3a9e2eb537094db11c3652334d281afa4766c0e5091c4dcdb454e9921bb0d2b7"}, ] [package.dependencies] @@ -5100,13 +5140,13 @@ tqdm = ">=4.27" [package.extras] accelerate = ["accelerate (>=0.26.0)"] agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch"] -all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision"] +all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision"] audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] benchmark = ["optimum-benchmark (>=0.3.0)"] codecarbon = ["codecarbon (==1.2.0)"] deepspeed = ["accelerate (>=0.26.0)", "deepspeed (>=0.9.3)"] deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] -dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.20,<0.21)", "urllib3 (<2.0.0)"] dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "libcst", "librosa", "nltk (<=3.8.1)", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"] @@ -5140,7 +5180,7 @@ torch = ["accelerate (>=0.26.0)", "torch"] torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"] torchhub = ["filelock", "huggingface-hub (>=0.23.2,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.20,<0.21)", "torch", "tqdm (>=4.27)"] -video = ["av (==9.2.0)", "decord (==0.6.0)"] +video = ["av (==9.2.0)"] vision = ["Pillow (>=10.0.1,<=15.0)"] [[package]] @@ -5155,6 +5195,11 @@ files = [ {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"}, {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"}, {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"}, + {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"}, + {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"}, + {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"}, + {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"}, + {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"}, ] [package.dependencies] @@ -5273,47 +5318,54 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", [[package]] name = "uvloop" -version = "0.20.0" +version = "0.21.0" description = "Fast implementation of asyncio event loop on top of libuv" optional = false python-versions = ">=3.8.0" files = [ - {file = "uvloop-0.20.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9ebafa0b96c62881d5cafa02d9da2e44c23f9f0cd829f3a32a6aff771449c996"}, - {file = "uvloop-0.20.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:35968fc697b0527a06e134999eef859b4034b37aebca537daeb598b9d45a137b"}, - {file = "uvloop-0.20.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b16696f10e59d7580979b420eedf6650010a4a9c3bd8113f24a103dfdb770b10"}, - {file = "uvloop-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b04d96188d365151d1af41fa2d23257b674e7ead68cfd61c725a422764062ae"}, - {file = "uvloop-0.20.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:94707205efbe809dfa3a0d09c08bef1352f5d3d6612a506f10a319933757c006"}, - {file = "uvloop-0.20.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:89e8d33bb88d7263f74dc57d69f0063e06b5a5ce50bb9a6b32f5fcbe655f9e73"}, - {file = "uvloop-0.20.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e50289c101495e0d1bb0bfcb4a60adde56e32f4449a67216a1ab2750aa84f037"}, - {file = "uvloop-0.20.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e237f9c1e8a00e7d9ddaa288e535dc337a39bcbf679f290aee9d26df9e72bce9"}, - {file = "uvloop-0.20.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:746242cd703dc2b37f9d8b9f173749c15e9a918ddb021575a0205ec29a38d31e"}, - {file = "uvloop-0.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82edbfd3df39fb3d108fc079ebc461330f7c2e33dbd002d146bf7c445ba6e756"}, - {file = "uvloop-0.20.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:80dc1b139516be2077b3e57ce1cb65bfed09149e1d175e0478e7a987863b68f0"}, - {file = "uvloop-0.20.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4f44af67bf39af25db4c1ac27e82e9665717f9c26af2369c404be865c8818dcf"}, - {file = "uvloop-0.20.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:4b75f2950ddb6feed85336412b9a0c310a2edbcf4cf931aa5cfe29034829676d"}, - {file = "uvloop-0.20.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:77fbc69c287596880ecec2d4c7a62346bef08b6209749bf6ce8c22bbaca0239e"}, - {file = "uvloop-0.20.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6462c95f48e2d8d4c993a2950cd3d31ab061864d1c226bbf0ee2f1a8f36674b9"}, - {file = "uvloop-0.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:649c33034979273fa71aa25d0fe120ad1777c551d8c4cd2c0c9851d88fcb13ab"}, - {file = "uvloop-0.20.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3a609780e942d43a275a617c0839d85f95c334bad29c4c0918252085113285b5"}, - {file = "uvloop-0.20.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aea15c78e0d9ad6555ed201344ae36db5c63d428818b4b2a42842b3870127c00"}, - {file = "uvloop-0.20.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0e94b221295b5e69de57a1bd4aeb0b3a29f61be6e1b478bb8a69a73377db7ba"}, - {file = "uvloop-0.20.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fee6044b64c965c425b65a4e17719953b96e065c5b7e09b599ff332bb2744bdf"}, - {file = "uvloop-0.20.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:265a99a2ff41a0fd56c19c3838b29bf54d1d177964c300dad388b27e84fd7847"}, - {file = "uvloop-0.20.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b10c2956efcecb981bf9cfb8184d27d5d64b9033f917115a960b83f11bfa0d6b"}, - {file = "uvloop-0.20.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e7d61fe8e8d9335fac1bf8d5d82820b4808dd7a43020c149b63a1ada953d48a6"}, - {file = "uvloop-0.20.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2beee18efd33fa6fdb0976e18475a4042cd31c7433c866e8a09ab604c7c22ff2"}, - {file = "uvloop-0.20.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d8c36fdf3e02cec92aed2d44f63565ad1522a499c654f07935c8f9d04db69e95"}, - {file = "uvloop-0.20.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a0fac7be202596c7126146660725157d4813aa29a4cc990fe51346f75ff8fde7"}, - {file = "uvloop-0.20.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d0fba61846f294bce41eb44d60d58136090ea2b5b99efd21cbdf4e21927c56a"}, - {file = "uvloop-0.20.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95720bae002ac357202e0d866128eb1ac82545bcf0b549b9abe91b5178d9b541"}, - {file = "uvloop-0.20.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:36c530d8fa03bfa7085af54a48f2ca16ab74df3ec7108a46ba82fd8b411a2315"}, - {file = "uvloop-0.20.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e97152983442b499d7a71e44f29baa75b3b02e65d9c44ba53b10338e98dedb66"}, - {file = "uvloop-0.20.0.tar.gz", hash = "sha256:4603ca714a754fc8d9b197e325db25b2ea045385e8a3ad05d3463de725fdf469"}, + {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f"}, + {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:196274f2adb9689a289ad7d65700d37df0c0930fd8e4e743fa4834e850d7719d"}, + {file = "uvloop-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f38b2e090258d051d68a5b14d1da7203a3c3677321cf32a95a6f4db4dd8b6f26"}, + {file = "uvloop-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87c43e0f13022b998eb9b973b5e97200c8b90823454d4bc06ab33829e09fb9bb"}, + {file = "uvloop-0.21.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:10d66943def5fcb6e7b37310eb6b5639fd2ccbc38df1177262b0640c3ca68c1f"}, + {file = "uvloop-0.21.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:67dd654b8ca23aed0a8e99010b4c34aca62f4b7fce88f39d452ed7622c94845c"}, + {file = "uvloop-0.21.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c0f3fa6200b3108919f8bdabb9a7f87f20e7097ea3c543754cabc7d717d95cf8"}, + {file = "uvloop-0.21.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0878c2640cf341b269b7e128b1a5fed890adc4455513ca710d77d5e93aa6d6a0"}, + {file = "uvloop-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9fb766bb57b7388745d8bcc53a359b116b8a04c83a2288069809d2b3466c37e"}, + {file = "uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a375441696e2eda1c43c44ccb66e04d61ceeffcd76e4929e527b7fa401b90fb"}, + {file = "uvloop-0.21.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:baa0e6291d91649c6ba4ed4b2f982f9fa165b5bbd50a9e203c416a2797bab3c6"}, + {file = "uvloop-0.21.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4509360fcc4c3bd2c70d87573ad472de40c13387f5fda8cb58350a1d7475e58d"}, + {file = "uvloop-0.21.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:359ec2c888397b9e592a889c4d72ba3d6befba8b2bb01743f72fffbde663b59c"}, + {file = "uvloop-0.21.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f7089d2dc73179ce5ac255bdf37c236a9f914b264825fdaacaded6990a7fb4c2"}, + {file = "uvloop-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baa4dcdbd9ae0a372f2167a207cd98c9f9a1ea1188a8a526431eef2f8116cc8d"}, + {file = "uvloop-0.21.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86975dca1c773a2c9864f4c52c5a55631038e387b47eaf56210f873887b6c8dc"}, + {file = "uvloop-0.21.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:461d9ae6660fbbafedd07559c6a2e57cd553b34b0065b6550685f6653a98c1cb"}, + {file = "uvloop-0.21.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:183aef7c8730e54c9a3ee3227464daed66e37ba13040bb3f350bc2ddc040f22f"}, + {file = "uvloop-0.21.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bfd55dfcc2a512316e65f16e503e9e450cab148ef11df4e4e679b5e8253a5281"}, + {file = "uvloop-0.21.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:787ae31ad8a2856fc4e7c095341cccc7209bd657d0e71ad0dc2ea83c4a6fa8af"}, + {file = "uvloop-0.21.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ee4d4ef48036ff6e5cfffb09dd192c7a5027153948d85b8da7ff705065bacc6"}, + {file = "uvloop-0.21.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3df876acd7ec037a3d005b3ab85a7e4110422e4d9c1571d4fc89b0fc41b6816"}, + {file = "uvloop-0.21.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd53ecc9a0f3d87ab847503c2e1552b690362e005ab54e8a48ba97da3924c0dc"}, + {file = "uvloop-0.21.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5c39f217ab3c663dc699c04cbd50c13813e31d917642d459fdcec07555cc553"}, + {file = "uvloop-0.21.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:17df489689befc72c39a08359efac29bbee8eee5209650d4b9f34df73d22e414"}, + {file = "uvloop-0.21.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bc09f0ff191e61c2d592a752423c767b4ebb2986daa9ed62908e2b1b9a9ae206"}, + {file = "uvloop-0.21.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0ce1b49560b1d2d8a2977e3ba4afb2414fb46b86a1b64056bc4ab929efdafbe"}, + {file = "uvloop-0.21.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e678ad6fe52af2c58d2ae3c73dc85524ba8abe637f134bf3564ed07f555c5e79"}, + {file = "uvloop-0.21.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:460def4412e473896ef179a1671b40c039c7012184b627898eea5072ef6f017a"}, + {file = "uvloop-0.21.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:10da8046cc4a8f12c91a1c39d1dd1585c41162a15caaef165c2174db9ef18bdc"}, + {file = "uvloop-0.21.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c097078b8031190c934ed0ebfee8cc5f9ba9642e6eb88322b9958b649750f72b"}, + {file = "uvloop-0.21.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:46923b0b5ee7fc0020bef24afe7836cb068f5050ca04caf6b487c513dc1a20b2"}, + {file = "uvloop-0.21.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53e420a3afe22cdcf2a0f4846e377d16e718bc70103d7088a4f7623567ba5fb0"}, + {file = "uvloop-0.21.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88cb67cdbc0e483da00af0b2c3cdad4b7c61ceb1ee0f33fe00e09c81e3a6cb75"}, + {file = "uvloop-0.21.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:221f4f2a1f46032b403bf3be628011caf75428ee3cc204a22addf96f586b19fd"}, + {file = "uvloop-0.21.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2d1f581393673ce119355d56da84fe1dd9d2bb8b3d13ce792524e1607139feff"}, + {file = "uvloop-0.21.0.tar.gz", hash = "sha256:3bf12b0fda68447806a7ad847bfa591613177275d35b6724b1ee573faa3704e3"}, ] [package.extras] +dev = ["Cython (>=3.0,<4.0)", "setuptools (>=60)"] docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] -test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] +test = ["aiohttp (>=3.10.5)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] [[package]] name = "virtualenv" @@ -5337,24 +5389,25 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [[package]] name = "vllm" -version = "0.6.2" +version = "0.6.3.post1" description = "A high-throughput and memory-efficient inference and serving engine for LLMs" optional = false python-versions = ">=3.8" files = [ - {file = "vllm-0.6.2-cp38-abi3-manylinux1_x86_64.whl", hash = "sha256:414e2244a6c3a97175e7659f9a6e10c2e295376d1d1e4bec704da18caa237f0b"}, - {file = "vllm-0.6.2.tar.gz", hash = "sha256:2fffd856a25d3defa38a539150fccf9126959ce4c6781c1c5a76d5da7216af59"}, + {file = "vllm-0.6.3.post1-cp38-abi3-manylinux1_x86_64.whl", hash = "sha256:691f10edb9869eb8b85bebfe2c0fb3c6a6b2cf2aefad7cdb2ab97688a57ca60e"}, + {file = "vllm-0.6.3.post1.tar.gz", hash = "sha256:0aae6ddd5348f86bf20e4f323c09e77d5ad2638d77f0d69323c5a63a40f8c143"}, ] [package.dependencies] aiohttp = "*" +compressed-tensors = "0.6.0" einops = "*" -fastapi = {version = ">=0.114.1", markers = "python_version >= \"3.9\""} +fastapi = {version = ">=0.107.0,<0.113.dev0 || >0.114.0", markers = "python_version >= \"3.9\""} filelock = ">=3.10.4" gguf = "0.10.0" importlib-metadata = "*" lm-format-enforcer = "0.10.6" -mistral-common = ">=1.4.3" +mistral-common = {version = ">=1.4.4", extras = ["opencv"]} msgspec = "*" numpy = "<2.0.0" nvidia-ml-py = "*" @@ -5371,14 +5424,14 @@ pydantic = ">=2.9" pyyaml = "*" pyzmq = "*" ray = ">=2.9" -requests = "*" +requests = ">=2.26.0" sentencepiece = "*" tiktoken = ">=0.6.0" tokenizers = ">=0.19.1" torch = "2.4.0" torchvision = "0.19" tqdm = "*" -transformers = ">=4.45.0" +transformers = ">=4.45.2" typing-extensions = ">=4.10" uvicorn = {version = "*", extras = ["standard"]} xformers = {version = "0.0.27.post2", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} @@ -5386,7 +5439,6 @@ xformers = {version = "0.0.27.post2", markers = "platform_system == \"Linux\" an [package.extras] audio = ["librosa", "soundfile"] tensorizer = ["tensorizer (>=2.9.0)"] -video = ["opencv-python"] [[package]] name = "watchfiles" @@ -5934,4 +5986,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "~3.11" -content-hash = "31428dbc1f1c7351c03037180f72963988b1296123ed60685a108d0b60aac005" +content-hash = "82619c826f78a374732469fe005e8fc43b913198b82c2d252e0bc19844a169ea" diff --git a/pyproject.toml b/pyproject.toml index 1f2717b..a2d5c04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ langchain = "^0.3.0" langchain-openai = "^0.2.0" av = "^12.3.0" pybase64 = "^1.4.0" -vllm = "^0.6.2" +vllm = "^0.6.3.post1" [build-system] build-backend = "poetry.core.masonry.api" diff --git a/requirements.txt b/requirements.txt index 67b7d8d..09e1e4f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,7 @@ click==8.1.7 ; python_version >= "3.11" and python_version < "3.12" cloudpickle==3.0.0 ; python_version >= "3.11" and python_version < "3.12" colorama==0.4.6 ; python_version >= "3.11" and python_version < "3.12" and (sys_platform == "win32" or platform_system == "Windows") coloredlogs==15.0.1 ; python_version >= "3.11" and python_version < "3.12" +compressed-tensors==0.6.0 ; python_version >= "3.11" and python_version < "3.12" cryptography==43.0.1 ; python_version >= "3.11" and python_version < "3.12" ctranslate2==4.4.0 ; python_version >= "3.11" and python_version < "3.12" datasets==2.14.4 ; python_version >= "3.11" and python_version < "3.12" @@ -58,7 +59,7 @@ lark==1.2.2 ; python_version >= "3.11" and python_version < "3.12" llvmlite==0.43.0 ; python_version >= "3.11" and python_version < "3.12" lm-format-enforcer==0.10.6 ; python_version >= "3.11" and python_version < "3.12" markupsafe==2.1.5 ; python_version >= "3.11" and python_version < "3.12" -mistral-common==1.4.4 ; python_version >= "3.11" and python_version < "3.12" +mistral-common[opencv]==1.4.4 ; python_version >= "3.11" and python_version < "3.12" mpmath==1.3.0 ; python_version >= "3.11" and python_version < "3.12" msgpack==1.1.0 ; python_version >= "3.11" and python_version < "3.12" msgspec==0.18.6 ; python_version >= "3.11" and python_version < "3.12" @@ -84,6 +85,7 @@ nvidia-nvjitlink-cu12==12.6.68 ; platform_system == "Linux" and platform_machine nvidia-nvtx-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.11" and python_version < "3.12" onnxruntime==1.19.2 ; python_version >= "3.11" and python_version < "3.12" openai==1.48.0 ; python_version >= "3.11" and python_version < "3.12" +opencv-python-headless==4.10.0.84 ; python_version >= "3.11" and python_version < "3.12" orjson==3.10.7 ; python_version >= "3.11" and python_version < "3.12" outlines==0.0.46 ; python_version >= "3.11" and python_version < "3.12" packaging==24.1 ; python_version >= "3.11" and python_version < "3.12" @@ -131,15 +133,15 @@ torch==2.4.0 ; python_version >= "3.11" and python_version < "3.12" torchaudio==2.4.0 ; python_version >= "3.11" and python_version < "3.12" torchvision==0.19.0 ; python_version >= "3.11" and python_version < "3.12" tqdm==4.66.5 ; python_version >= "3.11" and python_version < "3.12" -transformers==4.45.1 ; python_version >= "3.11" and python_version < "3.12" +transformers==4.46.0 ; python_version >= "3.11" and python_version < "3.12" triton==3.0.0 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version < "3.12" and python_version >= "3.11" typing-extensions==4.12.2 ; python_version >= "3.11" and python_version < "3.12" tzdata==2024.2 ; python_version >= "3.11" and python_version < "3.12" urllib3==2.2.3 ; python_version >= "3.11" and python_version < "3.12" uuid6==2024.7.10 ; python_version >= "3.11" and python_version < "3.12" uvicorn[standard]==0.29.0 ; python_version >= "3.11" and python_version < "3.12" -uvloop==0.20.0 ; (sys_platform != "win32" and sys_platform != "cygwin") and platform_python_implementation != "PyPy" and python_version >= "3.11" and python_version < "3.12" -vllm==0.6.2 ; python_version >= "3.11" and python_version < "3.12" +uvloop==0.21.0 ; (sys_platform != "win32" and sys_platform != "cygwin") and platform_python_implementation != "PyPy" and python_version >= "3.11" and python_version < "3.12" +vllm==0.6.3.post1 ; python_version >= "3.11" and python_version < "3.12" watchfiles==0.24.0 ; python_version >= "3.11" and python_version < "3.12" websockets==13.1 ; python_version >= "3.11" and python_version < "3.12" xformers==0.0.27.post2 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.11" and python_version < "3.12" diff --git a/skynet/env.py b/skynet/env.py index 1763ea6..bd64f55 100644 --- a/skynet/env.py +++ b/skynet/env.py @@ -2,10 +2,15 @@ import sys import uuid +import torch + app_uuid = str(uuid.uuid4()) is_mac = sys.platform == 'darwin' +device = 'cuda' if torch.cuda.is_available() else 'cpu' +use_vllm = device == 'cuda' + # utilities def tobool(val: str | None): @@ -18,6 +23,7 @@ def tobool(val: str | None): # general +app_port = int(os.environ.get('SKYNET_PORT', 8000)) log_level = os.environ.get('LOG_LEVEL', 'DEBUG').strip().upper() supported_modules = {'summaries:dispatcher', 'summaries:executor', 'streaming_whisper'} enabled_modules = set(os.environ.get('ENABLED_MODULES', 'summaries:dispatcher,summaries:executor').split(',')) @@ -36,9 +42,10 @@ def tobool(val: str | None): # openai api llama_cpp_server_path = os.environ.get('LLAMA_CPP_SERVER_PATH', './llama.cpp/llama-server') -vllm_server_path = os.environ.get('VLLM_SERVER_PATH', 'vllm.entrypoints.openai.api_server') -openai_api_server_port = int(os.environ.get('OPENAI_API_SERVER_PORT', 8003)) -openai_api_base_url = os.environ.get('OPENAI_API_BASE_URL', f'http://localhost:{openai_api_server_port}') +openai_api_server_port = int(os.environ.get('OPENAI_API_SERVER_PORT', app_port if use_vllm else 8003)) +openai_api_base_url = os.environ.get( + 'OPENAI_API_BASE_URL', f'http://localhost:{openai_api_server_port}{"/openai" if use_vllm else ""}' +) # openai openai_credentials_file = os.environ.get('SKYNET_CREDENTIALS_PATH') diff --git a/skynet/index.html b/skynet/index.html index 5c72ea7..7508be5 100644 --- a/skynet/index.html +++ b/skynet/index.html @@ -7,6 +7,9 @@

Skynet

  • Summaries API
  • +
  • + OpenAI API +
  • diff --git a/skynet/main.py b/skynet/main.py index a42c408..559eda5 100644 --- a/skynet/main.py +++ b/skynet/main.py @@ -7,18 +7,23 @@ from fastapi.responses import FileResponse from skynet.agent import create_tcpserver -from skynet.env import enable_haproxy_agent, enable_metrics, modules +from skynet.env import app_port, device, enable_haproxy_agent, enable_metrics, is_mac, modules, use_vllm from skynet.logs import get_logger from skynet.utils import create_app, create_webserver log = get_logger(__name__) if not modules: - log.warn('No modules enabled!') + log.warning('No modules enabled!') sys.exit(1) log.info(f'Enabled modules: {modules}') +if device == 'cuda' or is_mac: + log.info('Using GPU') +else: + log.info('Using CPU') + @asynccontextmanager async def lifespan(main_app: FastAPI): @@ -40,7 +45,14 @@ async def lifespan(main_app: FastAPI): if 'summaries:executor' in modules: from skynet.modules.ttt.summaries.app import executor_startup as executor_startup - await executor_startup() + if use_vllm: + from vllm.entrypoints.openai.api_server import lifespan + + app = create_app(lifespan=lifespan) + await executor_startup(app) + main_app.mount('/openai', app) + else: + await executor_startup() yield @@ -61,7 +73,7 @@ def root(): async def main(): - tasks = [asyncio.create_task(create_webserver('skynet.main:app', port=8000))] + tasks = [asyncio.create_task(create_webserver('skynet.main:app', port=app_port))] if enable_metrics: tasks.append(asyncio.create_task(create_webserver('skynet.metrics:metrics', port=8001))) diff --git a/skynet/modules/stt/streaming_whisper/cfg.py b/skynet/modules/stt/streaming_whisper/cfg.py index ee4d17a..d2e42d9 100644 --- a/skynet/modules/stt/streaming_whisper/cfg.py +++ b/skynet/modules/stt/streaming_whisper/cfg.py @@ -1,19 +1,24 @@ import os -import torch from faster_whisper import WhisperModel -from skynet.env import whisper_compute_type, whisper_device, whisper_gpu_indices, whisper_model_name, whisper_model_path +from skynet.env import ( + device, + whisper_compute_type, + whisper_device, + whisper_gpu_indices, + whisper_model_name, + whisper_model_path, +) from skynet.logs import get_logger from skynet.modules.stt.streaming_whisper.utils import vad_utils as vad -from skynet.utils import get_device log = get_logger(__name__) vad_model = vad.init_jit_model(f'{os.getcwd()}/skynet/modules/stt/streaming_whisper/models/vad/silero_vad.jit') -device = whisper_device if whisper_device != 'auto' else get_device() +device = whisper_device if whisper_device != 'auto' else device log.info(f'Using {device}') num_workers = 1 gpu_indices = [0] diff --git a/skynet/modules/ttt/openai_api/app.py b/skynet/modules/ttt/openai_api/app.py index 7de30a6..9408cb6 100644 --- a/skynet/modules/ttt/openai_api/app.py +++ b/skynet/modules/ttt/openai_api/app.py @@ -1,5 +1,8 @@ +import asyncio import subprocess +from fastapi import FastAPI + from skynet import http_client from skynet.env import ( llama_cpp_server_path, @@ -9,37 +12,52 @@ llama_path, openai_api_base_url, openai_api_server_port, - vllm_server_path, + use_vllm, ) from skynet.logs import get_logger -from skynet.utils import get_device - -proc = None -use_vllm = get_device() == 'cuda' +from skynet.utils import dependencies, responses log = get_logger(__name__) -def initialize(): - log.info('Starting OpenAI API server...') +async def run_vllm_server(args, app: FastAPI): + from vllm.entrypoints.openai.api_server import build_async_engine_client, init_app_state, router + + async with build_async_engine_client(args) as engine_client: + app.include_router(router, dependencies=dependencies, responses=responses) + + model_config = await engine_client.get_model_config() + init_app_state(engine_client, model_config, app.state, args) - global proc + +def initialize(app: FastAPI | None = None): + log.info('Starting OpenAI API server...') if use_vllm: - openai_api_server_path = vllm_server_path - proc = subprocess.Popen( - f'python -m {openai_api_server_path} \ - --disable-log-requests \ - --model {llama_path} \ - --gpu_memory_utilization 0.99 \ - --max-model-len {llama_n_ctx} \ - --port {openai_api_server_port}'.split(), - shell=False, + from vllm.entrypoints.openai.cli_args import make_arg_parser + from vllm.utils import FlexibleArgumentParser + + parser = FlexibleArgumentParser(description="vLLM OpenAI-Compatible RESTful API server.") + parser = make_arg_parser(parser) + args = parser.parse_args( + [ + '--disable-frontend-multiprocessing', # disable running the engine in a separate process + '--disable-log-requests', + '--model', + llama_path, + '--gpu_memory_utilization', + '0.99', + '--max-model-len', + str(llama_n_ctx), + '--port', + str(openai_api_server_port), + ] ) + + asyncio.create_task(run_vllm_server(args, app)) else: - openai_api_server_path = llama_cpp_server_path - proc = subprocess.Popen( - f'{openai_api_server_path} \ + subprocess.Popen( + f'{llama_cpp_server_path} \ --batch-size {llama_n_batch} \ --ctx-size {llama_n_ctx} \ --flash-attn \ @@ -49,25 +67,17 @@ def initialize(): shell=False, ) - if proc.poll() is not None: - log.error(f'Failed to start OpenAI API server from {openai_api_server_path}') - else: - log.info(f'OpenAI API server started from {openai_api_server_path}') - async def is_ready(): try: - await http_client.get(f'{openai_api_base_url}/health', 'text' if use_vllm else 'json') + response = await http_client.get(f'{openai_api_base_url}/health', 'text' if use_vllm else 'json') + + if use_vllm: + return response == '' return True except Exception: return False -def destroy(): - log.info('Killing OpenAI API subprocess...') - - proc.kill() - - -__all__ = ['destroy', 'initialize', 'restart'] +__all__ = ['initialize', 'is_ready'] diff --git a/skynet/modules/ttt/summaries/app.py b/skynet/modules/ttt/summaries/app.py index 8d3896d..cc730cd 100644 --- a/skynet/modules/ttt/summaries/app.py +++ b/skynet/modules/ttt/summaries/app.py @@ -1,13 +1,13 @@ import random -from fastapi import Request +from fastapi import FastAPI, Request from fastapi_versionizer.versionizer import Versionizer from skynet import http_client from skynet.auth.openai import setup_credentials from skynet.env import echo_requests_base_url, echo_requests_percent, echo_requests_token from skynet.logs import get_logger -from skynet.modules.ttt.openai_api.app import destroy as destroy_openai_api, initialize as initialize_openai_api +from skynet.modules.ttt.openai_api.app import initialize as initialize_openai_api from skynet.utils import create_app from .jobs import start_monitoring_jobs @@ -52,10 +52,10 @@ async def app_startup(): log.info('Persistence initialized') -async def executor_startup(): +async def executor_startup(app: FastAPI | None = None): await setup_credentials() - initialize_openai_api() + initialize_openai_api(app) initialize_summaries() log.info('summaries:executor module initialized') @@ -68,8 +68,6 @@ async def executor_startup(): async def executor_shutdown(): - destroy_openai_api() - await db.close() log.info('Persistence shutdown') diff --git a/skynet/utils.py b/skynet/utils.py index e8b55bd..821a954 100644 --- a/skynet/utils.py +++ b/skynet/utils.py @@ -1,4 +1,3 @@ -import torch import uvicorn from fastapi import APIRouter, Depends, FastAPI from fastapi.middleware.cors import CORSMiddleware @@ -47,11 +46,3 @@ async def create_webserver(app, port): ) server = uvicorn.Server(server_config) await server.serve() - - -def get_device() -> str: - if torch.cuda.is_available(): - log.debug('CUDA device found.') - return 'cuda' - log.warning('No CUDA device found, defaulting to CPU.') - return 'cpu'