Added GIL release for time-consuming methods. #1673

Open
wants to merge 10 commits into master
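This PR releases Python's Global Interpreter Lock (GIL) around long-running native calls (model compilation, inference, generation, tokenization), so other Python threads can keep running while OpenVINO works. Two pybind11 idioms appear throughout the diff: py::call_guard<py::gil_scoped_release>() for plain method bindings whose arguments are already native types once the call starts, and a scoped py::gil_scoped_release inside a lambda when some Python-object work (such as reading kwargs) still has to happen first. A minimal sketch of both patterns follows; the HeavyModel type and its methods are illustrative stand-ins, not code from this repository.

#include <pybind11/pybind11.h>
#include <string>

namespace py = pybind11;

// Illustrative stand-in for a long-running native model; not part of this PR.
struct HeavyModel {
    void infer() { /* long native computation, no Python objects touched */ }
    void compile(const std::string& device) { /* long native compilation */ }
};

PYBIND11_MODULE(example, m) {
    py::class_<HeavyModel>(m, "HeavyModel")
        .def(py::init<>())
        // Pattern 1: call_guard releases the GIL for the whole call and
        // re-acquires it automatically when the call returns.
        .def("infer", &HeavyModel::infer, py::call_guard<py::gil_scoped_release>())
        // Pattern 2: release the GIL manually inside a lambda, after any work
        // that still needs Python objects has been done while holding it.
        .def("compile",
             [](HeavyModel& self, const std::string& device) {
                 py::gil_scoped_release release;  // re-acquired when 'release' goes out of scope
                 self.compile(device);
             },
             py::arg("device"));
}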
4 changes: 2 additions & 2 deletions src/python/openvino_genai/py_openvino_genai.pyi
@@ -524,7 +524,7 @@ class FluxTransformer2DModel:
"""
def get_config(self) -> FluxTransformer2DModel.Config:
...
def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor:
def infer(self, latent: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor:
...
def reshape(self, batch_size: int, height: int, width: int, tokenizer_model_max_length: int) -> FluxTransformer2DModel:
...
@@ -1354,7 +1354,7 @@ class SD3Transformer2DModel:
"""
def get_config(self) -> SD3Transformer2DModel.Config:
...
def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor:
def infer(self, latent: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor:
...
def reshape(self, batch_size: int, height: int, width: int, tokenizer_model_max_length: int) -> SD3Transformer2DModel:
...
53 changes: 42 additions & 11 deletions src/python/py_image_generation_models.cpp
@@ -70,14 +70,20 @@ void init_clip_text_model(py::module_& m) {
clip_text_model.def("get_config", &ov::genai::CLIPTextModel::get_config)
.def("reshape", &ov::genai::CLIPTextModel::reshape, py::arg("batch_size"))
.def("set_adapters", &ov::genai::CLIPTextModel::set_adapters, py::arg("adapters"))
.def("infer", &ov::genai::CLIPTextModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"))
.def("infer",
&ov::genai::CLIPTextModel::infer,
py::call_guard<py::gil_scoped_release>(),
py::arg("pos_prompt"),
py::arg("neg_prompt"),
py::arg("do_classifier_free_guidance"))
.def("get_output_tensor", &ov::genai::CLIPTextModel::get_output_tensor, py::arg("idx"))
.def(
"compile",
[](ov::genai::CLIPTextModel& self,
const std::string& device,
const py::kwargs& kwargs
) {
py::gil_scoped_release rel;
self.compile(device, pyutils::kwargs_to_any_map(kwargs));
},
py::arg("device"), "device on which inference will be done",
@@ -133,7 +139,11 @@ void init_clip_text_model_with_projection(py::module_& m) {
.def_readwrite("num_hidden_layers", &ov::genai::CLIPTextModelWithProjection::Config::num_hidden_layers);

clip_text_model_with_projection.def("reshape", &ov::genai::CLIPTextModelWithProjection::reshape, py::arg("batch_size"))
.def("infer", &ov::genai::CLIPTextModelWithProjection::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"))
.def("infer", &ov::genai::CLIPTextModelWithProjection::infer,
py::call_guard<py::gil_scoped_release>(),
py::arg("pos_prompt"),
py::arg("neg_prompt"),
py::arg("do_classifier_free_guidance"))
.def("get_config", &ov::genai::CLIPTextModelWithProjection::get_config)
.def("get_output_tensor", &ov::genai::CLIPTextModelWithProjection::get_output_tensor, py::arg("idx"))
.def("set_adapters", &ov::genai::CLIPTextModelWithProjection::set_adapters, py::arg("adapters"))
@@ -143,6 +153,7 @@ void init_clip_text_model_with_projection(py::module_& m) {
const std::string& device,
const py::kwargs& kwargs
) {
py::gil_scoped_release rel;
self.compile(device, pyutils::kwargs_to_any_map(kwargs));
},
py::arg("device"), "device on which inference will be done",
@@ -189,15 +200,21 @@ void init_t5_encoder_model(py::module_& m) {
model (T5EncoderModel): T5EncoderModel model
)")
.def("reshape", &ov::genai::T5EncoderModel::reshape, py::arg("batch_size"), py::arg("max_sequence_length"))
.def("infer", &ov::genai::T5EncoderModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("max_sequence_length"))
.def("infer",
&ov::genai::T5EncoderModel::infer,
py::call_guard<py::gil_scoped_release>(),
py::arg("pos_prompt"),
py::arg("neg_prompt"),
py::arg("do_classifier_free_guidance"),
py::arg("max_sequence_length"))
.def("get_output_tensor", &ov::genai::T5EncoderModel::get_output_tensor, py::arg("idx"))
// .def("set_adapters", &ov::genai::T5EncoderModel::set_adapters, py::arg("adapters"))
.def(
"compile",
[](ov::genai::T5EncoderModel& self,
const std::string& device,
const py::kwargs& kwargs
) {
py::gil_scoped_release rel;
self.compile(device, pyutils::kwargs_to_any_map(kwargs));
},
py::arg("device"), "device on which inference will be done",
@@ -254,7 +271,11 @@ void init_unet2d_condition_model(py::module_& m) {
unet2d_condition_model.def("get_config", &ov::genai::UNet2DConditionModel::get_config)
.def("reshape", &ov::genai::UNet2DConditionModel::reshape, py::arg("batch_size"), py::arg("height"), py::arg("width"), py::arg("tokenizer_model_max_length"))
.def("set_adapters", &ov::genai::UNet2DConditionModel::set_adapters, py::arg("adapters"))
.def("infer", &ov::genai::UNet2DConditionModel::infer, py::arg("sample"), py::arg("timestep"))
.def("infer",
&ov::genai::UNet2DConditionModel::infer,
py::call_guard<py::gil_scoped_release>(),
py::arg("sample"),
py::arg("timestep"))
.def("set_hidden_states", &ov::genai::UNet2DConditionModel::set_hidden_states, py::arg("tensor_name"), py::arg("encoder_hidden_states"))
.def("do_classifier_free_guidance", &ov::genai::UNet2DConditionModel::do_classifier_free_guidance, py::arg("guidance_scale"))
.def(
@@ -263,6 +284,7 @@ void init_unet2d_condition_model(py::module_& m) {
const std::string& device,
const py::kwargs& kwargs
) {
py::gil_scoped_release rel;
self.compile(device, pyutils::kwargs_to_any_map(kwargs));
},
py::arg("device"), "device on which inference will be done",
@@ -319,15 +341,19 @@ void init_sd3_transformer_2d_model(py::module_& m) {

sd3_transformer_2d_model.def("get_config", &ov::genai::SD3Transformer2DModel::get_config)
.def("reshape", &ov::genai::SD3Transformer2DModel::reshape, py::arg("batch_size"), py::arg("height"), py::arg("width"), py::arg("tokenizer_model_max_length"))
// .def("set_adapters", &ov::genai::SD3Transformer2DModel::set_adapters, py::arg("adapters"))
.def("infer", &ov::genai::SD3Transformer2DModel::infer, py::arg("sample"), py::arg("timestep"))
.def("infer",
&ov::genai::SD3Transformer2DModel::infer,
py::call_guard<py::gil_scoped_release>(),
py::arg("latent"),
py::arg("timestep"))
.def("set_hidden_states", &ov::genai::SD3Transformer2DModel::set_hidden_states, py::arg("tensor_name"), py::arg("encoder_hidden_states"))
.def(
"compile",
[](ov::genai::SD3Transformer2DModel& self,
const std::string& device,
const py::kwargs& kwargs
) {
py::gil_scoped_release rel;
self.compile(device, pyutils::kwargs_to_any_map(kwargs));
},
py::arg("device"), "device on which inference will be done",
@@ -382,15 +408,19 @@ void init_flux_transformer_2d_model(py::module_& m) {

flux_transformer_2d_model.def("get_config", &ov::genai::FluxTransformer2DModel::get_config)
.def("reshape", &ov::genai::FluxTransformer2DModel::reshape, py::arg("batch_size"), py::arg("height"), py::arg("width"), py::arg("tokenizer_model_max_length"))
// .def("set_adapters", &ov::genai::FluxTransformer2DModel::set_adapters, py::arg("adapters"))
.def("infer", &ov::genai::FluxTransformer2DModel::infer, py::arg("sample"), py::arg("timestep"))
.def("infer",
&ov::genai::FluxTransformer2DModel::infer,
py::call_guard<py::gil_scoped_release>(),
py::arg("latent"),
py::arg("timestep"))
.def("set_hidden_states", &ov::genai::FluxTransformer2DModel::set_hidden_states, py::arg("tensor_name"), py::arg("encoder_hidden_states"))
.def(
"compile",
[](ov::genai::FluxTransformer2DModel& self,
const std::string& device,
const py::kwargs& kwargs
) {
py::gil_scoped_release rel;
self.compile(device, pyutils::kwargs_to_any_map(kwargs));
},
py::arg("device"), "device on which inference will be done",
@@ -484,6 +514,7 @@ void init_autoencoder_kl(py::module_& m) {
const std::string& device,
const py::kwargs& kwargs
) {
py::gil_scoped_release rel;
self.compile(device, pyutils::kwargs_to_any_map(kwargs));
},
py::arg("device"), "device on which inference will be done"
@@ -492,8 +523,8 @@
device (str): Device to run the model on (e.g., CPU, GPU).
kwargs: Device properties.
)")
.def("decode", &ov::genai::AutoencoderKL::decode, py::arg("latent"))
.def("encode", &ov::genai::AutoencoderKL::encode, py::arg("image"), py::arg("generator"))
.def("decode", &ov::genai::AutoencoderKL::decode, py::call_guard<py::gil_scoped_release>(), py::arg("latent"))
.def("encode", &ov::genai::AutoencoderKL::encode, py::call_guard<py::gil_scoped_release>(), py::arg("image"), py::arg("generator"))
.def("get_config", &ov::genai::AutoencoderKL::get_config)
.def("get_vae_scale_factor", &ov::genai::AutoencoderKL::get_vae_scale_factor);
}
15 changes: 12 additions & 3 deletions src/python/py_image_generation_pipelines.cpp
@@ -275,7 +275,9 @@ void init_image_generation_pipelines(py::module_& m) {
const std::string& device,
const py::kwargs& kwargs
) {
pipe.compile(device, pyutils::kwargs_to_any_map(kwargs));
auto map = pyutils::kwargs_to_any_map(kwargs);
py::gil_scoped_release rel;
pipe.compile(device, map);
},
py::arg("device"), "device on which inference will be done",
R"(
@@ -290,6 +292,7 @@
const py::kwargs& kwargs
) -> py::typing::Union<ov::Tensor> {
ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs);
py::gil_scoped_release rel;
return py::cast(pipe.generate(prompt, params));
},
py::arg("prompt"), "Input string",
@@ -337,7 +340,9 @@ void init_image_generation_pipelines(py::module_& m) {
const std::string& device,
const py::kwargs& kwargs
) {
pipe.compile(device, pyutils::kwargs_to_any_map(kwargs));
auto map = pyutils::kwargs_to_any_map(kwargs);
py::gil_scoped_release rel;
pipe.compile(device, map);
},
py::arg("device"), "device on which inference will be done",
R"(
@@ -353,6 +358,7 @@
const py::kwargs& kwargs
) -> py::typing::Union<ov::Tensor> {
ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs);
py::gil_scoped_release rel;
return py::cast(pipe.generate(prompt, image, params));
},
py::arg("prompt"), "Input string",
@@ -400,7 +406,9 @@ void init_image_generation_pipelines(py::module_& m) {
const std::string& device,
const py::kwargs& kwargs
) {
pipe.compile(device, pyutils::kwargs_to_any_map(kwargs));
auto map = pyutils::kwargs_to_any_map(kwargs);
py::gil_scoped_release rel;
pipe.compile(device, map);
},
py::arg("device"), "device on which inference will be done",
R"(
@@ -417,6 +425,7 @@
const py::kwargs& kwargs
) -> py::typing::Union<ov::Tensor> {
ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs);
py::gil_scoped_release rel;
return py::cast(pipe.generate(prompt, image, mask_image, params));
},
py::arg("prompt"), "Input string",
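Where kwargs are involved, the ordering above matters: pyutils::kwargs_to_any_map reads Python objects, so the pipeline bindings convert the kwargs to an ov::AnyMap while the GIL is still held and only then release it for the long compile/generate call. Below is a hedged sketch of that ordering only; HeavyPipeline is an illustrative type, not code from this repository, and the fragment assumes the surrounding module and pyutils helpers from the files above.

// Illustrative only: convert Python objects first, then release the GIL.
m.def("compile",
      [](HeavyPipeline& pipe, const std::string& device, const py::kwargs& kwargs) {
          // Touching kwargs requires the GIL, so do the conversion up front.
          ov::AnyMap properties = pyutils::kwargs_to_any_map(kwargs);
          py::gil_scoped_release release;  // safe: only native types are used below
          pipe.compile(device, properties);
      },
      py::arg("device"));

The tokenizer bindings in the next file follow the same pattern, building the ov::AnyMap of tokenization or detokenization parameters before releasing the GIL.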
4 changes: 4 additions & 0 deletions src/python/py_tokenizer.cpp
@@ -47,6 +47,7 @@ void init_tokenizer(py::module_& m) {
.def("encode", [](Tokenizer& tok, std::vector<std::string>& prompts, bool add_special_tokens) {
ov::AnyMap tokenization_params;
tokenization_params[ov::genai::add_special_tokens.name()] = add_special_tokens;
py::gil_scoped_release rel;
return tok.encode(prompts, tokenization_params);
},
py::arg("prompts"),
@@ -66,6 +67,7 @@
[](Tokenizer& tok, std::vector<int64_t>& tokens, bool skip_special_tokens) -> py::str {
ov::AnyMap detokenization_params;
detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens;
py::gil_scoped_release rel;
return pyutils::handle_utf8(tok.decode(tokens, detokenization_params));
},
py::arg("tokens"), py::arg("skip_special_tokens") = true,
@@ -77,6 +79,7 @@
[](Tokenizer& tok, ov::Tensor& tokens, bool skip_special_tokens) -> py::typing::List<py::str> {
ov::AnyMap detokenization_params;
detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens;
py::gil_scoped_release rel;
return pyutils::handle_utf8(tok.decode(tokens, detokenization_params));
},
py::arg("tokens"), py::arg("skip_special_tokens") = true,
@@ -87,6 +90,7 @@
[](Tokenizer& tok, std::vector<std::vector<int64_t>>& tokens, bool skip_special_tokens) -> py::typing::List<py::str> {
ov::AnyMap detokenization_params;
detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens;
py::gil_scoped_release rel;
return pyutils::handle_utf8(tok.decode(tokens, detokenization_params));
},
py::arg("tokens"), py::arg("skip_special_tokens") = true,
6 changes: 4 additions & 2 deletions src/python/py_vlm_pipeline.cpp
@@ -96,7 +96,7 @@ py::object call_vlm_generate(
) {
auto updated_config = *pyutils::update_config_from_kwargs(generation_config, kwargs);
ov::genai::StreamerVariant streamer = pyutils::pystreamer_to_streamer(py_streamer);

py::gil_scoped_release rel;
return py::cast(pipe.generate(prompt, images, updated_config, streamer));
}

@@ -194,7 +194,9 @@ void init_vlm_pipeline(py::module_& m) {
const std::string& prompt,
const py::kwargs& kwargs
) -> py::typing::Union<ov::genai::VLMDecodedResults> {
return py::cast(pipe.generate(prompt, pyutils::kwargs_to_any_map(kwargs)));
auto map = pyutils::kwargs_to_any_map(kwargs);
py::gil_scoped_release rel;
return py::cast(pipe.generate(prompt, map));
},
py::arg("prompt"), "Input string",
(vlm_generate_kwargs_docstring + std::string(" \n ")).c_str()
2 changes: 1 addition & 1 deletion src/python/py_whisper_pipeline.cpp
@@ -278,7 +278,7 @@ py::object call_whisper_common_generate(WhisperPipeline& pipe,
auto updated_config = update_whisper_config_from_kwargs(base_config, kwargs);

ChunkStreamerVariant streamer = pystreamer_to_chunk_streamer(py_streamer);

py::gil_scoped_release rel;
return py::cast(pipe.generate(raw_speech_input, updated_config, streamer));
}

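The VLM and Whisper generate bindings above release the GIL even when a Python streamer callback is supplied. That is only safe if the wrapper produced by pyutils::pystreamer_to_streamer (or pystreamer_to_chunk_streamer) re-acquires the GIL before calling back into Python; pybind11 does this automatically for std::function arguments converted through functional.h, and the sketch below assumes the custom streamer wrappers behave the same way, which is an assumption, not something verified from this diff.

#include <pybind11/pybind11.h>
#include <functional>
#include <string>

namespace py = pybind11;

// Hedged sketch: a streamer wrapper must re-take the GIL before using Python.
std::function<void(const std::string&)> wrap_streamer(py::function py_cb) {
    return [py_cb](const std::string& chunk) {
        py::gil_scoped_acquire acquire;  // mandatory before any Python API call
        py_cb(chunk);
        // A production wrapper also needs the GIL when the captured py_cb
        // handle is copied or destroyed on a non-Python thread.
    };
}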