Added GIL release for time-consuming methods. #1673

Merged (14 commits) on Feb 11, 2025
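The change set applies two pybind11 idioms. Where a bound method neither reads nor builds Python objects itself, the GIL release is attached declaratively with py::call_guard; where the binding lambda must first touch Python state (kwargs conversion, return casting), the release is scoped by hand inside the lambda. A minimal sketch of the declarative form follows; the Model type and its infer method are hypothetical stand-ins, not part of this PR:

```cpp
#include <pybind11/pybind11.h>

namespace py = pybind11;

// Hypothetical stand-in for a model whose inference is pure C++ work.
struct Model {
    float infer(float x) const { return x * 2.0f; }  // imagine seconds of compute here
};

PYBIND11_MODULE(example, m) {
    py::class_<Model>(m, "Model")
        .def(py::init<>())
        // The guard releases the GIL before the C++ body runs and reacquires
        // it on return, so other Python threads can make progress meanwhile.
        .def("infer", &Model::infer,
             py::call_guard<py::gil_scoped_release>(),
             py::arg("x"));
}
```

pybind11 converts arguments before the guard is constructed and casts the return value after it is destroyed, so only the C++ body runs without the GIL.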
4 changes: 2 additions & 2 deletions src/python/openvino_genai/py_openvino_genai.pyi
@@ -524,7 +524,7 @@ class FluxTransformer2DModel:
"""
def get_config(self) -> FluxTransformer2DModel.Config:
...
def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor:
def infer(self, latent: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor:
...
def reshape(self, batch_size: int, height: int, width: int, tokenizer_model_max_length: int) -> FluxTransformer2DModel:
...
@@ -1354,7 +1354,7 @@ class SD3Transformer2DModel:
"""
def get_config(self) -> SD3Transformer2DModel.Config:
...
def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor:
def infer(self, latent: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor:
...
def reshape(self, batch_size: int, height: int, width: int, tokenizer_model_max_length: int) -> SD3Transformer2DModel:
...
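The stub change above only renames the first infer parameter from sample to latent for the two transformer models; it mirrors the py::arg renames in the C++ bindings further down, since the name passed to py::arg is both the keyword Python callers must use and the name emitted into the .pyi stub. A toy illustration of that coupling, with a hypothetical infer function:

```cpp
#include <pybind11/pybind11.h>

namespace py = pybind11;

// Hypothetical free function standing in for the transformer's infer().
int infer(int latent, int timestep) { return latent + timestep; }

PYBIND11_MODULE(example, m) {
    // After renaming py::arg("sample") to py::arg("latent"), Python callers
    // write example.infer(latent=..., timestep=...), and stub generators
    // emit the signature "def infer(latent, timestep)" accordingly.
    m.def("infer", &infer, py::arg("latent"), py::arg("timestep"));
}
```

Keyword names are part of the public API, so this rename is a breaking change for any caller that passed sample= by keyword.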
88 changes: 70 additions & 18 deletions src/python/py_image_generation_models.cpp
@@ -70,15 +70,24 @@ void init_clip_text_model(py::module_& m) {
clip_text_model.def("get_config", &ov::genai::CLIPTextModel::get_config)
.def("reshape", &ov::genai::CLIPTextModel::reshape, py::arg("batch_size"))
.def("set_adapters", &ov::genai::CLIPTextModel::set_adapters, py::arg("adapters"))
.def("infer", &ov::genai::CLIPTextModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"))
.def("infer",
&ov::genai::CLIPTextModel::infer,
py::call_guard<py::gil_scoped_release>(),
py::arg("pos_prompt"),
py::arg("neg_prompt"),
py::arg("do_classifier_free_guidance"))
.def("get_output_tensor", &ov::genai::CLIPTextModel::get_output_tensor, py::arg("idx"))
.def(
"compile",
[](ov::genai::CLIPTextModel& self,
const std::string& device,
const py::kwargs& kwargs
) {
self.compile(device, pyutils::kwargs_to_any_map(kwargs));
auto map = pyutils::kwargs_to_any_map(kwargs);
{
py::gil_scoped_release rel;
self.compile(device, map);
}
},
py::arg("device"), "device on which inference will be done",
R"(
@@ -133,7 +142,11 @@ void init_clip_text_model_with_projection(py::module_& m) {
.def_readwrite("num_hidden_layers", &ov::genai::CLIPTextModelWithProjection::Config::num_hidden_layers);

clip_text_model_with_projection.def("reshape", &ov::genai::CLIPTextModelWithProjection::reshape, py::arg("batch_size"))
.def("infer", &ov::genai::CLIPTextModelWithProjection::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"))
.def("infer", &ov::genai::CLIPTextModelWithProjection::infer,
py::call_guard<py::gil_scoped_release>(),
py::arg("pos_prompt"),
py::arg("neg_prompt"),
py::arg("do_classifier_free_guidance"))
.def("get_config", &ov::genai::CLIPTextModelWithProjection::get_config)
.def("get_output_tensor", &ov::genai::CLIPTextModelWithProjection::get_output_tensor, py::arg("idx"))
.def("set_adapters", &ov::genai::CLIPTextModelWithProjection::set_adapters, py::arg("adapters"))
@@ -143,7 +156,11 @@ void init_clip_text_model_with_projection(py::module_& m) {
const std::string& device,
const py::kwargs& kwargs
) {
self.compile(device, pyutils::kwargs_to_any_map(kwargs));
auto map = pyutils::kwargs_to_any_map(kwargs);
{
py::gil_scoped_release rel;
self.compile(device, map);
}
},
py::arg("device"), "device on which inference will be done",
R"(
@@ -189,16 +206,25 @@ void init_t5_encoder_model(py::module_& m) {
model (T5EncoderModel): T5EncoderModel model
)")
.def("reshape", &ov::genai::T5EncoderModel::reshape, py::arg("batch_size"), py::arg("max_sequence_length"))
.def("infer", &ov::genai::T5EncoderModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("max_sequence_length"))
.def("infer",
&ov::genai::T5EncoderModel::infer,
py::call_guard<py::gil_scoped_release>(),
py::arg("pos_prompt"),
py::arg("neg_prompt"),
py::arg("do_classifier_free_guidance"),
py::arg("max_sequence_length"))
.def("get_output_tensor", &ov::genai::T5EncoderModel::get_output_tensor, py::arg("idx"))
// .def("set_adapters", &ov::genai::T5EncoderModel::set_adapters, py::arg("adapters"))
.def(
"compile",
[](ov::genai::T5EncoderModel& self,
const std::string& device,
const py::kwargs& kwargs
) {
self.compile(device, pyutils::kwargs_to_any_map(kwargs));
auto map = pyutils::kwargs_to_any_map(kwargs);
{
py::gil_scoped_release rel;
self.compile(device, map);
}
},
py::arg("device"), "device on which inference will be done",
R"(
@@ -254,7 +280,11 @@ void init_unet2d_condition_model(py::module_& m) {
unet2d_condition_model.def("get_config", &ov::genai::UNet2DConditionModel::get_config)
.def("reshape", &ov::genai::UNet2DConditionModel::reshape, py::arg("batch_size"), py::arg("height"), py::arg("width"), py::arg("tokenizer_model_max_length"))
.def("set_adapters", &ov::genai::UNet2DConditionModel::set_adapters, py::arg("adapters"))
.def("infer", &ov::genai::UNet2DConditionModel::infer, py::arg("sample"), py::arg("timestep"))
.def("infer",
&ov::genai::UNet2DConditionModel::infer,
py::call_guard<py::gil_scoped_release>(),
py::arg("sample"),
py::arg("timestep"))
.def("set_hidden_states", &ov::genai::UNet2DConditionModel::set_hidden_states, py::arg("tensor_name"), py::arg("encoder_hidden_states"))
.def("do_classifier_free_guidance", &ov::genai::UNet2DConditionModel::do_classifier_free_guidance, py::arg("guidance_scale"))
.def(
@@ -263,7 +293,11 @@ void init_unet2d_condition_model(py::module_& m) {
const std::string& device,
const py::kwargs& kwargs
) {
self.compile(device, pyutils::kwargs_to_any_map(kwargs));
auto map = pyutils::kwargs_to_any_map(kwargs);
{
py::gil_scoped_release rel;
self.compile(device, map);
}
},
py::arg("device"), "device on which inference will be done",
R"(
@@ -319,16 +353,23 @@ void init_sd3_transformer_2d_model(py::module_& m) {

sd3_transformer_2d_model.def("get_config", &ov::genai::SD3Transformer2DModel::get_config)
.def("reshape", &ov::genai::SD3Transformer2DModel::reshape, py::arg("batch_size"), py::arg("height"), py::arg("width"), py::arg("tokenizer_model_max_length"))
// .def("set_adapters", &ov::genai::SD3Transformer2DModel::set_adapters, py::arg("adapters"))
.def("infer", &ov::genai::SD3Transformer2DModel::infer, py::arg("sample"), py::arg("timestep"))
.def("infer",
&ov::genai::SD3Transformer2DModel::infer,
py::call_guard<py::gil_scoped_release>(),
py::arg("latent"),
py::arg("timestep"))
.def("set_hidden_states", &ov::genai::SD3Transformer2DModel::set_hidden_states, py::arg("tensor_name"), py::arg("encoder_hidden_states"))
.def(
"compile",
[](ov::genai::SD3Transformer2DModel& self,
const std::string& device,
const py::kwargs& kwargs
) {
self.compile(device, pyutils::kwargs_to_any_map(kwargs));
auto map = pyutils::kwargs_to_any_map(kwargs);
{
py::gil_scoped_release rel;
self.compile(device, map);
}
},
py::arg("device"), "device on which inference will be done",
R"(
@@ -382,16 +423,23 @@ void init_flux_transformer_2d_model(py::module_& m) {

flux_transformer_2d_model.def("get_config", &ov::genai::FluxTransformer2DModel::get_config)
.def("reshape", &ov::genai::FluxTransformer2DModel::reshape, py::arg("batch_size"), py::arg("height"), py::arg("width"), py::arg("tokenizer_model_max_length"))
// .def("set_adapters", &ov::genai::FluxTransformer2DModel::set_adapters, py::arg("adapters"))
.def("infer", &ov::genai::FluxTransformer2DModel::infer, py::arg("sample"), py::arg("timestep"))
.def("infer",
&ov::genai::FluxTransformer2DModel::infer,
py::call_guard<py::gil_scoped_release>(),
py::arg("latent"),
py::arg("timestep"))
.def("set_hidden_states", &ov::genai::FluxTransformer2DModel::set_hidden_states, py::arg("tensor_name"), py::arg("encoder_hidden_states"))
.def(
"compile",
[](ov::genai::FluxTransformer2DModel& self,
const std::string& device,
const py::kwargs& kwargs
) {
self.compile(device, pyutils::kwargs_to_any_map(kwargs));
auto map = pyutils::kwargs_to_any_map(kwargs);
{
py::gil_scoped_release rel;
self.compile(device, map);
}
},
py::arg("device"), "device on which inference will be done",
R"(
@@ -484,16 +532,20 @@ void init_autoencoder_kl(py::module_& m) {
const std::string& device,
const py::kwargs& kwargs
) {
self.compile(device, pyutils::kwargs_to_any_map(kwargs));
auto map = pyutils::kwargs_to_any_map(kwargs);
{
py::gil_scoped_release rel;
self.compile(device, map);
}
},
py::arg("device"), "device on which inference will be done"
R"(
Compiles the model.
device (str): Device to run the model on (e.g., CPU, GPU).
kwargs: Device properties.
)")
.def("decode", &ov::genai::AutoencoderKL::decode, py::arg("latent"))
.def("encode", &ov::genai::AutoencoderKL::encode, py::arg("image"), py::arg("generator"))
.def("decode", &ov::genai::AutoencoderKL::decode, py::call_guard<py::gil_scoped_release>(), py::arg("latent"))
.def("encode", &ov::genai::AutoencoderKL::encode, py::call_guard<py::gil_scoped_release>(), py::arg("image"), py::arg("generator"))
.def("get_config", &ov::genai::AutoencoderKL::get_config)
.def("get_vae_scale_factor", &ov::genai::AutoencoderKL::get_vae_scale_factor);
}
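Note why the compile bindings above cannot simply use py::call_guard: reading py::kwargs requires the GIL, so each lambda first converts the kwargs to an ov::AnyMap and only then releases the GIL around the long-running compile call. A reduced sketch of that ordering, with hypothetical stand-ins for Model, AnyMap, and the pyutils helper:

```cpp
#include <pybind11/pybind11.h>

#include <map>
#include <string>

namespace py = pybind11;

// Hypothetical stand-ins for ov::AnyMap and pyutils::kwargs_to_any_map.
using AnyMap = std::map<std::string, std::string>;

AnyMap kwargs_to_any_map(const py::kwargs& kwargs) {
    AnyMap map;
    for (auto item : kwargs)  // touches Python objects: GIL must be held
        map[py::str(item.first)] = py::str(item.second);
    return map;
}

struct Model {
    void compile(const std::string& device, const AnyMap&) { /* long-running C++ work */ }
};

PYBIND11_MODULE(example, m) {
    py::class_<Model>(m, "Model")
        .def(py::init<>())
        .def("compile",
             [](Model& self, const std::string& device, const py::kwargs& kwargs) {
                 auto map = kwargs_to_any_map(kwargs);  // still holding the GIL
                 {
                     py::gil_scoped_release rel;  // dropped only for the C++ call
                     self.compile(device, map);
                 }  // GIL reacquired when rel goes out of scope
             },
             py::arg("device"));
}
```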
39 changes: 33 additions & 6 deletions src/python/py_image_generation_pipelines.cpp
@@ -275,7 +275,11 @@ void init_image_generation_pipelines(py::module_& m) {
const std::string& device,
const py::kwargs& kwargs
) {
pipe.compile(device, pyutils::kwargs_to_any_map(kwargs));
auto map = pyutils::kwargs_to_any_map(kwargs);
{
py::gil_scoped_release rel;
pipe.compile(device, map);
}
},
py::arg("device"), "device on which inference will be done",
R"(
Expand All @@ -290,7 +294,12 @@ void init_image_generation_pipelines(py::module_& m) {
const py::kwargs& kwargs
) -> py::typing::Union<ov::Tensor> {
ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs);
return py::cast(pipe.generate(prompt, params));
ov::Tensor res;
{
py::gil_scoped_release rel;
res = pipe.generate(prompt, params);
}
return py::cast(res);
},
py::arg("prompt"), "Input string",
(text2image_generate_docstring + std::string(" \n ")).c_str())
@@ -337,7 +346,11 @@ void init_image_generation_pipelines(py::module_& m) {
const std::string& device,
const py::kwargs& kwargs
) {
pipe.compile(device, pyutils::kwargs_to_any_map(kwargs));
auto map = pyutils::kwargs_to_any_map(kwargs);
{
py::gil_scoped_release rel;
pipe.compile(device, map);
}
},
py::arg("device"), "device on which inference will be done",
R"(
@@ -353,7 +366,12 @@ void init_image_generation_pipelines(py::module_& m) {
const py::kwargs& kwargs
) -> py::typing::Union<ov::Tensor> {
ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs);
return py::cast(pipe.generate(prompt, image, params));
ov::Tensor res;
{
py::gil_scoped_release rel;
res = pipe.generate(prompt, image, params);
}
return py::cast(res);
},
py::arg("prompt"), "Input string",
py::arg("image"), "Initial image",
@@ -400,7 +418,11 @@ void init_image_generation_pipelines(py::module_& m) {
const std::string& device,
const py::kwargs& kwargs
) {
pipe.compile(device, pyutils::kwargs_to_any_map(kwargs));
auto map = pyutils::kwargs_to_any_map(kwargs);
{
py::gil_scoped_release rel;
pipe.compile(device, map);
}
},
py::arg("device"), "device on which inference will be done",
R"(
@@ -417,7 +439,12 @@ void init_image_generation_pipelines(py::module_& m) {
const py::kwargs& kwargs
) -> py::typing::Union<ov::Tensor> {
ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs);
return py::cast(pipe.generate(prompt, image, mask_image, params));
ov::Tensor res;
{
py::gil_scoped_release rel;
res = pipe.generate(prompt, image, mask_image, params);
}
return py::cast(res);
},
py::arg("prompt"), "Input string",
py::arg("image"), "Initial image",
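The generate bindings in this file add a third variant: the call produces a value that has to cross back into Python. The result is declared before the release scope, computed while the GIL is dropped, and passed to py::cast only after the scope closes, because constructing the Python return object requires the GIL again. A minimal sketch with hypothetical Tensor and Pipeline types:

```cpp
#include <pybind11/pybind11.h>

#include <string>

namespace py = pybind11;

// Hypothetical types; generate() stands in for a long-running pipeline call.
struct Tensor { int size = 0; };

struct Pipeline {
    Tensor generate(const std::string& prompt) {
        return Tensor{static_cast<int>(prompt.size())};
    }
};

PYBIND11_MODULE(example, m) {
    py::class_<Tensor>(m, "Tensor");
    py::class_<Pipeline>(m, "Pipeline")
        .def(py::init<>())
        .def("generate",
             [](Pipeline& pipe, const std::string& prompt) -> py::object {
                 Tensor res;  // declared while the GIL is still held
                 {
                     py::gil_scoped_release rel;  // heavy work runs GIL-free
                     res = pipe.generate(prompt);
                 }
                 return py::cast(res);  // building the Python object needs the GIL
             },
             py::arg("prompt"));
}
```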
28 changes: 24 additions & 4 deletions src/python/py_tokenizer.cpp
@@ -47,7 +47,12 @@ void init_tokenizer(py::module_& m) {
.def("encode", [](Tokenizer& tok, std::vector<std::string>& prompts, bool add_special_tokens) {
ov::AnyMap tokenization_params;
tokenization_params[ov::genai::add_special_tokens.name()] = add_special_tokens;
return tok.encode(prompts, tokenization_params);
ov::genai::TokenizedInputs res;
{
py::gil_scoped_release rel;
res = tok.encode(prompts, tokenization_params);
}
return res;
},
py::arg("prompts"),
py::arg("add_special_tokens") = true,
@@ -66,7 +71,12 @@ void init_tokenizer(py::module_& m) {
[](Tokenizer& tok, std::vector<int64_t>& tokens, bool skip_special_tokens) -> py::str {
ov::AnyMap detokenization_params;
detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens;
return pyutils::handle_utf8(tok.decode(tokens, detokenization_params));
std::string res;
{
py::gil_scoped_release rel;
res = tok.decode(tokens, detokenization_params);
}
return pyutils::handle_utf8(res);
},
py::arg("tokens"), py::arg("skip_special_tokens") = true,
R"(Decode a sequence into a string prompt.)"
@@ -77,7 +87,12 @@ void init_tokenizer(py::module_& m) {
[](Tokenizer& tok, ov::Tensor& tokens, bool skip_special_tokens) -> py::typing::List<py::str> {
ov::AnyMap detokenization_params;
detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens;
return pyutils::handle_utf8(tok.decode(tokens, detokenization_params));
std::vector<std::string> res;
{
py::gil_scoped_release rel;
res = tok.decode(tokens, detokenization_params);
}
return pyutils::handle_utf8(res);
},
py::arg("tokens"), py::arg("skip_special_tokens") = true,
R"(Decode tensor into a list of string prompts.)")
@@ -87,7 +102,12 @@ void init_tokenizer(py::module_& m) {
[](Tokenizer& tok, std::vector<std::vector<int64_t>>& tokens, bool skip_special_tokens) -> py::typing::List<py::str> {
ov::AnyMap detokenization_params;
detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens;
return pyutils::handle_utf8(tok.decode(tokens, detokenization_params));
std::vector<std::string> res;
{
py::gil_scoped_release rel;
res = tok.decode(tokens, detokenization_params);
}
return pyutils::handle_utf8(res);
},
py::arg("tokens"), py::arg("skip_special_tokens") = true,
R"(Decode a batch of tokens into a list of string prompt.)")
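The tokenizer bindings follow the same shape with one extra constraint: pyutils::handle_utf8 builds Python string objects, so it must run after the release scope has closed and the GIL is held again. A sketch of that placement; the decode function here is a hypothetical stand-in:

```cpp
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>  // converts std::vector<int64_t> arguments

#include <cstdint>
#include <string>
#include <vector>

namespace py = pybind11;

// Hypothetical pure-C++ decode, safe to run without the GIL.
std::string decode(const std::vector<int64_t>& tokens) {
    return std::string(tokens.size(), 'x');
}

PYBIND11_MODULE(example, m) {
    m.def("decode",
          [](const std::vector<int64_t>& tokens) -> py::str {
              std::string res;
              {
                  py::gil_scoped_release rel;
                  res = decode(tokens);  // C++ work, GIL-free
              }
              // Constructing the py::str (the role handle_utf8 plays above)
              // must happen with the GIL held, i.e. after the scope ends.
              return py::str(res);
          },
          py::arg("tokens"));
}
```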
16 changes: 13 additions & 3 deletions src/python/py_vlm_pipeline.cpp
@@ -96,8 +96,12 @@ py::object call_vlm_generate(
) {
auto updated_config = *pyutils::update_config_from_kwargs(generation_config, kwargs);
ov::genai::StreamerVariant streamer = pyutils::pystreamer_to_streamer(py_streamer);

return py::cast(pipe.generate(prompt, images, updated_config, streamer));
ov::genai::VLMDecodedResults res;
{
py::gil_scoped_release rel;
res = pipe.generate(prompt, images, updated_config, streamer);
}
return py::cast(res);
}

void init_vlm_pipeline(py::module_& m) {
@@ -194,7 +198,13 @@ void init_vlm_pipeline(py::module_& m) {
const std::string& prompt,
const py::kwargs& kwargs
) -> py::typing::Union<ov::genai::VLMDecodedResults> {
return py::cast(pipe.generate(prompt, pyutils::kwargs_to_any_map(kwargs)));
auto map = pyutils::kwargs_to_any_map(kwargs);
ov::genai::VLMDecodedResults res;
{
py::gil_scoped_release rel;
res = pipe.generate(prompt, map);
}
return py::cast(res);
},
py::arg("prompt"), "Input string",
(vlm_generate_kwargs_docstring + std::string(" \n ")).c_str()
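Releasing the GIL around VLMPipeline::generate stays safe even when the caller passes a Python streamer: pybind11's std::function caster (pybind11/functional.h) wraps the Python callable in a functor that reacquires the GIL on every invocation and drops it again when the callback returns. A self-contained sketch of that round trip; run_with_callback is hypothetical:

```cpp
#include <pybind11/functional.h>  // std::function <-> Python callable
#include <pybind11/pybind11.h>

#include <functional>
#include <string>

namespace py = pybind11;

// Hypothetical long-running routine that reports progress via a callback.
void run_with_callback(const std::function<void(const std::string&)>& on_token) {
    for (int i = 0; i < 3; ++i)
        on_token("token " + std::to_string(i));  // wrapper takes the GIL here
}

PYBIND11_MODULE(example, m) {
    m.def("run",
          [](const std::function<void(const std::string&)>& on_token) {
              // Safe despite the callback: pybind11's functional wrapper
              // acquires the GIL each time on_token is invoked.
              py::gil_scoped_release rel;
              run_with_callback(on_token);
          },
          py::arg("on_token"));
}
```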