diff --git a/src/cpp/src/continuous_batching_impl.cpp b/src/cpp/src/continuous_batching_impl.cpp
index f95cd3b9c6..e289c776fb 100644
--- a/src/cpp/src/continuous_batching_impl.cpp
+++ b/src/cpp/src/continuous_batching_impl.cpp
@@ -53,11 +53,6 @@ void apply_kv_cache_precision(const std::shared_ptr& model, const std
             // x86 and ARM have different default kv cache type, take this information from the plugin
             m_kv_cache_type = core.get_property(device, ov::hint::kv_cache_precision);
         }
-
-        // TEMP WA: currently FP16 / BF16 KV cache is faster than U8 for PagedAttention
-        if (m_kv_cache_type == ov::element::u8) {
-            m_kv_cache_type = inference_precision == ov::element::bf16 ? ov::element::bf16 : ov::element::f16;
-        }
     } else if (device.find("GPU") != std::string::npos) {
         if (accuracy_mode) {
             inference_precision = ov::element::f32;