diff --git a/mistralrs-core/src/dummy_paged_attention/mod.rs b/mistralrs-core/src/dummy_paged_attention/mod.rs index 1f97cd5aed..9f67e82076 100644 --- a/mistralrs-core/src/dummy_paged_attention/mod.rs +++ b/mistralrs-core/src/dummy_paged_attention/mod.rs @@ -85,8 +85,9 @@ pub fn calculate_cache_config( Either::Left(v) => v, Either::Right(f) => { let free = MemoryUsage.get_memory_available(device)? as f32 / SIZE_IN_MB as f32; - let total = MemoryUsage.get_total_memory(device)? as f32 / SIZE_IN_MB as f32 * f; - let size = (total - free) as usize; + let total = MemoryUsage.get_total_memory(device)? as f32 / SIZE_IN_MB as f32; + let used = total - free; + let size = (total * f - used) as usize; info!("Allocating {size} MB for Paged Attention KV cache"); size } diff --git a/mistralrs-core/src/paged_attention/mod.rs b/mistralrs-core/src/paged_attention/mod.rs index e9e6cca380..59f6bf8cde 100644 --- a/mistralrs-core/src/paged_attention/mod.rs +++ b/mistralrs-core/src/paged_attention/mod.rs @@ -89,8 +89,9 @@ pub fn calculate_cache_config( Either::Left(v) => v, Either::Right(f) => { let free = MemoryUsage.get_memory_available(device)? as f32 / SIZE_IN_MB as f32; - let total = MemoryUsage.get_total_memory(device)? as f32 / SIZE_IN_MB as f32 * f; - let size = (total - free) as usize; + let total = MemoryUsage.get_total_memory(device)? as f32 / SIZE_IN_MB as f32; + let used = total - free; + let size = (total * f - used) as usize; info!("Allocating {size} MB for Paged Attention KV cache"); size }