Skip to content

Commit

Permalink
Fix method of calculating paged attn with util percent (#581)
Browse files Browse the repository at this point in the history
* Fix method of calculating paged attn with util percent

* Clippy
  • Loading branch information
EricLBuehler authored Jul 16, 2024
1 parent cc4ceee commit dd47b37
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
5 changes: 3 additions & 2 deletions mistralrs-core/src/dummy_paged_attention/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,9 @@ pub fn calculate_cache_config(
Either::Left(v) => v,
Either::Right(f) => {
let free = MemoryUsage.get_memory_available(device)? as f32 / SIZE_IN_MB as f32;
- let total = MemoryUsage.get_total_memory(device)? as f32 / SIZE_IN_MB as f32 * f;
- let size = (total - free) as usize;
+ let total = MemoryUsage.get_total_memory(device)? as f32 / SIZE_IN_MB as f32;
+ let used = total - free;
+ let size = (total * f - used) as usize;
info!("Allocating {size} MB for Paged Attention KV cache");
size
}
Expand Down
5 changes: 3 additions & 2 deletions mistralrs-core/src/paged_attention/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,9 @@ pub fn calculate_cache_config(
Either::Left(v) => v,
Either::Right(f) => {
let free = MemoryUsage.get_memory_available(device)? as f32 / SIZE_IN_MB as f32;
- let total = MemoryUsage.get_total_memory(device)? as f32 / SIZE_IN_MB as f32 * f;
- let size = (total - free) as usize;
+ let total = MemoryUsage.get_total_memory(device)? as f32 / SIZE_IN_MB as f32;
+ let used = total - free;
+ let size = (total * f - used) as usize;
info!("Allocating {size} MB for Paged Attention KV cache");
size
}
Expand Down

0 comments on commit dd47b37

Please sign in to comment.