Skip to content

Commit

Permalink
Bump to version 0.2.2 (#619)
Browse files: browse the repository at this point in the history
* Bump to version 0.2.2

* Update output

* Format
  • Loading branch information
EricLBuehler authored Jul 24, 2024
1 parent 7acdd1c commit 3262fda
Show file tree
Hide file tree
Showing 12 changed files with 88 additions and 84 deletions.
14 changes: 7 additions & 7 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ exclude = [
resolver = "2"

[workspace.package]
version = "0.2.1"
version = "0.2.2"
edition = "2021"
description = "Fast and easy LLM serving."
homepage = "https://github.com/EricLBuehler/mistral.rs"
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ Please submit requests for new models [here](https://github.com/EricLBuehler/mis
./mistralrs_server -i toml -f toml-selectors/anymoe_lora.toml
```
- 💎 Run the Gemma 2 model
- 🦙 Run the Llama 3.1 model
```
./mistralrs_server -i plain -m google/gemma-2-9b-it -a gemma2
./mistralrs_server -i plain -m meta-llama/Meta-Llama-3.1-8B-Instruct -a llama
```
- φ³ Run the Phi 3 model with 128K context window
Expand Down Expand Up @@ -189,7 +189,7 @@ Please submit more benchmarks via raising an issue!
> Note: You can use our [Docker containers here](https://github.com/EricLBuehler/mistral.rs/pkgs/container/mistral.rs).
> Learn more about running Docker containers: https://docs.docker.com/engine/reference/run/
> Note: You can use pre-built `mistralrs-server` binaries [here](https://github.com/EricLBuehler/mistral.rs/releases/tag/v0.2.1)
> Note: You can use pre-built `mistralrs-server` binaries [here](https://github.com/EricLBuehler/mistral.rs/releases/tag/v0.2.2)
- Install the [Python package here](mistralrs-pyo3/README.md).
Expand Down
2 changes: 1 addition & 1 deletion mistralrs-bench/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ candle-core.workspace = true
serde.workspace = true
serde_json.workspace = true
clap.workspace = true
mistralrs-core = { version = "0.2.1", path = "../mistralrs-core" }
mistralrs-core = { version = "0.2.2", path = "../mistralrs-core" }
tracing.workspace = true
either.workspace = true
tokio.workspace = true
Expand Down
4 changes: 2 additions & 2 deletions mistralrs-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,13 @@ tracing-subscriber.workspace = true
derive-new = "0.6.0"
itertools = "0.13.0"
sysinfo = "0.30.12"
mistralrs-vision = { version = "0.2.1", path = "../mistralrs-vision" }
mistralrs-vision = { version = "0.2.2", path = "../mistralrs-vision" }
csv = "1.3.0"
reqwest.workspace = true
base64.workspace = true
bytemuck_derive = "1.7.0"
plotly = { version = "0.9.0", features = ["kaleido"], optional = true }
mistralrs-paged-attn = { version = "0.2.1", path = "../mistralrs-paged-attn", optional = true }
mistralrs-paged-attn = { version = "0.2.2", path = "../mistralrs-paged-attn", optional = true }

[features]
default = ["plotly"]
Expand Down
132 changes: 68 additions & 64 deletions mistralrs-core/src/pipeline/isq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,74 +142,78 @@ pub trait IsqModel {
{
let (tensors, mapper) = self.get_biases();
let total_tensors = tensors.len();
info!("Applying in-situ quantization bias device mapping to {total_tensors} biases.");
let bar = ProgressBar::new(total_tensors as u64);
bar.set_style(
ProgressStyle::default_bar()
.template("[{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({eta})")
.unwrap()
.progress_chars("#>-"),
);

let mut devices = Vec::new();
for (_, layer) in &tensors {
let device = if let Some(layer) = layer {
mapper.device_for(*layer, false).unwrap_or(&device)
} else {
&device
};
devices.push(device.clone());
}

let t_start = Instant::now();
#[cfg(not(feature = "metal"))]
{
// NOTE(EricLBuehler): On version 0.2.0, remove this
let isq_low_mem = std::env::var("ISQ_LOW_MEMORY").is_ok();
if isq_low_mem {
warn!("ISQ_LOW_MEMORY is set but as of version 0.1.24, this is irrelevant");
if total_tensors > 0 {
info!(
"Applying in-situ quantization bias device mapping to {total_tensors} biases."
);
let bar = ProgressBar::new(total_tensors as u64);
bar.set_style(
ProgressStyle::default_bar()
.template("[{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({eta})")
.unwrap()
.progress_chars("#>-"),
);

let mut devices = Vec::new();
for (_, layer) in &tensors {
let device = if let Some(layer) = layer {
mapper.device_for(*layer, false).unwrap_or(&device)
} else {
&device
};
devices.push(device.clone());
}

info!("Applying ISQ on {} threads.", rayon::current_num_threads());

use indicatif::ParallelProgressIterator;
use rayon::iter::{
IndexedParallelIterator, IntoParallelIterator, ParallelIterator,
};
tensors
.into_par_iter()
.zip(devices)
.progress_with(bar)
.for_each(|((tensor, _), device)| {
if let Some(tensor) = tensor {
*tensor = tensor
.to_device(&device)
.unwrap()
.to_dtype(DType::F32)
.unwrap();
}
});
}
let t_start = Instant::now();
#[cfg(not(feature = "metal"))]
{
// NOTE(EricLBuehler): On version 0.2.0, remove this
let isq_low_mem = std::env::var("ISQ_LOW_MEMORY").is_ok();
if isq_low_mem {
warn!("ISQ_LOW_MEMORY is set but as of version 0.1.24, this is irrelevant");
}

info!("Applying ISQ on {} threads.", rayon::current_num_threads());

use indicatif::ParallelProgressIterator;
use rayon::iter::{
IndexedParallelIterator, IntoParallelIterator, ParallelIterator,
};
tensors
.into_par_iter()
.zip(devices)
.progress_with(bar)
.for_each(|((tensor, _), device)| {
if let Some(tensor) = tensor {
*tensor = tensor
.to_device(&device)
.unwrap()
.to_dtype(DType::F32)
.unwrap();
}
});
}

#[cfg(feature = "metal")]
{
use indicatif::ProgressIterator;
tensors
.into_iter()
.zip(devices)
.progress_with(bar)
.for_each(|((tensor, _), device)| {
if let Some(tensor) = tensor {
*tensor = tensor
.to_device(&device)
.unwrap()
.to_dtype(DType::F32)
.unwrap();
}
});
#[cfg(feature = "metal")]
{
use indicatif::ProgressIterator;
tensors
.into_iter()
.zip(devices)
.progress_with(bar)
.for_each(|((tensor, _), device)| {
if let Some(tensor) = tensor {
*tensor = tensor
.to_device(&device)
.unwrap()
.to_dtype(DType::F32)
.unwrap();
}
});
}
let delta = Instant::now().duration_since(t_start).as_secs_f32();
info!("Applied in-situ quantization device mapping. Took {delta:.2}s",);
}
let delta = Instant::now().duration_since(t_start).as_secs_f32();
info!("Applied in-situ quantization device mapping. Took {delta:.2}s",);
}
Ok(())
}
Expand Down
2 changes: 1 addition & 1 deletion mistralrs-pyo3/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ doc = false

[dependencies]
pyo3.workspace = true
mistralrs-core = { version = "0.2.1", path = "../mistralrs-core", features = ["pyo3_macros"] }
mistralrs-core = { version = "0.2.2", path = "../mistralrs-core", features = ["pyo3_macros"] }
serde.workspace = true
serde_json.workspace = true
candle-core.workspace = true
Expand Down
2 changes: 1 addition & 1 deletion mistralrs-pyo3/Cargo_template.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ doc = false

[dependencies]
pyo3.workspace = true
mistralrs-core = { version = "0.2.1", path = "../mistralrs-core", features=["pyo3_macros","$feature_name"] }
mistralrs-core = { version = "0.2.2", path = "../mistralrs-core", features=["pyo3_macros","$feature_name"] }
serde.workspace = true
serde_json.workspace = true
candle-core = { git = "https://github.com/EricLBuehler/candle.git", version = "0.6.0", rev = "c967be9", features=["$feature_name"] }
Expand Down
2 changes: 1 addition & 1 deletion mistralrs-pyo3/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "mistralrs"
version = "0.2.1"
version = "0.2.2"
requires-python = ">=3.8"
classifiers = [
"Programming Language :: Rust",
Expand Down
2 changes: 1 addition & 1 deletion mistralrs-pyo3/pyproject_template.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "$name"
version = "0.2.1"
version = "0.2.2"
requires-python = ">=3.8"
classifiers = [
"Programming Language :: Rust",
Expand Down
2 changes: 1 addition & 1 deletion mistralrs-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ axum = { version = "0.7.4", features = ["tokio"] }
tower-http = { version = "0.5.1", features = ["cors"]}
utoipa = { version = "4.2", features = ["axum_extras"] }
utoipa-swagger-ui = { version = "7.1.0", features = ["axum"]}
mistralrs-core = { version = "0.2.1", path = "../mistralrs-core" }
mistralrs-core = { version = "0.2.2", path = "../mistralrs-core" }
indexmap.workspace = true
accelerate-src = { workspace = true, optional = true }
intel-mkl-src = { workspace = true, optional = true }
Expand Down
2 changes: 1 addition & 1 deletion mistralrs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ license.workspace = true
homepage.workspace = true

[dependencies]
mistralrs-core = { version = "0.2.1", path = "../mistralrs-core" }
mistralrs-core = { version = "0.2.2", path = "../mistralrs-core" }
anyhow.workspace = true
tokio.workspace = true
candle-core.workspace = true
Expand Down

0 comments on commit 3262fda

Please sign in to comment.