diff --git a/benchmarks/benches/formatting.rs b/benchmarks/benches/formatting.rs index 3479029f4..25d162411 100644 --- a/benchmarks/benches/formatting.rs +++ b/benchmarks/benches/formatting.rs @@ -55,7 +55,7 @@ fn bench_formatting(c: &mut criterion::Criterion) { group.bench_function(conf.name, |b| { b.iter(|| { let mut matcher = conf.matching_words.build(conf.text); - matcher.format(option.clone()); + matcher.format(*option); }) }); } diff --git a/benchmarks/benches/indexing.rs b/benchmarks/benches/indexing.rs index d567b3da1..9446c0b0f 100644 --- a/benchmarks/benches/indexing.rs +++ b/benchmarks/benches/indexing.rs @@ -30,7 +30,7 @@ fn setup_dir(path: impl AsRef) { fn setup_index() -> Index { let path = "benches.mmdb"; - setup_dir(&path); + setup_dir(path); let mut options = EnvOpenOptions::new(); options.map_size(100 * 1024 * 1024 * 1024); // 100 GB options.max_readers(10); @@ -62,7 +62,7 @@ fn setup_settings<'t>( builder.execute(|_| (), || false).unwrap(); } -fn setup_index_with_settings<'t>( +fn setup_index_with_settings( primary_key: &str, searchable_fields: &[&str], filterable_fields: &[&str], @@ -121,7 +121,7 @@ fn indexing_songs_default(c: &mut Criterion) { let sortable_fields = []; setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -166,7 +166,7 @@ fn reindexing_songs_default(c: &mut Criterion) { let sortable_fields = []; let index = setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -232,7 +232,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) { let sortable_fields = []; let index = setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -295,7 +295,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { let sortable_fields = []; let index = setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -377,7 +377,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { let sortable_fields = []; setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -422,7 +422,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) { let sortable_fields = []; setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -466,7 +466,7 @@ fn indexing_wiki(c: &mut Criterion) { let sortable_fields = []; setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -511,7 +511,7 @@ fn reindexing_wiki(c: &mut Criterion) { let sortable_fields = []; let index = setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -578,7 +578,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) { let sortable_fields = []; let index = setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -641,7 +641,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { let sortable_fields = []; let index = setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -732,7 +732,7 @@ fn indexing_movies_default(c: &mut Criterion) { let sortable_fields = []; setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -776,7 +776,7 @@ fn reindexing_movies_default(c: &mut Criterion) { let sortable_fields = []; let index = setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -841,7 +841,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) { let sortable_fields = []; let index = setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -903,7 +903,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { let sortable_fields = []; let index = setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -1011,7 +1011,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) { let sortable_fields = ["popularity", "runtime", "vote_average", "release_date"]; setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -1078,7 +1078,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) { let sortable_fields = ["popularity", "runtime", "vote_average", "release_date"]; let index = setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -1148,7 +1148,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { let sortable_fields = []; setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -1192,7 +1192,7 @@ fn indexing_geo(c: &mut Criterion) { let sortable_fields = ["_geo", "population", "elevation"]; setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -1237,7 +1237,7 @@ fn reindexing_geo(c: &mut Criterion) { let sortable_fields = ["_geo", "population", "elevation"]; let index = setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, @@ -1304,7 +1304,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) { let sortable_fields = ["_geo", "population", "elevation"]; let index = setup_index_with_settings( - &primary_key, + primary_key, &searchable_fields, &filterable_fields, &sortable_fields, diff --git a/benchmarks/benches/search_geo.rs b/benchmarks/benches/search_geo.rs index faea4e3e0..a0ae93527 100644 --- a/benchmarks/benches/search_geo.rs +++ b/benchmarks/benches/search_geo.rs @@ -3,7 +3,7 @@ mod utils; use criterion::{criterion_group, criterion_main}; use milli::update::Settings; -use utils::Conf; +use utils::{IndexConf, IndexSettingsConf, SearchBenchConf}; #[global_allocator] static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; @@ -29,94 +29,93 @@ fn base_conf(builder: &mut Settings) { builder.set_sortable_fields(sortable_fields); } -#[rustfmt::skip] -const BASE_CONF: Conf = Conf { - dataset: datasets_paths::SMOL_ALL_COUNTRIES, - dataset_format: "jsonl", - queries: &[ - "", - ], - configure: base_conf, - primary_key: Some("geonameid"), - ..Conf::BASE -}; - fn bench_geo(c: &mut criterion::Criterion) { - #[rustfmt::skip] - let confs = &[ - // A basic placeholder with no geo - utils::Conf { - group_name: "placeholder with no geo", - ..BASE_CONF - }, - // Medium aglomeration: probably the most common usecase - utils::Conf { - group_name: "asc sort from Lille", - sort: Some(vec!["_geoPoint(50.62999333378238, 3.086269263384099):asc"]), - ..BASE_CONF - }, - utils::Conf { - group_name: "desc sort from Lille", - sort: Some(vec!["_geoPoint(50.62999333378238, 3.086269263384099):desc"]), - ..BASE_CONF - }, - // Big agglomeration: a lot of documents close to our point - utils::Conf { - group_name: "asc sort from Tokyo", - sort: Some(vec!["_geoPoint(35.749512532692144, 139.61664952543356):asc"]), - ..BASE_CONF - }, - utils::Conf { - group_name: "desc sort from Tokyo", - sort: Some(vec!["_geoPoint(35.749512532692144, 139.61664952543356):desc"]), - ..BASE_CONF - }, - // The furthest point from any civilization - utils::Conf { - group_name: "asc sort from Point Nemo", - sort: Some(vec!["_geoPoint(-48.87561645055408, -123.39275749319793):asc"]), - ..BASE_CONF - }, - utils::Conf { - group_name: "desc sort from Point Nemo", - sort: Some(vec!["_geoPoint(-48.87561645055408, -123.39275749319793):desc"]), - ..BASE_CONF - }, - // Filters - utils::Conf { - group_name: "filter of 100km from Lille", - filter: Some("_geoRadius(50.62999333378238, 3.086269263384099, 100000)"), - ..BASE_CONF - }, - utils::Conf { - group_name: "filter of 1km from Lille", - filter: Some("_geoRadius(50.62999333378238, 3.086269263384099, 1000)"), - ..BASE_CONF - }, - utils::Conf { - group_name: "filter of 100km from Tokyo", - filter: Some("_geoRadius(35.749512532692144, 139.61664952543356, 100000)"), - ..BASE_CONF - }, - utils::Conf { - group_name: "filter of 1km from Tokyo", - filter: Some("_geoRadius(35.749512532692144, 139.61664952543356, 1000)"), - ..BASE_CONF - }, - utils::Conf { - group_name: "filter of 100km from Point Nemo", - filter: Some("_geoRadius(-48.87561645055408, -123.39275749319793, 100000)"), - ..BASE_CONF - }, - utils::Conf { - group_name: "filter of 1km from Point Nemo", - filter: Some("_geoRadius(-48.87561645055408, -123.39275749319793, 1000)"), - ..BASE_CONF - }, - ]; + let index_conf = IndexConf { + dataset: datasets_paths::SMOL_ALL_COUNTRIES, + dataset_format: "jsonl", + primary_key: Some("geonameid"), + configure: base_conf, + ..IndexConf::BASE + }; + + let confs = vec![( + IndexSettingsConf::BASE, + vec![ + // A basic placeholder with no geo + SearchBenchConf { group_name: "placeholder with no geo", ..SearchBenchConf::BASE }, + // Medium aglomeration: probably the most common usecase + SearchBenchConf { + group_name: "asc sort from Lille", + sort: Some(vec!["_geoPoint(50.62999333378238, 3.086269263384099):asc"]), + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "desc sort from Lille", + sort: Some(vec!["_geoPoint(50.62999333378238, 3.086269263384099):desc"]), + ..SearchBenchConf::BASE + }, + // Big agglomeration: a lot of documents close to our point + SearchBenchConf { + group_name: "asc sort from Tokyo", + sort: Some(vec!["_geoPoint(35.749512532692144, 139.61664952543356):asc"]), + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "desc sort from Tokyo", + sort: Some(vec!["_geoPoint(35.749512532692144, 139.61664952543356):desc"]), + ..SearchBenchConf::BASE + }, + // The furthest point from any civilization + SearchBenchConf { + group_name: "asc sort from Point Nemo", + sort: Some(vec!["_geoPoint(-48.87561645055408, -123.39275749319793):asc"]), + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "desc sort from Point Nemo", + sort: Some(vec!["_geoPoint(-48.87561645055408, -123.39275749319793):desc"]), + ..SearchBenchConf::BASE + }, + // Filters + SearchBenchConf { + group_name: "filter of 100km from Lille", + filter: Some("_geoRadius(50.62999333378238, 3.086269263384099, 100000)"), + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "filter of 1km from Lille", + filter: Some("_geoRadius(50.62999333378238, 3.086269263384099, 1000)"), + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "filter of 100km from Tokyo", + filter: Some("_geoRadius(35.749512532692144, 139.61664952543356, 100000)"), + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "filter of 1km from Tokyo", + filter: Some("_geoRadius(35.749512532692144, 139.61664952543356, 1000)"), + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "filter of 100km from Point Nemo", + filter: Some("_geoRadius(-48.87561645055408, -123.39275749319793, 100000)"), + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "filter of 1km from Point Nemo", + filter: Some("_geoRadius(-48.87561645055408, -123.39275749319793, 1000)"), + ..SearchBenchConf::BASE + }, + ], + )]; - utils::run_benches(c, confs); + utils::run_benches(index_conf, c, &confs); } -criterion_group!(benches, bench_geo); +criterion_group!( + name = benches; + config = { criterion::Criterion::default().sample_size(10) }; + targets = bench_geo +); criterion_main!(benches); diff --git a/benchmarks/benches/search_songs.rs b/benchmarks/benches/search_songs.rs index a1245528f..4ecb7cf29 100644 --- a/benchmarks/benches/search_songs.rs +++ b/benchmarks/benches/search_songs.rs @@ -1,9 +1,12 @@ mod datasets_paths; mod utils; +use std::collections::HashMap; + use criterion::{criterion_group, criterion_main}; use milli::update::Settings; -use utils::Conf; +use milli::CriterionImplementationStrategy; +use utils::{IndexConf, IndexSettingsConf, SearchBenchConf}; #[global_allocator] static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; @@ -16,37 +19,42 @@ fn base_conf(builder: &mut Settings) { .collect(); builder.set_displayed_fields(displayed_fields); - let searchable_fields = ["title", "album", "artist"].iter().map(|s| s.to_string()).collect(); + let searchable_fields = + ["title", "album", "artist", "genre"].iter().map(|s| s.to_string()).collect(); builder.set_searchable_fields(searchable_fields); let faceted_fields = ["released-timestamp", "duration-float", "genre", "country", "artist"] .iter() .map(|s| s.to_string()) .collect(); + + let mut synonyms = HashMap::new(); + synonyms.insert("ily".to_owned(), vec!["i love you".to_owned()]); + synonyms.insert("rnr".to_owned(), vec!["rock and roll".to_owned()]); + synonyms.insert( + "mftomp".to_owned(), + vec![ + "songs from the original motion picture".to_owned(), + "music from the original motion picture".to_owned(), + "music from the motion picture".to_owned(), + "songs from the motion picture".to_owned(), + "songs from the original soundtrack".to_owned(), + "original soundtrack".to_owned(), + ], + ); + builder.set_synonyms(synonyms); + builder.set_filterable_fields(faceted_fields); } -#[rustfmt::skip] -const BASE_CONF: Conf = Conf { - dataset: datasets_paths::SMOL_SONGS, - queries: &[ - "john ", // 9097 - "david ", // 4794 - "charles ", // 1957 - "david bowie ", // 1200 - "michael jackson ", // 600 - "thelonious monk ", // 303 - "charles mingus ", // 142 - "marcus miller ", // 60 - "tamo ", // 13 - "Notstandskomitee ", // 4 - ], - configure: base_conf, - primary_key: Some("id"), - ..Conf::BASE -}; - fn bench_songs(c: &mut criterion::Criterion) { + let base_index_conf = IndexConf { + dataset: datasets_paths::SMOL_SONGS, + primary_key: Some("id"), + configure: base_conf, + ..IndexConf::BASE + }; + let default_criterion: Vec = milli::default_criteria().iter().map(|criteria| criteria.to_string()).collect(); let default_criterion = default_criterion.iter().map(|s| s.as_str()); @@ -55,142 +63,515 @@ fn bench_songs(c: &mut criterion::Criterion) { let desc_default: Vec<&str> = std::iter::once("released-timestamp:desc").chain(default_criterion.clone()).collect(); - let basic_with_quote: Vec = BASE_CONF - .queries - .iter() - .map(|s| { - s.trim().split(' ').map(|s| format!(r#""{}""#, s)).collect::>().join(" ") - }) - .collect(); - let basic_with_quote: &[&str] = - &basic_with_quote.iter().map(|s| s.as_str()).collect::>(); - #[rustfmt::skip] - let confs = &[ - /* first we bench each criterion alone */ - utils::Conf { - group_name: "proximity", - queries: &[ - "black saint sinner lady ", - "les dangeureuses 1960 ", - "The Disneyland Sing-Along Chorus ", - "Under Great Northern Lights ", - "7000 Danses Un Jour Dans Notre Vie ", - ], - criterion: Some(&["proximity"]), - optional_words: false, - ..BASE_CONF - }, - utils::Conf { - group_name: "typo", - queries: &[ - "mongus ", - "thelonius monk ", - "Disnaylande ", - "the white striper ", - "indochie ", - "indochien ", - "klub des loopers ", - "fear of the duck ", - "michel depech ", - "stromal ", - "dire straights ", - "Arethla Franklin ", - ], - criterion: Some(&["typo"]), - optional_words: false, - ..BASE_CONF - }, - utils::Conf { - group_name: "words", - queries: &[ - "the black saint and the sinner lady and the good doggo ", // four words to pop - "les liaisons dangeureuses 1793 ", // one word to pop - "The Disneyland Children's Sing-Alone song ", // two words to pop - "seven nation mummy ", // one word to pop - "7000 Danses / Le Baiser / je me trompe de mots ", // four words to pop - "Bring Your Daughter To The Slaughter but now this is not part of the title ", // nine words to pop - "whathavenotnsuchforth and a good amount of words to pop to match the first one ", // 13 - ], - criterion: Some(&["words"]), - ..BASE_CONF - }, - utils::Conf { - group_name: "asc", - criterion: Some(&["released-timestamp:desc"]), - ..BASE_CONF - }, - utils::Conf { - group_name: "desc", - criterion: Some(&["released-timestamp:desc"]), - ..BASE_CONF - }, - - /* then we bench the asc and desc criterion on top of the default criterion */ - utils::Conf { - group_name: "asc + default", - criterion: Some(&asc_default[..]), - ..BASE_CONF - }, - utils::Conf { - group_name: "desc + default", - criterion: Some(&desc_default[..]), - ..BASE_CONF - }, - - /* we bench the filters with the default request */ - utils::Conf { - group_name: "basic filter: <=", - filter: Some("released-timestamp <= 946728000"), // year 2000 - ..BASE_CONF - }, - utils::Conf { - group_name: "basic filter: TO", - filter: Some("released-timestamp 946728000 TO 1262347200"), // year 2000 to 2010 - ..BASE_CONF - }, - utils::Conf { - group_name: "big filter", - filter: Some("released-timestamp != 1262347200 AND (NOT (released-timestamp = 946728000)) AND (duration-float = 1 OR (duration-float 1.1 TO 1.5 AND released-timestamp > 315576000))"), - ..BASE_CONF - }, - - /* the we bench some global / normal search with all the default criterion in the default - * order */ - utils::Conf { - group_name: "basic placeholder", - queries: &[""], - ..BASE_CONF - }, - utils::Conf { - group_name: "basic without quote", - queries: &BASE_CONF - .queries - .iter() - .map(|s| s.trim()) // we remove the space at the end of each request - .collect::>(), - ..BASE_CONF - }, - utils::Conf { - group_name: "basic with quote", - queries: basic_with_quote, - ..BASE_CONF - }, - utils::Conf { - group_name: "prefix search", - queries: &[ - "s", // 500k+ results - "a", // - "b", // - "i", // - "x", // only 7k results - ], - ..BASE_CONF - }, + let benches = &[ + // First all the benches done on the index with only the proximity criterion + ( + IndexSettingsConf { + criterion: Some(&["proximity"]), + }, + vec![ + SearchBenchConf { + group_name: "proximity criterion", + queries: vec![ + "black saint sinner lady ", + "les dangeureuses 1960 ", + "The Disneyland Sing-Along Chorus ", + "Under Great Northern Lights ", + "7000 Danses Un Jour Dans Notre Vie ", + ], + optional_words: false, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "proximity criterion set-based", + queries: vec![ + "black saint sinner lady ", + "les dangeureuses 1960 ", + "The Disneyland Sing-Along Chorus ", + "Under Great Northern Lights ", + "7000 Danses Un Jour Dans Notre Vie ", + ], + optional_words: false, + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "proximity criterion iterative", + queries: vec![ + "black saint sinner lady ", + "les dangeureuses 1960 ", + "The Disneyland Sing-Along Chorus ", + "Under Great Northern Lights ", + "7000 Danses Un Jour Dans Notre Vie ", + ], + optional_words: false, + criterion_implementation_strategy: CriterionImplementationStrategy::OnlyIterative, + ..SearchBenchConf::BASE + }, + ] + ), + // Then with only the typo criterion + ( + IndexSettingsConf { + criterion: Some(&["typo"]), + }, + vec![ + SearchBenchConf { + group_name: "typo criterion", + queries: vec![ + "mongus ", + "thelonius monk ", + "Disnaylande ", + "the white striper ", + "indochie ", + "indochien ", + "klub des loopers ", + "fear of the duck ", + "michel depech ", + "stromal ", + "dire straights ", + "Arethla Franklin ", + ], + optional_words: false, + ..SearchBenchConf::BASE + }, + ] + ), + // Then with only the words criterion + ( + IndexSettingsConf { + criterion: Some(&["words"]), + }, + vec![ + SearchBenchConf { + group_name: "words criterion", + queries: vec![ + "the black saint and the sinner lady and the good doggo ", // four words to pop + "les liaisons dangeureuses 1793 ", // one word to pop + "The Disneyland Children's Sing-Alone song ", // two words to pop + "seven nation mummy ", // one word to pop + "7000 Danses / Le Baiser / je me trompe de mots ", // four words to pop + "Bring Your Daughter To The Slaughter but now this is not part of the title ", // nine words to pop + "whathavenotnsuchforth and a good amount of words to pop to match the first one ", // 13 + ], + ..SearchBenchConf::BASE + } + ] + ), + // Then with only the released-timestamp:asc criterion + ( + IndexSettingsConf { + criterion: Some(&["released-timestamp:asc"]), + }, + vec![ + SearchBenchConf { + group_name: "asc", + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "asc set-based", + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "asc iterative", + criterion_implementation_strategy: CriterionImplementationStrategy::OnlyIterative, + ..SearchBenchConf::BASE + }, + ] + ), + // Then with only the released-timestamp:desc criterion + ( + IndexSettingsConf { + criterion: Some(&["released-timestamp:desc"]), + }, + vec![ + SearchBenchConf { + group_name: "desc", + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "desc set-based", + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "desc iterative", + criterion_implementation_strategy: CriterionImplementationStrategy::OnlyIterative, + ..SearchBenchConf::BASE + }, + ] + ), + // Then with the asc criterion on top of the default criterion + ( + IndexSettingsConf { + criterion: Some(&asc_default[..]), + }, + vec![ + SearchBenchConf { + group_name: "asc + default", + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "asc + default set-based", + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "asc + default iterative", + criterion_implementation_strategy: CriterionImplementationStrategy::OnlyIterative, + ..SearchBenchConf::BASE + }, + ] + ), + // Then with the desc criterion on top of the default criterion + ( + IndexSettingsConf { + criterion: Some(&desc_default[..]), + }, + vec![ + SearchBenchConf { + group_name: "desc + default", + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "desc + default set-based", + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "desc + default iterative", + criterion_implementation_strategy: CriterionImplementationStrategy::OnlyIterative, + ..SearchBenchConf::BASE + }, + ] + ), + // Then with the default index config + ( + IndexSettingsConf::BASE, + vec![ + SearchBenchConf { + group_name: "basic filter: <=", + filter: Some("released-timestamp <= 946728000"), // year 2000 + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "basic filter: TO", + filter: Some("released-timestamp 946728000 TO 1262347200"), // year 2000 to 2010 + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "big filter", + filter: Some("released-timestamp != 1262347200 AND (NOT (released-timestamp = 946728000)) AND (duration-float = 1 OR (duration-float 1.1 TO 1.5 AND released-timestamp > 315576000))"), + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "big IN filter", + filter: Some("NOT \"released-timestamp\" IN [-126230400, -1399075200, -160444800, -189388800, -220924800, -252460800, -283996800, -31536000, -347155200, -378691200, -473385600, -631152000, -694310400, -94694400, 0, 1000166400, 1009843200, 1041379200, 1070323200, 1072915200, 1075852800, 1078099200, 1088640000, 1096588800, 1099612800, 1104537600, 1121644800, 1136073600, 1150156800, 1159660800, 1162339200, 1167609600, 1171584000, 1183248000, 1184112000, 1190419200, 1199145600, 1203724800, 1204329600, 1216339200, 1228089600, 1230768000, 1233446400, 1247097600, 1247961600, 1252886400, 126230400, 1262304000, 1268956800, 1283212800, 1285027200, 1293840000, 1295913600, 1296518400, 1306886400, 1312156800, 1320105600, 1321228800, 1321660800, 1322179200, 1322438400, 1325376000, 1327536000, 1338336000, 1347840000, 1351728000, 1353801600, 1356048000, 1356998400, 1369440000, 1370044800, 1372636800, 1382572800, 1382659200, 1384905600, 1388534400, 1393804800, 1397260800, 1401148800, 1411948800, 1420070400, 1426377600, 1427846400, 1433116800, 1439078400, 1440028800, 1446336000, 1451606400, 1456704000, 1464739200, 1466899200, 1467676800, 1470355200, 1483228800, 1493942400, 1495756800, 1497484800, 1506816000, 1512432000, 1514764800, 1521158400, 1522972800, 1524182400, 1528416000, 1529539200, 1533859200, 1536105600, 1536278400, 1543622400, 1546300800, 1547164800, 1550188800, 1551398400, 1564704000, 1572566400, 157766400, 1577836800, 1585267200, 1587772800, 1597968000, 1601251200, 189302400, 220924800, 252460800, 283996800, 31536000, 315532800, 347155200, 378691200, 410227200, 436492800, 441763200, 473385600, 504921600, 536457600, 567993600, 599616000, 606009600, 63072000, 631152000, 662688000, 672192000, 694224000, 725846400, 738892800, 757382400, 788918400, 790128000, 797212800, 820454400, 852076800, 854755200, 864518400, 866592000, 868233600, 872121600, 883612800, 886291200, 893980800, 912470400, 915148800, 938736000, 946684800, 94694400, 959904000, 965088000, 978307200, 987033600]"), + ..SearchBenchConf::BASE + }, + + /* the we bench some global / normal search with all the default criterion in the default + * order */ + SearchBenchConf { + group_name: "basic placeholder", + queries: vec![""], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "basic without quote", + queries: vec![ + "john ", + "david ", + "charles ", + "david bowie ", + "michael jackson ", + "thelonious monk ", + "charles mingus ", + "marcus miller ", + "tamo ", + "Notstandskomitee ", + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "basic without quote set-based", + queries: vec![ + "john ", + "david ", + "charles ", + "david bowie ", + "michael jackson ", + "thelonious monk ", + "charles mingus ", + "marcus miller ", + "tamo ", + "Notstandskomitee ", + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "basic with quote", + queries: vec![ + "\"john\" ", + "\"david\" ", + "\"charles\" ", + "\"david bowie\" ", + "\"michael jackson\" ", + "\"thelonious monk\" ", + "\"charles mingus\" ", + "\"marcus miller\" ", + "\"tamo\" ", + "\"Notstandskomitee\" ", + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "prefix search", + queries: vec![ + "s", // 500k+ results + "a", // + "b", // + "i", // + "x", // only 7k results + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "prefix search set-based", + queries: vec![ + "s", // 500k+ results + "a", // + "b", // + "i", // + "x", // only 7k results + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "prefix search iterative", + queries: vec![ + "s", // 500k+ results + "a", // + "b", // + "i", // + "x", // only 7k results + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlyIterative, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "words + prefix search", + queries: vec![ + "Someone I l", + "billie e", + "billie ei", + "i am getting o", + "i am getting ol", + "i am getting old", + "prologue 1 a 1", + "prologue 1 a 10" + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "words + prefix search set-based", + queries: vec![ + "Someone I l", + "billie e", + "billie ei", + "i am getting o", + "i am getting ol", + "i am getting old", + "prologue 1 a 1", + "prologue 1 a 10" + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "words + prefix search iterative", + queries: vec![ + "Someone I l", + "billie e", + "billie ei", + "i am getting o", + "i am getting ol", + "i am getting old", + "prologue 1 a 1", + "prologue 1 a 10" + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlyIterative, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "large offset", + queries: vec![ + "rock and r", + ], + offset: Some(770), + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "large offset set-based", + queries: vec![ + "rock and r", + ], + offset: Some(770), + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "large offset iterative", + queries: vec![ + "rock and r", + ], + offset: Some(770), + criterion_implementation_strategy: CriterionImplementationStrategy::OnlyIterative, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "many common and different words", + queries: vec![ + "Rock You Hip Hop Folk World Country Electronic Love The", + "Rock You Hip Hop Folk World Country Electronic Love", + "Rock You Hip Hop Folk World Country Electronic", + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "10x 'a' or 'b'", + queries: vec![ + "a a a a a a a a a a", + "b b b b b b b b b b", + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "10x 'a' or 'b' - set-based", + queries: vec![ + "a a a a a a a a a a", + "b b b b b b b b b b", + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "10x 'a' or 'b' - iterative", + queries: vec![ + "a a a a a a a a a a", + "b b b b b b b b b b", + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlyIterative, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "long gibberish", + queries: vec![ + "abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab", + "abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab", + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "phrase 7 words", + queries: vec![ + "\"Music From The Original Motion Picture Soundtrack\"" + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "phrase 9 words", + queries: vec![ + "\"Songs And Music From The Original Motion Picture Soundtrack\"" + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "synonyms easy", + queries: vec![ + "ily rnr", + "rnr ily", + "rnr rnr", + "ily ily", + "mftomp" + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "synonyms easy - set-based", + queries: vec![ + "ily rnr", + "rnr ily", + "rnr rnr", + "ily ily", + "mftomp" + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "synonyms easy - iterative", + queries: vec![ + "ily rnr", + "rnr ily", + "rnr rnr", + "ily ily", + "mftomp" + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlyIterative, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "synonyms medium", + queries: vec![ + "mftomp ily", + "mftomp rnr", + "rnr mftomp", + "ily mftomp", + "mftomp rnr ily" + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "synonyms medium - set-based", + queries: vec![ + "mftomp ily", + "mftomp rnr", + "rnr mftomp", + "ily mftomp", + "mftomp rnr ily" + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "synonyms medium - iterative", + queries: vec![ + "mftomp ily", + "mftomp rnr", + "rnr mftomp", + "ily mftomp", + "mftomp rnr ily" + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlyIterative, + ..SearchBenchConf::BASE + } + ] + ) ]; - utils::run_benches(c, confs); + /* we bench the filters with the default request */ + + utils::run_benches(base_index_conf, c, benches); } -criterion_group!(benches, bench_songs); +criterion_group!( + name = benches; + config = { criterion::Criterion::default().sample_size(10) }; + targets = bench_songs +); criterion_main!(benches); diff --git a/benchmarks/benches/search_wiki.rs b/benchmarks/benches/search_wiki.rs index b792c2645..27211615e 100644 --- a/benchmarks/benches/search_wiki.rs +++ b/benchmarks/benches/search_wiki.rs @@ -3,7 +3,8 @@ mod utils; use criterion::{criterion_group, criterion_main}; use milli::update::Settings; -use utils::Conf; +use milli::CriterionImplementationStrategy; +use utils::{IndexConf, IndexSettingsConf, SearchBenchConf}; #[global_allocator] static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; @@ -16,114 +17,272 @@ fn base_conf(builder: &mut Settings) { builder.set_searchable_fields(searchable_fields); } -#[rustfmt::skip] -const BASE_CONF: Conf = Conf { - dataset: datasets_paths::SMOL_WIKI_ARTICLES, - queries: &[ - "mingus ", // 46 candidates - "miles davis ", // 159 - "rock and roll ", // 1007 - "machine ", // 3448 - "spain ", // 7002 - "japan ", // 10.593 - "france ", // 17.616 - "film ", // 24.959 - ], - configure: base_conf, - ..Conf::BASE -}; - -fn bench_songs(c: &mut criterion::Criterion) { - let basic_with_quote: Vec = BASE_CONF - .queries - .iter() - .map(|s| { - s.trim().split(' ').map(|s| format!(r#""{}""#, s)).collect::>().join(" ") - }) - .collect(); - let basic_with_quote: &[&str] = - &basic_with_quote.iter().map(|s| s.as_str()).collect::>(); +fn bench_wiki(c: &mut criterion::Criterion) { + let index_conf: IndexConf = IndexConf { + dataset: datasets_paths::SMOL_WIKI_ARTICLES, + configure: base_conf, + ..IndexConf::BASE + }; #[rustfmt::skip] - let confs = &[ - /* first we bench each criterion alone */ - utils::Conf { - group_name: "proximity", - queries: &[ - "herald sings ", - "april paris ", - "tea two ", - "diesel engine ", - ], - criterion: Some(&["proximity"]), - optional_words: false, - ..BASE_CONF - }, - utils::Conf { - group_name: "typo", - queries: &[ - "migrosoft ", - "linax ", - "Disnaylande ", - "phytogropher ", - "nympalidea ", - "aritmetric ", - "the fronce ", - "sisan ", - ], - criterion: Some(&["typo"]), - optional_words: false, - ..BASE_CONF - }, - utils::Conf { - group_name: "words", - queries: &[ - "the black saint and the sinner lady and the good doggo ", // four words to pop, 27 results - "Kameya Tokujirō mingus monk ", // two words to pop, 55 - "Ulrich Hensel meilisearch milli ", // two words to pop, 306 - "Idaho Bellevue pizza ", // one word to pop, 800 - "Abraham machin ", // one word to pop, 1141 - ], - criterion: Some(&["words"]), - ..BASE_CONF - }, - /* the we bench some global / normal search with all the default criterion in the default - * order */ - utils::Conf { - group_name: "basic placeholder", - queries: &[""], - ..BASE_CONF - }, - utils::Conf { - group_name: "basic without quote", - queries: &BASE_CONF - .queries - .iter() - .map(|s| s.trim()) // we remove the space at the end of each request - .collect::>(), - ..BASE_CONF - }, - utils::Conf { - group_name: "basic with quote", - queries: basic_with_quote, - ..BASE_CONF - }, - utils::Conf { - group_name: "prefix search", - queries: &[ - "t", // 453k results - "c", // 405k - "g", // 318k - "j", // 227k - "q", // 71k - "x", // 17k - ], - ..BASE_CONF - }, + let benches = vec![ + // First all the benches done on the index with only the proximity criterion + ( + IndexSettingsConf { + criterion: Some(&["proximity"]), + }, + vec![ + SearchBenchConf { + group_name: "proximity criterion", + queries: vec![ + "herald sings ", + "april paris ", + "tea two ", + "diesel engine ", + ], + optional_words: false, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "proximity criterion set-based", + queries: vec![ + "herald sings ", + "april paris ", + "tea two ", + "diesel engine ", + ], + optional_words: false, + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "proximity criterion iterative", + queries: vec![ + "herald sings ", + "april paris ", + "tea two ", + "diesel engine ", + ], + optional_words: false, + criterion_implementation_strategy: CriterionImplementationStrategy::OnlyIterative, + ..SearchBenchConf::BASE + }, + ] + ), + // Then all the benches done on the index with only the typo criterion + ( + IndexSettingsConf { + criterion: Some(&["typo"]), + }, + vec![ + SearchBenchConf { + group_name: "typo criterion", + queries: vec![ + "migrosoft ", + "linax ", + "Disnaylande ", + "phytogropher ", + "nympalidea ", + "aritmetric ", + "the fronce ", + "sisan ", + ], + optional_words: false, + ..SearchBenchConf::BASE + }, + ] + ), + // Then all the benches done on the index with only the words criterion + ( + IndexSettingsConf { + criterion: Some(&["words"]), + }, + vec![ + SearchBenchConf { + group_name: "words criterion", + queries: vec![ + "the black saint and the sinner lady and the good doggo ", // four words to pop, 27 results + "Kameya Tokujirō mingus monk ", // two words to pop, 55 + "Ulrich Hensel meilisearch milli ", // two words to pop, 306 + "Idaho Bellevue pizza ", // one word to pop, 800 + "Abraham machin ", // one word to pop, 1141 + ], + ..SearchBenchConf::BASE + } + ] + ), + + // /* the we bench some global / normal search with all the default criterion in the default + // * order */ + ( + IndexSettingsConf::BASE, + vec![ + SearchBenchConf { + group_name: "basic placeholder", + queries: vec![""], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "basic without quote", + queries: vec![ + "mingus ", + "miles davis ", + "rock and roll ", + "machine ", + "spain ", + "japan ", + "france ", + "film ", + "the black saint and the sinner lady and the", + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "basic without quote set-based", + queries: vec![ + "mingus", + "miles davis", + "rock and roll", + "machine", + "spain", + "japan", + "france", + "film", + "the black saint and the sinner lady and the", + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "basic with quote", + queries: vec![ + "\"mingus\"", + "\"miles davis\"", + "\"rock and roll\"", + "\"machine\"", + "\"spain\"", + "\"japan\"", + "\"france\"", + "\"film\"", + "\"the black saint and the sinner lady\" and the", + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "prefix search", + queries: vec![ + "t", + "c", + "g", + "j", + "q", + "x", + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "prefix search set-based", + queries: vec![ + "t", + "c", + "g", + "j", + "q", + "x", + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "prefix search iterative", + queries: vec![ + "t", + "c", + "g", + "j", + "q", + "x", + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlyIterative, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "words + prefix search", + queries: vec![ + "the love of a new f", + "aesthetic sense of w", + "aesthetic sense of wo", + "once upon a time in ho", + "once upon a time in hol", + "once upon a time in hollywood a", + "belgium ardennes festival l", + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "words + prefix search set-based", + queries: vec![ + "the love of a new f", + "aesthetic sense of w", + "aesthetic sense of wo", + "once upon a time in ho", + "once upon a time in hol", + "once upon a time in hollywood a", + "belgium ardennes festival l", + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "words + prefix search iterative", + queries: vec![ + "the love of a new f", + "aesthetic sense of w", + "aesthetic sense of wo", + "once upon a time in ho", + "once upon a time in hol", + "once upon a time in hollywood a", + "belgium ardennes festival l", + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlyIterative, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "10x 'a' or 'b'", + queries: vec![ + "a a a a a a a a a a", + "b b b b b b b b b b", + ], + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "10x 'a' or 'b' - set-based", + queries: vec![ + "a a a a a a a a a a", + "b b b b b b b b b b", + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlySetBased, + ..SearchBenchConf::BASE + }, + SearchBenchConf { + group_name: "10x 'a' or 'b' - iterative", + queries: vec![ + "a a a a a a a a a a", + "b b b b b b b b b b", + ], + criterion_implementation_strategy: CriterionImplementationStrategy::OnlyIterative, + ..SearchBenchConf::BASE + }, + ] + ) ]; - utils::run_benches(c, confs); + utils::run_benches(index_conf, c, &benches); } -criterion_group!(benches, bench_songs); +criterion_group!( + name = benches; + config = { criterion::Criterion::default().sample_size(10) }; + targets = bench_wiki +); criterion_main!(benches); diff --git a/benchmarks/benches/utils.rs b/benchmarks/benches/utils.rs index 511b3b8d5..ecafd83ac 100644 --- a/benchmarks/benches/utils.rs +++ b/benchmarks/benches/utils.rs @@ -5,16 +5,16 @@ use std::io::{self, BufRead, BufReader, Cursor, Read, Seek}; use std::num::ParseFloatError; use std::path::Path; -use criterion::BenchmarkId; use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use milli::heed::EnvOpenOptions; use milli::update::{ IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings, }; -use milli::{Filter, Index, Object, TermsMatchingStrategy}; +use milli::{CriterionImplementationStrategy, Filter, Index, Object, TermsMatchingStrategy}; use serde_json::Value; -pub struct Conf<'a> { +#[derive(Clone)] +pub struct IndexConf<'a> { /// where we are going to create our database.mmdb directory /// each benchmark will first try to delete it and then recreate it pub database_name: &'a str, @@ -22,45 +22,66 @@ pub struct Conf<'a> { pub dataset: &'a str, /// The format of the dataset pub dataset_format: &'a str, - pub group_name: &'a str, - pub queries: &'a [&'a str], + /// primary key, if there is None we'll auto-generate docids for every documents + pub primary_key: Option<&'a str>, + /// configure your database as you want + pub configure: fn(&mut Settings), +} + +impl IndexConf<'_> { + pub const BASE: Self = IndexConf { + database_name: "benches.mmdb", + dataset_format: "csv", + dataset: "", + primary_key: None, + configure: |_| (), + }; +} + +pub struct IndexSettingsConf<'a> { /// here you can change which criterion are used and in which order. /// - if you specify something all the base configuration will be thrown out /// - if you don't specify anything (None) the default configuration will be kept pub criterion: Option<&'a [&'a str]>, - /// the last chance to configure your database as you want - pub configure: fn(&mut Settings), +} +impl IndexSettingsConf<'_> { + pub const BASE: Self = IndexSettingsConf { criterion: None }; +} + +#[derive(Clone)] +pub struct SearchBenchConf<'a> { + pub group_name: &'a str, + pub queries: Vec<&'a str>, + /// the implementation strategy used by the criteria + pub criterion_implementation_strategy: CriterionImplementationStrategy, + pub limit: Option, + pub offset: Option, pub filter: Option<&'a str>, pub sort: Option>, /// enable or disable the optional words on the query pub optional_words: bool, - /// primary key, if there is None we'll auto-generate docids for every documents - pub primary_key: Option<&'a str>, } -impl Conf<'_> { - pub const BASE: Self = Conf { - database_name: "benches.mmdb", - dataset_format: "csv", - dataset: "", +impl SearchBenchConf<'_> { + pub const BASE: Self = SearchBenchConf { group_name: "", - queries: &[], - criterion: None, - configure: |_| (), + queries: vec![], filter: None, sort: None, + offset: None, + limit: None, + criterion_implementation_strategy: CriterionImplementationStrategy::Dynamic, optional_words: true, - primary_key: None, }; } -pub fn base_setup(conf: &Conf) -> Index { - match remove_dir_all(&conf.database_name) { +pub fn base_setup_index(conf: &IndexConf) -> Index { + match remove_dir_all(conf.database_name) { Ok(_) => (), Err(e) if e.kind() == std::io::ErrorKind::NotFound => (), Err(e) => panic!("{}", e), } - create_dir_all(&conf.database_name).unwrap(); + create_dir_all(conf.database_name).unwrap(); let mut options = EnvOpenOptions::new(); options.map_size(100 * 1024 * 1024 * 1024); // 100 GB @@ -75,14 +96,9 @@ pub fn base_setup(conf: &Conf) -> Index { builder.set_primary_key(primary_key.to_string()); } - if let Some(criterion) = conf.criterion { - builder.reset_filterable_fields(); - builder.reset_criteria(); - builder.reset_stop_words(); - - let criterion = criterion.iter().map(|s| s.to_string()).collect(); - builder.set_criteria(criterion); - } + builder.reset_filterable_fields(); + builder.reset_criteria(); + builder.reset_stop_words(); (conf.configure)(&mut builder); @@ -107,41 +123,68 @@ pub fn base_setup(conf: &Conf) -> Index { index } -pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) { - for conf in confs { - let index = base_setup(conf); - - let file_name = Path::new(conf.dataset).file_name().and_then(|f| f.to_str()).unwrap(); - let name = format!("{}: {}", file_name, conf.group_name); - let mut group = c.benchmark_group(&name); +pub fn run_benches( + index_conf: IndexConf, + c: &mut criterion::Criterion, + confs: &[(IndexSettingsConf, Vec)], +) { + let index = base_setup_index(&index_conf); + let file_name = Path::new(index_conf.dataset).file_name().and_then(|f| f.to_str()).unwrap(); + + for (index_settings_conf, search_confs) in confs { + let mut wtxn = index.write_txn().unwrap(); + let indexer_config = IndexerConfig::default(); + let mut builder = Settings::new(&mut wtxn, &index, &indexer_config); + builder.reset_criteria(); + if let Some(criterion) = index_settings_conf.criterion { + let criterion = criterion.iter().map(|s| s.to_string()).collect(); + builder.set_criteria(criterion); + } + builder.execute(|_| (), || false).unwrap(); + wtxn.commit().unwrap(); - for &query in conf.queries { - group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| { + for search_conf in search_confs { + let name = format!("{}: {}", file_name, search_conf.group_name); + let _bench_criterion = c.bench_function(&name, |b| { b.iter(|| { let rtxn = index.read_txn().unwrap(); let mut search = index.search(&rtxn); - search.query(query).terms_matching_strategy(TermsMatchingStrategy::default()); - if let Some(filter) = conf.filter { + search.terms_matching_strategy(TermsMatchingStrategy::default()); + if let Some(filter) = search_conf.filter { let filter = Filter::from_str(filter).unwrap().unwrap(); search.filter(filter); } - if let Some(sort) = &conf.sort { + if let Some(sort) = &search_conf.sort { let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect(); search.sort_criteria(sort); } - let _ids = search.execute().unwrap(); + if let Some(limit) = search_conf.limit { + search.limit(limit); + } + if let Some(offset) = search_conf.offset { + search.offset(offset); + } + search.criterion_implementation_strategy( + search_conf.criterion_implementation_strategy, + ); + if search_conf.queries.is_empty() { + let _ids = search.execute().unwrap(); + } else { + for &query in search_conf.queries.iter() { + search.query(query); + let _ids = search.execute().unwrap(); + } + } }); }); } - group.finish(); - - index.prepare_for_closing().wait(); } + index.prepare_for_closing().wait(); } pub fn documents_from(filename: &str, filetype: &str) -> DocumentsBatchReader { - let reader = - File::open(filename).expect(&format!("could not find the dataset in: {}", filename)); + let reader = File::open(filename) + .unwrap_or_else(|_| panic!("could not find the dataset in: {}", filename)); let reader = BufReader::new(reader); let documents = match filetype { "csv" => documents_from_csv(reader).unwrap(),