From 59ba74cacb87ac89f7f5fcb5233eaccb50dd8349 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 16 Dec 2022 11:12:59 -0700 Subject: [PATCH 1/2] rustdoc: simplify JS search routine by not messing with lev distance Since the sorting function accounts for an `index` field, there's not much reason to also be applying changes to the levenshtein distance. Instead, we can just not treat `lev` as a filter if there's already a non-sentinel value for `index`. This change gives slightly more weight to the index and path part, as search criteria, than it used to. This changes some of the test cases, but not in any obviously-"worse" way, and, in particular, substring matches are a bigger deal than levenshtein distances (we're assuming that a typo is less likely than someone just not typing the entire name). Based on https://github.com/rust-lang/rust/pull/103710#issuecomment-1296894296 --- src/librustdoc/html/static/js/search.js | 114 +++++++++++++----------- 1 file changed, 64 insertions(+), 50 deletions(-) diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index 1b8822b0b2b7d..88592fa0c84c1 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -781,7 +781,29 @@ function initSearch(rawSearchIndex) { return a - b; } - // Sort by non levenshtein results and then levenshtein results by the distance + // sort by index of keyword in item name (no literal occurrence goes later) + a = (aaa.index < 0); + b = (bbb.index < 0); + if (a !== b) { + return a - b; + } + + // Sort by distance in the path part, if specified + // (less changes required to match means higher rankings) + a = aaa.path_lev; + b = bbb.path_lev; + if (a !== b) { + return a - b; + } + + // (later literal occurrence, if any, goes later) + a = aaa.index; + b = bbb.index; + if (a !== b) { + return a - b; + } + + // Sort by distance in the name part, the last part of the path // (less changes required to match means higher rankings) a = (aaa.lev); b = (bbb.lev); @@ -810,19 +832,6 @@ function initSearch(rawSearchIndex) { return (a > b ? +1 : -1); } - // sort by index of keyword in item name (no literal occurrence goes later) - a = (aaa.index < 0); - b = (bbb.index < 0); - if (a !== b) { - return a - b; - } - // (later literal occurrence, if any, goes later) - a = aaa.index; - b = bbb.index; - if (a !== b) { - return a - b; - } - // special precedence for primitive and keyword pages if ((aaa.item.ty === TY_PRIMITIVE && bbb.item.ty !== TY_KEYWORD) || (aaa.item.ty === TY_KEYWORD && bbb.item.ty !== TY_PRIMITIVE)) { @@ -1230,15 +1239,19 @@ function initSearch(rawSearchIndex) { * * `id` is the index in both `searchWords` and `searchIndex` arrays for this element. * * `index` is an `integer`` used to sort by the position of the word in the item's name. * * `lev` is the main metric used to sort the search results. + * * `path_lev` is zero if a single-component search query is used, otherwise it's the + * distance computed for everything other than the last path component. * * @param {Results} results * @param {string} fullId * @param {integer} id * @param {integer} index * @param {integer} lev + * @param {integer} path_lev */ - function addIntoResults(results, fullId, id, index, lev) { - if (lev === 0 || (!parsedQuery.literalSearch && lev <= MAX_LEV_DISTANCE)) { + function addIntoResults(results, fullId, id, index, lev, path_lev) { + const inBounds = lev <= MAX_LEV_DISTANCE || index !== -1; + if (lev === 0 || (!parsedQuery.literalSearch && inBounds)) { if (results[fullId] !== undefined) { const result = results[fullId]; if (result.dontValidate || result.lev <= lev) { @@ -1250,6 +1263,7 @@ function initSearch(rawSearchIndex) { index: index, dontValidate: parsedQuery.literalSearch, lev: lev, + path_lev: path_lev, }; } } @@ -1280,68 +1294,68 @@ function initSearch(rawSearchIndex) { if (!row || (filterCrates !== null && row.crate !== filterCrates)) { return; } - let lev, lev_add = 0, index = -1; + let lev, index = -1, path_lev = 0; const fullId = row.id; + const searchWord = searchWords[pos]; const in_args = findArg(row, elem, parsedQuery.typeFilter); const returned = checkReturned(row, elem, parsedQuery.typeFilter); - addIntoResults(results_in_args, fullId, pos, index, in_args); - addIntoResults(results_returned, fullId, pos, index, returned); + // path_lev is 0 because no parent path information is currently stored + // in the search index + addIntoResults(results_in_args, fullId, pos, -1, in_args, 0); + addIntoResults(results_returned, fullId, pos, -1, returned, 0); if (!typePassesFilter(parsedQuery.typeFilter, row.ty)) { return; } - const searchWord = searchWords[pos]; - if (parsedQuery.literalSearch) { - if (searchWord === elem.name) { - addIntoResults(results_others, fullId, pos, -1, 0); - } - return; + const row_index = row.normalizedName.indexOf(elem.pathLast); + const word_index = searchWord.indexOf(elem.pathLast); + + // lower indexes are "better" matches + // rank based on the "best" match + if (row_index === -1) { + index = word_index; + } else if (word_index === -1) { + index = row_index; + } else if (word_index < row_index) { + index = word_index; + } else { + index = row_index; } // No need to check anything else if it's a "pure" generics search. if (elem.name.length === 0) { if (row.type !== null) { lev = checkGenerics(row.type, elem, MAX_LEV_DISTANCE + 1); - addIntoResults(results_others, fullId, pos, index, lev); + // path_lev is 0 because we know it's empty + addIntoResults(results_others, fullId, pos, index, lev, 0); } return; } if (elem.fullPath.length > 1) { - lev = checkPath(elem.pathWithoutLast, row); - if (lev > MAX_LEV_DISTANCE || (parsedQuery.literalSearch && lev !== 0)) { + path_lev = checkPath(elem.pathWithoutLast, row); + if (path_lev > MAX_LEV_DISTANCE) { return; - } else if (lev > 0) { - lev_add = lev / 10; } } - if (searchWord.indexOf(elem.pathLast) > -1 || - row.normalizedName.indexOf(elem.pathLast) > -1 - ) { - index = row.normalizedName.indexOf(elem.pathLast); - } - lev = levenshtein(searchWord, elem.pathLast); - if (lev > 0 && elem.pathLast.length > 2 && searchWord.indexOf(elem.pathLast) > -1) { - if (elem.pathLast.length < 6) { - lev = 1; - } else { - lev = 0; + if (parsedQuery.literalSearch) { + if (searchWord === elem.name) { + addIntoResults(results_others, fullId, pos, index, 0, path_lev); } - } - lev += lev_add; - if (lev > MAX_LEV_DISTANCE) { return; - } else if (index !== -1 && elem.fullPath.length < 2) { - lev -= 1; } - if (lev < 0) { - lev = 0; + + lev = levenshtein(searchWord, elem.pathLast); + + if (index === -1 && lev + path_lev > MAX_LEV_DISTANCE) { + return; } - addIntoResults(results_others, fullId, pos, index, lev); + + addIntoResults(results_others, fullId, pos, index, lev, path_lev); } /** @@ -1386,7 +1400,7 @@ function initSearch(rawSearchIndex) { return; } const lev = Math.round(totalLev / nbLev); - addIntoResults(results, row.id, pos, 0, lev); + addIntoResults(results, row.id, pos, 0, lev, 0); } function innerRunQuery() { From db558b46861ee2f812ee1cc2b03b796f691a246d Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 16 Dec 2022 13:19:38 -0700 Subject: [PATCH 2/2] rustdoc: update search test cases --- tests/rustdoc-js-std/macro-print.js | 2 +- tests/rustdoc-js-std/typed-query.js | 2 +- tests/rustdoc-js-std/vec-new.js | 5 +++-- tests/rustdoc-js/search-short-types.js | 3 +-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/rustdoc-js-std/macro-print.js b/tests/rustdoc-js-std/macro-print.js index 858046e72e9a4..1b4c7b4057020 100644 --- a/tests/rustdoc-js-std/macro-print.js +++ b/tests/rustdoc-js-std/macro-print.js @@ -3,8 +3,8 @@ const QUERY = 'macro:print'; const EXPECTED = { 'others': [ { 'path': 'std', 'name': 'print' }, - { 'path': 'std', 'name': 'eprint' }, { 'path': 'std', 'name': 'println' }, + { 'path': 'std', 'name': 'eprint' }, { 'path': 'std', 'name': 'eprintln' }, ], }; diff --git a/tests/rustdoc-js-std/typed-query.js b/tests/rustdoc-js-std/typed-query.js index 25efbad269540..fd5c5489d79cf 100644 --- a/tests/rustdoc-js-std/typed-query.js +++ b/tests/rustdoc-js-std/typed-query.js @@ -6,8 +6,8 @@ const FILTER_CRATE = 'std'; const EXPECTED = { 'others': [ { 'path': 'std', 'name': 'print' }, - { 'path': 'std', 'name': 'eprint' }, { 'path': 'std', 'name': 'println' }, + { 'path': 'std', 'name': 'eprint' }, { 'path': 'std', 'name': 'eprintln' }, { 'path': 'std::pin', 'name': 'pin' }, { 'path': 'std::future', 'name': 'join' }, diff --git a/tests/rustdoc-js-std/vec-new.js b/tests/rustdoc-js-std/vec-new.js index cd0e8e7b4a9eb..fc44a566af21f 100644 --- a/tests/rustdoc-js-std/vec-new.js +++ b/tests/rustdoc-js-std/vec-new.js @@ -3,7 +3,8 @@ const QUERY = 'Vec::new'; const EXPECTED = { 'others': [ { 'path': 'std::vec::Vec', 'name': 'new' }, - { 'path': 'std::vec::Vec', 'name': 'ne' }, - { 'path': 'alloc::vec::Vec', 'name': 'ne' }, + { 'path': 'alloc::vec::Vec', 'name': 'new' }, + { 'path': 'std::vec::Vec', 'name': 'new_in' }, + { 'path': 'alloc::vec::Vec', 'name': 'new_in' }, ], }; diff --git a/tests/rustdoc-js/search-short-types.js b/tests/rustdoc-js/search-short-types.js index d14672af71fd6..3b2f15a40bf87 100644 --- a/tests/rustdoc-js/search-short-types.js +++ b/tests/rustdoc-js/search-short-types.js @@ -4,7 +4,6 @@ const EXPECTED = { 'others': [ { 'path': 'search_short_types', 'name': 'P' }, { 'path': 'search_short_types::VeryLongTypeName', 'name': 'p' }, - { 'path': 'search_short_types', 'name': 'Ap' }, - { 'path': 'search_short_types::VeryLongTypeName', 'name': 'ap' }, + { 'path': 'search_short_types', 'name': 'Pa' }, ], };