diff --git a/glean/glass/Glean/Glass/Handler/Symbols.hs b/glean/glass/Glean/Glass/Handler/Symbols.hs index 3770603d4..48425b0ee 100644 --- a/glean/glass/Glean/Glass/Handler/Symbols.hs +++ b/glean/glass/Glean/Glass/Handler/Symbols.hs @@ -82,7 +82,7 @@ import Glean.Glass.NameSearch ( SearchQuery(..), SingleSymbol, FeelingLuckyResult(..), QueryExpr(..), RepoSearchResult, SymbolSearchData(..), toSearchResult, ToSearchResult(..), AngleSearch(..), srEntity, - buildLuckyContainerQuery, buildSearchQuery + buildLuckyContainerQuery, buildSearchQuery, dedupSearchResult ) import Glean.Glass.XRefs ( GenXRef(..) ) import Glean.Glass.Search as Search @@ -408,13 +408,14 @@ joinSearchResults joinSearchResults mlimit terse sorted xs = SymbolSearchResult syms $ if terse then [] else catMaybes descs where + uniqXs = dedupSearchResult <$> xs (syms,descs) = unzip $ nubOrd $ case (mlimit, sorted) of (Nothing, _) -> flattened (Just n, False) -> take n flattened -- codehub/aka "sorted" mode grouping, ranking and sampling - (Just n, True) -> takeFairN n (concatMap sortResults xs) + (Just n, True) -> takeFairN n (concatMap sortResults uniqXs) - flattened = concat xs + flattened = concat uniqXs -- -- DFS to first singleton result. diff --git a/glean/glass/Glean/Glass/NameSearch.hs b/glean/glass/Glean/Glass/NameSearch.hs index 75188e556..5b35c04fd 100644 --- a/glean/glass/Glean/Glass/NameSearch.hs +++ b/glean/glass/Glean/Glass/NameSearch.hs @@ -17,6 +17,7 @@ module Glean.Glass.NameSearch , RepoSearchResult , FeelingLuckyResult(..) 
, SingleSymbol + , dedupSearchResult -- * Search -- ** Search flags @@ -56,6 +57,7 @@ import Glean.Glass.Utils (splitOnAny, QueryType ) import qualified Glean.Schema.CodemarkupTypes.Types as Code import qualified Glean.Schema.CodemarkupSearch.Types as CodeSearch import qualified Glean.Schema.Code.Types as Code +import qualified Data.HashMap.Strict as Map -- -- Finding entities by name search @@ -651,6 +653,18 @@ instance ToSearchResult CodeSearch.SearchByScope where -- | Type of processed search results from a single scm repo type RepoSearchResult = [SingleSymbol] +-- | Ensure all SymbolResults in a (repo-wide) search result are unique. +-- +-- We can have multiple descriptions when querying dbs +-- having the same content (for instance, incremental and full +-- DBs). In that case, descriptions differ only in the repo_hash +-- field. +-- dedupSearchResult picks one arbitrarily. TODO pick the most +-- recent revision one, and don't discard a description if it +-- differs by more than the repo_hash field +dedupSearchResult :: RepoSearchResult -> RepoSearchResult +dedupSearchResult results = Map.toList $ Map.fromListWith max results + -- An un-concatenated set of query results to search for unique hits in -- within one scm repo, across dbs, across queries, a set of result symbols. newtype FeelingLuckyResult = FeelingLuckyResult [[RepoSearchResult]]