Skip to content

Commit

Permalink
fix elasticsearch, refactor gitgrep
Browse files Browse the repository at this point in the history
  • Loading branch information
wxiaoguang committed Feb 14, 2025
1 parent e87e418 commit fca84ac
Show file tree
Hide file tree
Showing 8 changed files with 119 additions and 54 deletions.
9 changes: 6 additions & 3 deletions modules/indexer/code/bleve/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,10 +267,13 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
pathQuery.FieldVal = "Filename"
pathQuery.SetBoost(10)

if strings.HasPrefix(opts.Keyword, "\"") && strings.HasSuffix(opts.Keyword, "\"") {
opts.Keyword = strings.Trim(opts.Keyword, "\"")
q := bleve.NewMatchPhraseQuery(opts.Keyword)
keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword)
if isPhrase {
q := bleve.NewMatchPhraseQuery(keywordAsPhrase)
q.FieldVal = "Content"
if opts.IsKeywordFuzzy {
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(keywordAsPhrase)
}
contentQuery = q
} else {
q := bleve.NewMatchQuery(opts.Keyword)
Expand Down
17 changes: 12 additions & 5 deletions modules/indexer/code/elasticsearch/elasticsearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
"code.gitea.io/gitea/modules/util"

"github.com/go-enry/go-enry/v2"
"github.com/olivere/elastic/v7"
Expand Down Expand Up @@ -359,13 +360,19 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan

// Search searches for codes and language stats by given conditions.
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
searchType := esMultiMatchTypePhrasePrefix
if opts.IsKeywordFuzzy {
searchType = esMultiMatchTypeBestFields
var contentQuery elastic.Query
keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword)
if isPhrase {
contentQuery = elastic.NewMatchPhraseQuery("content", keywordAsPhrase)
} else {
// TODO: this is the old logic, but not really using "fuzziness"
// * IsKeywordFuzzy=true: "best_fields"
// * IsKeywordFuzzy=false: "phrase_prefix"
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).
Type(util.Iif(opts.IsKeywordFuzzy, esMultiMatchTypeBestFields, esMultiMatchTypePhrasePrefix))
}

kwQuery := elastic.NewBoolQuery().Should(
elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType),
contentQuery,
elastic.NewMultiMatchQuery(opts.Keyword, "filename^10").Type(esMultiMatchTypePhrasePrefix),
)
query := elastic.NewBoolQuery()
Expand Down
59 changes: 59 additions & 0 deletions modules/indexer/code/gitgrep/gitgrep.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package gitgrep

import (
"context"
"fmt"
"strings"

"code.gitea.io/gitea/modules/git"
code_indexer "code.gitea.io/gitea/modules/indexer/code"
"code.gitea.io/gitea/modules/setting"
)

// indexSettingToGitGrepPathspecList builds the pathspec list handed to git grep
// from the indexer settings: each include pattern becomes a ":(glob)" pathspec
// and each exclude pattern becomes a ":(glob,exclude)" pathspec. Include entries
// come first, followed by exclude entries. The result is nil when no patterns
// are configured.
func indexSettingToGitGrepPathspecList() (list []string) {
	const (
		includeMagic = ":(glob)"
		excludeMagic = ":(glob,exclude)"
	)
	for _, include := range setting.Indexer.IncludePatterns {
		pathspec := includeMagic + include.PatternString()
		list = append(list, pathspec)
	}
	for _, exclude := range setting.Indexer.ExcludePatterns {
		pathspec := excludeMagic + exclude.PatternString()
		list = append(list, pathspec)
	}
	return list
}

// PerformSearch greps gitRepo at ref for keyword and returns one page of the
// matches converted to code search results.
//
// page is 1-based; values below 1 are treated as the first page, so an invalid
// page number can no longer produce a negative slice bound and panic. The page
// size is setting.UI.RepoSearchPagingNum (a negative value is treated as 0).
// total is the number of grep results before paging. Every returned result
// carries repoID and the commit ID that ref resolves to.
//
// Errors from git.GrepSearch and gitRepo.GetRefCommitID are wrapped and
// returned with nil results and a zero total.
func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, isFuzzy bool) (searchResults []*code_indexer.Result, total int, err error) {
	// TODO: it should also respect ParseKeywordAsPhrase and clarify the "fuzzy" behavior
	res, err := git.GrepSearch(ctx, gitRepo, keyword, git.GrepOptions{
		ContextLineNumber: 1,
		IsFuzzy:           isFuzzy,
		RefName:           ref.String(),
		PathspecList:      indexSettingToGitGrepPathspecList(),
	})
	if err != nil {
		// TODO: if no branch exists, it reports: exit status 128, fatal: this operation must be run in a work tree.
		return nil, 0, fmt.Errorf("git.GrepSearch: %w", err)
	}
	commitID, err := gitRepo.GetRefCommitID(ref.String())
	if err != nil {
		return nil, 0, fmt.Errorf("gitRepo.GetRefCommitID: %w", err)
	}

	total = len(res)

	// Clamp the paging inputs: both slice bounds below must stay within [0, len(res)].
	page = max(page, 1)
	pageSize := max(setting.UI.RepoSearchPagingNum, 0)
	pageStart := min((page-1)*pageSize, len(res))
	pageEnd := min(page*pageSize, len(res))

	for _, r := range res[pageStart:pageEnd] {
		searchResults = append(searchResults, &code_indexer.Result{
			RepoID:   repoID,
			Filename: r.Filename,
			CommitID: commitID,
			// UpdatedUnix: not supported yet
			// Language:    not supported yet
			// Color:       not supported yet
			Lines: code_indexer.HighlightSearchResultCode(r.Filename, "", r.LineNumbers, strings.Join(r.LineCodes, "\n")),
		})
	}
	return searchResults, total, nil
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package repo
package gitgrep

import (
"testing"
Expand Down
6 changes: 2 additions & 4 deletions modules/indexer/code/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,11 @@ var (
// When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready.
// So it's always safe use it as *globalIndexer.Load() and call its methods.
globalIndexer atomic.Pointer[internal.Indexer]
dummyIndexer *internal.Indexer
)

func init() {
i := internal.NewDummyIndexer()
dummyIndexer = &i
globalIndexer.Store(dummyIndexer)
dummyIndexer := internal.NewDummyIndexer()
globalIndexer.Store(&dummyIndexer)
}

func index(ctx context.Context, indexer internal.Indexer, repoID int64) error {
Expand Down
10 changes: 9 additions & 1 deletion modules/indexer/code/internal/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func FilenameOfIndexerID(indexerID string) string {
return indexerID[index+1:]
}

// Given the contents of file, returns the boundaries of its first seven lines.
// FilenameMatchIndexPos returns the boundaries of the first seven lines of the given file content.
func FilenameMatchIndexPos(content string) (int, int) {
count := 1
for i, c := range content {
Expand All @@ -48,3 +48,11 @@ func FilenameMatchIndexPos(content string) (int, int) {
}
return 0, len(content)
}

// ParseKeywordAsPhrase reports whether keyword is a double-quoted phrase and,
// if it is, returns the text between the outer quotes.
//
// A keyword is a phrase only when it is at least two bytes long and both
// starts and ends with a double quote. The length check matters: a keyword
// that is a single `"` satisfies both the prefix and the suffix test, and
// slicing it as keyword[1:0] would panic. For non-phrases the result is
// ("", false). An empty quoted keyword (`""`) yields ("", true).
func ParseKeywordAsPhrase(keyword string) (string, bool) {
	if len(keyword) >= 2 && strings.HasPrefix(keyword, `"`) && strings.HasSuffix(keyword, `"`) {
		// only remove the prefix and suffix quotes, no need to decode the content at the moment
		return keyword[1 : len(keyword)-1], true
	}
	return "", false
}
24 changes: 24 additions & 0 deletions modules/indexer/code/internal/util_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package internal

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestParseKeywordAsPhrase checks that only keywords wrapped in double quotes
// are reported as phrases, and that just the outermost pair of quotes is
// stripped (inner quotes and backslashes are left untouched).
func TestParseKeywordAsPhrase(t *testing.T) {
	cases := []struct {
		keyword      string
		wantPhrase   string
		wantIsPhrase bool
	}{
		{keyword: `a`, wantPhrase: "", wantIsPhrase: false},
		{keyword: `"a"`, wantPhrase: "a", wantIsPhrase: true},
		{keyword: `""\"""`, wantPhrase: `"\""`, wantIsPhrase: true},
	}
	for _, tc := range cases {
		gotPhrase, gotIsPhrase := ParseKeywordAsPhrase(tc.keyword)
		assert.Equal(t, tc.wantPhrase, gotPhrase, "keyword: %s", tc.keyword)
		assert.Equal(t, tc.wantIsPhrase, gotIsPhrase, "keyword: %s", tc.keyword)
	}
}
46 changes: 6 additions & 40 deletions routers/web/repo/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ package repo

import (
"net/http"
"strings"

"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/git"
code_indexer "code.gitea.io/gitea/modules/indexer/code"
"code.gitea.io/gitea/modules/indexer/code/gitgrep"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/templates"
"code.gitea.io/gitea/routers/common"
Expand All @@ -18,16 +18,6 @@ import (

const tplSearch templates.TplName = "repo/search"

// indexSettingToGitGrepPathspecList converts the indexer's include and exclude
// patterns into a pathspec list: include patterns are prefixed with ":(glob)"
// and exclude patterns with ":(glob,exclude)". Includes are listed first.
func indexSettingToGitGrepPathspecList() (list []string) {
for _, expr := range setting.Indexer.IncludePatterns {
list = append(list, ":(glob)"+expr.PatternString())
}
for _, expr := range setting.Indexer.ExcludePatterns {
list = append(list, ":(glob,exclude)"+expr.PatternString())
}
return list
}

// Search render repository search page
func Search(ctx *context.Context) {
ctx.Data["PageIsViewCode"] = true
Expand Down Expand Up @@ -67,38 +57,14 @@ func Search(ctx *context.Context) {
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
}
} else {
searchRefName := git.RefNameFromBranch(ctx.Repo.Repository.DefaultBranch) // BranchName should be default branch or the first existing branch
res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, prepareSearch.Keyword, git.GrepOptions{
ContextLineNumber: 1,
IsFuzzy: prepareSearch.IsFuzzy,
RefName: searchRefName.String(),
PathspecList: indexSettingToGitGrepPathspecList(),
})
if err != nil {
// TODO: if no branch exists, it reports: exit status 128, fatal: this operation must be run in a work tree.
ctx.ServerError("GrepSearch", err)
return
}
commitID, err := ctx.Repo.GitRepo.GetRefCommitID(searchRefName.String())
var err error
// ref should be default branch or the first existing branch
searchRef := git.RefNameFromBranch(ctx.Repo.Repository.DefaultBranch)
searchResults, total, err = gitgrep.PerformSearch(ctx, page, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, searchRef, prepareSearch.Keyword, prepareSearch.IsFuzzy)
if err != nil {
ctx.ServerError("GetRefCommitID", err)
ctx.ServerError("gitgrep.PerformSearch", err)
return
}
total = len(res)
pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res))
pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res))
res = res[pageStart:pageEnd]
for _, r := range res {
searchResults = append(searchResults, &code_indexer.Result{
RepoID: ctx.Repo.Repository.ID,
Filename: r.Filename,
CommitID: commitID,
// UpdatedUnix: not supported yet
// Language: not supported yet
// Color: not supported yet
Lines: code_indexer.HighlightSearchResultCode(r.Filename, "", r.LineNumbers, strings.Join(r.LineCodes, "\n")),
})
}
}

ctx.Data["Repo"] = ctx.Repo.Repository
Expand Down

0 comments on commit fca84ac

Please sign in to comment.