Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improved ranking #93

Merged
merged 3 commits into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 2 additions & 11 deletions src/bucket/bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"github.com/tminaorg/brzaguza/src/bucket/result"
"github.com/tminaorg/brzaguza/src/config"
"github.com/tminaorg/brzaguza/src/engines"
"github.com/tminaorg/brzaguza/src/rank"
)

type Relay struct {
Expand Down Expand Up @@ -36,10 +35,6 @@ func AddSEResult(seResult *engines.RetrievedResult, seName engines.Name, relay *
Response: nil,
}

if config.InsertDefaultRank {
result.Rank = rank.DefaultRank(seResult.Rank.Rank, seResult.Rank.Page, seResult.Rank.OnPageRank)
}

relay.Mutex.Lock()
relay.ResultMap[result.URL] = &result
relay.Mutex.Unlock()
Expand Down Expand Up @@ -83,17 +78,13 @@ func SetResultResponse(link string, response *colly.Response, relay *Relay, seNa
}

mapRes.Response = response

resCopy := *mapRes
rankAddr := &(mapRes.Rank)
relay.Mutex.Unlock()
rank.SetRank(&resCopy, rankAddr, &(relay.Mutex)) //copy contains pointer to response
}

func MakeSEResult(urll string, title string, description string, searchEngineName engines.Name, seRank int, sePage int, seOnPageRank int) *engines.RetrievedResult {
func MakeSEResult(urll string, title string, description string, searchEngineName engines.Name, sePage int, seOnPageRank int) *engines.RetrievedResult {
ser := engines.RetrievedRank{
SearchEngine: searchEngineName,
Rank: seRank,
Rank: -1,
Page: sePage,
OnPageRank: seOnPageRank,
}
Expand Down
2 changes: 1 addition & 1 deletion src/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func setupCli() {
kong.Vars{
"version": fmt.Sprintf("%v (%v@%v)", Version, GitCommit, Timestamp),
"config_path": ".",
"log_path": ".",
"log_path": "./log",
"query_string": "banana death",
},
)
Expand Down
11 changes: 8 additions & 3 deletions src/config/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,12 @@ import (
)

var EnabledEngines []engines.Name = make([]engines.Name, 0)
var LogDumpLocation string = "dump/"

func (c *Config) Load(path string, logPath string) {
// Load vars
loadVars(logPath)

func (c *Config) Load(path string) {
// Use "." as the key path delimiter. This can be "/" or any character.
k := koanf.New(".")

Expand Down Expand Up @@ -62,5 +66,6 @@ func (c *Config) Load(path string) {
}
}

const InsertDefaultRank bool = true // this should be moved to config
const LogDumpLocation string = "logdump/" // this should be moved to config
// loadVars rewrites the package-level LogDumpLocation so that it is prefixed
// with logPath and a "/" separator.
//
// NOTE: every call prepends logPath to the current value again, so calling it
// more than once keeps growing the path.
func loadVars(logPath string) {
	dumpDir := logPath + "/" + LogDumpLocation
	LogDumpLocation = dumpDir
}
2 changes: 1 addition & 1 deletion src/engines/bing/bing.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pageStr string = e.Request.Ctx.Get("page")
page, _ := strconv.Atoi(pageStr)

res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, -1, page, pageRankCounter[page]+1)
res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1)
bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol)
pageRankCounter[page]++
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/engines/brave/brave.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pageStr string = e.Request.Ctx.Get("page")
page, _ := strconv.Atoi(pageStr)

res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, -1, page, pageRankCounter[page]+1)
res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1)
bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol)
pageRankCounter[page]++
}
Expand Down
2 changes: 1 addition & 1 deletion src/engines/duckduckgo/duckduckgo.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
linkText = parse.ParseURL(rawURL)
case 3:
if linkText != "" && linkText != "#" && titleText != "" {
res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, rrank, page, (i/4 + 1))
res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, (i/4 + 1))
bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol)
}
}
Expand Down
13 changes: 4 additions & 9 deletions src/engines/etools/etools.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
sedefaults.ColRequest(Info.Name, col, &ctx, &retError)
sedefaults.ColError(Info.Name, col, &retError)

var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage)

col.OnHTML(dompaths.Result, func(e *colly.HTMLElement) {
dom := e.DOM

Expand All @@ -53,17 +55,10 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
if linkText != "" && linkText != "#" && titleText != "" {
var pageStr string = e.Request.Ctx.Get("page")
page, _ := strconv.Atoi(pageStr)
seRankString := strings.TrimSpace(dom.Find("td[class=\"count help\"]").Text())
seRank, convErr := strconv.Atoi(seRankString)
if convErr != nil {
log.Error().Err(convErr).Msgf("%v: SERank string to int conversion error. URL: %v, SERank string: %v", Info.Name, linkText, seRankString)
}

//var onPageRank int = e.Index // this should also work, but is a bit more volatile
var onPageRank int = (seRank-1)%Info.ResultsPerPage + 1

res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, seRank, page, onPageRank)
res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1)
bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol)
pageRankCounter[page]++
}
})

Expand Down
2 changes: 1 addition & 1 deletion src/engines/google/google.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pageStr string = e.Request.Ctx.Get("page")
page, _ := strconv.Atoi(pageStr)

res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, -1, page, pageRankCounter[page]+1)
res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1)
bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol)
pageRankCounter[page]++
}
Expand Down
2 changes: 1 addition & 1 deletion src/engines/mojeek/mojeek.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pageStr string = e.Request.Ctx.Get("page")
page, _ := strconv.Atoi(pageStr)

res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, (page-1)*Info.ResultsPerPage+pageRankCounter[page]+1, page, pageRankCounter[page]+1)
res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1)
bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol)
pageRankCounter[page]++
}
Expand Down
2 changes: 1 addition & 1 deletion src/engines/presearch/presearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
goodTitle := parse.ParseTextWithHTML(result.Title)
goodDesc := parse.ParseTextWithHTML(result.Desc)

res := bucket.MakeSEResult(goodURL, goodTitle, goodDesc, Info.Name, -1, page, counter)
res := bucket.MakeSEResult(goodURL, goodTitle, goodDesc, Info.Name, page, counter)
bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol)
counter += 1
}
Expand Down
4 changes: 2 additions & 2 deletions src/engines/qwant/qwant.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,15 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
}

mainline := parsedResponse.Data.Res.Items.Mainline
counter := 0
counter := 1
for _, group := range mainline {
if group.Type != "web" {
continue
}
for _, result := range group.Items {
goodURL := parse.ParseURL(result.URL)

res := bucket.MakeSEResult(goodURL, result.Title, result.Description, Info.Name, (page-1)*Info.ResultsPerPage+counter, page, counter%Info.ResultsPerPage+1)
res := bucket.MakeSEResult(goodURL, result.Title, result.Description, Info.Name, page, counter)
bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol)
counter += 1
}
Expand Down
2 changes: 1 addition & 1 deletion src/engines/startpage/startpage.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pageStr string = e.Request.Ctx.Get("page")
page, _ := strconv.Atoi(pageStr)

res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, -1, page, pageRankCounter[page]+1)
res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1)
bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol)
pageRankCounter[page]++
} else {
Expand Down
4 changes: 2 additions & 2 deletions src/engines/swisscows/swisscows.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,13 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
log.Error().Err(err).Msgf("%v: Failed body unmarshall to json:\n%v", Info.Name, string(r.Body))
}

counter := 0
counter := 1
for _, result := range parsedResponse.Items {
goodURL := parse.ParseURL(result.URL)
title := parse.ParseTextWithHTML(result.Title)
desc := parse.ParseTextWithHTML(result.Desc)

res := bucket.MakeSEResult(goodURL, title, desc, Info.Name, -1, page, counter%Info.ResultsPerPage+1)
res := bucket.MakeSEResult(goodURL, title, desc, Info.Name, page, counter)
bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol)
counter += 1
}
Expand Down
2 changes: 1 addition & 1 deletion src/engines/yahoo/yahoo.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pageStr string = e.Request.Ctx.Get("page")
page, _ := strconv.Atoi(pageStr)

res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, -1, page, pageRankCounter[page]+1)
res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1)
bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol)
pageRankCounter[page]++
}
Expand Down
4 changes: 2 additions & 2 deletions src/engines/yep/yep.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
col.OnResponse(func(r *colly.Response) {
content := parseJSON(r.Body)

counter := 0
counter := 1
for _, result := range content.Results {
if result.TType != "Organic" {
continue
Expand All @@ -44,7 +44,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
goodTitle := parse.ParseTextWithHTML(result.Title)
goodDescription := parse.ParseTextWithHTML(result.Snippet)

res := bucket.MakeSEResult(goodURL, goodTitle, goodDescription, Info.Name, counter, counter/Info.ResultsPerPage+1, counter%Info.ResultsPerPage+1)
res := bucket.MakeSEResult(goodURL, goodTitle, goodDescription, Info.Name, 1, counter)
bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol)
counter += 1
}
Expand Down
2 changes: 1 addition & 1 deletion src/logger/logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func DateString() string {
func Setup(path string, verbosity int) {
// Generate logfile name
datetime := DateString()
filepath := fmt.Sprintf("%v/log/brzaguza_%v.log", path, datetime)
filepath := fmt.Sprintf("%v/brzaguza_%v.log", path, datetime)

// Setup logger
logger := log.Output(io.MultiWriter(zerolog.ConsoleWriter{
Expand Down
2 changes: 1 addition & 1 deletion src/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func main() {

// load config file
config := config.New()
config.Load(cli.Config)
config.Load(cli.Config, cli.Log)

if cli.Cli {
log.Info().
Expand Down
21 changes: 20 additions & 1 deletion src/rank/byrank.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,28 @@
package rank

import "github.com/tminaorg/brzaguza/src/bucket/result"
import (
"github.com/rs/zerolog/log"
"github.com/tminaorg/brzaguza/src/bucket/result"
)

// ByRank implements sort.Interface for a slice of results, ordering them by
// ascending Rank.
type ByRank []result.Result

// Len reports how many results the slice holds.
func (r ByRank) Len() int {
	return len(r)
}

// Swap exchanges the results at positions i and j.
func (r ByRank) Swap(i, j int) {
	r[j], r[i] = r[i], r[j]
}

// Less reports whether the result at i has a smaller Rank than the one at j.
func (r ByRank) Less(i, j int) bool {
	return r[j].Rank > r[i].Rank
}

// ByRetrievedRank implements sort.Interface over RankFiller entries, ordering
// them by the page they were retrieved from and then by their position on
// that page.
type ByRetrievedRank []RankFiller

// Len reports how many entries the slice holds.
func (r ByRetrievedRank) Len() int { return len(r) }

// Swap exchanges the entries at positions i and j.
func (r ByRetrievedRank) Swap(i, j int) { r[i], r[j] = r[j], r[i] }

// Less reports whether the entry at i must sort before the entry at j:
// a lower Page wins, and within the same Page a lower OnPageRank wins.
//
// Two entries with identical Page and OnPageRank are logged as an error and
// treated as equal.
func (r ByRetrievedRank) Less(i, j int) bool {
	if r[i].RetRank.Page != r[j].RetRank.Page {
		return r[i].RetRank.Page < r[j].RetRank.Page
	}
	if r[i].RetRank.OnPageRank != r[j].RetRank.OnPageRank {
		return r[i].RetRank.OnPageRank < r[j].RetRank.OnPageRank
	}

	// Equal keys must compare as "not less" in both directions. Returning
	// true here would make Less(i, j) and Less(j, i) both hold, which breaks
	// the ordering contract that sort.Sort relies on.
	log.Error().Msgf("failed at ranking: %v, %v", r[i], r[j])
	return false
}
62 changes: 45 additions & 17 deletions src/rank/rank.go
Original file line number Diff line number Diff line change
@@ -1,33 +1,61 @@
package rank

import (
"sync"
"sort"

"github.com/rs/zerolog/log"
"github.com/tminaorg/brzaguza/src/bucket/result"
"github.com/tminaorg/brzaguza/src/engines"
)

// TLDR: you must mutex.Lock when changing *rankAddr; you probably don't need to mutex.RLock() when reading result
// (in reality even *rankAddr shouldn't need a lock, but Go would definitely complain about simultaneous read/write because of it)
func SetRank(result *result.Result, rankAddr *int, mutex *sync.RWMutex) {
// SetRank copies the Rank of the result's first engine rank entry into the
// result's own Rank field.
//
// It indexes EngineRanks[0] unconditionally, so the result must carry at
// least one engine rank.
func SetRank(result *result.Result) {
	first := result.EngineRanks[0]
	result.Rank = first.Rank
}

func Rank(resMap map[string]*result.Result) []result.Result {
results := make([]result.Result, 0, len(resMap))
for _, res := range resMap {
results = append(results, *res)
}

//mutex.RLock()
reqUrl := result.Response.Request.URL.String() //dummy code, if error here, uncomment lock
//mutex.RUnlock()
//setup retrieved rank here
FillRetrievedRank(results)

if reqUrl != result.URL { //dummy code
log.Trace().Msgf("(This is ok) Request URL not same as result.URL \\/ %v | %v", reqUrl, result.URL)
for ind := range results {
SetRank(&(results[ind]))
}

rrank := result.EngineRanks[0].Page*100 + result.EngineRanks[0].OnPageRank
sort.Sort(ByRank(results))

mutex.Lock()
*rankAddr = rrank
mutex.Unlock()
return results
}

log.Trace().Msgf("Set rank to %v for %v: %v", rrank, result.Title, result.URL)
// RankFiller ties one engine rank entry back to where it lives, so that the
// entry can be sorted separately and its computed rank written back later.
type RankFiller struct {
// ArrInd is the index of the owning result in the results slice.
ArrInd int
// RetRank is a copy of the engine rank entry; its Page and OnPageRank are the sort keys.
RetRank engines.RetrievedRank
// RRInd is the index of the entry inside the owning result's EngineRanks.
RRInd int
}

func DefaultRank(seRank int, sePage int, seOnPageRank int) int {
return sePage*100 + seOnPageRank
// FillRetrievedRank computes, for every search engine, the rank of each
// engine rank entry found in results and stores it in that entry's Rank field.
//
// Entries are grouped by their SearchEngine, each group is sorted with
// ByRetrievedRank, and the 1-based position in the sorted group becomes the
// entry's Rank. Entries whose SearchEngine is engines.Undefined are skipped
// and keep their current Rank.
func FillRetrievedRank(results []result.Result) {
	// One bucket per engine, indexed by the engine name value.
	perEngine := make([][]RankFiller, len(engines.NameValues()))

	for resIdx := range results {
		for rankIdx, retRank := range results[resIdx].EngineRanks {
			if retRank.SearchEngine == engines.Undefined { //this should be fixed. TODO
				continue
			}
			perEngine[retRank.SearchEngine] = append(perEngine[retRank.SearchEngine], RankFiller{
				ArrInd:  resIdx,
				RetRank: retRank,
				RRInd:   rankIdx,
			})
		}
	}

	for _, fillers := range perEngine {
		sort.Sort(ByRetrievedRank(fillers))

		// Write the sorted position back to the entry each filler points at.
		for pos := range fillers {
			filler := fillers[pos]
			results[filler.ArrInd].EngineRanks[filler.RRInd].Rank = pos + 1
		}
	}
}
18 changes: 9 additions & 9 deletions src/search/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package search
import (
"context"
"net/url"
"sort"
"time"

"github.com/rs/zerolog/log"
"github.com/sourcegraph/conc"
Expand All @@ -21,19 +21,19 @@ func PerformSearch(query string, options engines.Options, config *config.Config)

query = url.QueryEscape(query)

resTiming := time.Now()
log.Debug().Msg("Waiting for results from engines...")
var worker conc.WaitGroup
runEngines(config.Engines, query, &worker, &relay, options)
log.Debug().Msg("Waiting for results from engines...")
worker.Wait()
log.Debug().Msgf("Got results in %v", time.Since(resTiming).Milliseconds())

results := make([]result.Result, 0, len(relay.ResultMap))
for _, res := range relay.ResultMap {
results = append(results, *res)
}

sort.Sort(rank.ByRank(results))
rankTiming := time.Now()
log.Debug().Msg("Ranking...")
results := rank.Rank(relay.ResultMap)
log.Debug().Msgf("Finished ranking in %v", time.Since(rankTiming).Milliseconds())

log.Debug().Msg("All processing done!")
log.Debug().Msg("Search done!")

return results
}
Expand Down