Skip to content

Commit

Permalink
Update version. Replace WRatio by the much faster QRatio (#39)
Browse files Browse the repository at this point in the history
* Replace WRatio by the much faster QRatio
  • Loading branch information
i30817 authored Nov 4, 2023
1 parent d6074d0 commit f0d8776
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 8 deletions.
2 changes: 1 addition & 1 deletion libretrofuzz/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "3.6.1"
__version__ = "3.6.2"
12 changes: 6 additions & 6 deletions libretrofuzz/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ def __call__(self, name, other, score_cutoff=None):
return MAX_SCORE
if not name_ns:
return 0
# score is based on WRatio (a comprehensive 0-100 weighted heuristic from rapidfuzz)
# score is based on QRatio (a 0-100 heuristic from rapidfuzz, WRatio is better but unacceptably slow)
# summed to some custom heuristics
# Up to DEF_SCORE QRatio is used with 100 being exactly DEF_SCORE, after the heuristics
# if -min is used, above DEF_SCORE, the heuristics will need better fit to win
Expand All @@ -354,33 +354,33 @@ def __call__(self, name, other, score_cutoff=None):
# 97% of remaining score will be used for different heuristics
heuristic = remaining * 0.97
# used denormalized in all returns, just with different percentages
wratio = fuzz.WRatio(name, other) * 0.01
ratio = fuzz.QRatio(name, other) * 0.01
# find an exact full name non digit match
# (lots of dump or disc numbers as subtitles in some dumps)
# on either a subtitle or a sequence of subtitles from the start
# note that this doesn't include subtitles in
# 'name' matching a subtitle in 'other' and vice versa
# because a subtitle match like this is a strong indicator of a match,
# give it the full default score slot, and wratio for the heuristic slot
# give it the full default score slot, and ratio for the heuristic slot
if not name_ns.isdigit():
sum_ns = ""
for sub_ns in other_ns_subs:
if name_ns == sub_ns or name_ns == (sum_ns := sum_ns + sub_ns):
rest_of_score += heuristic * wratio
rest_of_score += heuristic * ratio
return DEF_SCORE + rest_of_score
if not other_ns.isdigit():
sum_ns = ""
for sub_ns in name_ns_subs:
if other_ns == sub_ns or other_ns == (sum_ns := sum_ns + sub_ns):
rest_of_score += heuristic * wratio
rest_of_score += heuristic * ratio
return DEF_SCORE + rest_of_score
# heuristic measures if the name is more completely at the start of other name
common = len(os.path.commonprefix([name_ns, other_ns])) / len(name_ns)
# heuristic measures how similar the lengths of the names are
parity = min(len(name_ns),len(other_ns))/max(len(name_ns),len(other_ns))
rest_of_score += (heuristic * common * 0.80) + (heuristic * parity * 0.20)
# remember that QRatio fills the DEF_SCORE slot
return rest_of_score + DEF_SCORE * wratio
return rest_of_score + DEF_SCORE * ratio

# ---------------------------------------------------------------
# Normalization functions, part of the functions that change both
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "libretrofuzz"
version = "3.6.1"
version = "3.6.2"
description = "Fuzzy Retroarch thumbnail downloader"
authors = ["i30817 <i30817@gmail.com>"]
license = "MIT"
Expand Down

0 comments on commit f0d8776

Please sign in to comment.