From 9fb099e2aacbc6bf03d6ffd771da1fe7d293c767 Mon Sep 17 00:00:00 2001
From: matthieugomez <gomez.matthieu@gmail.com>
Date: Sun, 26 Sep 2021 10:47:30 -0400
Subject: [PATCH] tag new version 0.11.1

---
 Project.toml           |  2 +-
 src/StringDistances.jl |  8 +++++---
 src/fuzzywuzzy.jl      | 21 ++++++++-------------
 src/utils.jl           |  5 ++---
 4 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/Project.toml b/Project.toml
index 7a359c5..ef525b7 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "StringDistances"
 uuid = "88034a9c-02f8-509d-84a9-84ec65e18404"
-version = "0.11.0"
+version = "0.11.1"
 
 [deps]
 Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
diff --git a/src/StringDistances.jl b/src/StringDistances.jl
index 603adef..d4ac067 100755
--- a/src/StringDistances.jl
+++ b/src/StringDistances.jl
@@ -2,10 +2,10 @@ module StringDistances
 
 using Distances
 import StatsAPI: pairwise, pairwise!
+# Abstract supertypes that plug string distances into the Distances.jl type hierarchy
 abstract type StringSemiMetric <: SemiMetric end
 abstract type StringMetric <: Metric end
-(dist::Union{StringSemiMetric, StringMetric})(s1, s2; max_dist = nothing) = dist(s1, s2)
-
+const StringDistance = Union{StringSemiMetric, StringMetric}  # alias for either abstract string-distance supertype
 function Distances.result_type(dist::Union{StringSemiMetric, StringMetric}, s1::Type, s2::Type)
     T = typeof(dist("", ""))
     if (Missing <: s1) | (Missing <: s2)
@@ -15,6 +15,9 @@ function Distances.result_type(dist::Union{StringSemiMetric, StringMetric}, s1::
 end
 Distances.result_type(dist::Union{StringSemiMetric, StringMetric}, s1, s2) = result_type(dist, typeof(s1), typeof(s2))
 
+
+
+(dist::Union{StringSemiMetric, StringMetric})(s1, s2; max_dist = nothing) = dist(s1, s2)  # fallback: accepts max_dist, discards it, and calls dist(s1, s2)
 include("utils.jl")
 include("distances/edit.jl")
 include("distances/qgram.jl")
@@ -24,7 +27,6 @@ include("find.jl")
 include("fuzzywuzzy.jl")
 
 
-const StringDistance = Union{StringSemiMetric, StringMetric}
 ##############################################################################
 ##
 ## Export
diff --git a/src/fuzzywuzzy.jl b/src/fuzzywuzzy.jl
index 1e3e2f8..cdeee6c 100755
--- a/src/fuzzywuzzy.jl
+++ b/src/fuzzywuzzy.jl
@@ -34,15 +34,15 @@ function (dist::Partial)(s1, s2; max_dist = nothing)
     return out
 end
 
+# Specialized (faster) method for Partial wrapping RatcliffObershelp or Normalized{RatcliffObershelp}
 function (dist::Partial{<: Union{RatcliffObershelp, Normalized{RatcliffObershelp}}})(s1, s2; max_dist = nothing)
     (s1 === missing) | (s2 === missing) && return missing
     s1, s2 = reorder(s1, s2)
     len1, len2 = length(s1), length(s2)
     len1 == len2 && return dist.dist(s1, s2)
     out = 1.0
-    for r in matching_blocks(s1, s2, 1, 1, len1, len2)
+    for s2_start in matching_blocks(s1, s2, 1, 1, len1, len2)
         # Make sure the substring of s2 has length len1
-        s2_start = r[2] - r[1] + 1
         if s2_start < 1
             s2_start = 1
         elseif s2_start + len1 - 1 > len2
@@ -56,20 +56,16 @@ function (dist::Partial{<: Union{RatcliffObershelp, Normalized{RatcliffObershelp
 end
 
 function matching_blocks(s1, s2, start1::Integer, start2::Integer, end1::Integer, end2::Integer)
-    x = Set{Tuple{Int, Int, Int}}()
+    x = Set{Int}()  # one Int per common block (j2 - j1 + 1, pushed by matching_blocks!); the Set drops duplicates
     p = zeros(Int, max(end1 - start1, end2 - start2) + 1)
     matching_blocks!(x, p, s1, s2, start1, start2, end1, end2)
 end
 
-function matching_blocks!(x::Set{Tuple{Int, Int, Int}}, p::Vector{Int}, s1, s2, start1::Integer, start2::Integer, end1::Integer, end2::Integer)
+function matching_blocks!(x::Set{Int}, p::Vector{Int}, s1, s2, start1::Integer, start2::Integer, end1::Integer, end2::Integer)  # x: accumulated offsets (mutated and returned); p: buffer handed to longest_common_pattern!
     j1, j2, len = longest_common_pattern!(p, s1, s2, start1, start2, end1, end2)
-    # exit if there is no common substring
     len == 0 && return x
-    # add the info of the common to the existing set
-    push!(x, (j1, j2, len))
-     # add the longest common substring that happens before
+    push!(x, j2 - j1 + 1)  # s2 position that lines up with s1[1] for this block (the caller's s2_start); recursion below covers the ranges before and after the block
     matching_blocks!(x, p, s1, s2, start1, start2, j1 - 1, j2 - 1)
-     # add the longest common substring that happens after
     matching_blocks!(x, p, s1, s2, j1 + len, j2 + len, end1, end2)
     return x
 end
@@ -137,10 +133,9 @@ function (dist::TokenSet)(s1::Union{AbstractString, Missing}, s2::Union{Abstract
     s1 = join(v1, " ")
     s2 = join(v2, " ")
     isempty(s0) && return dist.dist(s1, s2; max_dist = max_dist)
-    out_01 = dist.dist(s0, s1; max_dist = max_dist)
-    out_02 = dist.dist(s0, s2; max_dist = max_dist)
-    out_12 = dist.dist(s1, s2; max_dist = max_dist)
-    min(out_01, out_02, out_12)
+    min(dist.dist(s0, s1; max_dist = max_dist),
+        dist.dist(s0, s2; max_dist = max_dist),
+        dist.dist(s1, s2; max_dist = max_dist))
 end
 
 Normalized(dist::TokenSet) = Normalized{typeof(TokenSet(Normalized(dist.dist)))}(TokenSet(Normalized(dist.dist)))
diff --git a/src/utils.jl b/src/utils.jl
index e8cf27b..d359ab8 100755
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -26,12 +26,11 @@ string_with_length(s::AbstractString) = StringWithLength(s, length(s))
 # Not really needed but avoid multi-encapsulation
 string_with_length(s::StringWithLength) = s
 Base.length(s::StringWithLength) = s.l
-Base.iterate(s::StringWithLength, i::Integer = firstindex(s.s)) = iterate(s.s, i)
+Base.iterate(s::StringWithLength) = iterate(s.s)  # start of iteration: delegate to the wrapped string s.s
+Base.iterate(s::StringWithLength, i::Integer) = iterate(s.s, i)  # continuation: resume the wrapped string from state i
 Base.nextind(s::StringWithLength, i::Int, n::Int = 1) = nextind(s.s, i, n)
 Base.ncodeunits(s::StringWithLength) = ncodeunits(s.s)
 Base.isvalid(s::StringWithLength, i::Int) = isvalid(s.s, i)
-
-
 function reorder(s1::AbstractString, s2::AbstractString)
     s1 = string_with_length(s1)
     s2 = string_with_length(s2)