Skip to content

Commit

Permalink
tag new version
Browse files Browse the repository at this point in the history
  • Loading branch information
matthieugomez committed Sep 26, 2021
1 parent acf6623 commit 9fb099e
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 20 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "StringDistances"
uuid = "88034a9c-02f8-509d-84a9-84ec65e18404"
version = "0.11.0"
version = "0.11.1"

[deps]
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
Expand Down
8 changes: 5 additions & 3 deletions src/StringDistances.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ module StringDistances

using Distances
import StatsAPI: pairwise, pairwise!
# Distances API
abstract type StringSemiMetric <: SemiMetric end
abstract type StringMetric <: Metric end
(dist::Union{StringSemiMetric, StringMetric})(s1, s2; max_dist = nothing) = dist(s1, s2)

const StringDistance = Union{StringSemiMetric, StringMetric}
function Distances.result_type(dist::Union{StringSemiMetric, StringMetric}, s1::Type, s2::Type)
T = typeof(dist("", ""))
if (Missing <: s1) | (Missing <: s2)
Expand All @@ -15,6 +15,9 @@ function Distances.result_type(dist::Union{StringSemiMetric, StringMetric}, s1::
end
Distances.result_type(dist::Union{StringSemiMetric, StringMetric}, s1, s2) = result_type(dist, typeof(s1), typeof(s2))



(dist::Union{StringSemiMetric, StringMetric})(s1, s2; max_dist = nothing) = dist(s1, s2)
include("utils.jl")
include("distances/edit.jl")
include("distances/qgram.jl")
Expand All @@ -24,7 +27,6 @@ include("find.jl")
include("fuzzywuzzy.jl")


const StringDistance = Union{StringSemiMetric, StringMetric}
##############################################################################
##
## Export
Expand Down
21 changes: 8 additions & 13 deletions src/fuzzywuzzy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@ function (dist::Partial)(s1, s2; max_dist = nothing)
return out
end

# specialized (faster) version for RatcliffObershelp
function (dist::Partial{<: Union{RatcliffObershelp, Normalized{RatcliffObershelp}}})(s1, s2; max_dist = nothing)
(s1 === missing) | (s2 === missing) && return missing
s1, s2 = reorder(s1, s2)
len1, len2 = length(s1), length(s2)
len1 == len2 && return dist.dist(s1, s2)
out = 1.0
for r in matching_blocks(s1, s2, 1, 1, len1, len2)
for s2_start in matching_blocks(s1, s2, 1, 1, len1, len2)
# Make sure the substring of s2 has length len1
s2_start = r[2] - r[1] + 1
if s2_start < 1
s2_start = 1
elseif s2_start + len1 - 1 > len2
Expand All @@ -56,20 +56,16 @@ function (dist::Partial{<: Union{RatcliffObershelp, Normalized{RatcliffObershelp
end

function matching_blocks(s1, s2, start1::Integer, start2::Integer, end1::Integer, end2::Integer)
x = Set{Tuple{Int, Int, Int}}()
x = Set{Int}()
p = zeros(Int, max(end1 - start1, end2 - start2) + 1)
matching_blocks!(x, p, s1, s2, start1, start2, end1, end2)
end

function matching_blocks!(x::Set{Tuple{Int, Int, Int}}, p::Vector{Int}, s1, s2, start1::Integer, start2::Integer, end1::Integer, end2::Integer)
function matching_blocks!(x::Set{Int}, p::Vector{Int}, s1, s2, start1::Integer, start2::Integer, end1::Integer, end2::Integer)
j1, j2, len = longest_common_pattern!(p, s1, s2, start1, start2, end1, end2)
# exit if there is no common substring
len == 0 && return x
# add the info of the common to the existing set
push!(x, (j1, j2, len))
# add the longest common substring that happens before
push!(x, j2 - j1 + 1)
matching_blocks!(x, p, s1, s2, start1, start2, j1 - 1, j2 - 1)
# add the longest common substring that happens after
matching_blocks!(x, p, s1, s2, j1 + len, j2 + len, end1, end2)
return x
end
Expand Down Expand Up @@ -137,10 +133,9 @@ function (dist::TokenSet)(s1::Union{AbstractString, Missing}, s2::Union{Abstract
s1 = join(v1, " ")
s2 = join(v2, " ")
isempty(s0) && return dist.dist(s1, s2; max_dist = max_dist)
out_01 = dist.dist(s0, s1; max_dist = max_dist)
out_02 = dist.dist(s0, s2; max_dist = max_dist)
out_12 = dist.dist(s1, s2; max_dist = max_dist)
min(out_01, out_02, out_12)
min(dist.dist(s0, s1; max_dist = max_dist),
dist.dist(s0, s2; max_dist = max_dist),
dist.dist(s1, s2; max_dist = max_dist))
end

Normalized(dist::TokenSet) = Normalized{typeof(TokenSet(Normalized(dist.dist)))}(TokenSet(Normalized(dist.dist)))
Expand Down
5 changes: 2 additions & 3 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,11 @@ string_with_length(s::AbstractString) = StringWithLength(s, length(s))
# Not strictly needed, but avoids wrapping a StringWithLength inside another one
string_with_length(s::StringWithLength) = s
Base.length(s::StringWithLength) = s.l
Base.iterate(s::StringWithLength, i::Integer = firstindex(s.s)) = iterate(s.s, i)
Base.iterate(s::StringWithLength) = iterate(s.s)
Base.iterate(s::StringWithLength, i::Integer) = iterate(s.s, i)
Base.nextind(s::StringWithLength, i::Int, n::Int = 1) = nextind(s.s, i, n)
Base.ncodeunits(s::StringWithLength) = ncodeunits(s.s)
Base.isvalid(s::StringWithLength, i::Int) = isvalid(s.s, i)


function reorder(s1::AbstractString, s2::AbstractString)
s1 = string_with_length(s1)
s2 = string_with_length(s2)
Expand Down

2 comments on commit 9fb099e

@matthieugomez
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/45555

After the above pull request is merged, it is recommended that a tag be created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed; otherwise, it can be done manually through the GitHub interface, or via:

git tag -a v0.11.1 -m "<description of version>" 9fb099e2aacbc6bf03d6ffd771da1fe7d293c767
git push origin v0.11.1

Please sign in to comment.