Skip to content

Commit

Permalink
Revert "avoid using internal functions"
Browse files Browse the repository at this point in the history
This reverts commit a3de1c2.
  • Loading branch information
bkamins committed Feb 11, 2021
1 parent 659ec7c commit 07ecb0a
Showing 1 changed file with 29 additions and 16 deletions.
45 changes: 29 additions & 16 deletions src/abstractdataframe/join.jl
Original file line number Diff line number Diff line change
Expand Up @@ -421,8 +421,12 @@ function _innerjoin_unsorted(left::AbstractArray, right::AbstractArray{T}) where
left isa OnCol && _prehash(left)

for (idx_r, val_r) in enumerate(right)
haskey(dict, val_r) && return _innerjoin_dup(left, right, dict, idx_r)
dict[val_r] = idx_r
# we use dict_index to make sure the following two operations are fast:
# - if the key is already in dict - fall back to the algorithm allowing duplicates
# - if the key is not in dict - add it at the slot given by -dict_index
dict_index = Base.ht_keyindex2!(dict, val_r)
dict_index > 0 && return _innerjoin_dup(left, right, dict, idx_r)
Base._setindex!(dict, idx_r, val_r, -dict_index)
end

left_ixs = Int[]
Expand All @@ -433,9 +437,12 @@ function _innerjoin_unsorted(left::AbstractArray, right::AbstractArray{T}) where
sizehint!(right_ixs, right_len)

for (idx_l, val_l) in enumerate(left)
# we know that dict contains only positive values
idx_r = get(dict, val_l, -1)
if idx_r != -1
# we use dict_index to make sure the following two operations are fast:
# - if the key is found - read the matching right index from dict.vals and record the pair
# - if the key is not found - do nothing
dict_index = Base.ht_keyindex(dict, val_l)
if dict_index > 0 # -1 if key not found
@inbounds idx_r = dict.vals[dict_index]
push!(left_ixs, idx_l)
push!(right_ixs, idx_r)
end
Expand Down Expand Up @@ -513,16 +520,18 @@ function _innerjoin_dup(left::AbstractArray, right::AbstractArray{T},
groups = Vector{Int}(undef, right_len)
groups[1:ngroups] = 1:ngroups

@inbounds for idx_r in idx_r_start:right_len
val_r = right[idx_r]
# we know that group ids are positive
group_id = get(dict, val_r, -1)
if group_id == -1
ngroups += 1
groups[idx_r] = ngroups
dict[val_r] = ngroups
for idx_r in idx_r_start:right_len
@inbounds val_r = right[idx_r]
# we use dict_index to make sure the following two operations are fast:
# - if the key is found - assign the row its existing group number
# - if the key is not found - create a new group and add the key to dict
dict_index = Base.ht_keyindex2!(dict, val_r)
if dict_index > 0
@inbounds groups[idx_r] = dict.vals[dict_index]
else
groups[idx_r] = group_id
ngroups += 1
@inbounds groups[idx_r] = ngroups
Base._setindex!(dict, ngroups, val_r, -dict_index)
end
end

Expand Down Expand Up @@ -588,8 +597,12 @@ function _innerjoin_postprocess(left::AbstractArray, dict::Dict{T, Int},

n = 0
@inbounds for (idx_l, val_l) in enumerate(left)
group_id = get(dict, val_l, -1)
if group_id != -1
# we use dict_index to make sure the following two operations are fast:
# - if the key is found - read its group id from dict.vals and process it
# - if the key is not found - do nothing
dict_index = Base.ht_keyindex(dict, val_l)
if dict_index > 0 # -1 if key not found
group_id = dict.vals[dict_index]
ref_stop = starts[group_id + 1]
l = ref_stop - starts[group_id]
newn = n + l
Expand Down

0 comments on commit 07ecb0a

Please sign in to comment.