Skip to content

Commit

Permalink
BUG: Modify rank calculation for dense rank pandas-dev#20731
Browse files Browse the repository at this point in the history
  • Loading branch information
peterpanmj committed May 22, 2018
1 parent 172ab7a commit adfd619
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions pandas/_libs/groupby_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
bint is_datetimelike, object ties_method,
bint ascending, bint pct, object na_option):
"""
Provides the rank of values within each group.
Provides the rank of values within each group.

Parameters
----------
Expand Down Expand Up @@ -453,6 +453,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
TiebreakEnumType tiebreak
Py_ssize_t i, j, N, K, val_start=0, grp_start=0, dups=0, sum_ranks=0
Py_ssize_t grp_vals_seen=1, grp_na_count=0
Py_ssize_t total_tie_count=0
ndarray[int64_t] _as
ndarray[float64_t, ndim=2] grp_sizes
ndarray[{{c_type}}] masked_vals
Expand Down Expand Up @@ -499,6 +500,9 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,

if not ascending:
_as = _as[::-1]

if mask.any() and keep_na:
total_tie_count -= 1

with nogil:
# Loop over the length of the value array
Expand Down Expand Up @@ -560,6 +564,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
if (i == N - 1 or
(masked_vals[_as[i]] != masked_vals[_as[i+1]]) or
(mask[_as[i]] ^ mask[_as[i+1]])):
total_tie_count += 1
dups = sum_ranks = 0
val_start = i
grp_vals_seen += 1
Expand All @@ -581,7 +586,10 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,

if pct:
for i in range(N):
out[i, 0] = out[i, 0] / grp_sizes[i, 0]
if tiebreak == TIEBREAK_DENSE:
out[i, 0] = out[i, 0] / total_tie_count
else:
out[i, 0] = out[i, 0] / grp_sizes[i, 0]
{{endif}}
{{endfor}}

Expand Down

0 comments on commit adfd619

Please sign in to comment.