Skip to content

Commit

Permalink
utils: add frequency distribution in the grouping function
Browse files (browse the repository at this point in the history)
  • Loading branch information
ailton-felix committed Nov 22, 2023
1 parent 20a71cf commit f9f9993
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions cereja/utils/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ def invert_dict(dict_: Union[dict, set]) -> dict:
return new_dict


def group_by(values, fn, get_freq: bool = False) -> Union[dict, Tuple[dict, dict]]:
    """
    Group items by the result of fn (function).

    Every element of `values` is passed to `fn`; elements for which `fn`
    returns the same key are collected, in their original order, in one list.
    Groups appear in the order in which their key is first produced.

    e.g.:
    >>> values = ['joab', 'leite', 'da', 'silva', 'Neto', 'você']
    >>> cj.group_by(values, lambda x: 'N' if x.lower().startswith('n') else 'OTHER')
    # {'OTHER': ['joab', 'leite', 'da', 'silva', 'você'], 'N': ['Neto']}
    >>> cj.group_by(values, lambda x: 'N' if x.lower().startswith('n') else 'OTHER', get_freq=True)
    # ({'OTHER': ['joab', 'leite', 'da', 'silva', 'você'], 'N': ['Neto']},
    #  {'OTHER': 5, 'N': 1})

    @param values: iterable of values to be grouped
    @param fn: a function applied to each value; its result (which must be hashable)
               is used as the group key
    @param get_freq: if True, returns a tuple containing the main result of the function
                     followed by the frequency distribution of the groups, respectively
    @return: a dict mapping each key to the list of its values or, when get_freq is True,
             a tuple (groups, frequencies) where frequencies maps each key to the size
             of its group
    """
    grouped = defaultdict(list)
    for el in values:
        grouped[fn(el)].append(el)

    # Hand back a plain dict so that looking up a missing key raises KeyError
    # instead of silently creating an empty group.
    groups = dict(grouped)
    if not get_freq:
        return groups

    frequencies = {key: len(members) for key, members in groups.items()}
    return groups, frequencies


def import_string(dotted_path):
Expand Down

0 comments on commit f9f9993

Please sign in to comment.