def group_by(values, fn, get_freq: bool = False) -> Union[dict, Tuple[dict, dict]]:
    """
    Group items by the result of fn (function).

    Groups are listed in the order in which their key is first produced, and the items inside each
    group keep the order in which they occur in `values`.

    e.g:
    >>> import cereja as cj
    >>> values = ['joab', 'leite', 'da', 'silva', 'Neto', 'você']
    >>> cj.group_by(values, lambda x: 'N' if x.lower().startswith('n') else 'OTHER')
    # {'OTHER': ['joab', 'leite', 'da', 'silva', 'você'], 'N': ['Neto']}
    >>> cj.group_by(values, lambda x: 'N' if x.lower().startswith('n') else 'OTHER', get_freq=True)
    # ({'OTHER': ['joab', 'leite', 'da', 'silva', 'você'], 'N': ['Neto']},
    # {'OTHER': 5, 'N': 1})

    @param values: iterable of values to be grouped
    @param fn: a function called once per item; its (hashable) result is used as the group key
    @param get_freq: if True, returns a tuple containing the main result of the function followed by the frequency
           distribution of the groups, respectively
    @return: a dict mapping each key to the list of its items or, when get_freq is True, a tuple
             (groups, frequencies) where frequencies maps each key to the size of its group
    """
    groups = defaultdict(list)
    for el in values:
        groups[fn(el)].append(el)

    # Return a plain dict: a lookup of a missing key on the result must raise KeyError instead of
    # silently creating a new empty group, as a defaultdict would.
    grouped = dict(groups)
    if not get_freq:
        return grouped

    frequencies = {key: len(items) for key, items in grouped.items()}
    return grouped, frequencies