From af87f779f4790125fd06310ceed1a403ad0bb52f Mon Sep 17 00:00:00 2001 From: Hitesh Tarani <9803423+hitesh-tarani@users.noreply.github.com> Date: Sun, 7 Apr 2024 13:10:46 +0530 Subject: [PATCH] Updating logic for parsing scheme names for detailed CAS reports, handling erstwhile and non-demat suffixes --- casparser/process/cas_detailed.py | 11 ++++++++--- tests/test_process.py | 19 ++++++++++++++++++- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/casparser/process/cas_detailed.py b/casparser/process/cas_detailed.py index a0d17f8..a9256c5 100644 --- a/casparser/process/cas_detailed.py +++ b/casparser/process/cas_detailed.py @@ -118,6 +118,13 @@ def get_transaction_type( return txn_type, dividend_rate +def get_parsed_scheme_name(scheme) -> str: + scheme = re.sub(r"\((formerly|erstwhile).+?\)", "", scheme, flags=re.I | re.DOTALL).strip() + scheme = re.sub(r"\((Demat|Non-Demat).*", "", scheme, flags=re.I | re.DOTALL).strip() + scheme = re.sub(r"\s+", " ", scheme).strip() + return re.sub(r"[^a-zA-Z0-9_)]+$", "", scheme).strip() + + def parse_transaction(line) -> Optional[ParsedTransaction]: for regex in (TRANSACTION_RE1, TRANSACTION_RE2, TRANSACTION_RE3): if m := re.search(regex, line, re.DOTALL | re.MULTILINE | re.I): @@ -189,9 +196,7 @@ def process_detailed_text(text): elif m := re.search(SCHEME_RE, line, re.DOTALL | re.MULTILINE | re.I): if current_folio is None: raise CASParseError("Layout Error! Scheme found before folio entry.") - scheme = re.sub(r"\(formerly.+?\)", "", m.group("name"), flags=re.I | re.DOTALL).strip() - scheme = re.sub(r"\s+", " ", scheme).strip() - scheme = re.sub(r"\W+$", "", scheme).strip() + scheme = get_parsed_scheme_name(m.group("name")) if curr_scheme_data is None or curr_scheme_data.scheme != scheme: if curr_scheme_data: folios[current_folio].schemes.append(curr_scheme_data) diff --git a/tests/test_process.py b/tests/test_process.py index e062901..84e7f88 100644 --- a/tests/test_process.py +++ b/tests/test_process.py @@ -4,7 +4,7 @@ from casparser.exceptions import CASParseError, HeaderParseError from casparser.process import process_cas_text -from casparser.process.cas_detailed import parse_header, get_transaction_type +from casparser.process.cas_detailed import parse_header, get_transaction_type, get_parsed_scheme_name from casparser.process.cas_detailed import ParsedTransaction, parse_transaction from casparser.process.cas_summary import parse_header as parse_summary_header from casparser.process.utils import isin_search @@ -94,6 +94,23 @@ def test_dividend_transactions(self): Decimal("0.0241"), ) + def test_parsed_scheme_name(self): + assert get_parsed_scheme_name( + "Axis Long Term Equity Fund - Direct Growth") == "Axis Long Term Equity Fund - Direct Growth" + assert get_parsed_scheme_name( + "Axis Bluechip Fund - Regular Growth ") == "Axis Bluechip Fund - Regular Growth" + assert get_parsed_scheme_name( + "HSBC Corporate Bond Fund - Regular Growth (Formerly known as L&T Triple Ace Bond Fund - Growth)") == \ + "HSBC Corporate Bond Fund - Regular Growth" + assert get_parsed_scheme_name( + "Bandhan ELSS Tax saver Fund-Growth-(Regular Plan)" + "(erstwhile Bandhan Tax Advantage ELSS Fund-Growth-Regular Plan)") == \ + "Bandhan ELSS Tax saver Fund-Growth-(Regular Plan)" + assert get_parsed_scheme_name( + "Bandhan Liquid Fund-Growth-(Regular Plan) (erstwhile IDFC Cash Fund-Growth-Regular Plan) (Non-Demat) ") == \ + "Bandhan Liquid Fund-Growth-(Regular Plan)" + + def test_isin_search(self): isin, amfi, scheme_type = isin_search( "Axis Long Term Equity Fund - Direct Growth", "KFINTECH", "128TSDGG"