From 691995a39bd864640b8cdf8bc5bf56bec702c810 Mon Sep 17 00:00:00 2001
From: Sandeep Somasekharan <codereverser@gmail.com>
Date: Sun, 22 Dec 2024 06:26:16 +0530
Subject: [PATCH] use codecov token

---
 .github/workflows/run-pytest.yml |  3 ++-
 casparser/parsers/mupdf.py       | 20 +++++++++-----------
 casparser/parsers/pdfminer.py    | 13 +++++++------
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml
index 2dc89d6..1d45b2c 100644
--- a/.github/workflows/run-pytest.yml
+++ b/.github/workflows/run-pytest.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.8']
+        python-version: ['3.10']
 
     steps:
     - uses: actions/checkout@v3
@@ -44,3 +44,4 @@ jobs:
       uses: codecov/codecov-action@v5
       with:
         files: ./coverage.xml
+        token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/casparser/parsers/mupdf.py b/casparser/parsers/mupdf.py
index b3f77ce..83d61e8 100644
--- a/casparser/parsers/mupdf.py
+++ b/casparser/parsers/mupdf.py
@@ -52,9 +52,9 @@ def extract_blocks(page_dict):
     for block in grouped_blocks:
         lines = []
         items = []
-        if len(block.get("lines", [])) == 0:
-            continue
-        bbox = block["lines"][0]["bbox"]
+        bbox = [0, 0, 0, 0]
+        if len(block.get("lines", [])) > 0:
+            bbox = block["lines"][0]["bbox"]
         y0, y1 = bbox[1], bbox[3]
         for line in sorted(block["lines"], key=lambda x: x["bbox"][1]):
             if len(items) > 0 and not (
@@ -113,12 +113,10 @@ def parse_investor_info(page_dict, page_rect: fitz.Rect) -> InvestorInfo:
     name = None
     for block in blocks:
         for line in block["lines"]:
-            for span in line["spans"]:
-                if span["bbox"][0] > width / 3:
-                    continue
+            for span in filter(
+                lambda x: x["bbox"][0] <= width / 3 and x["text"].strip() != "", line["spans"]
+            ):
                 txt = span["text"].strip()
-                if txt == "":
-                    continue
                 if not email_found:
                     if m := re.search(r"^\s*email\s+id\s*:\s*(.+?)(?:\s|$)", txt, re.I):
                         email = m.group(1).strip()
@@ -156,9 +154,9 @@ def group_similar_rows(elements_list: List[Iterator[Any]]):
     lines = []
     for elements in elements_list:
         sorted_elements = list(sorted(elements, key=itemgetter(1, 0)))
-        if len(sorted_elements) == 0:
-            continue
-        y0, y1 = sorted_elements[0][1], sorted_elements[0][3]
+        y0, y1 = 0, 0
+        if len(sorted_elements) > 0:
+            y0, y1 = sorted_elements[0][1], sorted_elements[0][3]
         items = []
         for el in sorted_elements:
             x2, y2, x3, y3 = el[:4]
diff --git a/casparser/parsers/pdfminer.py b/casparser/parsers/pdfminer.py
index 806ffd9..7b9719c 100644
--- a/casparser/parsers/pdfminer.py
+++ b/casparser/parsers/pdfminer.py
@@ -22,7 +22,10 @@ def parse_investor_info(layout, width, height) -> InvestorInfo:
         [
             x
             for x in layout
-            if isinstance(x, LTTextBoxHorizontal) and x.x1 < width / 1.5 and x.y1 > height / 2
+            if isinstance(x, LTTextBoxHorizontal)
+            and x.x1 < width / 1.5
+            and x.y1 > height / 2
+            and x.get_text().strip() != ""
         ],
         key=lambda x: -x.y1,
     )
@@ -33,8 +36,6 @@ def parse_investor_info(layout, width, height) -> InvestorInfo:
     name = None
     for el in text_elements:
         txt = el.get_text().strip()
-        if txt == "":
-            continue
         if not email_found:
             if m := re.search(r"^\s*email\s+id\s*:\s*(.+?)(?:\s|$)", txt, re.I):
                 email = m.group(1).strip()
@@ -88,9 +89,9 @@ def group_similar_rows(elements_list: List[Iterator[LTTextBoxHorizontal]]):
     lines = []
     for elements in elements_list:
         sorted_elements = list(sorted(elements, key=lambda x: (-x.y1, x.x0)))
-        if len(sorted_elements) == 0:
-            continue
-        y0, y1 = sorted_elements[0].y0, sorted_elements[0].y1
+        y0, y1 = 0, 0
+        if len(sorted_elements) > 0:
+            y0, y1 = sorted_elements[0].y0, sorted_elements[0].y1
         items = []
         for el in sorted_elements:
             if len(items) > 0 and not (