From 2ccba3eb441c35bc6f7bfc215b74773a9d1707e8 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Tue, 7 Jan 2025 19:08:24 +0100 Subject: [PATCH 1/2] feat: Add mapping to colors into DocItemLabel Signed-off-by: Christoph Auer --- docling_core/types/doc/labels.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/docling_core/types/doc/labels.py b/docling_core/types/doc/labels.py index 5149df0..56d6398 100644 --- a/docling_core/types/doc/labels.py +++ b/docling_core/types/doc/labels.py @@ -1,12 +1,12 @@ """Models for the labels types.""" from enum import Enum +from typing import Tuple class DocItemLabel(str, Enum): """DocItemLabel.""" - # DocLayNet v2 CAPTION = "caption" FOOTNOTE = "footnote" FORMULA = "formula" @@ -26,12 +26,34 @@ class DocItemLabel(str, Enum): KEY_VALUE_REGION = "key_value_region" # Additional labels for markup-based formats (e.g. HTML, Word) - PARAGRAPH = "paragraph" # explicitly a paragraph and not arbitrary text + PARAGRAPH = "paragraph" REFERENCE = "reference" - def __str__(self): - """Get string value.""" - return str(self.value) + @staticmethod + def get_color(label: "DocItemLabel") -> Tuple[int, int, int]: + """Return the RGB color associated with a given label.""" + color_map = { + DocItemLabel.CAPTION: (255, 204, 153), + DocItemLabel.FOOTNOTE: (200, 200, 255), + DocItemLabel.FORMULA: (192, 192, 192), + DocItemLabel.LIST_ITEM: (153, 153, 255), + DocItemLabel.PAGE_FOOTER: (204, 255, 204), + DocItemLabel.PAGE_HEADER: (204, 255, 204), + DocItemLabel.PICTURE: (255, 204, 164), + DocItemLabel.SECTION_HEADER: (255, 153, 153), + DocItemLabel.TABLE: (255, 204, 204), + DocItemLabel.TEXT: (255, 255, 153), + DocItemLabel.TITLE: (255, 153, 153), + DocItemLabel.DOCUMENT_INDEX: (220, 220, 220), + DocItemLabel.CODE: (125, 125, 125), + DocItemLabel.CHECKBOX_SELECTED: (255, 182, 193), + DocItemLabel.CHECKBOX_UNSELECTED: (255, 182, 193), + DocItemLabel.FORM: (200, 255, 255), + DocItemLabel.KEY_VALUE_REGION: (183, 65, 14), + DocItemLabel.PARAGRAPH: (255, 255, 153), + DocItemLabel.REFERENCE: (176, 224, 230), + } + return color_map[label] class GroupLabel(str, Enum): From a70064c7eda33e59f25089e20ca14c3be0949c3b Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Wed, 8 Jan 2025 10:37:34 +0100 Subject: [PATCH 2/2] fix: Restore DocItemLabel.__str__ Signed-off-by: Christoph Auer --- docling_core/types/doc/labels.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docling_core/types/doc/labels.py b/docling_core/types/doc/labels.py index 56d6398..08c1f7c 100644 --- a/docling_core/types/doc/labels.py +++ b/docling_core/types/doc/labels.py @@ -29,6 +29,10 @@ class DocItemLabel(str, Enum): PARAGRAPH = "paragraph" REFERENCE = "reference" + def __str__(self): + """Get string value.""" + return str(self.value) + @staticmethod def get_color(label: "DocItemLabel") -> Tuple[int, int, int]: """Return the RGB color associated with a given label."""