From f0e9b969de447e120f3d7a2023a7201f08d6ec7f Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Thu, 16 Jan 2025 11:10:11 +0100 Subject: [PATCH 1/2] initial import --- .../components/splitters/hierarchical_doc_splitter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/haystack_experimental/components/splitters/hierarchical_doc_splitter.py b/haystack_experimental/components/splitters/hierarchical_doc_splitter.py index 32366f6a..5e95992c 100644 --- a/haystack_experimental/components/splitters/hierarchical_doc_splitter.py +++ b/haystack_experimental/components/splitters/hierarchical_doc_splitter.py @@ -72,6 +72,8 @@ def _build_block_sizes(self): self.splitters[block_size] = DocumentSplitter( split_length=block_size, split_overlap=self.split_overlap, split_by=self.split_by ) + self.splitters[block_size].warm_up() + @staticmethod def _add_meta_data(document: Document): From bbb8b60825d05fbe2ab470940664c35c6f4bbcea Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Fri, 17 Jan 2025 15:53:35 +0100 Subject: [PATCH 2/2] updating LLMMetaDataExtractor example code --- .../components/extractors/llm_metadata_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack_experimental/components/extractors/llm_metadata_extractor.py b/haystack_experimental/components/extractors/llm_metadata_extractor.py index 8b1fd723..979fc2e0 100644 --- a/haystack_experimental/components/extractors/llm_metadata_extractor.py +++ b/haystack_experimental/components/extractors/llm_metadata_extractor.py @@ -85,7 +85,7 @@ class LLMMetadataExtractor: 1. Identify all entities. For each identified entity, extract the following information: - entity_name: Name of the entity, capitalized - entity_type: One of the following types: [organization, product, service, industry] - Format each entity as {"entity": , "entity_type": } + Format each entity as a JSON like: {"entity": , "entity_type": } 2. Return output in a single list with all the entities identified in steps 1.