From 874bab0f25ce4771e6a36beedd59cddef068f7ad Mon Sep 17 00:00:00 2001 From: chuqingG Date: Sat, 1 Mar 2025 17:00:12 -0500 Subject: [PATCH 1/2] enable cost tracking for synthesizer --- .../synthesizer/chunking/context_generator.py | 9 +++- deepeval/synthesizer/synthesizer.py | 41 ++++++++++++++++--- 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/deepeval/synthesizer/chunking/context_generator.py b/deepeval/synthesizer/chunking/context_generator.py index 293c75440..98bf4b902 100644 --- a/deepeval/synthesizer/chunking/context_generator.py +++ b/deepeval/synthesizer/chunking/context_generator.py @@ -66,6 +66,9 @@ def __init__( self.source_files_to_collections_map: Optional[ Dict[str, Collection] ] = None + + # cost tracking + self.total_cost = 0.0 ######################################################### ### Generate Contexts ################################### @@ -499,7 +502,8 @@ async def a_evaluate_chunk_and_update(chunk): def evaluate_chunk(self, chunk) -> float: prompt = FilterTemplate.evaluate_context(chunk) if self.using_native_model: - res, _ = self.model.generate(prompt, schema=ContextScore) + res, cost = self.model.generate(prompt, schema=ContextScore) + self.total_cost += cost return (res.clarity + res.depth + res.structure + res.relevance) / 4 else: try: @@ -523,7 +527,8 @@ def evaluate_chunk(self, chunk) -> float: async def a_evaluate_chunk(self, chunk) -> float: prompt = FilterTemplate.evaluate_context(chunk) if self.using_native_model: - res, _ = await self.model.a_generate(prompt, schema=ContextScore) + res, cost = await self.model.a_generate(prompt, schema=ContextScore) + self.total_cost += cost return (res.clarity + res.depth + res.structure + res.relevance) / 4 else: diff --git a/deepeval/synthesizer/synthesizer.py b/deepeval/synthesizer/synthesizer.py index 3027a6030..83efe5aa3 100644 --- a/deepeval/synthesizer/synthesizer.py +++ b/deepeval/synthesizer/synthesizer.py @@ -81,6 +81,7 @@ def __init__( filtration_config: Optional[FiltrationConfig] = None, evolution_config: Optional[EvolutionConfig] = None, styling_config: Optional[StylingConfig] = None, + cost_tracking: bool = False, ): self.model, self.using_native_model = initialize_model(model) self.async_mode = async_mode @@ -100,6 +101,8 @@ def __init__( self.styling_config = ( styling_config if styling_config is not None else StylingConfig() ) + self.cost_tracking = cost_tracking + self.synthesis_cost = 0 if self.using_native_model else None ############################################################# # Generate Goldens from Docs @@ -127,6 +130,7 @@ def generate_goldens_from_docs( include_expected_output=include_expected_output, max_goldens_per_context=max_goldens_per_context, context_construction_config=context_construction_config, + _reset_cost=False, ) ) else: @@ -149,6 +153,8 @@ def generate_goldens_from_docs( max_context_size=context_construction_config.max_context_length, ) ) + if self.synthesis_cost: + self.synthesis_cost += self.context_generator.total_cost print( f"Utilizing {len(set(chain.from_iterable(contexts)))} out of {self.context_generator.total_chunks} chunks." ) @@ -170,8 +176,12 @@ def generate_goldens_from_docs( _context_scores=context_scores, _progress_bar=progress_bar, _send_data=False, + _reset_cost=False, ) - + if self.cost_tracking and self.using_native_model: + print( + f"💰 API cost: {self.synthesis_cost:.6f}" + ) # Wrap-up Synthesis if _send_data == True: pass @@ -183,12 +193,14 @@ async def a_generate_goldens_from_docs( include_expected_output: bool = True, max_goldens_per_context: int = 2, context_construction_config: Optional[ContextConstructionConfig] = None, + _reset_cost=True, ): if context_construction_config is None: context_construction_config = ContextConstructionConfig( critic_model=self.model ) - self.synthesis_cost = 0 if self.using_native_model else None + if _reset_cost: + self.synthesis_cost = 0 if self.using_native_model else None # Generate contexts from provided docs if self.context_generator is None: @@ -210,6 +222,8 @@ async def a_generate_goldens_from_docs( max_context_size=context_construction_config.max_context_length, ) ) + if self.synthesis_cost: + self.synthesis_cost += self.context_generator.total_cost print( f"Utilizing {len(set(chain.from_iterable(contexts)))} out of {self.context_generator.total_chunks} chunks." ) @@ -230,8 +244,13 @@ async def a_generate_goldens_from_docs( source_files=source_files, _context_scores=context_scores, _progress_bar=progress_bar, + _reset_cost=False, ) self.synthetic_goldens.extend(goldens) + if _reset_cost and self.cost_tracking and self.using_native_model: + print( + f"💰 API cost: {self.synthesis_cost:.6f}" + ) return goldens ############################################################# @@ -247,8 +266,10 @@ def generate_goldens_from_contexts( _context_scores: Optional[List[float]] = None, _progress_bar: Optional[tqdm.std.tqdm] = None, _send_data: bool = True, + _reset_cost: bool = True, ) -> List[Golden]: - self.synthesis_cost = 0 if self.using_native_model else None + if _reset_cost: + self.synthesis_cost = 0 if self.using_native_model else None # Intialize Goldens as an empty list goldens: List[Golden] = [] @@ -358,6 +379,10 @@ def generate_goldens_from_contexts( self.synthetic_goldens.extend(goldens) if _send_data == True: pass + if _reset_cost and self.cost_tracking and self.using_native_model: + print( + f"💰 API cost: {self.synthesis_cost:.6f}" + ) return goldens async def a_generate_goldens_from_contexts( @@ -368,8 +393,10 @@ async def a_generate_goldens_from_contexts( source_files: Optional[List[str]] = None, _context_scores: Optional[List[float]] = None, _progress_bar: Optional[tqdm.std.tqdm] = None, + _reset_cost: bool = True, ) -> List[Golden]: - self.synthesis_cost = 0 if self.using_native_model else None + if _reset_cost: + self.synthesis_cost = 0 if self.using_native_model else None semaphore = asyncio.Semaphore(self.max_concurrent) goldens: List[Golden] = [] with synthesizer_progress_context( @@ -398,7 +425,11 @@ async def a_generate_goldens_from_contexts( for index, context in enumerate(contexts) ] await asyncio.gather(*tasks) - + + if _reset_cost and self.cost_tracking and self.using_native_model: + print( + f"💰 API cost: {self.synthesis_cost:.6f}" + ) return goldens async def _a_generate_from_context( From 499c64cc18071ced7794b7dc319d698338344628 Mon Sep 17 00:00:00 2001 From: chuqingG Date: Sat, 1 Mar 2025 17:14:23 -0500 Subject: [PATCH 2/2] reformat --- .../synthesizer/chunking/context_generator.py | 2 +- deepeval/synthesizer/synthesizer.py | 18 +++++------------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/deepeval/synthesizer/chunking/context_generator.py b/deepeval/synthesizer/chunking/context_generator.py index 98bf4b902..c68399734 100644 --- a/deepeval/synthesizer/chunking/context_generator.py +++ b/deepeval/synthesizer/chunking/context_generator.py @@ -66,7 +66,7 @@ def __init__( self.source_files_to_collections_map: Optional[ Dict[str, Collection] ] = None - + # cost tracking self.total_cost = 0.0 diff --git a/deepeval/synthesizer/synthesizer.py b/deepeval/synthesizer/synthesizer.py index 83efe5aa3..25273610b 100644 --- a/deepeval/synthesizer/synthesizer.py +++ b/deepeval/synthesizer/synthesizer.py @@ -179,9 +179,7 @@ def generate_goldens_from_docs( _reset_cost=False, ) if self.cost_tracking and self.using_native_model: - print( - f"💰 API cost: {self.synthesis_cost:.6f}" - ) + print(f"💰 API cost: {self.synthesis_cost:.6f}") # Wrap-up Synthesis if _send_data == True: pass @@ -248,9 +246,7 @@ async def a_generate_goldens_from_docs( ) self.synthetic_goldens.extend(goldens) if _reset_cost and self.cost_tracking and self.using_native_model: - print( - f"💰 API cost: {self.synthesis_cost:.6f}" - ) + print(f"💰 API cost: {self.synthesis_cost:.6f}") return goldens ############################################################# @@ -380,9 +376,7 @@ def generate_goldens_from_contexts( if _send_data == True: pass if _reset_cost and self.cost_tracking and self.using_native_model: - print( - f"💰 API cost: {self.synthesis_cost:.6f}" - ) + print(f"💰 API cost: {self.synthesis_cost:.6f}") return goldens async def a_generate_goldens_from_contexts( @@ -425,11 +419,9 @@ async def a_generate_goldens_from_contexts( for index, context in enumerate(contexts) ] await asyncio.gather(*tasks) - + if _reset_cost and self.cost_tracking and self.using_native_model: - print( - f"💰 API cost: {self.synthesis_cost:.6f}" - ) + print(f"💰 API cost: {self.synthesis_cost:.6f}") return goldens async def _a_generate_from_context(