From 874bab0f25ce4771e6a36beedd59cddef068f7ad Mon Sep 17 00:00:00 2001
From: chuqingG <gao688@purdue.edu>
Date: Sat, 1 Mar 2025 17:00:12 -0500
Subject: [PATCH 1/2] enable cost tracking for synthesizer

---
 .../synthesizer/chunking/context_generator.py |  9 +++-
 deepeval/synthesizer/synthesizer.py           | 41 ++++++++++++++++---
 2 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/deepeval/synthesizer/chunking/context_generator.py b/deepeval/synthesizer/chunking/context_generator.py
index 293c75440..98bf4b902 100644
--- a/deepeval/synthesizer/chunking/context_generator.py
+++ b/deepeval/synthesizer/chunking/context_generator.py
@@ -66,6 +66,9 @@ def __init__(
         self.source_files_to_collections_map: Optional[
             Dict[str, Collection]
         ] = None
+        
+        # cost tracking
+        self.total_cost = 0.0
 
     #########################################################
     ### Generate Contexts ###################################
@@ -499,7 +502,8 @@ async def a_evaluate_chunk_and_update(chunk):
     def evaluate_chunk(self, chunk) -> float:
         prompt = FilterTemplate.evaluate_context(chunk)
         if self.using_native_model:
-            res, _ = self.model.generate(prompt, schema=ContextScore)
+            res, cost = self.model.generate(prompt, schema=ContextScore)
+            self.total_cost += cost
             return (res.clarity + res.depth + res.structure + res.relevance) / 4
         else:
             try:
@@ -523,7 +527,8 @@ def evaluate_chunk(self, chunk) -> float:
     async def a_evaluate_chunk(self, chunk) -> float:
         prompt = FilterTemplate.evaluate_context(chunk)
         if self.using_native_model:
-            res, _ = await self.model.a_generate(prompt, schema=ContextScore)
+            res, cost = await self.model.a_generate(prompt, schema=ContextScore)
+            self.total_cost += cost
             return (res.clarity + res.depth + res.structure + res.relevance) / 4
         else:
 
diff --git a/deepeval/synthesizer/synthesizer.py b/deepeval/synthesizer/synthesizer.py
index 3027a6030..83efe5aa3 100644
--- a/deepeval/synthesizer/synthesizer.py
+++ b/deepeval/synthesizer/synthesizer.py
@@ -81,6 +81,7 @@ def __init__(
         filtration_config: Optional[FiltrationConfig] = None,
         evolution_config: Optional[EvolutionConfig] = None,
         styling_config: Optional[StylingConfig] = None,
+        cost_tracking: bool = False,
     ):
         self.model, self.using_native_model = initialize_model(model)
         self.async_mode = async_mode
@@ -100,6 +101,8 @@ def __init__(
         self.styling_config = (
             styling_config if styling_config is not None else StylingConfig()
         )
+        self.cost_tracking = cost_tracking
+        self.synthesis_cost = 0 if self.using_native_model else None
 
     #############################################################
     # Generate Goldens from Docs
@@ -127,6 +130,7 @@ def generate_goldens_from_docs(
                     include_expected_output=include_expected_output,
                     max_goldens_per_context=max_goldens_per_context,
                     context_construction_config=context_construction_config,
+                    _reset_cost=False,
                 )
             )
         else:
@@ -149,6 +153,8 @@ def generate_goldens_from_docs(
                     max_context_size=context_construction_config.max_context_length,
                 )
             )
+            if self.synthesis_cost:
+                self.synthesis_cost += self.context_generator.total_cost
             print(
                 f"Utilizing {len(set(chain.from_iterable(contexts)))} out of {self.context_generator.total_chunks} chunks."
             )
@@ -170,8 +176,12 @@ def generate_goldens_from_docs(
                     _context_scores=context_scores,
                     _progress_bar=progress_bar,
                     _send_data=False,
+                    _reset_cost=False,
                 )
-
+        if self.cost_tracking and self.using_native_model:
+            print(
+                f"💰 API cost: {self.synthesis_cost:.6f}"
+            )
         # Wrap-up Synthesis
         if _send_data == True:
             pass
@@ -183,12 +193,14 @@ async def a_generate_goldens_from_docs(
         include_expected_output: bool = True,
         max_goldens_per_context: int = 2,
         context_construction_config: Optional[ContextConstructionConfig] = None,
+        _reset_cost=True,
     ):
         if context_construction_config is None:
             context_construction_config = ContextConstructionConfig(
                 critic_model=self.model
             )
-        self.synthesis_cost = 0 if self.using_native_model else None
+        if _reset_cost:
+            self.synthesis_cost = 0 if self.using_native_model else None
 
         # Generate contexts from provided docs
         if self.context_generator is None:
@@ -210,6 +222,8 @@ async def a_generate_goldens_from_docs(
                 max_context_size=context_construction_config.max_context_length,
             )
         )
+        if self.synthesis_cost:
+            self.synthesis_cost += self.context_generator.total_cost
         print(
             f"Utilizing {len(set(chain.from_iterable(contexts)))} out of {self.context_generator.total_chunks} chunks."
         )
@@ -230,8 +244,13 @@ async def a_generate_goldens_from_docs(
                 source_files=source_files,
                 _context_scores=context_scores,
                 _progress_bar=progress_bar,
+                _reset_cost=False,
             )
         self.synthetic_goldens.extend(goldens)
+        if _reset_cost and self.cost_tracking and self.using_native_model:
+            print(
+                f"💰 API cost: {self.synthesis_cost:.6f}"
+            )
         return goldens
 
     #############################################################
@@ -247,8 +266,10 @@ def generate_goldens_from_contexts(
         _context_scores: Optional[List[float]] = None,
         _progress_bar: Optional[tqdm.std.tqdm] = None,
         _send_data: bool = True,
+        _reset_cost: bool = True,
     ) -> List[Golden]:
-        self.synthesis_cost = 0 if self.using_native_model else None
+        if _reset_cost:
+            self.synthesis_cost = 0 if self.using_native_model else None
         # Intialize Goldens as an empty list
         goldens: List[Golden] = []
 
@@ -358,6 +379,10 @@ def generate_goldens_from_contexts(
         self.synthetic_goldens.extend(goldens)
         if _send_data == True:
             pass
+        if _reset_cost and self.cost_tracking and self.using_native_model:
+            print(
+                f"💰 API cost: {self.synthesis_cost:.6f}"
+            )
         return goldens
 
     async def a_generate_goldens_from_contexts(
@@ -368,8 +393,10 @@ async def a_generate_goldens_from_contexts(
         source_files: Optional[List[str]] = None,
         _context_scores: Optional[List[float]] = None,
         _progress_bar: Optional[tqdm.std.tqdm] = None,
+        _reset_cost: bool = True,
     ) -> List[Golden]:
-        self.synthesis_cost = 0 if self.using_native_model else None
+        if _reset_cost:
+            self.synthesis_cost = 0 if self.using_native_model else None
         semaphore = asyncio.Semaphore(self.max_concurrent)
         goldens: List[Golden] = []
         with synthesizer_progress_context(
@@ -398,7 +425,11 @@ async def a_generate_goldens_from_contexts(
                 for index, context in enumerate(contexts)
             ]
             await asyncio.gather(*tasks)
-
+            
+        if _reset_cost and self.cost_tracking and self.using_native_model:
+            print(
+                f"💰 API cost: {self.synthesis_cost:.6f}"
+            )
         return goldens
 
     async def _a_generate_from_context(

From 499c64cc18071ced7794b7dc319d698338344628 Mon Sep 17 00:00:00 2001
From: chuqingG <gao688@purdue.edu>
Date: Sat, 1 Mar 2025 17:14:23 -0500
Subject: [PATCH 2/2] reformat cost-tracking print calls and strip trailing whitespace

---
 .../synthesizer/chunking/context_generator.py  |  2 +-
 deepeval/synthesizer/synthesizer.py            | 18 +++++-------------
 2 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/deepeval/synthesizer/chunking/context_generator.py b/deepeval/synthesizer/chunking/context_generator.py
index 98bf4b902..c68399734 100644
--- a/deepeval/synthesizer/chunking/context_generator.py
+++ b/deepeval/synthesizer/chunking/context_generator.py
@@ -66,7 +66,7 @@ def __init__(
         self.source_files_to_collections_map: Optional[
             Dict[str, Collection]
         ] = None
-        
+
         # cost tracking
         self.total_cost = 0.0
 
diff --git a/deepeval/synthesizer/synthesizer.py b/deepeval/synthesizer/synthesizer.py
index 83efe5aa3..25273610b 100644
--- a/deepeval/synthesizer/synthesizer.py
+++ b/deepeval/synthesizer/synthesizer.py
@@ -179,9 +179,7 @@ def generate_goldens_from_docs(
                     _reset_cost=False,
                 )
         if self.cost_tracking and self.using_native_model:
-            print(
-                f"💰 API cost: {self.synthesis_cost:.6f}"
-            )
+            print(f"💰 API cost: {self.synthesis_cost:.6f}")
         # Wrap-up Synthesis
         if _send_data == True:
             pass
@@ -248,9 +246,7 @@ async def a_generate_goldens_from_docs(
             )
         self.synthetic_goldens.extend(goldens)
         if _reset_cost and self.cost_tracking and self.using_native_model:
-            print(
-                f"💰 API cost: {self.synthesis_cost:.6f}"
-            )
+            print(f"💰 API cost: {self.synthesis_cost:.6f}")
         return goldens
 
     #############################################################
@@ -380,9 +376,7 @@ def generate_goldens_from_contexts(
         if _send_data == True:
             pass
         if _reset_cost and self.cost_tracking and self.using_native_model:
-            print(
-                f"💰 API cost: {self.synthesis_cost:.6f}"
-            )
+            print(f"💰 API cost: {self.synthesis_cost:.6f}")
         return goldens
 
     async def a_generate_goldens_from_contexts(
@@ -425,11 +419,9 @@ async def a_generate_goldens_from_contexts(
                 for index, context in enumerate(contexts)
             ]
             await asyncio.gather(*tasks)
-            
+
         if _reset_cost and self.cost_tracking and self.using_native_model:
-            print(
-                f"💰 API cost: {self.synthesis_cost:.6f}"
-            )
+            print(f"💰 API cost: {self.synthesis_cost:.6f}")
         return goldens
 
     async def _a_generate_from_context(