missing 1 required positional argument: 'schema' for DAGMetric when using custom LLM #1390

Open
mohamed-ahshik opened this issue Feb 26, 2025 · 1 comment

Comments

@mohamed-ahshik


Describe the bug
When using a custom LLM (a DeepEvalBaseLLM subclass), DAGMetric cannot be used. Error received: TypeError: CustomLlama3_8B.a_generate() missing 1 required positional argument: 'schema'

To Reproduce

  1. Create a custom LLM (a DeepEvalBaseLLM subclass, see CustomLLM below).
  2. Use this custom LLM with DAGMetric and call measure().

Expected behavior
DAGMetric.measure() should run and produce a score; instead, the TypeError above is raised.


Desktop (please complete the following information):

  • OS: macOS Sonoma 14.5

Additional context
Error received:

TypeError Traceback (most recent call last)
Cell In[14], line 54
50 dag = DeepAcyclicGraph(root_nodes=[extract_headings_node])
53 format_correctness = DAGMetric(name="Format Correctness", dag=dag, model=custom_llm, verbose_mode=True, include_reason=True)
---> 54 format_correctness.measure(test_case=test_case)
55 # print(format_correctness.score)

File /opt/anaconda3/envs/deep_eval_env/lib/python3.11/site-packages/deepeval/metrics/dag/dag.py:65, in DAGMetric.measure(self, test_case, _show_indicator)
63 if self.async_mode:
64 loop = get_or_create_event_loop()
---> 65 loop.run_until_complete(
66 self.a_measure(test_case, _show_indicator=False)
67 )
68 else:
69 self.dag._execute(metric=self, test_case=test_case)

File /opt/anaconda3/envs/deep_eval_env/lib/python3.11/site-packages/nest_asyncio.py:98, in _patch_loop.<locals>.run_until_complete(self, future)
95 if not f.done():
96 raise RuntimeError(
97 'Event loop stopped before Future completed.')
---> 98 return f.result()

File /opt/anaconda3/envs/deep_eval_env/lib/python3.11/asyncio/futures.py:203, in Future.result(self)
201 self.__log_traceback = False
202 if self._exception is not None:
--> 203 raise self._exception.with_traceback(self._exception_tb)
204 return self._result

File /opt/anaconda3/envs/deep_eval_env/lib/python3.11/asyncio/tasks.py:279, in Task.__step(failed resolving arguments)
277 result = coro.send(None)
278 else:
--> 279 result = coro.throw(exc)
280 except StopIteration as exc:
281 if self._must_cancel:
282 # Task is cancelled right before coro stops.

File /opt/anaconda3/envs/deep_eval_env/lib/python3.11/site-packages/deepeval/metrics/dag/dag.py:93, in DAGMetric.a_measure(self, test_case, _show_indicator)
89 self.evaluation_cost = 0 if self.using_native_model else None
90 with metric_progress_indicator(
91 self, async_mode=True, _show_indicator=_show_indicator
92 ):
---> 93 await self.dag._a_execute(metric=self, test_case=test_case)
94 self.success = self.is_successful()
95 self.verbose_logs = construct_verbose_logs(
96 self,
97 steps=[
(...)
100 ],
101 )

File /opt/anaconda3/envs/deep_eval_env/lib/python3.11/site-packages/deepeval/metrics/dag/graph.py:38, in DeepAcyclicGraph._a_execute(self, metric, test_case)
33 async def _a_execute(
34 self,
35 metric: BaseMetric,
36 test_case: LLMTestCase,
37 ) -> None:
---> 38 await asyncio.gather(
39 *(
40 root_node._a_execute(
41 metric=metric, test_case=test_case, depth=0
42 )
43 for root_node in self.root_nodes
44 )
45 )

File /opt/anaconda3/envs/deep_eval_env/lib/python3.11/asyncio/tasks.py:349, in Task.__wakeup(self, future)
347 def __wakeup(self, future):
348 try:
--> 349 future.result()
350 except BaseException as exc:
351 # This may also be a cancellation.
352 self.__step(exc)

File /opt/anaconda3/envs/deep_eval_env/lib/python3.11/asyncio/tasks.py:277, in Task.__step(failed resolving arguments)
273 try:
274 if exc is None:
275 # We use the send method directly, because coroutines
276 # don't have __iter__ and __next__ methods.
--> 277 result = coro.send(None)
278 else:
279 result = coro.throw(exc)

File /opt/anaconda3/envs/deep_eval_env/lib/python3.11/site-packages/deepeval/metrics/dag/nodes.py:319, in TaskNode._a_execute(self, metric, test_case, depth)
317 self._output = res
318 else:
--> 319 res = await metric.model.a_generate(prompt=prompt)
320 self._output = res
322 metric._verbose_steps.append(
323 construct_node_verbose_log(self, self._depth)
324 )

TypeError: CustomLlama3_8B.a_generate() missing 1 required positional argument: 'schema'
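
From the traceback, the TypeError originates in deepeval/metrics/dag/nodes.py (TaskNode._a_execute), which calls the model with only a prompt, while the custom model also requires a schema argument. Side by side (both lines taken from the traceback and the CustomLLM class below):

# Call made by TaskNode (from the traceback above) -- no `schema` is passed:
res = await metric.model.a_generate(prompt=prompt)

# Signature of the custom model (see CustomLLM below) -- `schema` is required:
async def a_generate(self, prompt: str, schema: BaseModel) -> BaseModel: ...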

CustomLLM

# Imports added for completeness (paths assumed for deepeval, langchain_community, and json_repair):
import json

from deepeval.models import DeepEvalBaseLLM
from json_repair import repair_json
from langchain_community.llms import LlamaCpp
from pydantic import BaseModel


class CustomLlama3_8B(DeepEvalBaseLLM):
    def __init__(self):
        n_gpu_layers = -1  # Number of layers to put on the GPU; -1 moves all layers to the GPU.
        n_batch = 512  # Should be between 1 and n_ctx; consider the amount of RAM of your Apple Silicon chip.
        llm = LlamaCpp(
            model_path="/Users/user/Documents/deep_eval/models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
            n_gpu_layers=n_gpu_layers,
            n_batch=n_batch,
            f16_kv=True,  # MUST be set to True, otherwise you will run into problems after a couple of calls
            verbose=False,
            repeat_penalty=1.0,  # LlamaCpp's parameter name ("repitition_penalty" in the original snippet)
            n_ctx=8000,
            top_k=5,
            do_sample=True,
            seed=42,
            max_tokens=2500,
        )
        self.model = llm

    def load_model(self):
        return self.model

    def generate(self, prompt: str, schema: BaseModel) -> BaseModel:
        # Wrap the prompt in the Llama 3.1 chat template and ask for JSON matching the schema.
        main_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
        You are a helpful assistant
        <|eot_id|><|start_header_id|>user<|end_header_id|>

        {prompt}
        ## OUTPUT
        You MUST answer using the following json schema: {schema.model_json_schema()}
        Please generate the list of statements from the given text. Do not add any extra information or explanations.
        Simply present the list of statements in JSON format.
        Make sure all the keys are in the output.
        <|eot_id|><|start_header_id|>assistant<|end_header_id|>
        """
        model = self.load_model()
        result = model.invoke(main_prompt)
        result = repair_json(result)  # repair malformed JSON before parsing
        json_result = json.loads(result)
        print("JSON_Result:", json_result)

        return schema(**json_result)

    async def a_generate(self, prompt: str, schema: BaseModel) -> BaseModel:
        return self.generate(prompt, schema)

    def get_model_name(self):
        return "Llama-3.1 8B"

Use of DAGMetric

# Imports added for completeness (paths assumed from the deepeval package):
from deepeval.metrics import DAGMetric
from deepeval.metrics.dag import (
    DeepAcyclicGraph,
    TaskNode,
    BinaryJudgementNode,
    NonBinaryJudgementNode,
    VerdictNode,
)
from deepeval.test_case import LLMTestCase, LLMTestCaseParams

custom_llm = CustomLlama3_8B()

test_case = LLMTestCase(
    input="""
Alice: "Today's agenda: product update, blockers, and marketing timeline. Bob, updates?"
Bob: "Core features are done, but we're optimizing performance for large datasets. Fixes by Friday, testing next week."
Alice: "Charlie, does this timeline work for marketing?"
Charlie: "We need finalized messaging by Monday."
Alice: "Bob, can we provide a stable version by then?"
Bob: "Yes, we’ll share an early build."
Charlie: "Great, we'll start preparing assets."
Alice: "Plan: fixes by Friday, marketing prep Monday, sync next Wednesday. Thanks, everyone!"
""",
    actual_output="""
Intro:
Alice outlined the agenda: product updates, blockers, and marketing alignment.

Body:
Bob reported performance issues being optimized, with fixes expected by Friday. Charlie requested finalized messaging by Monday for marketing preparation. Bob confirmed an early stable build would be ready.

Conclusion:
The team aligned on next steps: engineering finalizing fixes, marketing preparing content, and a follow-up sync scheduled for Wednesday.
""",
)

correct_order_node = NonBinaryJudgementNode(
    criteria="Are the summary headings in the correct order: 'intro' => 'body' => 'conclusion'?",
    children=[
        VerdictNode(verdict="Yes", score=10),
        VerdictNode(verdict="Two are out of order", score=4),
        VerdictNode(verdict="All out of order", score=2),
    ],
)

correct_headings_node = BinaryJudgementNode(
    criteria="Does the summary headings contain all three: 'intro', 'body', and 'conclusion'?",
    children=[
        VerdictNode(verdict=False, score=0),
        VerdictNode(verdict=True, child=correct_order_node),
    ],
)

extract_headings_node = TaskNode(
    instructions="Extract all headings in actual_output",
    evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT],
    output_label="Summary headings",
    children=[correct_headings_node, correct_order_node],
)

# create the DAG
dag = DeepAcyclicGraph(root_nodes=[extract_headings_node])

format_correctness = DAGMetric(
    name="Format Correctness",
    dag=dag,
    model=custom_llm,
    verbose_mode=True,
    include_reason=True,
)
format_correctness.measure(test_case=test_case)

print(format_correctness.score)
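
As a possible workaround on my side (a sketch of my own, not an official deepeval API), making schema optional avoids the TypeError, since TaskNode calls a_generate(prompt=prompt) without a schema; when no schema is supplied, the raw completion is returned as a string:

from typing import Optional

class CustomLlama3_8B_Patched(CustomLlama3_8B):
    # Sketch only: accept calls without a schema (as made by DAGMetric's TaskNode)
    # and return the raw completion string in that case.
    def generate(self, prompt: str, schema: Optional[BaseModel] = None):
        if schema is None:
            return self.load_model().invoke(prompt)
        return super().generate(prompt, schema)

    async def a_generate(self, prompt: str, schema: Optional[BaseModel] = None):
        return self.generate(prompt, schema)

This only silences the TypeError for the schema-less call; nodes that do pass a schema continue to use the original schema-based path.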


@penguine-ip
Contributor

@mohamed-ahshik can you format your code? It's hard to read, thanks!
