From a6596257b7e52eb5409ba67f0dbb82e761d4184c Mon Sep 17 00:00:00 2001
From: scosman
Date: Sat, 1 Feb 2025 16:08:54 -0500
Subject: [PATCH] Update our run strategy: Plaintext output + COT still uses
 two-message format.

This separates the COT from the answer, which is useful for
training/fine-tuning/products. Now COT thinking/R1 reasoning is never
shown to the user.
---
 .../adapters/model_adapters/base_adapter.py   | 19 ++++++++++---------
 .../model_adapters/test_base_adapter.py       |  8 ++++----
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py b/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py
index 4595f60c..f2bd02b0 100644
--- a/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py
+++ b/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py
@@ -161,18 +161,19 @@ def run_strategy(
         self,
     ) -> Tuple[Literal["cot_as_message", "cot_two_call", "basic"], str | None]:
         # Determine the run strategy for COT prompting. 3 options:
-        # 1. Unstructured output: just call the LLM, with prompting for thinking
-        # 2. "Thinking" LLM designed to output thinking in a structured format: we make 1 call to the LLM, which outputs thinking in a structured format.
-        # 3. Normal LLM with structured output: we make 2 calls to the LLM - one for thinking and one for the final response. This helps us use the LLM's structured output modes (json_schema, tools, etc), which can't be used in a single call.
+        # 1. "Thinking" LLM designed to output thinking in a structured format plus a COT prompt: we make 1 call to the LLM, which outputs thinking in a structured format. We include the thinking instructions as a message.
+        # 2. Normal LLM with COT prompt: we make 2 calls to the LLM - one for thinking and one for the final response. This helps us use the LLM's structured output modes (json_schema, tools, etc), which can't be used in a single call. It also separates the thinking from the final response.
+        # 3. Non chain of thought: we make 1 call to the LLM, with no COT prompt.
         cot_prompt = self.prompt_builder.chain_of_thought_prompt()
-        thinking_llm = self.model_provider().reasoning_capable
+        reasoning_capable = self.model_provider().reasoning_capable
 
-        if cot_prompt and (not self.has_structured_output() or thinking_llm):
-            # Case 1 or 2: Unstructured output or "Thinking" LLM designed to output thinking in a structured format
-            # For these, we add a system message with the thinking instruction to the message list, but then run normally
+        if cot_prompt and reasoning_capable:
+            # 1: "Thinking" LLM designed to output thinking in a structured format
+            # A simple message with the COT prompt appended to the message list is sufficient
             return "cot_as_message", cot_prompt
-        elif not thinking_llm and cot_prompt and self.has_structured_output():
-            # Case 3: Normal LLM with structured output, requires 2 calls
+        elif cot_prompt:
+            # 2: Normal LLM with COT prompt (structured or unstructured output)
+            # Two calls to separate the thinking from the final response
             return "cot_two_call", cot_prompt
         else:
             return "basic", None
diff --git a/libs/core/kiln_ai/adapters/model_adapters/test_base_adapter.py b/libs/core/kiln_ai/adapters/model_adapters/test_base_adapter.py
index 6d462f6d..a991f9b3 100644
--- a/libs/core/kiln_ai/adapters/model_adapters/test_base_adapter.py
+++ b/libs/core/kiln_ai/adapters/model_adapters/test_base_adapter.py
@@ -158,11 +158,11 @@ async def test_prompt_builder_json_instructions(
 @pytest.mark.parametrize(
     "cot_prompt,has_structured_output,reasoning_capable,expected",
     [
-        # Case 1: Unstructured output with COT
-        ("think carefully", False, False, ("cot_as_message", "think carefully")),
-        # Case 2: Structured output with thinking-capable LLM
+        # Unstructured output with COT and a normal LLM
+        ("think carefully", False, False, ("cot_two_call", "think carefully")),
+        # Structured output with thinking-capable LLM
         ("think carefully", True, True, ("cot_as_message", "think carefully")),
-        # Case 3: Structured output with normal LLM
+        # Structured output with normal LLM
         ("think carefully", True, False, ("cot_two_call", "think carefully")),
         # Basic cases - no COT
         (None, True, True, ("basic", None)),
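
Note for reviewers (not part of the applied diff): a minimal standalone sketch of the decision table that run_strategy() in base_adapter.py implements after this change. The helper name pick_run_strategy and its bare inputs are hypothetical; in the real adapter the values come from self.prompt_builder.chain_of_thought_prompt() and self.model_provider().reasoning_capable as shown above, and has_structured_output() no longer affects the choice.

from typing import Literal, Optional, Tuple

RunStrategy = Literal["cot_as_message", "cot_two_call", "basic"]


def pick_run_strategy(
    cot_prompt: Optional[str], reasoning_capable: bool
) -> Tuple[RunStrategy, Optional[str]]:
    # Hypothetical standalone mirror of run_strategy() after this patch.
    if cot_prompt and reasoning_capable:
        # Reasoning-capable model: append the COT prompt as a message, one call
        return "cot_as_message", cot_prompt
    if cot_prompt:
        # Any other model with a COT prompt: two calls, thinking kept separate
        return "cot_two_call", cot_prompt
    # No COT prompt: plain single call
    return "basic", None


if __name__ == "__main__":
    # Mirrors the updated parametrized expectations in test_base_adapter.py
    assert pick_run_strategy("think carefully", True) == ("cot_as_message", "think carefully")
    assert pick_run_strategy("think carefully", False) == ("cot_two_call", "think carefully")
    assert pick_run_strategy(None, False) == ("basic", None)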