From a6596257b7e52eb5409ba67f0dbb82e761d4184c Mon Sep 17 00:00:00 2001
From: scosman
Date: Sat, 1 Feb 2025 16:08:54 -0500
Subject: [PATCH] Update our run strategy: Plaintext output + COT still uses
 two-message format.

This separates the COT from the answer, which is useful for
training/fine-tuning/products. Now COT thinking/R1 reasoning is never
shown to the user.
---
 .../adapters/model_adapters/base_adapter.py   | 19 ++++++++++---------
 .../model_adapters/test_base_adapter.py       |  8 ++++----
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py b/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py
index 4595f60c..f2bd02b0 100644
--- a/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py
+++ b/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py
@@ -161,18 +161,19 @@ def run_strategy(
         self,
     ) -> Tuple[Literal["cot_as_message", "cot_two_call", "basic"], str | None]:
         # Determine the run strategy for COT prompting. 3 options:
-        # 1. Unstructured output: just call the LLM, with prompting for thinking
-        # 2. "Thinking" LLM designed to output thinking in a structured format: we make 1 call to the LLM, which outputs thinking in a structured format.
-        # 3. Normal LLM with structured output: we make 2 calls to the LLM - one for thinking and one for the final response. This helps us use the LLM's structured output modes (json_schema, tools, etc), which can't be used in a single call.
+        # 1. "Thinking" LLM designed to output thinking in a structured format plus a COT prompt: we make 1 call to the LLM, which outputs thinking in a structured format. We include the thinking instructions as a message.
+        # 2. Normal LLM with COT prompt: we make 2 calls to the LLM - one for thinking and one for the final response. This helps us use the LLM's structured output modes (json_schema, tools, etc), which can't be used in a single call. It also separates the thinking from the final response.
+        # 3. Non chain of thought: we make 1 call to the LLM, with no COT prompt.
         cot_prompt = self.prompt_builder.chain_of_thought_prompt()
-        thinking_llm = self.model_provider().reasoning_capable
+        reasoning_capable = self.model_provider().reasoning_capable
 
-        if cot_prompt and (not self.has_structured_output() or thinking_llm):
-            # Case 1 or 2: Unstructured output or "Thinking" LLM designed to output thinking in a structured format
-            # For these, we add a system message with the thinking instruction to the message list, but then run normally
+        if cot_prompt and reasoning_capable:
+            # 1: "Thinking" LLM designed to output thinking in a structured format
+            # A simple message with the COT prompt appended to the message list is sufficient
             return "cot_as_message", cot_prompt
-        elif not thinking_llm and cot_prompt and self.has_structured_output():
-            # Case 3: Normal LLM with structured output, requires 2 calls
+        elif cot_prompt:
+            # 2: Normal LLM with COT prompt (structured or unstructured output)
+            # Two calls to separate the thinking from the final response
             return "cot_two_call", cot_prompt
         else:
             return "basic", None
diff --git a/libs/core/kiln_ai/adapters/model_adapters/test_base_adapter.py b/libs/core/kiln_ai/adapters/model_adapters/test_base_adapter.py
index 6d462f6d..a991f9b3 100644
--- a/libs/core/kiln_ai/adapters/model_adapters/test_base_adapter.py
+++ b/libs/core/kiln_ai/adapters/model_adapters/test_base_adapter.py
@@ -158,11 +158,11 @@ async def test_prompt_builder_json_instructions(
 @pytest.mark.parametrize(
     "cot_prompt,has_structured_output,reasoning_capable,expected",
     [
-        # Case 1: Unstructured output with COT
-        ("think carefully", False, False, ("cot_as_message", "think carefully")),
-        # Case 2: Structured output with thinking-capable LLM
+        # Unstructured output with COT and a normal LLM
+        ("think carefully", False, False, ("cot_two_call", "think carefully")),
+        # Structured output with thinking-capable LLM
         ("think carefully", True, True, ("cot_as_message", "think carefully")),
-        # Case 3: Structured output with normal LLM
+        # Structured output with normal LLM
         ("think carefully", True, False, ("cot_two_call", "think carefully")),
         # Basic cases - no COT
         (None, True, True, ("basic", None)),
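
Note for reviewers (not part of the applied diff): a minimal standalone sketch of the decision table that run_strategy() in base_adapter.py implements after this change. The helper name pick_run_strategy and its bare inputs are hypothetical; in the real adapter the values come from self.prompt_builder.chain_of_thought_prompt() and self.model_provider().reasoning_capable as shown above, and has_structured_output() no longer affects the choice.

from typing import Literal, Optional, Tuple

RunStrategy = Literal["cot_as_message", "cot_two_call", "basic"]


def pick_run_strategy(
    cot_prompt: Optional[str], reasoning_capable: bool
) -> Tuple[RunStrategy, Optional[str]]:
    # Hypothetical standalone mirror of run_strategy() after this patch.
    if cot_prompt and reasoning_capable:
        # Reasoning-capable model: append the COT prompt as a message, one call
        return "cot_as_message", cot_prompt
    if cot_prompt:
        # Any other model with a COT prompt: two calls, thinking kept separate
        return "cot_two_call", cot_prompt
    # No COT prompt: plain single call
    return "basic", None


if __name__ == "__main__":
    # Mirrors the updated parametrized expectations in test_base_adapter.py
    assert pick_run_strategy("think carefully", True) == ("cot_as_message", "think carefully")
    assert pick_run_strategy("think carefully", False) == ("cot_two_call", "think carefully")
    assert pick_run_strategy(None, False) == ("basic", None)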