forked from ollama/ollama
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
llama: preserve field order in user-defined JSON schemas (ollama#8002)
Previously we decoded and re-encoded JSON schemas during validation, which served no purpose since json.RawMessage already validates JSON syntax. Worse, the re-encoding lost field ordering from the original schema, which affects inference quality during step-by-step reasoning. While fixing this ordering issue by using json.RawMessage directly, testing revealed that schema_to_grammar (from llama.cpp) also fails to preserve field order during grammar generation. This appears to be the root cause of inference degradation. This change prevents us from mangling the user's original schema order, but we still need to address the ordering issue in schema_to_grammar. That will be a separate change. Updates ollama#7978
- Loading branch information
Showing
5 changed files
with
104 additions
and
114 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
package llama | ||
|
||
import ( | ||
"bufio" | ||
"bytes" | ||
"strings" | ||
"testing" | ||
) | ||
|
||
// issue7978JSONSchema is the JSON schema used by TestIssue7978; see | ||
// https://github.com/ollama/ollama/issues/7978 | ||
// | ||
// "steps" is declared before "final_answer" in "properties"; TestIssue7978 | ||
// checks that the generated grammar mentions them in that same order. | ||
const issue7978JSONSchema = `{ | ||
"type": "object", | ||
"properties": { | ||
"steps": { | ||
"type": "array", | ||
"items": { | ||
"type": "object", | ||
"properties": { | ||
"explanation": { "type": "string" }, | ||
"output": { "type": "string" } | ||
}, | ||
"required": ["explanation", "output"], | ||
"additionalProperties": false | ||
} | ||
}, | ||
"final_answer": { "type": "string" } | ||
}, | ||
"required": ["steps", "final_answer"], | ||
"additionalProperties": false | ||
}` | ||
|
||
func TestIssue7978(t *testing.T) { | ||
t.Skip("schema_to_grammar is broken; skipping until fixed") | ||
|
||
g := SchemaToGrammar([]byte(issue7978JSONSchema)) | ||
if g == nil { | ||
t.Fatal("failed to convert JSON schema to grammar") | ||
} | ||
|
||
t.Logf("grammar:\n%s", g) | ||
t.Log() | ||
|
||
var sawSteps bool | ||
s := bufio.NewScanner(bytes.NewReader(g)) | ||
for s.Scan() { | ||
line := s.Text() | ||
if strings.Contains(line, "steps") { | ||
sawSteps = true | ||
} | ||
if strings.Contains(line, "final-answer") && !sawSteps { | ||
t.Error("expected 'steps' before 'final-answer'") | ||
} | ||
} | ||
} | ||
|
||
func TestSchemaToGrammer(t *testing.T) { | ||
t.Skip("schema_to_grammar is broken; skipping until fixed") | ||
|
||
cases := []struct { | ||
schema string | ||
prefix []byte // nil is check as nil | ||
}{ | ||
{`invalid`, nil}, | ||
|
||
// Simple heuristic/smoke test | ||
{`{"type":"object"}`, []byte("object ::=")}, | ||
} | ||
|
||
for _, c := range cases { | ||
t.Run("x", func(t *testing.T) { | ||
g := SchemaToGrammar([]byte(c.schema)) | ||
if c.prefix == nil && g != nil { | ||
t.Fatalf("grammar = %v, want nil", g) | ||
} | ||
if !bytes.HasPrefix(g, c.prefix) { | ||
t.Errorf("grammar = %q, want %q", g, c.prefix) | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,70 +1 @@ | ||
package llama | ||
|
||
import ( | ||
"strings" | ||
"testing" | ||
|
||
"github.com/google/go-cmp/cmp" | ||
) | ||
|
||
func TestJsonSchema(t *testing.T) { | ||
testCases := []struct { | ||
name string | ||
schema JsonSchema | ||
expected string | ||
}{ | ||
{ | ||
name: "empty schema", | ||
schema: JsonSchema{ | ||
Type: "object", | ||
}, | ||
expected: `array ::= "[" space ( value ("," space value)* )? "]" space | ||
boolean ::= ("true" | "false") space | ||
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) | ||
decimal-part ::= [0-9]{1,16} | ||
integral-part ::= [0] | [1-9] [0-9]{0,15} | ||
null ::= "null" space | ||
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space | ||
root ::= object | ||
space ::= | " " | "\n" [ \t]{0,20} | ||
string ::= "\"" char* "\"" space | ||
value ::= object | array | string | number | boolean | null`, | ||
}, | ||
{ | ||
name: "invalid schema with circular reference", | ||
schema: JsonSchema{ | ||
Type: "object", | ||
Properties: map[string]any{ | ||
"self": map[string]any{ | ||
"$ref": "#", // Self reference | ||
}, | ||
}, | ||
}, | ||
expected: "", // Should return empty string for invalid schema | ||
}, | ||
{ | ||
name: "schema with invalid type", | ||
schema: JsonSchema{ | ||
Type: "invalid_type", // Invalid type | ||
Properties: map[string]any{ | ||
"foo": map[string]any{ | ||
"type": "string", | ||
}, | ||
}, | ||
}, | ||
expected: "", // Should return empty string for invalid schema | ||
}, | ||
} | ||
|
||
for _, tc := range testCases { | ||
t.Run(tc.name, func(t *testing.T) { | ||
result := tc.schema.AsGrammar() | ||
if !strings.EqualFold(strings.TrimSpace(result), strings.TrimSpace(tc.expected)) { | ||
if diff := cmp.Diff(tc.expected, result); diff != "" { | ||
t.Fatalf("grammar mismatch (-want +got):\n%s", diff) | ||
} | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters