Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Symbol Token Write Support #165

Merged
merged 29 commits into from
Nov 9, 2020
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions cmd/ion-go/eventwriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ type eventwriter struct {
// of ion-test-driver events.
func NewEventWriter(out io.Writer) ion.Writer {
w := ion.NewTextWriter(out)
w.WriteSymbol("$ion_event_stream")
w.WriteSymbol(ion.NewSimpleSymbolToken("$ion_event_stream"))

return &eventwriter{enc: ion.NewEncoder(w)}
}

func (e *eventwriter) FieldNameSymbol(val ion.SymbolToken) error {
func (e *eventwriter) FieldName(val ion.SymbolToken) error {
if val.Text == nil {
sid := fmt.Sprintf("$%v", val.LocalSID)
e.fieldname = &sid
Expand All @@ -53,11 +53,6 @@ func (e *eventwriter) FieldNameSymbol(val ion.SymbolToken) error {
return nil
}

func (e *eventwriter) FieldName(val string) error {
e.fieldname = &val
return nil
}

func (e *eventwriter) Annotation(val ion.SymbolToken) error {
e.annotations = append(e.annotations, val)
return nil
Expand Down Expand Up @@ -140,14 +135,22 @@ func (e *eventwriter) WriteTimestamp(val ion.Timestamp) error {
})
}

func (e *eventwriter) WriteSymbol(val string) error {
func (e *eventwriter) WriteSymbol(val ion.SymbolToken) error {
return e.write(event{
EventType: scalar,
IonType: iontype(ion.SymbolType),
ValueText: symbolify(val),
})
}

func (e *eventwriter) WriteSymbolFromString(val string) error {
return e.write(event{
EventType: scalar,
IonType: iontype(ion.SymbolType),
ValueText: stringify(val),
})
}

func (e *eventwriter) WriteString(val string) error {
return e.write(event{
EventType: scalar,
Expand Down Expand Up @@ -253,7 +256,7 @@ func stringify(val interface{}) string {
return string(bs)
}

func symbolify(val string) string {
func symbolify(val ion.SymbolToken) string {
buf := strings.Builder{}
w := ion.NewTextWriterOpts(&buf, ion.TextWriterQuietFinish)

Expand Down
18 changes: 7 additions & 11 deletions cmd/ion-go/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,28 +78,24 @@ func printVersion() error {
return err
}
{
if err := w.FieldName("version"); err != nil {
if err := w.FieldName(ion.NewSimpleSymbolToken("version")); err != nil {
return err
}
if err := w.WriteString(internal.GitCommit); err != nil {
return err
}

if err := w.FieldName(ion.NewSimpleSymbolToken("build_time")); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't w.FieldName() happens only if ion.NewTimestampFromStr() returns no error? Condition line 93 new

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Look at the old code. w.FieldName("build_time") is in both the nil err case and the non-nil err case.

return err
}

buildtime, err := ion.NewTimestampFromStr(internal.BuildTime, ion.TimestampPrecisionSecond, ion.TimezoneUTC)
if err == nil {
if err := w.FieldName("build_time"); err != nil {
return err
}
if err := w.WriteTimestamp(buildtime); err != nil {
return err
}
} else {
if err := w.FieldName("build_time"); err != nil {
return err
}
if err := w.WriteString("unknown-buildtime"); err != nil {
return err
}
} else if err := w.WriteString("unknown-buildtime"); err != nil {
return err
}
}
if err := w.EndStruct(); err != nil {
Expand Down
12 changes: 6 additions & 6 deletions cmd/ion-go/nopwriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,7 @@ func NewNopWriter() ion.Writer {
return nopwriter{}
}

func (nopwriter) FieldName(string) error {
return nil
}

func (nopwriter) FieldNameSymbol(val ion.SymbolToken) error {
func (nopwriter) FieldName(ion.SymbolToken) error {
return nil
}

Expand Down Expand Up @@ -80,7 +76,11 @@ func (nopwriter) WriteTimestamp(ion.Timestamp) error {
return nil
}

func (nopwriter) WriteSymbol(string) error {
func (nopwriter) WriteSymbol(ion.SymbolToken) error {
return nil
}

func (nopwriter) WriteSymbolFromString(string) error {
return nil
}

Expand Down
6 changes: 3 additions & 3 deletions cmd/ion-go/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ func (p *processor) process(in ion.Reader) error {
if e != nil {
return p.error(read, err)
}
if name != nil && name.Text != nil {
if err = p.out.FieldName(*name.Text); err != nil {
if name != nil {
if err = p.out.FieldName(*name); err != nil {
return p.error(write, err)
}
}
Expand Down Expand Up @@ -301,7 +301,7 @@ func (p *processor) process(in ion.Reader) error {
err = p.out.WriteTimestamp(*val)

case ion.SymbolType:
val, err := in.StringValue()
val, err := in.SymbolValue()
if err != nil {
return p.error(read, err)
}
Expand Down
6 changes: 3 additions & 3 deletions cmd/ion-go/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ func (e eventtype) String() string {
}

func (e eventtype) MarshalIon(w ion.Writer) error {
return w.WriteSymbol(e.String())
return w.WriteSymbolFromString(e.String())
}

type iontype ion.Type

func (i iontype) MarshalIon(w ion.Writer) error {
return w.WriteSymbol(strings.ToUpper(ion.Type(i).String()))
return w.WriteSymbolFromString(strings.ToUpper(ion.Type(i).String()))
}

// event describes an Ion processing event.
Expand Down Expand Up @@ -99,7 +99,7 @@ func (e errortype) String() string {
}

func (e errortype) MarshalIon(w ion.Writer) error {
return w.WriteSymbol(e.String())
return w.WriteSymbolFromString(e.String())
}

// errordescription describes an error during Ion processing.
Expand Down
21 changes: 11 additions & 10 deletions ion/binaryreader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,15 @@ func TestReadBinaryStructs(t *testing.T) {
_struct(t, r, func(t *testing.T, r Reader) {
_eof(t, r)
})
_structAF(t, r, nil, []SymbolToken{SymbolToken{Text: newString("foo"), LocalSID: SymbolIDUnknown}}, func(t *testing.T, r Reader) {
_structAF(t, r, &SymbolToken{Text: newString("name"), LocalSID: 4}, []SymbolToken{SymbolToken{Text: newString("bar"), LocalSID: SymbolIDUnknown}}, func(t *testing.T, r Reader) {
_structAF(t, r, nil, []SymbolToken{NewSimpleSymbolToken("foo")}, func(t *testing.T, r Reader) {
_structAF(t, r, &SymbolToken{Text: newString("name"), LocalSID: 4}, []SymbolToken{NewSimpleSymbolToken("bar")}, func(t *testing.T, r Reader) {
_eof(t, r)
})
_intAF(t, r, &SymbolToken{Text: newString("max_id"), LocalSID: 8}, nil, 0)
})
_structAF(t, r, nil, nil, func(t *testing.T, r Reader) {
_intAF(t, r, &SymbolToken{Text: newString(""), LocalSID: SymbolIDUnknown}, nil, 15)
st := NewSimpleSymbolToken("")
_intAF(t, r, &st, nil, 15)
})
_eof(t, r)
}
Expand Down Expand Up @@ -257,7 +258,7 @@ func TestReadBinaryNullFieldName(t *testing.T) {
// }
})
r.Next()
r.StepIn()
assert.NoError(t, r.StepIn())
_nextF(t, r, &SymbolToken{}, true, true)
}

Expand Down Expand Up @@ -287,10 +288,10 @@ func TestReadBinaryAnnotations(t *testing.T) {
0xE3, 0x81, 0xF1, 0x0F, // $113::null
})

_nextA(t, r, []SymbolToken{SymbolToken{Text: nil, LocalSID: 0}}, false, false)
_nextA(t, r, []SymbolToken{SymbolToken{Text: newString("$ion"), LocalSID: 1}}, false, false)
_nextA(t, r, []SymbolToken{SymbolToken{Text: newString("foo"), LocalSID: 110}}, false, false)
_nextA(t, r, []SymbolToken{SymbolToken{Text: newString("bar"), LocalSID: 111}}, false, false)
_nextA(t, r, []SymbolToken{{Text: nil, LocalSID: 0}}, false, false)
_nextA(t, r, []SymbolToken{{Text: newString("$ion"), LocalSID: 1}}, false, false)
_nextA(t, r, []SymbolToken{{Text: newString("foo"), LocalSID: 110}}, false, false)
_nextA(t, r, []SymbolToken{{Text: newString("bar"), LocalSID: 111}}, false, false)
_nextA(t, r, nil, true, true)
}

Expand Down Expand Up @@ -455,8 +456,8 @@ func TestReadBinaryNulls(t *testing.T) {
})

_null(t, r, NullType)
_nullAF(t, r, NullType, nil, []SymbolToken{SymbolToken{Text: newString("$ion"), LocalSID: 1}})
_nullAF(t, r, NullType, nil, []SymbolToken{SymbolToken{Text: newString("foo"), LocalSID: SymbolIDUnknown}, SymbolToken{Text: newString("bar"), LocalSID: SymbolIDUnknown}})
_nullAF(t, r, NullType, nil, []SymbolToken{{Text: newString("$ion"), LocalSID: 1}})
_nullAF(t, r, NullType, nil, []SymbolToken{NewSimpleSymbolToken("foo"), NewSimpleSymbolToken("bar")})
_eof(t, r)
}

Expand Down
70 changes: 53 additions & 17 deletions ion/binarywriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ import (
"io"
"math"
"math/big"
"strconv"
"strings"
"time"
)

Expand Down Expand Up @@ -251,22 +249,51 @@ func (w *binaryWriter) WriteTimestamp(val Timestamp) error {
return w.writeValue("Writer.WriteTimestamp", buf)
}

// WriteSymbol writes a symbol value.
func (w *binaryWriter) WriteSymbol(val string) error {
id, err := w.resolve("Writer.WriteSymbol", val)
if err != nil {
w.err = err
return err
// WriteSymbol writes a symbol value given a SymbolToken.
func (w *binaryWriter) WriteSymbol(val SymbolToken) error {
var id uint64
if val.LocalSID != SymbolIDUnknown {
id = uint64(val.LocalSID)
} else if val.Text != nil {
text := *val.Text
if _, ok := symbolIdentifier(text); ok {
// Wrap text value in single quotes if the symbol's text is a symbol identifier
// (ie. of form $n for some integer n)
// This is done to distinguish from actual symbol table mappings.
text = fmt.Sprintf("'%v'", text)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I understand the behavior that's intended here. Why are we wrapping $37 in single quotes?

If I pass a SymbolToken with LocalSID=SymbolIDUnknown and Text="$37", I would expect WriteSymbol to try to resolve the text literal $37 (unmodified) in the symbol table. If I'd wanted to write SID 37, I would have set LocalSID=37 in the SymbolToken, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have roundtrip tests (such as ion-tests/iontestdata/good/symbols.ion with the test case '$99') that fail without any internal logic wrapping $99 in quotes.

When the text reader reads '$99', it produces the string "$99" (note the lack of single quotes) and then creates a SymbolToken using that string value. The absence of single quotes creates confusion further downstream about whether we are dealing with an actual symbol table mapping vs a non-mapping SymbolToken with this text value.

I moved the single quote wrapping logic over to textReader::nextBeforeTypeAnnotations instead of binaryWriter::WriteSymbol and textWriter::WriteSymbol.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

such as ion-tests/iontestdata/good/symbols.ion with the test case '$99'

Here's a link to this test case for anyone who's interested.

When the text reader reads '$99', it produces the string "$99" (note the lack of single quotes) and then creates a SymbolToken using that string value.

This sounds like the correct behavior to me.

The absence of single quotes creates confusion further downstream about whether we are dealing with an actual symbol table mapping vs a non-mapping SymbolToken with this text value.

This is something we need to sort out; I don't think there's any writer API where the golang string $99 should be reinterpreted as anything other than a string literal.

Can you link to the function(s) that are causing the problems? Why is the reinterpretation happening at all?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't remember off the top of my head where specifically to go looking, but the root cause on this one is likely me. :)

At some point back before the introduction of SymbolToken, if the binary reader hit a symbol ID it couldn't resolve (or the text reader hit an unquoted ${n}), they'd return the string ${id} instead of an error. The writers correspondingly mapped WriteSymbol("$123") to being a symbol with that ID, bypassing the symbol table. That came at the expense of being able to read/write an actual symbol whose text is ${n}, but that wasn't a thing I thought I would ever need to do (😬😅).

Now that everything at the reader/writer level is converted to SymbolTokens, any remnants of that questionable decision can and should be removed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes total sense. Thanks for the added context!

Copy link
Contributor Author

@justing-bq justing-bq Oct 20, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright I reworked this so the Symbol Token's text value is the unquoted $99.
However when the text writer is writing out the symbol in textutils.go:writeSymbol(), we still need to wrap this value in single quotes.

}

id, w.err = w.resolveFromSymbolTable("Writer.WriteSymbol", text)
if w.err != nil {
return w.err
}
} else {
return &UsageError{"Writer.WriteSymbol", "invalid symbol token"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add some detail to the error message?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

}

return w.writeSymbolFromID("Writer.WriteSymbol", id)
}

// WriteSymbolFromString writes a symbol value given a string.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment should be more explicit about how the provided string is going to be used; it expects the string to already be in the symbol table and will return an error if it isn't.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

func (w *binaryWriter) WriteSymbolFromString(val string) error {
var id uint64
id, w.err = w.resolve("Writer.WriteSymbolFromString", val)
if w.err != nil {
return w.err
}

return w.writeSymbolFromID("Writer.WriteSymbolFromString", id)
}

func (w *binaryWriter) writeSymbolFromID(api string, id uint64) error {
vlength := uintLen(id)
bufLength := vlength + tagLen(vlength)
buf := make([]byte, 0, bufLength)

buf = appendTag(buf, 0x70, vlength)
buf = appendUint(buf, id)

return w.writeValue("Writer.WriteSymbol", buf)
return w.writeValue(api, buf)
}

// WriteString writes a string.
Expand Down Expand Up @@ -492,9 +519,17 @@ func (w *binaryWriter) beginValue(api string) error {
return &UsageError{api, "field name not set"}
}

id, err := w.resolve(api, *name)
if err != nil {
return err
var id uint64
if name.LocalSID != SymbolIDUnknown {
id = uint64(name.LocalSID)
} else if name.Text != nil {
var err error
id, err = w.resolve(api, *name.Text)
if err != nil {
return err
}
} else {
return &UsageError{api, "invalid field name symbol token"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error message should communicate why the symbol token is invalid.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

}

buf := make([]byte, 0, 10)
Expand Down Expand Up @@ -591,13 +626,14 @@ func (w *binaryWriter) end(api string, t ctx) error {

// Resolve resolves a symbol to its ID.
func (w *binaryWriter) resolve(api, sym string) (uint64, error) {
if strings.HasPrefix(sym, "$") {
id, err := strconv.ParseUint(sym[1:], 10, 64)
if err == nil {
return id, nil
}
if id, ok := symbolIdentifier(sym); ok {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want to understand the symbol ID interpretation behavior that we're implementing here. I'm having difficulty imagining a scenario where I'm using a binary writer and I pass it a string in the $id format ($12, $47, etc).

return uint64(id), nil
}

return w.resolveFromSymbolTable(api, sym)
}

func (w *binaryWriter) resolveFromSymbolTable(api, sym string) (uint64, error) {
if w.lst != nil {
id, ok := w.lst.FindByName(sym)
if !ok {
Expand Down
26 changes: 13 additions & 13 deletions ion/binarywriter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ func TestWriteBinaryStruct(t *testing.T) {
assert.NoError(t, w.BeginStruct())
assert.NoError(t, w.EndStruct())

assert.NoError(t, w.Annotation(SymbolToken{Text: newString("foo"), LocalSID: SymbolIDUnknown}))
assert.NoError(t, w.Annotation(NewSimpleSymbolToken("foo")))
assert.NoError(t, w.BeginStruct())
{
assert.NoError(t, w.FieldName("name"))
assert.NoError(t, w.Annotation(SymbolToken{Text: newString("bar"), LocalSID: SymbolIDUnknown}))
assert.NoError(t, w.FieldName(NewSimpleSymbolToken("name")))
assert.NoError(t, w.Annotation(NewSimpleSymbolToken("bar")))
assert.NoError(t, w.WriteNull())

assert.NoError(t, w.FieldName("max_id"))
assert.NoError(t, w.FieldName(NewSimpleSymbolToken("max_id")))
assert.NoError(t, w.WriteInt(0))
}
assert.NoError(t, w.EndStruct())
Expand All @@ -67,10 +67,10 @@ func TestWriteBinarySexp(t *testing.T) {
assert.NoError(t, w.BeginSexp())
assert.NoError(t, w.EndSexp())

assert.NoError(t, w.Annotation(SymbolToken{Text: newString("foo"), LocalSID: SymbolIDUnknown}))
assert.NoError(t, w.Annotation(NewSimpleSymbolToken("foo")))
assert.NoError(t, w.BeginSexp())
{
assert.NoError(t, w.Annotation(SymbolToken{Text: newString("bar"), LocalSID: SymbolIDUnknown}))
assert.NoError(t, w.Annotation(NewSimpleSymbolToken("bar")))
assert.NoError(t, w.WriteNull())

assert.NoError(t, w.WriteInt(0))
Expand All @@ -91,10 +91,10 @@ func TestWriteBinaryList(t *testing.T) {
assert.NoError(t, w.BeginList())
assert.NoError(t, w.EndList())

assert.NoError(t, w.Annotation(SymbolToken{Text: newString("foo"), LocalSID: SymbolIDUnknown}))
assert.NoError(t, w.Annotation(NewSimpleSymbolToken("foo")))
assert.NoError(t, w.BeginList())
{
assert.NoError(t, w.Annotation(SymbolToken{Text: newString("bar"), LocalSID: SymbolIDUnknown}))
assert.NoError(t, w.Annotation(NewSimpleSymbolToken("bar")))
assert.NoError(t, w.WriteNull())

assert.NoError(t, w.WriteInt(0))
Expand Down Expand Up @@ -160,11 +160,11 @@ func TestWriteBinarySymbol(t *testing.T) {
0x74, 0xFF, 0xFF, 0xFF, 0xFF, // $4294967295
}
testBinaryWriter(t, eval, func(w Writer) {
assert.NoError(t, w.WriteSymbol("$ion"))
assert.NoError(t, w.WriteSymbol("name"))
assert.NoError(t, w.WriteSymbol("version"))
assert.NoError(t, w.WriteSymbol("$ion_shared_symbol_table"))
assert.NoError(t, w.WriteSymbol("$4294967295"))
assert.NoError(t, w.WriteSymbolFromString("$ion"))
assert.NoError(t, w.WriteSymbolFromString("name"))
assert.NoError(t, w.WriteSymbolFromString("version"))
assert.NoError(t, w.WriteSymbolFromString("$ion_shared_symbol_table"))
assert.NoError(t, w.WriteSymbolFromString("$4294967295"))
})
}

Expand Down
Loading