Skip to content

Commit

Permalink
language: turn parsing panics into ErrSyntax
Browse files (browse the repository at this point in the history)
We keep finding new panics in the language parser.
Limit the damage by reporting those inputs as syntax errors.

Change-Id: I786fe127c3df7e4c8e042d15095d3acf3c4e4a50
Reviewed-on: https://go-review.googlesource.com/c/text/+/340830
Trust: Russ Cox <rsc@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Roland Shoemaker <roland@golang.org>
  • Loading branch information
rsc committed Aug 10, 2021
1 parent 3115f89 commit 383b2e7
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 4 deletions.
43 changes: 39 additions & 4 deletions internal/language/language.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,13 @@ func (t Tag) Parent() Tag {

// ParseExtension parses s as an extension and returns it on success.
func ParseExtension(s string) (ext string, err error) {
defer func() {
if recover() != nil {
ext = ""
err = ErrSyntax
}
}()

scan := makeScannerString(s)
var end int
if n := len(scan.token); n != 1 {
Expand Down Expand Up @@ -461,7 +468,14 @@ func (t Tag) findTypeForKey(key string) (start, sep, end int, hasExt bool) {
// ParseBase parses a 2- or 3-letter ISO 639 code.
// It returns a ValueError if s is a well-formed but unknown language identifier,
// or a different error if parsing fails for any other reason.
func ParseBase(s string) (Language, error) {
func ParseBase(s string) (l Language, err error) {
defer func() {
if recover() != nil {
l = 0
err = ErrSyntax
}
}()

if n := len(s); n < 2 || 3 < n {
return 0, ErrSyntax
}
Expand All @@ -472,7 +486,14 @@ func ParseBase(s string) (Language, error) {
// ParseScript parses a 4-letter ISO 15924 code.
// It returns a ValueError if s is a well-formed but unknown script identifier,
// or a different error if parsing fails for any other reason.
func ParseScript(s string) (Script, error) {
func ParseScript(s string) (scr Script, err error) {
defer func() {
if recover() != nil {
scr = 0
err = ErrSyntax
}
}()

if len(s) != 4 {
return 0, ErrSyntax
}
Expand All @@ -489,7 +510,14 @@ func EncodeM49(r int) (Region, error) {
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
// It returns a ValueError if s is a well-formed but unknown region identifier,
// or a different error if parsing fails for any other reason.
func ParseRegion(s string) (Region, error) {
func ParseRegion(s string) (r Region, err error) {
defer func() {
if recover() != nil {
r = 0
err = ErrSyntax
}
}()

if n := len(s); n < 2 || 3 < n {
return 0, ErrSyntax
}
Expand Down Expand Up @@ -578,7 +606,14 @@ type Variant struct {

// ParseVariant parses and returns a Variant. An error is returned if s is not
// a valid variant.
func ParseVariant(s string) (Variant, error) {
func ParseVariant(s string) (v Variant, err error) {
defer func() {
if recover() != nil {
v = Variant{}
err = ErrSyntax
}
}()

s = strings.ToLower(s)
if id, ok := variantIndex[s]; ok {
return Variant{id, s}, nil
Expand Down
7 changes: 7 additions & 0 deletions internal/language/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,13 @@ func Parse(s string) (t Tag, err error) {
if s == "" {
return Und, ErrSyntax
}
defer func() {
if recover() != nil {
t = Und
err = ErrSyntax
return
}
}()
if len(s) <= maxAltTaglen {
b := [maxAltTaglen]byte{}
for i, c := range s {
Expand Down
22 changes: 22 additions & 0 deletions language/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ func Parse(s string) (t Tag, err error) {
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// The resulting tag is canonicalized using the canonicalization type c.
func (c CanonType) Parse(s string) (t Tag, err error) {
defer func() {
if recover() != nil {
t = Tag{}
err = language.ErrSyntax
}
}()

tt, err := language.Parse(s)
if err != nil {
return makeTag(tt), err
Expand Down Expand Up @@ -79,6 +86,13 @@ func Compose(part ...interface{}) (t Tag, err error) {
// tag is returned after canonicalizing using CanonType c. If one or more errors
// are encountered, one of the errors is returned.
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
defer func() {
if recover() != nil {
t = Tag{}
err = language.ErrSyntax
}
}()

var b language.Builder
if err = update(&b, part...); err != nil {
return und, err
Expand Down Expand Up @@ -142,6 +156,14 @@ var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
// Tags with a weight of zero will be dropped. An error will be returned if the
// input could not be parsed.
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
defer func() {
if recover() != nil {
tag = nil
q = nil
err = language.ErrSyntax
}
}()

var entry string
for s != "" {
if entry, s = split(s, ','); entry == "" {
Expand Down

0 comments on commit 383b2e7

Please sign in to comment.