Skip to content

Commit

Permalink
Clean up Regexes
Browse files Browse the repository at this point in the history
Prior to this commit, the regexes in substitution.go file were hard to
understand and hard to reuse because of the complexity of the regexes.
For example, we have to consider
  - all the three notations (dot notation, bracket with single quote,
  and bracket with double quote)
  - the suffix of indexing part including start operator and
  int indexing that was not included before.

Also the old regex coupled each notation part with the indexing
part, which leads to extra step to be done in the extract-related functions
i.e. have to trim the [*] every time.

In this fix, we clean up the regexes and make it as much reable and
reuseble as possible.

There will be more PRs to come to clean up considering the
complexity of the all regexes in substitution.go and its usage everywhere.
Also results-related regexes in resultref.go file need cleanup too.
  • Loading branch information
chuangw6 committed Aug 2, 2022
1 parent 5566576 commit 154025f
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 44 deletions.
4 changes: 2 additions & 2 deletions pkg/apis/pipeline/v1beta1/param_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ func (arrayOrString *ArrayOrString) applyOrCorrect(stringReplacements map[string

// if the stringVal is a string literal or a string that mixed with var references
// just do the normal string replacement
if !exactVariableSubstitutionRegex.MatchString(stringVal) {
if !isolatedResultVariableSubstitutionRegex.MatchString(stringVal) && !substitution.IsIsolatedArrayOrObjectParamRef(stringVal) {
arrayOrString.StringVal = substitution.ApplyReplacements(arrayOrString.StringVal, stringReplacements)
return
}
Expand Down Expand Up @@ -338,7 +338,7 @@ func validateParamStringValue(param Param, prefix string, paramNames sets.String
stringValue := param.Value.StringVal

// if the provided param value is an isolated reference to the whole array/object, we just check if the param name exists.
isIsolated, errs := substitution.ValidateWholeArrayOrObjectRefInStringVariable(param.Name, stringValue, prefix, paramNames)
isIsolated, errs := substitution.ValidateIsolatedArrayOrObjectRefInStringVariable(param.Name, stringValue, prefix, paramNames)
if isIsolated {
return errs
}
Expand Down
64 changes: 62 additions & 2 deletions pkg/apis/pipeline/v1beta1/pipeline_validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1431,7 +1431,7 @@ func TestValidatePipelineParameterVariables_Success(t *testing.T) {
}},
}},
}, {
name: "array param - using the whole variable as a param's value that is intended to be array type",
name: "array param - using the whole variable (dot reference) as the value of an array param",
params: []ParamSpec{{
Name: "myArray",
Type: ParamTypeArray,
Expand All @@ -1443,6 +1443,32 @@ func TestValidatePipelineParameterVariables_Success(t *testing.T) {
Name: "a-param-intended-to-be-array", Value: ArrayOrString{Type: ParamTypeString, StringVal: "$(params.myArray[*])"},
}},
}},
}, {
name: "array param - using the whole variable (bracket with single quote) as the value of an array param",
params: []ParamSpec{{
Name: "myArray",
Type: ParamTypeArray,
}},
tasks: []PipelineTask{{
Name: "bar",
TaskRef: &TaskRef{Name: "bar-task"},
Params: []Param{{
Name: "a-param-intended-to-be-array", Value: ArrayOrString{Type: ParamTypeString, StringVal: "$(params['myArray'][*])"},
}},
}},
}, {
name: "array param - using the whole variable (bracket with double quote) as the value of an array param",
params: []ParamSpec{{
Name: "myArray",
Type: ParamTypeArray,
}},
tasks: []PipelineTask{{
Name: "bar",
TaskRef: &TaskRef{Name: "bar-task"},
Params: []Param{{
Name: "a-param-intended-to-be-array", Value: ArrayOrString{Type: ParamTypeString, StringVal: `$(params["myArray"][*])`},
}},
}},
}, {
name: "object param - using single individual variable in string param",
params: []ParamSpec{{
Expand Down Expand Up @@ -1535,7 +1561,7 @@ func TestValidatePipelineParameterVariables_Success(t *testing.T) {
}},
}},
}, {
name: "object param - using the whole variable as a param's value that is intended to be object type",
name: "object param - using the whole variable (dot notation) as the value of an object param",
params: []ParamSpec{{
Name: "myObject",
Type: ParamTypeObject,
Expand All @@ -1551,6 +1577,40 @@ func TestValidatePipelineParameterVariables_Success(t *testing.T) {
Name: "a-param-intended-to-be-object", Value: ArrayOrString{Type: ParamTypeString, StringVal: "$(params.myObject[*])"},
}},
}},
}, {
name: "object param - using the whole variable (bracket notation with single quote) as the value of an object param",
params: []ParamSpec{{
Name: "myObject",
Type: ParamTypeObject,
Properties: map[string]PropertySpec{
"key1": {Type: "string"},
"key2": {Type: "string"},
},
}},
tasks: []PipelineTask{{
Name: "bar",
TaskRef: &TaskRef{Name: "bar-task"},
Params: []Param{{
Name: "a-param-intended-to-be-object", Value: ArrayOrString{Type: ParamTypeString, StringVal: "$(params['myObject'][*])"},
}},
}},
}, {
name: "object param - using the whole variable (bracket notation with double quote) as the value of an object param",
params: []ParamSpec{{
Name: "myObject",
Type: ParamTypeObject,
Properties: map[string]PropertySpec{
"key1": {Type: "string"},
"key2": {Type: "string"},
},
}},
tasks: []PipelineTask{{
Name: "bar",
TaskRef: &TaskRef{Name: "bar-task"},
Params: []Param{{
Name: "a-param-intended-to-be-object", Value: ArrayOrString{Type: ParamTypeString, StringVal: `$(params["myObject"][*])`},
}},
}},
}, {
name: "object param - using individual variable in input of when expression, and using both object individual variable and array reference in values of when expression",
params: []ParamSpec{{
Expand Down
6 changes: 3 additions & 3 deletions pkg/apis/pipeline/v1beta1/resultref.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ const (
// TODO(#2462) use one regex across all substitutions
// variableSubstitutionFormat matches format like $result.resultname, $result.resultname[int] and $result.resultname[*]
variableSubstitutionFormat = `\$\([_a-zA-Z0-9.-]+(\.[_a-zA-Z0-9.-]+)*(\[([0-9]+|\*)\])?\)`
// exactVariableSubstitutionFormat matches strings that only contain a single reference to result or param variables, but nothing else
// isolatedVariableSubstitutionFormat matches strings that only contain a single reference to result or param variables, but nothing else
// i.e. `$(result.resultname)` is a match, but `foo $(result.resultname)` is not.
exactVariableSubstitutionFormat = `^\$\([_a-zA-Z0-9.-]+(\.[_a-zA-Z0-9.-]+)*(\[([0-9]+|\*)\])?\)$`
isolatedVariableSubstitutionFormat = `^\$\([_a-zA-Z0-9.-]+(\.[_a-zA-Z0-9.-]+)*(\[([0-9]+|\*)\])?\)$`
// arrayIndexing will match all `[int]` and `[*]` for parseExpression
arrayIndexing = `\[([0-9])*\*?\]`
// ResultNameFormat Constant used to define the the regex Result.Name should follow
Expand All @@ -55,7 +55,7 @@ const (

// VariableSubstitutionRegex is a regex to find all result matching substitutions
var VariableSubstitutionRegex = regexp.MustCompile(variableSubstitutionFormat)
var exactVariableSubstitutionRegex = regexp.MustCompile(exactVariableSubstitutionFormat)
var isolatedResultVariableSubstitutionRegex = regexp.MustCompile(isolatedVariableSubstitutionFormat)
var resultNameFormatRegex = regexp.MustCompile(ResultNameFormat)

// arrayIndexingRegex is used to match `[int]` and `[*]`
Expand Down
86 changes: 49 additions & 37 deletions pkg/substitution/substitution.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,32 +27,52 @@ import (
)

const (
parameterSubstitution = `.*?(\[\*\])?`
// nameSubstition is the regex pattern for variable names with allowed characters only
nameSubstition = `[_a-zA-Z0-9.-]+`
// indexing is the regex pattern for the index suffix in a reference string
// - [i] int indexing to reference an array element
// - [*] start indexing to reference the whole array or object
indexing = `\[([0-9]+|\*)\]`

// braceMatchingRegex is a regex for parameter references including dot notation, bracket notation with single and double quotes.
braceMatchingRegex = "(\\$(\\(%s(\\.(?P<var1>%s)|\\[\"(?P<var2>%s)\"\\]|\\['(?P<var3>%s)'\\])\\)))"
// arrayIndexing will match all `[int]` and `[*]` for parseExpression
arrayIndexing = `\[([0-9])*\*?\]`
// paramIndex will match all `$(params.paramName[int])` expressions
paramIndexing = `\$\(params(\.[_a-zA-Z0-9.-]+|\[\'[_a-zA-Z0-9.-\/]+\'\]|\[\"[_a-zA-Z0-9.-\/]+\"\])\[[0-9]+\]\)`
// intIndex will match all `[int]` expressions
intIndex = `\[[0-9]+\]`
)

// arrayIndexingRegex is used to match `[int]` and `[*]`
var arrayIndexingRegex = regexp.MustCompile(arrayIndexing)

// paramIndexingRegex will match all `$(params.paramName[int])` expressions
var paramIndexingRegex = regexp.MustCompile(paramIndexing)

// intIndexRegex will match all `[int]` for param expression
var intIndexRegex = regexp.MustCompile(intIndex)
var (
// .paramName - dot notation
dotNotation = fmt.Sprintf(`\.(?P<var1>%s)`, nameSubstition)
// ['paramName'] - bracket notation with single quote
bracketNotationWithSingleQuote = fmt.Sprintf(`\['(?P<var2>%s)'\]`, nameSubstition)
// ["paramName"] - bracket notation with double quote
bracketNotationWithDoubleQuote = fmt.Sprintf(`\[\"(?P<var3>%s)\"\]`, nameSubstition)
// one of the three notations with optional indexing as the suffix
variableNotationPattern = fmt.Sprintf(`(%s|%s|%s)(?P<var4>%s)?`, dotNotation, bracketNotationWithSingleQuote, bracketNotationWithDoubleQuote, indexing)

// full variable referencing regex: $(xy)
// - x is the prefix i.e. params
// - y is `variableNotationPattern` - one of three notations + optional indexing
fullVariableReferenceRegex = `\$\(%s%s\)`

// a regex to check if a string is an isolated reference to a param variable
isolatedParamVariablePattern = fmt.Sprintf(fmt.Sprintf("^%s$", fullVariableReferenceRegex), "params", variableNotationPattern)
isolatedParamVariableRegex = regexp.MustCompile(isolatedParamVariablePattern)

// arrayIndexingRegex is used to match `[int]` and `[*]`
arrayIndexingRegex = regexp.MustCompile(indexing)

// paramIndexingRegex will match all `$(params.paramName[int])` expressions
paramIndexingRegex = regexp.MustCompile(paramIndexing)

// intIndexRegex will match all `[int]` for param expression
intIndexRegex = regexp.MustCompile(intIndex)
)

// ValidateVariable makes sure all variables in the provided string are known
func ValidateVariable(name, value, prefix, locationName, path string, vars sets.String) *apis.FieldError {
if vs, present, _ := extractVariablesFromString(value, prefix); present {
for _, v := range vs {
v = strings.TrimSuffix(v, "[*]")
if !vars.Has(v) {
return &apis.FieldError{
Message: fmt.Sprintf("non-existent variable in %q for %s %s", value, locationName, name),
Expand All @@ -75,7 +95,6 @@ func ValidateVariableP(value, prefix string, vars sets.String) *apis.FieldError

}
for _, v := range vs {
v = TrimArrayIndex(v)
if !vars.Has(v) {
return &apis.FieldError{
Message: fmt.Sprintf("non-existent variable in %q", value),
Expand All @@ -92,7 +111,6 @@ func ValidateVariableP(value, prefix string, vars sets.String) *apis.FieldError
func ValidateVariableProhibited(name, value, prefix, locationName, path string, vars sets.String) *apis.FieldError {
if vs, present, _ := extractVariablesFromString(value, prefix); present {
for _, v := range vs {
v = strings.TrimSuffix(v, "[*]")
if vars.Has(v) {
return &apis.FieldError{
Message: fmt.Sprintf("variable type invalid in %q for %s %s", value, locationName, name),
Expand All @@ -115,7 +133,6 @@ func ValidateVariableProhibitedP(value, prefix string, vars sets.String) *apis.F

}
for _, v := range vs {
v = strings.TrimSuffix(v, "[*]")
if vars.Has(v) {
return &apis.FieldError{
Message: fmt.Sprintf("variable type invalid in %q", value),
Expand Down Expand Up @@ -158,7 +175,6 @@ func ValidateVariableIsolated(name, value, prefix, locationName, path string, va
if vs, present, _ := extractVariablesFromString(value, prefix); present {
firstMatch, _ := extractExpressionFromString(value, prefix)
for _, v := range vs {
v = strings.TrimSuffix(v, "[*]")
if vars.Has(v) {
if len(value) != len(firstMatch) {
return &apis.FieldError{
Expand All @@ -184,7 +200,6 @@ func ValidateVariableIsolatedP(value, prefix string, vars sets.String) *apis.Fie
}
firstMatch, _ := extractExpressionFromString(value, prefix)
for _, v := range vs {
v = strings.TrimSuffix(v, "[*]")
if vars.Has(v) {
if len(value) != len(firstMatch) {
return &apis.FieldError{
Expand All @@ -199,33 +214,30 @@ func ValidateVariableIsolatedP(value, prefix string, vars sets.String) *apis.Fie
return nil
}

// ValidateWholeArrayOrObjectRefInStringVariable validates if a single string field uses references to the whole array/object appropriately
// ValidateIsolatedArrayOrObjectRefInStringVariable validates if a single string field uses references to the whole array/object appropriately
// valid example: "$(params.myObject[*])"
// invalid example: "$(params.name-not-exist[*])"
func ValidateWholeArrayOrObjectRefInStringVariable(name, value, prefix string, vars sets.String) (isIsolated bool, errs *apis.FieldError) {
nameSubstitution := `[_a-zA-Z0-9.-]+\[\*\]`

// a regex to check if the stringValue is an isolated reference to the whole array/object param without extra string literal.
isolatedVariablePattern := fmt.Sprintf(fmt.Sprintf("^%s$", braceMatchingRegex), prefix, nameSubstitution, nameSubstitution, nameSubstitution)
isolatedVariableRegex, err := regexp.Compile(isolatedVariablePattern)
if err != nil {
return false, &apis.FieldError{
Message: fmt.Sprint("Fail to parse the regex: ", err),
Paths: []string{fmt.Sprintf("%s.%s", prefix, name)},
}
}

if isolatedVariableRegex.MatchString(value) {
func ValidateIsolatedArrayOrObjectRefInStringVariable(name, value, prefix string, vars sets.String) (isIsolated bool, errs *apis.FieldError) {
if isIsolated := IsIsolatedArrayOrObjectParamRef(value); isIsolated {
return true, ValidateVariableP(value, prefix, vars).ViaFieldKey(prefix, name)
}

return false, nil
}

// IsIsolatedArrayOrObjectParamRef checks if a string is an isolated reference to the whole array/object param with start operator
func IsIsolatedArrayOrObjectParamRef(value string) bool {
if isolatedParamVariableRegex.MatchString(value) && strings.HasSuffix(value, "[*])") {
return true
}

return false
}

// Extract a the first full string expressions found (e.g "$(input.params.foo)"). Return
// "" and false if nothing is found.
func extractExpressionFromString(s, prefix string) (string, bool) {
pattern := fmt.Sprintf(braceMatchingRegex, prefix, parameterSubstitution, parameterSubstitution, parameterSubstitution)
pattern := fmt.Sprintf(fullVariableReferenceRegex, prefix, variableNotationPattern)
re := regexp.MustCompile(pattern)
match := re.FindStringSubmatch(s)
if match == nil {
Expand All @@ -235,7 +247,7 @@ func extractExpressionFromString(s, prefix string) (string, bool) {
}

func extractVariablesFromString(s, prefix string) ([]string, bool, string) {
pattern := fmt.Sprintf(braceMatchingRegex, prefix, parameterSubstitution, parameterSubstitution, parameterSubstitution)
pattern := fmt.Sprintf(fullVariableReferenceRegex, prefix, variableNotationPattern)
re := regexp.MustCompile(pattern)
matches := re.FindAllStringSubmatch(s, -1)
errString := ""
Expand Down Expand Up @@ -271,7 +283,7 @@ func extractVariablesFromString(s, prefix string) ([]string, bool, string) {
}

func extractEntireVariablesFromString(s, prefix string) ([]string, error) {
pattern := fmt.Sprintf(braceMatchingRegex, prefix, parameterSubstitution, parameterSubstitution, parameterSubstitution)
pattern := fmt.Sprintf(fullVariableReferenceRegex, prefix, variableNotationPattern)
re, err := regexp.Compile(pattern)
if err != nil {
return nil, fmt.Errorf("Fail to parse regex pattern: %v", err)
Expand Down

0 comments on commit 154025f

Please sign in to comment.