Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Weaken provider name segment validation to match real Terraform registries #20

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ go 1.14
require (
github.com/google/go-cmp v0.5.9
github.com/hashicorp/terraform-svchost v0.0.0-20200729002733-f050f53b9734
golang.org/x/net v0.0.0-20210119194325-5f4716e94777
golang.org/x/net v0.0.0-20210119194325-5f4716e94777 // indirect
)
12 changes: 12 additions & 0 deletions module_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,18 @@ func TestParseModuleSource_simple(t *testing.T) {
Subdir: "",
},
},
"custom registry, name/namespace not purely alphanumeric": {
input: "example.com/example_corp/network-ing/happycloud9",
want: Module{
Package: ModulePackage{
Host: svchost.Hostname("example.com"),
Namespace: "example_corp",
Name: "network-ing",
TargetSystem: "happycloud9",
},
Subdir: "",
},
},
"custom registry, subdir": {
input: "example.com/awesomecorp/network/happycloud//examples/foo",
want: Module{
Expand Down
71 changes: 28 additions & 43 deletions provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ package tfaddr

import (
"fmt"
"regexp"
"strings"

svchost "github.com/hashicorp/terraform-svchost"
"golang.org/x/net/idna"
)

// Provider encapsulates a single provider type. In the future this will be
Expand Down Expand Up @@ -48,6 +48,8 @@ const UnknownProviderNamespace = "?"
// FQN. This may be produced by Terraform 0.13.
const LegacyProviderNamespace = "-"

var providerNamePartPattern = regexp.MustCompile("^[0-9A-Za-z](?:[0-9A-Za-z-_]{0,62}[0-9A-Za-z])?$")

// String returns an FQN string, indended for use in machine-readable output.
func (pt Provider) String() string {
if pt.IsZero() {
Expand Down Expand Up @@ -179,9 +181,10 @@ func (pt Provider) Equals(other Provider) bool {
// terraform-config-inspect.
//
// The following are valid source string formats:
// name
// namespace/name
// hostname/namespace/name
//
// name
// namespace/name
// hostname/namespace/name
//
// "name"-only format is parsed as -/name (i.e. legacy namespace)
// requiring further identification of the namespace via Registry API
Expand Down Expand Up @@ -217,7 +220,7 @@ func ParseProviderSource(str string) (Provider, error) {
if err != nil {
return Provider{}, &ParserError{
Summary: "Invalid provider namespace",
Detail: fmt.Sprintf(`Invalid provider namespace %q in source %q: %s"`, namespace, str, err),
Detail: fmt.Sprintf(`Invalid provider namespace %q in source %q: %s"`, givenNamespace, str, err),
}
}
ret.Namespace = namespace
Expand All @@ -227,11 +230,12 @@ func ParseProviderSource(str string) (Provider, error) {
// Final Case: 3 parts
if len(parts) == 3 {
// the namespace is always the first part in a three-part source string
hn, err := svchost.ForComparison(parts[0])
givenHostname := parts[0]
hn, err := svchost.ForComparison(givenHostname)
if err != nil {
return Provider{}, &ParserError{
Summary: "Invalid provider source hostname",
Detail: fmt.Sprintf(`Invalid provider source hostname namespace %q in source %q: %s"`, hn, str, err),
Detail: fmt.Sprintf(`Invalid provider source hostname %q in source %q: %s"`, givenHostname, str, err),
}
}
ret.Hostname = hn
Expand Down Expand Up @@ -293,7 +297,7 @@ func ParseProviderSource(str string) (Provider, error) {

// MustParseProviderSource is a wrapper around ParseProviderSource that panics if
// it returns an error.
func MustParseProviderSource(raw string) (Provider) {
func MustParseProviderSource(raw string) Provider {
p, err := ParseProviderSource(raw)
if err != nil {
panic(err)
Expand Down Expand Up @@ -374,31 +378,22 @@ func parseSourceStringParts(str string) ([]string, error) {
}

// ParseProviderPart processes an addrs.Provider namespace or type string
// provided by an end-user, producing a normalized version if possible or
// provided by an end-user, returning the same string if validation succeeds or
// an error if the string contains invalid characters.
//
// A provider part is processed in the same way as an individual label in a DNS
// domain name: it is transformed to lowercase per the usual DNS case mapping
// and normalization rules and may contain only letters, digits, and dashes.
// Additionally, dashes may not appear at the start or end of the string.
//
// These restrictions are intended to allow these names to appear in fussy
// contexts such as directory/file names on case-insensitive filesystems,
// repository names on GitHub, etc. We're using the DNS rules in particular,
// rather than some similar rules defined locally, because the hostname part
// of an addrs.Provider is already a hostname and it's ideal to use exactly
// the same case folding and normalization rules for all of the parts.
//
// In practice a provider type string conventionally does not contain dashes
// either. Such names are permitted, but providers with such type names will be
// hard to use because their resource type names will not be able to contain
// the provider type name and thus each resource will need an explicit provider
// address specified. (A real-world example of such a provider is the
// "google-beta" variant of the GCP provider, which has resource types that
// start with the "google_" prefix instead.)
// As per the implementation of HashiCorp's public and private Terraform
// registries, a provider part may be up to 64 characters long, can contain
// letters, digits, underscores, and dashes, must start with a letter, and must
// end with a letter or digit.
//
// It's valid to pass the result of this function as the argument to a
// subsequent call, in which case the result will be identical.
// In practice, these name segments are often more restricted. Most notably, the
// public registry requires namespaces to match a GitHub username (ruling out
// underscores). Also, some permitted provider names are hard to use because
// their resource type names will not be able to contain the provider type name
// and thus each resource will need an explicit provider address specified. (A
// real-world example of such a provider is the "google-beta" variant of the GCP
// provider, which has resource types that start with the "google_" prefix
// instead.)
func ParseProviderPart(given string) (string, error) {
if len(given) == 0 {
return "", fmt.Errorf("must have at least one character")
Expand All @@ -415,21 +410,11 @@ func ParseProviderPart(given string) (string, error) {
return "", fmt.Errorf("dots are not allowed")
}

// We don't allow names containing multiple consecutive dashes, just as
// a matter of preference: they look weird, confusing, or incorrect.
// This also, as a side-effect, prevents the use of the "punycode"
// indicator prefix "xn--" that would cause the IDNA library to interpret
// the given name as punycode, because that would be weird and unexpected.
if strings.Contains(given, "--") {
return "", fmt.Errorf("cannot use multiple consecutive dashes")
}

result, err := idna.Lookup.ToUnicode(given)
if err != nil {
return "", fmt.Errorf("must contain only letters, digits, and dashes, and may not use leading or trailing dashes")
if !providerNamePartPattern.MatchString(given) {
return "", fmt.Errorf("must contain only letters, digits, dashes, and underscores, and may not use leading or trailing dashes or underscores")
}

return result, nil
return given, nil
}

// MustParseProviderPart is a wrapper around ParseProviderPart that panics if
Expand Down
72 changes: 52 additions & 20 deletions provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,8 @@ func TestParseProviderSource(t *testing.T) {
},
"registry.Terraform.io/HashiCorp/AWS": {
Provider{
Type: "aws",
Namespace: "hashicorp",
Type: "AWS",
Namespace: "HashiCorp",
Comment on lines +271 to +272
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One of the benefits of this library, though maybe implied, rather than explicitly documented, was canonical representation, which in turn made comparison and sorting possible.

For example, hashicorp/aws today is treated as equal to HashiCorp/AWS when it comes to cache entries (the lock file) etc. It is also one of the reasons Terraform LS uses this library and why we extracted it.

My understanding is that the intention was merely to allow -, so I'm assuming there isn't a strong reason to introduce case sensitivity?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As best as I was able to determine, the private registry in Terraform Cloud (unlike the public registry) does not consider myorg/aws and myorg/AWS to be the same address, which would mean automatic lowercasing is a wrong behavior.

But unlike the underscores, that one was based on a reading of the code rather than an actual experiment. I didn't attempt publishing a provider with capital letters in its name (which, to be fair, would be a pretty perverse thing to do, and I'm not sure anyone would try it if they weren't attempting to break something).

Org names with uppercase letters are a bit more of a pressing concern, but I didn't attempt that yet either. I could give that a shot, it's somewhat easier than re-publishing my test provider with a vandalized name.

I'm open to adding some basic string lowercasing to restore case-collapse, if you're confident that we still need it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah on further thought, if capitalized org names are a problem in TFC, then the solution is to add case normalization to its registry's routing, not to change the semantics over in this library. We already know TFC enforces org name uniqueness across any combination of letter cases, and the top-level org route accounts for that; if we missed handling that somewhere deeper in the routing, we'll just fix it.

Hostname: DefaultProviderRegistryHost,
},
false,
Expand Down Expand Up @@ -304,8 +304,8 @@ func TestParseProviderSource(t *testing.T) {
},
"HashiCorp/AWS": {
Provider{
Type: "aws",
Namespace: "hashicorp",
Type: "AWS",
Namespace: "HashiCorp",
Hostname: DefaultProviderRegistryHost,
},
false,
Expand All @@ -320,7 +320,7 @@ func TestParseProviderSource(t *testing.T) {
},
"AWS": {
Provider{
Type: "aws",
Type: "AWS",
Namespace: UnknownProviderNamespace,
Hostname: DefaultProviderRegistryHost,
},
Expand All @@ -342,6 +342,14 @@ func TestParseProviderSource(t *testing.T) {
},
false,
},
"example.com/foo_bar/baz_boop": {
Provider{
Type: "baz_boop",
Namespace: "foo_bar",
Hostname: svchost.Hostname("example.com"),
},
false,
},
"localhost:8080/foo/bar": {
Provider{
Type: "bar",
Expand Down Expand Up @@ -374,9 +382,13 @@ func TestParseProviderSource(t *testing.T) {
Provider{},
true,
},
"example.com/bad--namespace/aws": {
Provider{},
true,
"example.com/okay--namespace/aws": {
Provider{
Type: "aws",
Namespace: "okay--namespace",
Hostname: svchost.Hostname("example.com"),
},
false,
},
"example.com/-badnamespace/aws": {
Provider{},
Expand All @@ -386,18 +398,30 @@ func TestParseProviderSource(t *testing.T) {
Provider{},
true,
},
"example.com/bad.namespace/aws": {
"example.com/badnamespace_/aws": {
Provider{},
true,
},
"example.com/hashicorp/badtype!": {
"example.com/_badnamespace/aws": {
Provider{},
true,
},
"example.com/hashicorp/bad--type": {
"example.com/bad.namespace/aws": {
Provider{},
true,
},
"example.com/hashicorp/badtype!": {
Provider{},
true,
},
"example.com/hashicorp/okay--type": {
Provider{
Type: "okay--type",
Namespace: "hashicorp",
Hostname: svchost.Hostname("example.com"),
},
false,
},
"example.com/hashicorp/-badtype": {
Provider{},
true,
Expand All @@ -406,6 +430,14 @@ func TestParseProviderSource(t *testing.T) {
Provider{},
true,
},
"example.com/hashicorp/_badtype": {
Provider{},
true,
},
"example.com/hashicorp/badtype_": {
Provider{},
true,
},
"example.com/hashicorp/bad.type": {
Provider{},
true,
Expand Down Expand Up @@ -453,48 +485,48 @@ func TestParseProviderPart(t *testing.T) {
``,
},
`FOO`: {
`foo`,
`FOO`,
``,
},
`Foo`: {
`foo`,
`Foo`,
``,
},
`abc-123`: {
`abc-123`,
``,
},
`Испытание`: {
`испытание`,
``,
`must contain only letters, digits, dashes, and underscores, and may not use leading or trailing dashes or underscores`,
},
`münchen`: { // this is a precomposed u with diaeresis
`münchen`, // this is a precomposed u with diaeresis
``,
`must contain only letters, digits, dashes, and underscores, and may not use leading or trailing dashes or underscores`,
},
`münchen`: { // this is a separate u and combining diaeresis
`münchen`, // this is a precomposed u with diaeresis
``,
`must contain only letters, digits, dashes, and underscores, and may not use leading or trailing dashes or underscores`,
},
`abc--123`: {
`abc--123`,
``,
`cannot use multiple consecutive dashes`,
},
`xn--80akhbyknj4f`: { // this is the punycode form of "испытание", but we don't accept punycode here
`xn--80akhbyknj4f`,
``,
`cannot use multiple consecutive dashes`,
},
`abc.123`: {
``,
`dots are not allowed`,
},
`-abc123`: {
``,
`must contain only letters, digits, and dashes, and may not use leading or trailing dashes`,
`must contain only letters, digits, dashes, and underscores, and may not use leading or trailing dashes or underscores`,
},
`abc123-`: {
``,
`must contain only letters, digits, and dashes, and may not use leading or trailing dashes`,
`must contain only letters, digits, dashes, and underscores, and may not use leading or trailing dashes or underscores`,
},
``: {
``,
Expand Down