-
Notifications
You must be signed in to change notification settings - Fork 9.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
clientv3: Set authority used in cert checks to host of endpoint #11184
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -95,6 +95,7 @@ jitter | |
WithBackoff | ||
BackoffLinearWithJitter | ||
jitter | ||
WithDialer | ||
WithMax | ||
ServerStreams | ||
BidiStreams | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ import ( | |
"net" | ||
"sync" | ||
|
||
"go.etcd.io/etcd/clientv3/balancer/resolver/endpoint" | ||
"go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes" | ||
grpccredentials "google.golang.org/grpc/credentials" | ||
) | ||
|
@@ -65,38 +66,37 @@ func (b *bundle) NewWithMode(mode string) (grpccredentials.Bundle, error) { | |
} | ||
|
||
// transportCredential implements "grpccredentials.TransportCredentials" interface. | ||
// transportCredential wraps TransportCredentials to track which | ||
// addresses are dialed for which endpoints, and then sets the authority when checking the endpoint's cert to the | ||
// hostname or IP of the dialed endpoint. | ||
// This is a workaround of a gRPC load balancer issue. gRPC uses the dialed target's service name as the authority when | ||
// checking all endpoint certs, which does not work for etcd servers using their hostname or IP as the Subject Alternative Name | ||
// in their TLS certs. | ||
// To enable, include both WithTransportCredentials(creds) and WithContextDialer(creds.Dialer) | ||
// when dialing. | ||
type transportCredential struct { | ||
gtc grpccredentials.TransportCredentials | ||
mu sync.Mutex | ||
// addrToEndpoint maps from the connection addresses that are dialed to the hostname or IP of the | ||
// endpoint provided to the dialer when dialing | ||
addrToEndpoint map[string]string | ||
} | ||
|
||
func newTransportCredential(cfg *tls.Config) *transportCredential { | ||
return &transportCredential{ | ||
gtc: grpccredentials.NewTLS(cfg), | ||
gtc: grpccredentials.NewTLS(cfg), | ||
addrToEndpoint: map[string]string{}, | ||
} | ||
} | ||
|
||
func (tc *transportCredential) ClientHandshake(ctx context.Context, authority string, rawConn net.Conn) (net.Conn, grpccredentials.AuthInfo, error) { | ||
// Only overwrite when authority is an IP address! | ||
// Let's say, a server runs SRV records on "etcd.local" that resolves | ||
// to "m1.etcd.local", and its SAN field also includes "m1.etcd.local". | ||
// But what if SAN does not include its resolved IP address (e.g. 127.0.0.1)? | ||
// Then, the server should only authenticate using its DNS hostname "m1.etcd.local", | ||
// instead of overwriting it with its IP address. | ||
// And we do not overwrite "localhost" either. Only overwrite IP addresses! | ||
if isIP(authority) { | ||
target := rawConn.RemoteAddr().String() | ||
if authority != target { | ||
// When user dials with "grpc.WithDialer", "grpc.DialContext" "cc.parsedTarget" | ||
// update only happens once. This is problematic, because when TLS is enabled, | ||
// retries happen through "grpc.WithDialer" with static "cc.parsedTarget" from | ||
// the initial dial call. | ||
// If the server authenticates by IP addresses, we want to set a new endpoint as | ||
// a new authority. Otherwise | ||
// "transport: authentication handshake failed: x509: certificate is valid for 127.0.0.1, 192.168.121.180, not 192.168.223.156" | ||
// when the new dial target is "192.168.121.180" whose certificate host name is also "192.168.121.180" | ||
// but client tries to authenticate with previously set "cc.parsedTarget" field "192.168.223.156" | ||
authority = target | ||
} | ||
// Set the authority when checking the endpoint's cert to the hostname or IP of the dialed endpoint | ||
tc.mu.Lock() | ||
dialEp, ok := tc.addrToEndpoint[rawConn.RemoteAddr().String()] | ||
tc.mu.Unlock() | ||
if ok { | ||
_, host, _ := endpoint.ParseEndpoint(dialEp) | ||
authority = host | ||
} | ||
return tc.gtc.ClientHandshake(ctx, authority, rawConn) | ||
} | ||
|
@@ -115,15 +115,33 @@ func (tc *transportCredential) Info() grpccredentials.ProtocolInfo { | |
} | ||
|
||
func (tc *transportCredential) Clone() grpccredentials.TransportCredentials { | ||
copy := map[string]string{} | ||
tc.mu.Lock() | ||
for k, v := range tc.addrToEndpoint { | ||
copy[k] = v | ||
} | ||
tc.mu.Unlock() | ||
return &transportCredential{ | ||
gtc: tc.gtc.Clone(), | ||
gtc: tc.gtc.Clone(), | ||
addrToEndpoint: copy, | ||
} | ||
} | ||
|
||
func (tc *transportCredential) OverrideServerName(serverNameOverride string) error { | ||
return tc.gtc.OverrideServerName(serverNameOverride) | ||
} | ||
|
||
func (tc *transportCredential) Dialer(ctx context.Context, dialEp string) (net.Conn, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, gRPC first calls There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, here's the dial call: https://github.com/grpc/grpc-go/blob/45bd2846a34b039c5f1e69b7202f118687156b34/internal/transport/http2_client.go#L167 and below it is the ClientHandshake call: https://github.com/grpc/grpc-go/blob/45bd2846a34b039c5f1e69b7202f118687156b34/internal/transport/http2_client.go#L212 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And here's where the authority gets set: https://github.com/grpc/grpc-go/blob/230def769105c46b2e597578b23002a6ac159dde/clientconn.go#L1142. We're basically just trying to get the authority set to an appropriate value for each endpoint instead of having a single value for the entire set of endpoints. |
||
// Keep track of which addresses are dialed for which endpoints | ||
conn, err := endpoint.Dialer(ctx, dialEp) | ||
if conn != nil { | ||
tc.mu.Lock() | ||
tc.addrToEndpoint[conn.RemoteAddr().String()] = dialEp | ||
tc.mu.Unlock() | ||
} | ||
return conn, err | ||
} | ||
|
||
// perRPCCredential implements "grpccredentials.PerRPCCredentials" interface. | ||
type perRPCCredential struct { | ||
authToken string | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice clean-up by moving code here!