diff --git a/backend/vxlan/vxlan_network_windows.go b/backend/vxlan/vxlan_network_windows.go
new file mode 100644
index 0000000000..36937a78d6
--- /dev/null
+++ b/backend/vxlan/vxlan_network_windows.go
@@ -0,0 +1,217 @@
+// +build windows
+
+// Copyright 2015 flannel authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vxlan
+
+import (
+	"encoding/json"
+	"fmt"
+	"net"
+	"sync"
+	"time"
+
+	"github.com/Microsoft/hcsshim"
+	log "github.com/golang/glog"
+	"golang.org/x/net/context"
+
+	"github.com/coreos/flannel/backend"
+	"github.com/coreos/flannel/pkg/ip"
+	"github.com/coreos/flannel/subnet"
+)
+
+// network is the Windows implementation of a flannel VXLAN network. It mirrors
+// the subnet leases of remote nodes as HNS remote endpoints.
+type network struct {
+	name      string
+	networkId string // Id of the HNS network that remote endpoints are created on
+	macPrefix string // leading part of every MAC address produced by conjureMac
+	extIface  *backend.ExternalInterface
+	lease     *subnet.Lease
+	sm        subnet.Manager
+}
+
+// Lease returns the subnet lease held by this network.
+func (n *network) Lease() *subnet.Lease {
+	return n.lease
+}
+
+// MTU returns the MTU of the external interface.
+// NOTE(review): no VXLAN encapsulation overhead is subtracted here - confirm
+// that callers expect the raw interface MTU.
+func (n *network) MTU() int {
+	return n.extIface.Iface.MTU
+}
+
+// Run watches for subnet lease events and applies every batch to HNS until ctx
+// is done. It returns only after the lease watcher goroutine has finished.
+func (n *network) Run(ctx context.Context) {
+	wg := sync.WaitGroup{}
+
+	log.Info("Watching for new subnet leases")
+	evts := make(chan []subnet.Event)
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		subnet.WatchLeases(ctx, n.sm, n.lease, evts)
+	}()
+
+	defer wg.Wait()
+
+	for {
+		select {
+		case evtBatch := <-evts:
+			n.handleSubnetEvents(evtBatch)
+
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+// conjureMac builds the MAC address used for a remote endpoint: macPrefix
+// followed by the four octets of ip in two-digit lower-case hex, all separated
+// by dashes (for example prefix "0E-2A" and 10.244.2.141 give "0E-2A-0a-f4-02-8d").
+func conjureMac(macPrefix string, ip ip.IP4) string {
+	a, b, c, d := ip.Octets()
+	return fmt.Sprintf("%v-%02x-%02x-%02x-%02x", macPrefix, a, b, c, d)
+}
+
+// handleSubnetEvents applies a batch of lease events to HNS. For every vxlan
+// lease each address of the remote subnet, except the first two and the last
+// one, is created as a remote endpoint when the lease is added and deleted when
+// the lease is removed. Failures are logged per endpoint and do not stop the
+// batch.
+func (n *network) handleSubnetEvents(batch []subnet.Event) {
+	for _, evt := range batch {
+		if evt.Lease.Attrs.BackendType != "vxlan" {
+			log.Warningf("Ignoring non-vxlan subnet: type=%v", evt.Lease.Attrs.BackendType)
+			continue
+		}
+
+		if evt.Type != subnet.EventAdded && evt.Type != subnet.EventRemoved {
+			log.Error("Internal error: unknown event type: ", int(evt.Type))
+			continue
+		}
+
+		// add or delete all possible remote IPs (excluding gateway & bcast) as remote endpoints
+		managementIp := evt.Lease.Attrs.PublicIP.String()
+		lastIP := evt.Lease.Subnet.Next().IP - 1
+
+		start := time.Now()
+		for remoteIp := evt.Lease.Subnet.IP + 2; remoteIp < lastIP; remoteIp++ {
+			remoteEndpointName := fmt.Sprintf("remote_%v", remoteIp.String())
+
+			if evt.Type == subnet.EventAdded {
+				remoteMac := conjureMac(n.macPrefix, remoteIp)
+				if err := createRemoteEndpoint(remoteEndpointName, remoteIp, remoteMac, managementIp, n.networkId); err != nil {
+					log.Errorf("failed to create remote endpoint [%v], error: %v", remoteEndpointName, err)
+				}
+				continue
+			}
+
+			// Removal: only an endpoint that was actually found can be deleted.
+			// A failed lookup yields no endpoint, so there is nothing to do.
+			hnsEndpoint, err := hcsshim.GetHNSEndpointByName(remoteEndpointName)
+			if err != nil || hnsEndpoint == nil {
+				continue
+			}
+			if _, err := hnsEndpoint.Delete(); err != nil {
+				log.Errorf("unable to delete existing remote endpoint [%v], error: %v", remoteEndpointName, err)
+			}
+		}
+
+		message := "Subnet removed"
+		if evt.Type == subnet.EventAdded {
+			message = "Subnet added"
+		}
+		log.Infof("%v: %v [%v ns]", message, evt.Lease.Subnet, time.Since(start).Nanoseconds())
+	}
+}
+
+// checkPAAddress reports whether hnsEndpoint has a policy whose "Type" is "PA"
+// and whose "PA" value equals managementAddress. It returns false when the
+// endpoint has no policies or when a policy cannot be decoded as a JSON object.
+func checkPAAddress(hnsEndpoint *hcsshim.HNSEndpoint, managementAddress string) bool {
+	for _, policyJson := range hnsEndpoint.Policies {
+		var policy map[string]interface{}
+		if err := json.Unmarshal(policyJson, &policy); err != nil {
+			return false
+		}
+
+		// Checked assertions: a "Type" or "PA" value that is not a string must
+		// count as "no match" instead of panicking.
+		if policyType, ok := policy["Type"].(string); !ok || policyType != "PA" {
+			continue
+		}
+		if pa, ok := policy["PA"].(string); ok && pa == managementAddress {
+			return true
+		}
+	}
+
+	return false
+}
+
+// createRemoteEndpoint makes sure that an HNS remote endpoint called
+// remoteEndpointName exists on the network networkId with the given IP, MAC and
+// a "PA" policy for managementAddress. An endpoint that already matches is left
+// untouched; one that does not match is deleted and created again.
+func createRemoteEndpoint(remoteEndpointName string, remoteIp ip.IP4, remoteMac string, managementAddress string, networkId string) error {
+	// find existing
+	hnsEndpoint, err := hcsshim.GetHNSEndpointByName(remoteEndpointName)
+	if err == nil && hnsEndpoint != nil && hnsEndpoint.VirtualNetwork == networkId && checkPAAddress(hnsEndpoint, managementAddress) {
+		return nil
+	}
+
+	// create or replace endpoint
+	if hnsEndpoint != nil {
+		if _, err = hnsEndpoint.Delete(); err != nil {
+			log.Errorf("unable to delete existing remote endpoint [%v], error: %v", remoteEndpointName, err)
+			return err
+		}
+	}
+
+	paPolicy := struct {
+		Type string
+		PA   string
+	}{
+		Type: "PA",
+		PA:   managementAddress,
+	}
+
+	policyBytes, err := json.Marshal(&paPolicy)
+	if err != nil {
+		return fmt.Errorf("unable to encode PA policy for remote endpoint [%v], error: %v", remoteEndpointName, err)
+	}
+
+	hnsEndpoint = &hcsshim.HNSEndpoint{
+		Id:               "",
+		Name:             remoteEndpointName,
+		IPAddress:        net.IPv4(remoteIp.Octets()),
+		MacAddress:       remoteMac,
+		VirtualNetwork:   networkId,
+		IsRemoteEndpoint: true,
+		Policies: []json.RawMessage{
+			policyBytes,
+		},
+	}
+
+	if _, err = hnsEndpoint.Create(); err != nil {
+		log.Errorf("unable to create remote endpoint [%v], error: %v", remoteEndpointName, err)
+		return err
+	}
+
+	return nil
+}
\ No newline at end of file
diff --git a/backend/vxlan/vxlan_network_windows_test.go b/backend/vxlan/vxlan_network_windows_test.go
new file mode 100644
index 0000000000..3e71d85339
--- /dev/null
+++ b/backend/vxlan/vxlan_network_windows_test.go
@@ -0,0 +1,94 @@
+// +build windows
+
+// Copyright 2015 CNI authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package vxlan
+
+import (
+	"encoding/json"
+	"github.com/Microsoft/hcsshim"
+	"github.com/stretchr/testify/assert"
+	"testing"
+)
+
+const (
+	// remoteEndpointJson is an HNS remote endpoint whose only policy is a "PA" policy for 10.123.74.74.
+	remoteEndpointJson = `
+	{
+		"ActivityId": "193b6f8d-b760-4940-9abb-66a91e5af49a",
+		"EncapOverhead": 50,
+		"ID": "8f109901-9970-4fda-bb2b-e9882fb3e8fb",
+		"IPAddress": "10.244.2.141",
+		"IsRemoteEndpoint": true,
+		"MacAddress": "0E-2A-0a-f4-02-8d",
+		"Name": "remote_10.244.2.141",
+		"Policies": [
+			{
+				"PA": "10.123.74.74",
+				"Type": "PA"
+			}
+		],
+		"PrefixLength": 16,
+		"Resources": {
+			"AllocationOrder": 1,
+			"Allocators": [
+				{
+					"AllocationOrder": 0,
+					"CA": "10.244.2.141",
+					"ID": "c11177da-9d65-4f81-bd76-b11d1a055b38",
+					"IsLocal": false,
+					"IsPolicy": true,
+					"PA": "10.123.74.74",
+					"Tag": "VNET Policy",
+					"Type": 3
+				}
+			],
+			"ID": "193b6f8d-b760-4940-9abb-66a91e5af49a",
+			"PortOperationTime": 0,
+			"State": 1,
+			"SwitchOperationTime": 0,
+			"VfpOperationTime": 0,
+			"parentId": "e469c3a9-20ee-4de5-8fc2-d8ac99a49b18"
+		},
+		"SharedContainers": [
+
+		],
+		"State": 1,
+		"Type": "Overlay",
+		"Version": 21474836481,
+		"VirtualNetwork": "9d59c0df-b1e2-4eee-9fde-b7ea829fc6a1",
+		"VirtualNetworkName": "vxlan0"
+	}
+	`
+)
+
+// TestCheckPAAddressMatch expects a match for the address named by the endpoint's PA policy.
+func TestCheckPAAddressMatch(t *testing.T) {
+	var hnsEndpoint hcsshim.HNSEndpoint
+	assert.Nil(t, json.Unmarshal([]byte(remoteEndpointJson), &hnsEndpoint))
+	assert.True(t, checkPAAddress(&hnsEndpoint, "10.123.74.74"))
+}
+
+// TestCheckPAAddressNoMatch expects no match for an address the PA policy does not name.
+func TestCheckPAAddressNoMatch(t *testing.T) {
+	var hnsEndpoint hcsshim.HNSEndpoint
+	assert.Nil(t, json.Unmarshal([]byte(remoteEndpointJson), &hnsEndpoint))
+	assert.False(t, checkPAAddress(&hnsEndpoint, "someOtherAddress"))
+}
+
+// TestCheckPAAddressNoMatchIfNil expects no match for an endpoint without any policies.
+func TestCheckPAAddressNoMatchIfNil(t *testing.T) {
+	var hnsEndpoint hcsshim.HNSEndpoint
+	assert.False(t, checkPAAddress(&hnsEndpoint, "someOtherAddress"))
+}
\ No newline at end of file
diff --git a/backend/vxlan/vxlan_windows.go b/backend/vxlan/vxlan_windows.go
index 0c8633fbf8..fd8b03f0af 100644
--- a/backend/vxlan/vxlan_windows.go
+++ b/backend/vxlan/vxlan_windows.go
@@ -1,3 +1,5 @@
+// +build windows
+
 // Copyright 2015 flannel authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -11,14 +13,192 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-// +build windows
 
 package vxlan
 
+// Some design notes:
+// VXLAN encapsulates L2 packets (though flannel is L3 only so don't expect to be able to send L2 packets across hosts)
+// Windows overlay decap works at L2 and so it needs the correct destination MAC for the remote host to work.
+// Windows does not expose an L3Miss interface so for now all possible remote IP/MAC pairs have to be configured upfront.
+//
+// In this scheme the scaling of table entries (per host) is:
+//  - 1 network entry for the overlay network
+//  - 1 endpoint per local container
+//  - N remote endpoints per remote node, one for every usable address of that
+//    node's subnet (total remote endpoints = N * number of remote nodes)
+
 import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"sync"
+
+	"github.com/Microsoft/hcsshim"
 	log "github.com/golang/glog"
+	"golang.org/x/net/context"
+
+	"github.com/coreos/flannel/backend"
+	"github.com/coreos/flannel/pkg/ip"
+	"github.com/coreos/flannel/subnet"
 )
 
 func init() {
-	log.Infof("vxlan is not supported on this platform")
+	backend.Register("vxlan", New)
+}
+
+const (
+	// minimumVNI is the lowest VNI that RegisterNetwork accepts on Windows.
+	minimumVNI = 4096
+	// vxlanPort is the only VXLAN port that RegisterNetwork accepts on Windows.
+	vxlanPort = 4789
+)
+
+// VXLANBackend is the Windows VXLAN backend. The networks it registers are
+// backed by HNS overlay networks.
+type VXLANBackend struct {
+	sm       subnet.Manager
+	extIface *backend.ExternalInterface
+	networks map[string]*network
+}
+
+// New returns a VXLAN backend that uses the given subnet manager and external
+// interface. init registers it under the backend name "vxlan".
+func New(sm subnet.Manager, extIface *backend.ExternalInterface) (backend.Backend, error) {
+	be := &VXLANBackend{
+		sm:       sm,
+		extIface: extIface,
+		networks: make(map[string]*network),
+	}
+
+	return be, nil
+}
+
+// RegisterNetwork validates the VXLAN backend configuration, acquires a subnet
+// lease for this host and makes sure the HNS overlay network exists: a network
+// with the configured name is reused when one is found and created otherwise.
+//
+// The configuration is rejected unless VNI is at least minimumVNI, Port equals
+// vxlanPort, GBP and DirectRouting are off and MacPrefix has the form "xx-xx".
+// wg is not used by this backend.
+func (be *VXLANBackend) RegisterNetwork(ctx context.Context, wg sync.WaitGroup, config *subnet.Config) (backend.Network, error) {
+	// Every field must be exported, otherwise encoding/json cannot populate it
+	// and the Name/MacPrefix settings of the backend config are silently ignored.
+	cfg := struct {
+		Name          string
+		MacPrefix     string
+		VNI           int
+		Port          int
+		GBP           bool
+		DirectRouting bool
+	}{
+		Name:      "vxlan0",
+		MacPrefix: "0E-2A",
+		VNI:       minimumVNI,
+		Port:      vxlanPort,
+	}
+
+	if len(config.Backend) > 0 {
+		if err := json.Unmarshal(config.Backend, &cfg); err != nil {
+			return nil, fmt.Errorf("error decoding VXLAN backend config: %v", err)
+		}
+	}
+
+	if cfg.VNI < minimumVNI {
+		return nil, fmt.Errorf("invalid VXLAN backend config. VNI [%v] must be greater than or equal to %v on Windows", cfg.VNI, minimumVNI)
+	}
+
+	if cfg.Port != vxlanPort {
+		return nil, fmt.Errorf("invalid VXLAN backend config. Port [%v] is not supported on Windows. Omit the setting to default to port %v", cfg.Port, vxlanPort)
+	}
+
+	if cfg.DirectRouting {
+		return nil, errors.New("invalid VXLAN backend config. DirectRouting is not supported on Windows")
+	}
+
+	if cfg.GBP {
+		return nil, errors.New("invalid VXLAN backend config. GBP is not supported on Windows")
+	}
+
+	if len(cfg.MacPrefix) != 5 || cfg.MacPrefix[2] != '-' {
+		return nil, fmt.Errorf("invalid VXLAN backend config. macPrefix [%v] is invalid, prefix must be of the format xx-xx e.g. 0E-2A", cfg.MacPrefix)
+	}
+
+	log.Infof("VXLAN config: %+v", cfg)
+
+	n := &network{
+		extIface:  be.extIface,
+		sm:        be.sm,
+		name:      be.extIface.Iface.Name,
+		macPrefix: cfg.MacPrefix,
+	}
+
+	attrs := subnet.LeaseAttrs{
+		PublicIP:    ip.FromIP(be.extIface.ExtAddr),
+		BackendType: "vxlan",
+	}
+
+	l, err := be.sm.AcquireLease(ctx, &attrs)
+	switch err {
+	case nil:
+		n.lease = l
+
+	case context.Canceled, context.DeadlineExceeded:
+		return nil, err
+
+	default:
+		return nil, fmt.Errorf("failed to acquire lease: %v", err)
+	}
+
+	// Reuse an existing HNS network of the same name.
+	// NOTE(review): its type, subnet and VSID are not compared with the
+	// requested settings, so a network with other settings is reused as is.
+	networkName := cfg.Name
+	hnsNetwork, err := hcsshim.GetHNSNetworkByName(networkName)
+	if err == nil {
+		log.Infof("Found existing HNS network [%+v]", hnsNetwork)
+		n.networkId = hnsNetwork.Id
+		return n, nil
+	}
+
+	// The lookup failed; should it have handed back a network anyway, remove it
+	// before a new network with the same name is requested.
+	if hnsNetwork != nil {
+		if _, err := hnsNetwork.Delete(); err != nil {
+			return nil, fmt.Errorf("unable to delete existing network [%v], error: %v", hnsNetwork.Name, err)
+		}
+		log.Infof("Deleted stale HNS network [%v]", networkName)
+	}
+
+	// create the underlying windows HNS network
+	request := map[string]interface{}{
+		"Name": networkName,
+		"Type": "Overlay",
+		"Subnets": []interface{}{
+			map[string]interface{}{
+				"AddressPrefix":  config.Network,
+				"GatewayAddress": config.Network.IP + 1,
+				"Policies": []interface{}{
+					map[string]interface{}{
+						"Type": "VSID",
+						"VSID": cfg.VNI,
+					},
+				},
+			},
+		},
+	}
+
+	jsonRequest, err := json.Marshal(request)
+	if err != nil {
+		return nil, err
+	}
+
+	log.Infof("Attempting to create HNS network, request: %v", string(jsonRequest))
+	hnsNetwork, err = hcsshim.HNSNetworkRequest("POST", "", string(jsonRequest))
+	if err != nil {
+		return nil, fmt.Errorf("unable to create network [%v], error: %v", networkName, err)
+	}
+	log.Infof("Created HNS network [%v] as %+v", networkName, hnsNetwork)
+	n.networkId = hnsNetwork.Id
+
+	return n, nil
 }