diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index ae77d2f13b..e8e0a9ad8a 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -1701,11 +1701,6 @@ ovnkube-controller-with-node() { echo "=============== ovnkube-controller-with-node - (wait for ovs)" wait_for_event ovs_ready - if [[ ${ovnkube_node_mode} != "dpu-host" ]]; then - echo "=============== ovnkube-controller-with-node - (ovn-node wait for ovn-controller.pid)" - wait_for_event process_ready ovn-controller - fi - ovn_routable_mtu_flag= if [[ -n "${routable_mtu}" ]]; then routable_mtu_flag="--routable-mtu ${routable_mtu}" @@ -2360,6 +2355,22 @@ ovn-controller() { echo "ovn_nbdb ${ovn_nbdb} ovn_sbdb ${ovn_sbdb}" echo "ovn_nbdb_conn ${ovn_nbdb_conn}" + # if ovn IC, we do not support multiple Nodes per Zone, therefore it's safe to assume ovnkube-controller is local and ovn-controller + # has access to the file. Block startup until ovnkube-controller emits the file indicating the SB DB is not stale. + if [[ ${ovn_enable_interconnect} == "true" ]]; then + echo "=============== time: $(date +%d-%m-%H:%M:%S:%N) ovn-controller - (wait for ovnkube-controller SB DB hot file for 5 minutes)" + retries=0 + while [[ ${retries} -lt 3000 ]]; do + if [[ -f "/var/run/ovn-kubernetes/ovnkube-controller-sb-db-hot" ]]; then + echo "=============== time: $(date +%d-%m-%H:%M:%S:%N) ovn-controller - ovnkube-controller SB DB hot file found" + break + fi + echo "=============== time: $(date +%d-%m-%H:%M:%S:%N) ovn-controller - (wait for ovnkube-controller SB DB hot file)..." + sleep .1 + ((retries += 1)) + done + fi + echo "=============== ovn-controller start_controller" rm -f /var/run/ovn-kubernetes/cni/* rm -f ${OVN_RUNDIR}/ovn-controller.*.ctl diff --git a/dist/templates/ovnkube-single-node-zone.yaml.j2 b/dist/templates/ovnkube-single-node-zone.yaml.j2 index d2d485cca7..ea437862b1 100644 --- a/dist/templates/ovnkube-single-node-zone.yaml.j2 +++ b/dist/templates/ovnkube-single-node-zone.yaml.j2 @@ -499,7 +499,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true - + - mountPath: /var/run/ovn-kubernetes + name: host-var-run-ovn-kubernetes + readOnly: true resources: requests: cpu: 100m @@ -520,12 +522,12 @@ spec: fieldPath: metadata.namespace - name: OVN_SSL_ENABLE value: "{{ ovn_ssl_en }}" - - name: OVN_NORTH value: "local" - name: OVN_SOUTH value: "local" - + - name: OVN_ENABLE_INTERCONNECT + value: "{{ ovn_enable_interconnect }}" readinessProbe: exec: command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovn-controller"] diff --git a/go-controller/cmd/ovnkube/ovnkube.go b/go-controller/cmd/ovnkube/ovnkube.go index 2dc1189c62..89ca232390 100644 --- a/go-controller/cmd/ovnkube/ovnkube.go +++ b/go-controller/cmd/ovnkube/ovnkube.go @@ -29,6 +29,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controllermanager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb" + libovsdbutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/util" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" ovnnode "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/routemanager" @@ -522,6 +523,18 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util controllerErr = fmt.Errorf("failed to start network controller: %w", err) return } + // wait until all changes in the OVN NB DB have been sync'd to the OVN SB DB. If the context is cancelled, the func returns.
+ if err = libovsdbutil.WaitUntilNorthdSyncOnce(ctx, libovsdbOvnNBClient, libovsdbOvnSBClient); err != nil { + klog.Errorf("Failed waiting for northd to sync OVN Northbound DB to Southbound: %v", err) + } + // ovnkube-controller writes a file once the OVN SB DB contains the changes post sync. The file is removed on exit. + const sbDBHotFileName = "/var/run/ovn-kubernetes/ovnkube-controller-sb-db-hot" + if err = os.WriteFile(sbDBHotFileName, []byte(time.Now().String()), 0o644); err != nil { + klog.Errorf("Failed to write ovnkube controller sb db hot file: %v", err) + } + defer func() { + os.Remove(sbDBHotFileName) + }() // record delay until ready metrics.MetricOVNKubeControllerReadyDuration.Set(time.Since(startTime).Seconds()) diff --git a/go-controller/pkg/libovsdb/util/northd_sync.go b/go-controller/pkg/libovsdb/util/northd_sync.go new file mode 100644 index 0000000000..798582ac5a --- /dev/null +++ b/go-controller/pkg/libovsdb/util/northd_sync.go @@ -0,0 +1,65 @@ +package util + +import ( + "context" + "errors" + "fmt" + "time" + + "k8s.io/apimachinery/pkg/util/wait" + + "github.com/ovn-org/libovsdb/client" + "github.com/ovn-org/libovsdb/model" + "github.com/ovn-org/libovsdb/ovsdb" + + libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/sbdb" +) + +// WaitUntilNorthdSyncOnce ensures northd has sync'd at least once by incrementing the nb_cfg value in NB DB and waiting +// for northd to copy it to SB DB. It polls the SB DB until the context is cancelled.
// The expectation is that the data you wish to be sync'd to SB DB has already been written to NB DB, so when we get the initial +// nb_cfg value, we know that if we increment that by one and see that value or greater in SB DB, then the data has sync'd. +// All other processes interacting with nb_cfg increment it. This function depends on other processes respecting that. +// There is no guarantee about changes made to the SB DB after this func returns. +func WaitUntilNorthdSyncOnce(ctx context.Context, nbClient, sbClient client.Client) error { + // 1. Get value of nb_cfg + // 2. Increment value of nb_cfg + // 3. Wait until value appears in SB DB after northd copies it. + nbGlobal := &nbdb.NBGlobal{} + nbGlobal, err := libovsdbops.GetNBGlobal(nbClient, nbGlobal) + if err != nil { + return fmt.Errorf("failed to find OVN Northbound NB_Global table"+ + " entry: %w", err) + } + // increment nb_cfg value by 1. When northd consumes updates from NB DB, it will copy this value to the SB DB's SB_Global table nb_cfg field. + ops, err := nbClient.Where(nbGlobal).Mutate(nbGlobal, model.Mutation{ + Field: &nbGlobal.NbCfg, + Mutator: ovsdb.MutateOperationAdd, + Value: 1, + }) + if err != nil { + return fmt.Errorf("failed to generate ops to mutate nb_cfg: %w", err) + } + expectedNbCfgValue := nbGlobal.NbCfg + 1 + if _, err = libovsdbops.TransactAndCheck(nbClient, ops); err != nil { + return fmt.Errorf("failed to transact to increment nb_cfg: %w", err) + } + sbGlobal := &sbdb.SBGlobal{} + // poll SB DB every 5 milliseconds for the expected value until the context is cancelled.
+ err = wait.PollUntilContextCancel(ctx, time.Millisecond*5, true, func(_ context.Context) (done bool, err error) { + if sbGlobal, err = libovsdbops.GetSBGlobal(sbClient, sbGlobal); err != nil { + // northd hasn't added an entry yet + if errors.Is(err, client.ErrNotFound) { + return false, nil + } + return false, fmt.Errorf("failed to get sb_global table entry from SB DB: %w", err) + } + return sbGlobal.NbCfg >= expectedNbCfgValue, nil // we only need to ensure it is greater than or equal to the expected value + }) + if err != nil { + return fmt.Errorf("failed while waiting for nb_cfg value greater than or equal to %d in sb db sb_global table: %w", expectedNbCfgValue, err) + } + return nil +} diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index f9f3b36ec5..509e9d07a5 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -243,6 +243,12 @@ func (oc *DefaultNodeNetworkController) Reconcile(netInfo util.NetInfo) error { } func clearOVSFlowTargets() error { + // TODO: match on something more specific than just the existence of an error + // nothing to clear if the bridge doesn't exist + if _, _, err := util.RunOVSVsctl("br-exists", "br-int"); err != nil { + return nil + } + _, _, err := util.RunOVSVsctl( "--", "clear", "bridge", "br-int", "netflow", @@ -508,44 +514,6 @@ func setEncapPort(ctx context.Context) error { return nil } -func isOVNControllerReady() (bool, error) { - // check node's connection status - runDir := util.GetOvnRunDir() - pid, err := os.ReadFile(runDir + "ovn-controller.pid") - if err != nil { - return false, fmt.Errorf("unknown pid for ovn-controller process: %v", err) - } - ctlFile := runDir + fmt.Sprintf("ovn-controller.%s.ctl", strings.TrimSuffix(string(pid), "\n")) - ret, _, err := util.RunOVSAppctl("-t", ctlFile, "connection-status") - if err != nil { - return false, fmt.Errorf("could not get connection status: %w", err) - } - klog.Infof("Node connection status = %s", ret) - if ret != "connected" { - return false, nil - } - - // check whether br-int exists on node - _, _, err = util.RunOVSVsctl("--", "br-exists", "br-int") - if err != nil { - return false, nil - } - - // check by dumping br-int flow entries - stdout, _, err := util.RunOVSOfctl("dump-aggregate", "br-int") - if err != nil { - klog.V(5).Infof("Error dumping aggregate flows: %v", err) - return false, nil - } - hasFlowCountZero := strings.Contains(stdout, "flow_count=0") - if hasFlowCountZero { - klog.V(5).Info("Got a flow count of 0 when dumping flows for node") - return false, nil - } - - return true, nil -} - // getEnvNameFromResourceName gets the device plugin env variable from the device plugin resource name.
func getEnvNameFromResourceName(resource string) string { res1 := strings.ReplaceAll(resource, ".", "_") diff --git a/go-controller/pkg/node/gateway_init.go b/go-controller/pkg/node/gateway_init.go index c7553f7d0d..b50a803d98 100644 --- a/go-controller/pkg/node/gateway_init.go +++ b/go-controller/pkg/node/gateway_init.go @@ -406,19 +406,11 @@ func (nc *DefaultNodeNetworkController) initGatewayPreStart( return gw.initFunc() } - readyGwFunc := func() (bool, error) { - controllerReady, err := isOVNControllerReady() - if err != nil || !controllerReady { - return false, err - } - return gw.readyFunc() - } - if err := nodeAnnotator.Run(); err != nil { return nil, fmt.Errorf("failed to set node %s annotations: %w", nc.name, err) } - waiter.AddWait(readyGwFunc, initGwFunc) + waiter.AddWait(gw.readyFunc, initGwFunc) nc.Gateway = gw // Wait for management port and gateway resources to be created by the master diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index e79b9b29c5..ab7ff82921 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -1138,6 +1138,8 @@ func (e *EgressIPController) isLocalZoneNode(node *corev1.Node) bool { type egressIPCache struct { // egressIP name -> network name -> cache egressIPNameToPods map[string]map[string]selectedPods + // egressIP name -> to assigned Node names + egressIPNameToAssignedNodes map[string][]string // egressLocalNodes will contain all nodes that are local // to this zone which are serving this egressIP object.. // This will help sync SNATs @@ -1154,7 +1156,7 @@ type egressIPCache struct { } type nodeNetworkRedirects struct { - // node name -> network name -> redirect IPs + // network name -> node name -> redirect IPs cache map[string]map[string]redirectIPs } @@ -1444,7 +1446,7 @@ func (e *EgressIPController) syncStaleGWMarkRules(egressIPCache egressIPCache) e continue } for networkName, podCache := range networkPodCache { - for eIP, nodeName := range egressIPCache.egressIPIPToNodeCache { + for eIPIP, nodeName := range egressIPCache.egressIPIPToNodeCache { if !egressIPCache.egressLocalNodesCache.Has(nodeName) { continue } @@ -1458,7 +1460,7 @@ func (e *EgressIPController) syncStaleGWMarkRules(egressIPCache egressIPCache) e return fmt.Errorf("failed to create new network %s: %v", networkName, err) } routerName := ni.GetNetworkScopedGWRouterName(nodeName) - isEIPIPv6 := utilnet.IsIPv6String(eIP) + isEIPIPv6 := utilnet.IsIPv6String(eIPIP) for podKey, podIPs := range podCache.egressLocalPods { ops, err = processPodFn(ops, eIPName, podKey, egressIPCache.markCache[eIPName], routerName, networkName, podIPs, isEIPIPv6) if err != nil { @@ -1600,21 +1602,36 @@ func (e *EgressIPController) syncPodAssignmentCache(egressIPCache egressIPCache) // It also removes stale nexthops from router policies used by EgressIPs. // Upon failure, it may be invoked multiple times in order to avoid a pod restart. 
func (e *EgressIPController) syncStaleEgressReroutePolicy(cache egressIPCache) error { - for _, networkCache := range cache.egressIPNameToPods { + for eipName, networkCache := range cache.egressIPNameToPods { for networkName, data := range networkCache { logicalRouterPolicyStaleNexthops := []*nbdb.LogicalRouterPolicy{} + // select LRPs scoped to the correct LRP priority, network and EIP name p := func(item *nbdb.LogicalRouterPolicy) bool { if item.Priority != types.EgressIPReroutePriority || item.ExternalIDs[libovsdbops.NetworkKey.String()] != networkName { return false } - egressIPName, _ := getEIPLRPObjK8MetaData(item.ExternalIDs) - if egressIPName == "" { + networkNodeRedirectCache, ok := cache.egressNodeRedirectsCache.cache[networkName] + if !ok || len(networkNodeRedirectCache) == 0 { + klog.Infof("syncStaleEgressReroutePolicy found invalid logical router policy (UUID: %s) because no assigned Nodes for EgressIP %s", item.UUID, eipName) + return true + } + extractedEgressIPName, _ := getEIPLRPObjK8MetaData(item.ExternalIDs) + if extractedEgressIPName == "" { klog.Errorf("syncStaleEgressReroutePolicy found logical router policy (UUID: %s) with invalid meta data associated with network %s", item.UUID, networkName) - return false + return true + } + if extractedEgressIPName != eipName { + // remove if there's no reference to this EIP name + _, ok := cache.egressIPNameToPods[extractedEgressIPName] + return !ok } splitMatch := strings.Split(item.Match, " ") - logicalIP := splitMatch[len(splitMatch)-1] - parsedLogicalIP := net.ParseIP(logicalIP) + podIPStr := splitMatch[len(splitMatch)-1] + podIP := net.ParseIP(podIPStr) + if podIP == nil { + klog.Infof("syncStaleEgressReroutePolicy found invalid LRP with broken match with UID %q", item.UUID) + return true + } egressPodIPs := sets.NewString() // Since LRPs are created only for pods local to this zone // we need to care about only those pods. Nexthop for them will @@ -1624,31 +1641,24 @@ func (e *EgressIPController) syncStaleEgressReroutePolicy(cache egressIPCache) e for _, podIPs := range data.egressLocalPods { egressPodIPs.Insert(podIPs.UnsortedList()...) } - if !egressPodIPs.Has(parsedLogicalIP.String()) { - klog.Infof("syncStaleEgressReroutePolicy will delete %s due to no nexthop or stale logical ip: %v", egressIPName, item) + if !egressPodIPs.Has(podIP.String()) { + klog.Infof("syncStaleEgressReroutePolicy will delete %s due to no nexthop or stale logical ip: %v", extractedEgressIPName, item) return true } // Check for stale nexthops that may exist in the logical router policy and store that in logicalRouterPolicyStaleNexthops. // Note: adding missing nexthop(s) to the logical router policy is done outside the scope of this function. 
staleNextHops := []string{} for _, nexthop := range item.Nexthops { - nodeName, ok := cache.egressIPIPToNodeCache[parsedLogicalIP.String()] - if ok { - klog.Infof("syncStaleEgressReroutePolicy will delete %s due to no node assigned to logical ip: %v", egressIPName, item) - return true - } - networksRedirects, ok := cache.egressNodeRedirectsCache.cache[nodeName] - if ok { - klog.Infof("syncStaleEgressReroutePolicy will delete %s due to no network in cache: %v", egressIPName, item) - return true - } - redirects, ok := networksRedirects[networkName] - if !ok { - klog.Infof("syncStaleEgressReroutePolicy will delete %s due to no redirects for network in cache: %v", egressIPName, item) - return true + // ensure valid next hop by iterating through the node config + var isFound bool // isFound is true, if the next hop IP is found within the set of assigned nodes + for _, nodeRedirect := range networkNodeRedirectCache { + if nodeRedirect.containsIP(nexthop) { + isFound = true + break + } } - //FIXME: be more specific about which is the valid next hop instead of relying on verifying if the IP is within a valid set of IPs. - if !redirects.containsIP(nexthop) { + if !isFound { + //FIXME: be more specific about which is the valid next hop instead of relying on verifying if the IP is within a valid set of IPs. staleNextHops = append(staleNextHops, nexthop) } } @@ -1669,7 +1679,14 @@ func (e *EgressIPController) syncStaleEgressReroutePolicy(cache egressIPCache) e // Update Logical Router Policies that have stale nexthops. Notice that we must do this separately // because logicalRouterPolicyStaleNexthops must be populated first - klog.Infof("syncStaleEgressReroutePolicy will remove stale nexthops for network %s: %+v", networkName, logicalRouterPolicyStaleNexthops) + for _, staleNextHopLogicalRouterPolicy := range logicalRouterPolicyStaleNexthops { + if staleNextHopLogicalRouterPolicy.Nexthop == nil { + continue + } + klog.Infof("syncStaleEgressReroutePolicy will remove stale nexthops for LRP %q for network %s: %s", + staleNextHopLogicalRouterPolicy.UUID, networkName, *staleNextHopLogicalRouterPolicy.Nexthop) + } + err = libovsdbops.DeleteNextHopsFromLogicalRouterPolicies(e.nbClient, cache.networkToRouter[networkName], logicalRouterPolicyStaleNexthops...) if err != nil { return fmt.Errorf("unable to remove stale next hops from logical router policies for network %s: %v", networkName, err) @@ -1699,7 +1716,13 @@ func (e *EgressIPController) syncStaleSNATRules(egressIPCache egressIPCache) err return false } egressIPName := egressIPMeta[0] - parsedLogicalIP := net.ParseIP(item.LogicalIP).String() + // check logical IP maps to a valid pod + parsedPodIP := net.ParseIP(item.LogicalIP) + if parsedPodIP == nil { + klog.Errorf("syncStaleSNATRules found invalid logical IP for NAT with UID %q", item.UUID) + return true + } + parsedPodIPStr := parsedPodIP.String() cacheEntry, exists := egressIPCache.egressIPNameToPods[egressIPName][types.DefaultNetworkName] egressPodIPs := sets.NewString() if exists { @@ -1712,7 +1735,7 @@ func (e *EgressIPController) syncStaleSNATRules(egressIPCache egressIPCache) err egressPodIPs.Insert(podIPs.UnsortedList()...) 
} } - if !exists || !egressPodIPs.Has(parsedLogicalIP) { + if !exists || !egressPodIPs.Has(parsedPodIPStr) { klog.Infof("syncStaleSNATRules will delete %s due to logical ip: %v", egressIPName, item) return true } @@ -1721,9 +1744,15 @@ func (e *EgressIPController) syncStaleSNATRules(egressIPCache egressIPCache) err klog.Errorf("syncStaleSNATRules failed to find default network in networks cache") return false } - if node, ok := egressIPCache.egressIPIPToNodeCache[item.ExternalIP]; !ok || !cacheEntry.egressLocalPods[types.DefaultNetworkName].Has(node) || - item.LogicalPort == nil || *item.LogicalPort != ni.GetNetworkScopedK8sMgmtIntfName(node) { - klog.Infof("syncStaleSNATRules will delete %s due to external ip or stale logical port: %v", egressIPName, item) + // check external IP maps to a valid EgressIP IP and it's assigned to a Node + node, ok := egressIPCache.egressIPIPToNodeCache[item.ExternalIP] + if !ok { + klog.Infof("syncStaleSNATRules found NAT %q without EIP assigned to a Node", item.UUID) + return true + } + // check logical port is set and corresponds to the correct egress node + if item.LogicalPort == nil || *item.LogicalPort != ni.GetNetworkScopedK8sMgmtIntfName(node) { + klog.Infof("syncStaleSNATRules found NAT %q with invalid logical port", item.UUID) return true } return false @@ -1907,9 +1936,12 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) { // This will help sync SNATs egressLocalNodesCache := sets.New[string]() cache.egressLocalNodesCache = egressLocalNodesCache - // egressIP name -> node name - egressNodesCache := make(map[string]string, 0) - cache.egressIPIPToNodeCache = egressNodesCache + // egressIP name -> nodes where the IPs are assigned + egressIPNameNodesCache := make(map[string][]string, 0) + cache.egressIPNameToAssignedNodes = egressIPNameNodesCache + // egressIP IP -> node name. Assigned node for EIP.
+ egressIPIPNodeCache := make(map[string]string, 0) + cache.egressIPIPToNodeCache = egressIPIPNodeCache cache.markCache = make(map[string]string) egressIPs, err := e.watchFactory.GetEgressIPs() if err != nil { @@ -1922,11 +1954,18 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) { } cache.markCache[egressIP.Name] = mark.String() egressIPsCache[egressIP.Name] = make(map[string]selectedPods, 0) + egressIPNameNodesCache[egressIP.Name] = make([]string, 0, len(egressIP.Status.Items)) for _, status := range egressIP.Status.Items { + eipIP := net.ParseIP(status.EgressIP) + if eipIP == nil { + klog.Errorf("Failed to parse EgressIP %s IP %q from status", egressIP.Name, status.EgressIP) + continue + } + egressIPIPNodeCache[eipIP.String()] = status.Node if localZoneNodes.Has(status.Node) { egressLocalNodesCache.Insert(status.Node) } - egressNodesCache[status.EgressIP] = status.Node + egressIPNameNodesCache[egressIP.Name] = append(egressIPNameNodesCache[egressIP.Name], status.Node) } namespaces, err = e.watchFactory.GetNamespacesBySelector(egressIP.Spec.NamespaceSelector) if err != nil { diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index b0e5ad142a..88633cd740 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -52,6 +52,7 @@ const ( podV4IP3 = "10.128.1.3" podV4IP4 = "10.128.1.4" podV6IP = "ae70::66" + podV6IP2 = "be70::66" v6GatewayIP = "ae70::1" v6Node1Subnet = "ae70::66/64" v6Node2Subnet = "be70::66/64" @@ -12723,6 +12724,539 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" }) }) + + ginkgo.Context("Sync", func() { + ginkgo.It("removes config for previously selected pods on a deleted Node", func() { + // node 1 is local zone and egress Node. + // pod was on node 2 but it is deleted. Node 2 previously was also an egress Node. 
+ app.Action = func(*cli.Context) error { + config.OVNKubernetesFeature.EnableInterconnect = true + // dual stack cluster + config.IPv4Mode = true + config.IPv6Mode = true + egressNamespace := newNamespace(eipNamespace) + egressPod := corev1.Pod{ + ObjectMeta: newPodMeta(eipNamespace, podName, egressPodLabel), + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "containerName", + Image: "containerImage", + }, + }, + NodeName: node1Name, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + PodIP: podV4IP, + PodIPs: []corev1.PodIP{{IP: podV4IP}, {IP: podV6IP}}, + }, + } + // node 1 (local zone) + node1IPv4 := "192.168.126.210" + Node1IPv4CIDR := node1IPv4 + "/24" + node1IPv6 := "fc00:f853:ccd:e793::30" + node1IPv6CIDR := node1IPv6 + "/64" + node1TranSwitchIPv4CIDR := "100.88.0.2/16" + node1TranSwitchIPv6CIDR := "fd97::2/64" + _, node1IPV4Net, _ := net.ParseCIDR(v4Node1Subnet) + _, node1IPV6Net, _ := net.ParseCIDR(v6Node1Subnet) + nodeAnnotations := map[string]string{ + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-gateway-router-lrp-ifaddrs": fmt.Sprintf("{\"default\":{\"ipv4\":\"%s\",\"ipv6\":\"%s\"}}", nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6), + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", Node1IPv4CIDR, node1IPv6CIDR), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4Node1Subnet, v6Node1Subnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s\", \"ipv6\": \"%s\"}", node1TranSwitchIPv4CIDR, node1TranSwitchIPv6CIDR), + "k8s.ovn.org/zone-name": node1Name, + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\",\"%s\"]", Node1IPv4CIDR, node1IPv6CIDR), + } + node1 := getNodeObj(node1Name, nodeAnnotations, map[string]string{}) // add node to avoid error-ing out on transit switch IP fetch + // node 2 - deleted (remote zone) + node2TranSwitchIPv6 := "fd97::3" + eipIPv4 := "192.168.126.200" + eipIPv6 := "0:0:0:0:0:feff:c0a8:8e0d" + deletedPodIPv4 := podV4IP2 + // dual IP family EIP selecting one pod in local zone + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + eipIPv4, + eipIPv6, + }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + Node: node1Name, + EgressIP: eipIPv4, + }, + // Previous was set to node 2 and Node was deleted while local zone EIP controller was down. + //{ + // Node: node2Name, + // EgressIP: eipIPv6, + //}, + }, + }, + } + ginkgo.By("start OVN DBs with valid and invalid (pod doesn't exist..) OVN config") + node1NatLogicalPortName := "k8s-" + node1Name + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + // LRPs to support EIP assigned to a remote node node thats deleted while the controller was down + // Valid LRP for IPv4 egress node. IPv4 egress Node is local. 
IPv6 egress node is remote and deleted but ovn config remains + getReRoutePolicy(podV4IP, "4", "valid-reroute-ipv4-UUID", + nodeLogicalRouterIPv4, getEgressIPLRPReRouteDbIDs(eIP.Name, egressPod.Namespace, egressPod.Name, + IPFamilyValueV4, types.DefaultNetworkName, DefaultNetworkControllerName).GetExternalIDs()), + // invalid LRP for IPv6 because remove node is deleted + getReRoutePolicy(podV6IP, "6", "invalid-reroute-ipv6-UUID", + []string{node2TranSwitchIPv6}, getEgressIPLRPReRouteDbIDs(eIP.Name, egressPod.Namespace, egressPod.Name, + IPFamilyValueV6, types.DefaultNetworkName, DefaultNetworkControllerName).GetExternalIDs()), + // NATs to support EIP assigned to the local node + // valid NAT + &nbdb.NAT{ + UUID: "valid-nat-ipv4-UUID", + LogicalIP: podV4IP, + ExternalIP: eipIPv4, + ExternalIDs: getEgressIPNATDbIDs(egressIPName, egressPod.Namespace, egressPod.Name, IPFamilyValueV4, DefaultNetworkControllerName).GetExternalIDs(), + Type: nbdb.NATTypeSNAT, + LogicalPort: &node1NatLogicalPortName, + Options: map[string]string{ + "stateless": "false", + }, + }, + // invalid NAT for a deleted pod on remote node + &nbdb.NAT{ + UUID: "invalid-nat-ipv4-UUID", + LogicalIP: deletedPodIPv4, + ExternalIP: eipIPv4, + ExternalIDs: getEgressIPNATDbIDs(egressIPName, egressNamespace.Namespace, "deletedpod", IPFamilyValueV4, DefaultNetworkControllerName).GetExternalIDs(), + Type: nbdb.NATTypeSNAT, + LogicalPort: &node1NatLogicalPortName, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1Name, + Networks: []string{nodeLogicalRouterIfAddrV6, nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + Policies: []string{"valid-reroute-ipv4-UUID", "invalid-reroute-ipv6-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1Name, + UUID: types.GWRouterPrefix + node1Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + Options: map[string]string{"dynamic_neigh_routers": "false"}, + Nat: []string{"valid-nat-ipv4-UUID", "invalid-nat-ipv4-UUID"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "k8s-" + node1.Name + "-UUID", + Name: "k8s-" + node1.Name, + Addresses: []string{"fe:1a:b2:3f:0e:fb " + util.GetNodeManagementIfAddr(node1IPV4Net).IP.String(), + "fe:1a:b2:3f:0e:fb " + util.GetNodeManagementIfAddr(node1IPV6Net).IP.String()}, + }, + &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + Ports: []string{"k8s-" + node1.Name + "-UUID"}, + }, + }, + }, + &corev1.NamespaceList{ + Items: []corev1.Namespace{*egressNamespace}, + }, + &corev1.PodList{ + Items: []corev1.Pod{egressPod}, + }, + &corev1.NodeList{ + Items: []corev1.Node{node1}, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + ) + i, podIPv4Net, _ := net.ParseCIDR(podV4IP + "/23") + podIPv4Net.IP = i + i, podIPv6Net, _ := net.ParseCIDR(podV6IP + "/23") + podIPv6Net.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{podIPv4Net, podIPv6Net}) + + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, false) + fakeOvn.controller.localZoneNodes.Store(node1Name, true) + fakeOvn.controller.localZoneNodes.Store(node2Name, 
false) + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By("ensuring cleanup of invalid LRP and NAT") + egressIPServedPodsASv4, egressIPServedPodsASv6 := buildEgressIPServedPodsAddressSets([]string{podV4IP, podV6IP}, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName) + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(podV4IP, "4", "valid-reroute-ipv4-UUID", + nodeLogicalRouterIPv4, getEgressIPLRPReRouteDbIDs(eIP.Name, egressPod.Namespace, egressPod.Name, + IPFamilyValueV4, types.DefaultNetworkName, DefaultNetworkControllerName).GetExternalIDs()), + &nbdb.NAT{ + UUID: "valid-egressip-nat-UUID", + LogicalIP: podV4IP, + ExternalIP: eipIPv4, + ExternalIDs: getEgressIPNATDbIDs(egressIPName, egressPod.Namespace, egressPod.Name, IPFamilyValueV4, fakeOvn.controller.controllerName).GetExternalIDs(), + Type: nbdb.NATTypeSNAT, + LogicalPort: &node1NatLogicalPortName, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + Policies: []string{"valid-reroute-ipv4-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1Name, + Networks: []string{nodeLogicalRouterIfAddrV6, nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1Name, + UUID: types.GWRouterPrefix + node1Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + Nat: []string{"valid-egressip-nat-UUID"}, + Options: map[string]string{"dynamic_neigh_routers": "false"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "k8s-" + node1.Name + "-UUID", + Name: "k8s-" + node1.Name, + Addresses: []string{"fe:1a:b2:3f:0e:fb " + util.GetNodeManagementIfAddr(node1IPV4Net).IP.String(), + "fe:1a:b2:3f:0e:fb " + util.GetNodeManagementIfAddr(node1IPV6Net).IP.String()}, + }, + &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + Ports: []string{"k8s-" + node1.Name + "-UUID"}, + }, + egressIPServedPodsASv4, + egressIPServedPodsASv6, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + ginkgo.By("ensure config is consistent") + gomega.Consistently(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("remove invalid OVN config for deleted pod", func() { + // removes invalid SNAT/NAT for a pod that was selected by an EIP but was removed while controller was not running and therefore OVN config should be removed + // does not modify valid SNAT/NAT + // further references to "local" or "remote" imply local or remote OVN zone for IC. 
+ // one EIP object with two assigned IPs of different IP families (v4 and v6) which select one pod that's local + app.Action = func(*cli.Context) error { + config.OVNKubernetesFeature.EnableInterconnect = true + // dual stack cluster + config.IPv4Mode = true + config.IPv6Mode = true + egressPod := corev1.Pod{ + ObjectMeta: newPodMeta(eipNamespace, podName, egressPodLabel), + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "containerName", + Image: "containerImage", + }, + }, + NodeName: node1Name, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + PodIP: podV4IP, + PodIPs: []corev1.PodIP{{IP: podV4IP}, {IP: podV6IP}}, + }, + } + // deletedPodIP is a pod IP of a Pod that was deleted while eip controller was not running therefore config will exist in OVN DBs to support EIP + deletedPodIPv4, deletedPodIPv6 := podV4IP2, podV6IP2 + egressNamespace := newNamespace(eipNamespace) + // node 1 (local zone) + node1IPv4 := "192.168.126.210" + Node1IPv4CIDR := node1IPv4 + "/24" + node1IPv6 := "fc00:f853:ccd:e793::30" + node1IPv6CIDR := node1IPv6 + "/64" + node1TranSwitchIPv4CIDR := "100.88.0.2/16" + node1TranSwitchIPv6CIDR := "fd97::2/64" + _, node1IPV4Net, _ := net.ParseCIDR(v4Node1Subnet) + _, node1IPV6Net, _ := net.ParseCIDR(v6Node1Subnet) + nodeAnnotations := map[string]string{ + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-gateway-router-lrp-ifaddrs": fmt.Sprintf("{\"default\":{\"ipv4\":\"%s\",\"ipv6\":\"%s\"}}", nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6), + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", Node1IPv4CIDR, node1IPv6CIDR), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4Node1Subnet, v6Node1Subnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s\", \"ipv6\": \"%s\"}", node1TranSwitchIPv4CIDR, node1TranSwitchIPv6CIDR), + "k8s.ovn.org/zone-name": node1Name, + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\",\"%s\"]", Node1IPv4CIDR, node1IPv6CIDR), + } + node1 := getNodeObj(node1Name, nodeAnnotations, map[string]string{}) // add node to avoid error-ing out on transit switch IP fetch + // node 2 (remote zone) + node2IPv4 := "192.168.126.202" + node2IPv4CIDR := node2IPv4 + "/24" + node2IPv6 := "fc00:f853:cce:e793::20" + node2IPv6CIDR := node2IPv6 + "/64" + node2TranSwitchIPv4 := "100.88.0.3" + node2TranSwitchIPv4CIDR := node2TranSwitchIPv4 + "/16" + node2TranSwitchIPv6 := "fd97::3" + node2TranSwitchIPv6CIDR := node2TranSwitchIPv6 + "/64" + _, node2IPV4Net, _ := net.ParseCIDR(v4Node2Subnet) + _, node2IPV6Net, _ := net.ParseCIDR(v6Node2Subnet) + nodeAnnotations = map[string]string{ + "k8s.ovn.org/node-gateway-router-lrp-ifaddrs": fmt.Sprintf("{\"default\":{\"ipv4\":\"%s\",\"ipv6\":\"%s\"}}", node2LogicalRouterIfAddrV4, node2LogicalRouterIfAddrV6), + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4CIDR, node2IPv6CIDR), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4Node2Subnet, v6Node2Subnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s\", \"ipv6\": \"%s\"}", node2TranSwitchIPv4CIDR, node2TranSwitchIPv6CIDR), + "k8s.ovn.org/zone-name": node2Name, + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\",\"%s\"]", node2IPv4CIDR, node2IPv6CIDR), + } + node2 := getNodeObj(node2Name, nodeAnnotations, map[string]string{}) + eipIPv4 
:= "192.168.126.200" + eipIPv6 := "0:0:0:0:0:feff:c0a8:8e0d" + // dual IP family EIP selecting one pod in local zone + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + eipIPv4, + eipIPv6, + }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + Node: node1Name, + EgressIP: eipIPv4, + }, + { + Node: node2Name, + EgressIP: eipIPv6, + }, + }, + }, + } + ginkgo.By("start OVN DBs with valid and invalid (pod doesn't exist..) OVN config") + node1NatLogicalPortName := "k8s-" + node1Name + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + // LRPs to support EIP assigned to a remote node + // valid LRP for IPv4/IPv6. IPv4 Egress Node is local, IPv6 is remote + getReRoutePolicy(podV4IP, "4", "valid-reroute-ipv4-UUID", + nodeLogicalRouterIPv4, getEgressIPLRPReRouteDbIDs(eIP.Name, egressPod.Namespace, egressPod.Name, + IPFamilyValueV4, types.DefaultNetworkName, DefaultNetworkControllerName).GetExternalIDs()), + getReRoutePolicy(podV6IP, "6", "valid-reroute-ipv6-UUID", + []string{node2TranSwitchIPv6}, getEgressIPLRPReRouteDbIDs(eIP.Name, egressPod.Namespace, egressPod.Name, + IPFamilyValueV6, types.DefaultNetworkName, DefaultNetworkControllerName).GetExternalIDs()), + // invalid lrp to redirect to the remote egress node for a deleted pod + getReRoutePolicy(deletedPodIPv6, "6", "invalid-reroute-ipv6-UUID", + []string{node2TranSwitchIPv6}, getEgressIPLRPReRouteDbIDs(eIP.Name, "UNKNOWN", "UNKNOWN", + IPFamilyValueV6, types.DefaultNetworkName, DefaultNetworkControllerName).GetExternalIDs()), + // NATs to support EIP assigned to the local node + // valid NAT + &nbdb.NAT{ + UUID: "valid-nat-ipv4-UUID", + LogicalIP: podV4IP, + ExternalIP: eipIPv4, + ExternalIDs: getEgressIPNATDbIDs(egressIPName, egressPod.Namespace, egressPod.Name, IPFamilyValueV4, DefaultNetworkControllerName).GetExternalIDs(), + Type: nbdb.NATTypeSNAT, + LogicalPort: &node1NatLogicalPortName, + Options: map[string]string{ + "stateless": "false", + }, + }, + // invalid NAT + &nbdb.NAT{ + UUID: "invalid-nat-ipv4-UUID", + LogicalIP: deletedPodIPv4, + ExternalIP: eipIPv4, + ExternalIDs: getEgressIPNATDbIDs(egressIPName, "UNKNOWN", "UNKNOWN", IPFamilyValueV4, DefaultNetworkControllerName).GetExternalIDs(), + Type: nbdb.NATTypeSNAT, + LogicalPort: &node1NatLogicalPortName, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1Name, + Networks: []string{nodeLogicalRouterIfAddrV6, nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + Policies: []string{"valid-reroute-ipv4-UUID", "valid-reroute-ipv6-UUID", "invalid-reroute-ipv6-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1Name, + UUID: types.GWRouterPrefix + node1Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + Options: map[string]string{"dynamic_neigh_routers": "false"}, + Nat: []string{"valid-nat-ipv4-UUID", "invalid-nat-ipv4-UUID"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "k8s-" + node1.Name + 
"-UUID", + Name: "k8s-" + node1.Name, + Addresses: []string{"fe:1a:b2:3f:0e:fb " + util.GetNodeManagementIfAddr(node1IPV4Net).IP.String(), + "fe:1a:b2:3f:0e:fb " + util.GetNodeManagementIfAddr(node1IPV6Net).IP.String()}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "k8s-" + node2.Name + "-UUID", + Name: "k8s-" + node2.Name, + Addresses: []string{"fe:1a:b2:3f:0e:fd " + util.GetNodeManagementIfAddr(node2IPV4Net).IP.String(), + "fe:1a:b2:3f:0e:fd " + util.GetNodeManagementIfAddr(node2IPV6Net).IP.String()}, + }, + &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + Ports: []string{"k8s-" + node1.Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: node2.Name + "-UUID", + Name: node2.Name, + Ports: []string{"k8s-" + node2.Name + "-UUID"}, + }, + }, + }, + &corev1.NamespaceList{ + Items: []corev1.Namespace{*egressNamespace}, + }, + &corev1.PodList{ + Items: []corev1.Pod{egressPod}, + }, + &corev1.NodeList{ + Items: []corev1.Node{node1, node2}, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + ) + i, podIPv4Net, _ := net.ParseCIDR(podV4IP + "/23") + podIPv4Net.IP = i + i, podIPv6Net, _ := net.ParseCIDR(podV6IP + "/23") + podIPv6Net.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{podIPv4Net, podIPv6Net}) + + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, false) + fakeOvn.controller.localZoneNodes.Store(node1Name, true) + fakeOvn.controller.localZoneNodes.Store(node2Name, false) + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("ensuring cleanup of invalid LRP and NAT") + egressIPServedPodsASv4, egressIPServedPodsASv6 := buildEgressIPServedPodsAddressSets([]string{podV4IP, podV6IP}, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName) + expectedDatabaseState := []libovsdbtest.TestData{ + // LRPs to support EIP assigned to a remote node + // valid LRP for IPv4/IPv6. 
IPv4 Egress Node is local, IPv6 is remote + getReRoutePolicy(podV4IP, "4", "valid-reroute-ipv4-UUID", + nodeLogicalRouterIPv4, getEgressIPLRPReRouteDbIDs(eIP.Name, egressPod.Namespace, egressPod.Name, + IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs()), + getReRoutePolicy(podV6IP, "6", "valid-reroute-ipv6-UUID", + []string{node2TranSwitchIPv6}, getEgressIPLRPReRouteDbIDs(eIP.Name, egressPod.Namespace, egressPod.Name, + IPFamilyValueV6, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs()), + // valid NAT + &nbdb.NAT{ + UUID: "valid-egressip-nat-UUID", + LogicalIP: podV4IP, + ExternalIP: eipIPv4, + ExternalIDs: getEgressIPNATDbIDs(egressIPName, egressPod.Namespace, egressPod.Name, IPFamilyValueV4, fakeOvn.controller.controllerName).GetExternalIDs(), + Type: nbdb.NATTypeSNAT, + LogicalPort: &node1NatLogicalPortName, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + Policies: []string{"valid-reroute-ipv4-UUID", "valid-reroute-ipv6-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1Name, + Networks: []string{nodeLogicalRouterIfAddrV6, nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1Name, + UUID: types.GWRouterPrefix + node1Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + Nat: []string{"valid-egressip-nat-UUID"}, + Options: map[string]string{"dynamic_neigh_routers": "false"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "k8s-" + node1.Name + "-UUID", + Name: "k8s-" + node1.Name, + Addresses: []string{"fe:1a:b2:3f:0e:fb " + util.GetNodeManagementIfAddr(node1IPV4Net).IP.String(), + "fe:1a:b2:3f:0e:fb " + util.GetNodeManagementIfAddr(node1IPV6Net).IP.String()}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "k8s-" + node2.Name + "-UUID", + Name: "k8s-" + node2.Name, + Addresses: []string{"fe:1a:b2:3f:0e:fd " + util.GetNodeManagementIfAddr(node2IPV4Net).IP.String(), + "fe:1a:b2:3f:0e:fd " + util.GetNodeManagementIfAddr(node2IPV6Net).IP.String()}, + }, + &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + Ports: []string{"k8s-" + node1.Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: node2.Name + "-UUID", + Name: node2.Name, + Ports: []string{"k8s-" + node2.Name + "-UUID"}, + }, + egressIPServedPodsASv4, + egressIPServedPodsASv6, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + ginkgo.By("ensure config is consistent") + gomega.Consistently(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + }) // TEST UTILITY FUNCTIONS; diff --git a/go-controller/pkg/ovn/external_ids_syncer/logical_router_policy/logical_router_policy_sync.go b/go-controller/pkg/ovn/external_ids_syncer/logical_router_policy/logical_router_policy_sync.go index 01cbd40512..8933e78521 100644 --- a/go-controller/pkg/ovn/external_ids_syncer/logical_router_policy/logical_router_policy_sync.go +++ b/go-controller/pkg/ovn/external_ids_syncer/logical_router_policy/logical_router_policy_sync.go @@ -101,7 +101,8 @@ func (syncer *LRPSyncer) syncEgressIPReRoutes() error { podInfo, err := cache.getPod(podIP) 
if err != nil { klog.Infof("Failed to find Logical Switch Port cache entry for pod IP %s: %v", podIP.String(), err) - continue + // pod not found, add dummy metadata that will be cleaned up by EIP controller sync. + podInfo = podNetInfo{namespace: "UNKNOWN", name: "UNKNOWN"} } ipFamily := getIPFamily(isIPv6) lrp.ExternalIDs = getEgressIPLRPReRouteDbIDs(eipName, podInfo.namespace, podInfo.name, ipFamily, defaultNetworkName, syncer.controllerName).GetExternalIDs() diff --git a/go-controller/pkg/ovn/external_ids_syncer/logical_router_policy/logical_router_policy_sync_test.go b/go-controller/pkg/ovn/external_ids_syncer/logical_router_policy/logical_router_policy_sync_test.go index efebfb9c31..da0b0d2ff9 100644 --- a/go-controller/pkg/ovn/external_ids_syncer/logical_router_policy/logical_router_policy_sync_test.go +++ b/go-controller/pkg/ovn/external_ids_syncer/logical_router_policy/logical_router_policy_sync_test.go @@ -122,7 +122,7 @@ var _ = ginkgo.Describe("OVN Logical Router Syncer", func() { map[string]string{"name": egressIPName}, defaultNetworkControllerName)}, finalLRPs: []*nbdb.LogicalRouterPolicy{getReRouteLRP(podNamespace, podName, v4PodIPStr, 0, v4IPFamilyValue, v4PodNextHops, - map[string]string{"name": egressIPName}, + getEgressIPLRPReRouteDbIDs(egressIPName, "UNKNOWN", "UNKNOWN", v4IPFamilyValue, defaultNetworkName, defaultNetworkControllerName).GetExternalIDs(), defaultNetworkControllerName)}, v4ClusterSubnets: []*net.IPNet{v4PodClusterSubnet}, v4JoinSubnet: v4JoinSubnet, diff --git a/go-controller/pkg/ovn/external_ids_syncer/nat/nat_sync.go b/go-controller/pkg/ovn/external_ids_syncer/nat/nat_sync.go index 617bddd411..cf9d433cc8 100644 --- a/go-controller/pkg/ovn/external_ids_syncer/nat/nat_sync.go +++ b/go-controller/pkg/ovn/external_ids_syncer/nat/nat_sync.go @@ -10,6 +10,7 @@ import ( libovsdbclient "github.com/ovn-org/libovsdb/client" "github.com/ovn-org/libovsdb/ovsdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -86,10 +87,10 @@ func (n *NATSyncer) syncEgressIPNATs() error { klog.Errorf("Expected NAT %s to contain 'name' as a key within its external IDs", nat.UUID) continue } - podIP, _, err := net.ParseCIDR(nat.LogicalIP) - if err != nil { - klog.Errorf("Failed to process logical IP %q of NAT %s", nat.LogicalIP, nat.UUID) - continue + // for egress IP, the logicalIP does not contain a mask. + podIP := net.ParseIP(nat.LogicalIP) + if podIP == nil { + return fmt.Errorf("failed to process logical IP %q of NAT %s", nat.LogicalIP, nat.UUID) } isV6 := utilsnet.IsIPv6(podIP) var ipFamily egressIPFamilyValue @@ -103,15 +104,15 @@ func (n *NATSyncer) syncEgressIPNATs() error { pod, found = v4PodCache.getPodByIP(podIP) } if !found { - klog.Errorf("Failed to find logical switch port that contains IP address %s", podIP.String()) - continue + // set it to unknown and the egress IP controller syncer will take care of removing it. 
+ pod = podNetInfo{namespace: "UNKNOWN", name: "UNKNOWN"} + ipFamily = getFirstSupportIPFamily() } nat.ExternalIDs = getEgressIPNATDbIDs(eIPName, pod.namespace, pod.name, ipFamily, n.controllerName).GetExternalIDs() ops, err = libovsdbops.UpdateNATOps(n.nbClient, ops, nat) if err != nil { klog.Errorf("Failed to generate NAT ops for NAT %s: %v", nat.UUID, err) } - klog.Infof("## martin found %d nats", len(ops)) } _, err = libovsdbops.TransactAndCheck(n.nbClient, ops) @@ -176,3 +177,10 @@ func getEgressIPNATDbIDs(eIPName, podNamespace, podName string, ipFamily egressI libovsdbops.IPFamilyKey: string(ipFamily), }) } + +func getFirstSupportIPFamily() egressIPFamilyValue { + if config.IPv4Mode { + return ipFamilyValueV4 + } + return ipFamilyValueV6 +} diff --git a/go-controller/pkg/ovn/external_ids_syncer/nat/nat_sync_test.go b/go-controller/pkg/ovn/external_ids_syncer/nat/nat_sync_test.go index 9c0c9fa18d..58d8b54045 100644 --- a/go-controller/pkg/ovn/external_ids_syncer/nat/nat_sync_test.go +++ b/go-controller/pkg/ovn/external_ids_syncer/nat/nat_sync_test.go @@ -26,22 +26,22 @@ const ( egressIP = "10.10.10.10" nat1UUID = "nat-1-UUID" nat2UUID = "nat-2-UUID" - pod1V4CIDRStr = "10.128.0.5/32" - pod1V6CIDRStr = "2001:0000:130F:0000:0000:09C0:876A:130B/128" + pod1V4Str = "10.128.0.5" + pod1V6Str = "2001:0000:130F:0000:0000:09C0:876A:130B" pod1Namespace = "ns1" pod1Name = "pod1" - pod2V4CIDRStr = "10.128.0.6/32" - pod2V6CIDRStr = "2001:0000:130F:0000:0000:09C0:876A:130A/128" + pod2V4Str = "10.128.0.6" + pod2V6Str = "2001:0000:130F:0000:0000:09C0:876A:130A" pod2Namespace = "ns1" pod2Name = "pod2" defaultNetworkControllerName = "default-network-controller" ) var ( - pod1V4IPNet = testing.MustParseIPNet(pod1V4CIDRStr) - pod1V6IPNet = testing.MustParseIPNet(pod1V6CIDRStr) - pod2V4IPNet = testing.MustParseIPNet(pod2V4CIDRStr) - pod2V6IPNet = testing.MustParseIPNet(pod2V6CIDRStr) + pod1V4IP = testing.MustParseIP(pod1V4Str) + pod1V6IP = testing.MustParseIP(pod1V6Str) + pod2V4IP = testing.MustParseIP(pod2V4Str) + pod2V6IP = testing.MustParseIP(pod2V6Str) legacyExtIDs = map[string]string{legacyEIPNameExtIDKey: egressIPName} pod1V4ExtIDs = getEgressIPNATDbIDs(egressIPName, pod1Namespace, pod1Name, ipFamilyValueV4, defaultNetworkControllerName).GetExternalIDs() pod1V6ExtIDs = getEgressIPNATDbIDs(egressIPName, pod1Namespace, pod1Name, ipFamilyValueV6, defaultNetworkControllerName).GetExternalIDs() @@ -54,64 +54,64 @@ var _ = ginkgo.Describe("NAT Syncer", func() { ginkgo.DescribeTable("egress NATs", func(sync natSync) { performTest(defaultNetworkControllerName, sync.initialNATs, sync.finalNATs, sync.pods) }, ginkgo.Entry("converts legacy IPv4 NATs", natSync{ - initialNATs: []*nbdb.NAT{getSNAT(nat1UUID, pod1V4CIDRStr, egressIP, legacyExtIDs)}, - finalNATs: []*nbdb.NAT{getSNAT(nat1UUID, pod1V4CIDRStr, egressIP, pod1V4ExtIDs)}, + initialNATs: []*nbdb.NAT{getSNAT(nat1UUID, pod1V4Str, egressIP, legacyExtIDs)}, + finalNATs: []*nbdb.NAT{getSNAT(nat1UUID, pod1V4Str, egressIP, pod1V4ExtIDs)}, pods: podsNetInfo{ { - []net.IP{pod1V4IPNet.IP}, + []net.IP{pod1V4IP}, pod1Namespace, pod1Name, }, { - []net.IP{pod2V4IPNet.IP}, + []net.IP{pod2V4IP}, pod2Namespace, pod2Name, }, }, }), ginkgo.Entry("converts legacy IPv6 NATs", natSync{ - initialNATs: []*nbdb.NAT{getSNAT(nat1UUID, pod1V6CIDRStr, egressIP, legacyExtIDs)}, - finalNATs: []*nbdb.NAT{getSNAT(nat1UUID, pod1V6CIDRStr, egressIP, pod1V6ExtIDs)}, + initialNATs: []*nbdb.NAT{getSNAT(nat1UUID, pod1V6Str, egressIP, legacyExtIDs)}, + finalNATs: []*nbdb.NAT{getSNAT(nat1UUID, 
pod1V6Str, egressIP, pod1V6ExtIDs)}, pods: podsNetInfo{ { - []net.IP{pod1V6IPNet.IP}, + []net.IP{pod1V6IP}, pod1Namespace, pod1Name, }, { - []net.IP{pod2V6IPNet.IP}, + []net.IP{pod2V6IP}, pod2Namespace, pod2Name, }, }, }), ginkgo.Entry("converts legacy dual stack NATs", natSync{ - initialNATs: []*nbdb.NAT{getSNAT(nat1UUID, pod1V4CIDRStr, egressIP, legacyExtIDs), getSNAT(nat2UUID, pod1V6CIDRStr, egressIP, legacyExtIDs)}, - finalNATs: []*nbdb.NAT{getSNAT(nat1UUID, pod1V4CIDRStr, egressIP, pod1V4ExtIDs), getSNAT(nat2UUID, pod1V6CIDRStr, egressIP, pod1V6ExtIDs)}, + initialNATs: []*nbdb.NAT{getSNAT(nat1UUID, pod1V4Str, egressIP, legacyExtIDs), getSNAT(nat2UUID, pod1V6Str, egressIP, legacyExtIDs)}, + finalNATs: []*nbdb.NAT{getSNAT(nat1UUID, pod1V4Str, egressIP, pod1V4ExtIDs), getSNAT(nat2UUID, pod1V6Str, egressIP, pod1V6ExtIDs)}, pods: podsNetInfo{ { - []net.IP{pod1V4IPNet.IP, pod1V6IPNet.IP}, + []net.IP{pod1V4IP, pod1V6IP}, pod1Namespace, pod1Name, }, { - []net.IP{pod2V4IPNet.IP, pod2V6IPNet.IP}, + []net.IP{pod2V4IP, pod2V6IP}, pod2Namespace, pod2Name, }, }, }), ginkgo.Entry("doesn't alter NAT with correct external IDs", natSync{ - initialNATs: []*nbdb.NAT{getSNAT(nat1UUID, pod1V6CIDRStr, egressIP, pod1V6ExtIDs)}, - finalNATs: []*nbdb.NAT{getSNAT(nat1UUID, pod1V6CIDRStr, egressIP, pod1V6ExtIDs)}, + initialNATs: []*nbdb.NAT{getSNAT(nat1UUID, pod1V6Str, egressIP, pod1V6ExtIDs)}, + finalNATs: []*nbdb.NAT{getSNAT(nat1UUID, pod1V6Str, egressIP, pod1V6ExtIDs)}, pods: podsNetInfo{ { - []net.IP{pod1V4IPNet.IP, pod1V6IPNet.IP}, + []net.IP{pod1V4IP, pod1V6IP}, pod1Namespace, pod1Name, }, { - []net.IP{pod2V4IPNet.IP, pod2V6IPNet.IP}, + []net.IP{pod2V4IP, pod2V6IP}, pod2Namespace, pod2Name, },
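The new WaitUntilNorthdSyncOnce helper introduced above is called with the long-lived runOvnKube context, while the ovn-controller shell loop caps its own wait at 5 minutes. A minimal sketch (not part of the patch) of bounding the Go-side wait the same way; the function name waitForNorthdWithTimeout and the nbClient/sbClient variables are assumed placeholders for the libovsdb clients created earlier in runOvnKube, and the same imports as northd_sync.go above (context, time, client, klog, libovsdbutil) are assumed:

// waitForNorthdWithTimeout is an illustrative sketch: it bounds the northd sync wait
// with a timeout so a stuck northd cannot block startup indefinitely, mirroring the
// 5-minute cap applied by the ovn-controller shell loop.
func waitForNorthdWithTimeout(ctx context.Context, nbClient, sbClient client.Client) {
	syncCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
	defer cancel()
	if err := libovsdbutil.WaitUntilNorthdSyncOnce(syncCtx, nbClient, sbClient); err != nil {
		klog.Errorf("Failed waiting for northd to sync OVN Northbound DB to Southbound: %v", err)
	}
}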