Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions dist/images/ovnkube.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1701,11 +1701,6 @@ ovnkube-controller-with-node() {
echo "=============== ovnkube-controller-with-node - (wait for ovs)"
wait_for_event ovs_ready

if [[ ${ovnkube_node_mode} != "dpu-host" ]]; then
echo "=============== ovnkube-controller-with-node - (ovn-node wait for ovn-controller.pid)"
wait_for_event process_ready ovn-controller
fi

ovn_routable_mtu_flag=
if [[ -n "${routable_mtu}" ]]; then
routable_mtu_flag="--routable-mtu ${routable_mtu}"
Expand Down Expand Up @@ -2360,6 +2355,22 @@ ovn-controller() {
echo "ovn_nbdb ${ovn_nbdb} ovn_sbdb ${ovn_sbdb}"
echo "ovn_nbdb_conn ${ovn_nbdb_conn}"

# If OVN interconnect (IC) is enabled, multiple nodes per zone are not supported, therefore it's safe to assume ovnkube-controller is local and ovn-controller
# has access to the file. Block startup until the file that ovnkube-controller emits once SB DB is no longer stale is present.
if [[ ${ovn_enable_interconnect} == "true" ]]; then
echo "=============== time: $(date +%d-%m-%H:%M:%S:%N) ovn-controller - (wait for ovnkube-controller SB DB hot file for 5 minutes)"
retries=0
while [[ ${retries} -lt 3000 ]]; do
if [[ -f "/var/run/ovn-kubernetes/ovnkube-controller-sb-db-hot" ]]; then
echo "=============== time: $(date +%d-%m-%H:%M:%S:%N) ovn-controller - ovnkube-controller SB DB hot file found"
break
fi
echo "=============== time: $(date +%d-%m-%H:%M:%S:%N) ovn-controller - (wait for ovnkube-controller SB DB hot file)..."
sleep .1
((retries += 1))
done
fi

echo "=============== ovn-controller start_controller"
rm -f /var/run/ovn-kubernetes/cni/*
rm -f ${OVN_RUNDIR}/ovn-controller.*.ctl
Expand Down
8 changes: 5 additions & 3 deletions dist/templates/ovnkube-single-node-zone.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,9 @@ spec:
- mountPath: /ovn-cert
name: host-ovn-cert
readOnly: true

- mountPath: /var/run/ovn-kubernetes
name: host-var-run-ovn-kubernetes
readOnly: true
resources:
requests:
cpu: 100m
Expand All @@ -520,12 +522,12 @@ spec:
fieldPath: metadata.namespace
- name: OVN_SSL_ENABLE
value: "{{ ovn_ssl_en }}"

- name: OVN_NORTH
value: "local"
- name: OVN_SOUTH
value: "local"

- name: OVN_ENABLE_INTERCONNECT
value: "{{ ovn_enable_interconnect }}"
readinessProbe:
exec:
command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovn-controller"]
Expand Down
13 changes: 13 additions & 0 deletions go-controller/cmd/ovnkube/ovnkube.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controllermanager"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb"
libovsdbutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/util"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics"
ovnnode "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/routemanager"
Expand Down Expand Up @@ -522,6 +523,18 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util
controllerErr = fmt.Errorf("failed to start network controller: %w", err)
return
}
// wait until all changes in OVN NB DB have been sync'd to OVN SB DB. If context is cancelled, func returns.
if err = libovsdbutil.WaitUntilNorthdSyncOnce(ctx, libovsdbOvnNBClient, libovsdbOvnSBClient); err != nil {
klog.Errorf("Failed waiting for northd to sync OVN Northbound DB to Southbound: %v", err)
}
// ovnkube-controller writes a file when OVN SB DB contains the changes post sync. File is removed on exit.
const sbDBHotFileName = "/var/run/ovn-kubernetes/ovnkube-controller-sb-db-hot"
if err = os.WriteFile(sbDBHotFileName, []byte(time.Now().String()), 0o644); err != nil {
klog.Errorf("Failed to write ovnkube controller sb db hot file: %v", err)
}
defer func() {
os.Remove(sbDBHotFileName)
}()

// record delay until ready
metrics.MetricOVNKubeControllerReadyDuration.Set(time.Since(startTime).Seconds())
Expand Down
65 changes: 65 additions & 0 deletions go-controller/pkg/libovsdb/util/northd_sync.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package util

import (
"context"
"errors"
"fmt"
"time"

"k8s.io/apimachinery/pkg/util/wait"

"github.com/ovn-org/libovsdb/client"
"github.com/ovn-org/libovsdb/model"
"github.com/ovn-org/libovsdb/ovsdb"

libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/sbdb"
)

// WaitUntilNorthdSyncOnce ensures northd has sync'd at least once by incrementing the nb_cfg value in NB DB and
// waiting for northd to copy it to SB DB. SB DB is polled every 5 milliseconds until the expected value is seen,
// a lookup fails with an error other than client.ErrNotFound, or ctx is cancelled.
// The expectation is that the data you wish to be sync'd to SB DB has already been written to NB DB, so when we
// read the initial nb_cfg value, we know that if we increment it by one and see that value or greater in SB DB,
// then the data has sync'd.
// All other processes interacting with nb_cfg increment it. This function depends on other processes respecting that.
// There is no guarantee about any changes made to SB DB after this func returns.
//
// It returns a wrapped error if the NB_Global entry cannot be read, if the nb_cfg mutation cannot be built or
// transacted, or if polling SB DB ends without observing the expected value.
func WaitUntilNorthdSyncOnce(ctx context.Context, nbClient, sbClient client.Client) error {
	// 1. Get value of nb_cfg
	// 2. Increment value of nb_cfg
	// 3. Wait until value appears in SB DB after northd copies it.
	nbGlobal, err := libovsdbops.GetNBGlobal(nbClient, &nbdb.NBGlobal{})
	if err != nil {
		return fmt.Errorf("failed to find OVN Northbound NB_Global table"+
			" entry: %w", err)
	}
	// increment nb_cfg value by 1. When northd consumes updates from NB DB, it will copy this value to SB DBs SB_Global table nb_cfg field.
	ops, err := nbClient.Where(nbGlobal).Mutate(nbGlobal, model.Mutation{
		Field:   &nbGlobal.NbCfg,
		Mutator: ovsdb.MutateOperationAdd,
		Value:   1,
	})
	if err != nil {
		return fmt.Errorf("failed to generate ops to mutate nb_cfg: %w", err)
	}
	// Computed from the value read above, before the transaction is sent.
	expectedNbCfgValue := nbGlobal.NbCfg + 1
	if _, err = libovsdbops.TransactAndCheck(nbClient, ops); err != nil {
		return fmt.Errorf("failed to transact to increment nb_cfg: %w", err)
	}
	// poll until we see the expected value in SB DB every 5 milliseconds until context is cancelled.
	err = wait.PollUntilContextCancel(ctx, time.Millisecond*5, true, func(_ context.Context) (bool, error) {
		// Use a fresh lookup template on every poll and keep the result in its own variable: the value returned
		// alongside an error must not be reused as the model for the next lookup.
		sbGlobal, lookupErr := libovsdbops.GetSBGlobal(sbClient, &sbdb.SBGlobal{})
		if lookupErr != nil {
			// northd hasn't added an entry yet
			if errors.Is(lookupErr, client.ErrNotFound) {
				return false, nil
			}
			return false, fmt.Errorf("failed to get sb_global table entry from SB DB: %w", lookupErr)
		}
		// we only need to ensure it is greater than or equal to the expected value
		return sbGlobal.NbCfg >= expectedNbCfgValue, nil
	})
	if err != nil {
		return fmt.Errorf("failed while waiting for nb_cfg value greater than or equal %d in sb db sb_global table: %w", expectedNbCfgValue, err)
	}
	return nil
}
44 changes: 6 additions & 38 deletions go-controller/pkg/node/default_node_network_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,12 @@ func (oc *DefaultNodeNetworkController) Reconcile(netInfo util.NetInfo) error {
}

func clearOVSFlowTargets() error {
// TODO: match on something more specific than just the existence of an error
// nothing to clear if the bridge doesn't exist
if _, _, err := util.RunOVSVsctl("br-exists", "br-int"); err != nil {
return nil
}

_, _, err := util.RunOVSVsctl(
"--",
"clear", "bridge", "br-int", "netflow",
Expand Down Expand Up @@ -508,44 +514,6 @@ func setEncapPort(ctx context.Context) error {
return nil
}

// isOVNControllerReady reports whether the local ovn-controller looks ready.
// It returns true only when all of the following hold:
//   - the ovn-controller pid file can be read from the OVN run directory,
//   - "connection-status" queried through the pid-specific control socket is "connected",
//   - the br-int bridge exists,
//   - "dump-aggregate" on br-int succeeds and its output does not contain "flow_count=0".
//
// An error is returned only when the pid file cannot be read or the connection
// status cannot be queried; every other unmet condition yields (false, nil).
func isOVNControllerReady() (bool, error) {
	ovnRunDir := util.GetOvnRunDir()

	// The control socket name embeds the ovn-controller pid.
	pidBytes, err := os.ReadFile(ovnRunDir + "ovn-controller.pid")
	if err != nil {
		return false, fmt.Errorf("unknown pid for ovn-controller process: %v", err)
	}
	pidText := strings.TrimSuffix(string(pidBytes), "\n")
	ctlSocket := ovnRunDir + "ovn-controller." + pidText + ".ctl"

	// Ask ovn-controller for its connection status.
	status, _, err := util.RunOVSAppctl("-t", ctlSocket, "connection-status")
	if err != nil {
		return false, fmt.Errorf("could not get connection status: %w", err)
	}
	klog.Infof("Node connection status = %s", status)
	if status != "connected" {
		return false, nil
	}

	// The integration bridge must exist on the node.
	if _, _, err = util.RunOVSVsctl("--", "br-exists", "br-int"); err != nil {
		return false, nil
	}

	// Flows must be present on br-int.
	aggregate, _, err := util.RunOVSOfctl("dump-aggregate", "br-int")
	if err != nil {
		klog.V(5).Infof("Error dumping aggregate flows: %v", err)
		return false, nil
	}
	if strings.Contains(aggregate, "flow_count=0") {
		klog.V(5).Info("Got a flow count of 0 when dumping flows for node")
		return false, nil
	}

	return true, nil
}

// getEnvNameFromResourceName gets the device plugin env variable from the device plugin resource name.
func getEnvNameFromResourceName(resource string) string {
res1 := strings.ReplaceAll(resource, ".", "_")
Expand Down
10 changes: 1 addition & 9 deletions go-controller/pkg/node/gateway_init.go
Original file line number Diff line number Diff line change
Expand Up @@ -406,19 +406,11 @@ func (nc *DefaultNodeNetworkController) initGatewayPreStart(
return gw.initFunc()
}

readyGwFunc := func() (bool, error) {
controllerReady, err := isOVNControllerReady()
if err != nil || !controllerReady {
return false, err
}
return gw.readyFunc()
}

if err := nodeAnnotator.Run(); err != nil {
return nil, fmt.Errorf("failed to set node %s annotations: %w", nc.name, err)
}

waiter.AddWait(readyGwFunc, initGwFunc)
waiter.AddWait(gw.readyFunc, initGwFunc)
nc.Gateway = gw

// Wait for management port and gateway resources to be created by the master
Expand Down
Loading