Skip to content

Commit e7c07a1

Browse files
Gate ovn-controller start until ovnkube controller syncs
For IC mode, after a Node reboot, SB DB may contain stale control plane data because ovnkube-controller has not config'd OVN and changes propagated to SB DB. The issue we see downstream is that when an egress Node which hosts an EgressIP IP, and when it reboots, ovn-controller will connect to SB DB which will contain stale SNAT to support EgressIP. ovn-controller will GARP for the EgressIP IP even though the EgressIP IP may have already been assigned to another Node. Therefore, gate ovn-controller until ovnkube controller has syncd and the changes have reached SB DB for IC mode only. Other options were considered including wiping the OVN DBs on startup before ovnkube-node pod starts but we may wipe out dynamic info like learned MACs. Also, we do not want to gate starting ovn-controller for a simple ovnkube pod restart because it would affect the data plane whereas post reboot, there will not be any active workloads. Signed-off-by: Martin Kennelly <[email protected]> (cherry picked from commit b95dcb6cd6655cdd3c1202b419e6c980db566199)
1 parent 80ebd7a commit e7c07a1

File tree

2 files changed

+117
-4
lines changed

2 files changed

+117
-4
lines changed

dist/templates/ovnkube-node.yaml.j2

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -289,14 +289,23 @@ spec:
289289
- name: ovn-controller
290290
image: "{{ ovn_image | default('docker.io/ovnkube/ovn-daemonset:latest') }}"
291291
imagePullPolicy: "{{ ovn_image_pull_policy | default('IfNotPresent') }}"
292-
293-
command: ["/root/ovnkube.sh", "ovn-controller"]
294-
292+
command:
293+
- "/bin/sh"
294+
- "-c"
295+
- |
296+
{% if ovn_enable_interconnect == "true" -%}
297+
echo "waiting for ovnkube-controller to sync once post reboot by ensuring a file exits..."
298+
while [ ! -f /tmp/ovnkube-controller-sync-post-boot ]; do
299+
echo "waiting for OVN Kubernetes ovnkube-controller to write a file to indicate it has sync'd post reboot before starting ovn-controller..."
300+
sleep .1
301+
done
302+
{% endif %}
303+
echo "starting ovn-controller"
304+
/root/ovnkube.sh ovn-controller
295305
securityContext:
296306
runAsUser: 0
297307
capabilities:
298308
add: ["SYS_NICE"]
299-
300309
terminationMessagePolicy: FallbackToLogsOnError
301310
volumeMounts:
302311
- mountPath: /var/run/dbus/

go-controller/cmd/ovnkube/ovnkube.go

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package main
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
67
"io"
78
"os"
@@ -16,22 +17,28 @@ import (
1617
"github.com/urfave/cli/v2"
1718

1819
"k8s.io/apimachinery/pkg/util/sets"
20+
"k8s.io/apimachinery/pkg/util/wait"
1921
"k8s.io/client-go/tools/leaderelection"
2022
"k8s.io/client-go/tools/leaderelection/resourcelock"
2123
"k8s.io/client-go/tools/record"
2224
"k8s.io/klog/v2"
2325
kexec "k8s.io/utils/exec"
2426

2527
"github.com/ovn-org/libovsdb/client"
28+
"github.com/ovn-org/libovsdb/model"
29+
"github.com/ovn-org/libovsdb/ovsdb"
2630

2731
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager"
2832
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config"
2933
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controllermanager"
3034
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory"
3135
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb"
36+
libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops"
3237
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics"
38+
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb"
3339
ovnnode "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node"
3440
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/routemanager"
41+
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/sbdb"
3542
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
3643
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
3744
utilerrors "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/errors"
@@ -149,6 +156,77 @@ func delPidfile(pidfile string) {
149156
}
150157
}
151158

159+
// ovnkubeControllerSyncPostBootFilePath is located in storage that does not persist following a reboot. ovn-controller start
160+
// is predicated on the existence of this file.
161+
const ovnkubeControllerSyncPostBootFilePath = "/tmp/ovnkube-controller-sync-post-boot"
162+
163+
func ensureOVNKubeControllerSyncPostBootFile() error {
164+
if err := os.WriteFile(ovnkubeControllerSyncPostBootFilePath, []byte(time.Now().String()), 0o644); err != nil {
165+
return fmt.Errorf("failed to write to file at path %q: %v", ovnkubeControllerSyncPostBootFilePath, err)
166+
}
167+
return nil
168+
}
169+
170+
// doesOVNKubeControllerSyncPostBootFileExist checks if a sentinel file exists. File will not exist post Node restart. OVNKube controller creates the file post sync and after a reboot.
171+
func doesOVNKubeControllerSyncPostBootFileExist() (bool, error) {
172+
exists := true
173+
_, err := os.Stat(ovnkubeControllerSyncPostBootFilePath)
174+
if err != nil {
175+
if !os.IsNotExist(err) {
176+
return exists, fmt.Errorf("failed to stat file at path %q: %w", ovnkubeControllerSyncPostBootFilePath, err)
177+
}
178+
exists = false
179+
}
180+
return exists, nil
181+
}
182+
183+
// waitUntilNorthdSyncOnce ensures northd has sync'd at least once by increments nb_cfg value in NB DB and waiting
184+
// for northd to copy it to SB DB. Poll SB DB until context is cancelled.
185+
// The expectation is that the data you wish to be sync'd to SB DB has already been written to NB DB so when we get the initial
186+
// nb_cfg value, we know that if we increment that by one and see that value or greater in SB DB, then the data has sync'd.
187+
// All other processes interacting with nb_cfg increment it. This function depends on other processes respecting that.
188+
// No guarantee of any changes in SB DB made after this func.
189+
func waitUntilNorthdSyncOnce(ctx context.Context, nbClient, sbClient client.Client) error {
190+
// 1. Get value of nb_cfg
191+
// 2. Increment value of nb_cfg
192+
// 3. Wait until value appears in SB DB after northd copies it.
193+
nbGlobal := &nbdb.NBGlobal{}
194+
nbGlobal, err := libovsdbops.GetNBGlobal(nbClient, nbGlobal)
195+
if err != nil {
196+
return fmt.Errorf("failed to find OVN Northbound NB_Global table"+
197+
" entry: %w", err)
198+
}
199+
// increment nb_cfg value by 1. When northd consumes updates from NB DB, it will copy this value to SB DBs SB_Global table nb_cfg field.
200+
ops, err := nbClient.Where(nbGlobal).Mutate(nbGlobal, model.Mutation{
201+
Field: &nbGlobal.NbCfg,
202+
Mutator: ovsdb.MutateOperationAdd,
203+
Value: 1,
204+
})
205+
if err != nil {
206+
return fmt.Errorf("failed to generate ops to mutate nb_cfg: %w", err)
207+
}
208+
expectedNbCfgValue := nbGlobal.NbCfg + 1
209+
if _, err = libovsdbops.TransactAndCheck(nbClient, ops); err != nil {
210+
return fmt.Errorf("failed to transact to increment nb_cfg: %w", err)
211+
}
212+
sbGlobal := &sbdb.SBGlobal{}
213+
// poll until we see the expected value in SB DB every 5 milliseconds until context is cancelled.
214+
err = wait.PollUntilContextCancel(ctx, time.Millisecond*5, true, func(_ context.Context) (done bool, err error) {
215+
if sbGlobal, err = libovsdbops.GetSBGlobal(sbClient, sbGlobal); err != nil {
216+
// northd hasn't added an entry yet
217+
if errors.Is(err, client.ErrNotFound) {
218+
return false, nil
219+
}
220+
return false, fmt.Errorf("failed to get sb_global table entry from SB DB: %w", err)
221+
}
222+
return sbGlobal.NbCfg >= expectedNbCfgValue, nil // we only need to ensure it is greater than or equal to the expected value
223+
})
224+
if err != nil {
225+
return fmt.Errorf("failed while waiting for nb_cfg value greater than or equal %d in sb db sb_global table: %w", expectedNbCfgValue, err)
226+
}
227+
return nil
228+
}
229+
152230
func setupPIDFile(pidfile string) error {
153231
// need to test if already there
154232
_, err := os.Stat(pidfile)
@@ -493,6 +571,19 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util
493571
defer cancel()
494572
defer wg.Done()
495573

574+
// If IC mode, ovnkube controller will create a file in directory that will not persistent a Node reboot, therefore
575+
// we can determine if the Node is rebooting or not. Post sync, ensure the file exists. ovn-controller start is
576+
// predicated on the file existence because in IC mode, SB DB may contain stale data from before reboot and, we do
577+
// not want ovn-controller to consume this stale control plane data.
578+
var doesBootFileExist bool
579+
if config.OVNKubernetesFeature.EnableInterconnect {
580+
// no support for multiple Nodes per zone.
581+
doesBootFileExist, err = doesOVNKubeControllerSyncPostBootFileExist()
582+
if err != nil {
583+
controllerErr = fmt.Errorf("failed to determine if Node restarted via sentinel file availability: %v", err)
584+
}
585+
}
586+
496587
libovsdbOvnNBClient, err := libovsdb.NewNBClient(ctx.Done())
497588
if err != nil {
498589
controllerErr = fmt.Errorf("failed to initialize libovsdb NB client: %w", err)
@@ -523,6 +614,19 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util
523614
return
524615
}
525616

617+
// if IC and a Node restart, block writing sentinel file until northd has sync'd once.
618+
// external resources may predicate on this file availability and guaranteed ovnkube controller has sync'd once
619+
// and changes propagated to SB DB.
620+
if config.OVNKubernetesFeature.EnableInterconnect && !doesBootFileExist {
621+
if err = waitUntilNorthdSyncOnce(ctx, libovsdbOvnNBClient, libovsdbOvnSBClient); err != nil {
622+
controllerErr = fmt.Errorf("failed while waiting for northd to copy NB DB contents to SB DB: %w", err)
623+
return
624+
}
625+
if err = ensureOVNKubeControllerSyncPostBootFile(); err != nil {
626+
controllerErr = fmt.Errorf("failed to ensure ovnkube controller start file: %w", err)
627+
}
628+
}
629+
526630
// record delay until ready
527631
metrics.MetricOVNKubeControllerReadyDuration.Set(time.Since(startTime).Seconds())
528632

0 commit comments

Comments
 (0)