-
Notifications
You must be signed in to change notification settings - Fork 415
Add SQL election module #3318
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Add SQL election module #3318
Changes from 1 commit
219fc8d
5f4b850
4d31437
c2b24eb
05b113d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,269 @@ | ||
| // Copyright 2023 Google LLC. All Rights Reserved. | ||
| // | ||
| // Licensed under the Apache License, Version 2.0 (the "License"); | ||
| // you may not use this file except in compliance with the License. | ||
| // You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, software | ||
| // distributed under the License is distributed on an "AS IS" BASIS, | ||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| // See the License for the specific language governing permissions and | ||
| // limitations under the License. | ||
|
|
||
| // Package etcd provides an implementation of leader election based on a SQL database. | ||
| package sql | ||
|
|
||
| import ( | ||
| "context" | ||
| "database/sql" | ||
| "fmt" | ||
| "sync" | ||
| "time" | ||
|
|
||
| "github.com/google/trillian/util/election2" | ||
| "k8s.io/klog/v2" | ||
| ) | ||
|
|
||
| type leaderData struct { | ||
| currentLeader string | ||
| timestamp time.Time | ||
| } | ||
|
|
||
| // Election is an implementation of election2.Election based on a SQL database. | ||
| type Election struct { | ||
| db *sql.DB | ||
| instanceID string | ||
| resourceID string | ||
|
|
||
| currentLeader leaderData | ||
| leaderLock sync.Cond | ||
|
|
||
| // If a channel is supplied with the cancel, it will be signalled when the election routine has exited. | ||
| cancel chan *chan error | ||
| electionInterval time.Duration | ||
| } | ||
|
|
||
| var _ election2.Election = (*Election)(nil) | ||
|
|
||
| // Await implements election2.Election | ||
| func (e *Election) Await(ctx context.Context) error { | ||
| e.leaderLock.L.Lock() | ||
| defer e.leaderLock.L.Unlock() | ||
| if e.cancel == nil { | ||
| e.cancel = make(chan *chan error) | ||
| go e.becomeLeaderLoop(context.Background(), e.cancel) | ||
| } | ||
| if e.currentLeader.currentLeader == e.instanceID { | ||
| return nil | ||
| } | ||
| for e.currentLeader.currentLeader != e.instanceID { | ||
| e.leaderLock.Wait() | ||
|
|
||
| select { | ||
| case <-ctx.Done(): | ||
| return ctx.Err() | ||
| default: | ||
| klog.Infof("Waiting for leadership, %s is the leader at %s", e.currentLeader.currentLeader, e.currentLeader.timestamp) | ||
| } | ||
| } | ||
| klog.Infof("%s became leader for %s at %s", e.instanceID, e.resourceID, e.currentLeader.timestamp) | ||
| return nil | ||
| } | ||
|
|
||
| // Close implements election2.Election | ||
| func (e *Election) Close(ctx context.Context) error { | ||
| if err := e.Resign(ctx); err != nil { | ||
| klog.Errorf("Failed to resign leadership: %v", err) | ||
| return err | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| // Resign implements election2.Election | ||
| func (e *Election) Resign(ctx context.Context) error { | ||
| e.leaderLock.L.Lock() | ||
| closer := e.cancel | ||
| e.cancel = nil | ||
| e.leaderLock.L.Unlock() | ||
| if closer == nil { | ||
| return nil | ||
| } | ||
| // Stop trying to elect ourselves | ||
| done := make(chan error) | ||
| closer <- &done | ||
| return <-done | ||
| } | ||
|
|
||
| // WithMastership implements election2.Election | ||
| func (e *Election) WithMastership(ctx context.Context) (context.Context, error) { | ||
| cctx, cancel := context.WithCancel(ctx) | ||
| e.leaderLock.L.Lock() | ||
| defer e.leaderLock.L.Unlock() | ||
| if e.currentLeader.currentLeader != e.instanceID { | ||
| // Not the leader, cancel | ||
| cancel() | ||
| return cctx, nil | ||
| } | ||
|
|
||
| // Start a goroutine to cancel the context when we are no longer leader | ||
| go func() { | ||
| e.leaderLock.L.Lock() | ||
| defer e.leaderLock.L.Unlock() | ||
| for e.currentLeader.currentLeader == e.instanceID { | ||
| e.leaderLock.Wait() | ||
| } | ||
| select { | ||
| case <-ctx.Done(): | ||
| // Don't complain if our context already completed. | ||
| return | ||
| default: | ||
| cancel() | ||
| klog.Warningf("%s cancelled: lost leadership, %s is the leader at %s", e.resourceID, e.currentLeader.currentLeader, e.currentLeader.timestamp) | ||
| } | ||
| }() | ||
|
|
||
| return cctx, nil | ||
| } | ||
|
|
||
| // becomeLeaderLoop runs continuously to participate in elections until a message is sent on `cancel` | ||
| func (e *Election) becomeLeaderLoop(ctx context.Context, closer chan *chan error) { | ||
| for { | ||
| select { | ||
| case ch := <-closer: | ||
| err := e.tearDown() | ||
| klog.Infof("Election teardown for %s: %v", e.resourceID, err) | ||
| if ch != nil { | ||
| *ch <- err | ||
| } | ||
| return | ||
| default: | ||
| leader, err := e.tryBecomeLeader(ctx) | ||
| if err != nil { | ||
| klog.Errorf("Failed attempt to become leader for %s, retrying: %v", e.resourceID, err) | ||
| } else { | ||
| e.leaderLock.L.Lock() | ||
| if leader != e.currentLeader { | ||
| // Note: this code does not actually care _which_ instance was | ||
| // elected, it sends notifications on each leadership cahnge. | ||
| e.currentLeader = leader | ||
| e.leaderLock.Broadcast() | ||
| } | ||
| e.leaderLock.L.Unlock() | ||
| } | ||
| time.Sleep(e.electionInterval) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| func (e *Election) tryBecomeLeader(ctx context.Context) (leaderData, error) { | ||
| leader := leaderData{} | ||
| tx, err := e.db.BeginTx(ctx, &sql.TxOptions{Isolation: sql.LevelSerializable}) | ||
| if err != nil { | ||
| return leader, fmt.Errorf("BeginTX: %w", err) | ||
| } | ||
| defer tx.Rollback() | ||
| row := tx.QueryRow( | ||
| "SELECT leader, last_update FROM leader_election WHERE resource_id = $1", | ||
|
||
| e.resourceID) | ||
| if err := row.Scan(&leader.currentLeader, &leader.timestamp); err != nil { | ||
| return leader, fmt.Errorf("Select: %w", err) | ||
| } | ||
|
|
||
| if leader.currentLeader != e.instanceID && leader.timestamp.Add(e.electionInterval*10).After(time.Now()) { | ||
| return leader, nil // Someone else won the election | ||
| } | ||
|
|
||
| timestamp := time.Now() | ||
| _, err = tx.Exec( | ||
| "UPDATE leader_election SET leader = $1, last_update = $2 WHERE resource_id = $3 AND leader = $4 AND last_update = $5", | ||
| e.instanceID, timestamp, e.resourceID, leader.currentLeader, leader.timestamp) | ||
| if err != nil { | ||
| return leader, fmt.Errorf("Update: %w", err) | ||
| } | ||
|
|
||
| if err := tx.Commit(); err != nil { | ||
| return leader, fmt.Errorf("Commit failed: %w", err) | ||
| } | ||
| leader = leaderData{currentLeader: e.instanceID, timestamp: timestamp} | ||
| return leader, nil | ||
| } | ||
|
|
||
| func (e *Election) tearDown() error { | ||
| e.leaderLock.L.Lock() | ||
| defer e.leaderLock.L.Unlock() | ||
| if e.currentLeader.currentLeader != e.instanceID { | ||
| return nil | ||
| } | ||
| e.currentLeader.currentLeader = "empty leader" | ||
| e.leaderLock.Broadcast() | ||
|
|
||
| // Reset election time to epoch to allow a faster fail-over | ||
| res, err := e.db.Exec( | ||
| "UPDATE leader_election SET last_update = $1 WHERE resource_id = $2 AND leader = $3 AND last_update = $4", | ||
| time.Time{}, e.resourceID, e.instanceID, e.currentLeader.timestamp) | ||
| if err != nil { | ||
| return fmt.Errorf("Update: %w", err) | ||
| } | ||
| if n, err := res.RowsAffected(); n != 1 || err != nil { | ||
| return fmt.Errorf("failed to resign leadership: %d, %w", n, err) | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| func (e *Election) initializeLock(ctx context.Context) error { | ||
| insert, err := e.db.Prepare("INSERT INTO leader_election (resource_id, leader, last_update) VALUES ($1, $2, $3)") | ||
|
||
| if err != nil { | ||
| return err | ||
| } | ||
| defer insert.Close() | ||
|
|
||
| _, err = insert.Exec(e.resourceID, "empty leader", time.Time{}) | ||
| return err | ||
| } | ||
|
|
||
| type SqlFactory struct { | ||
| db *sql.DB | ||
| instanceID string | ||
| opts []Option | ||
| } | ||
|
|
||
| var _ election2.Factory = (*SqlFactory)(nil) | ||
|
|
||
| type Option func(*Election) *Election | ||
|
|
||
| func NewFactory(instanceID string, database *sql.DB, opts... Option) (*SqlFactory, error) { | ||
| return &SqlFactory{db: database, instanceID: instanceID, opts: opts}, nil | ||
| } | ||
|
|
||
| func WithElectionInterval(interval time.Duration) Option { | ||
| return func(f *Election) *Election { | ||
| f.electionInterval = interval | ||
| return f | ||
| } | ||
| } | ||
|
|
||
| // NewElection implements election2.Factory. | ||
| func (f *SqlFactory) NewElection(ctx context.Context, resourceID string) (election2.Election, error) { | ||
| // Ensure we have a database connection | ||
| if f.db == nil { | ||
| return nil, fmt.Errorf("no database connection") | ||
| } | ||
| if err := f.db.Ping(); err != nil { | ||
| return nil, err | ||
| } | ||
| e := &Election{ | ||
| db: f.db, | ||
| instanceID: f.instanceID, | ||
| resourceID: resourceID, | ||
| leaderLock: sync.Cond{L: &sync.Mutex{}}, | ||
| electionInterval: 1 * time.Second, | ||
| } | ||
| for _, opt := range f.opts { | ||
| e = opt(e) | ||
| } | ||
| e.initializeLock(ctx) | ||
|
|
||
| return e, nil | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.