Skip to content

Commit a90382b

Browse files
committed
Increase throughput
The current implementation is not optimal. It uses a fixed-size goroutine pool. If the pool size specified by the user is too small, one slow resource can block the processing of other resources (different keys, but the same hash). The user can adjust the pool size, but it is very hard to figure out this number up front (before running the app in production). The new implementation spawns a dedicated goroutine for each new batch. At most one goroutine is created for a given resource key. The goroutine is destroyed once the batch ends.
1 parent 870ccfc commit a90382b

File tree

6 files changed

+163
-239
lines changed

6 files changed

+163
-239
lines changed

batch.go

+63-67
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ package batch
55

66
import (
77
"context"
8-
"runtime"
98
"sync"
109
"time"
1110
)
@@ -36,19 +35,6 @@ type Options[Resource any] struct {
3635
//
3736
// By default, does nothing.
3837
SaveResource func(_ context.Context, key string, _ Resource) error
39-
// GoRoutines specifies how many goroutines should be used to run batch operations.
40-
//
41-
// By default, 16 * number of CPUs.
42-
GoRoutines int
43-
// GoRoutineNumberForKey returns go-routine number which will be used to run operation on
44-
// a given resource key. This function is crucial to properly serialize requests.
45-
//
46-
// This function must be deterministic - it should always return the same go-routine number
47-
// for given combination of key and goroutines parameters.
48-
//
49-
// By default, GoroutineNumberForKey function is used. This implementation calculates hash
50-
// on a given key and use modulo to calculate go-routine number.
51-
GoRoutineNumberForKey func(key string, goroutines int) int
5238
}
5339

5440
// StartProcessor starts batch processor which will run operations in batches.
@@ -58,42 +44,20 @@ type Options[Resource any] struct {
5844
func StartProcessor[Resource any](options Options[Resource]) *Processor[Resource] {
5945
options = options.withDefaults()
6046

61-
workerChannels := make([]chan operation[Resource], options.GoRoutines)
62-
63-
var workersFinished sync.WaitGroup
64-
workersFinished.Add(options.GoRoutines)
65-
66-
for i := 0; i < options.GoRoutines; i++ {
67-
workerChannels[i] = make(chan operation[Resource])
68-
_worker := worker[Resource]{
69-
goRoutineNumber: i,
70-
incomingOperations: workerChannels[i],
71-
loadResource: options.LoadResource,
72-
saveResource: options.SaveResource,
73-
minDuration: options.MinDuration,
74-
maxDuration: options.MaxDuration,
75-
}
76-
77-
go func() {
78-
_worker.run()
79-
workersFinished.Done()
80-
}()
81-
}
82-
8347
return &Processor[Resource]{
84-
options: options,
85-
stopped: make(chan struct{}),
86-
workerChannels: workerChannels,
87-
workersFinished: &workersFinished,
48+
options: options,
49+
stopped: make(chan struct{}),
50+
batchChannels: map[string]chan operation[Resource]{},
8851
}
8952
}
9053

9154
// Processor represents an instance of the batch processor, which can be used to issue operations that run in batches.
9255
type Processor[Resource any] struct {
93-
options Options[Resource]
94-
stopped chan struct{}
95-
workerChannels []chan operation[Resource]
96-
workersFinished *sync.WaitGroup
56+
options Options[Resource]
57+
stopped chan struct{}
58+
allBatchesFinished sync.WaitGroup
59+
mutex sync.Mutex
60+
batchChannels map[string]chan operation[Resource]
9761
}
9862

9963
func (s Options[Resource]) withDefaults() Options[Resource] {
@@ -118,14 +82,6 @@ func (s Options[Resource]) withDefaults() Options[Resource] {
11882
s.MaxDuration = 2 * s.MinDuration
11983
}
12084

121-
if s.GoRoutines == 0 {
122-
s.GoRoutines = 16 * runtime.NumCPU()
123-
}
124-
125-
if s.GoRoutineNumberForKey == nil {
126-
s.GoRoutineNumberForKey = GoroutineNumberForKey
127-
}
128-
12985
return s
13086
}
13187

@@ -152,30 +108,70 @@ func (p *Processor[Resource]) Run(ctx context.Context, key string, _operation fu
152108
result := make(chan error)
153109
defer close(result)
154110

155-
goRoutineNumber := p.options.GoRoutineNumberForKey(key, p.options.GoRoutines)
111+
operationMessage := operation[Resource]{
112+
run: _operation,
113+
result: result,
114+
}
115+
116+
for {
117+
incomingOperations := p.temporaryBatchChannel(key)
118+
119+
select {
120+
case <-ctx.Done():
121+
return OperationCancelled
156122

157-
o := operation[Resource]{
158-
resourceKey: key,
159-
run: _operation,
160-
result: result,
123+
case incomingOperations <- operationMessage:
124+
return <-result
125+
126+
case <-time.After(10 * time.Millisecond):
127+
// Timeout waiting to push operation. Possibly batch goroutine was stopped.
128+
}
161129
}
162130

163-
select {
164-
case p.workerChannels[goRoutineNumber] <- o:
165-
return <-result
166-
case <-ctx.Done():
167-
return OperationCancelled
131+
}
132+
133+
func (p *Processor[Resource]) temporaryBatchChannel(key string) chan<- operation[Resource] {
134+
p.mutex.Lock()
135+
defer p.mutex.Unlock()
136+
137+
batchChannel, ok := p.batchChannels[key]
138+
if !ok {
139+
batchChannel = make(chan operation[Resource])
140+
p.batchChannels[key] = batchChannel
141+
142+
go p.startBatch(key, batchChannel)
143+
}
144+
145+
return batchChannel
146+
}
147+
148+
func (p *Processor[Resource]) startBatch(key string, batchChannel chan operation[Resource]) {
149+
p.allBatchesFinished.Add(1)
150+
defer p.allBatchesFinished.Done()
151+
152+
now := time.Now()
153+
154+
w := &batch[Resource]{
155+
Options: p.options,
156+
resourceKey: key,
157+
incomingOperations: batchChannel,
158+
stopped: p.stopped,
159+
softDeadline: now.Add(p.options.MinDuration),
160+
hardDeadline: now.Add(p.options.MaxDuration),
168161
}
162+
w.process()
163+
164+
p.mutex.Lock()
165+
defer p.mutex.Unlock()
166+
// Delete the channel even though it is still used by pending Run calls.
167+
// Those calls should time out and retry on a new channel.
168+
delete(p.batchChannels, key)
169169
}
170170

171171
// Stop ends all running batches. No new operations will be accepted.
172172
// Stop blocks until all pending batches are ended and resources saved.
173173
func (p *Processor[Resource]) Stop() {
174174
close(p.stopped)
175175

176-
for _, channel := range p.workerChannels {
177-
close(channel)
178-
}
179-
180-
p.workersFinished.Wait()
176+
p.allBatchesFinished.Wait()
181177
}

batch_bench_test.go

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
// (c) 2022 Jacek Olszak
2+
// This code is licensed under MIT license (see LICENSE for details)
3+
14
package batch_test
25

36
import (

goroutine.go

+97
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
// (c) 2022 Jacek Olszak
2+
// This code is licensed under MIT license (see LICENSE for details)
3+
4+
package batch
5+
6+
import (
7+
"context"
8+
"time"
9+
)
10+
11+
type batch[Resource any] struct {
12+
Options[Resource]
13+
resourceKey string
14+
incomingOperations <-chan operation[Resource]
15+
stopped <-chan struct{} // stopped is used to stop batch prematurely
16+
softDeadline time.Time
17+
hardDeadline time.Time
18+
19+
resource *Resource
20+
results []chan error
21+
}
22+
23+
func (b *batch[Resource]) process() {
24+
softDeadlineReached := time.NewTimer(b.softDeadline.Sub(time.Now()))
25+
defer softDeadlineReached.Stop()
26+
27+
for {
28+
select {
29+
case <-b.stopped:
30+
b.end()
31+
return
32+
33+
case <-softDeadlineReached.C:
34+
b.end()
35+
return
36+
37+
case _operation := <-b.incomingOperations:
38+
err := b.load()
39+
if err != nil {
40+
_operation.result <- err
41+
return
42+
}
43+
44+
b.results = append(b.results, _operation.result)
45+
_operation.run(*b.resource)
46+
}
47+
}
48+
}
49+
50+
func (b *batch[Resource]) end() {
51+
if b.resource == nil {
52+
return
53+
}
54+
55+
err := b.save()
56+
for _, result := range b.results {
57+
result <- err
58+
}
59+
}
60+
61+
func (b *batch[Resource]) save() error {
62+
ctx, cancel := context.WithDeadline(context.Background(), b.hardDeadline)
63+
defer cancel()
64+
65+
if err := b.SaveResource(ctx, b.resourceKey, *b.resource); err != nil {
66+
return err
67+
}
68+
69+
return nil
70+
}
71+
72+
func (b *batch[Resource]) load() error {
73+
if b.alreadyLoaded() {
74+
return nil
75+
}
76+
77+
ctx, cancel := context.WithDeadline(context.Background(), b.hardDeadline)
78+
defer cancel()
79+
80+
resource, err := b.LoadResource(ctx, b.resourceKey)
81+
if err != nil {
82+
return err
83+
}
84+
85+
b.resource = &resource
86+
87+
return nil
88+
}
89+
90+
func (b *batch[Resource]) alreadyLoaded() bool {
91+
return b.resource != nil
92+
}
93+
94+
// operation is the message sent to a batch for a single requested operation.
type operation[Resource any] struct {
	// run is invoked by the batch with the loaded resource.
	run func(Resource)

	// result receives the error (or nil) that concludes the operation.
	result chan error
}

hash.go

-15
This file was deleted.

hash_test.go

-23
This file was deleted.

0 commit comments

Comments
 (0)