@@ -278,10 +278,15 @@ func reconcileTaskState(ctx context.Context, w *worker, assignments []*api.Assig
278
278
279
279
removeTaskAssignment := func (taskID string ) error {
280
280
ctx := log .WithLogger (ctx , log .G (ctx ).WithField ("task.id" , taskID ))
281
- if err := SetTaskAssignment (tx , taskID , false ); err != nil {
282
- log .G (ctx ).WithError (err ).Error ("error setting task assignment in database" )
281
+ // if a task is no longer assigned, then we do not have to keep track
282
+ // of it. a task will only be unassigned when it is deleted on the
283
+ // manager. instead of calling SetTaskAssignment with false, we'll just remove the
284
+ // task now.
285
+ if err := DeleteTask (tx , taskID ); err != nil {
286
+ log .G (ctx ).WithError (err ).Error ("error removing de-assigned task" )
287
+ return err
283
288
}
284
- return err
289
+ return nil
285
290
}
286
291
287
292
// If this was a complete set of assignments, we're going to remove all the remaining
@@ -500,6 +505,21 @@ func (w *worker) newTaskManager(ctx context.Context, tx *bolt.Tx, task *api.Task
500
505
// updateTaskStatus reports statuses to listeners, read lock must be held.
501
506
func (w * worker ) updateTaskStatus (ctx context.Context , tx * bolt.Tx , taskID string , status * api.TaskStatus ) error {
502
507
if err := PutTaskStatus (tx , taskID , status ); err != nil {
508
+ // we shouldn't fail to put a task status. however, there exists the
509
+ // possibility of a race in which we try to put a task status after the
510
+ // task has been deleted. because this whole contraption is a careful
511
+ // dance of too-tightly-coupled concurrent parts, fixing that race is
512
+ // fraught with hazards. instead, we'll recognize that it can occur,
513
+ // log the error, and then ignore it.
514
+ if err == errTaskUnknown {
515
+ // log at info level. debug logging in docker is already really
516
+ // verbose, so many people disable it. the race that causes this
517
+ // behavior should be very rare, but if it occurs, we should know
518
+ // about it, because if there is some case where it is _not_ rare,
519
+ // then knowing about it will go a long way toward debugging.
520
+ log .G (ctx ).Info ("attempted to update status for a task that has been removed" )
521
+ return nil
522
+ }
503
523
log .G (ctx ).WithError (err ).Error ("failed writing status to disk" )
504
524
return err
505
525
}
0 commit comments