Skip to content

Possible race condition after password update #23445

Description

@mridul303

If you are reporting a problem, please make sure the following information are provided:

Expected behavior and actual behavior:
We have a password rotation framework written in Go that rotates passwords for various services, including Harbor's local admin user. The rotation flow is:
LOGIN (verification) -> PASSWORD UPDATE -> RE-LOGIN (with new creds) -> POST UPDATE STEPS

When the password is updated for harbor via harbor's API and we get the 200 response, only then do we proceed to the re-login step. On average, there is only a couple of milliseconds (or less) delay between password update and re-login.

Expected behavior: Re-login step is successful
Actual behavior: In rare cases the re-login step returns 401 Unauthorized. Adding a delay before the re-login avoids the failure, but the required delay depends on the setup: a 2s delay is enough in some setups, while slower setups need up to 10s.

Steps to reproduce the problem:
I wrote a simple Go program that repeatedly changes the password, logging in before and after each update, for 300 cycles. The failure is not 100% reproducible, but when the program is run several times in succession we typically hit the issue within 5-10 runs.

package main

import (
      "bytes"
      "context"
      "crypto/tls"
      "encoding/base64"
      "encoding/json"
      "fmt"
      "io"
      "log"
      "net/http"
      "os"
      "time"
)

// ── Configure these ───────────────────────────────────────────────────────────

const (
      // harborBaseURL is the scheme and host of the Harbor instance under test.
      // Replace the placeholder before running.  API paths are appended after a
      // "/" separator, so do not add a trailing slash.
      harborBaseURL = "https://<harbor-domain>"

      // passwordA must be the CURRENT live password before you run this script.
      // passwordB just needs to satisfy Harbor's complexity rules.
      // The script ping-pongs between them so nothing is permanently changed.
      passwordA = "password1"
      passwordB = "password2"

      // adminUser is the account whose password is rotated; the same account
      // authenticates every API call the script makes.
      adminUser = "admin"

      // maxCycles: how many rotate-and-verify loops to run.
      // The race window is narrow; 200-500 cycles reliably surfaces it.
      maxCycles = 300

      // cacheWarmRounds: plain logins before cycling, to ensure Harbor's Redis
      // cache has a hot entry for the user.  A cold cache = no race to hit.
      cacheWarmRounds = 10
)

// ── Harbor API paths ──────────────────────────────────────────────────────────

const (
      // sessionPath is the "current user" endpoint.  loginFull issues a GET
      // against it with basic auth and treats a 200 response as a successful
      // login.
      sessionPath = "api/v2.0/users/current"

      // usersPath is the users collection.  updatePassword appends
      // "/<userID>/password" to it to build the password-change URL.
      usersPath   = "api/v2.0/users"
)

// ── Result types ──────────────────────────────────────────────────────────────

// cycleResult captures the timings and outcome of one rotation cycle as
// produced by runCycle.
type cycleResult struct {
      // n is the cycle number (main counts from 1).
      n         int
      // fromPass is the password in effect when the cycle started.
      fromPass  string
      // toPass is the password the cycle tried to rotate to.
      toPass    string
      // updateMs is the duration of the password-update call in milliseconds.
      // It stays zero when the cycle aborts before the update succeeds.
      updateMs  int64
      gapNs     int64 // nanoseconds between Update response and Login request send
      // loginMs is the duration of the post-update login in milliseconds.
      loginMs   int64
      // loginCode is the HTTP status of the post-update login.  It is zero when
      // the cycle failed before that login or the request never got a response.
      loginCode int
      // loginErr is the error of whichever step failed (pre-update login,
      // update, or post-update login); nil means the whole cycle succeeded.
      loginErr  error
}

// ── Entry point ───────────────────────────────────────────────────────────────

func main() {
      log.SetFlags(log.Ltime | log.Lmicroseconds)
      ctx := context.Background()

      client := &http.Client{
            Transport: &http.Transport{
                  TLSClientConfig:     &tls.Config{InsecureSkipVerify: true}, //nolint:gosec
                  MaxIdleConnsPerHost: 10,
                  IdleConnTimeout:     90 * time.Second,
            },
            Timeout: 30 * time.Second,
      }

      log.Println("[BOOT] Verifying initial credentials and fetching userID…")
      userID, err := loginGetUID(ctx, client, adminUser, passwordA)
      if err != nil {
            log.Fatalf("[BOOT] Login failed: %v", err)
      }
      log.Printf("[BOOT] userID=%d — ready\n", userID)

      log.Printf("[WARM] Warming Harbor cache with %d logins…\n", cacheWarmRounds)
      for i := 0; i < cacheWarmRounds; i++ {
            if _, err := loginGetUID(ctx, client, adminUser, passwordA); err != nil {
                  log.Fatalf("[WARM] Warm-up login %d failed: %v", i, err)
            }
      }
      log.Println("[WARM] Done")

      current, next := passwordA, passwordB
      var failures []cycleResult

      for i := 1; i <= maxCycles; i++ {
            r := runCycle(ctx, client, i, userID, adminUser, current, next)

            label := "OK"
            if r.loginErr != nil {
                  label = fmt.Sprintf("FAIL http=%d", r.loginCode)
                  failures = append(failures, r)
            }

            log.Printf("[%04d] %s→%s | update=%4dms  gap=%7dµs  login=%4dms | %s",
                  i,
                  mask(current), mask(next),
                  r.updateMs,
                  r.gapNs/1_000,
                  r.loginMs,
                  label,
            )

            if r.loginErr != nil {
                  log.Printf("[%04d]   └─ %v", i, r.loginErr)

                  log.Printf("[%04d]   └─ recovering state…", i)
                  current, userID, err = recoverState(ctx, client, adminUser, current, next)
                  if err != nil {
                        log.Fatalf("[%04d]   └─ UNRECOVERABLE — manual reset required: %v", i, err)
                  }
                  next = alternate(current)
                  log.Printf("[%04d]   └─ recovered; active password is %s\n", i, mask(current))
                  continue
            }

            current, next = next, current // swap for next iteration
      }

      // ── Summary ───────────────────────────────────────────────────────────────
      fmt.Println()
      fmt.Println("╔════════════════════════════════════════╗")
      fmt.Printf("║  Cycles run  : %-6d                  ║\n", maxCycles)
      fmt.Printf("║  Failures    : %-6d                  ║\n", len(failures))
      fmt.Println("╚════════════════════════════════════════╝")

      if len(failures) > 0 {
            fmt.Println("\nFailure detail  (gap = time between Update response and Login send):")
            for _, f := range failures {
                  fmt.Printf("  cycle=%-4d  gap=%-8dµs  http=%-3d  err=%v\n",
                        f.n, f.gapNs/1_000, f.loginCode, f.loginErr)
            }
            os.Exit(1)
      }
}

// runCycle is one complete rotation:
//
//  1. Pre-update login  – validates current password AND refreshes the Redis
//     cache entry right before the update (maximises cache-hit probability).
//  2. Update            – changes current → next.
//  3. Re-login          – uses next; this is the exact race point from
//     production.  No artificial delay is inserted.
func runCycle(
      ctx context.Context,
      client *http.Client,
      n, userID int,
      username, current, next string,
) cycleResult {
      r := cycleResult{n: n, fromPass: current, toPass: next}

      // Step 1 — pre-update login (cache warm for THIS specific cycle)
      if _, err := loginGetUID(ctx, client, username, current); err != nil {
            r.loginErr = fmt.Errorf("pre-update login: %w", err)
            return r
      }

      // Step 2 — update password
      t0 := time.Now()
      if err := updatePassword(ctx, client, username, current, userID, current, next); err != nil {
            r.loginErr = fmt.Errorf("update: %w", err)
            return r
      }
      t1 := time.Now()
      r.updateMs = t1.Sub(t0).Milliseconds()

      // Step 3 — re-login with zero artificial delay
      // t2 is captured immediately; gapNs is the scheduler overhead only.
      t2 := time.Now()
      r.gapNs = t2.Sub(t1).Nanoseconds()

      code, err := loginGetCode(ctx, client, username, next)
      r.loginMs = time.Since(t2).Milliseconds()
      r.loginCode = code
      r.loginErr = err
      return r
}

// ── Harbor API calls ──────────────────────────────────────────────────────────

// loginGetUID authenticates and returns the user_id from Harbor's
// /api/v2.0/users/current endpoint.
func loginGetUID(ctx context.Context, client *http.Client, username, password string) (int, error) {
      uid, _, err := loginFull(ctx, client, username, password)
      return uid, err
}

// loginGetCode authenticates and returns only the HTTP status code (and error).
// Used for the post-update verification where we want to capture the exact code.
func loginGetCode(ctx context.Context, client *http.Client, username, password string) (int, error) {
      _, code, err := loginFull(ctx, client, username, password)
      return code, err
}

func loginFull(ctx context.Context, client *http.Client, username, password string) (userID, code int, err error) {
      url := fmt.Sprintf("%s/%s", harborBaseURL, sessionPath)
      req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
      if err != nil {
            return 0, 0, err
      }
      setBasicAuth(req, username, password)
      req.Header.Set("Accept", "application/json")

      resp, err := client.Do(req)
      if err != nil {
            return 0, 0, fmt.Errorf("transport: %w", err)
      }
      defer resp.Body.Close()
      body, _ := io.ReadAll(resp.Body)

      var parsed map[string]any
      _ = json.Unmarshal(body, &parsed)

      if resp.StatusCode != http.StatusOK {
            return 0, resp.StatusCode,
                  fmt.Errorf("HTTP %d: %s", resp.StatusCode, harborErrMsg(parsed))
      }

      uid, ok := parsed["user_id"].(float64)
      if !ok {
            return 0, resp.StatusCode, fmt.Errorf("user_id absent in response")
      }
      return int(uid), resp.StatusCode, nil
}

func updatePassword(
      ctx context.Context,
      client *http.Client,
      authUser, authPass string,
      userID int,
      oldPass, newPass string,
) error {
      url := fmt.Sprintf("%s/%s/%d/password", harborBaseURL, usersPath, userID)
      payload, _ := json.Marshal(map[string]string{
            "old_password": oldPass,
            "new_password": newPass,
      })

      req, err := http.NewRequestWithContext(ctx, http.MethodPut, url, bytes.NewReader(payload))
      if err != nil {
            return err
      }
      setBasicAuth(req, authUser, authPass)
      req.Header.Set("Content-Type", "application/json")
      req.Header.Set("Accept", "application/json")

      resp, err := client.Do(req)
      if err != nil {
            return fmt.Errorf("transport: %w", err)
      }
      defer resp.Body.Close()

      if resp.StatusCode != http.StatusOK {
            body, _ := io.ReadAll(resp.Body)
            var parsed map[string]any
            _ = json.Unmarshal(body, &parsed)
            return fmt.Errorf("HTTP %d: %s", resp.StatusCode, harborErrMsg(parsed))
      }
      return nil
}

// ── State recovery ────────────────────────────────────────────────────────────

// recoverState probes Harbor with both passwords to determine which one is
// actually active.  Called after a cycle where the update succeeded (200) but
// the re-login got 401.
//
// Possible states after that failure:
//
//    a) Harbor has 'next' (update committed, cache was stale → 401 was the bug)
//    b) Harbor still has 'current' (update committed but something else failed)
//
// We try 'next' first since the update returned 200.
func recoverState(
      ctx context.Context,
      client *http.Client,
      username, current, next string,
) (activePassword string, userID int, err error) {
      if uid, err := loginGetUID(ctx, client, username, next); err == nil {
            return next, uid, nil
      }
      if uid, err := loginGetUID(ctx, client, username, current); err == nil {
            return current, uid, nil
      }
      return "", 0, fmt.Errorf("neither '%s' nor '%s' authenticates", mask(current), mask(next))
}

// ── Helpers ───────────────────────────────────────────────────────────────────

// setBasicAuth sets the request's Authorization header to
// "Basic <base64(username:password)>" using standard base64 encoding,
// replacing any existing value.
func setBasicAuth(req *http.Request, username, password string) {
      credentials := []byte(username + ":" + password)
      encoded := make([]byte, base64.StdEncoding.EncodedLen(len(credentials)))
      base64.StdEncoding.Encode(encoded, credentials)
      req.Header.Set("Authorization", "Basic "+string(encoded))
}

// harborErrMsg turns a decoded error response into a one-line message.
//
//   - A nil map yields "(empty body)".
//   - If "errors" is missing, is not a list, or is an empty list, the whole
//     map is returned re-encoded as JSON.
//   - Otherwise every list entry that is a JSON object contributes
//     "[<code>] <message>" followed by two spaces; other entries are skipped.
func harborErrMsg(resp map[string]any) string {
      if resp == nil {
            return "(empty body)"
      }

      var entries []any
      if list, isList := resp["errors"].([]any); isList {
            entries = list
      }
      if len(entries) == 0 {
            // A map decoded from JSON re-encodes without error, so the
            // error result is not checked.
            raw, _ := json.Marshal(resp)
            return string(raw)
      }

      var out string
      for idx := range entries {
            entry, isObject := entries[idx].(map[string]any)
            if !isObject {
                  continue
            }
            out += fmt.Sprintf("[%v] %v  ", entry["code"], entry["message"])
      }
      return out
}

// mask shows only the first 2 and last 1 characters to keep logs readable
// without leaking full passwords.  Passwords of 4 characters or fewer are
// replaced entirely by "****".
//
// Characters are counted and cut as runes, not bytes, so a password with
// multi-byte UTF-8 characters is never split in the middle of a character.
// The output still reveals three characters of the password.
func mask(p string) string {
      runes := []rune(p)
      if len(runes) <= 4 {
            return "****"
      }
      head := string(runes[:2])
      tail := string(runes[len(runes)-1:])
      return fmt.Sprintf("%s…%s", head, tail)
}

func alternate(current string) string {
      if current == passwordA {
            return passwordB
      }
      return passwordA
}

Versions:
Please specify the versions of following systems.

  • harbor version: 2.14.2
  • containerd version: 2.1.5
  • RKE2 version: 1.35.1

Additional context:

  • The setup is running a 3 master and 3 worker bare-metal RKE2 configuration. Harbor is installed via the official helm chart.
  • The setups are airgapped.
  • Harbor is running as a single pod instance.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Fields

    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions