diff --git a/data/postgres/base/backup-jobs/backup-cronjobs-minio.yaml b/data/postgres/base/backup-jobs/backup-cronjobs-minio.yaml index bcec19f4..78332e15 100644 --- a/data/postgres/base/backup-jobs/backup-cronjobs-minio.yaml +++ b/data/postgres/base/backup-jobs/backup-cronjobs-minio.yaml @@ -17,7 +17,7 @@ spec: jobTemplate: spec: backoffLimit: 2 - activeDeadlineSeconds: 3600 # 1 hour timeout + activeDeadlineSeconds: 7200 # 2 hour timeout (full ~45min observed) template: metadata: labels: @@ -76,7 +76,7 @@ spec: jobTemplate: spec: backoffLimit: 2 - activeDeadlineSeconds: 1800 # 30 minutes timeout + activeDeadlineSeconds: 5400 # 90 min timeout (diff ~30min observed) template: metadata: labels: diff --git a/data/postgres/base/backup-jobs/backup-cronjobs-repo1.yaml b/data/postgres/base/backup-jobs/backup-cronjobs-repo1.yaml index b89c5dca..ee1936ea 100644 --- a/data/postgres/base/backup-jobs/backup-cronjobs-repo1.yaml +++ b/data/postgres/base/backup-jobs/backup-cronjobs-repo1.yaml @@ -17,7 +17,7 @@ spec: jobTemplate: spec: backoffLimit: 2 - activeDeadlineSeconds: 3600 # 1 hour timeout + activeDeadlineSeconds: 7200 # 2 hour timeout (full ~45min observed) template: metadata: labels: @@ -76,7 +76,7 @@ spec: jobTemplate: spec: backoffLimit: 2 - activeDeadlineSeconds: 1800 # 30 minutes timeout + activeDeadlineSeconds: 5400 # 90 min timeout (diff ~30min observed) template: metadata: labels: diff --git a/data/postgres/overlays/production/postgres-minio-backup.yaml b/data/postgres/overlays/production/postgres-minio-backup.yaml index 473b49ed..36963a48 100644 --- a/data/postgres/overlays/production/postgres-minio-backup.yaml +++ b/data/postgres/overlays/production/postgres-minio-backup.yaml @@ -10,10 +10,10 @@ spec: name: postgres-minio-backup-credentials global: # Configuration for "repo2" (MinIO S3) - repo2-retention-full: "8" # Keep more backups offsite + repo2-retention-full: "4" # 4 weekly fulls (Apr 21: reduced from 8 after MinIO-full incident) repo2-retention-full-type: count - repo2-retention-archive: "4" - repo2-retention-diff: "14" + repo2-retention-archive: "2" + repo2-retention-diff: "7" repo2-s3-uri-style: path # Required for MinIO repo2-storage-verify-tls: "n" # Use HTTP instead of HTTPS repos: @@ -27,7 +27,7 @@ spec: - ReadWriteOnce resources: requests: - storage: 300Gi + storage: 1000Gi # Apr 21: bumped from 300Gi (live was 500Gi via manual expand) to absorb WAL growth - name: repo2 schedules: full: "0 2 * * 6" # Saturday 02:00 (offset from repo1)