diff --git a/docker/docker-compose.provider.yml b/docker/docker-compose.provider.yml index 4c77c98446..c17e0fcfd8 100644 --- a/docker/docker-compose.provider.yml +++ b/docker/docker-compose.provider.yml @@ -132,6 +132,15 @@ services: - ./provider.Caddyfile:/etc/caddy/Caddyfile - caddy_data:/data - caddy_config:/config + # PEM for the load-balanced pronode.prosopo.io hostname. The cert + # getter (pronode4) obtains it via certbot DNS-01 against Bunny, + # then provider.yml's distribute step rsyncs it to every pronode + # under ./certs/pronode-global/. The site block in + # provider.cert-getter.Caddyfile references the path inside the + # container. KEEP THIS MOUNT — removing it breaks TLS for the + # global hostname on the next `up --force-recreate` even though + # an already-running container keeps the old mount alive. + - ./certs/pronode-global:/certs/pronode-global:ro networks: external: internal: diff --git a/docker/provider.Caddyfile b/docker/provider.Caddyfile index 9b59e52b1a..79dda272b2 100644 --- a/docker/provider.Caddyfile +++ b/docker/provider.Caddyfile @@ -90,21 +90,14 @@ http://:9090 { metrics /metrics } -# Main site: bound on :8443. Layer4 (above) proxies non-trap-subzone -# SNI connections from :443 in. ACME issuer is restricted to HTTP-01 -# because TLS-ALPN-01 requires control of :443. The :80 redirect -# server is auto-created by Caddy (auto_https=on); it serves both -# ACME HTTP-01 challenges AND HTTP→HTTPS redirects targeting port -# 443 (i.e. the layer4 listener), which forwards non-matching SNIs -# back to this same :8443 server. -https://{$CADDY_DOMAIN}:8443 { - - tls { - issuer acme { - disable_tlsalpn_challenge - } - } - +# Shared route/handler config. Imported by the per-host site below +# ({$CADDY_DOMAIN} = pronodeN.prosopo.io) and, on the cert getter, by +# the global site for the load-balanced pronode.prosopo.io hostname +# defined in provider.cert-getter.Caddyfile. 
Only the TLS strategy +# differs between the two: per-host uses ACME HTTP-01 (A record points +# to one IP per pronodeN), global uses a PEM provisioned by certbot on +# the cert getter (see ansible playbooks/providerCertbotProvision.yml). +(provider_site) { handle /robots.txt { # Serve /srv/static/robots.txt. Do NOT strip the leading slash: # file_server prepends `root` to the request path, so `/robots.txt` @@ -294,3 +287,19 @@ https://{$CADDY_DOMAIN}:8443 { format json } } + +# Per-host site: bound on :8443. Layer4 (above) proxies non-trap-subzone +# SNI connections from :443 in. ACME issuer is restricted to HTTP-01 +# because TLS-ALPN-01 requires control of :443. The :80 redirect server +# is auto-created by Caddy (auto_https=on); it serves both ACME HTTP-01 +# challenges AND HTTP→HTTPS redirects targeting port 443 (i.e. the +# layer4 listener), which forwards non-matching SNIs back to this same +# :8443. +https://{$CADDY_DOMAIN}:8443 { + tls { + issuer acme { + disable_tlsalpn_challenge + } + } + import provider_site +} diff --git a/docker/provider.cert-getter.Caddyfile b/docker/provider.cert-getter.Caddyfile new file mode 100644 index 0000000000..271cb75120 --- /dev/null +++ b/docker/provider.cert-getter.Caddyfile @@ -0,0 +1,24 @@ +# Cert-getter addendum to provider.Caddyfile. Concatenated onto the +# base file by ansible (playbooks/provider.yml) when +# `cert_getter: true` is set on the host in hosts.{env}.yml — currently +# pronode4 only. Defines an extra HTTPS site for the load-balanced +# pronode.prosopo.io hostname. +# +# Why a single cert getter: +# pronode.prosopo.io is DNS-round-robined across every pronode, which +# breaks ACME HTTP-01 per-node renewal (the challenge lands on +# whichever node DNS hands out, usually not the one renewing). We +# solve this by having exactly one node (the cert getter) own ACME for +# the hostname, using DNS-01 against the Bunny DNS zone that +# pronode.prosopo.io is delegated to. 
The PEM is provisioned by +# certbot outside Caddy (see playbooks/providerCertbotProvision.yml) +# and dropped into the bind-mounted /certs/pronode-global/ dir. The +# other pronodes don't carry this block — distribution of the PEM to +# the rest of the fleet is a manual ansible step done out of band. +https://{$CADDY_GLOBAL_DOMAIN}:8443 { + # Static PEM provisioned by certbot's deploy hook. Caddy does not + # poll file-based certs, so the hook must run `caddy reload --force` + # after copying the new PEM in (plain `reload` skips unchanged config). + tls /certs/pronode-global/fullchain.pem /certs/pronode-global/privkey.pem + import provider_site +}