Skip to content

Commit 4b1b34e

Browse files
xapi: Improve error reporting when pool join fails on TLS verification

When a host joins a pool (pool.join_force), the process has two phases:

1. An unverified TLS connection is used to run pre-join checks and exchange host certificates. The joiner imports the pool bundle.
2. A verified TLS connection (verifyPeer=yes, SNI=pool) is opened using the freshly generated pool bundle.

Previously, any failure in Phase 2 surfaced as:

INTERNAL_ERROR(Stunnel.Stunnel_verify_error(...))

This error is opaque and gives the administrator no actionable information. This commit improves the error handling so that the cause of the failure is reported more precisely.

Signed-off-by: Lucas RAVAGNIER <ravagnierlucas@gmail.com>
1 parent 0db9a87 commit 4b1b34e

3 files changed

Lines changed: 54 additions & 1 deletion

File tree

ocaml/xapi-consts/api_errors.ml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,12 @@ let pool_joining_host_has_network_sriovs =
748748
let pool_joining_host_tls_verification_mismatch =
749749
add_error "POOL_JOINING_HOST_TLS_VERIFICATION_MISMATCH"
750750

751+
let pool_joining_master_certificate_not_in_pool_bundle =
752+
add_error "POOL_JOINING_MASTER_CERTIFICATE_NOT_IN_POOL_BUNDLE"
753+
754+
let pool_joining_pool_bundle_empty_after_import =
755+
add_error "POOL_JOINING_POOL_BUNDLE_EMPTY_AFTER_IMPORT"
756+
751757
let pool_joining_host_ca_certificates_conflict =
752758
add_error "POOL_JOINING_HOST_CA_CERTIFICATES_CONFLICT"
753759

ocaml/xapi/cert_distrib.ml

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -704,7 +704,29 @@ let exchange_certificates_with_joiner ~__context ~uuid ~certificate =
704704
let import_joining_pool_certs ~__context ~pool_certs =
705705
let pool_certs = List.map WireProtocol.certificate_file_of_pair pool_certs in
706706
Worker.local_write_cert_fs ~__context HostPoolCert Merge pool_certs ;
707-
Worker.local_regen_bundle ~__context
707+
Worker.local_regen_bundle ~__context ;
708+
(* update-ca-bundle.sh can fail silently, leaving an empty bundle that would
709+
cause an opaque Stunnel_verify_error when the verified connection is
710+
opened in Phase 2 of the join. *)
711+
let bundle_path = !Xapi_globs.pool_bundle_path in
712+
let bundle_empty_or_missing =
713+
match Unix.stat bundle_path with
714+
| exception Unix.Unix_error (Unix.ENOENT, _, _) ->
715+
true
716+
| stats ->
717+
stats.Unix.st_size = 0
718+
in
719+
if bundle_empty_or_missing then (
720+
D.error
721+
"import_joining_pool_certs: pool bundle '%s' is empty or missing after \
722+
certificate import. The bundle generation script \
723+
(/opt/xensource/bin/update-ca-bundle.sh) likely failed silently."
724+
bundle_path ;
725+
raise
726+
Api_errors.(
727+
Server_error (pool_joining_pool_bundle_empty_after_import, [bundle_path])
728+
)
729+
)
708730

709731
let collect_ca_certs ~__context ~names =
710732
Worker.local_collect_certs LegacyRootCert ~__context names

ocaml/xapi/xapi_pool.ml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1871,6 +1871,31 @@ let join_common ~__context ~master_address ~master_username ~master_password
18711871
Client.Pool.exchange_certificates_on_join ~rpc:unverified_rpc
18721872
~session_id ~uuid:my_uuid ~certificate:my_certificate
18731873
in
1874+
(* Verify the master included its own certificate in the pool bundle
1875+
before importing. If it is absent the verified connection in Phase 2
1876+
will fail with an opaque Stunnel_verify_error. The filename convention
1877+
is "<uuid>.pem" (see Cert_distrib.HostPoolProvider). *)
1878+
let master_uuid =
1879+
Client.Host.get_uuid ~rpc:unverified_rpc ~session_id
1880+
~self:(get_master ~rpc:unverified_rpc ~session_id)
1881+
in
1882+
let expected_cert_filename = master_uuid ^ ".pem" in
1883+
if not (List.mem_assoc expected_cert_filename pool_certs) then (
1884+
error
1885+
"join_common: master certificate file '%s' is absent from the \
1886+
pool's certificate store (/etc/stunnel/certs-pool/). The pool \
1887+
bundle sent to the joiner does not contain the master's own \
1888+
certificate. Run 'xe pool-certificate-sync' on the master and \
1889+
retry."
1890+
expected_cert_filename ;
1891+
raise
1892+
Api_errors.(
1893+
Server_error
1894+
( pool_joining_master_certificate_not_in_pool_bundle
1895+
, [master_uuid]
1896+
)
1897+
)
1898+
) ;
18741899
Cert_distrib.import_joining_pool_certs ~__context ~pool_certs
18751900
)
18761901
(fun () -> Client.Session.logout ~rpc:unverified_rpc ~session_id) ;

0 commit comments

Comments
 (0)