From 586ac4160c2740511d96cb2b88c53102713aa592 Mon Sep 17 00:00:00 2001
From: Jize Ning
Date: Mon, 13 Oct 2025 17:02:48 -0400
Subject: [PATCH] HBASE-29652 Chaos testing in ZK mode does not work on hosts with ZNode persistence issue

---
 .../apache/hadoop/hbase/chaos/ChaosAgent.java | 57 ++++++++++++++++++-
 1 file changed, 55 insertions(+), 2 deletions(-)

diff --git a/hbase-it/src/main/java/org/apache/hadoop/hbase/chaos/ChaosAgent.java b/hbase-it/src/main/java/org/apache/hadoop/hbase/chaos/ChaosAgent.java
index 197977d4f3a0..44cdc95d31dc 100644
--- a/hbase-it/src/main/java/org/apache/hadoop/hbase/chaos/ChaosAgent.java
+++ b/hbase-it/src/main/java/org/apache/hadoop/hbase/chaos/ChaosAgent.java
@@ -126,6 +126,46 @@ public void process(WatchedEvent watchedEvent) {
     }
   };
 
+  /**
+   * Watcher that keeps the agent's ephemeral registration znode alive. When the znode is
+   * deleted it is recreated, and the watch is re-armed after every znode event because a
+   * ZooKeeper watch fires only once.
+   */
+  Watcher ephemeralZnodeWatcher = new Watcher() {
+    @Override
+    public void process(WatchedEvent watchedEvent) {
+      String path = watchedEvent.getPath();
+      if (watchedEvent.getType() == Event.EventType.None || path == null) {
+        // Session state notification rather than a znode event: there is no path to act on.
+        return;
+      }
+
+      if (watchedEvent.getType() == Event.EventType.NodeDeleted) {
+        LOG.warn("Ephemeral znode deleted: {}, attempting to recreate", path);
+
+        // Try to recreate the ephemeral znode
+        try {
+          createEphemeralZNode(path, new byte[0]);
+        } catch (Exception e) {
+          LOG.warn("Failed to recreate ephemeral znode: {}", path, e);
+        }
+      }
+
+      // Re-establish the watch for every znode event, not only deletions: the event that just
+      // fired consumed the one-shot watch, so skipping this (e.g. on NodeCreated or
+      // NodeDataChanged) would leave a later deletion unnoticed.
+      try {
+        zk.exists(path, this);
+      } catch (KeeperException e) {
+        LOG.warn("Failed to re-establish watch on ephemeral znode: {}", path, e);
+      } catch (InterruptedException e) {
+        // Restore the interrupt status so the owning thread can still observe it.
+        Thread.currentThread().interrupt();
+        LOG.warn("Failed to re-establish watch on ephemeral znode: {}", path, e);
+      }
+    }
+  };
+
   // CALLBACKS: Below are the Callbacks used by Chaos Agent
 
   /**
@@ -384,8 +424,21 @@ private void register() {
     createIfZNodeNotExists(ChaosConstants.CHAOS_AGENT_STATUS_PERSISTENT_ZNODE
       + ChaosConstants.ZNODE_PATH_SEPARATOR + agentName);
 
-    createEphemeralZNode(ChaosConstants.CHAOS_AGENT_REGISTRATION_EPIMERAL_ZNODE
-      + ChaosConstants.ZNODE_PATH_SEPARATOR + agentName, new byte[0]);
+    String agentEphemeralPath = ChaosConstants.CHAOS_AGENT_REGISTRATION_EPIMERAL_ZNODE
+      + ChaosConstants.ZNODE_PATH_SEPARATOR + agentName;
+
+    createEphemeralZNode(agentEphemeralPath, new byte[0]);
+
+    // Watch the registration znode so the watcher can recreate it if it is deleted.
+    try {
+      zk.exists(agentEphemeralPath, ephemeralZnodeWatcher);
+    } catch (KeeperException e) {
+      LOG.error("Failed to establish watch on ephemeral znode: {}", agentEphemeralPath, e);
+    } catch (InterruptedException e) {
+      // Restore the interrupt status so the caller can still observe it.
+      Thread.currentThread().interrupt();
+      LOG.error("Failed to establish watch on ephemeral znode: {}", agentEphemeralPath, e);
+    }
   }
 
   /***