Skip to content

Commit 26035d4

Browse files
Consider the Recovered/Available Host HA states, along with the agent connection status, when determining whether a Host HA inspection is in progress; also includes some code improvements
1 parent dc17cb7 commit 26035d4

3 files changed

Lines changed: 27 additions & 12 deletions

File tree

plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHAProvider.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,17 +68,18 @@ public boolean hasActivity(final Host r, final DateTime suspectTime) throws HACh
6868

6969
@Override
7070
public boolean recover(Host r) throws HARecoveryException {
71+
logger.debug("Recover the host {}", r);
7172
try {
72-
if (outOfBandManagementService.isOutOfBandManagementEnabled(r)){
73+
if (outOfBandManagementService.isOutOfBandManagementEnabled(r)) {
7374
final OutOfBandManagementResponse resp = outOfBandManagementService.executePowerOperation(r, PowerOperation.RESET, null);
7475
return resp.getSuccess();
7576
} else {
7677
logger.warn("OOBM recover operation failed for the host {}", r);
7778
return false;
7879
}
79-
} catch (Exception e){
80+
} catch (Exception e) {
8081
logger.warn("OOBM service is not configured or enabled for this host {} error is {}", r, e.getMessage());
81-
throw new HARecoveryException(String.format(" OOBM service is not configured or enabled for this host %s", r), e);
82+
throw new HARecoveryException(String.format("OOBM service is not configured or enabled for this host %s", r), e);
8283
}
8384
}
8485

server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,23 @@ private boolean isHostHAInspectionInProgress(long hostId) {
265265

266266
HAConfig.HAState state = haConfig.getState();
267267
logger.debug("Checking Host HA inspection is in progress or not for the host {} from HAConfig, HA state is {}", hostId, state);
268-
return state == HAConfig.HAState.Suspect || state == HAConfig.HAState.Checking;
268+
if (state == HAConfig.HAState.Suspect || state == HAConfig.HAState.Checking) {
269+
return true;
270+
}
271+
272+
if (state == HAConfig.HAState.Recovered || state == HAConfig.HAState.Available) {
273+
// If the host HA state is Recovered, it indicates that the host has restarted successfully.
274+
// If the host HA state is Available, it means the host has restarted successfully and the recovery waiting period has completed.
275+
// In both states, the agent can connect as soon as the host is ready (and can move to Suspect -> Checking HA state if the agent connection fails again before Fencing).
276+
final HostVO host = _hostDao.findById(hostId);
277+
if (host != null && host.getStatus() != Status.Up) {
278+
logger.debug("{} is in {} status and HA state is {}, considering Host HA inspection is still in progress" +
279+
" until we are sure the host is ready after a recovery wait period and agent is connected/Up", host, host.getStatus(), state);
280+
return true;
281+
}
282+
}
283+
284+
return false;
269285
}
270286

271287
@Override

utils/src/main/java/org/apache/cloudstack/utils/redfish/RedfishClient.java

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -231,21 +231,19 @@ private HttpResponse executeHttpRequest(String url, HttpRequestBase httpReq) {
231231
}
232232

233233
protected HttpResponse retryHttpRequest(String url, HttpRequestBase httpReq, HttpClient client) {
234-
logger.warn(String.format("Failed to execute HTTP %s request [URL: %s]. Executing the request again.", httpReq.getMethod(), url));
234+
logger.warn("Failed to execute HTTP {} request [URL: {}]. Executing the request again.", httpReq.getMethod(), url);
235235
HttpResponse response = null;
236236
for (int attempt = 1; attempt < redfishRequestMaxRetries + 1; attempt++) {
237237
try {
238238
TimeUnit.SECONDS.sleep(WAIT_FOR_REQUEST_RETRY);
239-
logger.debug(String.format("HTTP %s request retry attempt %d/%d [URL: %s].", httpReq.getMethod(), attempt, redfishRequestMaxRetries, url));
239+
logger.debug("HTTP {} request retry attempt {}/{} [URL: {}].", httpReq.getMethod(), attempt, redfishRequestMaxRetries, url);
240240
response = client.execute(httpReq);
241241
break;
242242
} catch (IOException | InterruptedException e) {
243243
if (attempt == redfishRequestMaxRetries) {
244244
throw new RedfishException(String.format("Failed to execute HTTP %s request retry attempt %d/%d [URL: %s] due to exception %s", httpReq.getMethod(), attempt, redfishRequestMaxRetries,url, e));
245245
} else {
246-
logger.warn(
247-
String.format("Failed to execute HTTP %s request retry attempt %d/%d [URL: %s] due to exception %s", httpReq.getMethod(), attempt, redfishRequestMaxRetries,
248-
url, e));
246+
logger.warn("Failed to execute HTTP {} request retry attempt {}/{} [URL: {}] due to exception {}", httpReq.getMethod(), attempt, redfishRequestMaxRetries, url, e);
249247
}
250248
}
251249
}
@@ -312,7 +310,7 @@ public void executeComputerSystemReset(String hostAddress, RedfishResetCmd reset
312310
throw new RedfishException(String.format("Failed to execute System power command for host by performing '%s' request on URL '%s' and host address '%s'. The expected HTTP status code is '%s' but it got '%s'.",
313311
HttpPost.METHOD_NAME, url, hostAddress, EXPECTED_HTTP_STATUS, statusCode));
314312
}
315-
logger.debug(String.format("Sending ComputerSystem.Reset Command '%s' to host '%s' with request '%s %s'", resetCommand, hostAddress, HttpPost.METHOD_NAME, url));
313+
logger.debug("Sending ComputerSystem.Reset Command '{}' to host '{}' with request '{} {}'", resetCommand, hostAddress, HttpPost.METHOD_NAME, url);
316314
}
317315

318316
/**
@@ -330,7 +328,7 @@ public String getSystemId(String hostAddress) {
330328

331329
String systemId = processGetSystemIdResponse(response);
332330

333-
logger.debug(String.format("Retrieved System ID '%s' with request '%s: %s'", systemId, HttpGet.METHOD_NAME, url));
331+
logger.debug("Retrieved System ID '{}' with request '{}: {}'", systemId, HttpGet.METHOD_NAME, url);
334332

335333
return systemId;
336334
}
@@ -384,7 +382,7 @@ public RedfishPowerState getSystemPowerState(String hostAddress) {
384382
}
385383

386384
RedfishPowerState powerState = processGetSystemRequestResponse(response);
387-
logger.debug(String.format("Retrieved System power state '%s' with request '%s: %s'", powerState, HttpGet.METHOD_NAME, url));
385+
logger.debug("Retrieved System power state '{}' with request '{}: {}'", powerState, HttpGet.METHOD_NAME, url);
388386
return powerState;
389387
}
390388

0 commit comments

Comments
 (0)