Skip to content

Commit 19d07b6

Browse files
authored
Improve Azure deployment stability (#26)
* Handle eventual consistency errors during Datahub list cluster definitions in CDP
* Handle eventual consistency errors during Azure cross-account credential creation in CDP

Signed-off-by: Daniel Chaffelson <[email protected]>
1 parent 2d08a02 commit 19d07b6

File tree

3 files changed

+40
-6
lines changed

3 files changed

+40
-6
lines changed

src/cdpy/common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ def _warning_format(message, category, filename, lineno, line=None):
198198
'DELETE_FAILED',
199199
'Error', # DW
200200
'installation:failed', # ML
201+
'provision:failed', # ML
201202
'deprovision:failed', # ML
202203
'BAD_HEALTH' # DF
203204
]

src/cdpy/datahub.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3-
from cdpy.common import CdpSdkBase, Squelch
3+
from cdpy.common import CdpSdkBase, Squelch, CdpError, CdpWarning
44

55

66
class CdpyDatahub(CdpSdkBase):
@@ -28,8 +28,25 @@ def describe_all_clusters(self, environment_name=None):
2828
return [self.describe_cluster(cluster['clusterName']) for cluster in clusters_listing]
2929
return clusters_listing
3030

31-
def list_cluster_templates(self):
32-
return self.sdk.call(svc='datahub', func='list_cluster_templates', ret_field='clusterTemplates')
31+
def list_cluster_templates(self, retries=3, delay=5):
    """List Datahub cluster templates, retrying likely transient errors.

    Calls ``list_cluster_templates`` on the ``datahub`` service, asking for
    the ``clusterTemplates`` field and for errors to be handed back
    (``ret_error=True``) instead of being handled inside ``sdk.call``.

    An error whose status code is 500 and whose error code is ``UNKNOWN``
    is treated as a likely Control Plane eventual consistency error: a
    warning is emitted, ``self.sdk.sleep(delay)`` is called and the request
    is repeated, at most ``retries`` more times. Every other error, and a
    retryable error once no retries are left, is passed to
    ``self.sdk.throw_error``.

    :param retries: Number of repeat attempts allowed after a retryable error.
    :param delay: Value passed to ``self.sdk.sleep`` between attempts
        (presumably seconds -- confirm against the sdk helper).
    :return: The result of ``self.sdk.call``; the ``CdpError`` itself only
        if ``throw_error`` returns rather than raises.
    """
    # Intermittent timeout issue in CDP 7.2.10, should be reverted to bare listing in 7.2.12
    while True:
        resp = self.sdk.call(
            svc='datahub', func='list_cluster_templates', ret_field='clusterTemplates',
            ret_error=True
        )
        if not isinstance(resp, CdpError):
            return resp
        if retries > 0 and str(resp.status_code) == '500' and resp.error_code == 'UNKNOWN':
            retries = retries - 1
            self.sdk.throw_warning(
                CdpWarning('Got likely CDP Control Plane eventual consistency error, %d retries left...'
                           % (retries))
            )
            self.sdk.sleep(delay)
            continue
        # Any error that is not retried must be surfaced; never hand a
        # CdpError back to the caller as if it were a template listing.
        self.sdk.throw_error(resp)
        # NOTE(review): kept in case throw_error delegates to a handler that
        # returns instead of raising -- confirm against CdpSdkBase.
        return resp
3350

3451
def describe_cluster_template(self, name):
3552
return self.sdk.call(svc='datahub', func='describe_cluster_template', ret_field='clusterTemplate', squelch=[

src/cdpy/environments.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -276,16 +276,32 @@ def create_aws_credential(self, name, role, description, retries=3, delay=2):
276276
self.sdk.throw_error(resp)
277277
return resp
278278

279-
def create_azure_credential(self, name, subscription, tenant, application, secret):
280-
return self.sdk.call(
281-
svc='environments', func='create_azure_credential', squelch=[
279+
def create_azure_credential(self, name, subscription, tenant, application, secret, retries=3, delay=5):
    """Create an app-based Azure credential, retrying likely transient errors.

    Calls ``create_azure_credential`` on the ``environments`` service with
    ``ret_error=True`` so that errors are handed back for inspection here.
    A "Credential already exists with name" violation is covered by the
    ``Squelch`` entry passed to ``sdk.call`` (warning text
    'Credential with this name already exists', default ``None``).

    An error whose violations mention 'You may have sent your
    authentication request to the wrong tenant' is treated as a likely
    Azure eventual consistency error: a warning is emitted,
    ``self.sdk.sleep(delay)`` is called and the request is repeated, at most
    ``retries`` more times. Every other error, and a retryable error once
    no retries are left, is passed to ``self.sdk.throw_error``.

    :param name: Sent as ``credentialName``.
    :param subscription: Sent as ``subscriptionId``.
    :param tenant: Sent as ``tenantId``.
    :param application: Sent as ``appBased['applicationId']``.
    :param secret: Sent as ``appBased['secretKey']``.
    :param retries: Number of repeat attempts allowed after a retryable error.
    :param delay: Value passed to ``self.sdk.sleep`` between attempts
        (presumably seconds -- confirm against the sdk helper).
    :return: The result of ``self.sdk.call``; the ``CdpError`` itself only
        if ``throw_error`` returns rather than raises.
    """
    consistency_violations = [
        'You may have sent your authentication request to the wrong tenant'
    ]
    while True:
        resp = self.sdk.call(
            svc='environments', func='create_azure_credential', ret_error=True, squelch=[
                Squelch(field='violations', value='Credential already exists with name',
                        warning='Credential with this name already exists', default=None)],
            credentialName=name,
            subscriptionId=subscription,
            tenantId=tenant,
            appBased={'applicationId': application, 'secretKey': secret}
        )
        if not isinstance(resp, CdpError):
            return resp
        if retries > 0 and any(x in str(resp.violations) for x in consistency_violations):
            retries = retries - 1
            self.sdk.throw_warning(
                CdpWarning('Got likely Azure eventual consistency error [%s], %d retries left...'
                           % (str(resp.violations), retries))
            )
            self.sdk.sleep(delay)
            continue
        # Any error that is not retried must be surfaced; never hand a
        # CdpError back to the caller as if it were a created credential.
        self.sdk.throw_error(resp)
        # NOTE(review): kept in case throw_error delegates to a handler that
        # returns instead of raising -- confirm against CdpSdkBase.
        return resp
289305

290306
def create_gcp_credential(self, name, key_file):
291307
return self.sdk.call(

0 commit comments

Comments
 (0)