From f5a0b95c37ad37b0e40c36c5c4034610a663df6d Mon Sep 17 00:00:00 2001 From: "Shekhar Sorot ( MSFT )" Date: Sat, 13 Dec 2025 19:59:27 +0530 Subject: [PATCH 1/4] fix(azure): Enable storage account and private endpoint reuse for AzureFileShare on re-runs fix(azure): Enable storage account and private endpoint reuse for AzureFileShare on re-runs Problem: When LISA was re-run with preserved resource groups (using deploy:false), the AzureFileShare feature would create new storage accounts and file shares instead of reusing existing ones, causing resource tracking and cleanup issues. Additionally, re-runs would fail with IndexError when private endpoints already existed. Changes: 1. features.py - _initialize_fileshare_information(): - Added logic to scan for existing 'lisasc*' storage accounts in the resource group and reuse them instead of generating new random names - Added import for get_storage_client 2. common.py - create_update_private_endpoints(): - Added check for existing private endpoints before attempting creation - Added new helper function _get_private_endpoint_ip() that extracts IP address using two methods: a) From custom_dns_configs (original approach) b) From network interface (fallback for existing endpoints) - Fixes IndexError when private endpoint already exists but custom_dns_configs is empty 3. storage.py - verify_cifs_basic(): - Fixed test to pass allow_shared_key_access=True since the test uses key-based authentication (credentials stored in /etc/smbcredentials/) - Added TODO comment to flag test for future validation with these changes Tested: - verify_cifs_basic: PASSED (fresh deployment) - verify_azure_file_share: Requires validation with re-run scenario Impact: - Enables proper resource reuse when re-running LISA with preserved environments - Prevents orphaned storage accounts on failed/retried test runs - Fixes private endpoint IP retrieval for existing endpoints --- lisa/microsoft/testsuites/core/storage.py | 8 ++- lisa/sut_orchestrator/azure/common.py | 71 ++++++++++++++++++++++- lisa/sut_orchestrator/azure/features.py | 21 ++++++- 3 files changed, 96 insertions(+), 4 deletions(-) diff --git a/lisa/microsoft/testsuites/core/storage.py b/lisa/microsoft/testsuites/core/storage.py index bf5126059a..60cc6d1115 100644 --- a/lisa/microsoft/testsuites/core/storage.py +++ b/lisa/microsoft/testsuites/core/storage.py @@ -600,6 +600,10 @@ def after_case(self, log: Logger, **kwargs: Any) -> None: Downgrading priority from 1 to 5. The file share relies on the storage account key, which we cannot use currently. Will change it back once file share works with MSI. + + TODO: Test this case with storage account reuse changes in + AzureFileShare class (features.py). This test does NOT use + private endpoints (enable_private_endpoint=False by default). """, timeout=TIME_OUT, requirement=simple_requirement( @@ -619,7 +623,9 @@ def verify_cifs_basic(self, node: Node, environment: Environment) -> None: try: fs_url_dict = azure_file_share.create_file_share( - file_share_names=[fileshare_name], environment=environment + file_share_names=[fileshare_name], + environment=environment, + allow_shared_key_access=True, ) test_folders_share_dict = { test_folder: fs_url_dict[fileshare_name], diff --git a/lisa/sut_orchestrator/azure/common.py b/lisa/sut_orchestrator/azure/common.py index 5775aa58b2..dad4ca505d 100644 --- a/lisa/sut_orchestrator/azure/common.py +++ b/lisa/sut_orchestrator/azure/common.py @@ -1240,6 +1240,38 @@ def get_compute_client( ) +def _get_private_endpoint_ip( + network: Any, + resource_group_name: str, + private_endpoint: Any, + log: Logger, +) -> Optional[str]: + """ + Helper function to extract IP address from a private endpoint. + Tries multiple methods: custom_dns_configs, then network interface. + """ + # Method 1: Try custom_dns_configs + if ( + private_endpoint.custom_dns_configs + and private_endpoint.custom_dns_configs[0].ip_addresses + ): + return str(private_endpoint.custom_dns_configs[0].ip_addresses[0]) + + # Method 2: Get IP from network interface + if private_endpoint.network_interfaces: + try: + nic_id = private_endpoint.network_interfaces[0].id + # Extract NIC name from resource ID + nic_name = nic_id.split("/")[-1] + nic = network.network_interfaces.get(resource_group_name, nic_name) + if nic.ip_configurations and nic.ip_configurations[0].private_ip_address: + return str(nic.ip_configurations[0].private_ip_address) + except Exception as e: + log.debug(f"Failed to get IP from network interface: {e}") + + return None + + def create_update_private_endpoints( platform: "AzurePlatform", resource_group_name: str, @@ -1253,6 +1285,24 @@ def create_update_private_endpoints( private_endpoint_name = "pe_test" status = "Approved" description = "Auto-Approved" + + # Check if private endpoint already exists + try: + existing_pe = network.private_endpoints.get( + resource_group_name=resource_group_name, + private_endpoint_name=private_endpoint_name, + ) + log.debug(f"found existing private endpoint: {private_endpoint_name}") + # Return IP from existing endpoint if available + ip_address = _get_private_endpoint_ip( + network, resource_group_name, existing_pe, log + ) + if ip_address: + return ip_address + log.debug("Could not get IP from existing endpoint, will recreate...") + except Exception: + log.debug(f"private endpoint {private_endpoint_name} not found, creating...") + private_endpoint = network.private_endpoints.begin_create_or_update( resource_group_name=resource_group_name, private_endpoint_name=private_endpoint_name, @@ -1275,7 +1325,26 @@ def create_update_private_endpoints( ) log.debug(f"create private endpoints: {private_endpoint_name}") result = private_endpoint.result() - return result.custom_dns_configs[0].ip_addresses[0] + + # Try to get IP from the creation result + ip_address = _get_private_endpoint_ip(network, resource_group_name, result, log) + if ip_address: + return ip_address + + # DNS configs may not be immediately available, fetch endpoint again + log.debug("IP not in result, fetching endpoint again...") + result = network.private_endpoints.get( + resource_group_name=resource_group_name, + private_endpoint_name=private_endpoint_name, + ) + + ip_address = _get_private_endpoint_ip(network, resource_group_name, result, log) + if ip_address: + return ip_address + + raise LisaException( + f"Failed to get IP address from private endpoint {private_endpoint_name}" + ) def delete_private_endpoints( diff --git a/lisa/sut_orchestrator/azure/features.py b/lisa/sut_orchestrator/azure/features.py index 439479a952..55845b3c2c 100644 --- a/lisa/sut_orchestrator/azure/features.py +++ b/lisa/sut_orchestrator/azure/features.py @@ -129,6 +129,7 @@ get_node_context, get_or_create_file_share, get_primary_ip_addresses, + get_storage_client, get_storage_credential, get_virtual_networks, get_vm, @@ -3698,8 +3699,23 @@ def _initialize(self, *args: Any, **kwargs: Any) -> None: self._initialize_fileshare_information() def _initialize_fileshare_information(self) -> None: - random_str = generate_random_chars(string.ascii_lowercase + string.digits, 10) - self._storage_account_name = f"lisasc{random_str}" + platform: AzurePlatform = self._platform # type: ignore + storage_client = get_storage_client( + platform.credential, platform.subscription_id, platform.cloud + ) + storage_accounts = storage_client.storage_accounts.list_by_resource_group( + self._resource_group_name + ) + for account in storage_accounts: + if account.name.startswith("lisasc"): + self._storage_account_name = account.name + break + else: + random_str = generate_random_chars( + string.ascii_lowercase + string.digits, 10 + ) + self._storage_account_name = f"lisasc{random_str}" + self._fstab_info = ( f"nofail,vers={self.get_smb_version()}," "credentials=/etc/smbcredentials/lisa.cred" @@ -3743,6 +3759,7 @@ def create_file_share( # No changes need to be done in code calling function # For Quota, currently all volumes will share same quota number. # Ensure that you use the highest value applicable for your shares + # Reuse existing file shares if they exist, otherwise create new ones for share_name in file_share_names: fs_url_dict[share_name] = get_or_create_file_share( credential=platform.credential, From 2645700f6fd9035a9bd44f910dee68d190267a10 Mon Sep 17 00:00:00 2001 From: "Shekhar Sorot ( MSFT )" Date: Wed, 17 Dec 2025 15:06:27 +0530 Subject: [PATCH 2/4] updates to code to allow for selective conditional deletion when deploy:false is set. --- lisa/microsoft/testsuites/core/storage.py | 40 +++++++- lisa/sut_orchestrator/azure/common.py | 40 +++++++- lisa/sut_orchestrator/azure/features.py | 114 +++++++++++++++++----- 3 files changed, 163 insertions(+), 31 deletions(-) diff --git a/lisa/microsoft/testsuites/core/storage.py b/lisa/microsoft/testsuites/core/storage.py index 60cc6d1115..bcd4fba440 100644 --- a/lisa/microsoft/testsuites/core/storage.py +++ b/lisa/microsoft/testsuites/core/storage.py @@ -45,6 +45,7 @@ BadEnvironmentStateException, LisaException, SkippedException, + constants, generate_random_chars, get_matched_str, ) @@ -621,6 +622,25 @@ def verify_cifs_basic(self, node: Node, environment: Environment) -> None: azure_file_share = node.features[AzureFileShare] self._install_cifs_dependencies(node) + # Clean up any leftover state from previous runs (important for deploy:false) + # This handles cases where the VM is reused but has stale mounts/folders + mount = node.tools[Mount] + # Try to unmount if already mounted + try: + mount.umount(point=test_folder, disk_name="", erase=False) + except Exception: + pass # Ignore if not mounted + # Remove test folder if it exists + node.execute(f"rm -rf {test_folder}", sudo=True) + # Restore original fstab if backup exists (from previous failed run) + node.execute( + "test -f /etc/fstab_cifs && cp -f /etc/fstab_cifs /etc/fstab || true", + sudo=True, + shell=True, + ) + + # Track test failure for keep_environment handling + test_failed = False try: fs_url_dict = azure_file_share.create_file_share( file_share_names=[fileshare_name], @@ -636,7 +656,6 @@ def verify_cifs_basic(self, node: Node, environment: Environment) -> None: self._reload_fstab_config(node) # Verify that the file share is mounted - mount = node.tools[Mount] mount_point_exists = mount.check_mount_point_exist(test_folder) if not mount_point_exists: raise LisaException(f"Mount point {test_folder} does not exist.") @@ -661,8 +680,25 @@ def verify_cifs_basic(self, node: Node, environment: Environment) -> None: ) assert file_content_after_mount == initial_file_content + except Exception: + test_failed = True + raise finally: - azure_file_share.delete_azure_fileshare([fileshare_name]) + # Respect keep_environment setting for cleanup + # - "always": Never cleanup (user wants to inspect) + # - "failed": Cleanup only if test passed + # - "no" (default): Always cleanup + keep_environment = environment.platform.runbook.keep_environment + should_cleanup = True + + if keep_environment == constants.ENVIRONMENT_KEEP_ALWAYS: + should_cleanup = False + elif keep_environment == constants.ENVIRONMENT_KEEP_FAILED: + # Only cleanup if test passed (not failed) + should_cleanup = not test_failed + + if should_cleanup: + azure_file_share.delete_azure_fileshare([fileshare_name]) def _install_cifs_dependencies(self, node: Node) -> None: posix_os: Posix = cast(Posix, node.os) diff --git a/lisa/sut_orchestrator/azure/common.py b/lisa/sut_orchestrator/azure/common.py index dad4ca505d..2695fa13b1 100644 --- a/lisa/sut_orchestrator/azure/common.py +++ b/lisa/sut_orchestrator/azure/common.py @@ -1280,7 +1280,12 @@ def create_update_private_endpoints( private_link_service_id: str, group_ids: List[str], log: Logger, -) -> Any: +) -> Tuple[Any, bool]: + """ + Create or update private endpoints. + Returns a tuple of (ip_address, was_created) where was_created is True if + the endpoint was created by this call, False if it already existed. + """ network = get_network_client(platform) private_endpoint_name = "pe_test" status = "Approved" @@ -1298,7 +1303,7 @@ def create_update_private_endpoints( network, resource_group_name, existing_pe, log ) if ip_address: - return ip_address + return (ip_address, False) # Endpoint existed, not created by us log.debug("Could not get IP from existing endpoint, will recreate...") except Exception: log.debug(f"private endpoint {private_endpoint_name} not found, creating...") @@ -1329,7 +1334,7 @@ def create_update_private_endpoints( # Try to get IP from the creation result ip_address = _get_private_endpoint_ip(network, resource_group_name, result, log) if ip_address: - return ip_address + return (ip_address, True) # Endpoint was created by us # DNS configs may not be immediately available, fetch endpoint again log.debug("IP not in result, fetching endpoint again...") @@ -1340,7 +1345,7 @@ def create_update_private_endpoints( ip_address = _get_private_endpoint_ip(network, resource_group_name, result, log) if ip_address: - return ip_address + return (ip_address, True) # Endpoint was created by us raise LisaException( f"Failed to get IP address from private endpoint {private_endpoint_name}" @@ -2122,6 +2127,33 @@ def get_share_service_client( # Update: Added quota to allow creation of variable sized file share volumes. # Default is 100 GiB. All units are in GiB. +def list_file_shares( + credential: Any, + subscription_id: str, + cloud: Cloud, + account_name: str, + resource_group_name: str, + log: Logger, +) -> List[str]: + """ + List all file shares in an Azure Storage account. + Returns a list of file share names. + """ + try: + share_service_client = get_share_service_client( + credential, + subscription_id, + cloud, + account_name, + resource_group_name, + ) + all_shares = list(share_service_client.list_shares()) + return [share.name for share in all_shares] + except Exception as e: + log.debug(f"Failed to list file shares in {account_name}: {e}") + return [] + + def get_or_create_file_share( credential: Any, subscription_id: str, diff --git a/lisa/sut_orchestrator/azure/features.py b/lisa/sut_orchestrator/azure/features.py index 55845b3c2c..58b9bc1d32 100644 --- a/lisa/sut_orchestrator/azure/features.py +++ b/lisa/sut_orchestrator/azure/features.py @@ -187,9 +187,9 @@ def _start(self, wait: bool = True) -> None: node_info = self._node.connection_info node_info[constants.ENVIRONMENTS_NODES_REMOTE_PUBLIC_ADDRESS] = public_ip node_info[constants.ENVIRONMENTS_NODES_REMOTE_ADDRESS] = private_ip - node_info[ - constants.ENVIRONMENTS_NODES_REMOTE_USE_PUBLIC_ADDRESS - ] = platform._azure_runbook.use_public_address + node_info[constants.ENVIRONMENTS_NODES_REMOTE_USE_PUBLIC_ADDRESS] = ( + platform._azure_runbook.use_public_address + ) self._node.set_connection_info(**node_info) self._node._is_initialized = False self._node.initialize() @@ -2765,9 +2765,9 @@ def on_before_deployment(cls, *args: Any, **kwargs: Any) -> None: ) # Disk Encryption Set ID - node_parameters.security_profile[ - "disk_encryption_set_id" - ] = settings.disk_encryption_set_id + node_parameters.security_profile["disk_encryption_set_id"] = ( + settings.disk_encryption_set_id + ) # Return Skipped Exception if security profile is set on Gen 1 VM if node_parameters.security_profile["security_type"] == "": @@ -3199,8 +3199,8 @@ def create_share( subnet_id = subnet_ids[0] break - # create private endpoints - ipv4_address = create_update_private_endpoints( + # create private endpoints - returns (ip, was_created) + ipv4_address, _ = create_update_private_endpoints( platform, resource_group_name, location, @@ -3706,15 +3706,27 @@ def _initialize_fileshare_information(self) -> None: storage_accounts = storage_client.storage_accounts.list_by_resource_group( self._resource_group_name ) + # Track whether we're reusing an existing storage account + self._storage_account_reused = False for account in storage_accounts: if account.name.startswith("lisasc"): self._storage_account_name = account.name + self._storage_account_reused = True + self._log.debug( + f"Reusing existing storage account: {self._storage_account_name}" + ) break else: random_str = generate_random_chars( string.ascii_lowercase + string.digits, 10 ) self._storage_account_name = f"lisasc{random_str}" + self._log.debug( + f"Will create new storage account: {self._storage_account_name}" + ) + + # Track private endpoint creation state + self._private_endpoint_created_by_lisa = False self._fstab_info = ( f"nofail,vers={self.get_smb_version()}," @@ -3791,15 +3803,17 @@ def create_file_share( subnet_id = subnet_ids[0] break - # Create Private endpoint - ipv4_address = create_update_private_endpoints( - platform, - resource_group_name, - location, - subnet_id, - storage_account_resource_id, - ["file"], - self._log, + # Create Private endpoint - returns (ip, was_created) + ipv4_address, self._private_endpoint_created_by_lisa = ( + create_update_private_endpoints( + platform, + resource_group_name, + location, + subnet_id, + storage_account_resource_id, + ["file"], + self._log, + ) ) # Create private zone private_dns_zone_id = create_update_private_zones( @@ -3847,9 +3861,24 @@ def create_fileshare_folders(self, test_folders_share_dict: Dict[str, str]) -> N ) def delete_azure_fileshare(self, file_share_names: List[str]) -> None: + """ + Delete Azure file shares and optionally the storage account. + + Cleanup behavior: + - Always deletes the specified file shares (created by LISA for this test) + - Only deletes private endpoint resources if LISA created them + - Storage account deletion rules: + a) If LISA created it: Always delete (lifecycle tied to test case) + b) If reused/pre-existing: Never delete (user-managed resource) + + Note: Pre-existing storage accounts are never deleted because they could + be set up by end users for monitoring, audit, or debugging purposes. + """ resource_group_name = self._resource_group_name storage_account_name = self._storage_account_name platform: AzurePlatform = self._platform # type: ignore + + # Delete the specified file shares for share_name in file_share_names: delete_file_share( credential=platform.credential, @@ -3860,14 +3889,49 @@ def delete_azure_fileshare(self, file_share_names: List[str]) -> None: resource_group_name=resource_group_name, log=self._log, ) - delete_storage_account( - credential=platform.credential, - subscription_id=platform.subscription_id, - cloud=platform.cloud, - account_name=storage_account_name, - resource_group_name=resource_group_name, - log=self._log, - ) + + # Determine if we should delete the storage account + # Only delete if LISA created it - pre-existing accounts are user-managed + should_delete_storage_account = False + if not self._storage_account_reused: + # LISA created this storage account, delete it (lifecycle tied to test) + should_delete_storage_account = True + self._log.debug( + f"Storage account {storage_account_name} was created by LISA, " + "will delete." + ) + else: + # Storage account was reused/pre-existing - never delete + # These could be user-managed for monitoring, audit, or debugging + self._log.debug( + f"Storage account {storage_account_name} was pre-existing/reused. " + "Skipping deletion (user-managed resource)." + ) + + # Clean up private endpoint resources only if LISA created them + if self._private_endpoint_created_by_lisa: + self._log.debug("Cleaning up private endpoint resources created by LISA") + delete_private_dns_zone_groups(platform, resource_group_name, self._log) + delete_virtual_network_links(platform, resource_group_name, self._log) + delete_record_sets(platform, resource_group_name, self._log) + delete_private_zones(platform, resource_group_name, self._log) + delete_private_endpoints(platform, resource_group_name, self._log) + else: + self._log.debug( + "Private endpoint was not created by LISA, skipping cleanup" + ) + + # Delete storage account if appropriate + if should_delete_storage_account: + delete_storage_account( + credential=platform.credential, + subscription_id=platform.subscription_id, + cloud=platform.cloud, + account_name=storage_account_name, + resource_group_name=resource_group_name, + log=self._log, + ) + # revert file into original status after testing. self._node.execute("cp -f /etc/fstab_cifs /etc/fstab", sudo=True) From c7658640f7eda38c4299b9756015834490b2ab07 Mon Sep 17 00:00:00 2001 From: "Shekhar Sorot ( MSFT )" Date: Wed, 17 Dec 2025 23:22:51 +0530 Subject: [PATCH 3/4] Update features.py --- lisa/sut_orchestrator/azure/features.py | 33 +++++++++++++------------ 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/lisa/sut_orchestrator/azure/features.py b/lisa/sut_orchestrator/azure/features.py index 58b9bc1d32..7b90e9081a 100644 --- a/lisa/sut_orchestrator/azure/features.py +++ b/lisa/sut_orchestrator/azure/features.py @@ -187,9 +187,9 @@ def _start(self, wait: bool = True) -> None: node_info = self._node.connection_info node_info[constants.ENVIRONMENTS_NODES_REMOTE_PUBLIC_ADDRESS] = public_ip node_info[constants.ENVIRONMENTS_NODES_REMOTE_ADDRESS] = private_ip - node_info[constants.ENVIRONMENTS_NODES_REMOTE_USE_PUBLIC_ADDRESS] = ( - platform._azure_runbook.use_public_address - ) + node_info[ + constants.ENVIRONMENTS_NODES_REMOTE_USE_PUBLIC_ADDRESS + ] = platform._azure_runbook.use_public_address self._node.set_connection_info(**node_info) self._node._is_initialized = False self._node.initialize() @@ -2765,9 +2765,9 @@ def on_before_deployment(cls, *args: Any, **kwargs: Any) -> None: ) # Disk Encryption Set ID - node_parameters.security_profile["disk_encryption_set_id"] = ( - settings.disk_encryption_set_id - ) + node_parameters.security_profile[ + "disk_encryption_set_id" + ] = settings.disk_encryption_set_id # Return Skipped Exception if security profile is set on Gen 1 VM if node_parameters.security_profile["security_type"] == "": @@ -3804,16 +3804,17 @@ def create_file_share( break # Create Private endpoint - returns (ip, was_created) - ipv4_address, self._private_endpoint_created_by_lisa = ( - create_update_private_endpoints( - platform, - resource_group_name, - location, - subnet_id, - storage_account_resource_id, - ["file"], - self._log, - ) + ( + ipv4_address, + self._private_endpoint_created_by_lisa, + ) = create_update_private_endpoints( + platform, + resource_group_name, + location, + subnet_id, + storage_account_resource_id, + ["file"], + self._log, ) # Create private zone private_dns_zone_id = create_update_private_zones( From 1713778267d9cbcb602d8410bb57c517c8e94ed8 Mon Sep 17 00:00:00 2001 From: "Shekhar Sorot ( MSFT )" Date: Wed, 17 Dec 2025 23:33:01 +0530 Subject: [PATCH 4/4] Update storage.py --- lisa/microsoft/testsuites/core/storage.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/lisa/microsoft/testsuites/core/storage.py b/lisa/microsoft/testsuites/core/storage.py index bcd4fba440..712c5e5103 100644 --- a/lisa/microsoft/testsuites/core/storage.py +++ b/lisa/microsoft/testsuites/core/storage.py @@ -688,14 +688,15 @@ def verify_cifs_basic(self, node: Node, environment: Environment) -> None: # - "always": Never cleanup (user wants to inspect) # - "failed": Cleanup only if test passed # - "no" (default): Always cleanup - keep_environment = environment.platform.runbook.keep_environment should_cleanup = True - if keep_environment == constants.ENVIRONMENT_KEEP_ALWAYS: - should_cleanup = False - elif keep_environment == constants.ENVIRONMENT_KEEP_FAILED: - # Only cleanup if test passed (not failed) - should_cleanup = not test_failed + if environment.platform: + keep_environment = environment.platform.runbook.keep_environment + if keep_environment == constants.ENVIRONMENT_KEEP_ALWAYS: + should_cleanup = False + elif keep_environment == constants.ENVIRONMENT_KEEP_FAILED: + # Only cleanup if test passed (not failed) + should_cleanup = not test_failed if should_cleanup: azure_file_share.delete_azure_fileshare([fileshare_name])