From ba3869c426c8056a9f4934631efc2af798048ab7 Mon Sep 17 00:00:00 2001 From: mgunnala Date: Fri, 6 Dec 2024 14:41:47 -0500 Subject: [PATCH] Address test comments --- azurelinuxagent/ga/exthandlers.py | 64 ++++--- tests/ga/test_extension.py | 12 +- tests/ga/test_multi_config_extension.py | 163 +++++++++--------- tests_e2e/tests/ext_policy/ext_policy.py | 143 +++++++++------ .../ext_policy_with_dependencies.py | 31 ++-- .../ext_policy/policy_dependencies_cases.py | 54 +----- 6 files changed, 236 insertions(+), 231 deletions(-) diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index 07792096a..12a85fe18 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -528,14 +528,16 @@ def handle_ext_handlers(self, goal_state_id): logger.info("{0}: {1}".format(ext_full_name, msg)) add_event(op=WALAEventOperation.ExtensionProcessing, message="{0}: {1}".format(ext_full_name, msg)) handler_i.set_handler_status(status=ExtHandlerStatusValue.not_ready, message=msg, code=-1) - handler_i.create_status_file_if_not_exist(extension, - status=ExtensionStatusValue.error, - code=-1, - operation=handler_i.operation, - message=msg) + handler_i.create_status_file(extension, + status=ExtensionStatusValue.error, + code=-1, + operation=handler_i.operation, + message=msg) continue - # Invoke policy engine to determine if extension is allowed. If not, block extension and report error on - # behalf of the extension. + + # If an error was thrown during policy engine initialization, skip further processing of the extension. + # CRP is still waiting for status, so we report error status here. + # of the extension. policy_op, policy_err_code = _POLICY_ERROR_MAP.get(ext_handler.state) if policy_error is not None: msg = "Extension will not be processed: {0}".format(ustr(policy_error)) @@ -544,15 +546,6 @@ def handle_ext_handlers(self, goal_state_id): extension=extension) continue - extension_allowed = policy_engine.should_allow_extension(ext_handler.name) - if not extension_allowed: - msg = ( - "Extension will not be processed: failed to {0} extension '{1}' because it is not specified " - "in the allowlist. To {0}, add the extension to the allowed list in the policy file ('{2}')." - ).format(policy_op, ext_handler.name, conf.get_policy_file_path()) - self.__report_policy_error(handler_i, policy_err_code, report_op=handler_i.operation, - message=msg, extension=extension) - # In case of depends-on errors, we skip processing extensions if there was an error processing dependent extensions. # But CRP is still waiting for some status back for the skipped extensions. In order to propagate the status back to CRP, # we will report status back here with the relevant error message for each of the dependent extension. @@ -564,9 +557,9 @@ def handle_ext_handlers(self, goal_state_id): if handler_i.get_handler_status() is None: handler_i.set_handler_status(message=depends_on_err_msg, code=-1) - handler_i.create_status_file_if_not_exist(extension, status=ExtensionStatusValue.error, code=-1, - operation=WALAEventOperation.ExtensionProcessing, - message=depends_on_err_msg) + handler_i.create_status_file(extension, status=ExtensionStatusValue.error, code=-1, + operation=WALAEventOperation.ExtensionProcessing, + message=depends_on_err_msg) # For SC extensions, overwrite the HandlerStatus with the relevant message else: @@ -574,6 +567,17 @@ def handle_ext_handlers(self, goal_state_id): continue + # Invoke policy engine to determine if extension is allowed. If disallowed, report an error on behalf of + # the extension and do not process the extension. Dependent extensions will also be blocked. + extension_allowed = policy_engine.should_allow_extension(ext_handler.name) + if not extension_allowed: + msg = ( + "Extension will not be processed: failed to {0} extension '{1}' because it is not specified " + "in the allowlist. To {0}, add the extension to the allowed list in the policy file ('{2}')." + ).format(policy_op, ext_handler.name, conf.get_policy_file_path()) + self.__report_policy_error(handler_i, policy_err_code, report_op=handler_i.operation, + message=msg, extension=extension) + # Process extensions and get if it was successfully executed or not # If extension was blocked by policy, treat the extension as failed and do not process the handler. if not extension_allowed: @@ -694,8 +698,8 @@ def handle_ext_handler(self, ext_handler_i, extension, goal_state_id): # This error is only thrown for enable operation on MultiConfig extension. # Since these are maintained by the extensions, the expectation here is that they would update their status files appropriately with their errors. # The extensions should already have a placeholder status file, but incase they dont, setting one here to fail fast. - ext_handler_i.create_status_file_if_not_exist(extension, status=ExtensionStatusValue.error, code=error.code, - operation=ext_handler_i.operation, message=err_msg) + ext_handler_i.create_status_file(extension, status=ExtensionStatusValue.error, code=error.code, + operation=ext_handler_i.operation, message=err_msg) add_event(name=ext_name, version=ext_handler_i.ext_handler.version, op=ext_handler_i.operation, is_success=False, log_event=True, message=err_msg) except ExtensionsGoalStateError as error: @@ -735,8 +739,8 @@ def __handle_and_report_ext_handler_errors(ext_handler_i, error, report_op, mess # file with failure since the extensions wont be called where they can create their status files. # This way we guarantee reporting back to CRP if ext_handler_i.should_perform_multi_config_op(extension): - ext_handler_i.create_status_file_if_not_exist(extension, status=ExtensionStatusValue.error, code=error.code, - operation=report_op, message=message) + ext_handler_i.create_status_file(extension, status=ExtensionStatusValue.error, code=error.code, + operation=report_op, message=message) if report: name = ext_handler_i.get_extension_full_name(extension) @@ -760,9 +764,11 @@ def __report_policy_error(ext_handler_i, error_code, report_op, message, extensi ext_handler_i.set_handler_status(message=message, code=error_code) # Create status file for extensions with settings (single and multi config). + # If status file already exists, overwrite it. If an extension was previously reporting status and is now + # blocked by a policy error, we should report the policy error. if extension is not None: - ext_handler_i.create_status_file_if_not_exist(extension, status=ExtensionStatusValue.error, code=error_code, - operation=report_op, message=message) + ext_handler_i.create_status_file(extension, status=ExtensionStatusValue.error, code=error_code, + operation=report_op, message=message, overwrite=True) name = ext_handler_i.get_extension_full_name(extension) handler_version = ext_handler_i.ext_handler.version @@ -1067,7 +1073,7 @@ def report_ext_handler_status(self, vm_status, ext_handler, goal_state_changed): # extension even if HandlerState == NotInstalled (Sample scenario: ExtensionsGoalStateError, DecideVersionError, etc) # We also need to report extension status for an uninstalled handler if extensions are disabled, or if the extension # failed due to policy, because CRP waits for extension runtime status before failing the extension operation. - if handler_state != ExtHandlerState.NotInstalled or ext_handler.supports_multi_config or not conf.get_extensions_enabled() or ExtensionPolicyEngine.get_policy_enforcement_enabled(): + if handler_state != ExtHandlerState.NotInstalled or ext_handler.supports_multi_config or not conf.get_extensions_enabled(): # Since we require reading the Manifest for reading the heartbeat, this would fail if HandlerManifest not found. # Only try to read heartbeat if HandlerState != NotInstalled. @@ -1420,9 +1426,11 @@ def set_extension_resource_limits(self): extension_name=extension_name, cpu_quota=resource_limits.get_extension_slice_cpu_quota()) CGroupConfigurator.get_instance().set_extension_services_cpu_memory_quota(resource_limits.get_service_list()) - def create_status_file_if_not_exist(self, extension, status, code, operation, message): + def create_status_file(self, extension, status, code, operation, message, overwrite=False): + # Create status file for specified extension. If overwrite is true, overwrite any existing status file. If + # false, create a status file only if it does not already exist. _, status_path = self.get_status_file_path(extension) - if status_path is not None and not os.path.exists(status_path): + if status_path is not None and (overwrite or not os.path.exists(status_path)): now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") status_contents = [ { diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 865441777..902c069c7 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -3536,7 +3536,7 @@ def _create_policy_file(self, policy): policy_file.write(policy) policy_file.flush() - def _test_policy_case(self, policy, op, expected_status_code, expected_handler_status, expected_ext_count=1, + def _test_policy_case(self, policy, op, expected_status_code, expected_handler_status, expected_ext_count=0, expected_status_msg=None): with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: @@ -3630,13 +3630,13 @@ def test_should_fail_enable_if_dependent_extension_disallowed(self): # OtherExampleHandlerLinux should be disallowed by policy, ExampleHandlerLinux should be skipped because # dependent extension failed - self._assert_handler_status(protocol.report_vm_status, "NotReady", 1, "1.0.0", - expected_handler_name="OSTCExtensions.OtherExampleHandlerLinux", + self._assert_handler_status(protocol.report_vm_status, expected_status="NotReady", expected_ext_count=0, + version="1.0.0", expected_handler_name="OSTCExtensions.OtherExampleHandlerLinux", expected_msg=("failed to run extension 'OSTCExtensions.OtherExampleHandlerLinux' " "because it is not specified in the allowlist.")) - self._assert_handler_status(protocol.report_vm_status, "NotReady", 1, "1.0.0", - expected_handler_name="OSTCExtensions.ExampleHandlerLinux", + self._assert_handler_status(protocol.report_vm_status, expected_status="NotReady", expected_ext_count=0, + version="1.0.0", expected_handler_name="OSTCExtensions.ExampleHandlerLinux", expected_msg="Skipping processing of extensions since execution of dependent " "extension OSTCExtensions.OtherExampleHandlerLinux failed") @@ -3669,7 +3669,7 @@ def test_enable_should_succeed_if_extension_allowed(self): ] for policy in policy_cases: self._test_policy_case(policy=policy, op=ExtensionRequestedState.Enabled, expected_status_code=0, - expected_handler_status='Ready') + expected_handler_status='Ready', expected_ext_count=1) def test_uninstall_should_succeed_if_extension_allowed(self): policy_cases = [ diff --git a/tests/ga/test_multi_config_extension.py b/tests/ga/test_multi_config_extension.py index 02fb8cdad..8ce38441e 100644 --- a/tests/ga/test_multi_config_extension.py +++ b/tests/ga/test_multi_config_extension.py @@ -630,97 +630,94 @@ def test_it_should_handle_and_report_enable_errors_properly(self): } self._assert_extension_status(sc_handler, expected_extensions) - def test_it_should_handle_and_report_extensions_disallowed_by_policy_properly(self): + def test_it_should_report_failed_status_for_extensions_disallowed_by_policy(self): """If multiconfig extension is disallowed by policy, all instances should be blocked.""" policy_path = os.path.join(self.tmp_dir, "waagent_policy.json") - patch('azurelinuxagent.common.conf.get_policy_file_path', return_value=str(policy_path)).start() - patch('azurelinuxagent.ga.policy.policy_engine.conf.get_extension_policy_enabled', - return_value=True).start() - policy = \ - { - "policyVersion": "0.0.1", - "extensionPolicies": { - "allowListedExtensionsOnly": True, - "signatureRequired": True, - "extensions": { - "Microsoft.Powershell.ExampleExtension": {} + with patch('azurelinuxagent.common.conf.get_policy_file_path', return_value=str(policy_path)): + with patch('azurelinuxagent.ga.policy.policy_engine.conf.get_extension_policy_enabled', return_value=True): + policy = \ + { + "policyVersion": "0.0.1", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "signatureRequired": True, + "extensions": { + "Microsoft.Powershell.ExampleExtension": {} + } + } } - } - } - with open(policy_path, mode='w') as policy_file: - json.dump(policy, policy_file, indent=4) - policy_file.flush() - self.test_data['ext_conf'] = os.path.join(self._MULTI_CONFIG_TEST_DATA, - "ext_conf_multi_config_no_dependencies.xml") - with self._setup_test_env() as (exthandlers_handler, protocol, no_of_extensions): - disallowed_mc_1 = extension_emulator(name="OSTCExtensions.ExampleHandlerLinux.firstExtension", - supports_multiple_extensions=True) - disallowed_mc_2 = extension_emulator(name="OSTCExtensions.ExampleHandlerLinux.secondExtension", - supports_multiple_extensions=True) - disallowed_mc_3 = extension_emulator(name="OSTCExtensions.ExampleHandlerLinux.thirdExtension", - supports_multiple_extensions=True) - allowed_ext = extension_emulator(name="Microsoft.Powershell.ExampleExtension") - with enable_invocations(disallowed_mc_1, disallowed_mc_2, disallowed_mc_3, - allowed_ext) as invocation_record: - exthandlers_handler.run() - exthandlers_handler.report_ext_handlers_status() - self.assertEqual(no_of_extensions, - len(protocol.aggregate_status['aggregateStatus']['handlerAggregateStatus']), - "incorrect extensions reported") - - # We should only enable the allowed extension, no instances of the multiconfig extension should be enabled - invocation_record.compare( - (allowed_ext, ExtensionCommandNames.INSTALL), - (allowed_ext, ExtensionCommandNames.ENABLE) - ) - - mc_handlers = self._assert_and_get_handler_status(aggregate_status=protocol.aggregate_status, - handler_name="OSTCExtensions.ExampleHandlerLinux", - expected_count=3, status="NotReady") - msg = "failed to run extension 'OSTCExtensions.ExampleHandlerLinux' because it is not specified in the allowlist" - expected_extensions = { - "firstExtension": {"status": ExtensionStatusValue.error, "seq_no": 1, "message": msg}, - "secondExtension": {"status": ExtensionStatusValue.error, "seq_no": 2, "message": msg}, - "thirdExtension": {"status": ExtensionStatusValue.error, "seq_no": 3, "message": msg}, - } - self._assert_extension_status(mc_handlers, expected_extensions, multi_config=True) - - sc_handler = self._assert_and_get_handler_status(aggregate_status=protocol.aggregate_status, - handler_name="Microsoft.Powershell.ExampleExtension", - status="Ready", message=None) - expected_extensions = { - "Microsoft.Powershell.ExampleExtension": {"status": ExtensionStatusValue.success, "seq_no": 9, - "message": None} - } - self._assert_extension_status(sc_handler, expected_extensions) - + with open(policy_path, mode='w') as policy_file: + json.dump(policy, policy_file, indent=4) + policy_file.flush() + self.test_data['ext_conf'] = os.path.join(self._MULTI_CONFIG_TEST_DATA, + "ext_conf_multi_config_no_dependencies.xml") + with self._setup_test_env() as (exthandlers_handler, protocol, no_of_extensions): + disallowed_mc_1 = extension_emulator(name="OSTCExtensions.ExampleHandlerLinux.firstExtension", + supports_multiple_extensions=True) + disallowed_mc_2 = extension_emulator(name="OSTCExtensions.ExampleHandlerLinux.secondExtension", + supports_multiple_extensions=True) + disallowed_mc_3 = extension_emulator(name="OSTCExtensions.ExampleHandlerLinux.thirdExtension", + supports_multiple_extensions=True) + allowed_ext = extension_emulator(name="Microsoft.Powershell.ExampleExtension") + with enable_invocations(disallowed_mc_1, disallowed_mc_2, disallowed_mc_3, + allowed_ext) as invocation_record: + exthandlers_handler.run() + exthandlers_handler.report_ext_handlers_status() + self.assertEqual(no_of_extensions, + len(protocol.aggregate_status['aggregateStatus']['handlerAggregateStatus']), + "incorrect extensions reported") + + # We should only enable the allowed extension, no instances of the multiconfig extension should be enabled + invocation_record.compare( + (allowed_ext, ExtensionCommandNames.INSTALL), + (allowed_ext, ExtensionCommandNames.ENABLE) + ) + + mc_handlers = self._assert_and_get_handler_status(aggregate_status=protocol.aggregate_status, + handler_name="OSTCExtensions.ExampleHandlerLinux", + expected_count=3, status="NotReady") + msg = "failed to run extension 'OSTCExtensions.ExampleHandlerLinux' because it is not specified in the allowlist" + expected_extensions = { + "firstExtension": {"status": ExtensionStatusValue.error, "seq_no": 1, "message": msg}, + "secondExtension": {"status": ExtensionStatusValue.error, "seq_no": 2, "message": msg}, + "thirdExtension": {"status": ExtensionStatusValue.error, "seq_no": 3, "message": msg}, + } + self._assert_extension_status(mc_handlers, expected_extensions, multi_config=True) + + sc_handler = self._assert_and_get_handler_status(aggregate_status=protocol.aggregate_status, + handler_name="Microsoft.Powershell.ExampleExtension", + status="Ready", message=None) + expected_extensions = { + "Microsoft.Powershell.ExampleExtension": {"status": ExtensionStatusValue.success, "seq_no": 9, + "message": None} + } + self._assert_extension_status(sc_handler, expected_extensions) - def test_it_should_handle_and_report_extensions_allowed_by_policy_properly(self): + def test_it_should_report_successful_status_for_extensions_allowed_by_policy(self): """If multiconfig extension is allowed by policy, all instances should be allowed.""" policy_path = os.path.join(self.tmp_dir, "waagent_policy.json") - patch('azurelinuxagent.common.conf.get_policy_file_path', return_value=str(policy_path)).start() - patch('azurelinuxagent.ga.policy.policy_engine.conf.get_extension_policy_enabled', - return_value=True).start() - policy = \ - { - "policyVersion": "0.0.1", - "extensionPolicies": { - "allowListedExtensionsOnly": True, - "signatureRequired": True, - "extensions": { - "OSTCExtensions.ExampleHandlerLinux": {}, - "Microsoft.Powershell.ExampleExtension": {} + with patch('azurelinuxagent.common.conf.get_policy_file_path', return_value=str(policy_path)): + with patch('azurelinuxagent.ga.policy.policy_engine.conf.get_extension_policy_enabled', return_value=True): + policy = \ + { + "policyVersion": "0.0.1", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "signatureRequired": True, + "extensions": { + "OSTCExtensions.ExampleHandlerLinux": {}, + "Microsoft.Powershell.ExampleExtension": {} + } + } } - } - } - with open(policy_path, mode='w') as policy_file: - json.dump(policy, policy_file, indent=4) - policy_file.flush() + with open(policy_path, mode='w') as policy_file: + json.dump(policy, policy_file, indent=4) + policy_file.flush() - self.test_data['ext_conf'] = os.path.join(self._MULTI_CONFIG_TEST_DATA, - "ext_conf_multi_config_no_dependencies.xml") - with self._setup_test_env(mock_manifest=True) as (exthandlers_handler, protocol, no_of_extensions): - self.__run_and_assert_generic_case(exthandlers_handler, protocol, no_of_extensions) + self.test_data['ext_conf'] = os.path.join(self._MULTI_CONFIG_TEST_DATA, + "ext_conf_multi_config_no_dependencies.xml") + with self._setup_test_env(mock_manifest=True) as (exthandlers_handler, protocol, no_of_extensions): + self.__run_and_assert_generic_case(exthandlers_handler, protocol, no_of_extensions) def test_it_should_cleanup_extension_state_on_disable(self): diff --git a/tests_e2e/tests/ext_policy/ext_policy.py b/tests_e2e/tests/ext_policy/ext_policy.py index da7b752f9..c489c323b 100644 --- a/tests_e2e/tests/ext_policy/ext_policy.py +++ b/tests_e2e/tests/ext_policy/ext_policy.py @@ -26,12 +26,13 @@ from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient +from tests_e2e.tests.lib.virtual_machine_runcommand_client import VirtualMachineRunCommandClient from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext class ExtPolicy(AgentVmTest): class TestCase: - def __init__(self, extension: VirtualMachineExtensionClient, settings: Any): + def __init__(self, extension, settings: Any): self.extension = extension self.settings = settings @@ -55,12 +56,17 @@ def _create_policy_file(self, policy): self._ssh_client.run_command(f"mv {remote_path} {policy_file_final_dest}", use_sudo=True) def _operation_should_succeed(self, operation, extension_case): + log.info("") log.info(f"Attempting to {operation} {extension_case.extension.__str__()}, expected to succeed ") # Attempt operation. If enabling, assert that the extension is present in instance view. # If deleting, assert that the extension is not present in instance view. try: if operation == "enable": - extension_case.extension.enable(settings=extension_case.settings, force_update=True, timeout=15 * 60) + # VirtualMachineRunCommandClient (and VirtualMachineRunCommand) does not take force_update_tag as a parameter. + if type(extension_case.extension) == VirtualMachineRunCommandClient: + extension_case.extension.enable(settings=extension_case.settings, timeout=15*60) + else: + extension_case.extension.enable(settings=extension_case.settings, force_update=True, timeout=15*60) extension_case.extension.assert_instance_view() elif operation == "delete": extension_case.extension.delete(timeout=15 * 60) @@ -68,7 +74,7 @@ def _operation_should_succeed(self, operation, extension_case): if instance_view_extensions is not None and any( e.name == extension_case.extension._resource_name for e in instance_view_extensions): raise Exception( - "extension {0} still in instance view after attempting to delete".format(extension_case.extension._resource_nam)) + "extension {0} still in instance view after attempting to delete".format(extension_case.extension)) log.info(f"Operation '{operation}' for {extension_case.extension.__str__()} succeeded as expected.") except Exception as error: fail( @@ -76,50 +82,75 @@ def _operation_should_succeed(self, operation, extension_case): f"Extension is allowed by policy so this operation should have completed successfully.\n" f"Error: {error}") - @staticmethod - def _operation_should_fail(operation, extension_case): - log.info(f"Attempting to {operation} {extension_case.extension.__str__()}, should fail fast.") - try: - timeout = (6 * 60) # Fail fast. - if operation == "enable": - extension_case.extension.enable(settings=extension_case.settings, force_update=True, timeout=timeout) - elif operation == "delete": - extension_case.extension.delete(timeout=timeout) - fail(f"The agent should have reported an error trying to {operation} {extension_case.extension.__str__()} " - f"because the extension is disallowed by policy.") - except Exception as error: - assert_that("[ExtensionPolicyError] Extension will not be processed" in str(error)) \ - .described_as( - f"Error message should communicate that extension is disallowed by policy, but actual error " - f"was: {error}").is_true() - log.info(f"{extension_case.extension.__str__()} {operation} failed as expected") + def _operation_should_fail(self, operation, extension_case): + log.info("") + log.info(f"Attempting to {operation} {extension_case.extension}, should fail fast.") + if operation == "enable": + try: + timeout = (6 * 60) # Fail fast. + # VirtualMachineRunCommandClient (and VirtualMachineRunCommand) does not take force_update_tag as a parameter. + if type(extension_case.extension) == VirtualMachineRunCommandClient: + extension_case.extension.enable(settings=extension_case.settings, timeout=timeout) + else: + extension_case.extension.enable(settings=extension_case.settings, force_update=True, + timeout=timeout) + fail( + f"The agent should have reported an error trying to {operation} {extension_case.extension} " + f"because the extension is disallowed by policy.") + except Exception as error: + expected_msg = "Extension will not be processed: failed to run extension" + assert_that(expected_msg in str(error)) \ + .described_as( + f"Error message is expected to contain '{expected_msg}', but actual error message was '{error}'").is_true() + log.info(f"{extension_case.extension} {operation} failed as expected") + + elif operation == "delete": + # Delete is a best effort operation and should not fail, so CRP will wait for the full timeout instead + # instead of reporting an error for the operation. We set a short timeout limit, swallow the error, and + # assert that the extension is still in the instance view to confirm that the delete failed. + try: + delete_start_time = self._ssh_client.run_command("date '+%Y-%m-%d %T'").rstrip() + extension_case.extension.delete(timeout=(1 * 60)) + fail(f"The agent should have reported a timeout error when attempting to delete {extension_case.extension} " + f"because the extension is disallowed by policy.") + except TimeoutError: + log.info("Reported a timeout error when attempting to delete extension, as expected. Checking instance view " + "and agent log to confirm that delete operation failed.") + # Confirm that extension is still present in instance view + instance_view_extensions = self._context.vm.get_instance_view().extensions + if instance_view_extensions is not None and not any( + e.name == extension_case.extension._resource_name for e in instance_view_extensions): + fail(f"Delete operation is disallowed by policy and should have failed, but extension " + f"{extension_case.extension} is no longer present in the instance view.") + + # Confirm that expected error message is in the agent log + expected_msg = "Extension will not be processed: failed to uninstall extension" + result = self._ssh_client.run_command( + f"agent_ext_workflow-check_data_in_agent_log.py --data '{expected_msg}' --after-timestamp '{delete_start_time}'", + use_sudo=True) def run(self): # Prepare no-config, single-config, and multi-config extension to test. Extensions with settings and extensions # without settings have different status reporting logic, so we should test all cases. - unique = str(uuid.uuid4()) - test_file = f"waagent-test.{unique}" # CustomScript is a single-config extension. custom_script = ExtPolicy.TestCase( VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript"), - {'commandToExecute': f"echo '{unique}' > /tmp/{test_file}"} + {'commandToExecute': f"echo '{str(uuid.uuid4())}'"} ) # RunCommandHandler is a multi-config extension, so we set up two instances (configurations) here and test both. run_command = ExtPolicy.TestCase( - VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler, + VirtualMachineRunCommandClient(self._context.vm, VmExtensionIds.RunCommandHandler, resource_name="RunCommandHandler"), - {'source': {'script': f"echo '{unique}' > /tmp/{test_file}"}} + {'source': f"echo '{str(uuid.uuid4())}'"} ) - unique2 = str(uuid.uuid4()) - test_file2 = f"waagent-test.{unique2}" run_command_2 = ExtPolicy.TestCase( - VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler, + VirtualMachineRunCommandClient(self._context.vm, VmExtensionIds.RunCommandHandler, resource_name="RunCommandHandler2"), - {'source': {'script': f"echo '{unique2}' > /tmp/{test_file2}"}} + {'source': f"echo '{str(uuid.uuid4())}'"} ) # AzureMonitorLinuxAgent is a no-config extension (extension without settings). @@ -129,13 +160,21 @@ def run(self): None ) + # Another e2e test may have left behind an extension we want to test here. Cleanup any leftovers so that they + # do not affect the test results. + ext_to_cleanup = [custom_script, run_command, run_command_2, azure_monitor] + for ext in ext_to_cleanup: + ext.extension.delete() + # Enable policy via conf log.info("Enabling policy via conf file on the test VM [%s]", self._context.vm.name) self._ssh_client.run_command("update-waagent-conf Debug.EnableExtensionPolicy=y", use_sudo=True) - # Only allowlisted extensions should be processed. - # We only allowlist CustomScript: CustomScript should be enabled, RunCommand and AzureMonitor should fail. - # (Note that CustomScript blocked by policy is tested in a later test case.) + # This policy tests the following scenarios: + # - enable a single-config extension (CustomScript) that is allowed by policy -> should succeed + # - enable a no-config extension (AzureMonitorLinuxAgent) that is disallowed by policy -> should fail fast + # - enable two instances of a multi-config extension (RunCommandHandler) that is disallowed by policy -> both should fail fast + # (Note that CustomScript disallowed by policy is tested in a later test case.) policy = \ { "policyVersion": "0.1.0", @@ -150,11 +189,15 @@ def run(self): self._create_policy_file(policy) self._operation_should_succeed("enable", custom_script) self._operation_should_fail("enable", run_command) + self._operation_should_fail("enable", run_command_2) if VmExtensionIds.AzureMonitorLinuxAgent.supports_distro((self._ssh_client.run_command("get_distro.py").rstrip())): self._operation_should_fail("enable", azure_monitor) - # When allowlist is turned off, all extensions should be processed. - # RunCommand and AzureMonitorLinuxAgent should be successfully enabled and then deleted. + # This policy tests the following scenarios: + # - enable two instances of a multi-config extension (RunCommandHandler) when allowed by policy -> should succeed + # - delete two instances of a multi-config extension (RunCommandHandler) when allowed by policy -> should succeed + # - enable no-config extension (AzureMonitorLinuxAgent) when allowed by policy -> should succeed + # - delete no-config extension (AzureMonitorLinuxAgent) when allowed by policy -> should succeed policy = \ { "policyVersion": "0.1.0", @@ -165,14 +208,21 @@ def run(self): } } self._create_policy_file(policy) + # Update settings to force an update to the seq no + run_command.settings = {'source': f"echo '{str(uuid.uuid4())}'"} + run_command_2.settings = {'source': f"echo '{str(uuid.uuid4())}'"} self._operation_should_succeed("enable", run_command) + self._operation_should_succeed("enable", run_command_2) self._operation_should_succeed("delete", run_command) + self._operation_should_succeed("delete", run_command_2) if VmExtensionIds.AzureMonitorLinuxAgent.supports_distro((self._ssh_client.run_command("get_distro.py").rstrip())): self._operation_should_succeed("enable", azure_monitor) self._operation_should_succeed("delete", azure_monitor) - # Should not uninstall disallowed extensions. - # CustomScript is removed from the allowlist: delete operation should fail. + # This policy tests the following scenarios: + # - disallow a previously-enabled single-config extension (CustomScript), then delete -> should fail fast + # - enable two instances of a multi-config extension (RunCommandHandler) when disallowed by policy -> should fail fast + # - enable single-config extension (CustomScript) when disallowed by policy -> should fail fast policy = \ { "policyVersion": "0.1.0", @@ -183,18 +233,16 @@ def run(self): } } self._create_policy_file(policy) - # # Known CRP issue - delete/uninstall operation times out instead of reporting an error. - # # TODO: uncomment this test case after issue is resolved - # # self._operation_should_fail("delete", custom_script) + self._operation_should_fail("delete", custom_script) # If a multiconfig extension is disallowed, no instances should be processed. # RunCommand is not allowed - if we try to enable two instances, both should fail fast. self._operation_should_fail("enable", run_command) self._operation_should_fail("enable", run_command_2) - - # If single-config extension is initially blocked, and policy is updated to allow it, extension should be - # successfully enabled and report status correctly. self._operation_should_fail("enable", custom_script) + + # This policy tests the following scenario: + # - allow a previously-disallowed single-config extension (CSE), then enable -> should succeed policy = \ { "policyVersion": "0.1.0", @@ -209,17 +257,6 @@ def run(self): def get_ignore_error_rules(self) -> List[Dict[str, Any]]: ignore_rules = [ - # - # 2024-10-23T17:50:38.107793Z WARNING ExtHandler ExtHandler Dependent extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent failed or timed out, will skip processing the rest of the extensions - # We intentionally block extensions with policy and expect any dependent extensions to be skipped - { - 'message': r"Dependent extension .* failed or timed out, will skip processing the rest of the extensions" - }, - # 2024-10-23T18:01:32.247341Z ERROR ExtHandler ExtHandler Event: name=Microsoft.Azure.Monitor.AzureMonitorLinuxAgent, op=ExtensionProcessing, message=Skipping processing of extensions since execution of dependent extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent failed, duration=0 - # We intentionally block extensions with policy and expect any dependent extensions to be skipped - { - 'message': r"Skipping processing of extensions since execution of dependent extension .* failed" - }, # 2024-10-24T17:34:20.808235Z ERROR ExtHandler ExtHandler Event: name=Microsoft.Azure.Monitor.AzureMonitorLinuxAgent, op=None, message=[ExtensionPolicyError] Extension will not be processed: failed to enable extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because extension is not specified in allowlist. To enable, add extension to the allowed list in the policy file ('/etc/waagent_policy.json')., duration=0 # We intentionally block extensions with policy and expect this failure message { diff --git a/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py b/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py index 403734b23..739432fcd 100644 --- a/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py +++ b/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py @@ -54,7 +54,7 @@ def __init__(self, context: AgentVmTestContext): _test_cases = [ _should_fail_single_config_depends_on_disallowed_no_config, _should_fail_single_config_depends_on_disallowed_single_config, - # TODO: RunCommand is unable to be installed properly, so these tests are currently disabled. Investigate the + # TODO: RunCommandHandler is unable to be uninstalled properly, so these tests are currently disabled. Investigate the # issue and enable these 3 tests. # _should_fail_single_config_depends_on_disallowed_multi_config, # _should_fail_multi_config_depends_on_disallowed_single_config, @@ -82,6 +82,14 @@ def run(self): ssh_clients: Dict[str, SshClient] = {} for instance in instances_ip_address: ssh_clients[instance.instance_name] = SshClient(ip_address=instance.ip_address, username=self._context.username, identity_file=self._context.identity_file) + + # Cleanup any extensions left behind by other tests, as they may be blocked by policy and erroneously cause failures. + instance_view_ext = self._context.vmss.get_instance_view().extensions + if instance_view_ext is not None and len(instance_view_ext) > 0: + for ex in instance_view_ext: + self._context.vmss.delete_extension(ex.name) + + # Enable policy via conf file. for ssh_client in ssh_clients.values(): ssh_client.run_command("update-waagent-conf Debug.EnableExtensionPolicy=y", use_sudo=True) @@ -121,7 +129,6 @@ def run(self): # to generate a new sequence number each time test_guid = str(uuid.uuid4()) policy, extensions, expected_errors, deletion_order = case() - for ext in extensions: ext["properties"].update({ "forceUpdateTag": test_guid @@ -174,23 +181,23 @@ def run(self): for phrase in expected_errors: if phrase not in error_message: fail("Extension template deployment failed as expected, but with an unexpected error. Error expected to contain message '{0}'. Actual error: {1}".format(phrase, e)) - log.info("Extensions failed as expected") + log.info("Extensions failed as expected. Expected errors: '{0}'. Actual errors: '{1}'.".format(expected_errors, e)) log.info("") # After each test, clean up failed extensions to leave VMSS in a good state for the next test. # If there are leftover failed extensions, CRP will attempt to uninstall them in the next test, but uninstall # will be disallowed by policy. Since CRP waits for a 90 minute timeout for uninstall, the operation will - # timeout and fail without an appropriate error message (known issue), and the whole test case will fail. + # timeout and fail without an appropriate error message, and the whole test case will fail. # To clean up, we first update the policy to allow all, then remove the extensions. log.info("Starting cleanup for test case...") - for ssh_client in ssh_clients.values(): - allow_all_policy = \ - { - "policyVersion": "0.1.0", - "extensionPolicies": { - "allowListedExtensionsOnly": False - } + allow_all_policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": False } + } + for ssh_client in ssh_clients.values(): self._create_policy_file(ssh_client, allow_all_policy) for ext_to_delete in deletion_order: @@ -269,7 +276,7 @@ def get_ignore_error_rules(self) -> List[Dict[str, Any]]: { 'message': r"Skipping processing of extensions since execution of dependent extension .* failed" }, - # 2024-10-24T17:34:20.808235Z ERROR ExtHandler ExtHandler Event: name=Microsoft.Azure.Monitor.AzureMonitorLinuxAgent, op=None, message=[ExtensionPolicyError] Extension will not be processed: failed to enable extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because extension is not specified in allowlist. To enable, add extension to the allowed list in the policy file ('/etc/waagent_policy.json')., duration=0 + # 2024-10-24T17:34:20.808235Z ERROR ExtHandler ExtHandler Event: name=Microsoft.Azure.Monitor.AzureMonitorLinuxAgent, op=None, message=Extension will not be processed: failed to enable extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because extension is not specified in allowlist. To enable, add extension to the allowed list in the policy file ('/etc/waagent_policy.json')., duration=0 # We intentionally block extensions with policy and expect this failure message { 'message': r"Extension will not be processed" diff --git a/tests_e2e/tests/ext_policy/policy_dependencies_cases.py b/tests_e2e/tests/ext_policy/policy_dependencies_cases.py index 4f11b4139..b9fb8157b 100644 --- a/tests_e2e/tests/ext_policy/policy_dependencies_cases.py +++ b/tests_e2e/tests/ext_policy/policy_dependencies_cases.py @@ -56,7 +56,7 @@ def _should_fail_single_config_depends_on_disallowed_single_config(): } } expected_errors = [ - "[ExtensionPolicyError] Extension will not be processed: failed to enable extension 'Microsoft.OSTCExtensions.VMAccessForLinux' because extension is not specified in allowlist", + "Extension will not be processed: failed to enable extension 'Microsoft.OSTCExtensions.VMAccessForLinux' because extension is not specified in allowlist", "'CustomScript' is marked as failed since it depends upon the VM Extension 'VMAccessForLinux' which has failed" ] deletion_order = [VmExtensionIds.CustomScript, VmExtensionIds.VmAccess] @@ -80,7 +80,7 @@ def _should_fail_single_config_depends_on_disallowed_no_config(): } } expected_errors = [ - "[ExtensionPolicyError] Extension will not be processed: failed to enable extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because extension is not specified in allowlist", + "Extension will not be processed: failed to enable extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because extension is not specified in allowlist", "'CustomScript' is marked as failed since it depends upon the VM Extension 'AzureMonitorLinuxAgent' which has failed" ] deletion_order = [VmExtensionIds.CustomScript, VmExtensionIds.AzureMonitorLinuxAgent] @@ -103,7 +103,7 @@ def _should_fail_single_config_depends_on_disallowed_multi_config(): } } expected_errors = [ - "[ExtensionPolicyError] Extension will not be processed: failed to enable extension 'Microsoft.CPlat.Core.RunCommandHandlerLinux' because extension is not specified in allowlist", + "Extension will not be processed: failed to enable extension 'Microsoft.CPlat.Core.RunCommandHandlerLinux' because extension is not specified in allowlist", "'CustomScript' is marked as failed since it depends upon the VM Extension 'RunCommandHandlerLinux' which has failed" ] deletion_order = [VmExtensionIds.CustomScript, VmExtensionIds.RunCommandHandler] @@ -126,7 +126,7 @@ def _should_fail_multi_config_depends_on_disallowed_single_config(): } } expected_errors = [ - "[ExtensionPolicyError] Extension will not be processed: failed to enable extension 'Microsoft.Azure.Extensions.CustomScript' because extension is not specified in allowlist", + "Extension will not be processed: failed to enable extension 'Microsoft.Azure.Extensions.CustomScript' because extension is not specified in allowlist", "VM has reported a failure when processing extension 'RunCommandHandlerLinux' (publisher 'Microsoft.CPlat.Core' and type 'RunCommandHandlerLinux'). Error message: 'Skipping processing of extensions since execution of dependent extension Microsoft.Azure.Extensions.CustomScript failed'." ] deletion_order = [VmExtensionIds.RunCommandHandler, VmExtensionIds.CustomScript] @@ -149,7 +149,7 @@ def _should_fail_multi_config_depends_on_disallowed_no_config(): } } expected_errors = [ - "[ExtensionPolicyError] Extension will not be processed: failed to enable extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because extension is not specified in allowlist", + "Extension will not be processed: failed to enable extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because extension is not specified in allowlist", "VM has reported a failure when processing extension 'RunCommandHandlerLinux' (publisher 'Microsoft.CPlat.Core' and type 'RunCommandHandlerLinux'). Error message: 'Skipping processing of extensions since execution of dependent extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent failed'." ] deletion_order = [VmExtensionIds.RunCommandHandler, VmExtensionIds.AzureMonitorLinuxAgent] @@ -197,47 +197,3 @@ def _should_succeed_single_config_depends_on_single_config(): expected_errors = [] deletion_order = [VmExtensionIds.VmAccess, VmExtensionIds.CustomScript] return policy, template, expected_errors, deletion_order - - -def _should_no_dependencies(): - template = \ - [{ - "name": "CustomScript", - "properties": { - "publisher": "Microsoft.Azure.Extensions", - "type": "CustomScript", - "typeHandlerVersion": "2.1", - "autoUpgradeMinorVersion": True, - "settings": { - "commandToExecute": "date" - } - } - }, - { - "name": "VMAccessForLinux", - "properties": { - "publisher": "Microsoft.OSTCExtensions", - "type": "VMAccessForLinux", - "typeHandlerVersion": "1.0", - "autoUpgradeMinorVersion": True, - "settings": {}, - "protectedSettings": { - "username": "testuser" - } - } - }] - policy = \ - { - "policyVersion": "0.1.0", - "extensionPolicies": { - "allowListedExtensionsOnly": True, - "extensions": { - } - } - } - expected_errors = [ - "[ExtensionPolicyError] Extension will not be processed: failed to enable extension 'Microsoft.OSTCExtensions.VMAccessForLinux' because extension is not specified in allowlist", - "[ExtensionPolicyError] Extension will not be processed: failed to enable extension 'Microsoft.Azure.Extensions.CustomScript' because extension is not specified in allowlist" - ] - deletion_order = [VmExtensionIds.VmAccess, VmExtensionIds.CustomScript] - return policy, template, expected_errors, deletion_order \ No newline at end of file