diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index 341e070b4..b1789605f 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -39,6 +39,7 @@ from azurelinuxagent.common.agent_supported_feature import get_agent_supported_features_list_for_extensions, \ SupportedFeatureNames, get_supported_feature_by_name, get_agent_supported_features_list_for_crp from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator +from azurelinuxagent.ga.policy.policy_engine import ExtensionPolicyEngine from azurelinuxagent.common.datacontract import get_properties, set_properties from azurelinuxagent.common.errorstate import ErrorState from azurelinuxagent.common.event import add_event, elapsed_milliseconds, WALAEventOperation, \ @@ -86,6 +87,26 @@ # This is the default sequence number we use when there are no settings available for Handlers _DEFAULT_SEQ_NO = "0" +# For extension disallowed errors (e.g. blocked by policy, extensions disabled), this mapping is used to generate +# user-friendly error messages and determine the appropriate terminal error code based on the blocked operation. +# Format: {: (, )} +# - The first element of the tuple is a user-friendly operation name included in error messages. +# - The second element of the tuple is the CRP terminal error code for the operation. +_EXT_DISALLOWED_ERROR_MAP = \ + { + ExtensionRequestedState.Enabled: ('run', ExtensionErrorCodes.PluginEnableProcessingFailed), + # TODO: CRP does not currently have a terminal error code for uninstall. Once this code is added, use + # it instead of PluginDisableProcessingFailed below. + # + # Note: currently, when uninstall is requested for an extension, CRP polls until the agent does not + # report status for that extension, or until timeout is reached. In the case of an extension disallowed + # error, agent reports failed status on behalf of the extension, which will cause CRP to poll for the full + # timeout, instead of failing fast. + ExtensionRequestedState.Uninstall: ('uninstall', ExtensionErrorCodes.PluginDisableProcessingFailed), + # "Disable" is an internal operation, users are unaware of it. We surface the term "uninstall" instead. + ExtensionRequestedState.Disabled: ('uninstall', ExtensionErrorCodes.PluginDisableProcessingFailed), + } + class ExtHandlerStatusValue(object): """ @@ -276,6 +297,9 @@ class ExtHandlersHandler(object): def __init__(self, protocol): self.protocol = protocol self.ext_handlers = None + # Maintain a list of extension handler objects that are disallowed (e.g. blocked by policy, extensions disabled, etc.). + # Extension status, if it exists, is always reported for the extensions in this list. List is reset for each goal state. + self.__disallowed_ext_handlers = [] # The GoalState Aggregate status needs to report the last status of the GoalState. Since we only process # extensions on goal state change, we need to maintain its state. # Setting the status to None here. This would be overridden as soon as the first GoalState is processed @@ -482,27 +506,42 @@ def handle_ext_handlers(self, goal_state_id): depends_on_err_msg = None extensions_enabled = conf.get_extensions_enabled() + # Instantiate policy engine, and use same engine to handle all extension handlers. If an error is thrown during + # policy engine initialization, we block all extensions and report the error via handler status for each extension. + policy_error = None + try: + policy_engine = ExtensionPolicyEngine() + except Exception as ex: + policy_error = ex + + self.__disallowed_ext_handlers = [] + for extension, ext_handler in all_extensions: handler_i = ExtHandlerInstance(ext_handler, self.protocol, extension=extension) + # Get user-friendly operation name and terminal error code to use in status messages if extension is disallowed + operation, error_code = _EXT_DISALLOWED_ERROR_MAP.get(ext_handler.state) + # In case of extensions disabled, we skip processing extensions. But CRP is still waiting for some status # back for the skipped extensions. In order to propagate the status back to CRP, we will report status back # here with an error message. if not extensions_enabled: - agent_conf_file_path = get_osutil().agent_conf_file_path - msg = "Extension will not be processed since extension processing is disabled. To enable extension " \ - "processing, set Extensions.Enabled=y in '{0}'".format(agent_conf_file_path) ext_full_name = handler_i.get_extension_full_name(extension) - logger.info('') - logger.info("{0}: {1}".format(ext_full_name, msg)) - add_event(op=WALAEventOperation.ExtensionProcessing, message="{0}: {1}".format(ext_full_name, msg)) - handler_i.set_handler_status(status=ExtHandlerStatusValue.not_ready, message=msg, code=-1) - handler_i.create_status_file_if_not_exist(extension, - status=ExtensionStatusValue.error, - code=-1, - operation=handler_i.operation, - message=msg) + agent_conf_file_path = get_osutil().get_agent_conf_file_path() + msg = "Extension '{0}' will not be processed since extension processing is disabled. To enable extension " \ + "processing, set Extensions.Enabled=y in '{1}'".format(ext_full_name, agent_conf_file_path) + self.__handle_ext_disallowed_error(handler_i, error_code, report_op=WALAEventOperation.ExtensionProcessing, + message=msg, extension=extension) + continue + + # If an error was thrown during policy engine initialization, skip further processing of the extension. + # CRP is still waiting for status, so we report error status here. + if policy_error is not None: + msg = "Extension will not be processed: {0}".format(ustr(policy_error)) + self.__handle_ext_disallowed_error(ext_handler_i=handler_i, error_code=error_code, + report_op=WALAEventOperation.ExtensionPolicy, message=msg, + extension=extension) continue # In case of depends-on errors, we skip processing extensions if there was an error processing dependent extensions. @@ -516,9 +555,9 @@ def handle_ext_handlers(self, goal_state_id): if handler_i.get_handler_status() is None: handler_i.set_handler_status(message=depends_on_err_msg, code=-1) - handler_i.create_status_file_if_not_exist(extension, status=ExtensionStatusValue.error, code=-1, - operation=WALAEventOperation.ExtensionProcessing, - message=depends_on_err_msg) + handler_i.create_status_file(extension, status=ExtensionStatusValue.error, code=-1, + operation=WALAEventOperation.ExtensionProcessing, + message=depends_on_err_msg, overwrite=False) # For SC extensions, overwrite the HandlerStatus with the relevant message else: @@ -526,8 +565,21 @@ def handle_ext_handlers(self, goal_state_id): continue - # Process extensions and get if it was successfully executed or not - extension_success = self.handle_ext_handler(handler_i, extension, goal_state_id) + # Invoke policy engine to determine if extension is allowed. + # - if allowed: process the extension and get if it was successfully executed or not + # - if disallowed: do not process the handler and report an error on behalf of the extension, dependent + # extensions will also be blocked. + extension_allowed = policy_engine.should_allow_extension(ext_handler.name) + if not extension_allowed: + msg = ( + "Extension will not be processed: failed to {0} extension '{1}' because it is not specified " + "as an allowed extension. To {0}, add the extension to the list of allowed extensions in the policy file ('{2}')." + ).format(operation, ext_handler.name, conf.get_policy_file_path()) + self.__handle_ext_disallowed_error(handler_i, error_code, report_op=WALAEventOperation.ExtensionPolicy, + message=msg, extension=extension) + extension_success = False + else: + extension_success = self.handle_ext_handler(handler_i, extension, goal_state_id) dep_level = self.__get_dependency_level((extension, ext_handler)) if 0 <= dep_level < max_dep_level: @@ -642,8 +694,8 @@ def handle_ext_handler(self, ext_handler_i, extension, goal_state_id): # This error is only thrown for enable operation on MultiConfig extension. # Since these are maintained by the extensions, the expectation here is that they would update their status files appropriately with their errors. # The extensions should already have a placeholder status file, but incase they dont, setting one here to fail fast. - ext_handler_i.create_status_file_if_not_exist(extension, status=ExtensionStatusValue.error, code=error.code, - operation=ext_handler_i.operation, message=err_msg) + ext_handler_i.create_status_file(extension, status=ExtensionStatusValue.error, code=error.code, + operation=ext_handler_i.operation, message=err_msg, overwrite=False) add_event(name=ext_name, version=ext_handler_i.ext_handler.version, op=ext_handler_i.operation, is_success=False, log_event=True, message=err_msg) except ExtensionsGoalStateError as error: @@ -683,8 +735,8 @@ def __handle_and_report_ext_handler_errors(ext_handler_i, error, report_op, mess # file with failure since the extensions wont be called where they can create their status files. # This way we guarantee reporting back to CRP if ext_handler_i.should_perform_multi_config_op(extension): - ext_handler_i.create_status_file_if_not_exist(extension, status=ExtensionStatusValue.error, code=error.code, - operation=report_op, message=message) + ext_handler_i.create_status_file(extension, status=ExtensionStatusValue.error, code=error.code, + operation=report_op, message=message, overwrite=False) if report: name = ext_handler_i.get_extension_full_name(extension) @@ -692,6 +744,48 @@ def __handle_and_report_ext_handler_errors(ext_handler_i, error, report_op, mess add_event(name=name, version=handler_version, op=report_op, is_success=False, log_event=True, message=message) + def __handle_ext_disallowed_error(self, ext_handler_i, error_code, report_op, message, extension): + # + # Handle and report errors for disallowed extensions (e.g. extensions blocked by policy or disabled via config). + # + # TODO: __handle_and_report_ext_handler_errors() is also used to report extension errors, but it does not create + # a status file for single-config extensions (see below as to why this is important). This function, + # __handle_ext_disallowed_error, implements what we believe is the correct behavior, but at this point we + # use it only for disallowed extensions scenarios. In a future release, consider merging the two functions + # after assessing any impact. + # + # Note: When CRP polls for extension status, it first looks at handler status and then looks for any extension + # status. If extension status is present, CRP uses it instead of the handler status, ensuring that the + # sequence number for the extension settings match the sequence number in the reported status. CRP polls + # asynchronously to the Agent and, on a new goal state, it can check the status blob before the Agent has + # reported status for that goal state, effectively checking the status of the previous goal state. This is + # not an issue when the extension reports status at the extension level, since CRP wil wait for the status + # for the correct sequence number. However, when the extension reports status *only* at the handler level + # (e.g if the extension has no settings, during install errors, if extension is disallowed, etc.) CRP can + # end up picking up a stale status. There is not a good solution for extensions with no settings, and CRP + # can report an error from a previous goal state. For install errors of extensions with settings, though, + # we work around this issue by reporting the error *both* at the handler level and at the extension level + # (although reporting at the handler level *should* be sufficient). By reporting at the extension level, + # CRP will enforce a match on the sequence number for the settings, and skip stale status blobs. + + # Keep a list of disallowed extensions so that report_ext_handler_status() can report status for them. + self.__disallowed_ext_handlers.append(ext_handler_i.ext_handler) + + ext_handler_i.set_handler_status(status=ExtHandlerStatusValue.not_ready, message=message, code=error_code) + + # Only report extension status for install errors of extensions with settings. Disable/uninstall errors are + # reported at the handler status level only. + if extension is not None and ext_handler_i.ext_handler.state == ExtensionRequestedState.Enabled: + # TODO: if extension is reporting heartbeat, it overwrites status. Consider overwriting heartbeat here. + # Overwrite any existing status file to reflect the failure accurately. + ext_handler_i.create_status_file(extension, status=ExtensionStatusValue.error, code=error_code, + operation=ext_handler_i.operation, message=message, overwrite=True) + + name = ext_handler_i.get_extension_full_name(extension) + handler_version = ext_handler_i.ext_handler.version + add_event(name=name, version=handler_version, op=report_op, is_success=False, log_event=True, + message=message) + def handle_enable(self, ext_handler_i, extension): """ 1- Ensure the handler is installed @@ -985,12 +1079,13 @@ def report_ext_handler_status(self, vm_status, ext_handler, goal_state_changed): handler_state = ext_handler_i.get_handler_state() ext_handler_statuses = [] + ext_disallowed = ext_handler in self.__disallowed_ext_handlers # For MultiConfig, we need to report status per extension even for Handler level failures. # If we have HandlerStatus for a MultiConfig handler and GS is requesting for it, we would report status per # extension even if HandlerState == NotInstalled (Sample scenario: ExtensionsGoalStateError, DecideVersionError, etc) - # We also need to report extension status for an uninstalled handler if extensions are disabled because CRP - # waits for extension runtime status before failing the extension operation. - if handler_state != ExtHandlerState.NotInstalled or ext_handler.supports_multi_config or not conf.get_extensions_enabled(): + # We also need to report extension status for an uninstalled handler if the extension is disallowed (due to + # policy failure, extensions disabled, etc.) because CRP waits for extension runtime status before failing the operation. + if handler_state != ExtHandlerState.NotInstalled or ext_handler.supports_multi_config or ext_disallowed: # Since we require reading the Manifest for reading the heartbeat, this would fail if HandlerManifest not found. # Only try to read heartbeat if HandlerState != NotInstalled. @@ -1342,9 +1437,11 @@ def set_extension_resource_limits(self): extension_name=extension_name, cpu_quota=resource_limits.get_extension_slice_cpu_quota()) CGroupConfigurator.get_instance().set_extension_services_cpu_memory_quota(resource_limits.get_service_list()) - def create_status_file_if_not_exist(self, extension, status, code, operation, message): + def create_status_file(self, extension, status, code, operation, message, overwrite): + # Create status file for specified extension. If overwrite is true, overwrite any existing status file. If + # false, create a status file only if it does not already exist. _, status_path = self.get_status_file_path(extension) - if status_path is not None and not os.path.exists(status_path): + if status_path is not None and (overwrite or not os.path.exists(status_path)): now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") status_contents = [ { diff --git a/azurelinuxagent/ga/policy/policy_engine.py b/azurelinuxagent/ga/policy/policy_engine.py index 065589f67..347e7887e 100644 --- a/azurelinuxagent/ga/policy/policy_engine.py +++ b/azurelinuxagent/ga/policy/policy_engine.py @@ -36,12 +36,6 @@ _MAX_SUPPORTED_POLICY_VERSION = "0.1.0" -class PolicyError(AgentError): - """ - Error raised during agent policy enforcement. - """ - - class InvalidPolicyError(AgentError): """ Error raised if user-provided policy is invalid. @@ -50,7 +44,6 @@ def __init__(self, msg, inner=None): msg = "Customer-provided policy file ('{0}') is invalid, please correct the following error: {1}".format(conf.get_policy_file_path(), msg) super(InvalidPolicyError, self).__init__(msg, inner) - class _PolicyEngine(object): """ Implements base policy engine API. @@ -61,6 +54,7 @@ def __init__(self): if not self.policy_enforcement_enabled: return + _PolicyEngine._log_policy_event("Policy enforcement is enabled.") self._policy = self._parse_policy(self.__read_policy()) @staticmethod @@ -98,8 +92,10 @@ def __read_policy(): with open(conf.get_policy_file_path(), 'r') as f: try: contents = f.read() + # TODO: Consider copying the policy file contents to the history folder, and only log the policy locally + # in the case of policy-related failure. _PolicyEngine._log_policy_event( - "Policy enforcement is enabled. Enforcing policy using policy file found at '{0}'. File contents:\n{1}" + "Enforcing policy using policy file found at '{0}'. File contents:\n{1}" .format(conf.get_policy_file_path(), contents)) # json.loads will raise error if file contents are not a valid json (including empty file). custom_policy = json.loads(contents) diff --git a/tests/data/test_waagent.conf b/tests/data/test_waagent.conf index 8abbd55e5..a0147b01a 100644 --- a/tests/data/test_waagent.conf +++ b/tests/data/test_waagent.conf @@ -140,4 +140,4 @@ OS.SshDir=/notareal/path # - The default is false to protect the state of existing VMs OS.EnableFirewall=n -Debug.EnableExtensionPolicy=y +Debug.EnableExtensionPolicy=n diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 851a6d5c2..c1afd6bcf 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -427,7 +427,7 @@ def test_migration_ignores_tree_remove_errors(self, shutil_mock): # pylint: dis class TestExtensionBase(AgentTestCase): def _assert_handler_status(self, report_vm_status, expected_status, expected_ext_count, version, - expected_handler_name="OSTCExtensions.ExampleHandlerLinux", expected_msg=None): + expected_handler_name="OSTCExtensions.ExampleHandlerLinux", expected_msg=None, expected_code=None): self.assertTrue(report_vm_status.called) args, kw = report_vm_status.call_args # pylint: disable=unused-variable vm_status = args[0] @@ -443,6 +443,9 @@ def _assert_handler_status(self, report_vm_status, expected_status, if expected_msg is not None: self.assertIn(expected_msg, handler_status.message) + if expected_code is not None: + self.assertEqual(expected_code, handler_status.code) + # Deprecated. New tests should be added to the TestExtension class @patch('time.sleep', side_effect=lambda _: mock_sleep(0.001)) @@ -1649,13 +1652,13 @@ def test_extensions_disabled(self, _, *args): vm_status = args[0] self.assertEqual(1, len(vm_status.vmAgent.extensionHandlers)) exthandler = vm_status.vmAgent.extensionHandlers[0] - self.assertEqual(-1, exthandler.code) + self.assertEqual(ExtensionErrorCodes.PluginEnableProcessingFailed, exthandler.code) self.assertEqual('NotReady', exthandler.status) - self.assertEqual("Extension will not be processed since extension processing is disabled. To enable extension processing, set Extensions.Enabled=y in '/etc/waagent.conf'", exthandler.message) + self.assertEqual("Extension 'OSTCExtensions.ExampleHandlerLinux' will not be processed since extension processing is disabled. To enable extension processing, set Extensions.Enabled=y in '/etc/waagent.conf'", exthandler.message) ext_status = exthandler.extension_status - self.assertEqual(-1, ext_status.code) + self.assertEqual(ExtensionErrorCodes.PluginEnableProcessingFailed, ext_status.code) self.assertEqual('error', ext_status.status) - self.assertEqual("Extension will not be processed since extension processing is disabled. To enable extension processing, set Extensions.Enabled=y in '/etc/waagent.conf'", ext_status.message) + self.assertEqual("Extension 'OSTCExtensions.ExampleHandlerLinux' will not be processed since extension processing is disabled. To enable extension processing, set Extensions.Enabled=y in '/etc/waagent.conf'", ext_status.message) def test_extensions_deleted(self, *args): # Ensure initial enable is successful @@ -3507,5 +3510,211 @@ def test_report_msg_if_handler_manifest_contains_invalid_values(self): self.assertIn("'supportsMultipleExtensions' has a non-boolean value", kw_messages[2]['message']) +class TestExtensionPolicy(TestExtensionBase): + def setUp(self): + AgentTestCase.setUp(self) + self.policy_path = os.path.join(self.tmp_dir, "waagent_policy.json") + + # Patch attributes to enable policy feature + self.patch_policy_path = patch('azurelinuxagent.common.conf.get_policy_file_path', + return_value=str(self.policy_path)) + self.patch_policy_path.start() + self.patch_conf_flag = patch('azurelinuxagent.ga.policy.policy_engine.conf.get_extension_policy_enabled', + return_value=True) + self.patch_conf_flag.start() + self.maxDiff = None # When long error messages don't match, display the entire diff. + + def tearDown(self): + patch.stopall() + AgentTestCase.tearDown(self) + + def _create_policy_file(self, policy): + with open(self.policy_path, mode='w') as policy_file: + if isinstance(policy, dict): + json.dump(policy, policy_file, indent=4) + else: + policy_file.write(policy) + policy_file.flush() + + def _test_policy_case(self, policy, op, expected_status_code, expected_handler_status, expected_ext_count, + expected_status_msg=None): + + # Set up a mock protocol instance. + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: + if op == ExtensionRequestedState.Uninstall: + # Generate a new mock goal state to uninstall the extension - increment the incarnation + protocol.mock_wire_data.set_incarnation(2) + protocol.mock_wire_data.set_extensions_config_state(ExtensionRequestedState.Uninstall) + protocol.client.update_goal_state() + protocol.aggregate_status = None + protocol.report_vm_status = MagicMock() + exthandlers_handler = get_exthandlers_handler(protocol) + + # Create policy file and process extensions. + self._create_policy_file(policy) + exthandlers_handler.run() + exthandlers_handler.report_ext_handlers_status() + + # Assert that agent is reporting the expected handler status + report_vm_status = protocol.report_vm_status + self.assertTrue(report_vm_status.called) + self._assert_handler_status(report_vm_status, expected_handler_status, expected_ext_count=expected_ext_count, + version="1.0.0", expected_handler_name='OSTCExtensions.ExampleHandlerLinux', + expected_msg=expected_status_msg, expected_code=expected_status_code) + + def test_should_fail_enable_if_extension_disallowed(self): + policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + } + } + expected_msg = "failed to run extension 'OSTCExtensions.ExampleHandlerLinux' because it is not specified as an allowed extension." + self._test_policy_case(policy=policy, op=ExtensionRequestedState.Enabled, expected_status_code=ExtensionErrorCodes.PluginEnableProcessingFailed, + expected_handler_status='NotReady', expected_ext_count=1, expected_status_msg=expected_msg) + + def test_should_fail_enable_for_invalid_policy(self): + policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": "False" + } + } + expected_msg = "attribute 'extensionPolicies.allowListedExtensionsOnly'; must be 'boolean'" + self._test_policy_case(policy=policy, op=ExtensionRequestedState.Enabled, expected_status_code=ExtensionErrorCodes.PluginEnableProcessingFailed, + expected_handler_status='NotReady', expected_ext_count=1, expected_status_msg=expected_msg) + + def test_should_fail_extension_if_error_thrown_during_policy_engine_init(self): + policy = \ + { + "policyVersion": "0.1.0" + } + with patch('azurelinuxagent.ga.policy.policy_engine.ExtensionPolicyEngine.__init__', + side_effect=Exception("mock exception")): + expected_msg = "Extension will not be processed: mock exception" + self._test_policy_case(policy=policy, op=ExtensionRequestedState.Enabled, + expected_status_code=ExtensionErrorCodes.PluginEnableProcessingFailed, + expected_handler_status='NotReady', expected_ext_count=1, expected_status_msg=expected_msg) + + def test_should_fail_uninstall_if_extension_disallowed(self): + policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "signatureRequired": False, + "extensions": {} + }, + } + expected_msg = "failed to uninstall extension 'OSTCExtensions.ExampleHandlerLinux' because it is not specified as an allowed extension." + self._test_policy_case(policy=policy, op=ExtensionRequestedState.Uninstall, expected_status_code=ExtensionErrorCodes.PluginDisableProcessingFailed, + expected_handler_status='NotReady', expected_ext_count=1, expected_status_msg=expected_msg) + + def test_should_fail_enable_if_dependent_extension_disallowed(self): + self._create_policy_file({ + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "extensions": { + "OSTCExtensions.ExampleHandlerLinux": {} + } + } + }) + with mock_wire_protocol(wire_protocol_data.DATA_FILE_EXT_SEQUENCING) as protocol: + protocol.aggregate_status = None + protocol.report_vm_status = MagicMock() + exthandlers_handler = get_exthandlers_handler(protocol) + dep_ext_level_2 = extension_emulator(name="OSTCExtensions.ExampleHandlerLinux") + dep_ext_level_1 = extension_emulator(name="OSTCExtensions.OtherExampleHandlerLinux") + + exthandlers_handler.run() + exthandlers_handler.report_ext_handlers_status() + + # OtherExampleHandlerLinux should be disallowed by policy, ExampleHandlerLinux should be skipped because + # dependent extension failed + self._assert_handler_status(protocol.report_vm_status, expected_status="NotReady", expected_ext_count=1, + version="1.0.0", expected_handler_name="OSTCExtensions.OtherExampleHandlerLinux", + expected_msg=("failed to run extension 'OSTCExtensions.OtherExampleHandlerLinux' " + "because it is not specified as an allowed extension.")) + + self._assert_handler_status(protocol.report_vm_status, expected_status="NotReady", expected_ext_count=0, + version="1.0.0", expected_handler_name="OSTCExtensions.ExampleHandlerLinux", + expected_msg="Skipping processing of extensions since execution of dependent " + "extension OSTCExtensions.OtherExampleHandlerLinux failed") + + # check handler list and dependency levels + self.assertTrue(exthandlers_handler.ext_handlers is not None) + self.assertTrue(exthandlers_handler.ext_handlers is not None) + self.assertEqual(len(exthandlers_handler.ext_handlers), 2) + self.assertEqual(1, next(handler for handler in exthandlers_handler.ext_handlers if + handler.name == dep_ext_level_1.name).settings[0].dependencyLevel) + self.assertEqual(2, next(handler for handler in exthandlers_handler.ext_handlers if + handler.name == dep_ext_level_2.name).settings[0].dependencyLevel) + + def test_enable_should_succeed_if_extension_allowed(self): + policy_cases = [ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": False, + } + }, + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "extensions": { + "OSTCExtensions.ExampleHandlerLinux": {} + } + } + } + ] + for policy in policy_cases: + self._test_policy_case(policy=policy, op=ExtensionRequestedState.Enabled, expected_status_code=0, + expected_handler_status='Ready', expected_ext_count=1) + + def test_uninstall_should_succeed_if_extension_allowed(self): + policy_cases = [ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": False, + } + }, + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "extensions": { + "OSTCExtensions.ExampleHandlerLinux": {} + } + } + } + ] + for policy in policy_cases: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: + # Generate a new mock goal state to uninstall the extension - increment the incarnation + protocol.mock_wire_data.set_incarnation(2) + protocol.mock_wire_data.set_extensions_config_state(ExtensionRequestedState.Uninstall) + protocol.client.update_goal_state() + protocol.aggregate_status = None + protocol.report_vm_status = MagicMock() + exthandlers_handler = get_exthandlers_handler(protocol) + + # Create policy file and process extensions. + self._create_policy_file(policy) + exthandlers_handler.run() + exthandlers_handler.report_ext_handlers_status() + + # Assert that no status is being reported for the extension, to confirm that uninstall was successful. + report_vm_status = protocol.report_vm_status + self.assertTrue(report_vm_status.called) + args, kw = report_vm_status.call_args # pylint: disable=unused-variable + vm_status = args[0] + self.assertEqual(0, len(vm_status.vmAgent.extensionHandlers)) + + if __name__ == '__main__': unittest.main() diff --git a/tests/ga/test_multi_config_extension.py b/tests/ga/test_multi_config_extension.py index 482126be3..c7ce20c73 100644 --- a/tests/ga/test_multi_config_extension.py +++ b/tests/ga/test_multi_config_extension.py @@ -630,6 +630,95 @@ def test_it_should_handle_and_report_enable_errors_properly(self): } self._assert_extension_status(sc_handler, expected_extensions) + def test_it_should_report_failed_status_for_extensions_disallowed_by_policy(self): + """If multiconfig extension is disallowed by policy, all instances should be blocked.""" + policy_path = os.path.join(self.tmp_dir, "waagent_policy.json") + with patch('azurelinuxagent.common.conf.get_policy_file_path', return_value=str(policy_path)): + with patch('azurelinuxagent.ga.policy.policy_engine.conf.get_extension_policy_enabled', return_value=True): + policy = \ + { + "policyVersion": "0.0.1", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "signatureRequired": True, + "extensions": { + "Microsoft.Powershell.ExampleExtension": {} + } + } + } + with open(policy_path, mode='w') as policy_file: + json.dump(policy, policy_file, indent=4) + policy_file.flush() + self.test_data['ext_conf'] = os.path.join(self._MULTI_CONFIG_TEST_DATA, + "ext_conf_multi_config_no_dependencies.xml") + with self._setup_test_env() as (exthandlers_handler, protocol, no_of_extensions): + disallowed_mc_1 = extension_emulator(name="OSTCExtensions.ExampleHandlerLinux.firstExtension", + supports_multiple_extensions=True) + disallowed_mc_2 = extension_emulator(name="OSTCExtensions.ExampleHandlerLinux.secondExtension", + supports_multiple_extensions=True) + disallowed_mc_3 = extension_emulator(name="OSTCExtensions.ExampleHandlerLinux.thirdExtension", + supports_multiple_extensions=True) + allowed_ext = extension_emulator(name="Microsoft.Powershell.ExampleExtension") + with enable_invocations(disallowed_mc_1, disallowed_mc_2, disallowed_mc_3, + allowed_ext) as invocation_record: + exthandlers_handler.run() + exthandlers_handler.report_ext_handlers_status() + self.assertEqual(no_of_extensions, + len(protocol.aggregate_status['aggregateStatus']['handlerAggregateStatus']), + "incorrect extensions reported") + + # We should only enable the allowed extension, no instances of the multiconfig extension should be enabled + invocation_record.compare( + (allowed_ext, ExtensionCommandNames.INSTALL), + (allowed_ext, ExtensionCommandNames.ENABLE) + ) + + mc_handlers = self._assert_and_get_handler_status(aggregate_status=protocol.aggregate_status, + handler_name="OSTCExtensions.ExampleHandlerLinux", + expected_count=3, status="NotReady") + msg = "failed to run extension 'OSTCExtensions.ExampleHandlerLinux' because it is not specified as an allowed extension" + expected_extensions = { + "firstExtension": {"status": ExtensionStatusValue.error, "seq_no": 1, "message": msg}, + "secondExtension": {"status": ExtensionStatusValue.error, "seq_no": 2, "message": msg}, + "thirdExtension": {"status": ExtensionStatusValue.error, "seq_no": 3, "message": msg}, + } + self._assert_extension_status(mc_handlers, expected_extensions, multi_config=True) + + sc_handler = self._assert_and_get_handler_status(aggregate_status=protocol.aggregate_status, + handler_name="Microsoft.Powershell.ExampleExtension", + status="Ready", message=None) + expected_extensions = { + "Microsoft.Powershell.ExampleExtension": {"status": ExtensionStatusValue.success, "seq_no": 9, + "message": None} + } + self._assert_extension_status(sc_handler, expected_extensions) + + def test_it_should_report_successful_status_for_extensions_allowed_by_policy(self): + """If multiconfig extension is allowed by policy, all instances should be allowed.""" + policy_path = os.path.join(self.tmp_dir, "waagent_policy.json") + with patch('azurelinuxagent.common.conf.get_policy_file_path', return_value=str(policy_path)): + with patch('azurelinuxagent.ga.policy.policy_engine.conf.get_extension_policy_enabled', return_value=True): + policy = \ + { + "policyVersion": "0.0.1", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "signatureRequired": True, + "extensions": { + "OSTCExtensions.ExampleHandlerLinux": {}, + "Microsoft.Powershell.ExampleExtension": {} + } + } + } + with open(policy_path, mode='w') as policy_file: + json.dump(policy, policy_file, indent=4) + policy_file.flush() + + self.test_data['ext_conf'] = os.path.join(self._MULTI_CONFIG_TEST_DATA, + "ext_conf_multi_config_no_dependencies.xml") + with self._setup_test_env(mock_manifest=True) as (exthandlers_handler, protocol, no_of_extensions): + self.__run_and_assert_generic_case(exthandlers_handler, protocol, no_of_extensions) + def test_it_should_cleanup_extension_state_on_disable(self): def __assert_state_file(handler_name, handler_version, extensions, state, not_present=None): diff --git a/tests/test_agent.py b/tests/test_agent.py index 291823ad5..6e7e7a2a2 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -47,7 +47,7 @@ Debug.CgroupLogMetrics = False Debug.EnableAgentMemoryUsageCheck = False Debug.EnableCgroupV2ResourceLimiting = False -Debug.EnableExtensionPolicy = True +Debug.EnableExtensionPolicy = False Debug.EnableFastTrack = True Debug.EnableGAVersioning = True Debug.EtpCollectionPeriod = 300 diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index b96cc5107..c7bcaed61 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -45,6 +45,8 @@ variable: - agent_update - ext_cgroups - extensions_disabled + - ext_policy + - ext_policy_with_dependencies - ext_sequencing - ext_telemetry_pipeline - fips diff --git a/tests_e2e/orchestrator/scripts/collect-logs b/tests_e2e/orchestrator/scripts/collect-logs index c221288a1..94cb44edd 100755 --- a/tests_e2e/orchestrator/scripts/collect-logs +++ b/tests_e2e/orchestrator/scripts/collect-logs @@ -13,13 +13,16 @@ echo "Collecting logs to $logs_file_name ..." PYTHON=$(get-agent-python) waagent_conf=$($PYTHON -c 'from azurelinuxagent.common.osutil import get_osutil; print(get_osutil().agent_conf_file_path)') +# TODO: instead of collecting /etc/waagent_policy.json here, consider adding it to goal state history, since it can +# change per goal state. tar --exclude='journal/*' --exclude='omsbundle' --exclude='omsagent' --exclude='mdsd' --exclude='scx*' \ --exclude='*.so' --exclude='*__LinuxDiagnostic__*' --exclude='*.zip' --exclude='*.deb' --exclude='*.rpm' \ --warning=no-file-changed \ -czf "$logs_file_name" \ /var/log \ /var/lib/waagent/ \ - $waagent_conf + $waagent_conf \ + /etc/waagent_policy.json set -euxo pipefail diff --git a/tests_e2e/test_suites/ext_policy.yml b/tests_e2e/test_suites/ext_policy.yml new file mode 100644 index 000000000..a394f471d --- /dev/null +++ b/tests_e2e/test_suites/ext_policy.yml @@ -0,0 +1,8 @@ +# +# The test suite verifies that disallowed extensions are not processed, but the agent should still report status. +# +name: "ExtensionPolicy" +tests: + - "ext_policy/ext_policy.py" +images: "endorsed" +owns_vm: false \ No newline at end of file diff --git a/tests_e2e/test_suites/ext_policy_with_dependencies.yml b/tests_e2e/test_suites/ext_policy_with_dependencies.yml new file mode 100644 index 000000000..51ba959be --- /dev/null +++ b/tests_e2e/test_suites/ext_policy_with_dependencies.yml @@ -0,0 +1,11 @@ +# +# The test suite verifies that disallowed extensions and any extensions dependent on disallowed extensions are not processed, +# but the agent should still report status. +name: "ExtPolicyWithDependencies" +tests: + - "ext_policy/ext_policy_with_dependencies.py" +images: "endorsed" +executes_on_scale_set: true +owns_vm: false +skip_on_images: + - "alma_9" # TODO: Currently AlmaLinux is not available for scale sets; enable this image when it is available. \ No newline at end of file diff --git a/tests_e2e/tests/ext_policy/ext_policy.py b/tests_e2e/tests/ext_policy/ext_policy.py new file mode 100644 index 000000000..97b78df62 --- /dev/null +++ b/tests_e2e/tests/ext_policy/ext_policy.py @@ -0,0 +1,330 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import json +import re +import uuid +import os +from typing import List, Dict, Any +from assertpy import assert_that, fail + +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.ssh_client import SshClient +from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient +from tests_e2e.tests.lib.virtual_machine_runcommand_client import VirtualMachineRunCommandClient +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext + + +class ExtPolicy(AgentVmTest): + class TestCase: + def __init__(self, extension, settings: Any): + self.extension = extension + self.settings = settings + + def __init__(self, context: AgentVmTestContext): + super().__init__(context) + self._ssh_client: SshClient = self._context.create_ssh_client() + + def _create_policy_file(self, policy): + """ + Create policy json file and copy to /etc/waagent_policy.json on test machine. + """ + unique_id = uuid.uuid4() + file_path = "/tmp/waagent_policy_{0}.json".format(unique_id) + with open(file_path, mode='w') as policy_file: + json.dump(policy, policy_file, indent=4) + policy_file.flush() + log.info("Policy file contents: {0}".format(json.dumps(policy, indent=4))) + + remote_path = "/tmp/waagent_policy.json" + local_path = policy_file.name + self._ssh_client.copy_to_node(local_path=local_path, remote_path=remote_path) + policy_file_final_dest = "/etc/waagent_policy.json" + log.info("Copying policy file to test VM [%s]", self._context.vm.name) + self._ssh_client.run_command(f"mv {remote_path} {policy_file_final_dest}", use_sudo=True) + os.remove(file_path) + + def _operation_should_succeed(self, operation, extension_case): + log.info("") + log.info(f"Attempting to {operation} {extension_case.extension.__str__()}, expected to succeed") + # Attempt operation. If enabling, assert that the extension is present in instance view. + # If deleting, assert that the extension is not present in instance view. + try: + if operation == "enable": + # VirtualMachineRunCommandClient (and VirtualMachineRunCommand) does not take force_update_tag as a parameter. + if isinstance(extension_case.extension, VirtualMachineRunCommandClient): + extension_case.extension.enable(settings=extension_case.settings) + else: + extension_case.extension.enable(settings=extension_case.settings, force_update=True) + extension_case.extension.assert_instance_view() + elif operation == "delete": + extension_case.extension.delete() + instance_view_extensions = self._context.vm.get_instance_view().extensions + if instance_view_extensions is not None and any( + e.name == extension_case.extension._resource_name for e in instance_view_extensions): + raise Exception( + "extension {0} still in instance view after attempting to delete".format(extension_case.extension)) + log.info(f"Operation '{operation}' for {extension_case.extension.__str__()} succeeded as expected.") + except Exception as error: + fail( + f"Unexpected error while trying to {operation} {extension_case.extension.__str__()}. " + f"Extension is allowed by policy so this operation should have completed successfully.\n" + f"Error: {error}") + + def _operation_should_fail(self, operation, extension_case): + log.info("") + if operation == "enable": + try: + log.info(f"Attempting to enable {extension_case.extension}, should fail fast due to policy.") + timeout = (6 * 60) # Fail fast. + # VirtualMachineRunCommandClient (and VirtualMachineRunCommand) does not take force_update_tag as a parameter. + if isinstance(extension_case.extension, VirtualMachineRunCommandClient): + extension_case.extension.enable(settings=extension_case.settings, timeout=timeout) + else: + extension_case.extension.enable(settings=extension_case.settings, force_update=True, + timeout=timeout) + fail( + f"The agent should have reported an error trying to {operation} {extension_case.extension} " + f"because the extension is disallowed by policy.") + except Exception as error: + # We exclude the extension name from regex because CRP sometimes installs test extensions with different + # names (ex: Microsoft.Azure.Extensions.Edp.RunCommandHandlerLinuxTest instead of Microsoft.CPlat.Core.RunCommandHandlerLinux) + pattern = r".*Extension will not be processed: failed to run extension .* because it is not specified as an allowed extension.*" + assert_that(re.search(pattern, str(error))) \ + .described_as( + f"Error message is expected to contain '{pattern}', but actual error message was '{error}'").is_not_none() + log.info(f"{extension_case.extension} {operation} failed as expected due to policy") + + elif operation == "delete": + # For delete operations, CRP polls until the agent stops reporting status for the extension, or until timeout is + # reached, because delete is a best-effort operation and is not expected to fail. However, when delete is called + # on a disallowed extension, the agent reports failure status, so CRP will continue to poll until timeout. + # For efficiency, we asynchronously check the instance view and agent log to confirm that deletion failed, + # and do not wait for a response from CRP. + # + # Note: CRP will not allow an 'enable' request until deletion succeeds or times out. The next call must be + # a delete operation allowed by policy. + log.info(f"Attempting to delete {extension_case.extension}, should fail due to policy.") + delete_start_time = self._ssh_client.run_command("date '+%Y-%m-%d %T'").rstrip() + try: + timeout = (3 * 60) # Allow agent some time to process goal state, but do not wait for full timeout. + extension_case.extension.delete(timeout=timeout) + fail(f"CRP should not have successfully completed the delete operation for {extension_case.extension} " + f"because the extension is disallowed by policy and agent should have reported a policy failure.") + except TimeoutError: + log.info("Delete operation did not complete, as expected. Checking instance view " + "and agent log to confirm that delete operation failed due to policy.") + # Confirm that extension is still present in instance view + instance_view_extensions = self._context.vm.get_instance_view().extensions + if instance_view_extensions is not None and not any( + e.name == extension_case.extension._resource_name for e in instance_view_extensions): + fail(f"Delete operation is disallowed by policy and should have failed, but extension " + f"{extension_case.extension} is no longer present in the instance view.") + + # Confirm that agent log contains error message that uninstall was blocked due to policy + # The script will check for a log message such as "Extension will not be processed: failed to uninstall + # extension 'Microsoft.Azure.Extensions.CustomScript' because it is not specified as an allowed extension" + self._ssh_client.run_command( + f"agent_ext_policy-verify_operation_disallowed.py --extension-name '{extension_case.extension._identifier}' " + f"--after-timestamp '{delete_start_time}' --operation 'uninstall'", use_sudo = True) + + def run(self): + + log.info("*** Begin test setup") + + # Prepare no-config, single-config, and multi-config extension to test. Extensions with settings and extensions + # without settings have different status reporting logic, so we should test all cases. + # CustomScript is a single-config extension. + custom_script = ExtPolicy.TestCase( + VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, + resource_name="CustomScript"), + {'commandToExecute': f"echo '{str(uuid.uuid4())}'"} + ) + + # RunCommandHandler is a multi-config extension, so we set up two instances (configurations) here and test both. + # We append the resource name with "Policy" because agent_bvt/run_command.py leaves behind a "RunCommandHandler" + # that cannot be deleted via extensions API. + run_command = ExtPolicy.TestCase( + VirtualMachineRunCommandClient(self._context.vm, VmExtensionIds.RunCommandHandler, + resource_name="RunCommandHandlerPolicy"), + {'source': f"echo '{str(uuid.uuid4())}'"} + ) + run_command_2 = ExtPolicy.TestCase( + VirtualMachineRunCommandClient(self._context.vm, VmExtensionIds.RunCommandHandler, + resource_name="RunCommandHandlerPolicy2"), + {'source': f"echo '{str(uuid.uuid4())}'"} + ) + + # AzureMonitorLinuxAgent is a no-config extension (extension without settings). + azure_monitor = ExtPolicy.TestCase( + VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.AzureMonitorLinuxAgent, + resource_name="AzureMonitorLinuxAgent"), + None + ) + + # Another e2e test may have left behind an extension we want to test here. Cleanup any leftovers so that they + # do not affect the test results. + log.info("Cleaning up existing extensions on the test VM [%s]", self._context.vm.name) + # TODO: Consider deleting only extensions used by this test instead of all extensions. + self._context.vm.delete_all_extensions() + + # Enable policy via conf file + log.info("Enabling policy via conf file on the test VM [%s]", self._context.vm.name) + self._ssh_client.run_command("update-waagent-conf Debug.EnableExtensionPolicy=y", use_sudo=True) + log.info("Test setup complete.") + + # This policy tests the following scenarios: + # - enable a single-config extension (CustomScript) that is allowed by policy -> should succeed + # - enable a no-config extension (AzureMonitorLinuxAgent) that is disallowed by policy -> should fail fast + # - enable two instances of a multi-config extension (RunCommandHandler) that is disallowed by policy -> both should fail fast + # (Note that CustomScript disallowed by policy is tested in a later test case.) + log.info("") + log.info("*** Begin test case 1") + log.info("This policy tests the following scenarios:") + log.info(" - enable a single-config extension (CustomScript) that is allowed by policy -> should succeed") + log.info(" - enable a no-config extension (AzureMonitorLinuxAgent) that is disallowed by policy -> should fail fast") + log.info(" - enable two instances of a multi-config extension (RunCommandHandler) that is disallowed by policy -> both should fail fast") + policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "signatureRequired": False, + "extensions": { + "Microsoft.Azure.Extensions.CustomScript": {}, + # GuestConfiguration is added to all VMs for security requirements, so we always allow it. + "Microsoft.GuestConfiguration.ConfigurationforLinux": {} + } + } + } + self._create_policy_file(policy) + self._operation_should_succeed("enable", custom_script) + self._operation_should_fail("enable", run_command) + self._operation_should_fail("enable", run_command_2) + # We only enable AMA on supported distros. + # If we were to enable AMA on an unsupported distro, the operation would initially be blocked by policy as + # expected. However, after changing the policy to allow all with the next goal state, the agent would attempt to + # re-enable AMA on an unsupported distro, causing errors. + if VmExtensionIds.AzureMonitorLinuxAgent.supports_distro((self._ssh_client.run_command("get_distro.py").rstrip())): + self._operation_should_fail("enable", azure_monitor) + + # This policy tests the following scenarios: + # - enable two instances of a multi-config extension (RunCommandHandler) when allowed by policy -> should succeed + # - delete two instances of a multi-config extension (RunCommandHandler) when allowed by policy -> should succeed + # - enable no-config extension (AzureMonitorLinuxAgent) when allowed by policy -> should succeed + # - delete no-config extension (AzureMonitorLinuxAgent) when allowed by policy -> should succeed + log.info("") + log.info("*** Begin test case 2") + log.info("This policy tests the following scenarios:") + log.info(" - enable two instances of a multi-config extension (RunCommandHandler) when allowed by policy -> should succeed") + log.info(" - delete two instances of a multi-config extension (RunCommandHandler) when allowed by policy -> should succeed") + log.info(" - enable no-config extension (AzureMonitorLinuxAgent) when allowed by policy -> should succeed") + log.info(" - delete no-config extension (AzureMonitorLinuxAgent) when allowed by policy -> should succeed") + policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": False, + "signatureRequired": False, + "extensions": {} + } + } + self._create_policy_file(policy) + self._operation_should_succeed("enable", custom_script) + # Update settings to force an update to the seq no + run_command.settings = {'source': f"echo '{str(uuid.uuid4())}'"} + run_command_2.settings = {'source': f"echo '{str(uuid.uuid4())}'"} + self._operation_should_succeed("enable", run_command) + self._operation_should_succeed("enable", run_command_2) + self._operation_should_succeed("delete", run_command) + self._operation_should_succeed("delete", run_command_2) + if VmExtensionIds.AzureMonitorLinuxAgent.supports_distro((self._ssh_client.run_command("get_distro.py").rstrip())): + self._operation_should_succeed("enable", azure_monitor) + self._operation_should_succeed("delete", azure_monitor) + + + # This policy tests the following scenarios: + # - disallow a previously-enabled single-config extension (CustomScript, then try to enable again -> should fail fast + # - disallow a previously-enabled single-config extension (CustomScript), then try to delete -> should fail fast + log.info("") + log.info("*** Begin test case 3") + log.info("This policy tests the following scenarios:") + log.info(" - disallow a previously-enabled single-config extension (CustomScript, then try to enable again -> should fail fast") + log.info(" - disallow a previously-enabled single-config extension (CustomScript), then try to delete -> should reach timeout") + policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "signatureRequired": False, + "extensions": { + # GuestConfiguration is added to all VMs for security requirements, so we always allow it. + "Microsoft.GuestConfiguration.ConfigurationforLinux": {} + } + } + } + self._create_policy_file(policy) + self._operation_should_fail("enable", custom_script) + self._operation_should_fail("delete", custom_script) + + # This policy tests the following scenario: + # - allow a previously-disallowed single-config extension (CustomScript), then delete -> should succeed + # - allow a previously-disallowed single-config extension (CustomScript), then enable -> should succeed + log.info("") + log.info("*** Begin test case 4") + log.info("This policy tests the following scenario:") + log.info(" - allow a previously-disallowed single-config extension (CustomScript), then delete -> should succeed") + log.info(" - allow a previously-disallowed single-config extension (CustomScript), then enable -> should succeed") + policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": False, + "signatureRequired": False, + "extensions": {} + } + } + self._create_policy_file(policy) + # Since CustomScript is marked for deletion by previous test case, we can only retry the delete operation (enable + # is not allowed by CRP). So we first delete successfully, and then re-install/enable CustomScript. + self._operation_should_succeed("delete", custom_script) + self._operation_should_succeed("enable", custom_script) + + # Cleanup after test: disable policy enforcement in conf file. + log.info("") + log.info("*** Begin test cleanup") + log.info("Disabling policy via conf file on the test VM [%s]", self._context.vm.name) + self._ssh_client.run_command("update-waagent-conf Debug.EnableExtensionPolicy=n", use_sudo=True) + log.info("*** Test cleanup complete.") + + + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + ignore_rules = [ + # 2024-10-24T17:34:20.808235Z ERROR ExtHandler ExtHandler Event: name=Microsoft.Azure.Monitor.AzureMonitorLinuxAgent, op=None, message=[ExtensionPolicyError] Extension will not be processed: failed to run extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because it is not specified as an allowed extension. To enable, add the extension to the list of allowed extensions in the policy file ('/etc/waagent_policy.json')., duration=0 + # We intentionally block extensions with policy and expect this failure message + { + 'message': r"Extension will not be processed: failed to .* extension .* because it is not specified as an allowed extension" + } + ] + return ignore_rules + + +if __name__ == "__main__": + ExtPolicy.run_from_command_line() \ No newline at end of file diff --git a/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py b/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py new file mode 100644 index 000000000..c4f798848 --- /dev/null +++ b/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py @@ -0,0 +1,332 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This test adds extensions with multiple dependencies to a VMSS and checks that extensions fail and report status +# as expected when blocked by extension policy. + +import copy +import json +import random +import re +import os +import uuid +import time +from datetime import datetime +from typing import List, Dict, Any + +from assertpy import fail +from tests_e2e.tests.lib.agent_test import AgentVmssTest, TestSkipped +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext +from tests_e2e.tests.lib.virtual_machine_scale_set_client import VmssInstanceIpAddress +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.resource_group_client import ResourceGroupClient +from tests_e2e.tests.lib.ssh_client import SshClient +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds +from tests_e2e.tests.ext_policy.policy_dependencies_cases import _should_fail_single_config_depends_on_disallowed_no_config, \ + _should_fail_single_config_depends_on_disallowed_single_config, \ + _should_succeed_single_config_depends_on_no_config, \ + _should_succeed_single_config_depends_on_single_config + # TODO: RunCommandHandler is unable to be uninstalled properly, so these tests are currently disabled. Uncomment + # the below imports after re-enabling the test. + # _should_fail_single_config_depends_on_disallowed_multi_config, + # _should_fail_multi_config_depends_on_disallowed_single_config, + # _should_fail_multi_config_depends_on_disallowed_no_config, + +class ExtPolicyWithDependencies(AgentVmssTest): + def __init__(self, context: AgentVmTestContext): + super().__init__(context) + self._scenario_start = datetime.min + + # Cases to test different dependency scenarios + _test_cases = [ + _should_fail_single_config_depends_on_disallowed_single_config, + _should_fail_single_config_depends_on_disallowed_no_config, + # TODO: RunCommandHandler is unable to be uninstalled properly, so these tests are currently disabled. Investigate the + # issue and enable these 3 tests. + # _should_fail_single_config_depends_on_disallowed_multi_config, + # _should_fail_multi_config_depends_on_disallowed_single_config, + # _should_fail_multi_config_depends_on_disallowed_no_config, + _should_succeed_single_config_depends_on_single_config, + _should_succeed_single_config_depends_on_no_config + ] + + @staticmethod + def _create_policy_file(ssh_client, policy): + # Generate a unique file name to avoid conflicts with any other tests running in parallel. + unique_id = uuid.uuid4() + file_path = "/tmp/waagent_policy_{0}.json".format(unique_id) + with open(file_path, mode='w') as policy_file: + json.dump(policy, policy_file, indent=4) + policy_file.flush() + + remote_path = "/tmp/waagent_policy.json" + local_path = policy_file.name + ssh_client.copy_to_node(local_path=local_path, remote_path=remote_path) + policy_file_final_dest = "/etc/waagent_policy.json" + ssh_client.run_command(f"mv {remote_path} {policy_file_final_dest}", use_sudo=True) + os.remove(file_path) + + def run(self): + + # Set up the test run + instances_ip_address: List[VmssInstanceIpAddress] = self._context.vmss.get_instances_ip_address() + ssh_clients: Dict[str, SshClient] = {} + for instance in instances_ip_address: + ssh_clients[instance.instance_name] = SshClient(ip_address=instance.ip_address, username=self._context.username, identity_file=self._context.identity_file) + + # Cleanup any extensions left behind by other tests, as they may be blocked by policy and erroneously cause failures. + instance_view_ext = self._context.vmss.get_instance_view().extensions + if instance_view_ext is not None and len(instance_view_ext) > 0: + for ex in instance_view_ext: + self._context.vmss.delete_extension(ex.name) + + # Enable policy via conf file. + for ssh_client in ssh_clients.values(): + ssh_client.run_command("update-waagent-conf Debug.EnableExtensionPolicy=y", use_sudo=True) + + if not VmExtensionIds.AzureMonitorLinuxAgent.supports_distro(next(iter(ssh_clients.values())).run_command("get_distro.py").rstrip()): + raise TestSkipped("Currently AzureMonitorLinuxAgent is not supported on this distro") + + # This is the base ARM template that's used for deploying extensions for this scenario. + base_extension_template = { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json", + "contentVersion": "1.0.0.0", + "resources": [ + { + "type": "Microsoft.Compute/virtualMachineScaleSets", + "name": f"{self._context.vmss.name}", + "location": "[resourceGroup().location]", + "apiVersion": "2018-06-01", + "properties": { + "virtualMachineProfile": { + "extensionProfile": { + "extensions": [] + } + } + } + } + ] + } + + for case in self._test_cases: + log.info("") + log.info("*** Test case: {0}".format(case.__name__.replace('_', ' '))) + test_case_start = random.choice(list(ssh_clients.values())).run_command("date '+%Y-%m-%d %T'").rstrip() + if self._scenario_start == datetime.min: + self._scenario_start = test_case_start + log.info("Test case start time: {0}".format(test_case_start)) + + # Assign unique guid to forceUpdateTag for each extension to make sure they're always unique to force CRP + # to generate a new sequence number each time + test_guid = str(uuid.uuid4()) + policy, extensions, expected_errors, deletion_order = case() + for ext in extensions: + ext["properties"].update({ + "forceUpdateTag": test_guid + }) + + # We update the extension template here with extensions that are specific to the scenario that we want to + # test out + ext_template = copy.deepcopy(base_extension_template) + ext_template['resources'][0]['properties']['virtualMachineProfile']['extensionProfile'][ + 'extensions'] = extensions + + # Log the dependencies for the extensions in this test case + for ext in extensions: + provisioned_after = ext['properties'].get('provisionAfterExtensions') + depends_on = provisioned_after if provisioned_after else [] + if depends_on: + dependency_list = ' and '.join(depends_on) + log.info("{0} depends on {1}".format(ext['name'], dependency_list)) + else: + log.info("{0} does not depend on any extension".format(ext['name'])) + + # Copy policy file to each VM instance + log.info("Updating policy file with new policy: {0}".format(policy)) + for ssh_client in ssh_clients.values(): + self._create_policy_file(ssh_client, policy) + + log.info("Deploying extensions to the scale set...") + rg_client = ResourceGroupClient(self._context.vmss.cloud, self._context.vmss.subscription, + self._context.vmss.resource_group, self._context.vmss.location) + + # Deploy updated extension template to the scale set. + # If test case is supposed to fail, assert that the operation fails with the expected error messages. + try: + rg_client.deploy_template(template=ext_template) + if expected_errors is not None and len(expected_errors) != 0: + fail("Extension deployment was expected to fail with the following errors: {0}".format(expected_errors)) + log.info("Extension deployment succeeded as expected") + log.info("") + except Exception as e: + if expected_errors is None or len(expected_errors) == 0: + fail("Extension template deployment unexpectedly failed: {0}".format(e)) + else: + deployment_failure_pattern = r"[\s\S]*\"code\":\s*\"ResourceDeploymentFailure\"[\s\S]*\"details\":\s*\[\s*(?P[\s\S]*)\]" + deployment_failure_match = re.match(deployment_failure_pattern, str(e)) + try: + if deployment_failure_match is None: + raise Exception("Unable to match a ResourceDeploymentFailure") + error_json = json.loads(deployment_failure_match.group("error")) + error_message = error_json['message'] + except Exception as parse_exc: + fail("Extension template deployment failed as expected, but there was an error in parsing the failure. Parsing failure: {0}\nDeployment Failure: {1}".format(parse_exc, e)) + + for phrase in expected_errors: + if phrase not in error_message: + fail("Extension template deployment failed as expected, but with an unexpected error. Error expected to contain message '{0}'. Actual error: {1}".format(phrase, e)) + + log.info("Extensions failed as expected.") + log.info("") + log.info("Expected errors:") + for expected_error in expected_errors: + log.info(" - {0}".format(expected_error)) + log.info("") + log.info("") + log.info("Actual errors:") + log.info(str(e)) + + # Clean up failed extensions to leave VMSS in a good state for the next test. CRP will attempt to uninstall + # leftover extensions in the next test, but uninstall will be disallowed and reach timeout unexpectedly. + # CRP also won't allow deletion of an extension that is dependent on another failed extension, so we first + # update policy to allow all, re-enable all extensions, and then delete them in dependency order. + log.info("Starting cleanup for test case...") + allow_all_policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": False + } + } + for ssh_client in ssh_clients.values(): + self._create_policy_file(ssh_client, allow_all_policy) + + log.info("Trying to re-enable before deleting extensions...") + for ext in extensions: + ext["properties"].update({ + "forceUpdateTag": str(uuid.uuid4()) + }) + ext_template['resources'][0]['properties']['virtualMachineProfile']['extensionProfile'][ + 'extensions'] = extensions + enable_start_time = random.choice(list(ssh_clients.values())).run_command("date '+%Y-%m-%d %T'").rstrip() + try: + rg_client.deploy_template(template=ext_template) + except Exception as err: + # Known issue - CRP returns a stale status for no-config extensions, because it does not wait for a new + # sequence number. Only for cases testing no-config extension dependencies, swallow the CRP error and + # check agent log instead to confirm that extensions were enabled successfully. + test_cases_to_work_around = [ + _should_fail_single_config_depends_on_disallowed_no_config + ] + if case in test_cases_to_work_around: + log.info("CRP returned error when re-enabling extensions after allowing. Checking agent log to see if enable succeeded. " + "Error: {0}".format(err)) + time.sleep(2 * 60) # Give extensions some time to finish processing. + extension_list = ' '.join([str(e) for e in deletion_order]) + command = (f"agent_ext_policy-verify_operation_success.py --after-timestamp '{enable_start_time}' " + f"--operation 'enable' --extension-list {extension_list}") + for ssh_client in ssh_clients.values(): + ssh_client.run_command(command, use_sudo=True) + log.info("Agent reported successful status for all extensions, enable succeeded.") + else: + fail("Failed to re-enable extensions after allowing with policy.") + + # Delete all extensions in dependency order. + for ext_to_delete in deletion_order: + ext_name_to_delete = ext_to_delete.type + try: + self._context.vmss.delete_extension(ext_name_to_delete) + except Exception as crp_err: + fail("Failed to uninstall extension {0}. Exception: {1}".format(ext_name_to_delete, crp_err)) + log.info("Successfully uninstalled extension {0}".format(ext_name_to_delete)) + + log.info("Successfully removed all extensions from VMSS") + log.info("---------------------------------------------") + + # Disable policy via conf file. + for ssh_client in ssh_clients.values(): + ssh_client.run_command("update-waagent-conf Debug.EnableExtensionPolicy=n", use_sudo=True) + + def get_ignore_errors_before_timestamp(self) -> datetime: + # Ignore errors in the agent log before the first test case starts + if self._scenario_start == datetime.min: + return self._scenario_start + return datetime.strptime(self._scenario_start, u'%Y-%m-%d %H:%M:%S') + + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + ignore_rules = [ + # + # WARNING ExtHandler ExtHandler Missing dependsOnExtension on extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent + # This message appears when an extension doesn't depend on another extension + # + { + 'message': r"Missing dependsOnExtension on extension .*" + }, + # + # WARNING ExtHandler ExtHandler Extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent does not have any settings. Will ignore dependency (dependency level: 1) + # We currently ignore dependencies for extensions without settings + # + { + 'message': r"Extension .* does not have any settings\. Will ignore dependency \(dependency level: \d\)" + }, + # + # 2023-10-31T17:46:59.675959Z WARNING ExtHandler ExtHandler Dependent extension Microsoft.Azure.Extensions.CustomScript failed or timed out, will skip processing the rest of the extensions + # We intentionally disallow some extensions to test that dependent are skipped. We assert the specific expected failure message in the test. + # + { + 'message': r"Dependent extension .* failed or timed out, will skip processing the rest of the extensions" + }, + # + # 2023-10-31T17:48:13.349214Z ERROR ExtHandler ExtHandler Event: name=Microsoft.Azure.Extensions.CustomScript, op=ExtensionProcessing, message=Dependent Extension Microsoft.Azure.Extensions.CustomScript did not succeed. Status was error, duration=0 + # We intentionally fail to test that dependent extensions are skipped + # + { + 'message': r"message=Dependent Extension .* did not succeed. Status was error, duration=0" + }, + # + # 2023-10-31T17:47:07.689083Z WARNING ExtHandler ExtHandler [PERIODIC] This status is being reported by the Guest Agent since no status file was reported by extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent: [ExtensionStatusError] Status file /var/lib/waagent/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.28.11/status/6.status does not exist + # We expect extensions that are dependent on a failing extension to not report status + # + { + 'message': r"\[PERIODIC\] This status is being reported by the Guest Agent since no status file was reported by extension .*: \[ExtensionStatusError\] Status file \/var\/lib\/waagent\/.*\/status\/\d.status does not exist" + }, + # + # 2023-10-31T17:48:11.306835Z WARNING ExtHandler ExtHandler A new goal state was received, but not all the extensions in the previous goal state have completed: [('Microsoft.Azure.Extensions.CustomScript', 'error'), ('Microsoft.Azure.Monitor.AzureMonitorLinuxAgent', 'transitioning'), ('Microsoft.CPlat.Core.RunCommandLinux', 'success')] + # This message appears when the previous test scenario had failing extensions due to extension dependencies + # + { + 'message': r"A new goal state was received, but not all the extensions in the previous goal state have completed: \[(\(u?'.*', u?'(error|transitioning|success)'\),?)+\]" + }, + # 2024-10-23T18:01:32.247341Z ERROR ExtHandler ExtHandler Event: name=Microsoft.Azure.Monitor.AzureMonitorLinuxAgent, op=ExtensionProcessing, message=Skipping processing of extensions since execution of dependent extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent failed, duration=0 + # We intentionally block extensions with policy and expect any dependent extensions to be skipped + { + 'message': r"Skipping processing of extensions since execution of dependent extension .* failed" + }, + # 2024-10-24T17:34:20.808235Z ERROR ExtHandler ExtHandler Event: name=Microsoft.Azure.Monitor.AzureMonitorLinuxAgent, op=None, message=Extension will not be processed: failed to enable extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because extension is not specified in allowlist. To enable, add extension to the allowed list in the policy file ('/etc/waagent_policy.json')., duration=0 + # We intentionally block extensions with policy and expect this failure message + { + 'message': r"Extension will not be processed" + } + ] + return ignore_rules + + +if __name__ == "__main__": + ExtPolicyWithDependencies.run_from_command_line() \ No newline at end of file diff --git a/tests_e2e/tests/ext_policy/policy_dependencies_cases.py b/tests_e2e/tests/ext_policy/policy_dependencies_cases.py new file mode 100644 index 000000000..55c7daf95 --- /dev/null +++ b/tests_e2e/tests/ext_policy/policy_dependencies_cases.py @@ -0,0 +1,212 @@ + + +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds, VmExtensionIdentifier +import uuid + + +def __get_extension_template(extension_id: VmExtensionIdentifier, depends_on=None): + template = { + "name": extension_id.type, + "properties": { + "provisionAfterExtensions": depends_on, + "publisher": extension_id.publisher, + "type": extension_id.type, + "typeHandlerVersion": extension_id.version, + "autoUpgradeMinorVersion": True + } + } + + if depends_on is not None and len(depends_on) > 0: + template["properties"]["provisionAfterExtensions"] = depends_on + + # Update template properties for each extension type + if extension_id == VmExtensionIds.AzureMonitorLinuxAgent: + # For compliance with S360, enable automatic upgrade for AzureMonitorLinuxAgent + template["properties"]["enableAutomaticUpgrade"] = True + elif extension_id == VmExtensionIds.CustomScript: + template["properties"]["settings"] = {"commandToExecute": "date"} + elif extension_id == VmExtensionIds.RunCommandHandler: + # Each time, we generate a RunCommand template with different settings + unique = str(uuid.uuid4()) + test_file = f"waagent-test.{unique}" + unique_command = f"echo '{unique}' > /tmp/{test_file}" + template["properties"]["settings"] = {"commandToExecute": unique_command} + elif extension_id == VmExtensionIds.VmAccess: + template["properties"]["settings"] = {} + template["properties"]["protectedSettings"] = {"username": "testuser"} + else: + raise ValueError("invalid value '{0}' for 'extension_id'".format(extension_id)) + + return template + + +def _should_fail_single_config_depends_on_disallowed_single_config(): + template = [ + __get_extension_template(VmExtensionIds.VmAccess), + __get_extension_template(VmExtensionIds.CustomScript, depends_on=["VMAccessForLinux"]) + ] + policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "extensions": { + "Microsoft.Azure.Extensions.CustomScript": {}, + # GuestConfiguration is added to all VMs for security requirements, so we always allow it. + "Microsoft.GuestConfiguration.ConfigurationforLinux": {} + } + } + } + expected_errors = [ + "Extension will not be processed: failed to run extension 'Microsoft.OSTCExtensions.VMAccessForLinux' because it is not specified as an allowed extension", + "'CustomScript' is marked as failed since it depends upon the VM Extension 'VMAccessForLinux' which has failed" + ] + deletion_order = [VmExtensionIds.CustomScript, VmExtensionIds.VmAccess] + return policy, template, expected_errors, deletion_order + + +def _should_fail_single_config_depends_on_disallowed_no_config(): + template = [ + __get_extension_template(VmExtensionIds.AzureMonitorLinuxAgent), + __get_extension_template(VmExtensionIds.CustomScript, depends_on=["AzureMonitorLinuxAgent"]) + ] + policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "extensions": { + "Microsoft.Azure.Extensions.CustomScript": {}, + # GuestConfiguration is added to all VMs for security requirements, so we always allow it. + "Microsoft.GuestConfiguration.ConfigurationforLinux": {} + } + } + } + expected_errors = [ + "Extension will not be processed: failed to run extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because it is not specified as an allowed extension", + "'CustomScript' is marked as failed since it depends upon the VM Extension 'AzureMonitorLinuxAgent' which has failed" + ] + deletion_order = [VmExtensionIds.CustomScript, VmExtensionIds.AzureMonitorLinuxAgent] + return policy, template, expected_errors, deletion_order + + +def _should_fail_single_config_depends_on_disallowed_multi_config(): + template = [ + __get_extension_template(VmExtensionIds.RunCommandHandler), + __get_extension_template(VmExtensionIds.CustomScript, depends_on=["RunCommandHandlerLinux"]) + ] + policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "extensions": { + "Microsoft.Azure.Extensions.CustomScript": {}, + # GuestConfiguration is added to all VMs for security requirements, so we always allow it. + "Microsoft.GuestConfiguration.ConfigurationforLinux": {} + } + } + } + expected_errors = [ + "Extension will not be processed: failed to run extension 'Microsoft.CPlat.Core.RunCommandHandlerLinux' because it is not specified as an allowed extension", + "'CustomScript' is marked as failed since it depends upon the VM Extension 'RunCommandHandlerLinux' which has failed" + ] + deletion_order = [VmExtensionIds.CustomScript, VmExtensionIds.RunCommandHandler] + return policy, template, expected_errors, deletion_order + + +def _should_fail_multi_config_depends_on_disallowed_single_config(): + template = [ + __get_extension_template(VmExtensionIds.CustomScript), + __get_extension_template(VmExtensionIds.RunCommandHandler, depends_on=["CustomScript"]) + ] + policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "extensions": { + "Microsoft.CPlat.Core.RunCommandHandlerLinux": {}, + # GuestConfiguration is added to all VMs for security requirements, so we always allow it. + "Microsoft.GuestConfiguration.ConfigurationforLinux": {} + } + } + } + expected_errors = [ + "Extension will not be processed: failed to run extension 'Microsoft.Azure.Extensions.CustomScript' because it is not specified as an allowed extension", + "VM has reported a failure when processing extension 'RunCommandHandlerLinux' (publisher 'Microsoft.CPlat.Core' and type 'RunCommandHandlerLinux'). Error message: 'Skipping processing of extensions since execution of dependent extension Microsoft.Azure.Extensions.CustomScript failed'." + ] + deletion_order = [VmExtensionIds.RunCommandHandler, VmExtensionIds.CustomScript] + return policy, template, expected_errors, deletion_order + + +def _should_fail_multi_config_depends_on_disallowed_no_config(): + template = [ + __get_extension_template(VmExtensionIds.AzureMonitorLinuxAgent), + __get_extension_template(VmExtensionIds.RunCommandHandler, depends_on=["AzureMonitorLinuxAgent"]) + ] + policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "extensions": { + "Microsoft.CPlat.Core.RunCommandHandlerLinux": {}, + # GuestConfiguration is added to all VMs for security requirements, so we always allow it. + "Microsoft.GuestConfiguration.ConfigurationforLinux": {} + } + } + } + expected_errors = [ + "Extension will not be processed: failed to run extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because it is not specified as an allowed extension", + "VM has reported a failure when processing extension 'RunCommandHandlerLinux' (publisher 'Microsoft.CPlat.Core' and type 'RunCommandHandlerLinux'). Error message: 'Skipping processing of extensions since execution of dependent extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent failed'." + ] + deletion_order = [VmExtensionIds.RunCommandHandler, VmExtensionIds.AzureMonitorLinuxAgent] + return policy, template, expected_errors, deletion_order + + +def _should_succeed_single_config_depends_on_no_config(): + template = [ + __get_extension_template(VmExtensionIds.AzureMonitorLinuxAgent), + __get_extension_template(VmExtensionIds.CustomScript, depends_on=["AzureMonitorLinuxAgent"]) + + ] + policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "extensions": { + "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": {}, + "Microsoft.Azure.Extensions.CustomScript": {}, + # GuestConfiguration is added to all VMs for security requirements, so we always allow it. + "Microsoft.GuestConfiguration.ConfigurationforLinux": {} + } + } + } + expected_errors = [] + deletion_order = [VmExtensionIds.CustomScript, VmExtensionIds.AzureMonitorLinuxAgent] + return policy, template, expected_errors, deletion_order + + +def _should_succeed_single_config_depends_on_single_config(): + template = [ + __get_extension_template(VmExtensionIds.CustomScript), + __get_extension_template(VmExtensionIds.VmAccess, depends_on=["CustomScript"]) + ] + policy = \ + { + "policyVersion": "0.1.0", + "extensionPolicies": { + "allowListedExtensionsOnly": True, + "extensions": { + "Microsoft.Azure.Extensions.CustomScript": {}, + "Microsoft.OSTCExtensions.VMAccessForLinux": {}, + # GuestConfiguration is added to all VMs for security requirements, so we always allow it. + "Microsoft.GuestConfiguration.ConfigurationforLinux": {} + } + } + } + expected_errors = [] + deletion_order = [VmExtensionIds.VmAccess, VmExtensionIds.CustomScript] + return policy, template, expected_errors, deletion_order \ No newline at end of file diff --git a/tests_e2e/tests/extensions_disabled/extensions_disabled.py b/tests_e2e/tests/extensions_disabled/extensions_disabled.py index 002d83357..a365c6dd6 100755 --- a/tests_e2e/tests/extensions_disabled/extensions_disabled.py +++ b/tests_e2e/tests/extensions_disabled/extensions_disabled.py @@ -27,7 +27,7 @@ import uuid from assertpy import assert_that, fail -from typing import Any +from typing import List, Dict, Any from azure.mgmt.compute.models import VirtualMachineInstanceView @@ -88,7 +88,7 @@ def run(self): assert_that("VMExtensionProvisioningError" in str(error)) \ .described_as(f"Expected a VMExtensionProvisioningError error, but actual error was: {error}") \ .is_true() - assert_that("Extension will not be processed since extension processing is disabled" in str(error)) \ + assert_that("will not be processed since extension processing is disabled" in str(error)) \ .described_as( f"Error message should communicate that extension will not be processed, but actual error " f"was: {error}").is_true() @@ -137,6 +137,15 @@ def run(self): fail(f"Unexpected error while processing {t.extension.__str__()} after re-enabling extension " f"processing: {error}") + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + ignore_rules = [ + # 2025-01-15T20:45:13.359784Z ERROR ExtHandler ExtHandler Event: name=Microsoft.Azure.Extensions.CustomScript, op=ExtensionProcessing, message=Extension 'Microsoft.Azure.Extensions.CustomScript' will not be processed since extension processing is disabled. To enable extension processing, set Extensions.Enabled=y in '/etc/waagent.conf', duration=0 + # We expect this error message when we disable extension processing via conf + { + 'message': r"Extension .* will not be processed since extension processing is disabled" + } + ] + return ignore_rules if __name__ == "__main__": ExtensionsDisabled.run_from_command_line() diff --git a/tests_e2e/tests/lib/virtual_machine_client.py b/tests_e2e/tests/lib/virtual_machine_client.py index c4181be5a..68a71c3e9 100644 --- a/tests_e2e/tests/lib/virtual_machine_client.py +++ b/tests_e2e/tests/lib/virtual_machine_client.py @@ -108,6 +108,22 @@ def get_extensions(self) -> List[VirtualMachineExtension]: resource_group_name=self.resource_group, vm_name=self.name)) + def delete_all_extensions(self, timeout: int = AzureSdkClient._DEFAULT_TIMEOUT) -> None: + """ + Delete all extensions installed on the virtual machine + """ + extensions_to_delete = self.get_extensions().value + for ext in extensions_to_delete: + ext_name = ext.name + log.info(f"Deleting extension {ext_name} from {self.name}") + self._execute_async_operation( + lambda extension_name=ext_name: self._compute_client.virtual_machine_extensions.begin_delete( + self.resource_group, + self.name, + extension_name), + operation_name=f"Delete extension {ext_name}", + timeout=timeout) + def update(self, properties: Dict[str, Any], timeout: int = AzureSdkClient._DEFAULT_TIMEOUT) -> None: """ Updates a set of properties on the virtual machine diff --git a/tests_e2e/tests/scripts/agent_ext_policy-verify_operation_disallowed.py b/tests_e2e/tests/scripts/agent_ext_policy-verify_operation_disallowed.py new file mode 100755 index 000000000..32e84e8b4 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_ext_policy-verify_operation_disallowed.py @@ -0,0 +1,64 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Checks that the input data is found in the agent log +# +import argparse +import sys +import re + +from datetime import datetime +from pathlib import Path +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.agent_log import AgentLog + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--extension-name', dest='extension_name', required=True) + parser.add_argument('--operation', dest='operation', required=True, choices=['run', 'uninstall']) + parser.add_argument("--after-timestamp", dest='after_timestamp', required=False) + + args, _ = parser.parse_known_args() + + log.info("Verifying that agent log shows {0} failure due to policy".format(args.operation)) + pattern = (r".*Extension will not be processed: failed to {0} extension '{1}' because it is not specified as an allowed extension.*" + .format(args.operation, re.escape(args.extension_name))) + agent_log = AgentLog(Path('/var/log/waagent.log')) + + if args.after_timestamp is None: + after_datetime = datetime.min + else: + after_datetime = datetime.strptime(args.after_timestamp, '%Y-%m-%d %H:%M:%S') + + try: + for record in agent_log.read(): + if record.timestamp > after_datetime: + if re.search(pattern, record.message): + log.info("Found expected error in agent log: {0}".format(record.message)) + sys.exit(0) + + except Exception as e: + log.info("Error thrown when searching for test data in agent log: {0}".format(str(e))) + + log.info("Did not find expected error in agent log. Expected to find pattern: {0}".format(pattern)) + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tests_e2e/tests/scripts/agent_ext_policy-verify_operation_success.py b/tests_e2e/tests/scripts/agent_ext_policy-verify_operation_success.py new file mode 100755 index 000000000..2e382cebe --- /dev/null +++ b/tests_e2e/tests/scripts/agent_ext_policy-verify_operation_success.py @@ -0,0 +1,117 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +import argparse + +from assertpy import fail +from pathlib import Path +from datetime import datetime +import time +import re + +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.agent_log import AgentLog + +# This script verifies the success of an operation using the agent log. +# Enable: check that the agent has reported a successful status for the specified list of extensions +# Uninstall: check that the agent has not reported any status for the specified list of extensions +# +# Usage: +# agent_ext_policy-verify_operation_success.py --extension-list "A" "B" --operation "enable" --after-timestamp "2025-01-13 11:21:40" + + +def __get_last_reported_status(after_timestamp): + # Get last reported status from the agent log file. If after_timestamp is specified, return only the status reported + # after that timestamp, and raise error if not found after 2 tries. + agent_log = AgentLog(Path('/var/log/waagent.log')) + + retries = 2 + for attempt in range(retries): + phrase = "All extensions in the goal state have reached a terminal state" + latest_status = None + for record in agent_log.read(): + if record.timestamp < after_timestamp: + continue + + if phrase in record.message: + if latest_status is None: + latest_status = record + else: + if latest_status.timestamp < record.timestamp: + latest_status = record + + if latest_status is not None: + log.info("Latest status: {0}".format(latest_status.message)) + return latest_status + + log.info("Unable to find handler status in agent log on attempt {0}. Retrying...".format(attempt + 1)) + time.sleep(30) + + return None + + +def check_extension_reported_successful_status(status_message, ext_name: str): + # Extract extension statuses from the agent record + pattern = r"\(u?'(" + re.escape(ext_name) + r")', u?'([^']+)'\)" + match = re.search(pattern, status_message) + if match is None: + fail("Agent did not report any status for extension {0}, enable failed.".format(ext_name)) + else: + status_code = match.group(2).lower() + log.info("Status code: {0}".format(status_code)) + if status_code not in ["success", "ready"]: + fail("Agent did not report a successful status for extension {0}, enable failed. Status: {1}".format(ext_name, status_code)) + else: + log.info("Agent reported a successful status for extension {0}, enable succeeded.".format(ext_name)) + +def main(): + + parser = argparse.ArgumentParser() + parser.add_argument('--extension-list', dest='extension_list', required=True, nargs='+', + help='Extension name(s) to process. Provide a single name or a space-separated list of names.') + + parser.add_argument('--operation', dest='operation', required=True, choices=['enable', 'uninstall']) + parser.add_argument("--after-timestamp", dest='after_timestamp', required=False) + args = parser.parse_args() + + if args.after_timestamp is not None: + after_datetime = datetime.strptime(args.after_timestamp, '%Y-%m-%d %H:%M:%S') + else: + after_datetime = datetime.min + + status = __get_last_reported_status(after_datetime) + if status is None: + fail("Unable to find extension status in agent log.") + + if args.operation == "enable": + log.info("Checking agent status file to verify that extensions were enabled successfully.") + for extension in args.extension_list: + check_extension_reported_successful_status(status.message, extension) + + elif args.operation == "uninstall": + log.info("Checking agent log to verify that status is not reported for uninstalled extensions.") + for extension in args.extension_list: + if extension in status.message: + fail("Agent reported status for extension {0}, uninstall failed.".format(extension)) + else: + log.info("Agent did not report status for extension {0}, uninstall succeeded.".format(extension)) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tests_e2e/tests/scripts/agent_ext_workflow-check_data_in_agent_log.py b/tests_e2e/tests/scripts/agent_ext_workflow-check_data_in_agent_log.py index 867c9b67d..03155cb44 100755 --- a/tests_e2e/tests/scripts/agent_ext_workflow-check_data_in_agent_log.py +++ b/tests_e2e/tests/scripts/agent_ext_workflow-check_data_in_agent_log.py @@ -21,6 +21,7 @@ import argparse import sys +from datetime import datetime from pathlib import Path from tests_e2e.tests.lib.agent_log import AgentLog @@ -28,13 +29,18 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("--data", dest='data', required=True) + parser.add_argument("--after-timestamp", dest='after_timestamp', required=False) args, _ = parser.parse_known_args() print("Verifying data: {0} in waagent.log".format(args.data)) found = False try: - found = AgentLog(Path('/var/log/waagent.log')).agent_log_contains(args.data) + if args.after_timestamp is not None: + after_datetime = datetime.strptime(args.after_timestamp, '%Y-%m-%d %H:%M:%S') + found = AgentLog(Path('/var/log/waagent.log')).agent_log_contains(args.data, after_datetime) + else: + found = AgentLog(Path('/var/log/waagent.log')).agent_log_contains(args.data) if found: print("Found data: {0} in agent log".format(args.data)) else: