Address test comments

Azure · Dec 9, 2024 · dfcc158 · dfcc158
1 parent ba3869c
commit dfcc158
Show file tree

Hide file tree

Showing 7 changed files with 83 additions and 26 deletions.
diff --git a/azurelinuxagent/ga/policy/policy_engine.py b/azurelinuxagent/ga/policy/policy_engine.py
@@ -22,7 +22,7 @@
 from azurelinuxagent.common import logger
 from azurelinuxagent.common.event import WALAEventOperation, add_event
 from azurelinuxagent.common import conf
-from azurelinuxagent.common.exception import AgentError, ExtensionError, ExtensionErrorCodes
+from azurelinuxagent.common.exception import AgentError
 from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import _CaseFoldedDict
 from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
 

diff --git a/tests_e2e/test_suites/ext_policy.yml b/tests_e2e/test_suites/ext_policy.yml
@@ -5,4 +5,4 @@ name: "ExtensionPolicy"
 tests:
   - "ext_policy/ext_policy.py"
 images: "endorsed"
-owns_vm: true
+owns_vm: false
diff --git a/tests_e2e/test_suites/ext_policy_with_dependencies.yml b/tests_e2e/test_suites/ext_policy_with_dependencies.yml
@@ -6,6 +6,4 @@ tests:
   - "ext_policy/ext_policy_with_dependencies.py"
 images: "endorsed"
 executes_on_scale_set: true
-# This test should run on its own VMSS, because other tests may leave behind extensions
-# that are disallowed by policy and affect results.
-owns_vm: true
+owns_vm: false
diff --git a/tests_e2e/tests/ext_policy/ext_policy.py b/tests_e2e/tests/ext_policy/ext_policy.py
@@ -105,12 +105,12 @@ def _operation_should_fail(self, operation, extension_case):
                 log.info(f"{extension_case.extension} {operation} failed as expected")
 
         elif operation == "delete":
-            # Delete is a best effort operation and should not fail, so CRP will wait for the full timeout instead
-            # instead of reporting an error for the operation. We set a short timeout limit, swallow the error, and
-            # assert that the extension is still in the instance view to confirm that the delete failed.
+            # Delete is a best-effort operation and should not fail, so CRP will time out instead of reporting the
+            # appropriate error. We swallow the timeout error and instead assert that the extension is still in the
+            # instance view and that the expected error is in the agent log to confirm that deletion failed.
+            delete_start_time = self._ssh_client.run_command("date '+%Y-%m-%d %T'").rstrip()
             try:
-                delete_start_time = self._ssh_client.run_command("date '+%Y-%m-%d %T'").rstrip()
-                extension_case.extension.delete(timeout=(1 * 60))
+                extension_case.extension.delete(timeout=(15 * 60))
                 fail(f"The agent should have reported a timeout error when attempting to delete {extension_case.extension} "
                      f"because the extension is disallowed by policy.")
             except TimeoutError:
@@ -125,15 +125,20 @@ def _operation_should_fail(self, operation, extension_case):
 
                 # Confirm that expected error message is in the agent log
                 expected_msg = "Extension will not be processed: failed to uninstall extension"
-                result = self._ssh_client.run_command(
+                self._ssh_client.run_command(
                     f"agent_ext_workflow-check_data_in_agent_log.py --data '{expected_msg}' --after-timestamp '{delete_start_time}'",
                     use_sudo=True)
 
     def run(self):
 
+        # The full CRP timeout period for extension operation failure is 90 minutes. For efficiency, we reduce the
+        # timeout limit to 15 minutes here. We expect "delete" operations on disallowed extensions to reach the timeout
+        # instead of failing fast, because delete is a best-effort operation by design and should not fail.
+        self._context.vm.update({"extensionsTimeBudget": "PT15M"})
+
+
         # Prepare no-config, single-config, and multi-config extension to test. Extensions with settings and extensions
         # without settings have different status reporting logic, so we should test all cases.
-
         # CustomScript is a single-config extension.
         custom_script = ExtPolicy.TestCase(
             VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript,
@@ -166,7 +171,7 @@ def run(self):
         for ext in ext_to_cleanup:
             ext.extension.delete()
 
-        # Enable policy via conf
+        # Enable policy via conf file
         log.info("Enabling policy via conf file on the test VM [%s]", self._context.vm.name)
         self._ssh_client.run_command("update-waagent-conf Debug.EnableExtensionPolicy=y", use_sudo=True)
 
@@ -234,9 +239,6 @@ def run(self):
             }
         self._create_policy_file(policy)
         self._operation_should_fail("delete", custom_script)
-
-        # If a multiconfig extension is disallowed, no instances should be processed.
-        # RunCommand is not allowed - if we try to enable two instances, both should fail fast.
         self._operation_should_fail("enable", run_command)
         self._operation_should_fail("enable", run_command_2)
         self._operation_should_fail("enable", custom_script)

diff --git a/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py b/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py
@@ -206,14 +206,12 @@ def run(self):
                     self._context.vmss.delete_extension(ext_name_to_delete)
                 except Exception as crp_err:
                     # Known issue - CRP returns stale status in cases of dependency failures. Even if the deletion succeeds,
-                    # CRP may return a failure here. We swallow the error, and instead, check that the logs for uninstall
-                    # are present in the agent log (after the start time of this test case).
+                    # CRP may return a failure. We swallow the error and instead verify that the agent does not report
+                    # status for the uninstalled extension.
                     log.info("CRP returned an error for deletion operation, may be a false error. Checking agent log to determine if operation succeeded. Exception: {0}".format(crp_err))
                     try:
                         for ssh_client in ssh_clients.values():
-                            msg = ("Remove the extension slice: {0}".format(str(ext_to_delete)))
-                            result = ssh_client.run_command(f"agent_ext_workflow-check_data_in_agent_log.py --data '{msg}' --after-timestamp '{test_case_start}'", use_sudo=True)
-                            log.info(result)
+                            ssh_client.run_command(f"agent_ext_policy-verify_uninstall_success.py --extension-name '{ext_to_delete}'")
                     except Exception as agent_err:
                         fail("Unable to successfully uninstall extension {0}. Exception: {1}".format(ext_name_to_delete, agent_err))
                 log.info("Successfully uninstalled extension {0}".format(ext_name_to_delete))

diff --git a/tests_e2e/tests/ext_policy/policy_dependencies_cases.py b/tests_e2e/tests/ext_policy/policy_dependencies_cases.py
@@ -56,7 +56,7 @@ def _should_fail_single_config_depends_on_disallowed_single_config():
             }
         }
     expected_errors = [
-        "Extension will not be processed: failed to enable extension 'Microsoft.OSTCExtensions.VMAccessForLinux' because extension is not specified in allowlist",
+        "Extension will not be processed: failed to run extension 'Microsoft.OSTCExtensions.VMAccessForLinux' because it is not specified in the allowlist",
         "'CustomScript' is marked as failed since it depends upon the VM Extension 'VMAccessForLinux' which has failed"
     ]
     deletion_order = [VmExtensionIds.CustomScript, VmExtensionIds.VmAccess]
@@ -80,7 +80,7 @@ def _should_fail_single_config_depends_on_disallowed_no_config():
             }
         }
     expected_errors = [
-        "Extension will not be processed: failed to enable extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because extension is not specified in allowlist",
+        "Extension will not be processed: failed to run extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because it is not specified in the allowlist",
         "'CustomScript' is marked as failed since it depends upon the VM Extension 'AzureMonitorLinuxAgent' which has failed"
     ]
     deletion_order = [VmExtensionIds.CustomScript, VmExtensionIds.AzureMonitorLinuxAgent]
@@ -103,7 +103,7 @@ def _should_fail_single_config_depends_on_disallowed_multi_config():
             }
         }
     expected_errors = [
-        "Extension will not be processed: failed to enable extension 'Microsoft.CPlat.Core.RunCommandHandlerLinux' because extension is not specified in allowlist",
+        "Extension will not be processed: failed to run extension 'Microsoft.CPlat.Core.RunCommandHandlerLinux' because it is not specified in the allowlist",
         "'CustomScript' is marked as failed since it depends upon the VM Extension 'RunCommandHandlerLinux' which has failed"
     ]
     deletion_order = [VmExtensionIds.CustomScript, VmExtensionIds.RunCommandHandler]
@@ -126,7 +126,7 @@ def _should_fail_multi_config_depends_on_disallowed_single_config():
             }
         }
     expected_errors = [
-        "Extension will not be processed: failed to enable extension 'Microsoft.Azure.Extensions.CustomScript' because extension is not specified in allowlist",
+        "Extension will not be processed: failed to run extension 'Microsoft.Azure.Extensions.CustomScript' because it is not specified in the allowlist",
         "VM has reported a failure when processing extension 'RunCommandHandlerLinux' (publisher 'Microsoft.CPlat.Core' and type 'RunCommandHandlerLinux'). Error message: 'Skipping processing of extensions since execution of dependent extension Microsoft.Azure.Extensions.CustomScript failed'."
     ]
     deletion_order = [VmExtensionIds.RunCommandHandler, VmExtensionIds.CustomScript]
@@ -149,7 +149,7 @@ def _should_fail_multi_config_depends_on_disallowed_no_config():
             }
         }
     expected_errors = [
-        "Extension will not be processed: failed to enable extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because extension is not specified in allowlist",
+        "Extension will not be processed: failed to run extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because it is not specified in the allowlist",
         "VM has reported a failure when processing extension 'RunCommandHandlerLinux' (publisher 'Microsoft.CPlat.Core' and type 'RunCommandHandlerLinux'). Error message: 'Skipping processing of extensions since execution of dependent extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent failed'."
     ]
     deletion_order = [VmExtensionIds.RunCommandHandler, VmExtensionIds.AzureMonitorLinuxAgent]

diff --git a/tests_e2e/tests/scripts/agent_ext_policy-verify_uninstall_success.py b/tests_e2e/tests/scripts/agent_ext_policy-verify_uninstall_success.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env pypy3
+
+# Microsoft Azure Linux Agent
+#
+# Copyright 2018 Microsoft Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Verify that an uninstalled extension is no longer reported in the agent status file
+#
+import argparse
+import glob
+import json
+
+from assertpy import fail
+
+from tests_e2e.tests.lib.logging import log
+
+
+def extension_found_in_agent_status_file(ext_name: str) -> bool:
+    # Return True if any agent status file under the history folder lists ext_name (case-insensitive) under
+    # handlerAggregateStatus; return False otherwise, including when no status file exists. If the name is not
+    # present, the uninstall operation was successful. NOTE(review): confirm scanning every history entry is intended.
+    agent_status_file = "/var/lib/waagent/history/*/waagent_status.json"
+    for file in glob.glob(agent_status_file):
+        with open(file, 'r') as f:
+            data = json.load(f)
+        log.info("Agent status file (%s): %s", file, data)
+        handler_status = data["aggregateStatus"]["handlerAggregateStatus"]
+        if any(handler["handlerName"].lower() == ext_name.lower() for handler in handler_status):
+            return True
+    return False
+
+
+def main():
+    # Script entry point: read --extension-name and fail if that extension is still present in the agent status file.
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument('--extension-name', dest='name', required=True)
+    extension_name = arg_parser.parse_args().name
+
+    log.info("Checking agent status file to verify that the uninstalled extension is not present in reported status")
+    if not extension_found_in_agent_status_file(extension_name):
+        log.info("Handler status was not found in the status file for extension {0}, uninstall succeeded.".format(extension_name))
+        return
+    fail("Handler status was found in the status file for extension {0}, uninstall failed.".format(extension_name))
+
+
+if __name__ == "__main__":
+    main()