Skip to content

Commit

Permalink
Merge branch 'master' into support_qos_kvm
Browse files Browse the repository at this point in the history
  • Loading branch information
xwjiang-ms committed Jan 6, 2025
2 parents f5a7557 + 169c7be commit 7b9ccbe
Show file tree
Hide file tree
Showing 77 changed files with 10,169 additions and 433 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Template parameters supplied by the pipeline that includes this template.
parameters:
# Topology name: used as the key to select this topology's entry from the
# TEST_SCRIPTS JSON and forwarded as --topology to calculate_instance_number.py.
- name: TOPOLOGY
  type: string
  default: ""

# Forwarded as --branch to calculate_instance_number.py.
- name: BUILD_BRANCH
  type: string
  default: ""

steps:
# Extracts the script list of the requested topology from the TEST_SCRIPTS
# JSON and publishes it as the comma-separated pipeline variable SCRIPTS.
- script: |
    set -x
    sudo apt-get update && sudo apt-get install -y jq
    # Select the entry stored under the topology name.
    # `-e` makes jq exit non-zero when that entry is null/false, so a topology
    # that is missing from the JSON fails here instead of yielding "null".
    TEST_SCRIPTS=$(echo '$(TEST_SCRIPTS)' | jq -e -r -c '."${{ parameters.TOPOLOGY }}"')
    if [[ $? -ne 0 ]]; then
      echo "##vso[task.complete result=Failed;]Get test scripts of specfic topology fails."
      exit 1
    fi
    # Flatten the JSON array into "a,b,c"; fail if the entry is not joinable.
    SCRIPTS=$(echo "$TEST_SCRIPTS" | jq -r '. | join(",")')
    if [[ $? -ne 0 ]]; then
      echo "##vso[task.complete result=Failed;]Get test scripts of specfic topology fails."
      exit 1
    fi
    # Stop tracing before emitting the logging command so that the traced copy
    # of the echo line is not picked up by the agent as a second command.
    set +x
    echo -n "##vso[task.setvariable variable=SCRIPTS]$SCRIPTS"
  displayName: "Get ${{ parameters.TOPOLOGY }} test scripts"

# Ensures the `az` command is available on the agent: when it is missing, the
# Microsoft apt repository is registered and the azure-cli package installed.
- script: |
    set -x
    # Check if azure cli is installed. If not, try to install it
    if ! command -v az; then
      echo "Azure CLI is not installed. Trying to install it..."
      echo "Get packages needed for the installation process"
      # DPkg::Lock::Timeout=600 is passed to every apt-get call in this step
      sudo apt-get -o DPkg::Lock::Timeout=600 update
      sudo apt-get -o DPkg::Lock::Timeout=600 -y install apt-transport-https ca-certificates curl gnupg lsb-release
      echo "Download and install the Microsoft signing key"
      sudo mkdir -p /etc/apt/keyrings
      # Download the key, convert it with `gpg --dearmor` and store it via sudo tee
      curl -sLS https://packages.microsoft.com/keys/microsoft.asc |
      gpg --dearmor | sudo tee /etc/apt/keyrings/microsoft.gpg > /dev/null
      sudo chmod go+r /etc/apt/keyrings/microsoft.gpg
      echo "Add the Azure CLI software repository"
      AZ_DIST=$(lsb_release -cs)
      # The lines below are one quoted string written verbatim to the sources
      # file; they stay at the script's left margin so that no leading
      # whitespace becomes part of the written file.
      echo "Types: deb
    URIs: https://packages.microsoft.com/repos/azure-cli/
    Suites: ${AZ_DIST}
    Components: main
    Architectures: $(dpkg --print-architecture)
    Signed-by: /etc/apt/keyrings/microsoft.gpg" | sudo tee /etc/apt/sources.list.d/azure-cli.sources
      echo "Update repository information and install the azure-cli package"
      sudo apt-get -o DPkg::Lock::Timeout=600 update
      sudo apt-get -o DPkg::Lock::Timeout=600 -y install azure-cli
    else
      echo "Azure CLI is already installed"
    fi
  displayName: "Install azure-cli"

# Runs calculate_instance_number.py for the selected scripts and publishes its
# printed result as the pipeline variable INSTANCE_NUMBER.
- script: |
    set -x
    # One install covers both packages (azure-kusto-data was previously installed twice).
    pip install azure-kusto-data azure-identity
    # Arguments are quoted so each value reaches the script as exactly one argument.
    # The command substitution captures the script's stdout, which is the instance number.
    INSTANCE_NUMBER=$(python ./.azure-pipelines/impacted_area_testing/calculate_instance_number.py --scripts "$(SCRIPTS)" --topology "${{ parameters.TOPOLOGY }}" --branch "${{ parameters.BUILD_BRANCH }}")
    if [[ $? -ne 0 ]]; then
      echo "##vso[task.complete result=Failed;]Get instances number fails."
      exit 1
    fi
    # Stop tracing before emitting the logging command so that the traced copy
    # of the echo line is not picked up by the agent as a second command.
    set +x
    echo "$INSTANCE_NUMBER"
    echo -n "##vso[task.setvariable variable=INSTANCE_NUMBER]$INSTANCE_NUMBER"
  displayName: "Calculate instance number"
142 changes: 142 additions & 0 deletions .azure-pipelines/impacted_area_testing/calculate_instance_number.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import os
import argparse
import math
import logging
from constant import PR_CHECKER_TOPOLOGY_NAME, MAX_INSTANCE_NUMBER, MAX_GET_TOKEN_RETRY_TIMES
from azure.kusto.data import KustoConnectionStringBuilder, KustoClient

logging.basicConfig(level=logging.INFO)


def parse_list_from_str(s):
    """Split a comma-separated string into a list of trimmed, non-empty items.

    Azure Pipelines cannot pass an empty parameter, so callers send ' ' as a
    placeholder for "nothing"; a blank or otherwise falsy input is therefore
    treated as empty.

    Args:
        s: The comma-separated text, e.g. "a.py, b.py".

    Returns:
        A list of the stripped items, or None when the input is empty/blank.
    """
    text = s.strip() if isinstance(s, str) else s
    if not text:
        return None

    items = []
    for piece in text.split(','):
        trimmed = piece.strip()
        if trimmed:
            items.append(trimmed)
    return items


def _run_az_command(cmd_args, action, require_output=False):
    """Run an az CLI command with retries and return its stripped stdout.

    Args:
        cmd_args: The command as an argument list (no shell is involved).
        action: Short description used in log and error messages.
        require_output: When True, an empty stdout counts as a failed attempt.

    Returns:
        The command's stdout with surrounding whitespace removed.

    Raises:
        Exception: If every one of MAX_GET_TOKEN_RETRY_TIMES attempts fails.
    """
    # Imported here so the function does not rely on the module's import list.
    import subprocess

    last_error = "no attempt was made"
    for attempt in range(1, MAX_GET_TOKEN_RETRY_TIMES + 1):
        try:
            completed = subprocess.run(
                cmd_args,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=True,
            )
        except OSError as exception:
            # The executable could not be started at all (e.g. az not on PATH).
            last_error = repr(exception)
        else:
            output = completed.stdout.strip()
            if completed.returncode != 0:
                last_error = f"exit code {completed.returncode}: {completed.stderr.strip()}"
            elif require_output and not output:
                last_error = "command succeeded but printed nothing"
            else:
                return output
        # Logged through logging (not print) so that stdout stays reserved for
        # the instance number printed by main().
        logging.warning(
            "Failed to %s (attempt %d of %d): %s",
            action, attempt, MAX_GET_TOKEN_RETRY_TIMES, last_error)

    raise Exception(
        f"Failed to {action} after {MAX_GET_TOKEN_RETRY_TIMES} attempts. "
        f"Last error: {last_error}"
    )


def get_access_token():
    """Log in with the managed identity and return a Kusto access token.

    The identity is read from the SONIC_AUTOMATION_UMI environment variable.
    Both the `az login` and the token request are retried up to
    MAX_GET_TOKEN_RETRY_TIMES times; a non-zero exit code (or an empty token)
    counts as a failed attempt.

    Returns:
        The access token for https://api.kusto.windows.net, without the
        trailing newline printed by the az CLI.

    Raises:
        Exception: If the identity is not configured, or if login or token
            retrieval still fails after all attempts.
    """
    managed_identity_id = os.environ.get("SONIC_AUTOMATION_UMI")
    if not managed_identity_id:
        # Fail fast instead of running `az login --username None`.
        raise Exception("Failed to az login: environment variable SONIC_AUTOMATION_UMI is not set.")

    # 1. Run az login with re-try
    _run_az_command(
        ["az", "login", "--identity", "--username", managed_identity_id],
        "az login",
    )

    # 2. Get access token with re-try
    return _run_az_command(
        ["az", "account", "get-access-token",
         "--resource", "https://api.kusto.windows.net",
         "--query", "accessToken", "-o", "tsv"],
        "get token",
        require_output=True,
    )


# Runtime assumed for a script that has no usable history in Kusto.
_DEFAULT_SCRIPT_RUNTIME_SECONDS = 1800
# Amount of test time planned for a single instance.
_MINUTES_PER_INSTANCE = 90


def _build_runtime_query(script, kusto_topology, plan_name_tag, branch):
    """Return the KQL query that summarizes recent baseline runs of one script."""
    # NOTE(review): the values are interpolated into the query text unescaped;
    # a value containing a single quote would break the query. Confirm that
    # script paths and branch names can never contain one.
    # NOTE(review): `take 5` follows the join, so whether the five rows are the
    # most recent ones depends on the join preserving the subquery order.
    return (
        "V2TestCases "
        "| join kind=inner"
        "(TestPlans "
        "| where TestPlanType == 'PR' and Result == 'FINISHED' "
        f"and Topology == '{kusto_topology}' "
        f"and TestBranch == '{branch}' and TestPlanName contains '{plan_name_tag}' "
        "and TestPlanName contains '_BaselineTest_' and UploadTime > ago(7d)"
        "| order by UploadTime desc) on TestPlanId "
        f"| where FilePath == '{script}' "
        "| where Result !in ('failure', 'error') "
        "| take 5"
        "| summarize ActualCount = count(), TotalRuntime = sum(Runtime)"
    )


def main(scripts, topology, branch):
    """Print the number of test instances needed to run the given scripts.

    For every script the average runtime of up to five passing baseline runs
    from the last seven days is read from Kusto (a default is used when no
    record exists). The total is divided by the per-instance time budget,
    rounded up and capped at MAX_INSTANCE_NUMBER. The result is written to
    stdout, where the calling pipeline step captures it.

    Args:
        scripts: Comma-separated test script paths.
        topology: A key of PR_CHECKER_TOPOLOGY_NAME.
        branch: The test branch whose history is queried.

    Raises:
        ValueError: If no script is given.
        RuntimeError: If the Kusto cluster or the access token is missing.
        KeyError: If the topology is unknown.
        Exception: If connecting to or querying Kusto fails.
    """
    # Validate the input before doing any login or network work; previously an
    # empty list only surfaced later as "'NoneType' object is not iterable".
    script_list = parse_list_from_str(scripts)
    if not script_list:
        raise ValueError("No test scripts were provided, cannot calculate instance number.")

    ingest_cluster = os.getenv("TEST_REPORT_QUERY_KUSTO_CLUSTER_BACKUP")
    access_token = get_access_token()

    if not ingest_cluster or not access_token:
        raise RuntimeError(
            "Could not load Kusto Credentials from environment")

    try:
        kcsb = KustoConnectionStringBuilder.with_aad_application_token_authentication(
            ingest_cluster, access_token)
        client = KustoClient(kcsb)
    except Exception as e:
        raise Exception("Connect to kusto fails, error {}".format(e)) from e

    # The mapping is the same for every script, so look it up once.
    topology_names = PR_CHECKER_TOPOLOGY_NAME[topology]
    kusto_topology, plan_name_tag = topology_names[0], topology_names[1]

    scripts_running_time = {}
    total_running_time = 0

    for script in script_list:
        # As baseline test is the universal set of PR test,
        # the historical running time of a script is taken from baseline runs.
        query = _build_runtime_query(script, kusto_topology, plan_name_tag, branch)
        try:
            response = client.execute("SonicTestData", query)
        except Exception as e:
            raise Exception("Query results from Kusto fails, error {}".format(e)) from e

        for row in response.primary_results[0]:
            # The row holds the count and the summed runtime of the selected
            # runs; dividing gives the runtime of a single run.
            actual_count = row["ActualCount"]

            if actual_count == 0:
                # There is no relevant record in Kusto, fall back to the default.
                average_running_time = _DEFAULT_SCRIPT_RUNTIME_SECONDS
            else:
                average_running_time = row["TotalRuntime"] / actual_count

            total_running_time += average_running_time
            scripts_running_time[script] = average_running_time

    logging.info(f"Time for each test script: {scripts_running_time}")
    logging.info(f"Total running time: {total_running_time}")
    # Total running time is in seconds: divide by 60 to get minutes, then by
    # the per-instance budget, rounding up. MAX_INSTANCE_NUMBER caps the
    # result to guard against unexpected situations.
    print(min(math.ceil(total_running_time / 60 / _MINUTES_PER_INSTANCE), MAX_INSTANCE_NUMBER))


if __name__ == '__main__':
    # Command-line entry point: read the three string options (each defaults
    # to an empty string) and hand them to main().
    arg_parser = argparse.ArgumentParser()
    for flag, description in (
        ("--topology", "The topology of testplan"),
        ("--scripts", "Test scripts to be executed"),
        ("--branch", "Test branch"),
    ):
        arg_parser.add_argument(flag, help=description, type=str, default="")

    cli_args = arg_parser.parse_args()
    main(cli_args.scripts, cli_args.topology, cli_args.branch)
28 changes: 28 additions & 0 deletions .azure-pipelines/impacted_area_testing/constant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# PR checkers that exist at the moment:
#   - dpu
#   - dualtor-t0
#   - multi-asic-t1-lag
#   - t0
#   - t0-2vlans
#   - t0-sonic
#   - t1-lag
PR_TOPOLOGY_TYPE = [
    "t0", "t0-2vlans", "t0-sonic", "t1", "t1-multi-asic", "dpu", "dualtor",
]

# Test scripts listed here by file name.
EXCLUDE_TEST_SCRIPTS = ["test_posttest.py", "test_pretest.py"]

# For each PR topology type: [topology recorded in Kusto, name of the PR test].
PR_CHECKER_TOPOLOGY_NAME = {
    "t0":            ["t0", "_kvmtest-t0_"],
    "t0-2vlans":     ["t0", "_kvmtest-t0-2vlans_"],
    "t0-sonic":      ["t0-64-32", "_kvmtest-t0-sonic_"],
    "t1":            ["t1-lag", "_kvmtest-t1-lag_"],
    "t1-multi-asic": ["t1-8-lag", "_kvmtest-multi-asic-t1-lag_"],
    "dpu":           ["dpu", "_kvmtest-dpu_"],
    "dualtor":       ["dualtor", "_kvmtest-dualtor-t0_"],
}

# Upper bound for the calculated number of test instances.
MAX_INSTANCE_NUMBER = 25
# Number of attempts for az login / token retrieval.
MAX_GET_TOKEN_RETRY_TIMES = 3
76 changes: 76 additions & 0 deletions .azure-pipelines/impacted_area_testing/get-impacted-area.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
steps:
# Publishes the space-separated list of directories touched by the change
# (relative to origin/master) as the pipeline variable DIFF_FOLDERS.
- script: |
    set -x
    # Without pipefail, $? below would only reflect the final `tr`, so a
    # failing `git diff` would go unnoticed.
    set -o pipefail
    git fetch --all
    # xargs -r: do not run dirname at all when the diff is empty (otherwise
    # dirname is started without an operand and fails).
    DIFF_FOLDERS=$(git diff origin/master HEAD --name-only | xargs -r -n1 dirname | sort -u | tr '\n' ' ')
    if [[ $? -ne 0 ]]; then
      echo "##vso[task.complete result=Failed;]Get diff folders fails."
      exit 1
    else
      # Stop tracing before emitting the logging command so that the traced
      # copy of the echo line is not picked up by the agent as a second command.
      set +x
      echo -n "##vso[task.setvariable variable=DIFF_FOLDERS]$DIFF_FOLDERS"
    fi
  continueOnError: false
  displayName: "Get diff folders"

# Maps the changed folders to the impacted test features, asks
# get_test_scripts.py for the matching scripts and publishes the output
# variables PR_CHECKERS and TEST_SCRIPTS.
- script: |
    set -x
    pip install PyYAML
    pip install natsort
    sudo apt-get install -y jq
    # An empty FINAL_FEATURES means "run all test scripts".
    FINAL_FEATURES=""
    IFS=' ' read -ra FEATURES_LIST <<< "$(DIFF_FOLDERS)"
    for FEATURE in "${FEATURES_LIST[@]}"
    do
      # If changes contain the common part of the tests folder, the scope of PR testing is all test scripts.
      if [[ "$FEATURE" == *tests/common* ]]; then
        FINAL_FEATURES=""
        break
      # If changes are limited to a specific feature, the scope of PR testing is the impacted area.
      # Only paths that really are the tests folder or live below it qualify;
      # a path that merely contains the text "tests" (e.g. ansible/tests) falls
      # through to the "common part" branch.
      elif [[ "$FEATURE" == tests || "$FEATURE" == tests/* ]]; then
        # Cut the feature path down to tests/<feature>
        if [[ $FEATURE == */*/* ]]; then
          FEATURE=$(echo "$FEATURE" | cut -d'/' -f1-2)
        fi
        FEATURE=${FEATURE#tests/}
        if [[ -z "$FINAL_FEATURES" ]]; then
          FINAL_FEATURES="$FEATURE"
        # Compare whole comma-separated entries, so that e.g. "bgp" is still
        # added when "bgpmon" is already in the list.
        elif [[ ",$FINAL_FEATURES," != *",$FEATURE,"* ]]; then
          FINAL_FEATURES="$FINAL_FEATURES,$FEATURE"
        fi
      # If changes relate to folders other than tests, we also consider them as common part.
      # The scope of PR testing is all test scripts.
      else
        FINAL_FEATURES=""
        break
      fi
    done
    # FINAL_FEATURES is intentionally left unquoted: when it is empty, no value
    # is passed after --features, exactly as before.
    TEST_SCRIPTS=$(python ./.azure-pipelines/impacted_area_testing/get_test_scripts.py --features ${FINAL_FEATURES} --location tests)
    if [[ $? -ne 0 ]]; then
      echo "##vso[task.complete result=Failed;]Get test scripts fails."
      exit 1
    fi
    # The keys of the returned JSON object are published as the PR checkers.
    PR_CHECKERS=$(echo "${TEST_SCRIPTS}" | jq -c 'keys')
    if [[ $? -ne 0 ]]; then
      echo "##vso[task.complete result=Failed;]Get valid PR checkers fails."
      exit 1
    fi
    # Stop tracing before emitting the logging commands so that the traced
    # copies of the echo lines are not picked up by the agent as extra commands.
    set +x
    echo "##vso[task.setvariable variable=PR_CHECKERS;isOutput=true]$PR_CHECKERS"
    echo "##vso[task.setvariable variable=TEST_SCRIPTS;isOutput=true]$TEST_SCRIPTS"
  name: SetVariableTask
  continueOnError: false
  displayName: "Get impacted area"
Loading

0 comments on commit 7b9ccbe

Please sign in to comment.