Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retry test failures on Unity #1028

Merged
merged 13 commits into from
May 21, 2024
32 changes: 32 additions & 0 deletions .github/workflows/integration_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -644,3 +644,35 @@ jobs:
--run_id ${{github.run_id}}
- name: Summarize results into GitHub log
run: python scripts/gha/summarize_test_results.py --dir test_results --github_log


attempt_retry:
name: "attempt-retry"
needs: [check_and_prepare, summarize_results]
runs-on: ubuntu-20.04
if: ${{ failure() && needs.check_and_prepare.outputs.trigger == 'scheduled_trigger' }}
steps:
- uses: actions/checkout@v3
with:
ref: ${{needs.check_and_prepare.outputs.github_ref}}
- name: Setup python
uses: actions/setup-python@v4
with:
python-version: ${{ env.pythonVersion }}
- name: Install python deps
run: pip install -r scripts/gha/requirements.txt
# The default token can't run workflows, so get an alternate token.
- name: Generate token for GitHub API
uses: tibdex/github-app-token@v1
id: generate-token
with:
app_id: ${{ secrets.WORKFLOW_TRIGGER_APP_ID }}
private_key: ${{ secrets.WORKFLOW_TRIGGER_APP_PRIVATE_KEY }}
- name: Retry failed tests
run: |
echo "::warning ::Attempting to retry failed tests"
python scripts/gha/trigger_workflow.py -t ${{ steps.generate-token.outputs.token }} \
-w retry-test-failures.yml \
-p run_id ${{ github.run_id }} \
-s 10 \
-A
30 changes: 27 additions & 3 deletions .github/workflows/retry-test-failures.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,31 @@ jobs:
check_results_and_retry_if_needed:
name: check-results-and-retry-if-needed
runs-on: ubuntu-20.04
if:
steps:
- name: No-op
run: true
- name: Get token for firebase-workflow-trigger
uses: tibdex/github-app-token@v1
id: generate-token
with:
app_id: ${{ secrets.WORKFLOW_TRIGGER_APP_ID }}
private_key: ${{ secrets.WORKFLOW_TRIGGER_APP_PRIVATE_KEY }}

- name: Setup python
uses: actions/setup-python@v4
with:
python-version: 3.8

- uses: actions/checkout@v3
with:
fetch-depth: 0
submodules: false

- name: Wait 3 minutes for run to finish
run: |
sleep 180

- name: Install python deps
run: pip install -r scripts/gha/requirements.txt

- name: Run test failure retry script
run: |
python scripts/gha/retry_test_failures.py --token '${{ steps.generate-token.outputs.token }}' --run_id '${{ github.event.inputs.run_id }}'
58 changes: 58 additions & 0 deletions scripts/gha/firebase_github.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,3 +310,61 @@ def list_workflow_runs(token, workflow_id, branch=None, event=None, limit=200):
keep_going = False
results = results[:limit]
return results


def list_jobs_for_workflow_run(token, run_id, attempt=None, limit=200):
  """List the jobs for a GitHub Actions workflow run, handling pagination.

  https://docs.github.com/en/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run
  https://docs.github.com/en/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run-attempt

  Args:
    token: GitHub API token used in the Authorization header.
    run_id: Workflow run ID whose jobs should be listed.
    attempt: Which attempt to fetch. Should be a number >0, 'latest', or 'all'.
        If unspecified, returns 'latest'.
    limit: Maximum number of jobs to return; a value <= 0 means no limit.

  Returns:
    A list of job dicts as returned by the GitHub API ('jobs' array), possibly
    truncated to `limit` entries.
  """
  # 'latest'/'all'/None use the run-level endpoint (with a 'filter' query
  # param); a numeric attempt uses the per-attempt endpoint.
  if attempt in ('latest', 'all') or attempt is None:
    url = f'{GITHUB_API_URL}/actions/runs/{run_id}/jobs'
  else:
    url = f'{GITHUB_API_URL}/actions/runs/{run_id}/attempts/{attempt}/jobs'
  headers = {'Accept': 'application/vnd.github.v3+json', 'Authorization': f'token {token}'}
  page = 1
  per_page = 100
  results = []
  keep_going = True
  while keep_going:
    params = {'per_page': per_page, 'page': page}
    if attempt in ('latest', 'all'):
      params.update({'filter': attempt})
    page = page + 1
    keep_going = False
    with requests_retry_session().get(url, headers=headers, params=params,
                                      stream=True, timeout=TIMEOUT) as response:
      logging.info("list_jobs_for_workflow_run: %s page %d, response: %s",
                   url, params['page'], response)
      # Parse the body once instead of calling response.json() twice.
      body = response.json()
      if 'jobs' not in body:
        break
      job_results = body['jobs']
      results = results + job_results
      # If exactly per_page results were retrieved, read the next page.
      keep_going = (len(job_results) == per_page)
      if limit > 0 and len(results) >= limit:
        keep_going = False
        results = results[:limit]
  return results


def download_job_logs(token, job_id):
  """Fetch the plain-text log for a single workflow job and return it as str.

  https://docs.github.com/en/rest/actions/workflow-jobs#download-job-logs-for-a-workflow-run
  """
  request_url = f'{GITHUB_API_URL}/actions/jobs/{job_id}/logs'
  request_headers = {
      'Accept': 'application/vnd.github.v3+json',
      'Authorization': f'token {token}',
  }
  with requests_retry_session().get(request_url, headers=request_headers,
                                    stream=True, timeout=TIMEOUT) as response:
    logging.info("download_job_logs: %s response: %s", request_url, response)
    # GitHub serves job logs as raw text; decode the body as UTF-8.
    return response.content.decode('utf-8')


def rerun_failed_jobs_for_workflow_run(token, run_id):
  """Ask GitHub to re-run only the failed jobs of a workflow run.

  https://docs.github.com/en/rest/actions/workflow-runs#re-run-failed-jobs-from-a-workflow-run

  Args:
    token: GitHub API token used in the Authorization header.
    run_id: Workflow run ID whose failed jobs should be re-run.

  Returns:
    True if GitHub accepted the request (HTTP 201), False otherwise.
  """
  url = f'{GITHUB_API_URL}/actions/runs/{run_id}/rerun-failed-jobs'
  headers = {'Accept': 'application/vnd.github.v3+json', 'Authorization': f'token {token}'}
  # Use the retry session for consistency with the other API helpers in this
  # file (the original used bare requests.post, which skipped retries).
  with requests_retry_session().post(url, headers=headers,
                                     stream=True, timeout=TIMEOUT) as response:
    logging.info("rerun_failed_jobs_for_workflow_run: %s response: %s", url, response)
    return response.status_code == 201
128 changes: 128 additions & 0 deletions scripts/gha/retry_test_failures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Copyright 2023 Google LLC
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2024

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eh, this is mostly a copy of the script from C++ that was written in 2023, should it still be 2024?

#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""A utility to retry failed jobs in a workflow run.

USAGE:
python3 scripts/gha/retry_test_failures.py \
--token ${{github.token}} \
--run_id <github_workflow_run_id>
"""

import datetime
import json
import re
import shutil

from absl import app
from absl import flags
from absl import logging

import firebase_github

FLAGS = flags.FLAGS
MAX_RETRIES=2

flags.DEFINE_string(
"token", None,
"github.token: A token to authenticate on your repository.")

flags.DEFINE_string(
"run_id", None,
"Github's workflow run ID.")


def get_log_group(log_text, group_name):
  """Extract one '##[group]...' section from a GitHub Actions job log.

  Each raw log line begins with a 28-character timestamp plus a trailing
  space (29 characters total), which is stripped before matching.
  Collection starts at the '##[group]' line whose text contains group_name
  and stops after (and including) the first subsequent '##[error]' line.

  Args:
    log_text: Full job log text, newline-separated timestamped lines.
    group_name: Substring identifying the desired group header line.

  Returns:
    List of de-timestamped lines belonging to the group, or [] if no
    matching group header is found.
  """
  group_log = []
  in_group = False
  for line in log_text.split("\n"):
    # Strip the leading "YYYY-MM-DDTHH:MM:SS.fffffffZ " timestamp prefix.
    line_no_ts = line[29:]
    if line_no_ts.startswith('##[group]'):
      if group_name in line_no_ts:
        print("got group %s" % group_name)
        in_group = True
    if in_group:
      group_log.append(line_no_ts)
      # BUG FIX: the sentinel previously contained a stray ')'
      # ('##[error])'), which never matches real GitHub log lines, so the
      # group never terminated and every remaining line was collected.
      if line_no_ts.startswith('##[error]'):
        print("end group %s" % group_name)
        in_group = False
        break
  return group_log

def main(argv):
  """Decide whether to re-run failed jobs of workflow run --run_id, and do so.

  Fetches every job across all attempts of the run, keeps the most recent
  attempt of each job that did not succeed, and triggers a re-run of failed
  jobs unless any candidate job is still queued/in progress or has already
  been attempted more than MAX_RETRIES times.

  Args:
    argv: Leftover positional command-line args from absl; must be empty.

  Raises:
    app.UsageError: If extra positional arguments were supplied.
  """
  if len(argv) > 1:
    raise app.UsageError("Too many command-line arguments.")
  # Get the list of workflow jobs across all run attempts.
  workflow_jobs = firebase_github.list_jobs_for_workflow_run(
      FLAGS.token, FLAGS.run_id, attempt='all')
  if not workflow_jobs:
    logging.error("No jobs found for workflow run %s", FLAGS.run_id)
    exit(1)

  # Keep only the most recent attempt of each non-successful job, keyed by
  # job name (the same job name appears once per attempt in the listing).
  failed_jobs = {}
  for job in workflow_jobs:
    if job['conclusion'] not in ('success', 'skipped'):
      prior = failed_jobs.get(job['name'])
      if prior is None or job['run_attempt'] > prior['run_attempt']:
        failed_jobs[job['name']] = job

  should_rerun_jobs = False
  for job in failed_jobs.values():
    logging.info('Considering job %s attempt %d: %s (%s)',
                 job['conclusion'] if job['conclusion'] else job['status'],
                 job['run_attempt'], job['name'], job['id'])
    if job['status'] != 'completed':
      # Don't retry a job that is already in progress or queued.
      logging.info("Not retrying, as %s is already %s",
                   job['name'], job['status'].replace("_", " "))
      should_rerun_jobs = False
      break
    if job['run_attempt'] > MAX_RETRIES:
      # Don't retry a job more than MAX_RETRIES times.
      logging.info("Not retrying, as %s has already been attempted %d times",
                   job['name'], job['run_attempt'])
      should_rerun_jobs = False
      break
    if job['conclusion'] == 'failure':
      job_logs = firebase_github.download_job_logs(FLAGS.token, job['id'])
      if job['name'].startswith('build-'):
        # Retry build jobs only when the logs show a transient failure
        # (timeout, network error, or CI execution-time limit).
        if re.search(r'timed? ?out|network error|maximum execution time',
                     job_logs, re.IGNORECASE):
          should_rerun_jobs = True
      elif job['name'].startswith('test-'):
        # Tests should always be retried (for now).
        should_rerun_jobs = True

  if should_rerun_jobs:
    logging.info("Re-running failed jobs in workflow run %s", FLAGS.run_id)
    if not firebase_github.rerun_failed_jobs_for_workflow_run(
        FLAGS.token, FLAGS.run_id):
      logging.error("Error submitting GitHub API request")
      exit(1)
  else:
    logging.info("Not re-running jobs.")


if __name__ == "__main__":
  # Both flags are required; absl reports a usage error if either is missing.
  flags.mark_flag_as_required("token")
  flags.mark_flag_as_required("run_id")
  app.run(main)
13 changes: 7 additions & 6 deletions scripts/gha/trigger_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
https://docs.github.com/en/rest/reference/actions#create-a-workflow-dispatch-event

Usage:
python trigger_workflow.py -w workflow_filename -t github_token [-b branch_name]
python3 trigger_workflow.py -w workflow_filename -t github_token [-b branch_name]
[-r git_repo_url] [-p <input1> <value1> -p <input2> <value2> ...]'
[-C curl_command]

Expand All @@ -38,6 +38,7 @@ def main():
args = parse_cmdline_args()
if args.branch is None:
args.branch=subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD']).decode('utf-8').rstrip('\n')
if args.branch == 'HEAD': args.branch = 'main'
print('autodetected branch: %s' % args.branch)
if args.repo: # else use default firebase/firebase-unity-sdk repo
if not firebase_github.set_repo_url(args.repo):
Expand All @@ -52,13 +53,13 @@ def main():
print(f'request_url: {firebase_github.GITHUB_API_URL}/actions/workflows/{args.workflow}/dispatches')
print(f'request_body: ref: {args.branch}, inputs: {json_params}')
if args.dryrun:
return(0)
exit(0)

print('Sending request to GitHub API...')
if not firebase_github.create_workflow_dispatch(args.token, args.workflow, args.branch, json_params):
print('%sFailed to trigger workflow %s' % (
'::error ::' if args.in_github_action else '', args.workflow))
return(-1)
exit(1)

print('Success!')
time.sleep(args.sleep) # Give a few seconds for the job to become queued.
Expand All @@ -69,7 +70,7 @@ def main():
if "workflow_runs" in workflows:
for workflow in workflows['workflow_runs']:
# Use a heuristic to get the new workflow's run ID.
# Must match the branch name, and be queued/in progress.
# Must match the branch name and be queued/in progress.
if (workflow['status'] in ('queued', 'in_progress') and
workflow['head_branch'] == args.branch):
run_id = workflow['id']
Expand All @@ -79,8 +80,8 @@ def main():
workflow_url = 'https://github.com/firebase/firebase-unity-sdk/actions/runs/%s' % (run_id)
else:
# Couldn't get a run ID, use a generic URL.
workflow_url = '/%s/actions/workflows/%s?query=%s+%s' % (
firebase_github.GITHUB_API_URL, args.workflow,
workflow_url = '%s/actions/workflows/%s?query=%s+%s' % (
'https://github.com/firebase/firebase-unity-sdk', args.workflow,
urllib.parse.quote('event:workflow_dispatch', safe=''),
urllib.parse.quote('branch:'+args.branch, safe=''))
print('%sStarted workflow %s: %s' % ('::warning ::' if args.in_github_action else '',
Expand Down
Loading