Skip to content

Commit

Permalink
Fix branch reuse issue in new prefix PR creation workflow (#1258)
Browse files Browse the repository at this point in the history
The GitHub Actions workflow that registers new prefixes through the
issue template reuses the same branch for every new issue, so the
content of a second pull request overwrites the first. This was
first noticed in
#1240. It occurs
because the workflow does not generate a unique branch for each new
issue. As a result, subsequent PRs are force-pushed to the
previous PR's branch instead of being given new, independent branches
of their own.

This PR makes the following changes to `new_prefix_pr.yml` to
address this issue:
- Create a step to extract the issue number from the issue URL.
- Dynamically generate a unique branch name
(`create-pull-request/patch-<issue_number>`) for each new issue.
- Modify the PR creation step to use the dynamically generated branch
name, ensuring that each PR is linked to its own unique branch.

---------

Co-authored-by: Mufaddal Naguthanawala <m.nguthana@hotmail.com>
Co-authored-by: Benjamin M. Gyori <ben.gyori@gmail.com>
  • Loading branch information
3 people authored Nov 19, 2024
1 parent 54a91e1 commit f4faefb
Show file tree
Hide file tree
Showing 5 changed files with 200 additions and 10 deletions.
5 changes: 4 additions & 1 deletion .github/ISSUE_TEMPLATE/new-prefix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,10 @@ body:
id: contributor_orcid
attributes:
label: Contributor ORCiD
description: Please provide your ORCiD identifier so we can attribute this contribution to you.
description: |
Please provide your ORCiD identifier so we can attribute this contribution to you.
For proper attribution of your contribution, please provide a valid ORCiD. If you don't have an ORCiD, please visit [ORCiD's registration page](https://orcid.org/register) to create one for free.
placeholder: ex. 0000-0003-4423-4370
validations:
required: true
Expand Down
30 changes: 28 additions & 2 deletions .github/workflows/new_prefix_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,47 @@ jobs:
python-version: [ "3.12" ]
steps:
- uses: actions/checkout@v2
- name: Delay to ensure labels are attached
run: sleep 10
- name: Check Issue Labels
id: check_labels
uses: actions/github-script@v6
with:
script: |
const issue = context.payload.issue;
const hasRequiredLabels = issue.labels.some(label => label.name === "New") && issue.labels.some(label => label.name === "Prefix");
core.setOutput("hasRequiredLabels", hasRequiredLabels ? 'true' : 'false');
- name: End Workflow if Labels are Missing
if: steps.check_labels.outputs.hasRequiredLabels == 'false'
run: |
echo "Issue does not have 'New' and 'Prefix' labels. Ending workflow."
exit 0
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
if: steps.check_labels.outputs.hasRequiredLabels == 'true'
run: pip install -e .[gha]
- name: Update
id: update
if: steps.check_labels.outputs.hasRequiredLabels == 'true'
run: python -m bioregistry.gh.new_prefix --github
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Create Branch
id: create_branch
if: steps.check_labels.outputs.hasRequiredLabels == 'true'
run: |
issue_url="${{ github.event.issue.html_url }}"
issue_number=$(echo "$issue_url" | grep -oE '[0-9]+$')
branch_name="create-pull-request/patch-$issue_number"
echo "::set-output name=branch_name::$branch_name"
- name: Create Pull Request
if: steps.check_labels.outputs.hasRequiredLabels == 'true'
uses: peter-evans/create-pull-request@v3
with:
branch-suffix: short-commit-hash
branch: ${{ steps.create_branch.outputs.branch_name }}
labels: New,Prefix
body: ${{ steps.update.outputs.BR_BODY }}
title: ${{ steps.update.outputs.BR_TITLE }}
title: ${{ steps.update.outputs.BR_TITLE }}
17 changes: 11 additions & 6 deletions src/bioregistry/gh/new_prefix.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import click
from more_click import force_option, verbose_option
from pydantic import ValidationError

import bioregistry
from bioregistry.constants import BIOREGISTRY_PATH, URI_FORMAT_KEY
Expand Down Expand Up @@ -66,12 +67,16 @@ def process_new_prefix_issue(issue_id: int, resource_data: Dict[str, Any]) -> Op
:returns: A Resource instance or None if there is an issue that warrants skipping the issue
"""
prefix = resource_data.pop("prefix").lower()
contributor = Author(
name=resource_data.pop("contributor_name"),
orcid=_pop_orcid(resource_data),
email=resource_data.pop("contributor_email", None),
github=removeprefix(resource_data.pop("contributor_github"), "@"),
)
try:
contributor = Author(
name=resource_data.pop("contributor_name"),
orcid=_pop_orcid(resource_data),
email=resource_data.pop("contributor_email", None),
github=removeprefix(resource_data.pop("contributor_github"), "@"),
)
except ValidationError:
logger.warning("Validation error occured")
contributor = None

contact_name = resource_data.pop("contact_name", None)
contact_orcid = resource_data.pop("contact_orcid", None)
Expand Down
2 changes: 1 addition & 1 deletion src/bioregistry/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def get_hexdigests(alg: str = "sha256") -> Mapping[str, str]:

def _get_hexdigest(path: Union[str, Path], alg: str = "sha256") -> str:
hashes = get_hashes(path, [alg])
return cast(str, hashes[alg].hexdigest())
return hashes[alg].hexdigest()


IdentifierGetter = Callable[[dict[str, Any], str], str]
Expand Down
156 changes: 156 additions & 0 deletions tests/test_new_prefix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
"""Tests for new prefix pipeline."""

import unittest
from unittest.mock import patch

from bioregistry.gh.new_prefix import process_new_prefix_issue
from bioregistry.schema import Author, Resource


class TestNewPrefix(unittest.TestCase):
    """Test the pipeline that converts new prefix issues into resources."""

    @patch("bioregistry.gh.new_prefix.bioregistry.get_resource")
    def test_process_new_prefix_issue(self, mock_get_resource):
        """Check that the data of an old issue is turned into the expected Resource."""
        # The patched ``bioregistry.get_resource`` always answers ``None``
        # (presumably so the prefix looks unregistered - confirm against
        # ``process_new_prefix_issue``).
        mock_get_resource.return_value = None

        issue_id = 1181

        # Values that appear both in the issue payload and in the expected
        # resource are defined once and shared.
        description = (
            "Database of one-to-one ortholog information provided by the NCBI "
            "as a subset of their Gene resource. Used for users to access ortholog "
            "information for over 1000 species of vertebrates and arthropods."
        )
        comment = (
            "We do not currently have the source code for our ortholog resource available publicly, "
            "although we are looking at how to split it off and make it available in the next year. "
            "We are now in the process of adding this tag to the INSDC list for use in annotations, "
            "so I'd like to mirror that tag in bioregistry."
        )
        person = {
            "name": "Terence Murphy",
            "orcid": "0000-0001-9311-9745",
            "email": "murphyte@ncbi.nlm.nih.gov",
            "github": "murphyte",
        }

        resource_data = {
            "prefix": "ncbiortholog",
            "name": "National Center for Biotechnology Information",
            "homepage": "https://www.ncbi.nlm.nih.gov/gene/",
            "repository": "n/a",
            "description": description,
            "license": "US gov't public domain",
            "example": "2",
            "pattern": "^\\d+$",
            "uri_format": "https://www.ncbi.nlm.nih.gov/gene/$1/ortholog/",
            "contributor_name": person["name"],
            "contributor_github": person["github"],
            "contributor_orcid": person["orcid"],
            "contributor_email": person["email"],
            "contact_name": person["name"],
            "contact_orcid": person["orcid"],
            "contact_github": person["github"],
            "contact_email": person["email"],
            "comment": comment,
        }

        # Every field that is explicitly passed as ``None`` to the expected resource.
        none_fields = (
            "uri_format_resolvable",
            "rdf_uri_format",
            "providers",
            "owners",
            "example_extras",
            "example_decoys",
            "version",
            "part_of",
            "provides",
            "download_owl",
            "download_obo",
            "download_json",
            "download_rdf",
            "banana",
            "banana_peel",
            "deprecated",
            "mappings",
            "synonyms",
            "keywords",
            "references",
            "appears_in",
            "depends_on",
            "namespace_in_lui",
            "no_own_terms",
            "contributor_extras",
            "reviewer",
            "proprietary",
            "has_canonical",
            "preferred_prefix",
            "twitter",
            "mastodon",
            "logo",
            "miriam",
            "n2t",
            "prefixcommons",
            "wikidata",
            "go",
            "obofoundry",
            "bioportal",
            "ecoportal",
            "agroportal",
            "cropoct",
            "ols",
            "aberowl",
            "ncbi",
            "uniprot",
            "biolink",
            "cellosaurus",
            "ontobee",
            "cheminf",
            "fairsharing",
            "biocontext",
            "edam",
            "re3data",
            "hl7",
            "bartoc",
            "rrid",
            "lov",
            "zazuko",
            "togoid",
            "integbio",
            "pathguide",
        )

        expected_resource = Resource(
            prefix="ncbiortholog",
            name="National Center for Biotechnology Information",
            description=description,
            pattern="^\\d+$",
            uri_format="https://www.ncbi.nlm.nih.gov/gene/$1/ortholog/",
            homepage="https://www.ncbi.nlm.nih.gov/gene/",
            repository="n/a",
            contact=Author(**person),
            example="2",
            license="US gov't public domain",
            publications=[],
            comment=comment,
            contributor=Author(**person),
            github_request_issue=issue_id,
            **dict.fromkeys(none_fields),
        )

        actual = process_new_prefix_issue(issue_id, resource_data)

        self.assertIsNotNone(actual, msg="Resource should not be None")
        self.assertEqual(
            actual,
            expected_resource,
            msg="Resource object does not match the expected output",
        )


if __name__ == "__main__":
    # Let the module be executed directly as a script to run its tests.
    unittest.main()

0 comments on commit f4faefb

Please sign in to comment.