From f4faefb34c2c7f9a0f337d1999aff240b67da170 Mon Sep 17 00:00:00 2001 From: Mufaddal Naguthanawala <110353148+nagutm@users.noreply.github.com> Date: Tue, 19 Nov 2024 13:12:53 -0500 Subject: [PATCH] Fix branch reuse issue in new prefix PR creation workflow (#1258) The GitHub Actions workflow for registering new prefixes through the issue template is reusing the same branch for each new issue, leading to the content of the second pull request overwriting the first. This was first noticed in this PR: https://github.com/biopragmatics/bioregistry/pull/1240. This occurs because the workflow does not generate a unique branch for each new issue. As a result, subsequent PRs are force-pushing content to the previous PR's branch instead of creating new, independent branches for each PR. This PR introduces the following changes to the `new_prefix_pr.yml` to address this issue: - Create a step to extract the issue number from the issue URL. - Dynamically generate a unique branch name (`create-pull-request/patch-<issue_number>`) for each new issue. - Modify the PR creation step to use the dynamically generated branch name, ensuring that each PR is linked to its own unique branch. --------- Co-authored-by: Mufaddal Naguthanawala Co-authored-by: Benjamin M. Gyori --- .github/ISSUE_TEMPLATE/new-prefix.yml | 5 +- .github/workflows/new_prefix_pr.yml | 30 ++++- src/bioregistry/gh/new_prefix.py | 17 ++- src/bioregistry/utils.py | 2 +- tests/test_new_prefix.py | 156 ++++++++++++++++++++++++++ 5 files changed, 200 insertions(+), 10 deletions(-) create mode 100644 tests/test_new_prefix.py diff --git a/.github/ISSUE_TEMPLATE/new-prefix.yml b/.github/ISSUE_TEMPLATE/new-prefix.yml index b4790d05b..38bd59829 100644 --- a/.github/ISSUE_TEMPLATE/new-prefix.yml +++ b/.github/ISSUE_TEMPLATE/new-prefix.yml @@ -131,7 +131,10 @@ body: id: contributor_orcid attributes: label: Contributor ORCiD - description: Please provide your ORCiD identifier so we can attribute this contribution to you. 
+ description: | + Please provide your ORCiD identifier so we can attribute this contribution to you. + + For proper attribution of your contribution, please provide a valid ORCiD. If you don't have an ORCiD, please visit [ORCiD's registration page](https://orcid.org/register) to create one for free. placeholder: ex. 0000-0003-4423-4370 validations: required: true diff --git a/.github/workflows/new_prefix_pr.yml b/.github/workflows/new_prefix_pr.yml index 62b946300..9ef6ba781 100644 --- a/.github/workflows/new_prefix_pr.yml +++ b/.github/workflows/new_prefix_pr.yml @@ -14,21 +14,47 @@ jobs: python-version: [ "3.12" ] steps: - uses: actions/checkout@v2 + - name: Delay to ensure labels are attached + run: sleep 10 + - name: Check Issue Labels + id: check_labels + uses: actions/github-script@v6 + with: + script: | + const issue = context.payload.issue; + const hasRequiredLabels = issue.labels.some(label => label.name === "New") && issue.labels.some(label => label.name === "Prefix"); + core.setOutput("hasRequiredLabels", hasRequiredLabels ? 'true' : 'false'); + - name: End Workflow if Labels are Missing + if: steps.check_labels.outputs.hasRequiredLabels == 'false' + run: | + echo "Issue does not have 'New' and 'Prefix' labels. Ending workflow." 
+ exit 0 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install dependencies + if: steps.check_labels.outputs.hasRequiredLabels == 'true' run: pip install -e .[gha] - name: Update id: update + if: steps.check_labels.outputs.hasRequiredLabels == 'true' run: python -m bioregistry.gh.new_prefix --github env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Create Branch + id: create_branch + if: steps.check_labels.outputs.hasRequiredLabels == 'true' + run: | + issue_url="${{ github.event.issue.html_url }}" + issue_number=$(echo "$issue_url" | grep -oE '[0-9]+$') + branch_name="create-pull-request/patch-$issue_number" + echo "::set-output name=branch_name::$branch_name" - name: Create Pull Request + if: steps.check_labels.outputs.hasRequiredLabels == 'true' uses: peter-evans/create-pull-request@v3 with: - branch-suffix: short-commit-hash + branch: ${{ steps.create_branch.outputs.branch_name }} labels: New,Prefix body: ${{ steps.update.outputs.BR_BODY }} - title: ${{ steps.update.outputs.BR_TITLE }} + title: ${{ steps.update.outputs.BR_TITLE }} \ No newline at end of file diff --git a/src/bioregistry/gh/new_prefix.py b/src/bioregistry/gh/new_prefix.py index 6b6a4acaf..ff9414cdc 100644 --- a/src/bioregistry/gh/new_prefix.py +++ b/src/bioregistry/gh/new_prefix.py @@ -14,6 +14,7 @@ import click from more_click import force_option, verbose_option +from pydantic import ValidationError import bioregistry from bioregistry.constants import BIOREGISTRY_PATH, URI_FORMAT_KEY @@ -66,12 +67,16 @@ def process_new_prefix_issue(issue_id: int, resource_data: Dict[str, Any]) -> Op :returns: A Resource instance or None if there is an issue that warrants skipping the issue """ prefix = resource_data.pop("prefix").lower() - contributor = Author( - name=resource_data.pop("contributor_name"), - orcid=_pop_orcid(resource_data), - email=resource_data.pop("contributor_email", None), - 
github=removeprefix(resource_data.pop("contributor_github"), "@"), - ) + try: + contributor = Author( + name=resource_data.pop("contributor_name"), + orcid=_pop_orcid(resource_data), + email=resource_data.pop("contributor_email", None), + github=removeprefix(resource_data.pop("contributor_github"), "@"), + ) + except ValidationError: + logger.warning("Validation error occured") + contributor = None contact_name = resource_data.pop("contact_name", None) contact_orcid = resource_data.pop("contact_orcid", None) diff --git a/src/bioregistry/utils.py b/src/bioregistry/utils.py index 1a7ba50d5..a2abc658b 100644 --- a/src/bioregistry/utils.py +++ b/src/bioregistry/utils.py @@ -158,7 +158,7 @@ def get_hexdigests(alg: str = "sha256") -> Mapping[str, str]: def _get_hexdigest(path: Union[str, Path], alg: str = "sha256") -> str: hashes = get_hashes(path, [alg]) - return cast(str, hashes[alg].hexdigest()) + return hashes[alg].hexdigest() IdentifierGetter = Callable[[dict[str, Any], str], str] diff --git a/tests/test_new_prefix.py b/tests/test_new_prefix.py new file mode 100644 index 000000000..f6edafa74 --- /dev/null +++ b/tests/test_new_prefix.py @@ -0,0 +1,156 @@ +"""Tests for new prefix pipeline.""" + +import unittest +from unittest.mock import patch + +from bioregistry.gh.new_prefix import process_new_prefix_issue +from bioregistry.schema import Author, Resource + + +class TestNewPrefix(unittest.TestCase): + """Tests for new prefix pipeline.""" + + @patch("bioregistry.gh.new_prefix.bioregistry.get_resource") + def test_process_new_prefix_issue(self, mock_get_resource): + """Tests if Resource object returned is as expected using data from an old issue.""" + mock_get_resource.return_value = None + + issue_id = 1181 + resource_data = { + "prefix": "ncbiortholog", + "name": "National Center for Biotechnology Information", + "homepage": "https://www.ncbi.nlm.nih.gov/gene/", + "repository": "n/a", + "description": ( + "Database of one-to-one ortholog information provided by the 
NCBI " + "as a subset of their Gene resource. Used for users to access ortholog " + "information for over 1000 species of vertebrates and arthropods." + ), + "license": "US gov't public domain", + "example": "2", + "pattern": "^\\d+$", + "uri_format": "https://www.ncbi.nlm.nih.gov/gene/$1/ortholog/", + "contributor_name": "Terence Murphy", + "contributor_github": "murphyte", + "contributor_orcid": "0000-0001-9311-9745", + "contributor_email": "murphyte@ncbi.nlm.nih.gov", + "contact_name": "Terence Murphy", + "contact_orcid": "0000-0001-9311-9745", + "contact_github": "murphyte", + "contact_email": "murphyte@ncbi.nlm.nih.gov", + "comment": ( + "We do not currently have the source code for our ortholog resource available publicly, " + "although we are looking at how to split it off and make it available in the next year. " + "We are now in the process of adding this tag to the INSDC list for use in annotations, " + "so I'd like to mirror that tag in bioregistry." + ), + } + + expected_resource = Resource( + prefix="ncbiortholog", + name="National Center for Biotechnology Information", + description=( + "Database of one-to-one ortholog information provided by the NCBI as a subset " + "of their Gene resource. Used for users to access ortholog information for " + "over 1000 species of vertebrates and arthropods." 
+ ), + pattern="^\\d+$", + uri_format="https://www.ncbi.nlm.nih.gov/gene/$1/ortholog/", + uri_format_resolvable=None, + rdf_uri_format=None, + providers=None, + homepage="https://www.ncbi.nlm.nih.gov/gene/", + repository="n/a", + contact=Author( + name="Terence Murphy", + orcid="0000-0001-9311-9745", + email="murphyte@ncbi.nlm.nih.gov", + github="murphyte", + ), + owners=None, + example="2", + example_extras=None, + example_decoys=None, + license="US gov't public domain", + version=None, + part_of=None, + provides=None, + download_owl=None, + download_obo=None, + download_json=None, + download_rdf=None, + banana=None, + banana_peel=None, + deprecated=None, + mappings=None, + synonyms=None, + keywords=None, + references=None, + publications=[], + appears_in=None, + depends_on=None, + namespace_in_lui=None, + no_own_terms=None, + comment=( + "We do not currently have the source code for our ortholog resource available publicly, " + "although we are looking at how to split it off and make it available in the next year. " + "We are now in the process of adding this tag to the INSDC list for use in annotations, " + "so I'd like to mirror that tag in bioregistry." 
+ ), + contributor=Author( + name="Terence Murphy", + orcid="0000-0001-9311-9745", + email="murphyte@ncbi.nlm.nih.gov", + github="murphyte", + ), + contributor_extras=None, + reviewer=None, + proprietary=None, + has_canonical=None, + preferred_prefix=None, + twitter=None, + mastodon=None, + github_request_issue=issue_id, + logo=None, + miriam=None, + n2t=None, + prefixcommons=None, + wikidata=None, + go=None, + obofoundry=None, + bioportal=None, + ecoportal=None, + agroportal=None, + cropoct=None, + ols=None, + aberowl=None, + ncbi=None, + uniprot=None, + biolink=None, + cellosaurus=None, + ontobee=None, + cheminf=None, + fairsharing=None, + biocontext=None, + edam=None, + re3data=None, + hl7=None, + bartoc=None, + rrid=None, + lov=None, + zazuko=None, + togoid=None, + integbio=None, + pathguide=None, + ) + + actual = process_new_prefix_issue(issue_id, resource_data) + + self.assertIsNotNone(actual, "Resource should not be None") + self.assertEqual( + actual, expected_resource, "Resource object does not match the expected output" + ) + + +if __name__ == "__main__": + unittest.main()