From 11adddbf215d7e9501f3a6fb92f6d7472f302606 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Tue, 26 Apr 2022 17:53:46 -0700 Subject: [PATCH 1/6] tests, warehouse: validate job_workflow_ref Add a bunch of counterexample tests to be certain. --- tests/unit/oidc/test_models.py | 24 ++++++++++++++++++++++++ warehouse/oidc/models.py | 13 +++++++------ 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/tests/unit/oidc/test_models.py b/tests/unit/oidc/test_models.py index bb12aebbd5ab..de05b02a614a 100644 --- a/tests/unit/oidc/test_models.py +++ b/tests/unit/oidc/test_models.py @@ -11,6 +11,7 @@ # limitations under the License. import pretend +import pytest from warehouse.oidc import models @@ -132,3 +133,26 @@ def test_github_provider_verifies(self, monkeypatch): } assert provider.verify_claims(signed_claims=signed_claims) assert len(noop_check.calls) == len(verifiable_claims) + + @pytest.mark.parametrize( + ("claim", "valid"), + [ + ("foo/bar/.github/workflows/baz.yml@", True), + ("foo/bar/.github/workflows/@", False), + ("foo/bar/.github/workflows/", False), + ("baz.yml", False), + ("foo/bar/.github/workflows/baz.yml@malicious.yml@", False), + ("foo/bar/.github/workflows/baz.yml@@", False), + ("", False), + ], + ) + def test_github_provider_job_workflow_ref(self, claim, valid): + provider = models.GitHubProvider( + repository_name="bar", + repository_owner="foo", + repository_owner_id=pretend.stub(), + workflow_filename="baz.yml", + ) + + check = models.GitHubProvider.__verifiable_claims__["job_workflow_ref"] + assert bool(check(provider.job_workflow_ref, claim)) is valid diff --git a/warehouse/oidc/models.py b/warehouse/oidc/models.py index f3a228963354..1430cd065997 100644 --- a/warehouse/oidc/models.py +++ b/warehouse/oidc/models.py @@ -11,6 +11,8 @@ # limitations under the License. +import re + from typing import Any, Callable, Dict, Set import sentry_sdk @@ -52,7 +54,7 @@ class OIDCProvider(db.Model): } # A map of claim names to "check" functions, each of which - # has the signature `check(ground-truth, signed-claim) -> bool`. + # has the signature `check(ground-truth, signed-claim) -> bool-like`. __verifiable_claims__: Dict[str, Callable[[Any, Any], bool]] = dict() # Claims that have already been verified during the JWT signature @@ -146,9 +148,9 @@ class GitHubProvider(OIDCProvider): __verifiable_claims__ = { "repository": str.__eq__, - "workflow": str.__eq__, "repository_owner": str.__eq__, "repository_owner_id": str.__eq__, + "job_workflow_ref": re.match, } __unchecked_claims__ = { @@ -166,8 +168,7 @@ class GitHubProvider(OIDCProvider): "event_name", "ref_type", "repository_id", - # TODO(#11096): Support reusable workflows. - "job_workflow_ref", + "workflow", } @property @@ -179,8 +180,8 @@ def repository(self): return f"{self.repository_owner}/{self.repository_name}" @property - def workflow(self): - return self.workflow_filename + def job_workflow_ref(self): + return rf"^{self.repository}/\.github/workflows/{self.workflow_filename}@$" def __str__(self): return f"{self.workflow_filename} @ {self.repository}" From cba17daae394d8a305aa0bf74aac7829b5efb148 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Tue, 26 Apr 2022 18:03:06 -0700 Subject: [PATCH 2/6] oidc/models: wrap `re.match` to make mypy happy --- warehouse/oidc/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/warehouse/oidc/models.py b/warehouse/oidc/models.py index 1430cd065997..c990db99b9f0 100644 --- a/warehouse/oidc/models.py +++ b/warehouse/oidc/models.py @@ -54,7 +54,7 @@ class OIDCProvider(db.Model): } # A map of claim names to "check" functions, each of which - # has the signature `check(ground-truth, signed-claim) -> bool-like`. + # has the signature `check(ground-truth, signed-claim) -> bool`. __verifiable_claims__: Dict[str, Callable[[Any, Any], bool]] = dict() # Claims that have already been verified during the JWT signature @@ -150,7 +150,7 @@ class GitHubProvider(OIDCProvider): "repository": str.__eq__, "repository_owner": str.__eq__, "repository_owner_id": str.__eq__, - "job_workflow_ref": re.match, + "job_workflow_ref": lambda regex, rhs: bool(re.match(regex, rhs)), } __unchecked_claims__ = { From a0ee949f506d4ed624a00bc00038432465ea0c72 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Tue, 26 Apr 2022 18:03:41 -0700 Subject: [PATCH 3/6] tests/oidc: update --- tests/unit/oidc/test_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/oidc/test_models.py b/tests/unit/oidc/test_models.py index de05b02a614a..f2a0a3dceb2b 100644 --- a/tests/unit/oidc/test_models.py +++ b/tests/unit/oidc/test_models.py @@ -155,4 +155,4 @@ def test_github_provider_job_workflow_ref(self, claim, valid): ) check = models.GitHubProvider.__verifiable_claims__["job_workflow_ref"] - assert bool(check(provider.job_workflow_ref, claim)) is valid + assert check(provider.job_workflow_ref, claim) is valid From 798a29ffbbb3e716ffadfee430eadd88235d4c0b Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Tue, 26 Apr 2022 21:14:36 -0700 Subject: [PATCH 4/6] warehouse, tests: fix `job_workflow_ref` regex --- tests/unit/oidc/test_models.py | 8 +++++++- warehouse/oidc/models.py | 6 +++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/unit/oidc/test_models.py b/tests/unit/oidc/test_models.py index f2a0a3dceb2b..b764b55761a3 100644 --- a/tests/unit/oidc/test_models.py +++ b/tests/unit/oidc/test_models.py @@ -137,7 +137,13 @@ def test_github_provider_verifies(self, monkeypatch): @pytest.mark.parametrize( ("claim", "valid"), [ - ("foo/bar/.github/workflows/baz.yml@", True), + ("foo/bar/.github/workflows/baz.yml@refs/tags/v0.0.1", True), + ("foo/bar/.github/workflows/baz.yml@refs/pulls/6", True), + ("foo/bar/.github/workflows/baz.yml@refs/heads/main", True), + ("foo/bar/.github/workflows/baz.yml@fake.yml", False), + ("foo/bar/.github/workflows/baz.yml@fake.yml/refs/pulls/6", False), + ("foo/bar/.github/workflows/baz.yml@notrailingslash", False), + ("foo/bar/.github/workflows/baz.yml@", False), ("foo/bar/.github/workflows/@", False), ("foo/bar/.github/workflows/", False), ("baz.yml", False), diff --git a/warehouse/oidc/models.py b/warehouse/oidc/models.py index c990db99b9f0..fed70b2aa2a9 100644 --- a/warehouse/oidc/models.py +++ b/warehouse/oidc/models.py @@ -181,7 +181,11 @@ def repository(self): @property def job_workflow_ref(self): - return rf"^{self.repository}/\.github/workflows/{self.workflow_filename}@$" + # This is expected to match something like: + # OWNER/REPO/.github/workflows/WORKFLOW.yml@refs/... + return ( + rf"^{self.repository}/\.github/workflows/{self.workflow_filename}@refs/.+$" + ) def __str__(self): return f"{self.workflow_filename} @ {self.repository}" From 9a2d3ee2518879418ccd8bcff16eed7d655cbdbb Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Tue, 26 Apr 2022 21:56:46 -0700 Subject: [PATCH 5/6] tests, warehouse: refactor `job_workflow_ref` again --- tests/unit/oidc/test_models.py | 9 ++++++--- warehouse/oidc/models.py | 26 ++++++++++++++++++-------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/tests/unit/oidc/test_models.py b/tests/unit/oidc/test_models.py index b764b55761a3..2b9957bbd018 100644 --- a/tests/unit/oidc/test_models.py +++ b/tests/unit/oidc/test_models.py @@ -137,12 +137,15 @@ def test_github_provider_verifies(self, monkeypatch): @pytest.mark.parametrize( ("claim", "valid"), [ + # okay: workflow name, followed by a nonempty tail ("foo/bar/.github/workflows/baz.yml@refs/tags/v0.0.1", True), ("foo/bar/.github/workflows/baz.yml@refs/pulls/6", True), ("foo/bar/.github/workflows/baz.yml@refs/heads/main", True), - ("foo/bar/.github/workflows/baz.yml@fake.yml", False), - ("foo/bar/.github/workflows/baz.yml@fake.yml/refs/pulls/6", False), - ("foo/bar/.github/workflows/baz.yml@notrailingslash", False), + ("foo/bar/.github/workflows/baz.yml@notrailingslash", True), + # bad: workflow name, followed by an attempted impersonator + ("foo/bar/.github/workflows/baz.yml@fake.yml@notrailingslash", False), + ("foo/bar/.github/workflows/baz.yml@fake.yml@refs/pulls/6", False), + # bad: missing tail or workflow name or otherwise partial ("foo/bar/.github/workflows/baz.yml@", False), ("foo/bar/.github/workflows/@", False), ("foo/bar/.github/workflows/", False), diff --git a/warehouse/oidc/models.py b/warehouse/oidc/models.py index fed70b2aa2a9..0ae2cea1f60c 100644 --- a/warehouse/oidc/models.py +++ b/warehouse/oidc/models.py @@ -11,8 +11,6 @@ # limitations under the License. -import re - from typing import Any, Callable, Dict, Set import sentry_sdk @@ -24,6 +22,22 @@ from warehouse.packaging.models import Project +def _check_job_workflow_ref(ground_truth, signed_claim): + # We expect a string formatted as follows: + # OWNER/REPO/.github/workflows/WORKFLOW.yml@TAIL + # where TAIL might be a ref (`refs/...`) or a commit hash, + # but is always nonempty. + try: + repo_workflow_path, tail = signed_claim.rsplit("@", 1) + except ValueError: + return False + + if not tail: + return False + + return repo_workflow_path == ground_truth + + class OIDCProviderProjectAssociation(db.Model): __tablename__ = "oidc_provider_project_association" @@ -150,7 +164,7 @@ class GitHubProvider(OIDCProvider): "repository": str.__eq__, "repository_owner": str.__eq__, "repository_owner_id": str.__eq__, - "job_workflow_ref": lambda regex, rhs: bool(re.match(regex, rhs)), + "job_workflow_ref": _check_job_workflow_ref, } __unchecked_claims__ = { @@ -181,11 +195,7 @@ def repository(self): @property def job_workflow_ref(self): - # This is expected to match something like: - # OWNER/REPO/.github/workflows/WORKFLOW.yml@refs/... - return ( - rf"^{self.repository}/\.github/workflows/{self.workflow_filename}@refs/.+$" - ) + return f"{self.repository}/.github/workflows/{self.workflow_filename}" def __str__(self): return f"{self.workflow_filename} @ {self.repository}" From a518b040806b7e396e317de4e980665426f4d995 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Thu, 28 Apr 2022 09:42:05 -0700 Subject: [PATCH 6/6] warehouse, tests: refactor `job_workflow_ref` verification again --- tests/unit/oidc/test_models.py | 76 ++++++++++++++++++++++++---------- warehouse/oidc/models.py | 48 ++++++++++++++------- 2 files changed, 88 insertions(+), 36 deletions(-) diff --git a/tests/unit/oidc/test_models.py b/tests/unit/oidc/test_models.py index 2b9957bbd018..fdac08c070c3 100644 --- a/tests/unit/oidc/test_models.py +++ b/tests/unit/oidc/test_models.py @@ -16,6 +16,13 @@ from warehouse.oidc import models +def test_check_claim_binary(): + wrapped = models._check_claim_binary(str.__eq__) + + assert wrapped("foo", "bar", pretend.stub()) is False + assert wrapped("foo", "foo", pretend.stub()) is True + + class TestOIDCProvider: def test_oidc_provider_not_default_verifiable(self): provider = models.OIDCProvider(projects=[]) @@ -28,9 +35,9 @@ def test_github_provider_all_known_claims(self): assert models.GitHubProvider.all_known_claims() == { # verifiable claims "repository", - "workflow", "repository_owner", "repository_owner_id", + "job_workflow_ref", # preverified claims "iss", "iat", @@ -52,7 +59,7 @@ def test_github_provider_all_known_claims(self): "event_name", "ref_type", "repository_id", - "job_workflow_ref", + "workflow", } def test_github_provider_computed_properties(self): @@ -121,7 +128,7 @@ def test_github_provider_verifies(self, monkeypatch): workflow_filename="fakeworkflow.yml", ) - noop_check = pretend.call_recorder(lambda l, r: True) + noop_check = pretend.call_recorder(lambda gt, sc, ac: True) verifiable_claims = { claim_name: noop_check for claim_name in provider.__verifiable_claims__ } @@ -135,27 +142,54 @@ def test_github_provider_verifies(self, monkeypatch): assert len(noop_check.calls) == len(verifiable_claims) @pytest.mark.parametrize( - ("claim", "valid"), + ("claim", "ref", "valid"), [ - # okay: workflow name, followed by a nonempty tail - ("foo/bar/.github/workflows/baz.yml@refs/tags/v0.0.1", True), - ("foo/bar/.github/workflows/baz.yml@refs/pulls/6", True), - ("foo/bar/.github/workflows/baz.yml@refs/heads/main", True), - ("foo/bar/.github/workflows/baz.yml@notrailingslash", True), - # bad: workflow name, followed by an attempted impersonator - ("foo/bar/.github/workflows/baz.yml@fake.yml@notrailingslash", False), - ("foo/bar/.github/workflows/baz.yml@fake.yml@refs/pulls/6", False), + # okay: workflow name, followed by a nonempty ref + ( + "foo/bar/.github/workflows/baz.yml@refs/tags/v0.0.1", + "refs/tags/v0.0.1", + True, + ), + ("foo/bar/.github/workflows/baz.yml@refs/pulls/6", "refs/pulls/6", True), + ( + "foo/bar/.github/workflows/baz.yml@refs/heads/main", + "refs/heads/main", + True, + ), + ( + "foo/bar/.github/workflows/baz.yml@notrailingslash", + "notrailingslash", + True, + ), + # bad: workflow name, empty or missing ref + ("foo/bar/.github/workflows/baz.yml@emptyref", "", False), + ("foo/bar/.github/workflows/baz.yml@missingref", None, False), + # bad: workflow name with various attempted impersonations + ( + "foo/bar/.github/workflows/baz.yml@fake.yml@notrailingslash", + "notrailingslash", + False, + ), + ( + "foo/bar/.github/workflows/baz.yml@fake.yml@refs/pulls/6", + "refs/pulls/6", + False, + ), # bad: missing tail or workflow name or otherwise partial - ("foo/bar/.github/workflows/baz.yml@", False), - ("foo/bar/.github/workflows/@", False), - ("foo/bar/.github/workflows/", False), - ("baz.yml", False), - ("foo/bar/.github/workflows/baz.yml@malicious.yml@", False), - ("foo/bar/.github/workflows/baz.yml@@", False), - ("", False), + ("foo/bar/.github/workflows/baz.yml@", "notrailingslash", False), + ("foo/bar/.github/workflows/@", "notrailingslash", False), + ("foo/bar/.github/workflows/", "notrailingslash", False), + ("baz.yml", "notrailingslash", False), + ( + "foo/bar/.github/workflows/baz.yml@malicious.yml@", + "notrailingslash", + False, + ), + ("foo/bar/.github/workflows/baz.yml@@", "notrailingslash", False), + ("", "notrailingslash", False), ], ) - def test_github_provider_job_workflow_ref(self, claim, valid): + def test_github_provider_job_workflow_ref(self, claim, ref, valid): provider = models.GitHubProvider( repository_name="bar", repository_owner="foo", @@ -164,4 +198,4 @@ def test_github_provider_job_workflow_ref(self, claim, valid): ) check = models.GitHubProvider.__verifiable_claims__["job_workflow_ref"] - assert check(provider.job_workflow_ref, claim) is valid + assert check(provider.job_workflow_ref, claim, {"ref": ref}) is valid diff --git a/warehouse/oidc/models.py b/warehouse/oidc/models.py index 0ae2cea1f60c..86b52c776065 100644 --- a/warehouse/oidc/models.py +++ b/warehouse/oidc/models.py @@ -22,20 +22,36 @@ from warehouse.packaging.models import Project -def _check_job_workflow_ref(ground_truth, signed_claim): +def _check_claim_binary(binary_func): + """ + Wraps a binary comparison function so that it takes three arguments instead, + ignoring the third. + + This is used solely to make claim verification compatible with "trivial" + checks like `str.__eq__`. + """ + + def wrapper(ground_truth, signed_claim, all_signed_claims): + return binary_func(ground_truth, signed_claim) + + return wrapper + + +def _check_job_workflow_ref(ground_truth, signed_claim, all_signed_claims): # We expect a string formatted as follows: - # OWNER/REPO/.github/workflows/WORKFLOW.yml@TAIL - # where TAIL might be a ref (`refs/...`) or a commit hash, - # but is always nonempty. - try: - repo_workflow_path, tail = signed_claim.rsplit("@", 1) - except ValueError: + # OWNER/REPO/.github/workflows/WORKFLOW.yml@REF + # where REF is the value of the `ref` claim. + + # Defensive: GitHub should never give us an empty job_workflow_ref, + # but we check for one anyways just in case. + if not signed_claim: return False - if not tail: + ref = all_signed_claims.get("ref") + if not ref: return False - return repo_workflow_path == ground_truth + return f"{ground_truth}@{ref}" == signed_claim class OIDCProviderProjectAssociation(db.Model): @@ -68,8 +84,10 @@ class OIDCProvider(db.Model): } # A map of claim names to "check" functions, each of which - # has the signature `check(ground-truth, signed-claim) -> bool`. - __verifiable_claims__: Dict[str, Callable[[Any, Any], bool]] = dict() + # has the signature `check(ground-truth, signed-claim, all-signed-claims) -> bool`. + __verifiable_claims__: Dict[ + str, Callable[[Any, Any, Dict[str, Any]], bool] + ] = dict() # Claims that have already been verified during the JWT signature # verification phase. @@ -131,7 +149,7 @@ def verify_claims(self, signed_claims): ) return False - if not check(getattr(self, claim_name), signed_claim): + if not check(getattr(self, claim_name), signed_claim, signed_claims): return False return True @@ -161,9 +179,9 @@ class GitHubProvider(OIDCProvider): workflow_filename = Column(String) __verifiable_claims__ = { - "repository": str.__eq__, - "repository_owner": str.__eq__, - "repository_owner_id": str.__eq__, + "repository": _check_claim_binary(str.__eq__), + "repository_owner": _check_claim_binary(str.__eq__), + "repository_owner_id": _check_claim_binary(str.__eq__), "job_workflow_ref": _check_job_workflow_ref, }