diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2299ab9d4..bfe15f4b1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python: ['3.7', '3.8', '3.9'] + python: ['3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index d536a5732..13b128ace 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -15,7 +15,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest] - python: ['3.7', '3.8', '3.9'] + python: ['3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 diff --git a/README.md b/README.md index 03e7e36fb..cf734df99 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,7 @@ BasicAuthDetector CloudantDetector DiscordBotTokenDetector GitHubTokenDetector +GitLabTokenDetector Base64HighEntropyString HexHighEntropyString IbmCloudIamDetector diff --git a/detect_secrets/filters/heuristic.py b/detect_secrets/filters/heuristic.py index 0dbdb4949..7fb078181 100644 --- a/detect_secrets/filters/heuristic.py +++ b/detect_secrets/filters/heuristic.py @@ -164,7 +164,7 @@ def is_prefixed_with_dollar_sign(secret: str) -> bool: # false negatives than `is_templated_secret` (e.g. secrets that actually start with a $). # This is best used with files that actually use this as a means of referencing variables. # TODO: More intelligent filetype handling? 
- return secret[0] == '$' + return bool(secret) and secret[0] == '$' def is_indirect_reference(line: str) -> bool: diff --git a/detect_secrets/plugins/aws.py b/detect_secrets/plugins/aws.py index ee822b6f7..94af367de 100644 --- a/detect_secrets/plugins/aws.py +++ b/detect_secrets/plugins/aws.py @@ -25,10 +25,10 @@ class AWSKeyDetector(RegexBasedDetector): secret_keyword = r'(?:key|pwd|pw|password|pass|token)' denylist = ( - re.compile(r'AKIA[0-9A-Z]{16}'), + re.compile(r'(?:A3T[A-Z0-9]|ABIA|ACCA|AKIA|ASIA)[0-9A-Z]{16}'), # This examines the variable name to identify AWS secret tokens. - # The order is important since we want to prefer finding `AKIA`-based + # The order is important since we want to prefer finding access # keys (since they can be verified), rather than the secret tokens. re.compile( diff --git a/detect_secrets/plugins/gitlab_token.py b/detect_secrets/plugins/gitlab_token.py new file mode 100644 index 000000000..ed197fd7d --- /dev/null +++ b/detect_secrets/plugins/gitlab_token.py @@ -0,0 +1,59 @@ +""" +This plugin searches for GitLab tokens +""" +import re + +from detect_secrets.plugins.base import RegexBasedDetector + + +class GitLabTokenDetector(RegexBasedDetector): + """Scans for GitLab tokens.""" + + secret_type = 'GitLab Token' + + denylist = [ + # ref: + # - https://docs.gitlab.com/ee/security/token_overview.html#gitlab-tokens + # - https://gitlab.com/groups/gitlab-org/-/epics/8923 + # - https://github.com/gitlabhq/gitlabhq/blob/master/gems + # /gitlab-secret_detection/lib/gitleaks.toml#L6-L76 + + # `gl..-` prefix and a token of length >=20 + # characters are typically alphanumeric, underscore, dash + # Most tokens are generated either with: + # - `Devise.friendly_token`, a string with a default length of 20, or + # - `SecureRandom.hex`, default data size of 16 bytes, encoded in different ways. + # String length may vary depending on the type of token, and probably + # even GL-settings in the future, so we expect between 20 and 50 chars. 
+ + # Personal Access Token - glpat + # Deploy Token - gldt + # Feed Token - glft + # OAuth Access Token - glsoat + # Runner Token - glrt + re.compile( + r'(glpat|gldt|glft|glsoat|glrt)-' + r'[A-Za-z0-9_\-]{20,50}(?!\w)', + ), + + # Runner Registration Token + re.compile(r'GR1348941[A-Za-z0-9_\-]{20,50}(?!\w)'), + + # CI/CD Token - `glcbt` or `glcbt-XY_` where XY is a 2-char hex 'partition_id' + re.compile(r'glcbt-([0-9a-fA-F]{2}_)?[A-Za-z0-9_\-]{20,50}(?!\w)'), + + # Incoming Mail Token - generated by SecureRandom.hex, default length 16 bytes + # resulting token length is 25 when Base-36 encoded + re.compile(r'glimt-[A-Za-z0-9_\-]{25}(?!\w)'), + + # Trigger Token - generated by `SecureRandom.hex(20)` + re.compile(r'glptt-[A-Za-z0-9_\-]{40}(?!\w)'), + + # Agent Token - generated by `Devise.friendly_token(50)` + # tokens have a minimum length of 50 chars, up to 1024 chars + re.compile(r'glagent-[A-Za-z0-9_\-]{50,1024}(?!\w)'), + + # GitLab OAuth Application Secret - generated by `SecureRandom.hex(32)` + # -> becomes 64 hexadecimal characters + re.compile(r'gloas-[A-Za-z0-9_\-]{64}(?!\w)'), + ] diff --git a/detect_secrets/plugins/ibm_cloud_iam.py b/detect_secrets/plugins/ibm_cloud_iam.py index 037d971b5..6920849c6 100644 --- a/detect_secrets/plugins/ibm_cloud_iam.py +++ b/detect_secrets/plugins/ibm_cloud_iam.py @@ -34,7 +34,7 @@ def verify(self, secret: str) -> VerifiedResult: def verify_cloud_iam_api_key(apikey: Union[str, bytes]) -> requests.Response: # pragma: no cover - if type(apikey) == bytes: + if type(apikey) is bytes: apikey = apikey.decode('UTF-8') headers = { diff --git a/requirements-dev.txt b/requirements-dev.txt index 581d9d24a..1bcaa309a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,44 +1,43 @@ -attrs==21.4.0 -backports.entry-points-selectable==1.1.1 -certifi==2021.10.8 -cfgv==3.2.0 -charset-normalizer==2.0.7 -coverage==4.5.4 -distlib==0.3.6 -filelock==3.0.12 -flake8==3.5.0 +attrs==23.2.0 
+backports.entry-points-selectable==1.3.0 +certifi==2023.11.17 +cfgv==3.4.0 +charset-normalizer==3.3.2 +coverage==7.4.0 +distlib==0.3.8 +filelock==3.13.1 +flake8==6.1.0 gibberish-detector==0.1.1 -identify==2.3.0 -idna==3.3 -importlib-metadata==4.8.1 -iniconfig==1.1.1 -mccabe==0.6.1 +identify==2.5.33 +idna==3.7 +iniconfig==2.0.0 +mccabe==0.7.0 monotonic==1.6 mypy==0.971 -mypy-extensions==0.4.3 -nodeenv==1.6.0 -packaging==21.3 -platformdirs==2.0.2 -pluggy==0.13.1 -pre-commit==2.17.0 +mypy-extensions==1.0.0 +nodeenv==1.8.0 +packaging==23.2 +platformdirs==4.1.0 +pluggy==1.3.0 +pre-commit==3.5.0 py==1.11.0 -pyahocorasick==1.4.4 -pycodestyle==2.3.1 -pyflakes==1.6.0 -pyparsing==2.4.7 -pytest==6.2.2 -PyYAML==6.0 -requests==2.26.0 -responses==0.16.0 +pyahocorasick==2.0.0 +pycodestyle==2.11.1 +pyflakes==3.1.0 +pyparsing==3.1.1 +pytest==7.4.3 +PyYAML==6.0.1 +requests==2.31.0 +responses==0.24.1 six==1.16.0 toml==0.10.2 -tox==3.24.4 +tox==4.11.4 tox-pip-extensions==1.6.0 -typed-ast==1.5.4 -types-PyYAML==6.0.11 -types-requests==2.28.9 -typing-extensions==3.10.0.2 -unidiff==0.7.4 -urllib3==1.26.9 -virtualenv==20.6.0 -zipp==3.6.0 +typed-ast==1.5.5 +types-PyYAML==6.0.12.12 +types-requests==2.31.0.20240106 +typing-extensions==4.9.0 +unidiff==0.7.5 +urllib3==2.1.0 +virtualenv==20.25.0 +zipp==3.17.0 diff --git a/setup.py b/setup.py index 0ba463850..3613810f8 100644 --- a/setup.py +++ b/setup.py @@ -24,9 +24,8 @@ def get_version(): description='Tool for detecting secrets in the codebase', long_description=long_description, long_description_content_type='text/markdown', - license='Copyright Yelp, Inc. 
2020', - author='Aaron Loo', - author_email='aaronloo@yelp.com', + author='Yelp, Inc.', + author_email='opensource@yelp.com', url='https://github.com/Yelp/detect-secrets', download_url='https://github.com/Yelp/detect-secrets/archive/{}.tar.gz'.format(VERSION), keywords=['secret-management', 'pre-commit', 'security', 'entropy-checks'], diff --git a/tests/filters/heuristic_filter_test.py b/tests/filters/heuristic_filter_test.py index a2f5dbb2b..90e1eb0de 100644 --- a/tests/filters/heuristic_filter_test.py +++ b/tests/filters/heuristic_filter_test.py @@ -121,9 +121,16 @@ def test_is_templated_secret(line, result): assert bool(list(scan_line(line))) is result -def test_is_prefixed_with_dollar_sign(): - assert filters.heuristic.is_prefixed_with_dollar_sign('$secret') - assert not filters.heuristic.is_prefixed_with_dollar_sign('secret') +@pytest.mark.parametrize( + 'secret, result', + ( + ('$secret', True), + ('secret', False), + ('', False), + ), +) +def test_is_prefixed_with_dollar_sign(secret, result): + assert filters.heuristic.is_prefixed_with_dollar_sign(secret) == result @pytest.mark.parametrize( diff --git a/tests/plugins/aws_key_test.py b/tests/plugins/aws_key_test.py index 6174a857c..9139c9dd6 100644 --- a/tests/plugins/aws_key_test.py +++ b/tests/plugins/aws_key_test.py @@ -32,6 +32,22 @@ def setup(self): 'AKIAZZZ', False, ), + ( + 'A3T0ZZZZZZZZZZZZZZZZ', + True, + ), + ( + 'ABIAZZZZZZZZZZZZZZZZ', + True, + ), + ( + 'ACCAZZZZZZZZZZZZZZZZ', + True, + ), + ( + 'ASIAZZZZZZZZZZZZZZZZ', + True, + ), ( 'aws_access_key = "{}"'.format(EXAMPLE_SECRET), True, diff --git a/tests/plugins/gitlab_token_test.py b/tests/plugins/gitlab_token_test.py new file mode 100644 index 000000000..e75085a99 --- /dev/null +++ b/tests/plugins/gitlab_token_test.py @@ -0,0 +1,138 @@ +import pytest + +from detect_secrets.plugins.gitlab_token import GitLabTokenDetector + + +class TestGitLabTokenDetector: + @pytest.mark.parametrize( + 'payload, should_flag', + [ + ( + # valid PAT prefix and 
token length + 'glpat-hellOworld380_testin', + True, + ), + ( + # spaces are not part of the token + 'glpat-hellOWorld380 testin', + False, + ), + ( + # invalid separator (underscore VS dash) + 'glpat_hellOworld380_testin', + False, + ), + ( + # valid different prefix and token length + 'gldt-HwllOuhfw-wu0rlD_yep', + True, + ), + ( + # token < 20 chars should be too short + 'gldt-seems_too000Sshorty', + False, + ), + ( + # invalid prefix, but valid token length + 'foo-hello-world80_testin', + False, + ), + ( + # token length may vary depending on the impl., but <= 50 chars should be fine + 'glsoat-PREfix_helloworld380_testin_pretty_long_token_long', + True, + ), + ( + # token > 50 chars is too long + 'glsoat-PREfix_helloworld380_testin_pretty_long_token_long_', + False, + ), + ( + # GitLab is not GitHub + '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', + False, + ), + ], + ) + def test_base_token_format(self, payload, should_flag): + logic = GitLabTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + assert len(output) == int(should_flag) + + @pytest.mark.parametrize( + 'payload, should_flag', + [ + ('GR1348941PREfix_helloworld380', True), + ('GR1348941PREfix_helloworld380_testin_pretty_long_token_long', True), + ('GR1348941PREfix_helloworld380_testin_pretty_long_token_long_', False), # too long + ('GR1348941helloWord0', False), # too short + ], + ) + def test_runner_registration_token(self, payload, should_flag): + logic = GitLabTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + assert len(output) == int(should_flag) + + @pytest.mark.parametrize( + 'payload, should_flag', + [ + ('glcbt-helloworld380_testin', True), + ], + ) + def test_cicd_token(self, payload, should_flag): + logic = GitLabTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + assert len(output) == int(should_flag) + + @pytest.mark.parametrize( + 'payload, should_flag', + [ + 
('glimt-my-tokens_are-correctAB38', True), + ('glimt-my-tokens_are-correctAB', False), # too short + ('glimt-my-tokens_are-correctAB38_280', False), # too long + ], + ) + def test_incoming_mail_token(self, payload, should_flag): + logic = GitLabTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + assert len(output) == int(should_flag) + + @pytest.mark.parametrize( + 'payload, should_flag', + [ + ('glptt-Need5_T00-be-exactly-40-chars--ELse_fail', True), + ('glptt-Need5_T00-be-exactly-40-chars--ELse_failing', False), # too long + ('glptt-hellOworld380_testin', False), # too short + ], + ) + def test_trigger_token(self, payload, should_flag): + logic = GitLabTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + assert len(output) == int(should_flag) + + @pytest.mark.parametrize( + 'payload, should_flag', + [ + ('glagent-Need5_T00-bee-longer-than-50_chars-or-else-failING', True), + ('glagent-Need5_T00-bee-longer-than-50_chars-or-else-failING-still_OK', True), + (('glagent-' + 'X' * 1025), False), # too long + ('glagent-hellOworld380_testin', False), # len 20 is too short + ], + ) + def test_agent_token(self, payload, should_flag): + logic = GitLabTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + assert len(output) == int(should_flag) + + @pytest.mark.parametrize( + 'payload, should_flag', + [ + ('gloas-checking_Length-Is-_exactly_64--checking_Length-Is-_exactly_64--', True), + ('gloas-checking_Length-Is-checking_Length-Is-', False), # too short + ('gloas-checking_Length-Is-_exactly_64--Xchecking_Length-Is-_longer_longer', False), + ], + ) + def test_oauth_application_secret(self, payload, should_flag): + logic = GitLabTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + assert len(output) == int(should_flag) diff --git a/tox.ini b/tox.ini index 48f568f5d..01f5d4d07 100644 --- a/tox.ini +++ b/tox.ini @@ -1,9 +1,8 @@ [tox] project = 
detect_secrets # These should match the ci python env list -envlist = py{37,38,39},mypy +envlist = py{38,39,310,311},mypy skip_missing_interpreters = true -tox_pip_extensions_ext_venv_update = true [testenv] passenv = SSH_AUTH_SOCK