From 11aef55904828519c9abb510f69f781105e49b15 Mon Sep 17 00:00:00 2001 From: jtimpe <111305129+jtimpe@users.noreply.github.com> Date: Wed, 6 Dec 2023 09:46:08 -0500 Subject: [PATCH 1/2] Fix parser/preparser validation of empty strings (#2748) * add validator tests * add notEmpty test * add blank string to value_is_empty values * replace removed text * fix partial oob string indexing --------- Co-authored-by: Alex P <63075587+ADPennington@users.noreply.github.com> --- tdrs-backend/tdpservice/parsers/fields.py | 14 ++----- tdrs-backend/tdpservice/parsers/row_schema.py | 3 +- .../tdpservice/parsers/test/test_util.py | 30 +------------- .../parsers/test/test_validators.py | 40 +++++++++++++++++++ tdrs-backend/tdpservice/parsers/validators.py | 24 ++++++++++- 5 files changed, 69 insertions(+), 42 deletions(-) diff --git a/tdrs-backend/tdpservice/parsers/fields.py b/tdrs-backend/tdpservice/parsers/fields.py index 1487cf498..c1b610821 100644 --- a/tdrs-backend/tdpservice/parsers/fields.py +++ b/tdrs-backend/tdpservice/parsers/fields.py @@ -1,19 +1,10 @@ """Datafile field representations.""" import logging +from .validators import value_is_empty logger = logging.getLogger(__name__) -def value_is_empty(value, length): - """Handle 'empty' values as field inputs.""" - empty_values = [ - ' '*length, # ' ' - '#'*length, # '#####' - '_'*length, # '_____' - ] - - return value is None or value in empty_values - class Field: """Provides a mapping between a field name and its position.""" @@ -38,8 +29,9 @@ def __repr__(self): def parse_value(self, line): """Parse the value for a field given a line, startIndex, endIndex, and field type.""" value = line[self.startIndex:self.endIndex] + value_length = self.endIndex-self.startIndex - if value_is_empty(value, self.endIndex-self.startIndex): + if len(value) < value_length or value_is_empty(value, value_length): logger.debug(f"Field: '{self.name}' at position: [{self.startIndex}, {self.endIndex}) is empty.") return None diff --git a/tdrs-backend/tdpservice/parsers/row_schema.py b/tdrs-backend/tdpservice/parsers/row_schema.py index 83885042c..b7fafcdd8 100644 --- a/tdrs-backend/tdpservice/parsers/row_schema.py +++ b/tdrs-backend/tdpservice/parsers/row_schema.py @@ -1,6 +1,7 @@ """Row schema for datafile.""" from .models import ParserErrorCategoryChoices -from .fields import Field, value_is_empty +from .fields import Field +from .validators import value_is_empty import logging logger = logging.getLogger(__name__) diff --git a/tdrs-backend/tdpservice/parsers/test/test_util.py b/tdrs-backend/tdpservice/parsers/test/test_util.py index 03997597a..d7b2afc0e 100644 --- a/tdrs-backend/tdpservice/parsers/test/test_util.py +++ b/tdrs-backend/tdpservice/parsers/test/test_util.py @@ -1,7 +1,7 @@ """Test the methods of RowSchema to ensure parsing and validation work in all individual cases.""" import pytest -from ..fields import Field, value_is_empty +from ..fields import Field from ..row_schema import RowSchema from ..util import SchemaManager @@ -224,6 +224,7 @@ class TestModel: @pytest.mark.parametrize('first,second', [ + ('', ''), (' ', ' '), ('#', '##'), (None, None), @@ -308,33 +309,6 @@ def test_run_postparsing_validators_returns_invalid_and_errors(): assert errors == ['Value is not valid.'] -@pytest.mark.parametrize("value,length", [ - (None, 0), - (None, 10), - (' ', 5), - ('###', 3) -]) -def test_value_is_empty_returns_true(value, length): - """Test value_is_empty returns valid.""" - result = value_is_empty(value, length) - assert result is True - - -@pytest.mark.parametrize("value,length", [ - (0, 1), - (1, 1), - (10, 2), - ('0', 1), - ('0000', 4), - ('1 ', 5), - ('##3', 3) -]) -def test_value_is_empty_returns_false(value, length): - """Test value_is_empty returns invalid.""" - result = value_is_empty(value, length) - assert result is False - - def test_multi_record_schema_parses_and_validates(): """Test SchemaManager parse_and_validate.""" line = '12345' diff --git a/tdrs-backend/tdpservice/parsers/test/test_validators.py b/tdrs-backend/tdpservice/parsers/test/test_validators.py index db7153c41..79e52518b 100644 --- a/tdrs-backend/tdpservice/parsers/test/test_validators.py +++ b/tdrs-backend/tdpservice/parsers/test/test_validators.py @@ -6,6 +6,35 @@ from tdpservice.parsers.test.factories import SSPM5Factory +@pytest.mark.parametrize("value,length", [ + (None, 0), + (None, 10), + (' ', 5), + ('###', 3), + ('', 0), + ('', 10), +]) +def test_value_is_empty_returns_true(value, length): + """Test value_is_empty returns valid.""" + result = validators.value_is_empty(value, length) + assert result is True + + +@pytest.mark.parametrize("value,length", [ + (0, 1), + (1, 1), + (10, 2), + ('0', 1), + ('0000', 4), + ('1 ', 5), + ('##3', 3), +]) +def test_value_is_empty_returns_false(value, length): + """Test value_is_empty returns invalid.""" + result = validators.value_is_empty(value, length) + assert result is False + + def test_or_validators(): """Test `or_validators` gives a valid result.""" value = "2" @@ -295,6 +324,17 @@ def test_notEmpty_returns_invalid_substring(): assert is_valid is False assert error == "111 333 contains blanks between positions 3 and 5." + +def test_notEmpty_returns_nonexistent_substring(): + """Test `notEmpty` gives an invalid result for a nonexistent substring.""" + value = '111 333' + + validator = validators.notEmpty(start=10, end=12) + is_valid, error = validator(value) + + assert is_valid is False + assert error == "111 333 contains blanks between positions 10 and 12." + @pytest.mark.usefixtures('db') class TestCat3ValidatorsBase: """A base test class for tests that evaluate category three validators.""" diff --git a/tdrs-backend/tdpservice/parsers/validators.py b/tdrs-backend/tdpservice/parsers/validators.py index e516a6530..eb32c666e 100644 --- a/tdrs-backend/tdpservice/parsers/validators.py +++ b/tdrs-backend/tdpservice/parsers/validators.py @@ -6,6 +6,18 @@ logger = logging.getLogger(__name__) + +def value_is_empty(value, length): + """Handle 'empty' values as field inputs.""" + empty_values = [ + '', + ' '*length, # ' ' + '#'*length, # '#####' + '_'*length, # '_____' + ] + + return value is None or value in empty_values + # higher order validator func def make_validator(validator_func, error_func): @@ -191,17 +203,25 @@ def isStringLargerThan(val): lambda value: f'{value} is not larger than {val}.' ) + +def _is_empty(value, start, end): + end = end if end else len(str(value)) + vlen = end - start + subv = str(value)[start:end] + return value_is_empty(subv, vlen) or len(subv) < vlen + + def notEmpty(start=0, end=None): """Validate that string value isn't only blanks.""" return make_validator( - lambda value: not str(value)[start:end if end else len(str(value))].isspace(), + lambda value: not _is_empty(value, start, end), lambda value: f'{str(value)} contains blanks between positions {start} and {end if end else len(str(value))}.' ) def isEmpty(start=0, end=None): """Validate that string value is only blanks.""" return make_validator( - lambda value: value[start:end if end else len(value)].isspace(), + lambda value: _is_empty(value, start, end), lambda value: f'{value} is not blank between positions {start} and {end if end else len(value)}.' ) From 25376871c98f77e79936860f43ca487a0a66b0ed Mon Sep 17 00:00:00 2001 From: George Hudson Date: Wed, 6 Dec 2023 09:12:18 -0700 Subject: [PATCH 2/2] added needed space into logic (#2765) * added needed space into logic * updated security controls documentation to mention how NGINX is used by lower envs to connect to prod clamav server --------- Co-authored-by: George Hudson Co-authored-by: Andrew <84722778+andrew-jameson@users.noreply.github.com> --- docs/Security-Compliance/boundary-diagram.md | 6 +++++- scripts/deploy-backend.sh | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/Security-Compliance/boundary-diagram.md b/docs/Security-Compliance/boundary-diagram.md index e58c88ad4..d548a41b0 100644 --- a/docs/Security-Compliance/boundary-diagram.md +++ b/docs/Security-Compliance/boundary-diagram.md @@ -6,7 +6,11 @@ ### Data flow -Users with `OFA Admin` and (STT) `Data Analyst` roles can upload data on upload data files locally into the web application which will store the files in cloud.gov AWS S3 buckets only after the files are successfully scanned for viruses via [ClamAV](../Technical-Documentation/Architecture-Decision-Record/012-antivirus-strategy.md). Developers will deploy new code through GitHub, initiating the continuous integration process through Circle CI. +Users with `OFA Admin` and (STT) `Data Analyst` roles can upload data on upload data files locally into the web application which will store the files in cloud.gov AWS S3 buckets only after the files are successfully scanned for viruses via [ClamAV](../Technical-Documentation/Architecture-Decision-Record/012-antivirus-strategy.md). For lower environments, we use an NGINX server to function as a proxy, routing to the ClamAV-rest server in the production space. The NGINX server also functions as a gatekeeper, allowing documents for scanning to only come from backend servers, and only able to route them directly to the ClamAV-rest server. + +### Code Repository and CI Pipeline + +Developers will deploy new code through GitHub, initiating the continuous integration process through Circle CI. ### Environments/Spaces diff --git a/scripts/deploy-backend.sh b/scripts/deploy-backend.sh index 3547debb7..6e46fe93a 100755 --- a/scripts/deploy-backend.sh +++ b/scripts/deploy-backend.sh @@ -91,7 +91,7 @@ update_backend() cd tdrs-backend || exit cf unset-env "$CGAPPNAME_BACKEND" "AV_SCAN_URL" - if ["$CF_SPACE" = "tanf-prod" ]; then + if [ "$CF_SPACE" = "tanf-prod" ]; then cf set-env "$CGAPPNAME_BACKEND" AV_SCAN_URL "http://tanf-prod-clamav-rest.apps.internal:9000/scan" else # Add environment varilables for clamav @@ -120,7 +120,7 @@ update_backend() # Add network policy to allow frontend to access backend cf add-network-policy "$CGAPPNAME_FRONTEND" "$CGAPPNAME_BACKEND" --protocol tcp --port 8080 - if ["$CF_SPACE" = "tanf-prod" ]; then + if [ "$CF_SPACE" = "tanf-prod" ]; then # Add network policy to allow backend to access tanf-prod services cf add-network-policy "$CGAPPNAME_BACKEND" clamav-rest --protocol tcp --port 9000 else