Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parser/preparser validation of empty strings #2748

Merged
merged 8 commits into from
Dec 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 3 additions & 11 deletions tdrs-backend/tdpservice/parsers/fields.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,10 @@
"""Datafile field representations."""

import logging
from .validators import value_is_empty

logger = logging.getLogger(__name__)

def value_is_empty(value, length):
    """Handle 'empty' values as field inputs.

    A value counts as empty when it is None, the zero-length string, or a
    string of exactly `length` characters made up solely of spaces, '#' or '_'.

    Args:
        value: the raw field value to inspect (may be None).
        length: the expected width of the field.

    Returns:
        True when the value is considered empty, False otherwise.
    """
    empty_values = [
        '',            # zero-length value, e.g. a slice taken past the end of the input
        ' ' * length,  # all spaces
        '#' * length,  # all '#'
        '_' * length,  # all '_'
    ]

    return value is None or value in empty_values


class Field:
"""Provides a mapping between a field name and its position."""
Expand All @@ -38,8 +29,9 @@ def __repr__(self):
def parse_value(self, line):
"""Parse the value for a field given a line, startIndex, endIndex, and field type."""
value = line[self.startIndex:self.endIndex]
value_length = self.endIndex-self.startIndex

if value_is_empty(value, self.endIndex-self.startIndex):
if len(value) < value_length or value_is_empty(value, value_length):
logger.debug(f"Field: '{self.name}' at position: [{self.startIndex}, {self.endIndex}) is empty.")
return None

Expand Down
3 changes: 2 additions & 1 deletion tdrs-backend/tdpservice/parsers/row_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Row schema for datafile."""
from .models import ParserErrorCategoryChoices
from .fields import Field, value_is_empty
from .fields import Field
from .validators import value_is_empty
import logging

logger = logging.getLogger(__name__)
Expand Down
30 changes: 2 additions & 28 deletions tdrs-backend/tdpservice/parsers/test/test_util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Test the methods of RowSchema to ensure parsing and validation work in all individual cases."""

import pytest
from ..fields import Field, value_is_empty
from ..fields import Field
from ..row_schema import RowSchema
from ..util import SchemaManager

Expand Down Expand Up @@ -224,6 +224,7 @@ class TestModel:


@pytest.mark.parametrize('first,second', [
('', ''),
(' ', ' '),
('#', '##'),
(None, None),
Expand Down Expand Up @@ -308,33 +309,6 @@ def test_run_postparsing_validators_returns_invalid_and_errors():
assert errors == ['Value is not valid.']


@pytest.mark.parametrize("value,length", [
    (None, 0),
    (None, 10),
    (' ' * 5, 5),  # full-width blank; built by repetition so the width is explicit
    ('###', 3),
])
def test_value_is_empty_returns_true(value, length):
    """Test value_is_empty returns True for None and full-width filler values."""
    result = value_is_empty(value, length)
    assert result is True


@pytest.mark.parametrize("value,length", [
    (0, 1),
    (1, 1),
    (10, 2),
    ('0', 1),
    ('0000', 4),
    ('1' + ' ' * 4, 5),  # full-width value that is only partially blank
    ('##3', 3)
])
def test_value_is_empty_returns_false(value, length):
    """Test value_is_empty returns False for values that carry real content."""
    result = value_is_empty(value, length)
    assert result is False


def test_multi_record_schema_parses_and_validates():
"""Test SchemaManager parse_and_validate."""
line = '12345'
Expand Down
40 changes: 40 additions & 0 deletions tdrs-backend/tdpservice/parsers/test/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,35 @@
from tdpservice.parsers.test.factories import SSPM5Factory


@pytest.mark.parametrize("value,length", [
    (None, 0),
    (None, 10),
    (' ' * 5, 5),  # full-width blank; built by repetition so the width is explicit
    ('###', 3),
    ('', 0),
    ('', 10),
])
def test_value_is_empty_returns_true(value, length):
    """Test value_is_empty returns True for None, '' and full-width filler values."""
    result = validators.value_is_empty(value, length)
    assert result is True


@pytest.mark.parametrize("value,length", [
    (0, 1),
    (1, 1),
    (10, 2),
    ('0', 1),
    ('0000', 4),
    ('1' + ' ' * 4, 5),  # full-width value that is only partially blank
    ('##3', 3),
])
def test_value_is_empty_returns_false(value, length):
    """Test value_is_empty returns False for values that carry real content."""
    result = validators.value_is_empty(value, length)
    assert result is False


def test_or_validators():
"""Test `or_validators` gives a valid result."""
value = "2"
Expand Down Expand Up @@ -295,6 +324,17 @@ def test_notEmpty_returns_invalid_substring():
assert is_valid is False
assert error == "111 333 contains blanks between positions 3 and 5."


def test_notEmpty_returns_nonexistent_substring():
    """Check that `notEmpty` rejects a range that lies beyond the end of the value."""
    # The sample value is shorter than the requested start position.
    check = validators.notEmpty(start=10, end=12)

    ok, message = check('111 333')

    assert ok is False
    assert message == "111 333 contains blanks between positions 10 and 12."

@pytest.mark.usefixtures('db')
class TestCat3ValidatorsBase:
"""A base test class for tests that evaluate category three validators."""
Expand Down
24 changes: 22 additions & 2 deletions tdrs-backend/tdpservice/parsers/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@

logger = logging.getLogger(__name__)


def value_is_empty(value, length):
    """Report whether a field input should be treated as 'empty'.

    Empty means: None, the zero-length string, or a string of exactly
    `length` characters consisting only of spaces, '#' or '_'.
    """
    if value is None:
        return True

    # '' plus one full-width run per recognised filler character.
    placeholders = ('', *(filler * length for filler in ' #_'))
    return value in placeholders

# higher order validator func

def make_validator(validator_func, error_func):
Expand Down Expand Up @@ -191,17 +203,25 @@ def isStringLargerThan(val):
lambda value: f'{value} is not larger than {val}.'
)


def _is_empty(value, start, end):
    """Tell whether the [start, end) slice of str(value) is empty.

    A falsy `end` means "up to the end of the string". The slice is empty
    when it is shorter than the requested width, or when `value_is_empty`
    accepts it for that width.
    """
    text = str(value)
    stop = end or len(text)
    width = stop - start
    window = text[start:stop]

    # A slice shorter than requested means the range runs past the value.
    if len(window) < width:
        return True
    return value_is_empty(window, width)


def notEmpty(start=0, end=None):
    """Validate that the value is not empty between positions start and end.

    Args:
        start: index of the first character to inspect (defaults to 0).
        end: index one past the last character to inspect; a falsy value
            means "through the end of the value".

    Returns:
        Whatever `make_validator` builds from the check and the error-message
        function below. The check passes only when `_is_empty` reports that
        the inspected substring is not empty.
    """
    # `make_validator` takes exactly two callables: the check and the message.
    # The check delegates to `_is_empty` so that a zero-length or truncated
    # substring is rejected as well as an all-blank one.
    return make_validator(
        lambda value: not _is_empty(value, start, end),
        lambda value: f'{str(value)} contains blanks between positions {start} and {end if end else len(str(value))}.'
    )

def isEmpty(start=0, end=None):
    """Validate that the value is empty between positions start and end.

    Args:
        start: index of the first character to inspect (defaults to 0).
        end: index one past the last character to inspect; a falsy value
            means "through the end of the value".

    Returns:
        Whatever `make_validator` builds from the check and the error-message
        function below. The check passes when `_is_empty` reports that the
        inspected substring is empty.
    """
    # `make_validator` takes exactly two callables: the check and the message.
    # The message measures `str(value)` (as `_is_empty` does) so that it can
    # also be rendered for non-string values; for strings the text is unchanged.
    return make_validator(
        lambda value: _is_empty(value, start, end),
        lambda value: f'{value} is not blank between positions {start} and {end if end else len(str(value))}.'
    )

Expand Down
Loading