Skip to content

Commit

Permalink
2711-catch-rpt-month-year-mismatches (#2789)
Browse files Browse the repository at this point in the history
* created pre-check error

* corrected some of failing tests

* corrected failing tests

* two tests still failing

* Passing tests

* Update small_incorrect_file_cross_validator.txt

* Update small_incorrect_file_cross_validator.txt

* Update small_incorrect_file_cross_validator.txt

* Update small_incorrect_file_cross_validator.txt

* revert changes on test file

* corrected the failing test

* resolve circular import

* merge conflict resolution

* linting

* correct failing tests

* corrected t7 tests
  • Loading branch information
raftmsohani authored Jan 22, 2024
1 parent d5a44ff commit 92b0a69
Show file tree
Hide file tree
Showing 33 changed files with 283 additions and 149 deletions.
1 change: 1 addition & 0 deletions tdrs-backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ RUN apt-get -y update
RUN apt-get -y upgrade
# Install a new package:
RUN apt-get install -y gcc && apt-get install -y graphviz && apt-get install -y graphviz-dev
RUN apt-get install postgresql-client -y
# Install pipenv
RUN pip install --upgrade pip pipenv
RUN pipenv install --dev --system --deploy
Expand Down
57 changes: 57 additions & 0 deletions tdrs-backend/tdpservice/parsers/aggregates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Aggregate methods for the parsers."""
from .row_schema import SchemaManager
from .models import ParserError
from .util import month_to_int, get_program_models, get_text_from_df, \
transform_to_months, fiscal_to_calendar, get_prog_from_section


def case_aggregates_by_month(df, dfs_status):
    """Return per-month case aggregates for a datafile.

    The months reported are those of the calendar quarter obtained from the
    datafile's fiscal year and quarter.

    Args:
        df: the datafile whose parsed records and parser errors are counted.
        dfs_status: status of the datafile summary; when it equals "Rejected"
            every month is reported with "N/A" counts instead of being queried.

    Returns:
        A dict with a "months" list (one entry per month of the quarter) and a
        "rejected" count of parser errors on this file that have no case number.
    """
    program_type = get_prog_from_section(str(df.section))

    # The submitted fiscal year/quarter is converted to a calendar year/quarter, which in turn
    # gives the short month names of that quarter (e.g. 'Jan', 'Feb', 'Mar').
    calendar_year, calendar_qtr = fiscal_to_calendar(df.year, df.quarter)
    quarter_months = transform_to_months(calendar_qtr)

    section_name = get_text_from_df(df)['section']
    schemas = list(get_program_models(program_type, section_name).values())

    monthly_rows = []
    for month in quarter_months:
        rpt_month_year = int(f"{calendar_year}{month_to_int(month)}")

        if dfs_status == "Rejected":
            # Bad headers or empty files yield no month in the data itself, so the month list
            # generated from the submitted year/quarter is used and the counts are left as N/A.
            monthly_rows.append({"accepted_with_errors": "N/A",
                                 "accepted_without_errors": "N/A",
                                 "month": month})
            continue

        # Collect the distinct case numbers reported for this month across every record type.
        month_cases = set()
        for schema in schemas:
            row_schema = schema.schemas[0] if isinstance(schema, SchemaManager) else schema
            model = row_schema.document.Django.model
            month_cases.update(
                model.objects.filter(datafile=df)
                .filter(RPT_MONTH_YEAR=rpt_month_year)
                .distinct("CASE_NUMBER")
                .values_list("CASE_NUMBER", flat=True)
            )

        errored = (
            ParserError.objects.filter(file=df)
            .filter(case_number__in=month_cases)
            .distinct('case_number')
            .count()
        )
        monthly_rows.append({"month": month,
                             "accepted_without_errors": len(month_cases) - errored,
                             "accepted_with_errors": errored})

    rejected = ParserError.objects.filter(file=df).filter(case_number=None).count()
    return {"months": monthly_rows, "rejected": rejected}
1 change: 0 additions & 1 deletion tdrs-backend/tdpservice/parsers/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

logger = logging.getLogger(__name__)


class Field:
"""Provides a mapping between a field name and its position."""

Expand Down
12 changes: 12 additions & 0 deletions tdrs-backend/tdpservice/parsers/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,18 @@ def parse_datafile(datafile):
bulk_create_errors(unsaved_parser_errors, 1, flush=True)
return errors

rpt_month_year_is_valid, rpt_month_year_error = validators.validate_header_rpt_month_year(
datafile,
header,
util.make_generate_parser_error(datafile, 1)
)
if not rpt_month_year_is_valid:
logger.info(f"Preparser Error -> Rpt Month Year is not valid: {rpt_month_year_error.error_message}")
errors['document'] = [rpt_month_year_error]
unsaved_parser_errors = {1: [rpt_month_year_error]}
bulk_create_errors(unsaved_parser_errors, 1, flush=True)
return errors

line_errors = parse_datafile_lines(datafile, program_type, section, is_encrypted)

errors = errors | line_errors
Expand Down
27 changes: 25 additions & 2 deletions tdrs-backend/tdpservice/parsers/row_schema.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
"""Row schema for datafile."""
from .models import ParserErrorCategoryChoices
from .fields import Field
from .fields import Field, TransformField
from .validators import value_is_empty
import logging

logger = logging.getLogger(__name__)


class RowSchema:
"""Maps the schema for data lines."""

Expand Down Expand Up @@ -182,3 +181,27 @@ def get_field_by_name(self, name):
if field.name == name:
return field
return None


class SchemaManager:
    """Manage one or more RowSchemas and run all of their parsers and validators."""

    def __init__(self, schemas):
        """Store the collection of schemas this manager drives."""
        self.schemas = schemas

    def parse_and_validate(self, line, generate_error):
        """Run `parse_and_validate` for each schema provided and bubble up errors.

        Returns a list holding one `(record, is_valid, errors)` tuple per schema,
        in the same order as `self.schemas`.
        """
        records = []

        for schema in self.schemas:
            record, is_valid, errors = schema.parse_and_validate(line, generate_error)
            records.append((record, is_valid, errors))

        return records

    def update_encrypted_fields(self, is_encrypted):
        """Update whether schema fields are encrypted or not.

        Only transform fields that already carry an "is_encrypted" kwarg are touched;
        their kwarg is overwritten in place with `is_encrypted`.
        """
        for schema in self.schemas:
            for field in schema.fields:
                # isinstance instead of an exact type comparison, so subclasses of
                # TransformField are updated as well.
                if isinstance(field, TransformField) and "is_encrypted" in field.kwargs:
                    field.kwargs['is_encrypted'] = is_encrypted
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m1.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
"""Schema for SSP M1 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.fields import Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.documents.ssp import SSP_M1DataSubmissionDocument

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for SSP M2 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.transforms import ssp_ssn_decryption_func
from tdpservice.parsers.fields import TransformField, Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.documents.ssp import SSP_M2DataSubmissionDocument

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m3.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for SSP M3 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.transforms import ssp_ssn_decryption_func
from tdpservice.parsers.fields import TransformField, Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.documents.ssp import SSP_M3DataSubmissionDocument

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m4.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
"""Schema for SSP M4 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.fields import Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.documents.ssp import SSP_M4DataSubmissionDocument

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m5.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for SSP M5 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.transforms import ssp_ssn_decryption_func
from tdpservice.parsers.fields import TransformField, Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.documents.ssp import SSP_M5DataSubmissionDocument

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m6.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for SSP M6 record type."""


from ...util import SchemaManager
from ...transforms import calendar_quarter_to_rpt_month_year
from ...fields import Field, TransformField
from ...row_schema import RowSchema
from ...row_schema import RowSchema, SchemaManager
from ... import validators
from tdpservice.search_indexes.documents.ssp import SSP_M6DataSubmissionDocument

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m7.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""Schema for SSP M7 Row."""

from ...util import SchemaManager
from ...fields import Field, TransformField
from ...row_schema import RowSchema
from ...row_schema import RowSchema, SchemaManager
from ...transforms import calendar_quarter_to_rpt_month_year
from ... import validators
from tdpservice.search_indexes.documents.ssp import SSP_M7DataSubmissionDocument
Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""Schema for t1 record types."""

from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.fields import Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.documents.tanf import TANF_T1DataSubmissionDocument

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/tanf/t2.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for TANF T2 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.transforms import tanf_ssn_decryption_func
from tdpservice.parsers.fields import TransformField, Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.documents.tanf import TANF_T2DataSubmissionDocument

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/tanf/t3.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for TANF T3 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.transforms import tanf_ssn_decryption_func
from tdpservice.parsers.fields import TransformField, Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.documents.tanf import TANF_T3DataSubmissionDocument

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/tanf/t4.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
"""Schema for TANF T4 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.fields import Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.documents.tanf import TANF_T4DataSubmissionDocument

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/tanf/t5.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for TANF T5 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.transforms import tanf_ssn_decryption_func
from tdpservice.parsers.fields import TransformField, Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.documents.tanf import TANF_T5DataSubmissionDocument

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/tanf/t6.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for TANF T6 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.transforms import calendar_quarter_to_rpt_month_year
from tdpservice.parsers.fields import Field, TransformField
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.documents.tanf import TANF_T6DataSubmissionDocument

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/tanf/t7.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""Schema for TANF T7 Row."""

from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.fields import Field, TransformField
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers.transforms import calendar_quarter_to_rpt_month_year
from tdpservice.parsers import validators
from tdpservice.search_indexes.documents.tanf import TANF_T7DataSubmissionDocument
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""Schema for Tribal TANF T1 record types."""

from ...util import SchemaManager
from ...fields import Field
from ...row_schema import RowSchema
from ...row_schema import RowSchema, SchemaManager
from ... import validators
from tdpservice.search_indexes.documents.tribal import Tribal_TANF_T1DataSubmissionDocument

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for Tribal TANF T2 row of all submission types."""


from ...util import SchemaManager
from ...transforms import tanf_ssn_decryption_func
from ...fields import TransformField, Field
from ...row_schema import RowSchema
from ...row_schema import RowSchema, SchemaManager
from ... import validators
from tdpservice.search_indexes.documents.tribal import Tribal_TANF_T2DataSubmissionDocument

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for Tribal TANF T3 row of all submission types."""


from ...util import SchemaManager
from ...transforms import tanf_ssn_decryption_func
from ...fields import TransformField, Field
from ...row_schema import RowSchema
from ...row_schema import RowSchema, SchemaManager
from ... import validators
from tdpservice.search_indexes.documents.tribal import Tribal_TANF_T3DataSubmissionDocument

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""Schema for Tribal TANF T4 record types."""

from ...util import SchemaManager
from ...fields import Field
from ...row_schema import RowSchema
from ...row_schema import RowSchema, SchemaManager
from ... import validators
from tdpservice.search_indexes.documents.tribal import Tribal_TANF_T4DataSubmissionDocument

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for Tribal TANF T5 row of all submission types."""


from ...util import SchemaManager
from ...transforms import tanf_ssn_decryption_func
from ...fields import TransformField, Field
from ...row_schema import RowSchema
from ...row_schema import RowSchema, SchemaManager
from ... import validators
from tdpservice.search_indexes.documents.tribal import Tribal_TANF_T5DataSubmissionDocument

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for Tribal T6 record."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.transforms import calendar_quarter_to_rpt_month_year
from tdpservice.parsers.fields import Field, TransformField
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.documents.tribal import Tribal_TANF_T6DataSubmissionDocument

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""Schema for Tribal TANF T7 Row."""

from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.fields import Field, TransformField
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers.transforms import calendar_quarter_to_rpt_month_year
from tdpservice.parsers import validators
from tdpservice.search_indexes.documents.tribal import Tribal_TANF_T7DataSubmissionDocument
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
HEADER20204A06 TAN1 N
T12020101111111111223003403361110213120000300000000000008730010000000000000000000000000000000000222222000000002229012
TRAILER0000001
Loading

0 comments on commit 92b0a69

Please sign in to comment.