Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2711-catch-rpt-month-year-mismatches #2789

Merged
merged 21 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions tdrs-backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ RUN apt-get -y update
RUN apt-get -y upgrade
# Install a new package:
RUN apt-get install -y gcc && apt-get install -y graphviz && apt-get install -y graphviz-dev
RUN apt-get install postgresql-client -y
elipe17 marked this conversation as resolved.
Show resolved Hide resolved
# Install pipenv
RUN pip install --upgrade pip pipenv
RUN pipenv install --dev --system --deploy
Expand Down
56 changes: 56 additions & 0 deletions tdrs-backend/tdpservice/parsers/aggregates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Aggregate methods for the parsers."""
from .row_schema import SchemaManager
from .models import ParserError
from .util import month_to_int, get_program_models, get_text_from_df, \
transform_to_months, fiscal_to_calendar, get_prog_from_section


def case_aggregates_by_month(df, dfs_status):
    """Build per-month case acceptance counts for a datafile.

    The months reported are those produced by `transform_to_months` for the
    calendar quarter that `fiscal_to_calendar` derives from the datafile's
    year and quarter. For every month, distinct case numbers are gathered
    across all models returned by `get_program_models`, then split by whether
    a ParserError on the file references the case number.

    Args:
        df: the datafile whose records and parser errors are aggregated.
        dfs_status: status of the datafile; when it equals "Rejected" every
            month is reported with "N/A" counts instead of numbers.

    Returns:
        A dict with a "months" list (one dict per month carrying "month",
        "accepted_without_errors" and "accepted_with_errors") and a
        "rejected" count of the file's parser errors without a case number.
    """
    program_type = get_prog_from_section(str(df.section))

    # The datafile's year/quarter is converted to a calendar year/quarter, which is then expanded into
    # short month names, e.g. 'Jan', 'Feb', 'Mar'.
    calendar_year, calendar_qtr = fiscal_to_calendar(df.year, df.quarter)
    quarter_months = transform_to_months(calendar_qtr)

    section_name = get_text_from_df(df)['section']
    models_by_name = get_program_models(program_type, section_name)
    schema_entries = list(models_by_name.values())

    monthly = []
    for month in quarter_months:
        month_number = month_to_int(month)
        rpt_month_year = int(f"{calendar_year}{month_number}")

        if dfs_status == "Rejected":
            # Files with bad headers or no content have no month of their own, so the month list
            # generated from the submitted year/quarter is still reported, just without counts.
            monthly.append({"accepted_with_errors": "N/A",
                            "accepted_without_errors": "N/A",
                            "month": month})
            continue

        # Distinct case numbers seen for this month across every record model.
        seen_cases = set()
        for entry in schema_entries:
            # A SchemaManager wraps several schemas; its first schema supplies the model to query.
            schema = entry.schemas[0] if isinstance(entry, SchemaManager) else entry
            seen_cases.update(
                schema.model.objects
                .filter(datafile=df, RPT_MONTH_YEAR=rpt_month_year)
                .distinct("CASE_NUMBER")
                .values_list("CASE_NUMBER", flat=True)
            )

        with_errors = (ParserError.objects
                       .filter(file=df, case_number__in=seen_cases)
                       .distinct('case_number')
                       .count())

        monthly.append({"month": month,
                        "accepted_without_errors": len(seen_cases) - with_errors,
                        "accepted_with_errors": with_errors})

    # Errors that carry no case number are counted once for the whole file.
    rejected = ParserError.objects.filter(file=df, case_number=None).count()

    return {"months": monthly, "rejected": rejected}
1 change: 0 additions & 1 deletion tdrs-backend/tdpservice/parsers/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

logger = logging.getLogger(__name__)


class Field:
"""Provides a mapping between a field name and its position."""

Expand Down
12 changes: 12 additions & 0 deletions tdrs-backend/tdpservice/parsers/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,18 @@ def parse_datafile(datafile):
bulk_create_errors(unsaved_parser_errors, 1, flush=True)
return errors

rpt_month_year_is_valid, rpt_month_year_error = validators.validate_header_rpt_month_year(
datafile,
header,
util.make_generate_parser_error(datafile, 1)
)
if not rpt_month_year_is_valid:
logger.info(f"Preparser Error -> Rpt Month Year is not valid: {rpt_month_year_error.error_message}")
errors['document'] = [rpt_month_year_error]
unsaved_parser_errors = {1: [rpt_month_year_error]}
bulk_create_errors(unsaved_parser_errors, 1, flush=True)
return errors

line_errors = parse_datafile_lines(datafile, program_type, section, is_encrypted)

errors = errors | line_errors
Expand Down
27 changes: 25 additions & 2 deletions tdrs-backend/tdpservice/parsers/row_schema.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
"""Row schema for datafile."""
from .models import ParserErrorCategoryChoices
from .fields import Field
from .fields import Field, TransformField
from .validators import value_is_empty
import logging

logger = logging.getLogger(__name__)


class RowSchema:
"""Maps the schema for data lines."""

Expand Down Expand Up @@ -182,3 +181,27 @@ def get_field_by_name(self, name):
if field.name == name:
return field
return None


class SchemaManager:
    """Group one or more RowSchema objects and run them together over a line."""

    def __init__(self, schemas):
        """Keep the collection of schemas this manager delegates to."""
        self.schemas = schemas

    def parse_and_validate(self, line, generate_error):
        """Run `parse_and_validate` on every managed schema and collect the results.

        Returns a list holding one `(record, is_valid, errors)` tuple per
        schema, in the same order as `self.schemas`.
        """
        outcomes = (schema.parse_and_validate(line, generate_error) for schema in self.schemas)
        return [(record, is_valid, errors) for record, is_valid, errors in outcomes]

    def update_encrypted_fields(self, is_encrypted):
        """Set the `is_encrypted` kwarg on each TransformField that already declares it."""
        every_field = (field for schema in self.schemas for field in schema.fields)
        for field in every_field:
            # Only exact TransformField instances are touched; other field types are left alone.
            if type(field) is not TransformField:
                continue
            if "is_encrypted" in field.kwargs:
                field.kwargs['is_encrypted'] = is_encrypted
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m1.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
"""Schema for SSP M1 record type."""


from tdpservice.parsers.util import SchemaManager
raftmsohani marked this conversation as resolved.
Show resolved Hide resolved
from tdpservice.parsers.fields import Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.models.ssp import SSP_M1

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for SSP M2 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.transforms import ssp_ssn_decryption_func
from tdpservice.parsers.fields import TransformField, Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.models.ssp import SSP_M2

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m3.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for SSP M3 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.transforms import ssp_ssn_decryption_func
from tdpservice.parsers.fields import TransformField, Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.models.ssp import SSP_M3

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m4.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
"""Schema for SSP M4 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.fields import Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.models.ssp import SSP_M4

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m5.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for SSP M1 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.transforms import ssp_ssn_decryption_func
from tdpservice.parsers.fields import TransformField, Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.models.ssp import SSP_M5

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m6.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for SSP M6 record type."""


from ...util import SchemaManager
from ...transforms import calendar_quarter_to_rpt_month_year
from ...fields import Field, TransformField
from ...row_schema import RowSchema
from ...row_schema import RowSchema, SchemaManager
from ... import validators
from tdpservice.search_indexes.models.ssp import SSP_M6

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m7.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""Schema for SSP M7 record type."""

from ...util import SchemaManager
from ...fields import Field, TransformField
from ...row_schema import RowSchema
from ...row_schema import RowSchema, SchemaManager
from ...transforms import calendar_quarter_to_rpt_month_year
from ... import validators
from tdpservice.search_indexes.models.ssp import SSP_M7
Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""Schema for t1 record types."""

from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.fields import Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.models.tanf import TANF_T1

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/tanf/t2.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for TANF T2 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.transforms import tanf_ssn_decryption_func
from tdpservice.parsers.fields import TransformField, Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.models.tanf import TANF_T2

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/tanf/t3.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for TANF T3 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.transforms import tanf_ssn_decryption_func
from tdpservice.parsers.fields import TransformField, Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.models.tanf import TANF_T3

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/tanf/t4.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
"""Schema for TANF T4 record type."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.fields import Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.models.tanf import TANF_T4

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/tanf/t5.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for HEADER row of all submission types."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.transforms import tanf_ssn_decryption_func
from tdpservice.parsers.fields import TransformField, Field
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.models.tanf import TANF_T5

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/tanf/t6.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for HEADER row of all submission types."""


from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.transforms import calendar_quarter_to_rpt_month_year
from tdpservice.parsers.fields import Field, TransformField
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers import validators
from tdpservice.search_indexes.models.tanf import TANF_T6

Expand Down
3 changes: 1 addition & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/tanf/t7.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""Schema for TANF T7 Row."""

from tdpservice.parsers.util import SchemaManager
from tdpservice.parsers.fields import Field, TransformField
from tdpservice.parsers.row_schema import RowSchema
from tdpservice.parsers.row_schema import RowSchema, SchemaManager
from tdpservice.parsers.transforms import calendar_quarter_to_rpt_month_year
from tdpservice.parsers import validators
from tdpservice.search_indexes.models.tanf import TANF_T7
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""Schema for Tribal TANF T1 record types."""

from ...util import SchemaManager
from ...fields import Field
from ...row_schema import RowSchema
from ...row_schema import RowSchema, SchemaManager
from ... import validators
from tdpservice.search_indexes.models.tribal import Tribal_TANF_T1

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for Tribal TANF T2 row of all submission types."""


from ...util import SchemaManager
from ...transforms import tanf_ssn_decryption_func
from ...fields import TransformField, Field
from ...row_schema import RowSchema
from ...row_schema import RowSchema, SchemaManager
from ... import validators
from tdpservice.search_indexes.models.tribal import Tribal_TANF_T2

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Schema for Tribal TANF T3 row of all submission types."""


from ...util import SchemaManager
from ...transforms import tanf_ssn_decryption_func
from ...fields import TransformField, Field
from ...row_schema import RowSchema
from ...row_schema import RowSchema, SchemaManager
from ... import validators
from tdpservice.search_indexes.models.tribal import Tribal_TANF_T3

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
HEADER20204A06 TAN1 N
T12020101111111111223003403361110213120000300000000000008730010000000000000000000000000000000000222222000000002229012
TRAILER0000001
Loading
Loading