diff --git a/tdrs-backend/docker-compose.yml b/tdrs-backend/docker-compose.yml index 69e08bc64..a6624688b 100644 --- a/tdrs-backend/docker-compose.yml +++ b/tdrs-backend/docker-compose.yml @@ -45,12 +45,25 @@ services: # Copy in the Localstack setup script to configure any buckets needed - ../scripts/localstack-setup.sh:/docker-entrypoint-initaws.d/localstack-setup.sh + kibana: + image: elastic/kibana:7.17.10 + ports: + - 5601:5601 + environment: + - xpack.security.encryptionKey="something_at_least_32_characters" + - xpack.security.session.idleTimeout="1h" + - xpack.security.session.lifespan="30d" + volumes: + - ./kibana.yml:/usr/share/kibana/config/kibana.yml + depends_on: + - elastic + elastic: image: elasticsearch:7.17.6 environment: - discovery.type=single-node - - xpack.security.enabled=false - logger.discovery.level=debug + - xpack.security.enabled=false ports: - 9200:9200 - 9300:9300 diff --git a/tdrs-backend/kibana.yml b/tdrs-backend/kibana.yml new file mode 100644 index 000000000..dad4335d0 --- /dev/null +++ b/tdrs-backend/kibana.yml @@ -0,0 +1,2 @@ +elasticsearch.hosts: ["http://elastic:9200"] +server.host: kibana diff --git a/tdrs-backend/tdpservice/parsers/parse.py b/tdrs-backend/tdpservice/parsers/parse.py index dbe80980b..a5bf46053 100644 --- a/tdrs-backend/tdpservice/parsers/parse.py +++ b/tdrs-backend/tdpservice/parsers/parse.py @@ -75,22 +75,29 @@ def parse_datafile(datafile): return errors - def bulk_create_records(unsaved_records, line_number, header_count, batch_size=10000, flush=False): """Bulk create passed in records.""" if (line_number % batch_size == 0 and header_count > 0) or flush: logger.debug("Bulk creating records.") try: - num_created = 0 - num_expected = 0 - for model, records in unsaved_records.items(): - num_expected += len(records) - num_created += len(model.objects.bulk_create(records)) - if num_created != num_expected: - logger.error(f"Bulk create only created {num_created}/{num_expected}!") + num_db_records_created = 0 + num_expected_db_records = 0 + num_elastic_records_created = 0 + for document, records in unsaved_records.items(): + num_expected_db_records += len(records) + created_objs = document.Django.model.objects.bulk_create(records) + num_elastic_records_created += document.update(created_objs)[0] + num_db_records_created += len(created_objs) + if num_db_records_created != num_expected_db_records: + logger.error(f"Bulk Django record creation only created {num_db_records_created}/" + + f"{num_expected_db_records}!") + elif num_elastic_records_created != num_expected_db_records: + logger.error(f"Bulk Elastic document creation only created {num_elastic_records_created}/" + + f"{num_expected_db_records}!") else: - logger.info(f"Created {num_created}/{num_expected} records.") - return num_created == num_expected, {} + logger.info(f"Created {num_db_records_created}/{num_expected_db_records} records.") + return num_db_records_created == num_expected_db_records and \ + num_elastic_records_created == num_expected_db_records, {} except DatabaseError as e: logger.error(f"Encountered error while creating datafile records: {e}") return False, unsaved_records @@ -127,7 +134,8 @@ def evaluate_trailer(datafile, trailer_count, multiple_trailer_errors, is_last_l def rollback_records(unsaved_records, datafile): """Delete created records in the event of a failure.""" logger.info("Rolling back created records.") - for model in unsaved_records: + for document in unsaved_records: + model = document.Django.model num_deleted, models = model.objects.filter(datafile=datafile).delete() logger.debug(f"Deleted {num_deleted} records of type: {model}.") @@ -218,7 +226,7 @@ def parse_datafile_lines(datafile, program_type, section, is_encrypted): if record: s = schema_manager.schemas[i] record.datafile = datafile - unsaved_records.setdefault(s.model, []).append(record) + unsaved_records.setdefault(s.document, []).append(record) all_created, unsaved_records = bulk_create_records(unsaved_records, line_number, header_count,) unsaved_parser_errors, num_errors = bulk_create_errors(unsaved_parser_errors, num_errors) diff --git a/tdrs-backend/tdpservice/parsers/row_schema.py b/tdrs-backend/tdpservice/parsers/row_schema.py index 126fb1f4d..acd990c35 100644 --- a/tdrs-backend/tdpservice/parsers/row_schema.py +++ b/tdrs-backend/tdpservice/parsers/row_schema.py @@ -12,13 +12,13 @@ class RowSchema: def __init__( self, - model=dict, + document, preparsing_validators=[], postparsing_validators=[], fields=[], - quiet_preparser_errors=False + quiet_preparser_errors=False, ): - self.model = model + self.document = document self.preparsing_validators = preparsing_validators self.postparsing_validators = postparsing_validators self.fields = fields @@ -90,7 +90,7 @@ def run_preparsing_validators(self, line, generate_error): def parse_line(self, line): """Create a model for the line based on the schema.""" - record = self.model() + record = self.document.Django.model() if self.document is not None else dict() for field in self.fields: value = field.parse_value(line) diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/header.py b/tdrs-backend/tdpservice/parsers/schema_defs/header.py index 5a9960af1..a0574ab7b 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/header.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/header.py @@ -7,7 +7,7 @@ header = RowSchema( - model=dict, + document=None, preparsing_validators=[ validators.hasLength( 23, diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m1.py b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m1.py index ea0aaf189..ff4ad2b00 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m1.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m1.py @@ -5,12 +5,12 @@ from tdpservice.parsers.fields import Field from tdpservice.parsers.row_schema import RowSchema from tdpservice.parsers import validators -from tdpservice.search_indexes.models.ssp import SSP_M1 +from tdpservice.search_indexes.documents.ssp import SSP_M1DataSubmissionDocument m1 = SchemaManager( schemas=[ RowSchema( - model=SSP_M1, + document=SSP_M1DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(150), ], diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py index 4dd0a00ec..937b8e00a 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py @@ -6,13 +6,13 @@ from tdpservice.parsers.fields import TransformField, Field from tdpservice.parsers.row_schema import RowSchema from tdpservice.parsers import validators -from tdpservice.search_indexes.models.ssp import SSP_M2 +from tdpservice.search_indexes.documents.ssp import SSP_M2DataSubmissionDocument m2 = SchemaManager( schemas=[ RowSchema( - model=SSP_M2, + document=SSP_M2DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(150), ], @@ -78,7 +78,7 @@ result_field='EDUCATION_LEVEL', result_function=validators.or_validators( validators.isInStringRange(1, 16), - validators.isInStringRange(98, 99) + validators.isInStringRange(98, 99), ), ), validators.if_then_validator( @@ -367,8 +367,7 @@ required=False, validators=[ validators.or_validators( - validators.isInLimits(0, 16), - validators.isInLimits(98, 99) + validators.isInLimits(0, 16), validators.isInLimits(98, 99) ) ] ), @@ -414,7 +413,7 @@ validators.or_validators( validators.isInLimits(1, 4), validators.isInLimits(6, 9), - validators.isInLimits(11, 12) + validators.isInLimits(11, 12), ) ] ), diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m3.py b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m3.py index fb05903f6..7ce6eb787 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m3.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m3.py @@ -6,10 +6,10 @@ from tdpservice.parsers.fields import TransformField, Field from tdpservice.parsers.row_schema import RowSchema from tdpservice.parsers import validators -from tdpservice.search_indexes.models.ssp import SSP_M3 +from tdpservice.search_indexes.documents.ssp import SSP_M3DataSubmissionDocument first_part_schema = RowSchema( - model=SSP_M3, + document=SSP_M3DataSubmissionDocument(), preparsing_validators=[ validators.notEmpty(start=19, end=60), ], @@ -315,7 +315,7 @@ ) second_part_schema = RowSchema( - model=SSP_M3, + document=SSP_M3DataSubmissionDocument(), quiet_preparser_errors=True, preparsing_validators=[ validators.notEmpty(start=60, end=101), diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m4.py b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m4.py index 4d8f04f64..4041a484a 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m4.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m4.py @@ -5,12 +5,12 @@ from tdpservice.parsers.fields import Field from tdpservice.parsers.row_schema import RowSchema from tdpservice.parsers import validators -from tdpservice.search_indexes.models.ssp import SSP_M4 +from tdpservice.search_indexes.documents.ssp import SSP_M4DataSubmissionDocument m4 = SchemaManager( schemas=[ RowSchema( - model=SSP_M4, + document=SSP_M4DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(66), ], diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m5.py b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m5.py index a63cd6591..f0bea295d 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m5.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m5.py @@ -6,13 +6,13 @@ from tdpservice.parsers.fields import TransformField, Field from tdpservice.parsers.row_schema import RowSchema from tdpservice.parsers import validators -from tdpservice.search_indexes.models.ssp import SSP_M5 +from tdpservice.search_indexes.documents.ssp import SSP_M5DataSubmissionDocument m5 = SchemaManager( schemas=[ RowSchema( - model=SSP_M5, + document=SSP_M5DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(66), ], diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m6.py b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m6.py index 6f266168b..12488abd5 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m6.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m6.py @@ -6,10 +6,10 @@ from ...fields import Field, TransformField from ...row_schema import RowSchema from ... import validators -from tdpservice.search_indexes.models.ssp import SSP_M6 +from tdpservice.search_indexes.documents.ssp import SSP_M6DataSubmissionDocument s1 = RowSchema( - model=SSP_M6, + document=SSP_M6DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(259), ], @@ -170,7 +170,7 @@ ) s2 = RowSchema( - model=SSP_M6, + document=SSP_M6DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(259), ], @@ -331,7 +331,7 @@ ) s3 = RowSchema( - model=SSP_M6, + document=SSP_M6DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(259), ], @@ -492,10 +492,4 @@ ) -m6 = SchemaManager( - schemas=[ - s1, - s2, - s3 - ] -) +m6 = SchemaManager(schemas=[s1, s2, s3]) diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m7.py b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m7.py index cde0c1fc3..5f3d429c9 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m7.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m7.py @@ -5,7 +5,7 @@ from ...row_schema import RowSchema from ...transforms import calendar_quarter_to_rpt_month_year from ... import validators -from tdpservice.search_indexes.models.ssp import SSP_M7 +from tdpservice.search_indexes.documents.ssp import SSP_M7DataSubmissionDocument schemas = [] @@ -20,7 +20,7 @@ for i in range(1, 31): schemas.append( RowSchema( - model=SSP_M7, + document=SSP_M7DataSubmissionDocument(), quiet_preparser_errors=i > 1, preparsing_validators=[ validators.hasLength(247), diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py index 76ce8f0f5..3c09ffe97 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py @@ -4,13 +4,13 @@ from tdpservice.parsers.fields import Field from tdpservice.parsers.row_schema import RowSchema from tdpservice.parsers import validators -from tdpservice.search_indexes.models.tanf import TANF_T1 +from tdpservice.search_indexes.documents.tanf import TANF_T1DataSubmissionDocument t1 = SchemaManager( schemas=[ RowSchema( - model=TANF_T1, + document=TANF_T1DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(156), ], diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t2.py b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t2.py index 1331490ea..af8dc3b30 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t2.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t2.py @@ -6,13 +6,13 @@ from tdpservice.parsers.fields import TransformField, Field from tdpservice.parsers.row_schema import RowSchema from tdpservice.parsers import validators -from tdpservice.search_indexes.models.tanf import TANF_T2 +from tdpservice.search_indexes.documents.tanf import TANF_T2DataSubmissionDocument t2 = SchemaManager( schemas=[ RowSchema( - model=TANF_T2, + document=TANF_T2DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(156), ], diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t3.py b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t3.py index 54995f23a..2257e4d29 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t3.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t3.py @@ -6,11 +6,11 @@ from tdpservice.parsers.fields import TransformField, Field from tdpservice.parsers.row_schema import RowSchema from tdpservice.parsers import validators -from tdpservice.search_indexes.models.tanf import TANF_T3 +from tdpservice.search_indexes.documents.tanf import TANF_T3DataSubmissionDocument child_one = RowSchema( - model=TANF_T3, + document=TANF_T3DataSubmissionDocument(), preparsing_validators=[ validators.notEmpty(start=19, end=60), ], @@ -313,7 +313,7 @@ ) child_two = RowSchema( - model=TANF_T3, + document=TANF_T3DataSubmissionDocument(), quiet_preparser_errors=True, preparsing_validators=[ validators.notEmpty(start=60, end=101), diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t4.py b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t4.py index 1a5a13c54..ae897a04a 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t4.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t4.py @@ -5,13 +5,13 @@ from tdpservice.parsers.fields import Field from tdpservice.parsers.row_schema import RowSchema from tdpservice.parsers import validators -from tdpservice.search_indexes.models.tanf import TANF_T4 +from tdpservice.search_indexes.documents.tanf import TANF_T4DataSubmissionDocument t4 = SchemaManager( schemas=[ RowSchema( - model=TANF_T4, + document=TANF_T4DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(71), ], diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t5.py b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t5.py index 199c08f0c..f6db5778c 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t5.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t5.py @@ -6,13 +6,13 @@ from tdpservice.parsers.fields import TransformField, Field from tdpservice.parsers.row_schema import RowSchema from tdpservice.parsers import validators -from tdpservice.search_indexes.models.tanf import TANF_T5 +from tdpservice.search_indexes.documents.tanf import TANF_T5DataSubmissionDocument t5 = SchemaManager( schemas=[ RowSchema( - model=TANF_T5, + document=TANF_T5DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(71), ], diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t6.py b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t6.py index 77925781a..f1fafeb8c 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t6.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t6.py @@ -6,11 +6,11 @@ from tdpservice.parsers.fields import Field, TransformField from tdpservice.parsers.row_schema import RowSchema from tdpservice.parsers import validators -from tdpservice.search_indexes.models.tanf import TANF_T6 +from tdpservice.search_indexes.documents.tanf import TANF_T6DataSubmissionDocument s1 = RowSchema( - model=TANF_T6, + document=TANF_T6DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(379), ], @@ -227,7 +227,7 @@ ) s2 = RowSchema( - model=TANF_T6, + document=TANF_T6DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(379), ], @@ -438,7 +438,7 @@ ) s3 = RowSchema( - model=TANF_T6, + document=TANF_T6DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(379), ], diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t7.py b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t7.py index e6a05bb9a..48c56cd38 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t7.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t7.py @@ -5,7 +5,7 @@ from tdpservice.parsers.row_schema import RowSchema from tdpservice.parsers.transforms import calendar_quarter_to_rpt_month_year from tdpservice.parsers import validators -from tdpservice.search_indexes.models.tanf import TANF_T7 +from tdpservice.search_indexes.documents.tanf import TANF_T7DataSubmissionDocument schemas = [] @@ -20,7 +20,7 @@ schemas.append( RowSchema( - model=TANF_T7, + document=TANF_T7DataSubmissionDocument(), quiet_preparser_errors=i > 1, preparsing_validators=[ validators.hasLength(247), diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/trailer.py b/tdrs-backend/tdpservice/parsers/schema_defs/trailer.py index 8b3325424..06a23dcb3 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/trailer.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/trailer.py @@ -7,7 +7,7 @@ trailer = RowSchema( - model=dict, + document=None, preparsing_validators=[ validators.hasLength( 23, diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t1.py b/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t1.py index 2820b76fa..1e17d7856 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t1.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t1.py @@ -4,13 +4,13 @@ from ...fields import Field from ...row_schema import RowSchema from ... import validators -from tdpservice.search_indexes.models.tribal import Tribal_TANF_T1 +from tdpservice.search_indexes.documents.tribal import Tribal_TANF_T1DataSubmissionDocument t1 = SchemaManager( schemas=[ RowSchema( - model=Tribal_TANF_T1, + document=Tribal_TANF_T1DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(122), ], diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t2.py b/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t2.py index 980e653fd..1a56934d4 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t2.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t2.py @@ -6,13 +6,13 @@ from ...fields import TransformField, Field from ...row_schema import RowSchema from ... import validators -from tdpservice.search_indexes.models.tribal import Tribal_TANF_T2 +from tdpservice.search_indexes.documents.tribal import Tribal_TANF_T2DataSubmissionDocument t2 = SchemaManager( schemas=[ RowSchema( - model=Tribal_TANF_T2, + document=Tribal_TANF_T2DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(122), ], diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t3.py b/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t3.py index 4d68ec457..e26120bd5 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t3.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t3.py @@ -6,11 +6,11 @@ from ...fields import TransformField, Field from ...row_schema import RowSchema from ... import validators -from tdpservice.search_indexes.models.tribal import Tribal_TANF_T3 +from tdpservice.search_indexes.documents.tribal import Tribal_TANF_T3DataSubmissionDocument child_one = RowSchema( - model=Tribal_TANF_T3, + document=Tribal_TANF_T3DataSubmissionDocument(), preparsing_validators=[ validators.notEmpty(start=19, end=60), validators.hasLength(122), @@ -314,7 +314,7 @@ ) child_two = RowSchema( - model=Tribal_TANF_T3, + document=Tribal_TANF_T3DataSubmissionDocument(), quiet_preparser_errors=True, preparsing_validators=[ validators.notEmpty(start=60, end=101), diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t4.py b/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t4.py index f5227781b..6128b07ff 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t4.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t4.py @@ -4,13 +4,13 @@ from ...fields import Field from ...row_schema import RowSchema from ... import validators -from tdpservice.search_indexes.models.tribal import Tribal_TANF_T4 +from tdpservice.search_indexes.documents.tribal import Tribal_TANF_T4DataSubmissionDocument t4 = SchemaManager( schemas=[ RowSchema( - model=Tribal_TANF_T4, + document=Tribal_TANF_T4DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(71), ], diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t5.py b/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t5.py index d1a38ae50..cc5c3bed1 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t5.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t5.py @@ -6,13 +6,13 @@ from ...fields import TransformField, Field from ...row_schema import RowSchema from ... import validators -from tdpservice.search_indexes.models.tribal import Tribal_TANF_T5 +from tdpservice.search_indexes.documents.tribal import Tribal_TANF_T5DataSubmissionDocument t5 = SchemaManager( schemas=[ RowSchema( - model=Tribal_TANF_T5, + document=Tribal_TANF_T5DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(71), ], diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t6.py b/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t6.py index 844427c77..0f25676f6 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t6.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t6.py @@ -6,11 +6,11 @@ from tdpservice.parsers.fields import Field, TransformField from tdpservice.parsers.row_schema import RowSchema from tdpservice.parsers import validators -from tdpservice.search_indexes.models.tribal import Tribal_TANF_T6 +from tdpservice.search_indexes.documents.tribal import Tribal_TANF_T6DataSubmissionDocument s1 = RowSchema( - model=Tribal_TANF_T6, + document=Tribal_TANF_T6DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(379), ], @@ -215,7 +215,7 @@ ) s2 = RowSchema( - model=Tribal_TANF_T6, + document=Tribal_TANF_T6DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(379), ], @@ -414,7 +414,7 @@ ) s3 = RowSchema( - model=Tribal_TANF_T6, + document=Tribal_TANF_T6DataSubmissionDocument(), preparsing_validators=[ validators.hasLength(379), ], diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t7.py b/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t7.py index 332caed97..8f7fdd2e8 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t7.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tribal_tanf/t7.py @@ -5,7 +5,7 @@ from tdpservice.parsers.row_schema import RowSchema from tdpservice.parsers.transforms import calendar_quarter_to_rpt_month_year from tdpservice.parsers import validators -from tdpservice.search_indexes.models.tribal import Tribal_TANF_T7 +from tdpservice.search_indexes.documents.tribal import Tribal_TANF_T7DataSubmissionDocument schemas = [] @@ -20,7 +20,7 @@ schemas.append( RowSchema( - model=Tribal_TANF_T7, + document=Tribal_TANF_T7DataSubmissionDocument(), quiet_preparser_errors=i > 1, preparsing_validators=[ validators.hasLength(247), diff --git a/tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.NDM1.TS53_fake b/tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.NDM1.TS53_fake.txt similarity index 100% rename from tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.NDM1.TS53_fake rename to tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.NDM1.TS53_fake.txt diff --git a/tdrs-backend/tdpservice/parsers/test/test_parse.py b/tdrs-backend/tdpservice/parsers/test/test_parse.py index 02cd7365b..9fb8f6878 100644 --- a/tdrs-backend/tdpservice/parsers/test/test_parse.py +++ b/tdrs-backend/tdpservice/parsers/test/test_parse.py @@ -8,6 +8,7 @@ from tdpservice.search_indexes.models.tribal import Tribal_TANF_T1, Tribal_TANF_T2, Tribal_TANF_T3, Tribal_TANF_T4 from tdpservice.search_indexes.models.tribal import Tribal_TANF_T5, Tribal_TANF_T6, Tribal_TANF_T7 from tdpservice.search_indexes.models.ssp import SSP_M1, SSP_M2, SSP_M3, SSP_M4, SSP_M5, SSP_M6, SSP_M7 +from tdpservice.search_indexes import documents from .factories import DataFileSummaryFactory from tdpservice.data_files.models import DataFile from .. import schema_defs, util @@ -141,7 +142,6 @@ def test_big_file(stt_user, stt): @pytest.mark.django_db -@pytest.mark.skip(reason="long runtime") # big_files def test_parse_big_file(test_big_file, dfs): """Test parsing of ADS.E2J.FTP1.TS06.""" expected_t1_record_count = 815 @@ -156,30 +156,38 @@ def test_parse_big_file(test_big_file, dfs): assert dfs.status == DataFileSummary.Status.ACCEPTED_WITH_ERRORS dfs.case_aggregates = util.case_aggregates_by_month( dfs.datafile, dfs.status) - assert dfs.case_aggregates == {'rejected': 0, - 'months': [ - {'accepted_without_errors': 171, - 'accepted_with_errors': 99, 'month': 'Oct'}, - {'accepted_without_errors': 169, - 'accepted_with_errors': 104, 'month': 'Nov'}, - {'accepted_without_errors': 166, - 'accepted_with_errors': 106, 'month': 'Dec'} - ]} - - parser_errors = ParserError.objects.filter(file=test_big_file) - - error_message = "14 is not in ['01', '02', '05', '07', '09', '15', '16', '17', '18', '19', '99']. " + \ - "or 14 is not blank." - row_118_error = parser_errors.get(row_number=118, error_message=error_message) - assert row_118_error.error_type == ParserErrorCategoryChoices.FIELD_VALUE - assert row_118_error.error_message == error_message - assert row_118_error.content_type.model == 'tanf_t2' - assert row_118_error.object_id is not None + assert dfs.case_aggregates == {'months': [ + {'month': 'Oct', 'accepted_without_errors': 129, 'accepted_with_errors': 141}, + {'month': 'Nov', 'accepted_without_errors': 143, 'accepted_with_errors': 130}, + {'month': 'Dec', 'accepted_without_errors': 131, 'accepted_with_errors': 141} + ], + 'rejected': 0} assert TANF_T1.objects.count() == expected_t1_record_count assert TANF_T2.objects.count() == expected_t2_record_count assert TANF_T3.objects.count() == expected_t3_record_count + search = documents.tanf.TANF_T1DataSubmissionDocument.search().query( + 'match', + datafile__id=test_big_file.id + ) + assert search.count() == expected_t1_record_count + search.delete() + + search = documents.tanf.TANF_T2DataSubmissionDocument.search().query( + 'match', + datafile__id=test_big_file.id + ) + assert search.count() == expected_t2_record_count + search.delete() + + search = documents.tanf.TANF_T3DataSubmissionDocument.search().query( + 'match', + datafile__id=test_big_file.id + ) + assert search.count() == expected_t3_record_count + search.delete() + @pytest.fixture def bad_test_file(stt_user, stt): @@ -567,7 +575,7 @@ def test_parse_tanf_section1_datafile_t3s(small_tanf_section1_datafile): @pytest.fixture def super_big_s1_file(stt_user, stt): """Fixture for ADS.E2J.NDM1.TS53_fake.""" - return util.create_test_datafile('ADS.E2J.NDM1.TS53_fake', stt_user, stt) + return util.create_test_datafile('ADS.E2J.NDM1.TS53_fake.txt', stt_user, stt) @pytest.mark.django_db() @@ -576,15 +584,40 @@ def test_parse_super_big_s1_file(super_big_s1_file): """Test parsing of super_big_s1_file and validate all T1/T2/T3 records are created.""" parse.parse_datafile(super_big_s1_file) - assert TANF_T1.objects.count() == 96642 - assert TANF_T2.objects.count() == 112794 - assert TANF_T3.objects.count() == 172595 + expected_t1_record_count = 96642 + expected_t2_record_count = 112794 + expected_t3_record_count = 172595 + + assert TANF_T1.objects.count() == expected_t1_record_count + assert TANF_T2.objects.count() == expected_t2_record_count + assert TANF_T3.objects.count() == expected_t3_record_count + + search = documents.tanf.TANF_T1DataSubmissionDocument.search().query( + 'match', + datafile__id=super_big_s1_file.id + ) + assert search.count() == expected_t1_record_count + search.delete() + + search = documents.tanf.TANF_T2DataSubmissionDocument.search().query( + 'match', + datafile__id=super_big_s1_file.id + ) + assert search.count() == expected_t2_record_count + search.delete() + + search = documents.tanf.TANF_T3DataSubmissionDocument.search().query( + 'match', + datafile__id=super_big_s1_file.id + ) + assert search.count() == expected_t3_record_count + search.delete() @pytest.fixture def super_big_s1_rollback_file(stt_user, stt): """Fixture for ADS.E2J.NDM1.TS53_fake.rollback.""" - return util.create_test_datafile('ADS.E2J.NDM1.TS53_fake.rollback', stt_user, stt) + return util.create_test_datafile('ADS.E2J.NDM1.TS53_fake.rollback.txt', stt_user, stt) @pytest.mark.django_db() @@ -611,6 +644,24 @@ def test_parse_super_big_s1_file_with_rollback(super_big_s1_rollback_file): assert TANF_T2.objects.count() == 0 assert TANF_T3.objects.count() == 0 + search = documents.tanf.TANF_T1DataSubmissionDocument.search().query( + 'match', + datafile__id=super_big_s1_rollback_file.id + ) + assert search.count() == 0 + + search = documents.tanf.TANF_T2DataSubmissionDocument.search().query( + 'match', + datafile__id=super_big_s1_rollback_file.id + ) + assert search.count() == 0 + + search = documents.tanf.TANF_T3DataSubmissionDocument.search().query( + 'match', + datafile__id=super_big_s1_rollback_file.id + ) + assert search.count() == 0 + @pytest.fixture def bad_tanf_s1__row_missing_required_field(stt_user, stt): @@ -950,8 +1001,25 @@ def test_parse_ssp_section2_file(ssp_section2_file): m4_objs = SSP_M4.objects.all().order_by('id') m5_objs = SSP_M5.objects.all().order_by('AMOUNT_EARNED_INCOME') - assert SSP_M4.objects.all().count() == 2205 - assert SSP_M5.objects.all().count() == 6736 + expected_m4_count = 2205 + expected_m5_count = 6736 + + assert SSP_M4.objects.all().count() == expected_m4_count + assert SSP_M5.objects.all().count() == expected_m5_count + + search = documents.ssp.SSP_M4DataSubmissionDocument.search().query( + 'match', + datafile__id=ssp_section2_file.id + ) + assert search.count() == expected_m4_count + search.delete() + + search = documents.ssp.SSP_M5DataSubmissionDocument.search().query( + 'match', + datafile__id=ssp_section2_file.id + ) + assert search.count() == expected_m5_count + search.delete() m4 = m4_objs.first() assert m4.DISPOSITION == 1 diff --git a/tdrs-backend/tdpservice/parsers/test/test_util.py b/tdrs-backend/tdpservice/parsers/test/test_util.py index 5bb8c72e2..b9f19f536 100644 --- a/tdrs-backend/tdpservice/parsers/test/test_util.py +++ b/tdrs-backend/tdpservice/parsers/test/test_util.py @@ -33,6 +33,7 @@ def test_run_preparsing_validators_returns_valid(): """Test run_preparsing_validators executes all preparsing_validators provided in schema.""" line = '12345' schema = RowSchema( + document=None, preparsing_validators=[ passing_validator() ] @@ -47,6 +48,7 @@ def test_run_preparsing_validators_returns_invalid_and_errors(): """Test that run_preparsing_validators executes all preparsing_validators provided in schema and returns errors.""" line = '12345' schema = RowSchema( + document=None, preparsing_validators=[ passing_validator(), failing_validator() @@ -62,7 +64,7 @@ def test_parse_line_parses_line_from_schema_to_dict(): """Test that parse_line parses a string into a dict given start and end indices for all fields.""" line = '12345001' schema = RowSchema( - model=dict, + document=None, fields=[ Field(item=1, name='first', friendly_name='first', type='string', startIndex=0, endIndex=3), Field(item=2, name='second', friendly_name='second', type='string', startIndex=3, endIndex=4), @@ -90,9 +92,13 @@ class TestModel: fourth = None fifth = None + class TestDocument: + class Django: + model = TestModel + line = '12345001' schema = RowSchema( - model=TestModel, + document=TestDocument(), fields=[ Field(item=1, name='first', friendly_name='first', type='string', startIndex=0, endIndex=3), Field(item=2, name='second', friendly_name='second', type='string', startIndex=3, endIndex=4), @@ -119,7 +125,7 @@ def test_run_field_validators_returns_valid_with_dict(): 'third': '5' } schema = RowSchema( - model=dict, + document=None, fields=[ Field(item=1, name='first', friendly_name='first', type='string', startIndex=0, endIndex=3, validators=[ passing_validator() @@ -145,13 +151,20 @@ class TestModel: second = None third = None + class TestDocument: + class Django: + model = TestModel + instance = TestModel() instance.first = '123' instance.second = '4' instance.third = '5' + document = TestDocument() + document.Django.model = instance + schema = RowSchema( - model=TestModel, + document=document, fields=[ Field(item=1, name='first', friendly_name='first', type='string', startIndex=0, endIndex=3, validators=[ passing_validator() @@ -178,7 +191,7 @@ def test_run_field_validators_returns_invalid_with_dict(): 'third': '5' } schema = RowSchema( - model=dict, + document=None, fields=[ Field(item=1, name='first', friendly_name='first', type='string', startIndex=0, endIndex=3, validators=[ passing_validator(), @@ -205,13 +218,20 @@ class TestModel: second = None third = None + class TestDocument: + class Django: + model = TestModel + instance = TestModel() instance.first = '123' instance.second = '4' instance.third = '5' + document = TestDocument() + document.Django.model = instance + schema = RowSchema( - model=TestModel, + document=document, fields=[ Field(item=1, name='first', friendly_name='first', type='string', startIndex=0, endIndex=3, validators=[ passing_validator(), @@ -244,7 +264,7 @@ def test_field_validators_blank_and_required_returns_error(first, second): 'second': second, } schema = RowSchema( - model=dict, + document=None, fields=[ Field( item=1, @@ -292,7 +312,7 @@ def test_field_validators_blank_and_not_required_returns_valid(first, expected_v 'first': first, } schema = RowSchema( - model=dict, + document=None, fields=[ Field( item=1, @@ -319,6 +339,7 @@ def test_run_postparsing_validators_returns_valid(): """Test run_postparsing_validators executes all postparsing_validators provided in schema.""" instance = {} schema = RowSchema( + document=None, postparsing_validators=[ passing_postparsing_validator() ] @@ -333,6 +354,7 @@ def test_run_postparsing_validators_returns_invalid_and_errors(): """Test run_postparsing_validators executes all postparsing_validators provided in schema and returns errors.""" instance = {} schema = RowSchema( + document=None, postparsing_validators=[ passing_postparsing_validator(), failing_postparsing_validator() @@ -350,7 +372,7 @@ def test_multi_record_schema_parses_and_validates(): schema_manager = SchemaManager( schemas=[ RowSchema( - model=dict, + document=None, preparsing_validators=[ passing_validator() ], @@ -370,7 +392,7 @@ def test_multi_record_schema_parses_and_validates(): ] ), RowSchema( - model=dict, + document=None, preparsing_validators=[ passing_validator() ], @@ -389,7 +411,7 @@ def test_multi_record_schema_parses_and_validates(): ] ), RowSchema( - model=dict, + document=None, preparsing_validators=[ failing_validator() ], @@ -409,7 +431,7 @@ def test_multi_record_schema_parses_and_validates(): ] ), RowSchema( - model=dict, + document=None, preparsing_validators=[ passing_validator() ], @@ -469,7 +491,7 @@ def postparse_validator(): instance = {} schema = RowSchema( - model=dict, + document=None, postparsing_validators=[ postparse_validator() ], diff --git a/tdrs-backend/tdpservice/parsers/util.py b/tdrs-backend/tdpservice/parsers/util.py index 58df9bbdd..2f2a7c227 100644 --- a/tdrs-backend/tdpservice/parsers/util.py +++ b/tdrs-backend/tdpservice/parsers/util.py @@ -48,7 +48,7 @@ def generate_parser_error(datafile, line_number, schema, error_category, error_m error_message=error_message, error_type=error_category, content_type=ContentType.objects.get_for_model( - model=schema.model if schema else None + model=schema.document.Django.model if schema else None ) if record and not isinstance(record, dict) else None, object_id=getattr(record, 'id', None) if record and not isinstance(record, dict) else None, fields_json=fields_json @@ -344,7 +344,8 @@ def case_aggregates_by_month(df, dfs_status): if isinstance(schema_model, SchemaManager): schema_model = schema_model.schemas[0] - curr_case_numbers = set(schema_model.model.objects.filter(datafile=df).filter(RPT_MONTH_YEAR=rpt_month_year) + curr_case_numbers = set(schema_model.document.Django.model.objects.filter(datafile=df) + .filter(RPT_MONTH_YEAR=rpt_month_year) .distinct("CASE_NUMBER").values_list("CASE_NUMBER", flat=True)) case_numbers = case_numbers.union(curr_case_numbers) diff --git a/tdrs-backend/tdpservice/search_indexes/documents/document_base.py b/tdrs-backend/tdpservice/search_indexes/documents/document_base.py index bcaa4f488..ea377b283 100644 --- a/tdrs-backend/tdpservice/search_indexes/documents/document_base.py +++ b/tdrs-backend/tdpservice/search_indexes/documents/document_base.py @@ -1,16 +1,18 @@ """Elasticsearch base document mappings.""" -from django_elasticsearch_dsl import fields +from django_elasticsearch_dsl import fields, Document from tdpservice.data_files.models import DataFile -class DocumentBase: - """Elastic search model mapping for a parsed SSP M1 data file.""" + +class DocumentBase(Document): + """Elastic search base document to be overridden.""" datafile = fields.ObjectField(properties={ - 'pk': fields.IntegerField(), + 'id': fields.IntegerField(), 'created_at': fields.DateField(), 'version': fields.IntegerField(), - 'quarter': fields.TextField() + 'quarter': fields.TextField(), + 'year': fields.IntegerField(), }) def get_instances_from_related(self, related_instance): diff --git a/tdrs-backend/tdpservice/search_indexes/documents/ssp.py b/tdrs-backend/tdpservice/search_indexes/documents/ssp.py index c8018287f..ed5a95b39 100644 --- a/tdrs-backend/tdpservice/search_indexes/documents/ssp.py +++ b/tdrs-backend/tdpservice/search_indexes/documents/ssp.py @@ -1,13 +1,12 @@ """Elasticsearch document mappings for SSP submission models.""" -from django_elasticsearch_dsl import Document from django_elasticsearch_dsl.registries import registry from ..models.ssp import SSP_M1, SSP_M2, SSP_M3, SSP_M4, SSP_M5, SSP_M6, SSP_M7 from .document_base import DocumentBase @registry.register_document -class SSP_M1DataSubmissionDocument(DocumentBase, Document): +class SSP_M1DataSubmissionDocument(DocumentBase): """Elastic search model mapping for a parsed SSP M1 data file.""" class Index: @@ -71,7 +70,7 @@ class Django: @registry.register_document -class SSP_M2DataSubmissionDocument(DocumentBase, Document): +class SSP_M2DataSubmissionDocument(DocumentBase): """Elastic search model mapping for a parsed SSP M2 data file.""" class Index: @@ -159,7 +158,7 @@ class Django: @registry.register_document -class SSP_M3DataSubmissionDocument(DocumentBase, Document): +class SSP_M3DataSubmissionDocument(DocumentBase): """Elastic search model mapping for a parsed SSP M3 data file.""" class Index: @@ -201,7 +200,7 @@ class Django: ] @registry.register_document -class SSP_M4DataSubmissionDocument(DocumentBase, Document): +class SSP_M4DataSubmissionDocument(DocumentBase): """Elastic search model mapping for a parsed SSP M4 data file.""" class Index: @@ -233,7 +232,7 @@ class Django: ] @registry.register_document -class SSP_M5DataSubmissionDocument(DocumentBase, Document): +class SSP_M5DataSubmissionDocument(DocumentBase): """Elastic search model mapping for a parsed SSP M5 data file.""" class Index: @@ -280,7 +279,7 @@ class Django: ] @registry.register_document -class SSP_M6DataSubmissionDocument(DocumentBase, Document): +class SSP_M6DataSubmissionDocument(DocumentBase): """Elastic search model mapping for a parsed SSP M6 data file.""" class Index: @@ -313,7 +312,7 @@ class Django: ] @registry.register_document -class SSP_M7DataSubmissionDocument(DocumentBase, Document): +class SSP_M7DataSubmissionDocument(DocumentBase): """Elastic search model mapping for a parsed SSP M7 data file.""" class Index: diff --git a/tdrs-backend/tdpservice/search_indexes/documents/tanf.py b/tdrs-backend/tdpservice/search_indexes/documents/tanf.py index a6b5fd6b4..61c093980 100644 --- a/tdrs-backend/tdpservice/search_indexes/documents/tanf.py +++ b/tdrs-backend/tdpservice/search_indexes/documents/tanf.py @@ -1,12 +1,11 @@ """Elasticsearch document mappings for TANF submission models.""" -from django_elasticsearch_dsl import Document from django_elasticsearch_dsl.registries import registry from ..models.tanf import TANF_T1, TANF_T2, TANF_T3, TANF_T4, TANF_T5, TANF_T6, TANF_T7 from .document_base import DocumentBase @registry.register_document -class TANF_T1DataSubmissionDocument(DocumentBase, Document): +class TANF_T1DataSubmissionDocument(DocumentBase): """Elastic search model mapping for a parsed TANF T1 data file.""" class Index: @@ -72,7 +71,7 @@ class Django: @registry.register_document -class TANF_T2DataSubmissionDocument(DocumentBase, Document): +class TANF_T2DataSubmissionDocument(DocumentBase): """Elastic search model mapping for a parsed TANF T2 data file.""" class Index: @@ -162,7 +161,7 @@ class Django: @registry.register_document -class TANF_T3DataSubmissionDocument(DocumentBase, Document): +class TANF_T3DataSubmissionDocument(DocumentBase): """Elastic search model mapping for a parsed TANF T3 data file.""" class Index: @@ -204,7 +203,7 @@ class Django: @registry.register_document -class TANF_T4DataSubmissionDocument(DocumentBase, Document): +class TANF_T4DataSubmissionDocument(DocumentBase): """Elastic search model mapping for a parsed TANF T4 data file.""" class Index: @@ -237,7 +236,7 @@ class Django: @registry.register_document -class TANF_T5DataSubmissionDocument(DocumentBase, Document): +class TANF_T5DataSubmissionDocument(DocumentBase): """Elastic search model mapping for a parsed TANF T5 data file.""" class Index: @@ -287,7 +286,7 @@ class Django: @registry.register_document -class TANF_T6DataSubmissionDocument(DocumentBase, Document): +class TANF_T6DataSubmissionDocument(DocumentBase): """Elastic search model mapping for a parsed TANF T6 data file.""" class Index: @@ -326,7 +325,7 @@ class Django: @registry.register_document -class TANF_T7DataSubmissionDocument(DocumentBase, Document): +class TANF_T7DataSubmissionDocument(DocumentBase): """Elastic search model mapping for a parsed TANF T7 data file.""" class Index: diff --git a/tdrs-backend/tdpservice/settings/common.py b/tdrs-backend/tdpservice/settings/common.py index 9abbb2c15..dc4e4c51e 100644 --- a/tdrs-backend/tdpservice/settings/common.py +++ b/tdrs-backend/tdpservice/settings/common.py @@ -468,7 +468,7 @@ class Common(Configuration): # Elastic ELASTICSEARCH_DSL = { 'default': { - 'hosts': os.getenv('ELASTIC_HOST', 'elastic:9200') + 'hosts': os.getenv('ELASTIC_HOST', 'elastic:9200'), }, }