From e21fc2115f6d6694299d8c0a406cffb1423b90ab Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Tue, 17 Dec 2024 15:26:23 -0500
Subject: [PATCH 1/6] - Updated function to prioritize queryset based on file section

- added section check to download view
---
 tdrs-backend/tdpservice/data_files/util.py  | 9 ++++++++-
 tdrs-backend/tdpservice/data_files/views.py | 4 +++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/tdrs-backend/tdpservice/data_files/util.py b/tdrs-backend/tdpservice/data_files/util.py
index b7cc836b0..1548becc9 100644
--- a/tdrs-backend/tdpservice/data_files/util.py
+++ b/tdrs-backend/tdpservice/data_files/util.py
@@ -21,7 +21,7 @@ class ParserErrorCategoryChoices(models.TextChoices):
     HISTORICAL_CONSISTENCY = "6", _("Historical consistency")
 
 
-def get_prioritized_queryset(parser_errors):
+def get_prioritized_queryset(parser_errors, is_s3_s4):
     """Generate a prioritized queryset of ParserErrors."""
     PRIORITIZED_CAT2 = (
         ("FAMILY_AFFILIATION", "CITIZENSHIP_STATUS", "CLOSURE_REASON"),
@@ -42,6 +42,13 @@ def get_prioritized_queryset(parser_errors, is_s3_s4):
         Q(error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY)
     filtered_errors = parser_errors.filter(error_type_query)
 
+    # If we are a Stratum or Aggregate file, we want all cat2 and cat3 errors.
+    if is_s3_s4:
+        all_cat2_cat3 = Q(error_type=ParserErrorCategoryChoices.FIELD_VALUE) | \
+            Q(error_type=ParserErrorCategoryChoices.VALUE_CONSISTENCY)
+        filtered_errors = filtered_errors.union(parser_errors.filter(all_cat2_cat3))
+        return filtered_errors
+
     for fields in PRIORITIZED_CAT2:
         filtered_errors = filtered_errors.union(parser_errors.filter(
             field_name__in=fields,
diff --git a/tdrs-backend/tdpservice/data_files/views.py b/tdrs-backend/tdpservice/data_files/views.py
index 8263fe62b..87784449c 100644
--- a/tdrs-backend/tdpservice/data_files/views.py
+++ b/tdrs-backend/tdpservice/data_files/views.py
@@ -147,7 +147,9 @@ def download_error_report(self, request, pk=None):
         """Generate and return the parsing error report xlsx."""
         datafile = self.get_object()
         all_errors = ParserError.objects.filter(file=datafile)
-        filtered_errors = get_prioritized_queryset(all_errors)
+        is_s3_s4 = (DataFile.Section.STRATUM_DATA in datafile.section or
+                    DataFile.Section.AGGREGATE_DATA in datafile.section)
+        filtered_errors = get_prioritized_queryset(all_errors, is_s3_s4)
 
         return Response(get_xls_serialized_file(all_errors, filtered_errors))

From 1469b6928485cc00a12e439a60db613824189d6a Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Tue, 17 Dec 2024 15:27:07 -0500
Subject: [PATCH 2/6] - Linting

---
 tdrs-backend/tdpservice/data_files/views.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tdrs-backend/tdpservice/data_files/views.py b/tdrs-backend/tdpservice/data_files/views.py
index 87784449c..5018ee504 100644
--- a/tdrs-backend/tdpservice/data_files/views.py
+++ b/tdrs-backend/tdpservice/data_files/views.py
@@ -147,7 +147,7 @@ def download_error_report(self, request, pk=None):
         """Generate and return the parsing error report xlsx."""
         datafile = self.get_object()
         all_errors = ParserError.objects.filter(file=datafile)
-        is_s3_s4 = (DataFile.Section.STRATUM_DATA in datafile.section or 
+        is_s3_s4 = (DataFile.Section.STRATUM_DATA in datafile.section or
                     DataFile.Section.AGGREGATE_DATA in datafile.section)
         filtered_errors = get_prioritized_queryset(all_errors, is_s3_s4)

From 0625807618b9de29d3e5ffa1faf559c2dd40e9d5 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Tue, 17 Dec 2024 15:40:58 -0500
Subject: [PATCH 3/6] - exclude cat2 error about update
 indicator wrt S3/S4 files

---
 tdrs-backend/tdpservice/data_files/util.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tdrs-backend/tdpservice/data_files/util.py b/tdrs-backend/tdpservice/data_files/util.py
index 1548becc9..692d782a0 100644
--- a/tdrs-backend/tdpservice/data_files/util.py
+++ b/tdrs-backend/tdpservice/data_files/util.py
@@ -46,7 +46,9 @@ def get_prioritized_queryset(parser_errors, is_s3_s4):
     if is_s3_s4:
         all_cat2_cat3 = Q(error_type=ParserErrorCategoryChoices.FIELD_VALUE) | \
             Q(error_type=ParserErrorCategoryChoices.VALUE_CONSISTENCY)
-        filtered_errors = filtered_errors.union(parser_errors.filter(all_cat2_cat3))
+        filtered_errors = filtered_errors.union(parser_errors.filter(all_cat2_cat3).exclude(
+            error_message__contains="HEADER Update Indicator")
+        )
         return filtered_errors
 
     for fields in PRIORITIZED_CAT2:

From 61de69969e863fb92c4ecf2a0ce32724e1356120 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Tue, 17 Dec 2024 15:51:06 -0500
Subject: [PATCH 4/6] - Changed order for consistency

---
 tdrs-backend/tdpservice/data_files/util.py  | 2 +-
 tdrs-backend/tdpservice/data_files/views.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tdrs-backend/tdpservice/data_files/util.py b/tdrs-backend/tdpservice/data_files/util.py
index 692d782a0..35870de3b 100644
--- a/tdrs-backend/tdpservice/data_files/util.py
+++ b/tdrs-backend/tdpservice/data_files/util.py
@@ -42,7 +42,7 @@ def get_prioritized_queryset(parser_errors, is_s3_s4):
         Q(error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY)
     filtered_errors = parser_errors.filter(error_type_query)
 
-    # If we are a Stratum or Aggregate file, we want all cat2 and cat3 errors.
+    # If we are an Aggregate or Stratum file, we want all cat2 and cat3 errors.
     if is_s3_s4:
         all_cat2_cat3 = Q(error_type=ParserErrorCategoryChoices.FIELD_VALUE) | \
             Q(error_type=ParserErrorCategoryChoices.VALUE_CONSISTENCY)
diff --git a/tdrs-backend/tdpservice/data_files/views.py b/tdrs-backend/tdpservice/data_files/views.py
index 5018ee504..7abbddf05 100644
--- a/tdrs-backend/tdpservice/data_files/views.py
+++ b/tdrs-backend/tdpservice/data_files/views.py
@@ -147,8 +147,8 @@ def download_error_report(self, request, pk=None):
         """Generate and return the parsing error report xlsx."""
         datafile = self.get_object()
         all_errors = ParserError.objects.filter(file=datafile)
-        is_s3_s4 = (DataFile.Section.STRATUM_DATA in datafile.section or
-                    DataFile.Section.AGGREGATE_DATA in datafile.section)
+        is_s3_s4 = (DataFile.Section.AGGREGATE_DATA in datafile.section or
+                    DataFile.Section.STRATUM_DATA in datafile.section)
         filtered_errors = get_prioritized_queryset(all_errors, is_s3_s4)
 
         return Response(get_xls_serialized_file(all_errors, filtered_errors))

From 8a85e746eb389c8802219a0eb8cc0d492432fbe6 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Wed, 18 Dec 2024 09:57:51 -0500
Subject: [PATCH 5/6] - Updated spreadsheet writer to be a bit smarter with respect to the file's section

---
 tdrs-backend/tdpservice/data_files/util.py  | 63 +++++++++++++++------
 tdrs-backend/tdpservice/data_files/views.py |  2 +-
 2 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/tdrs-backend/tdpservice/data_files/util.py b/tdrs-backend/tdpservice/data_files/util.py
index 35870de3b..e48668057 100644
--- a/tdrs-backend/tdpservice/data_files/util.py
+++ b/tdrs-backend/tdpservice/data_files/util.py
@@ -130,36 +130,65 @@ def format_header(header_list: list):
     return ' '.join([i.capitalize() for i in header_list.split('_')])
 
 
-def write_prioritized_errors(worksheet, prioritized_errors, bold):
-    """Write prioritized errors to spreadsheet."""
-    row, col = 5, 0
+def active_closed_generator(record, rpt_month_year, fields_json):
+    """Generate error report row for S1/S2 files."""
+    return (record.case_number,
+            rpt_month_year[:4],
+            calendar.month_name[int(rpt_month_year[4:])] if rpt_month_year[4:] else None,
+            format_error_msg(record.error_message, fields_json),
+            record.item_number,
+            friendly_names(fields_json),
+            internal_names(fields_json),
+            record.row_number,
+            str(ParserErrorCategoryChoices(record.error_type).label))
+
+
+def aggregate_stratum_generator(record, rpt_month_year, fields_json):
+    """Generate error report row for S3/S4 files."""
+    return (rpt_month_year[:4],
+            calendar.month_name[int(rpt_month_year[4:])] if rpt_month_year[4:] else None,
+            format_error_msg(record.error_message, fields_json),
+            record.item_number,
+            friendly_names(fields_json),
+            internal_names(fields_json),
+            record.row_number,
+            str(ParserErrorCategoryChoices(record.error_type).label))
+
+
+def get_row_generator(is_s3_s4):
+    """Get the correct row generator."""
+    if is_s3_s4:
+        return aggregate_stratum_generator
+    return active_closed_generator
 
-    # We will write the headers in the first row
+
+def get_sheet_columns(is_s3_s4):
+    """Get the correct columns based on file section."""
     columns = ['case_number', 'year', 'month', 'error_message', 'item_number',
                'item_name', 'internal_variable_name', 'row_number', 'error_type',
                ]
+    columns = columns[1:] if is_s3_s4 else columns
+    return columns
+
+
+def write_prioritized_errors(worksheet, prioritized_errors, bold, is_s3_s4):
+    """Write prioritized errors to spreadsheet."""
+    # We will write the headers in the first row, remove case_number if we are s3/s4
+    columns = get_sheet_columns(is_s3_s4)
     for idx, col in enumerate(columns):
-        worksheet.write(row, idx, format_header(col), bold)
+        worksheet.write(5, idx, format_header(col), bold)
 
+    row_generator = get_row_generator(is_s3_s4)
     paginator = Paginator(prioritized_errors.order_by('pk'), settings.BULK_CREATE_BATCH_SIZE)
     row_idx = 6
     for page in paginator:
         for record in page.object_list:
             rpt_month_year = getattr(record, 'rpt_month_year', None)
             rpt_month_year = str(rpt_month_year) if rpt_month_year else ""
-
             fields_json = check_fields_json(getattr(record, 'fields_json', {}), record.field_name)
 
-            worksheet.write(row_idx, 0, record.case_number)
-            worksheet.write(row_idx, 1, rpt_month_year[:4])
-            worksheet.write(row_idx, 2, calendar.month_name[int(rpt_month_year[4:])] if rpt_month_year[4:] else None)
-            worksheet.write(row_idx, 3, format_error_msg(record.error_message, fields_json))
-            worksheet.write(row_idx, 4, record.item_number)
-            worksheet.write(row_idx, 5, friendly_names(fields_json))
-            worksheet.write(row_idx, 6, internal_names(fields_json))
-            worksheet.write(row_idx, 7, record.row_number)
-            worksheet.write(row_idx, 8, str(ParserErrorCategoryChoices(record.error_type).label))
+            worksheet.write_row(row_idx, 0, row_generator(record, rpt_month_year, fields_json))
             row_idx += 1
@@ -198,7 +227,7 @@ def write_aggregate_errors(worksheet, all_errors, bold):
         row_idx += 1
 
 
-def get_xls_serialized_file(all_errors, prioritized_errors):
+def get_xls_serialized_file(all_errors, prioritized_errors, is_s3_s4):
     """Return xls file created from the error."""
     output = BytesIO()
     workbook = xlsxwriter.Workbook(output)
@@ -209,7 +238,7 @@ def get_xls_serialized_file(all_errors, prioritized_errors):
     write_worksheet_banner(aggregate_sheet)
 
     bold = workbook.add_format({'bold': True})
-    write_prioritized_errors(prioritized_sheet, prioritized_errors, bold)
+    write_prioritized_errors(prioritized_sheet, prioritized_errors, bold, is_s3_s4)
     write_aggregate_errors(aggregate_sheet, all_errors, bold)
 
     # autofit all columns except for the first one
diff --git a/tdrs-backend/tdpservice/data_files/views.py b/tdrs-backend/tdpservice/data_files/views.py
index 7abbddf05..88c5159e1 100644
--- a/tdrs-backend/tdpservice/data_files/views.py
+++ b/tdrs-backend/tdpservice/data_files/views.py
@@ -151,7 +151,7 @@ def download_error_report(self, request, pk=None):
                     DataFile.Section.STRATUM_DATA in datafile.section)
         filtered_errors = get_prioritized_queryset(all_errors, is_s3_s4)
 
-        return Response(get_xls_serialized_file(all_errors, filtered_errors))
+        return Response(get_xls_serialized_file(all_errors, filtered_errors, is_s3_s4))
 
 
 class GetYearList(APIView):

From aba6139c1d83d98780f1a656c9842264c24cc3d2 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Thu, 2 Jan 2025 09:43:55 -0500
Subject: [PATCH 6/6] - update tests

---
 .../tdpservice/parsers/validators/test/test_category3.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tdrs-backend/tdpservice/parsers/validators/test/test_category3.py b/tdrs-backend/tdpservice/parsers/validators/test/test_category3.py
index 40090e8e6..6ce06e457 100644
--- a/tdrs-backend/tdpservice/parsers/validators/test/test_category3.py
+++ b/tdrs-backend/tdpservice/parsers/validators/test/test_category3.py
@@ -223,11 +223,11 @@ def test_isNotZero(val, number_of_zeros, kwargs, exp_result, exp_message):
     ('199510', 18, {}, True, None),
     (
         f'{datetime.date.today().year - 18}01', 18, {}, False,
-        '2006 must be less than or equal to 2006 to meet the minimum age requirement.'
+        '2007 must be less than or equal to 2007 to meet the minimum age requirement.'
     ),
     (
         '202010', 18, {}, False,
-        '2020 must be less than or equal to 2006 to meet the minimum age requirement.'
+        '2020 must be less than or equal to 2007 to meet the minimum age requirement.'
     ),
 ])
 def test_isOlderThan(val, min_age, kwargs, exp_result, exp_message):
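
For reviewers, a minimal self-contained sketch of the column behavior introduced by the new get_sheet_columns/get_row_generator helpers in PATCH 5/6. Names mirror the patch; the snippet itself is illustrative only and is not part of the series.

    # Illustrative only: mirrors the dispatch added to util.py in PATCH 5/6.
    # S3/S4 (Aggregate/Stratum) error reports carry no case numbers, so the
    # leading 'case_number' column is dropped and a shorter row tuple is written.
    COLUMNS = ['case_number', 'year', 'month', 'error_message', 'item_number',
               'item_name', 'internal_variable_name', 'row_number', 'error_type']

    def sheet_columns(is_s3_s4):
        """Return the error report header layout for the given section type."""
        return COLUMNS[1:] if is_s3_s4 else COLUMNS

    print(sheet_columns(False)[0])  # 'case_number' for Active/Closed (S1/S2) files
    print(sheet_columns(True)[0])   # 'year' for Aggregate/Stratum (S3/S4) files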