-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain_logic.py
165 lines (141 loc) · 6.96 KB
/
main_logic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import pandas as pd
from excel_class import ExcelFile
from utilities import (category_order, remove_duplicates,
interpret_instruction, calculate_percentages, insert_total_row)
def start_code(db_name: str, table_type: int, db: pd.DataFrame, db_vars: pd.DataFrame, footer_text=None,
split_df_by=None) -> None:
"""
Parameters
----------
db_name: str
Name of the Excel file to create.
db: pd.DataFrame
DataFrame from which the output tables will be created on the pages of the Excel workbook.
db_vars: pd.DataFrame
Dataframe containing the list of variables to be used to generate the tabulations, the variables used for data
cross-referencing, and the method for processing the results to be displayed.
footer_text: str | None
Table footer for the tabulations.
split_df_by : tuple
Value by which the data source is divided into groups.
It is a tuple where the 1st element is the name of the column to be
filtered and the 2nd is the value to be filtered by.
table_type : int
Specifies the format in which the data is presented within the tables.
table_type = 0 -> Absolute value
table_type = 1 -> % total
table_type = 2 -> % columns
table_type = 3 -> % rows
table_type = 4 -> % Simple absolute value and % total
table_type = 5 -> Long description
Returns
-------
None
"""
metadata = pd.DataFrame()
if table_type == 4:
file_name = f"{db_name}_simple_table.xlsx"
elif table_type != 0:
file_name = f"{db_name}_percentage_table.xlsx"
else:
file_name = f"{db_name}_absolute_value_table.xlsx"
secondary_vars_list = [db_vars.loc[index,"VAR"] for index in db_vars.index if db_vars.loc[index, "DISAGGREGATE"]]
if not secondary_vars_list and table_type != 4:
raise TypeError("A disaggregation of data was indicated (table_type != 4) but there is no variable indicated to"
"disaggregate. (All variables in DISAGGREGATE of db_vars = False)")
elif table_type == 4:
secondary_vars_list = ["pass"]
if table_type <= 4:
excel_file = ExcelFile(file_name=file_name, tables_type=table_type)
else:
excel_file = None
def structure_generator(field: str) -> dict:
field_set = remove_duplicates([x for x in db[field].tolist() if str(x) != "nan"])
field_set.insert(0, "Var")
try:
if set(field_set).issubset(category_order[field]):
category = category_order[field]
else:
category = field_set
except KeyError:
category = field_set
result = {}
for i in category:
result[i] = []
return result
for page in remove_duplicates(db_vars["PAGE"]):
data = []
for index in db_vars.index:
if db_vars.loc[index, "PAGE"] == page:
question = db_vars.loc[index, "VAR"]
if "${" in db_vars.loc[index, "SUM"]:
factor = interpret_instruction(db_vars.loc[index, "SUM"])
else:
factor = db_vars.loc[index, "SUM"]
for secondary_var in secondary_vars_list:
if table_type == 4:
secondary_var = question
df_dict = {}
structure = structure_generator(secondary_var)
columns_to_convert = [x for x in structure if x not in ("Var", "Total")]
list_var = [x for x in structure_generator(question).keys() if x not in ("Var", "Total")]
for var in list_var:
structure["Var"].append(var)
for s_var in columns_to_convert:
if split_df_by:
_filter = (db[secondary_var] == s_var) & (db[question] == var) & (
db[split_df_by[0]] == split_df_by[1])
else:
_filter = (db[secondary_var] == s_var) & (db[question] == var)
if type(factor) is tuple:
if factor[3] == "*":
multiplication = db.loc[_filter, factor[0]] * db.loc[_filter, factor[2]]
value = multiplication.sum()
else:
raise ValueError("Unrecognized operation in SUM of Var column")
else:
value = db.loc[_filter, factor].sum()
structure[s_var].append(value)
df = pd.DataFrame(structure)
df_without_var = df.drop(columns=["Var"])
df["Total"] = df_without_var.sum(axis=1)
cols = df.columns.tolist()
cols.insert(1, cols.pop(cols.index("Total")))
df = df[cols]
if table_type != 0:
df = calculate_percentages(structure, df, table_type)
elif table_type == 4:
df = calculate_percentages(None, df, table_type)
if table_type != 3:
df_dict[secondary_var] = insert_total_row(df)
else:
df_dict[secondary_var] = df
data.append((question, df_dict, f"This table was calculated by a variable operation {factor[0]} * "
f"{factor[2]}" if type(factor) is tuple else None))
if table_type == 4:
break
elif table_type == 5:
df.insert(0, 'Ask', question)
if len(metadata) > 0:
metadata = pd.concat([metadata, df], ignore_index=True)
else:
metadata = df
break
if table_type <= 4:
for question, df_dict, footer_text_factor in data:
if footer_text and footer_text_factor:
footer_text_table = str(footer_text) + " // " + str(footer_text_factor)
elif footer_text_factor:
footer_text_table = str(footer_text_factor)
elif footer_text:
footer_text_table = str(footer_text)
else:
footer_text_table = None
excel_file.add_sheet(sheet_name=f"Section - {page}", data=df_dict, main_var_name=question,
footer_text=footer_text_table)
if table_type <= 4:
excel_file.restart()
if table_type <= 4:
excel_file.save()
else:
metadata.to_excel("results//METADATA.xlsx", index=False, header=True)