-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathods_converter.py
143 lines (125 loc) · 5.48 KB
/
ods_converter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import io
import zipfile
from pyexcel_ods3 import save_data
import odf
from odf import style
import odf.element
from odf.element import Element
from odf.namespaces import nsdict
from odf.opendocument import load as odf_load
from odf.table import Table
from utils.configs import converters
from utils.filler_utils import read_dataframes
styles = {
"must": "#afd095",
"must not": "#ffa6a6",
"recommended": "#b4c7dc",
"not recommended": "#ffffa6",
}
def _escape_complete(data, entities=None):
""" Escape &, ", <, and > in a string of data.
You can escape other strings of data by passing a dictionary as
the optional entities parameter. The keys and values must all be
strings; each key will be replaced with its corresponding value.
"""
if entities is None:
entities = {}
data = data.replace("&", "&")
data = data.replace("<", "<")
data = data.replace(">", ">")
data = data.replace('"', """)
data = data.replace("'", "'")
for chars, entity in entities.items():
data = data.replace(chars, entity)
return data
# the escape function in the library is not complete, so we need to replace it
odf.element._escape = _escape_complete
CALCEXTNS = 'urn:org:documentfoundation:names:experimental:calc:xmlns:calcext:1.0'
CALCEXT_NAME = 'calcext'
nsdict[CALCEXTNS] = CALCEXT_NAME
if __name__ == "__main__":
dataframe = read_dataframes({"converters": converters, "dtype": str})
ods_data = {}
for name, df in dataframe.items():
df = df.map(lambda x: x.strip() if isinstance(x, str) else x)
df = df.to_dict()
sheet_data = []
# Extract headers from the keys of inner dictionaries
headers = [key[0] for key in df.keys()]
headers2 = [key[1] for key in df.keys()]
sheet_data.append(headers)
sheet_data.append(headers2)
# Gather rows based on the first element of the tuples
for index in range(max(len(inner_dict) for inner_dict in df.values())):
row = [df[key].get(index, "") for key in df.keys()]
row = [x if str(x) != "nan" else "" for x in row]
sheet_data.append(row)
ods_data[name] = sheet_data
save_data("output_clean.ods", ods_data)
# save the data in memory
content = io.BytesIO()
save_data(content, ods_data)
root: odf.opendocument.OpenDocument = odf_load(content)
for style_name, style_color in styles.items():
new_style = style.Style(
name=style_name, family="table-cell", parentstylename="Default")
bg_color = style.TableCellProperties()
bg_color.setAttribute("backgroundcolor", style_color)
new_style.addElement(bg_color)
root.styles.addElement(new_style)
for sheet in root.spreadsheet.getElementsByType(Table):
# this is the element containing the conditional formattings
conditional_formattings = Element(
qname=(CALCEXTNS, "conditional-formats"))
for style_name in styles:
sheet_name = sheet.getAttribute('name')
if " " in sheet_name:
sheet_name = "'"+sheet_name+"'"
base_cell_address = f"{sheet_name}.B3"
target_range_address = f"{sheet_name}.B3:{sheet_name}.IV65536"
# this is a single conditional formatting
conditional_formatting = Element(
qname=(CALCEXTNS, "conditional-format"))
conditional_formatting.setAttrNS(
CALCEXTNS, "target-range-address", target_range_address)
# prepare the condition's element
condition = Element(qname=(CALCEXTNS, "condition"))
condition.setAttrNS(CALCEXTNS, "apply-style-name", style_name)
condition.setAttrNS(CALCEXTNS, "value", f'="{style_name}"')
condition.setAttrNS(
CALCEXTNS, "base-cell-address", base_cell_address)
# add the condition to the conditional formatting
conditional_formatting.appendChild(condition)
# add the conditional formatting to the conditional formattings
conditional_formattings.appendChild(conditional_formatting)
sheet.appendChild(conditional_formattings)
output_buffer = io.BytesIO()
root.save(output_buffer)
archive = zipfile.ZipFile(output_buffer, "r")
archive_final = zipfile.ZipFile("requirements.ods", "w")
added = set()
for file in archive.infolist():
if file.filename in added:
continue
added.add(file.filename)
content = archive.read(file)
if file.filename in ["content.xml", "meta.xml", "styles.xml"]:
if file.filename == "content.xml":
content = content.replace(b"<table:table-column />", b"")
content = content.replace(b"office:value-type=\"string\"",
b"office:value-type=\"string\" calcext:value-type=\"string\"")
content = content.replace(b"'", b"\"")
archive_final.writestr(file, content)
elif file.filename == "META-INF/manifest.xml":
content = content.split(b"><")
content.pop(-2)
content.pop(-2)
content = b"><".join(content)
content = content.replace(b"'", b"\"")
archive_final.writestr(file, content)
elif file.filename == "mimetype":
archive_final.writestr(file, content,
compress_type=zipfile.ZIP_STORED)
else:
archive_final.writestr(file, content)
archive_final.close()