-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcombine.py
executable file
·236 lines (208 loc) · 10 KB
/
combine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
#!/usr/bin/env python3
"""Tool to bundle multiple C/C++ source files, inlining any includes.
Latest version available here: https://github.com/cwoffenden/combiner
Note: there are two types of exclusion options: the '-x' flag, which besides
excluding a file also adds an #error directive in place of the #include, and
the '-k' flag, which keeps the #include and doesn't inline the file. The
intended use cases are: '-x' for files that would normally be #if'd out, so
features that 100% won't be used in the amalgamated file, for which every
occurrence adds the error, and '-k' for headers that we wish to manually
include, such as a project's public API, for which occurrences after the first
are removed.
Todo: the error handling could be better, which currently throws and halts
(which is functional just not very friendly).
Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license
or Public Domain, whichever is applicable in your jurisdiction)
"""
import argparse
import re
import sys
from pathlib import Path
from typing import Any, List, Optional, Pattern, Set, TextIO
# Module-level state shared by the helper functions below and filled in from
# the command-line arguments by the script section at the end of the file.

# Set of file roots when searching (equivalent compiler -I paths). Being a set,
# it carries no defined search order between roots.
roots: Set[Path] = set()
# Set of (canonical) file Path objects to exclude from inlining (and not only
# exclude but to add a compiler error directive when they're encountered).
excludes: Set[Path] = set()
# Set of (canonical) file Path objects to keep as include directives.
keeps: Set[Path] = set()
# Whether to keep the #pragma once directives (unlikely, since this will result
# in a warning, but the option is there).
keep_pragma: bool = False
# Destination file object (or stdout if no output file was supplied).
destn: TextIO = sys.stdout
# Set of previously inlined includes (and to ignore if reencountering). The
# input file itself is added here too, so it is never inlined into itself.
found: Set[Path] = set()
# Compiled regex Pattern to handle the following type of file includes (group 1
# captures the quoted file name, stopping at the first closing quote):
#
#   #include "file"
#     #include "file"
#   #  include "file"
#   #include   "file"
#   #include "file" // comment
#   #include "file" // comment with quote "
#
# And all combinations of, as well as ignoring the following:
#
#   #include <file>
#   //#include "file"
#   /*#include "file"*/
#
# We don't try to catch errors since the compiler will do this (and the code is
# expected to be valid before processing) and we don't care what follows the
# file (whether it's a valid comment or not, since anything after the quoted
# string is ignored)
#
include_regex: Pattern[str] = re.compile(r'^\s*#\s*include\s*"(.+?)"')
# Compiled regex Pattern to handle "#pragma once" in various formats:
#
#   #pragma once
#     #pragma once
#   #  pragma once
#   #pragma   once
#   #pragma once // comment
#
# Ignoring commented versions, same as include_regex. The pattern is anchored
# at the start of the line only, so whatever follows 'once' is not inspected.
#
pragma_regex: Pattern[str] = re.compile(r'^\s*#\s*pragma\s*once\s*')
def test_match_include() -> bool:
    """Simple tests to prove include_regex's cases.

    Every whitespace variant is a distinct string so that each optional
    whitespace position in the pattern is exercised. Prints a confirmation and
    returns True when all cases behave as expected, otherwise returns False
    without printing.
    """
    accepted = (
        '#include "file"',
        '  #include "file"',            # whitespace before the hash
        '#  include "file"',            # whitespace after the hash
        '#include   "file"',            # extra whitespace before the name
        '#include "file" // comment',   # trailing content is ignored
    )
    rejected = (
        '#include <file>',
        '//#include "file"',
        '/*#include "file"*/',
    )
    if not all(include_regex.match(text) for text in accepted):
        return False
    if any(include_regex.match(text) for text in rejected):
        return False
    # A stray quote in a trailing comment must not extend the captured name.
    matched = include_regex.match('#include "file" // "')
    if matched and matched.group(1) == 'file':
        print('#include match valid')
        return True
    return False
def test_match_pragma() -> bool:
    """Simple tests to prove pragma_regex's cases.

    Every whitespace variant is a distinct string so that each optional
    whitespace position in the pattern is exercised. Prints a confirmation and
    returns True when all cases behave as expected, otherwise returns False
    without printing.
    """
    accepted = (
        '#pragma once',
        '  #pragma once',           # whitespace before the hash
        '#  pragma once',           # whitespace after the hash
        '#pragma   once',           # extra whitespace before 'once'
        '#pragma once // comment',  # trailing content is ignored
    )
    rejected = (
        '//#pragma once',
        '/*#pragma once*/',
    )
    if not all(pragma_regex.match(text) for text in accepted):
        return False
    if any(pragma_regex.match(text) for text in rejected):
        return False
    print('#pragma once match valid')
    return True
def resolve_include(file: str, parent: Optional[Path] = None) -> Optional[Path]:
    """Finds a file. First the 'root' paths are searched, followed by the
    currently processing file's 'parent' path (or, when no 'parent' is given,
    'file' is taken relative to the current working directory), returning a
    valid Path in canonical form. If no match is found None is returned.

    Note: 'roots' is a set, so when the same relative name exists under more
    than one root, which one wins is not defined.
    """
    for root in roots:
        candidate = root.joinpath(file).resolve()
        if candidate.is_file():
            return candidate
    # Resolve the fallback in both cases: callers compare the result against
    # sets of resolved paths, so an unresolved relative Path would never match.
    fallback = parent.joinpath(file) if parent else Path(file)
    candidate = fallback.resolve()
    if candidate.is_file():
        return candidate
    return None
def resolve_excluded_files(file_list: Optional[List[str]], resolved: Set[Path], parent: Optional[Path] = None) -> None:
    """Helper to resolve lists of files. Each name in 'file_list' (as passed
    in from the args, possibly None) is looked up like any include entry:
    first against the root paths, then against 'parent', which in this case is
    the directory of the input file. Names that resolve are added to the
    'resolved' set; names that don't are reported on stderr and skipped.
    """
    for filename in file_list or ():
        inc_path = resolve_include(filename, parent)
        if inc_path is None:
            error_line(f'Warning: excluded file not found: {filename}')
            continue
        resolved.add(inc_path)
def write_line(line: str) -> None:
    """Emits 'line', followed by a newline, to the current 'destn' stream
    (the output file if one was given, otherwise stdout)."""
    sink = destn
    print(line, end='\n', file=sink)
def error_line(line: Any) -> None:
    """Prints 'line' to stderr. Besides errors this also carries progress
    messages, keeping stdout free for the generated source (so it can be
    piped)."""
    # Looked up on every call so that a redirected sys.stderr is honoured.
    stream = sys.stderr
    print(line, file=stream)
def add_file(file: Path, file_name: Optional[str] = None) -> None:
    """Inline the contents of 'file' (also inlining its includes, etc.).

    'file_name' is only used for the progress message and defaults to the
    file's own name. Each line is written to the output unchanged unless it is
    a quoted #include (which is inlined, kept, skipped or turned into an
    #error, depending on the exclude/keep/found sets) or a '#pragma once'
    (dropped unless 'keep_pragma' is set).

    Note: text encoding errors are ignored and replaced with ? when reading the
    input files. This isn't ideal, but it's more than likely in the comments
    than code and a) the text editor has probably also failed to read the same
    content, and b) the compiler probably did too.
    """
    if not file.is_file():
        error_line(f'Error: Invalid file: {file}')
        return
    error_line(f'Processing: {file_name or file.name}')
    with file.open('r', errors='replace') as source:
        for raw in source:
            text = raw.rstrip('\n')
            directive = include_regex.match(text)
            if directive is None:
                # Skip any 'pragma once' directives, otherwise write the source line
                if keep_pragma or not pragma_regex.match(text):
                    write_line(text)
                continue
            # We have a quoted include directive so grab the file
            inc_name = directive.group(1)
            target = resolve_include(inc_name, file.parent)
            if target is None:
                # The include file didn't resolve to a file
                write_line(f'#error Unable to find: {inc_name}')
                error_line(f'Error: Unable to find: {inc_name}')
            elif target in excludes:
                # The file was excluded so error if the compiler uses it
                write_line(f'#error Using excluded file: {inc_name} (re-amalgamate source to fix)')
                error_line(f'Excluding: {inc_name}')
            elif target in found:
                # Already handled once, so only leave a marker behind
                write_line(f'/**** skipping file: {inc_name} ****/')
            else:
                # The file was not previously encountered
                found.add(target)
                if target in keeps:
                    # But the include was flagged to keep as included
                    write_line(f'/**** *NOT* inlining {inc_name} ****/')
                    write_line(text)
                    error_line(f'Not inlining: {inc_name}')
                else:
                    # The file was neither excluded nor seen before so inline it
                    write_line(f'/**** start inlining {inc_name} ****/')
                    add_file(target, inc_name)
                    write_line(f'/**** ended inlining {inc_name} ****/')
# Start here: parse the command line, populate the module-level state and then
# amalgamate the input file into 'destn'.
parser = argparse.ArgumentParser(description='Amalgamate Tool', epilog=f'example: {sys.argv[0]} -r ../my/path -r ../other/path -o out.c in.c')
parser.add_argument('-r', '--root', action='append', type=Path, help='file root search path')
parser.add_argument('-x', '--exclude', action='append', help='file to completely exclude from inlining')
parser.add_argument('-k', '--keep', action='append', help='file to exclude from inlining but keep the include directive')
parser.add_argument('-p', '--pragma', action='store_true', default=False, help='keep any "#pragma once" directives (removed by default)')
parser.add_argument('-o', '--output', type=argparse.FileType('w'), help='output file (otherwise stdout)')
parser.add_argument('input', type=Path, help='input file')
args = parser.parse_args()

# argparse.FileType has already opened the output file by this point, so
# everything that can raise runs inside the try to guarantee it is closed.
try:
    # Fail early on an invalid input (and store it so we don't recurse)
    args.input = args.input.resolve(strict=True)
    found.add(args.input)

    # Resolve all of the root paths upfront (we'll halt here on invalid roots)
    if args.root:
        for path in args.root:
            roots.add(path.resolve(strict=True))

    # The remaining params: so resolve the excluded files and #pragma once directive
    resolve_excluded_files(args.exclude, excludes, args.input.parent)
    resolve_excluded_files(args.keep, keeps, args.input.parent)
    keep_pragma = args.pragma

    # Then recursively process the input file
    if args.output:
        destn = args.output
    add_file(args.input)
finally:
    if args.output:
        # Only close what was opened on our behalf
        args.output.close()
    else:
        # stdout is not ours to close; flushing is enough
        destn.flush()