From f00994d3391a7a91403e0d60d7b6adcfbb2e73c4 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Sun, 22 Dec 2019 16:56:28 -0500 Subject: [PATCH 01/14] Fix additional enum ambiguities --- CppHeaderParser/CppHeaderParser.py | 18 ++++++++++-- CppHeaderParser/lexer.py | 3 ++ CppHeaderParser/test/test_CppHeaderParser.py | 29 ++++++++++++++++++++ 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index afb5dd2..d422c4f 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -2528,6 +2528,7 @@ def evalute_forward_decl(self): "+", "STRING_LITERAL", "ELLIPSIS", + "SHIFT_LEFT", } _namestack_pass_tokens = { @@ -3021,7 +3022,16 @@ def _consume_balanced_tokens(self, *init_tokens): if tok.type in self._end_balanced_tokens: expected = match_stack.pop() if tok.type != expected: - raise self._parse_error(consumed, match_stack[-1]) + # hack: ambiguous right-shift issues here, really + # should be looking at the context + if tok.type == ">": + tok = self.lex.token_if(">") + if tok: + consumed.append(tok) + match_stack.append(expected) + continue + + raise self._parse_error(consumed, expected) if len(match_stack) == 0: return consumed @@ -3390,11 +3400,13 @@ def _parse_enumerator_list(self, values): while True: tok = self.lex.token() if tok.type == "}": - value["value"] = " ".join(v) + value["value"] = (" ".join(v)).replace(": :", "::") return elif tok.type == ",": - value["value"] = " ".join(v) + value["value"] = (" ".join(v)).replace(": :", "::") break + elif tok.type in self._balanced_token_map: + v.extend(t.value for t in self._consume_balanced_tokens(tok)) else: v.append(tok.value) diff --git a/CppHeaderParser/lexer.py b/CppHeaderParser/lexer.py index ee30238..154b95d 100644 --- a/CppHeaderParser/lexer.py +++ b/CppHeaderParser/lexer.py @@ -22,6 +22,7 @@ class Lexer(object): "ELLIPSIS", "DBL_LBRACKET", "DBL_RBRACKET", + "SHIFT_LEFT", ] literals = [ @@ -84,6 +85,8 @@ def t_COMMENT_SINGLELINE(self, t): t_ELLIPSIS = r"\.\.\." t_DBL_LBRACKET = r"\[\[" t_DBL_RBRACKET = r"\]\]" + t_SHIFT_LEFT = r"<<" + # SHIFT_RIGHT introduces ambiguity # found at http://wordaligned.org/articles/string-literals-and-regular-expressions # TODO: This does not work with the string "bla \" bla" diff --git a/CppHeaderParser/test/test_CppHeaderParser.py b/CppHeaderParser/test/test_CppHeaderParser.py index 9019be0..16044c3 100644 --- a/CppHeaderParser/test/test_CppHeaderParser.py +++ b/CppHeaderParser/test/test_CppHeaderParser.py @@ -3186,5 +3186,34 @@ def test_existance(self): self.assertIn("AS", self.cppHeader.classes) +class EnumWithTemplates_TestCase(unittest.TestCase): + def setUp(self): + self.cppHeader = CppHeaderParser.CppHeader( + """ +enum { + IsRandomAccess = std::is_base_of::value, + IsBidirectional = std::is_base_of::value, + }; +""", + "string", + ) + + def test_values(self): + e = self.cppHeader.enums[0] + v0 = e["values"][0] + self.assertEqual( + v0["value"], + "std :: is_base_of < std :: random_access_iterator_tag , IteratorCategoryT > :: value", + ) + + v1 = e["values"][1] + self.assertEqual( + v1["value"], + "std :: is_base_of < std :: bidirectional_iterator_tag , IteratorCategoryT > :: value", + ) + + if __name__ == "__main__": unittest.main() From b6b9320aedd441fa54dd14930f7460c19559d619 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Sun, 22 Dec 2019 18:26:31 -0500 Subject: [PATCH 02/14] Remove CppStruct, doesn't seem to be used anymore --- CppHeaderParser/CppHeaderParser.py | 94 +----------------------------- docs/api.rst | 2 +- 2 files changed, 4 insertions(+), 92 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index d422c4f..f62575c 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -593,8 +593,7 @@ class CppClass(dict): and values are lists of :class:`.CppVariable` * ``enums`` - Dictionary where keys are from supportedAccessSpecifier and values are lists of :class:`.CppEnum` - * ``structs`` - Dictionary where keys are from supportedAccessSpecifier and - values are lists of nested :class:`.CppStruct` + * ``nested_classes`` - Classes and structs defined within this class * ``final`` - True if final * ``abstract`` - True if abstract @@ -658,7 +657,6 @@ def __init__(self, nameStack, curTemplate, doxygen, location): self["abstract"] = False self["final"] = False self._public_enums = {} - self._public_structs = {} self._public_typedefs = {} self._public_forward_declares = [] self["namespace"] = "" @@ -690,7 +688,6 @@ def __init__(self, nameStack, curTemplate, doxygen, location): methodAccessSpecificList = {} propertyAccessSpecificList = {} enumAccessSpecificList = {} - structAccessSpecificList = {} typedefAccessSpecificList = {} forwardAccessSpecificList = {} @@ -698,14 +695,12 @@ def __init__(self, nameStack, curTemplate, doxygen, location): methodAccessSpecificList[accessSpecifier] = [] propertyAccessSpecificList[accessSpecifier] = [] enumAccessSpecificList[accessSpecifier] = [] - structAccessSpecificList[accessSpecifier] = [] typedefAccessSpecificList[accessSpecifier] = [] forwardAccessSpecificList[accessSpecifier] = [] self["methods"] = methodAccessSpecificList self["properties"] = propertyAccessSpecificList self["enums"] = enumAccessSpecificList - self["structs"] = structAccessSpecificList self["typedefs"] = typedefAccessSpecificList self["forward_declares"] = forwardAccessSpecificList @@ -1338,26 +1333,6 @@ def __init__(self, name, doxygen, location): set_location_info(self, location) -class CppStruct(dict): - """ - Dictionary that contains at least the following keys: - - * ``type`` - Name of this struct - * ``fields`` - List of :class:`.CppVariable` - * ``line_number`` - Line number this struct was found on - """ - - Structs = [] - - def __init__(self, nameStack, location): - if len(nameStack) >= 2: - self["type"] = nameStack[1] - else: - self["type"] = None - self["fields"] = [] - set_location_info(self, location) - self.Structs.append(self) - C99_NONSTANDARD = { "int8": "signed char", @@ -1392,21 +1367,16 @@ class Resolver(object): SubTypedefs = {} # TODO deprecate? NAMESPACES = [] CLASSES = {} - STRUCTS = {} def initextra(self): self.typedefs = {} self.typedefs_order = [] self.classes_order = [] - self.structs = Resolver.STRUCTS - self.structs_order = [] self.namespaces = Resolver.NAMESPACES # save all namespaces - self.curStruct = None self.stack = ( [] ) # full name stack, good idea to keep both stacks? (simple stack and full stack) self._classes_brace_level = {} # class name : level - self._structs_brace_level = {} # struct type : level self._method_body = None self._forward_decls = [] self._template_typenames = [] # template @@ -1565,13 +1535,6 @@ def resolve_type(self, string, result): # recursive result["unresolved"] = False def finalize_vars(self): - for ( - s - ) in ( - CppStruct.Structs - ): # vars within structs can be ignored if they do not resolve - for var in s["fields"]: - var["parent"] = s["type"] # for c in self.classes.values(): # for var in c.get_all_properties(): var['parent'] = c['name'] @@ -1600,8 +1563,6 @@ def finalize_vars(self): klass = var["method"]["parent"] if tag in var["method"]["parent"]._public_enums: nestedEnum = var["method"]["parent"]._public_enums[tag] - elif tag in var["method"]["parent"]._public_structs: - nestedStruct = var["method"]["parent"]._public_structs[tag] elif tag in var["method"]["parent"]._public_typedefs: nestedTypedef = var["method"]["parent"]._public_typedefs[ tag @@ -1648,14 +1609,6 @@ def finalize_vars(self): var["concrete_type"] ) - elif tag in self.structs: - trace_print("STRUCT", var) - var["struct"] = tag - var["ctypes_type"] = "ctypes.c_void_p" - var["raw_type"] = ( - self.structs[tag]["namespace"] + "::" + tag - ) - elif tag in self._forward_decls: var["forward_declared"] = tag var["ctypes_type"] = "ctypes.c_void_p" @@ -2039,23 +1992,6 @@ def finalize(self): cls["abstract"] = True break - def _evaluate_struct_stack(self): - """Create a Struct out of the name stack (but not its parts)""" - # print( 'eval struct stack', self.nameStack ) - # if self.braceDepth != len(self.nameSpaces): return - struct = CppStruct(self.nameStack, self._get_location(self.nameStack)) - struct["namespace"] = self.cur_namespace() - self.structs[struct["type"]] = struct - self.structs_order.append(struct) - if self.curClass: - struct["parent"] = self.curClass - klass = self.classes[self.curClass] - klass["structs"][self.curAccessSpecifier].append(struct) - if self.curAccessSpecifier == "public": - klass._public_structs[struct["type"]] = struct - self.curStruct = struct - self._structs_brace_level[struct["type"]] = self.braceDepth - _method_type_defaults = { n: False for n in "defined pure_virtual operator constructor destructor extern template virtual static explicit inline friend returns returns_pointer returns_fundamental returns_class default".split() @@ -2205,11 +2141,6 @@ def parse_method_type(self, stack): def _evaluate_method_stack(self): """Create a method out of the name stack""" - if self.curStruct: - trace_print("WARN - struct contains methods - skipping") - trace_print(self.stack) - assert 0 - info = self.parse_method_type(self.stack) if info: if ( @@ -2337,7 +2268,7 @@ def _evaluate_property_stack(self, clearStack=True, addToVar=None): Resolver.SubTypedefs[name] = self.curClass else: assert 0 - elif self.curStruct or self.curClass: + elif self.curClass: if len(self.nameStack) == 1: # See if we can de anonymize the type filteredParseHistory = [ @@ -2384,10 +2315,7 @@ def _evaluate_property_stack(self, clearStack=True, addToVar=None): self._get_location(self.nameStack), ) newVar["namespace"] = self.current_namespace() - if self.curStruct: - self.curStruct["fields"].append(newVar) - newVar["property_of_struct"] = self.curStruct - elif self.curClass: + if self.curClass: klass = self.classes[self.curClass] klass["properties"][self.curAccessSpecifier].append(newVar) newVar["property_of_class"] = klass["name"] @@ -2565,7 +2493,6 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): """ ## reset global state ## CppVariable.Vars = [] - CppStruct.Structs = [] if argType == "file": self.headerFileName = os.path.expandvars(headerFileName) @@ -2829,18 +2756,8 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self.curClass = self.classes[self.curClass]["parent"] else: self.curClass = "" - # self.curStruct = None self.stack = [] - # if self.curStruct: self.curStruct = None - if self.braceDepth == 0 or ( - self.curStruct - and self._structs_brace_level[self.curStruct["type"]] - == self.braceDepth - ): - trace_print("END OF STRUCT DEF") - self.curStruct = None - if self._method_body and (self.braceDepth + 1) <= self._method_body: self._method_body = None self.stack = [] @@ -2960,11 +2877,9 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): "_forward_decls", "stack", "mainClass", - "curStruct", "_template_typenames", "_method_body", "braceDepth", - "_structs_brace_level", "typedefs_order", "curTemplate", ]: @@ -3130,7 +3045,6 @@ def _evaluate_stack(self, token=None): trace_print("INSIDE METHOD DEF") elif ( is_method_namestack(self.stack) - and not self.curStruct and "(" in self.nameStack ): debug_print("trace") @@ -3189,8 +3103,6 @@ def _evaluate_stack(self, token=None): elif not self.curClass: debug_print("trace") - if self.curStruct and self.stack[-1] == ";": - self._evaluate_property_stack() # this catches fields of global structs self.nameStack = [] elif self.braceDepth < 1: debug_print("trace") diff --git a/docs/api.rst b/docs/api.rst index 6da7045..f3d83b8 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -31,7 +31,7 @@ CppHeaderParser .. automodule:: CppHeaderParser.CppHeaderParser :members: CppBaseDecl, CppClass, CppEnum, CppHeader, CppMethod, CppParseError, - CppStruct, CppTemplateParam, CppUnion, CppVariable, TagStr, + CppTemplateParam, CppUnion, CppVariable, TagStr, ignoreSymbols :undoc-members: :show-inheritance: From 9e9a515e1c02bf02c05a2c419f19210f476b3753 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Sun, 22 Dec 2019 19:14:38 -0500 Subject: [PATCH 03/14] Discard function/method contents in a simpler way --- CppHeaderParser/CppHeaderParser.py | 49 +++++++++----------- CppHeaderParser/test/test_CppHeaderParser.py | 24 ++++++++++ 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index f62575c..025645c 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -1333,7 +1333,6 @@ def __init__(self, name, doxygen, location): set_location_info(self, location) - C99_NONSTANDARD = { "int8": "signed char", "int16": "short int", @@ -1377,7 +1376,6 @@ def initextra(self): [] ) # full name stack, good idea to keep both stacks? (simple stack and full stack) self._classes_brace_level = {} # class name : level - self._method_body = None self._forward_decls = [] self._template_typenames = [] # template @@ -2023,13 +2021,12 @@ def parse_method_type(self, stack): header = header.replace("default ", "default") header = header.strip() - if "{" in stack: + if stack[-1] == "{": info["defined"] = True - self._method_body = self.braceDepth + 1 - trace_print("NEW METHOD WITH BODY", self.braceDepth) + self._discard_function_contents(stack) + self.braceHandled = True elif stack[-1] == ";": info["defined"] = False - self._method_body = None # not a great idea to be clearing here else: assert 0 @@ -2721,11 +2718,13 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self.nameStack = origNameStack[classLocationNS:] self.stack = origStack[classLocationS:] + self.braceHandled = False if self.nameStack: self._evaluate_stack() if self.stack and self.stack[0] == "class": self.stack = [] - self.braceDepth += 1 + if not self.braceHandled: + self.braceDepth += 1 elif tok.type == "}": if self.braceDepth == 0: @@ -2733,10 +2732,8 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): if self.braceDepth == len(self.nameSpaces): tmp = self.nameSpaces.pop() self.stack = [] # clear stack when namespace ends? - elif self.braceDepth < 10: - self._evaluate_stack() else: - self.nameStack = [] + self._evaluate_stack() self.braceDepth -= 1 # self.stack = []; print 'BRACE DEPTH', self.braceDepth, 'NS', len(self.nameSpaces) if self.curClass: @@ -2758,12 +2755,6 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self.curClass = "" self.stack = [] - if self._method_body and (self.braceDepth + 1) <= self._method_body: - self._method_body = None - self.stack = [] - self.nameStack = [] - trace_print("FORCE CLEAR METHBODY") - if tok.type in _namestack_append_tokens: self.nameStack.append(tok.value) elif tok.type in _namestack_pass_tokens: @@ -2834,8 +2825,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self.stack = saved_stack self.anon_union_counter = [-1, 0] - if self.braceDepth < 10: - self._evaluate_stack(tok.type) + self._evaluate_stack(tok.type) self.stack = [] self.nameStack = [] @@ -2878,7 +2868,6 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): "stack", "mainClass", "_template_typenames", - "_method_body", "braceDepth", "typedefs_order", "curTemplate", @@ -2956,6 +2945,20 @@ def _consume_balanced_tokens(self, *init_tokens): if next_end: match_stack.append(next_end) + def _discard_function_contents(self, stack): + # use this instead of consume_balanced_tokens because + # we don't care at all about the internals + level = 1 + get_token = self.lex.token + while True: + tok = get_token() + if tok.type == "{": + level += 1 + elif tok.type == "}": + level -= 1 + if level == 0: + break + def _evaluate_stack(self, token=None): """Evaluates the current name stack""" @@ -3040,13 +3043,7 @@ def _evaluate_stack(self, token=None): atype["raw_type"] = ns + atype["type"] alias = self.current_namespace() + alias self.using[alias] = atype - - elif self._method_body and (self.braceDepth + 1) > self._method_body: - trace_print("INSIDE METHOD DEF") - elif ( - is_method_namestack(self.stack) - and "(" in self.nameStack - ): + elif is_method_namestack(self.stack) and "(" in self.nameStack: debug_print("trace") if self.braceDepth > 0: if ( diff --git a/CppHeaderParser/test/test_CppHeaderParser.py b/CppHeaderParser/test/test_CppHeaderParser.py index 16044c3..82a8a8c 100644 --- a/CppHeaderParser/test/test_CppHeaderParser.py +++ b/CppHeaderParser/test/test_CppHeaderParser.py @@ -3215,5 +3215,29 @@ def test_values(self): ) +class FreeTemplates_TestCase(unittest.TestCase): + def setUp(self): + self.cppHeader = CppHeaderParser.CppHeader( + """ + +template +StringRef copy(Allocator &A) const { + // Don't request a length 0 copy from the allocator. + if (empty()) + return StringRef(); + char *S = A.template Allocate(Length); + std::copy(begin(), end(), S); + return StringRef(S, Length); +} + +""", + "string", + ) + + def test_fn(self): + fn = self.cppHeader.functions[0] + self.assertEqual("copy", fn["name"]) + + if __name__ == "__main__": unittest.main() From 7b8870a6fa5db86a4c2de4f36db7ac01cdb026bf Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Sun, 22 Dec 2019 22:25:24 -0500 Subject: [PATCH 04/14] Remove __attribute__ scanning since we already do that --- CppHeaderParser/CppHeaderParser.py | 31 ------------------------------ 1 file changed, 31 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index 025645c..512e371 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -226,35 +226,6 @@ def set_location_info(thing, location): thing["line_number"] = line_number -def filter_out_attribute_keyword(stack): - """Strips __attribute__ and its parenthetical expression from the stack""" - if "__attribute__" not in stack: - return stack - try: - debug_print("Stripping __attribute__ from %s" % stack) - attr_index = stack.index("__attribute__") - attr_end = ( - attr_index + 1 - ) # Assuming not followed by parenthetical expression which wont happen - # Find final paren - if stack[attr_index + 1] == "(": - paren_count = 1 - for i in range(attr_index + 2, len(stack)): - elm = stack[i] - if elm == "(": - paren_count += 1 - elif elm == ")": - paren_count -= 1 - if paren_count == 0: - attr_end = i + 1 - break - new_stack = stack[0:attr_index] + stack[attr_end:] - debug_print("stripped stack is %s" % new_stack) - return new_stack - except: - return stack - - _nhack = re.compile(r"[A-Za-z_][A-Za-z0-9_]*") @@ -2962,8 +2933,6 @@ def _discard_function_contents(self, stack): def _evaluate_stack(self, token=None): """Evaluates the current name stack""" - self.nameStack = filter_out_attribute_keyword(self.nameStack) - self.stack = filter_out_attribute_keyword(self.stack) nameStackCopy = self.nameStack[:] debug_print( From 465560e3dea5b050aec3092fa2375601c1c1a6da Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Sun, 22 Dec 2019 23:21:53 -0500 Subject: [PATCH 05/14] Fix current location to use lookahead buffer --- CppHeaderParser/lexer.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CppHeaderParser/lexer.py b/CppHeaderParser/lexer.py index 154b95d..2c6fd76 100644 --- a/CppHeaderParser/lexer.py +++ b/CppHeaderParser/lexer.py @@ -134,6 +134,8 @@ def __init__(self, filename): self.lookahead = deque() def current_location(self): + if self.lookahead: + return self.lookahead[0].location return self.filename, self.lex.lineno - self.line_offset def get_doxygen(self): @@ -163,6 +165,8 @@ def get_doxygen(self): if tok is None: break + + tok.location = (self.filename, tok.lineno - self.line_offset) ttype = tok.type if ttype == "NEWLINE": self.lookahead.append(tok) @@ -197,6 +201,7 @@ def token(self, eof_ok=False): break if tok.type not in self._discard_types: + tok.location = (self.filename, tok.lineno - self.line_offset) break return tok From 27d9dca5cff4b9fa37f251226d557dd69ea1eab9 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Mon, 23 Dec 2019 21:54:39 -0500 Subject: [PATCH 06/14] Add parser notes --- CppHeaderParser/CppHeaderParser.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index 512e371..aa1b442 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -2611,6 +2611,18 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self.lex = lex self.headerFileNames = lex.filenames + # + # A note on parsing methodology + # + # The idea here is to consume as many tokens as needed to determine + # what the thing is that we're parsing. While some items can be identified + # early, typically the code below consumes until a '{', '}', or ; and + # then looks at the accumulated tokens to figure out what it is. + # + # Unfortunately, the code isn't always particularly consistent (but + # it's slowly getting there!), so take this with a grain of salt. + # + tok = None try: while True: @@ -2622,7 +2634,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): and self.anon_union_counter[1] ): self.anon_union_counter[1] -= 1 - tok.value = TagStr(tok.value, location=lex.current_location()) + tok.value = TagStr(tok.value, location=tok.location) # debug_print("TOK: %s"%tok) if tok.type == "NAME": if tok.value in self.IGNORE_NAMES: From 4602c99a697850c7a6d4624d8a7a0194f7b0e3dc Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Mon, 23 Dec 2019 22:28:22 -0500 Subject: [PATCH 07/14] Improve error messages --- CppHeaderParser/CppHeaderParser.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index aa1b442..4844581 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -290,7 +290,9 @@ def __new__(cls, *args, **kwargs): class CppParseError(Exception): - pass + def __init__(self, msg, tok=None): + Exception.__init__(self, msg) + self.tok = tok class CppTemplateParam(dict): @@ -2815,15 +2817,21 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): except Exception as e: if debug: raise + context = "" + if isinstance(e, CppParseError): + context = ": " + str(e) + if e.tok: + tok = e.tok + if tok: - filename, lineno = tok.value.location + filename, lineno = tok.location msg = ( - 'Not able to parse %s on line %d evaluating "%s"\nError around: %s' - % (filename, lineno, tok.value, " ".join(self.nameStack)) + "Not able to parse %s on line %d evaluating '%s'%s\nError around: %s" + % (filename, lineno, tok.value, context, " ".join(self.nameStack)) ) else: - msg = "Error parsing %s\nError around: %s" % ( - self.headerFileName, + msg = "Error parsing %s%s\nError around: %s" % ( + self.headerFileName, context, " ".join(self.nameStack), ) @@ -2872,12 +2880,12 @@ def _parse_error(self, tokens, expected): else: errtok = tokens[-1] if expected: - expected = ", expected " + expected + expected = ", expected '" + expected + "'" - msg = "unexpected %s%s" % (errtok.value, expected) + msg = "unexpected '%s'%s" % (errtok.value, expected) # TODO: better error message - return CppParseError(msg) + return CppParseError(msg, errtok) def _next_token_must_be(self, *tokenTypes): tok = self.lex.token() From d9092098387db8671d6fa2637c8190ed1ed7460e Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Mon, 23 Dec 2019 23:22:15 -0500 Subject: [PATCH 08/14] Make debug logging suck mildly less --- CppHeaderParser/CppHeaderParser.py | 98 ++++++++++++++++-------------- 1 file changed, 54 insertions(+), 44 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index 4844581..66417ff 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -45,6 +45,8 @@ # http://www.opensource.org/licenses/bsd-license.php # +from __future__ import print_function + from collections import deque import os @@ -69,7 +71,7 @@ # Controls warning_print print_warnings = 1 # Controls debug_print -debug = 0 +debug = 1 if os.environ.get("CPPHEADERPARSER_DEBUG") == "1" else 0 # Controls trace_print debug_trace = 0 @@ -82,25 +84,31 @@ def raise_exc(e, src_e): raise e -def error_print(arg): +def error_print(fmt, *args): if print_errors: - print(("[%4d] %s" % (inspect.currentframe().f_back.f_lineno, arg))) + fmt = "[%4d] " + fmt + args = (inspect.currentframe().f_back.f_lineno,) + args + print(fmt % args) -def warning_print(arg): +def warning_print(fmt, *args): if print_warnings: - print(("[%4d] %s" % (inspect.currentframe().f_back.f_lineno, arg))) + fmt = "[%4d] " + fmt + args = (inspect.currentframe().f_back.f_lineno,) + args + print(fmt % args) -def debug_print(arg): +def debug_print(fmt, *args): if debug: - print(("[%4d] %s" % (inspect.currentframe().f_back.f_lineno, arg))) + fmt = "[%4d] " + fmt + args = (inspect.currentframe().f_back.f_lineno,) + args + print(fmt % args) -def trace_print(*arg): +def trace_print(*args): if debug_trace: sys.stdout.write("[%s] " % (inspect.currentframe().f_back.f_lineno)) - for a in arg: + for a in args: sys.stdout.write("%s " % a) sys.stdout.write("\n") @@ -392,7 +400,7 @@ def _consume_parens(stack): def _parse_template_decl(stack): - debug_print("_parse_template_decl: %s" % stack) + debug_print("_parse_template_decl: %s", stack) params = [] param = CppTemplateParam() i = 0 @@ -492,7 +500,7 @@ def _parse_cppclass_name(c, stack): def _parse_cpp_base(stack): - debug_print("Parsing base: %s" % stack) + debug_print("Parsing base: %s", stack) inherits = [] i = 0 sl = len(stack) @@ -634,8 +642,8 @@ def __init__(self, nameStack, curTemplate, doxygen, location): self._public_forward_declares = [] self["namespace"] = "" - debug_print("Class: %s" % nameStack) - debug_print("Template: %s" % curTemplate) + debug_print("Class: %s", nameStack) + debug_print("Template: %s", curTemplate) if len(nameStack) < 2: nameStack.insert(1, "") # anonymous struct @@ -911,8 +919,8 @@ def show(self): return "\n\t\t ".join(r) def __init__(self, nameStack, curClass, methinfo, curTemplate, doxygen, location): - debug_print("Method: %s" % nameStack) - debug_print("Template: %s" % curTemplate) + debug_print("Method: %s", nameStack) + debug_print("Template: %s", curTemplate) if doxygen: self["doxygen"] = doxygen @@ -994,10 +1002,10 @@ def __init__(self, nameStack, curClass, methinfo, curTemplate, doxygen, location paramsStack = self._params_helper1(nameStack) - debug_print("curTemplate: %s" % curTemplate) + debug_print("curTemplate: %s", curTemplate) if curTemplate: self["template"] = curTemplate - debug_print("SET self['template'] to `%s`" % self["template"]) + debug_print("SET self['template'] to `%s`", self["template"]) params = [] # See if there is a doxygen comment for the variable @@ -1118,7 +1126,7 @@ class CppVariable(_CppVariable): Vars = [] def __init__(self, nameStack, doxygen, location, **kwargs): - debug_print("trace %s" % nameStack) + debug_print("trace %s", nameStack) if len(nameStack) and nameStack[0] == "extern": self["extern"] = True del nameStack[0] @@ -1130,7 +1138,7 @@ def __init__(self, nameStack, doxygen, location, **kwargs): arrayStack = nameStack[nameStack.index("[") :] if nameStack.count("[") > 1: debug_print("Multi dimensional array") - debug_print("arrayStack=%s" % arrayStack) + debug_print("arrayStack=%s", arrayStack) nums = [x for x in arrayStack if x.isdigit()] # Calculate size by multiplying all dimensions p = 1 @@ -1153,7 +1161,7 @@ def __init__(self, nameStack, doxygen, location, **kwargs): if doxygen: self["doxygen"] = doxygen - debug_print("Variable: %s" % nameStack) + debug_print("Variable: %s", nameStack) set_location_info(self, location) self["function_pointer"] = 0 @@ -1163,7 +1171,7 @@ def __init__(self, nameStack, doxygen, location, **kwargs): self["type"] = nameStack[0] self["name"] = "" else: - error_print(_stack_) + error_print("%s", _stack_) assert 0 elif is_function_pointer_stack(nameStack): # function pointer @@ -1596,7 +1604,7 @@ def finalize_vars(self): var["fundamental"] = True elif var["parent"]: - warning_print("WARN unresolved %s" % _tag) + warning_print("WARN unresolved %s", _tag) var["ctypes_type"] = "ctypes.c_void_p" var["unresolved"] = True @@ -1710,7 +1718,7 @@ def finalize_vars(self): elif tag.startswith( "_" ): # assume starting with underscore is not important for wrapping - warning_print("WARN unresolved %s" % _tag) + warning_print("WARN unresolved %s", _tag) var["ctypes_type"] = "ctypes.c_void_p" var["unresolved"] = True @@ -1803,9 +1811,7 @@ def finalize_vars(self): trace_print("Adding #include %s" % macro) self.includes.append(re.split("[\t ]+", macro, 1)[1].strip()) else: - debug_print( - "Cant detect what to do with precomp macro '%s'" % macro - ) + debug_print("Cant detect what to do with precomp macro '%s'", macro) except: pass self._precomp_macro_buf = None @@ -1887,7 +1893,7 @@ def finalize(self): klass = self.classes[b] meth["returns_class"] = a + "::" + b elif "<" in b and ">" in b: - warning_print("WARN-can not return template: %s" % b) + warning_print("WARN-can not return template: %s", b) meth["returns_unknown"] = True elif b in self.global_enums: enum = self.global_enums[b] @@ -2250,8 +2256,9 @@ def _evaluate_property_stack(self, clearStack=True, addToVar=None): ): self.nameStack.insert(0, filteredParseHistory[-1]["item"]["name"]) debug_print( - "DEANONYMOIZING %s to type '%s'" - % (self.nameStack[1], self.nameStack[0]) + "DEANONYMOIZING %s to type '%s'", + self.nameStack[1], + self.nameStack[0], ) if "," in self.nameStack: # Maybe we have a variable list # Figure out what part is the variable separator but remember templates of function pointer @@ -2333,7 +2340,7 @@ def _evaluate_class_stack(self): else: # struct self.curAccessSpecifier = "public" debug_print( - "curAccessSpecifier changed/defaulted to %s" % self.curAccessSpecifier + "curAccessSpecifier changed/defaulted to %s", self.curAccessSpecifier ) if self.nameStack[0] == "union": newClass = CppUnion( @@ -2518,7 +2525,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self.accessSpecifierStack = [] self.accessSpecifierScratch = [] debug_print( - "curAccessSpecifier changed/defaulted to %s" % self.curAccessSpecifier + "curAccessSpecifier changed/defaulted to %s", self.curAccessSpecifier ) self.initextra() # Old namestacks for a given level @@ -2600,7 +2607,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): if locEnd: # Strip it out but keep the linecount the same so line numbers are right match_str = headerFileStr[locStart:locEnd] - debug_print("Striping out '%s'" % match_str) + debug_print("Striping out '%s'", match_str) num_newlines = len([a for a in match_str if a == "\n"]) headerFileStr = headerFileStr.replace( headerFileStr[locStart:locEnd], "\n" * num_newlines @@ -2637,7 +2644,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): ): self.anon_union_counter[1] -= 1 tok.value = TagStr(tok.value, location=tok.location) - # debug_print("TOK: %s"%tok) + # debug_print("TOK: %s", tok) if tok.type == "NAME": if tok.value in self.IGNORE_NAMES: continue @@ -2662,7 +2669,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self.stack.append(tok.value) if tok.type in ("PRECOMP_MACRO", "PRECOMP_MACRO_CONT"): - debug_print("PRECOMP: %s" % tok) + debug_print("PRECOMP: %s", tok) self._precomp_macro_buf.append(tok.value) self.stack = [] self.nameStack = [] @@ -2723,7 +2730,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): # self.stack = []; print 'BRACE DEPTH', self.braceDepth, 'NS', len(self.nameSpaces) if self.curClass: debug_print( - "CURBD %s" % self._classes_brace_level[self.curClass] + "CURBD %s", self._classes_brace_level[self.curClass] ) if (self.braceDepth == 0) or ( @@ -2746,7 +2753,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): pass elif tok.type in _namestack_str_tokens: if tok.value in ignoreSymbols: - debug_print("Ignore symbol %s" % tok.value) + debug_print("Ignore symbol %s", tok.value) elif tok.value == "class": self.nameStack.append(tok.value) elif tok.value in supportedAccessSpecifier: @@ -2764,8 +2771,8 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self.curAccessSpecifier = tok.value self.accessSpecifierScratch.append(tok.value) debug_print( - "curAccessSpecifier updated to %s" - % self.curAccessSpecifier + "curAccessSpecifier updated to %s", + self.curAccessSpecifier, ) self.stack = [] else: @@ -2831,7 +2838,8 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): ) else: msg = "Error parsing %s%s\nError around: %s" % ( - self.headerFileName, context, + self.headerFileName, + context, " ".join(self.nameStack), ) @@ -2956,8 +2964,10 @@ def _evaluate_stack(self, token=None): nameStackCopy = self.nameStack[:] debug_print( - "Evaluating stack %s\n BraceDepth: %s (called from %d)" - % (self.nameStack, self.braceDepth, inspect.currentframe().f_back.f_lineno) + "Evaluating stack %s\n BraceDepth: %s (called from %d)", + self.nameStack, + self.braceDepth, + inspect.currentframe().f_back.f_lineno, ) # Handle special case of overloading operator () @@ -2968,9 +2978,9 @@ def _evaluate_stack(self, token=None): self.nameStack[operator_index] = "operator()" if len(self.curClass): - debug_print("%s (%s) " % (self.curClass, self.curAccessSpecifier)) + debug_print("%s (%s) ", self.curClass, self.curAccessSpecifier) else: - debug_print(" (%s) " % self.curAccessSpecifier) + debug_print(" (%s) ", self.curAccessSpecifier) # Filter special case of array with casting in it try: @@ -2981,7 +2991,7 @@ def _evaluate_stack(self, token=None): self.nameStack = ( self.nameStack[: bracePos + 1] + self.nameStack[endParen + 1 :] ) - debug_print("Filtered namestack to=%s" % self.nameStack) + debug_print("Filtered namestack to=%s", self.nameStack) except: pass From d2dd07520128705382478ef44ff876e9d438fce9 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Mon, 23 Dec 2019 23:04:29 -0500 Subject: [PATCH 09/14] Simplify access specifier parsing --- CppHeaderParser/CppHeaderParser.py | 43 +++++++----------------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index 66417ff..cf81d08 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -2523,7 +2523,6 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self.curAccessSpecifier = "private" # private is default self.curTemplate = None self.accessSpecifierStack = [] - self.accessSpecifierScratch = [] debug_print( "curAccessSpecifier changed/defaulted to %s", self.curAccessSpecifier ) @@ -2747,7 +2746,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self.curClass = "" self.stack = [] - if tok.type in _namestack_append_tokens: + elif tok.type in _namestack_append_tokens: self.nameStack.append(tok.value) elif tok.type in _namestack_pass_tokens: pass @@ -2756,47 +2755,24 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): debug_print("Ignore symbol %s", tok.value) elif tok.value == "class": self.nameStack.append(tok.value) - elif tok.value in supportedAccessSpecifier: - if len(self.nameStack) and self.nameStack[0] in ( - "class", - "struct", - "union", - ): - self.nameStack.append(tok.value) - elif self.braceDepth == len( - self.nameSpaces - ) + 1 or self.braceDepth == ( - len(self.nameSpaces) + len(self.curClass.split("::")) - ): - self.curAccessSpecifier = tok.value - self.accessSpecifierScratch.append(tok.value) - debug_print( - "curAccessSpecifier updated to %s", - self.curAccessSpecifier, - ) - self.stack = [] else: self.nameStack.append(tok.value) if self.anon_union_counter[0] == self.braceDepth: self.anon_union_counter = [-1, 0] elif tok.type == ":": - # Dont want colon to be first in stack - if len(self.nameStack) == 0: - self.accessSpecifierScratch = [] - continue - - # Handle situation where access specifiers can be multi words such as "public slots" - jns = " ".join(self.accessSpecifierScratch + self.nameStack) - if jns in supportedAccessSpecifier: - self.curAccessSpecifier = jns + if self.nameStack and self.nameStack[0] in supportedAccessSpecifier: + specifier = " ".join(self.nameStack) + if specifier in supportedAccessSpecifier: + self.curAccessSpecifier = specifier + else: + self.curAccessSpecifier = self.nameStack[0] debug_print( - "curAccessSpecifier updated to %s" % self.curAccessSpecifier + "curAccessSpecifier updated to %s", self.curAccessSpecifier ) - self.stack = [] self.nameStack = [] + self.stack = [] else: self.nameStack.append(tok.value) - self.accessSpecifierScratch = [] elif tok.type == ";": if ( @@ -2858,7 +2834,6 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): "nameSpaces", "curAccessSpecifier", "accessSpecifierStack", - "accessSpecifierScratch", "nameStackHistory", "anon_struct_counter", "anon_union_counter", From 4260f06fec8eade5c66603a4a83c68cae97eead6 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Tue, 24 Dec 2019 01:41:27 -0500 Subject: [PATCH 10/14] Handle doxygen comments for elements separated with a newline --- CppHeaderParser/CppHeaderParser.py | 38 ++++++++++---- CppHeaderParser/lexer.py | 4 +- CppHeaderParser/test/test_CppHeaderParser.py | 55 ++++++++++++++++++++ 3 files changed, 85 insertions(+), 12 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index cf81d08..f05683b 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -2127,7 +2127,7 @@ def _evaluate_method_stack(self): info["name"], info, self.curTemplate, - self.lex.get_doxygen(), + self._get_stmt_doxygen(), self._get_location(self.nameStack), ) klass = self.classes[info["class"]] @@ -2144,7 +2144,7 @@ def _evaluate_method_stack(self): self.curClass, info, self.curTemplate, - self.lex.get_doxygen(), + self._get_stmt_doxygen(), self._get_location(self.nameStack), ) klass = self.classes[self.curClass] @@ -2161,7 +2161,7 @@ def _evaluate_method_stack(self): None, info, self.curTemplate, - self.lex.get_doxygen(), + self._get_stmt_doxygen(), self._get_location(self.nameStack), ) self.functions.append(newMethod) @@ -2288,7 +2288,7 @@ def _evaluate_property_stack(self, clearStack=True, addToVar=None): newVar = CppVariable( self.nameStack, - self.lex.get_doxygen(), + self._get_stmt_doxygen(), self._get_location(self.nameStack), ) newVar["namespace"] = self.current_namespace() @@ -2305,7 +2305,7 @@ def _evaluate_property_stack(self, clearStack=True, addToVar=None): debug_print("Found Global variable") newVar = CppVariable( self.nameStack, - self.lex.get_doxygen(), + self._get_stmt_doxygen(), self._get_location(self.nameStack), ) if addToVar: @@ -2345,7 +2345,7 @@ def _evaluate_class_stack(self): if self.nameStack[0] == "union": newClass = CppUnion( self.nameStack, - self.lex.get_doxygen(), + self._get_stmt_doxygen(), self._get_location(self.nameStack), ) if newClass["name"] == "union ": @@ -2357,7 +2357,7 @@ def _evaluate_class_stack(self): newClass = CppClass( self.nameStack, self.curTemplate, - self.lex.get_doxygen(), + self._get_stmt_doxygen(), self._get_location(self.nameStack), ) trace_print("NEW CLASS", newClass["name"]) @@ -2631,6 +2631,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): # it's slowly getting there!), so take this with a grain of salt. # + self._doxygen_cache = None tok = None try: while True: @@ -2648,6 +2649,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): if tok.value in self.IGNORE_NAMES: continue elif tok.value == "template": + self._doxygen_cache = self.lex.get_doxygen() self._parse_template() continue elif tok.value == "alignas": @@ -2666,6 +2668,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): continue self.stack.append(tok.value) + nslen = len(self.nameStack) if tok.type in ("PRECOMP_MACRO", "PRECOMP_MACRO_CONT"): debug_print("PRECOMP: %s", tok) @@ -2748,6 +2751,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): elif tok.type in _namestack_append_tokens: self.nameStack.append(tok.value) + nameStackAppended = True elif tok.type in _namestack_pass_tokens: pass elif tok.type in _namestack_str_tokens: @@ -2797,6 +2801,11 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self.stack = [] self.nameStack = [] + newNsLen = len(self.nameStack) + if nslen != newNsLen and newNsLen == 1: + if not self.curTemplate: + self._doxygen_cache = self.lex.get_doxygen() + except Exception as e: if debug: raise @@ -2829,6 +2838,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): # Delete some temporary variables for key in [ "_precomp_macro_buf", + "_doxygen_cache", "lex", "nameStack", "nameSpaces", @@ -2856,6 +2866,14 @@ def _get_location(self, stack): return self.lex.current_location() + def _get_stmt_doxygen(self): + # retrieves the doxygen comment associated with an accumulated + # statement (since doxygen comments have to be retrieved immediately) + doxygen, self._doxygen_cache = self._doxygen_cache, "" + if not doxygen: + doxygen = self.lex.get_doxygen() + return doxygen + def _parse_error(self, tokens, expected): if not tokens: # common case after a failed token_if @@ -3003,13 +3021,13 @@ def _evaluate_stack(self, token=None): alias = self.nameStack[1] ns, stack = _split_namespace(self.nameStack[3:]) atype = CppVariable( - stack, self.lex.get_doxygen(), self._get_location(stack) + stack, self._get_stmt_doxygen(), self._get_location(stack) ) else: # using foo::bar ns, stack = _split_namespace(self.nameStack[1:]) atype = CppVariable( - stack, self.lex.get_doxygen(), self._get_location(stack) + stack, self._get_stmt_doxygen(), self._get_location(stack) ) alias = atype["type"] @@ -3158,7 +3176,7 @@ def _parse_enum(self): """ # entry: enum token was just consumed - doxygen = self.lex.get_doxygen() + doxygen = self._get_stmt_doxygen() location = self.lex.current_location() nametok = self.lex.token() diff --git a/CppHeaderParser/lexer.py b/CppHeaderParser/lexer.py index 2c6fd76..cb3f76a 100644 --- a/CppHeaderParser/lexer.py +++ b/CppHeaderParser/lexer.py @@ -147,8 +147,8 @@ def get_doxygen(self): comments don't exist. """ - # assuption: only time you call this function is after a name - # token is consumed along with all its pieces + # Assumption: This function is either called at the beginning of a + # statement or at the end of a statement if self.comments: comments = self.comments diff --git a/CppHeaderParser/test/test_CppHeaderParser.py b/CppHeaderParser/test/test_CppHeaderParser.py index 82a8a8c..c631adb 100644 --- a/CppHeaderParser/test/test_CppHeaderParser.py +++ b/CppHeaderParser/test/test_CppHeaderParser.py @@ -3239,5 +3239,60 @@ def test_fn(self): self.assertEqual("copy", fn["name"]) +class MessedUpDoxygen_TestCase(unittest.TestCase): + def setUp(self): + self.cppHeader = CppHeaderParser.CppHeader( + """ + +/// fn comment +void +fn(); + +/// var comment +int +v1 = 0; + +int +v2 = 0; /// var2 comment + +/// cls comment +class +C {}; + +/// template comment +template +class +C2 {}; + +""", + "string", + ) + + def test_fn(self): + fn = self.cppHeader.functions[0] + self.assertEqual("fn", fn["name"]) + self.assertEqual("/// fn comment", fn["doxygen"]) + + def test_var1(self): + v = self.cppHeader.variables[0] + self.assertEqual("v1", v["name"]) + self.assertEqual("/// var comment", v["doxygen"]) + + def test_var2(self): + v = self.cppHeader.variables[1] + self.assertEqual("v2", v["name"]) + self.assertEqual("/// var2 comment", v["doxygen"]) + + def test_cls(self): + c = self.cppHeader.classes["C"] + self.assertEqual("C", c["name"]) + self.assertEqual("/// cls comment", c["doxygen"]) + + def test_cls2(self): + c = self.cppHeader.classes["C2"] + self.assertEqual("C2", c["name"]) + self.assertEqual("/// template comment", c["doxygen"]) + + if __name__ == "__main__": unittest.main() From 0030ea4a56d3d849261c39df337d00e4c6bc0872 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Tue, 24 Dec 2019 02:12:00 -0500 Subject: [PATCH 11/14] foo(void) should be recorded as having zero parameters --- CppHeaderParser/CppHeaderParser.py | 4 ++++ CppHeaderParser/test/test_CppHeaderParser.py | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index f05683b..b453083 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -1062,6 +1062,10 @@ def __init__(self, nameStack, curClass, methinfo, curTemplate, doxygen, location params.append(param) break + # foo(void) should be zero parameters + if len(params) == 1 and params[0]["type"] == "void": + params = [] + self["parameters"] = params self._params_helper2(params) # mods params inplace diff --git a/CppHeaderParser/test/test_CppHeaderParser.py b/CppHeaderParser/test/test_CppHeaderParser.py index c631adb..ec9f556 100644 --- a/CppHeaderParser/test/test_CppHeaderParser.py +++ b/CppHeaderParser/test/test_CppHeaderParser.py @@ -1908,7 +1908,9 @@ def setUp(self): self.cppHeader = CppHeaderParser.CppHeader("TestSampleClass.h") def test_termite_function(self): - self.assertEqual(self.cppHeader.functions[5]["name"], "termite") + f = self.cppHeader.functions[5] + self.assertEqual(f["name"], "termite") + self.assertEqual(len(f["parameters"]), 0) # Bug: 3569622 From e32ecadd4a2537cd661ec445f51b8b7149f04d0f Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Tue, 24 Dec 2019 02:23:35 -0500 Subject: [PATCH 12/14] Fix enum parsing with methods that have return values or parameters --- CppHeaderParser/CppHeaderParser.py | 71 ++++++++++++++++---- CppHeaderParser/lexer.py | 3 + CppHeaderParser/test/test_CppHeaderParser.py | 55 +++++++++++++++ 3 files changed, 116 insertions(+), 13 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index b453083..05c075e 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -227,6 +227,17 @@ def is_property_namestack(nameStack): return r +def is_enum_namestack(nameStack): + """Determines if a namestack is an enum namestack""" + if not nameStack: + return False + if nameStack[0] == "enum": + return True + if len(nameStack) > 1 and nameStack[0] == "typedef" and nameStack[1] == "enum": + return True + return False + + def set_location_info(thing, location): filename, line_number = location if filename: @@ -1598,10 +1609,11 @@ def finalize_vars(self): elif tag in self.global_enums: enum = self.global_enums[tag] - if enum["type"] is int: + enum_type = enum.get("type") + if enum_type is int: var["ctypes_type"] = "ctypes.c_int" var["raw_type"] = "int" - elif enum["type"] is str: + elif enum_type is str: var["ctypes_type"] = "ctypes.c_char_p" var["raw_type"] = "char*" var["enum"] = enum["namespace"] + enum["name"] @@ -2181,6 +2193,7 @@ def _evaluate_method_stack(self): trace_print("free function?", self.nameStack) self.stack = [] + self.stmtTokens = [] def _parse_typedef(self, stack, namespace=""): if not stack or "typedef" not in stack: @@ -2318,6 +2331,7 @@ def _evaluate_property_stack(self, clearStack=True, addToVar=None): if clearStack: self.stack = [] # CLEAR STACK + self.stmtTokens = [] def _evaluate_class_stack(self): """Create a Class out of the name stack (but not its parts)""" @@ -2368,6 +2382,7 @@ def _evaluate_class_stack(self): newClass["declaration_method"] = self.nameStack[0] self.classes_order.append(newClass) # good idea to save ordering self.stack = [] # fixes if class declared with ';' in closing brace + self.stmtTokens = [] classKey = newClass["name"] if parent: @@ -2637,6 +2652,8 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self._doxygen_cache = None tok = None + self.stmtTokens = [] + try: while True: tok = lex.token(eof_ok=True) @@ -2659,11 +2676,6 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): elif tok.value == "alignas": self._parse_attribute_specifier_seq(tok) continue - elif tok.value == "enum": - self._parse_enum() - self.stack = [] - self.nameStack = [] - continue elif tok.value == "__attribute__": self._parse_gcc_attribute() continue @@ -2671,15 +2683,20 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self._parse_attribute_specifier_seq(tok) continue + # TODO: get rid of stack, move to stmtTokens self.stack.append(tok.value) + self.stmtTokens.append(tok) + nslen = len(self.nameStack) if tok.type in ("PRECOMP_MACRO", "PRECOMP_MACRO_CONT"): debug_print("PRECOMP: %s", tok) self._precomp_macro_buf.append(tok.value) self.stack = [] + self.stmtTokens = [] self.nameStack = [] continue + if tok.type == "{": if len(self.nameStack) >= 2 and is_namespace( self.nameStack @@ -2692,6 +2709,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self.nameSpaces.append(self.nameStack[1]) ns = self.cur_namespace() self.stack = [] + self.stmtTokens = [] if ns not in self.namespaces: self.namespaces.append(ns) # Detect special condition of macro magic before class declaration so we @@ -2716,11 +2734,14 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self.nameStack = origNameStack[classLocationNS:] self.stack = origStack[classLocationS:] + # If set to True, indicates that the callee consumed + # all of the tokens between { and } self.braceHandled = False if self.nameStack: self._evaluate_stack() if self.stack and self.stack[0] == "class": self.stack = [] + self.stmtTokens = [] if not self.braceHandled: self.braceDepth += 1 @@ -2730,6 +2751,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): if self.braceDepth == len(self.nameSpaces): tmp = self.nameSpaces.pop() self.stack = [] # clear stack when namespace ends? + self.stmtTokens = [] else: self._evaluate_stack() self.braceDepth -= 1 @@ -2752,6 +2774,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): else: self.curClass = "" self.stack = [] + self.stmtTokens = [] elif tok.type in _namestack_append_tokens: self.nameStack.append(tok.value) @@ -2779,6 +2802,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): ) self.nameStack = [] self.stack = [] + self.stmtTokens = [] else: self.nameStack.append(tok.value) @@ -2804,6 +2828,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self._evaluate_stack(tok.type) self.stack = [] self.nameStack = [] + self.stmtTokens = [] newNsLen = len(self.nameStack) if nslen != newNsLen and newNsLen == 1: @@ -2997,7 +3022,11 @@ def _evaluate_stack(self, token=None): not self.curClass and "typedef" in self.nameStack and ( - ("struct" not in self.nameStack and "union" not in self.nameStack) + ( + "struct" not in self.nameStack + and "union" not in self.nameStack + and "enum" not in self.nameStack + ) or self.stack[-1] == ";" ) ): @@ -3055,6 +3084,12 @@ def _evaluate_stack(self, token=None): else: # Free function self._evaluate_method_stack() + elif is_enum_namestack(self.nameStack): + debug_print("trace") + self._parse_enum() + self.nameStack = [] + self.stack = [] + self.stmtTokens = [] elif ( len(self.nameStack) == 1 and len(self.nameStackHistory) > self.braceDepth @@ -3178,10 +3213,20 @@ def _parse_enum(self): enum_base: ":" type_specifier_seq """ + is_typedef = False + self.lex.return_tokens(self.stmtTokens) - # entry: enum token was just consumed doxygen = self._get_stmt_doxygen() - location = self.lex.current_location() + + tok = self.lex.token() + if tok.value == "typedef": + is_typedef = True + tok = self.lex.token() + + if tok.value != "enum": + raise self._parse_error((tok,), "enum") + + location = tok.location nametok = self.lex.token() if nametok.value in ("class", "struct"): @@ -3212,15 +3257,15 @@ def _parse_enum(self): base.append(tok.value) newEnum = CppEnum(name, doxygen, location) - if self.nameStack: - if self.nameStack[0] == "typedef": - newEnum["typedef"] = True + if is_typedef: + newEnum["typedef"] = True if base: newEnum["type"] = "".join(base) instancesData = [] if tok.type == "{": + self.braceHandled = True self._parse_enumerator_list(newEnum["values"]) newEnum.resolve_enum_values(newEnum["values"]) tok = self.lex.token() diff --git a/CppHeaderParser/lexer.py b/CppHeaderParser/lexer.py index cb3f76a..1cd411e 100644 --- a/CppHeaderParser/lexer.py +++ b/CppHeaderParser/lexer.py @@ -220,6 +220,9 @@ def token_if(self, *types): def return_token(self, tok): self.lookahead.appendleft(tok) + def return_tokens(self, toks): + self.lookahead.extendleft(reversed(toks)) + if __name__ == "__main__": try: diff --git a/CppHeaderParser/test/test_CppHeaderParser.py b/CppHeaderParser/test/test_CppHeaderParser.py index ec9f556..49d58d4 100644 --- a/CppHeaderParser/test/test_CppHeaderParser.py +++ b/CppHeaderParser/test/test_CppHeaderParser.py @@ -3296,5 +3296,60 @@ def test_cls2(self): self.assertEqual("/// template comment", c["doxygen"]) +class EnumParameter_TestCase(unittest.TestCase): + def setUp(self): + self.cppHeader = CppHeaderParser.CppHeader( + """ +enum E { + VALUE, +}; + +void fn_with_enum_param1(const enum E e); + +void fn_with_enum_param2(const enum E e) { + // code here +} + +enum E fn_with_enum_retval1(void); + +enum E fn_with_enum_retval2(void) { + // code here +} + +""", + "string", + ) + + def test_enum_param(self): + fn = self.cppHeader.functions[0] + self.assertEqual("fn_with_enum_param1", fn["name"]) + self.assertEqual(1, len(fn["parameters"])) + + p1 = fn["parameters"][0] + self.assertEqual("e", p1["name"]) + self.assertEqual("const enum E", p1["type"]) + self.assertEqual("int", p1["raw_type"]) + + fn = self.cppHeader.functions[1] + self.assertEqual("fn_with_enum_param2", fn["name"]) + self.assertEqual(1, len(fn["parameters"])) + + p1 = fn["parameters"][0] + self.assertEqual("e", p1["name"]) + self.assertEqual("const enum E", p1["type"]) + self.assertEqual("int", p1["raw_type"]) + + def test_enum_retval(self): + fn = self.cppHeader.functions[2] + self.assertEqual("fn_with_enum_retval1", fn["name"]) + self.assertEqual(0, len(fn["parameters"])) + self.assertEqual("enum E", fn["rtnType"]) + + fn = self.cppHeader.functions[3] + self.assertEqual("fn_with_enum_retval2", fn["name"]) + self.assertEqual(0, len(fn["parameters"])) + self.assertEqual("enum E", fn["rtnType"]) + + if __name__ == "__main__": unittest.main() From 27682a154b7d1dbb224add0f9fc560193dd86e51 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Tue, 24 Dec 2019 02:24:31 -0500 Subject: [PATCH 13/14] Add additional debug_prints --- CppHeaderParser/CppHeaderParser.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index 05c075e..77e5336 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -501,6 +501,7 @@ def _parse_cppclass_name(c, stack): name = "<" + name + ">" c["name"] = name c["bare_name"] = name + debug_print("Found class '%s'", name) # backwards compat classParams = c.get("class_params") @@ -1909,7 +1910,6 @@ def finalize(self): klass = self.classes[b] meth["returns_class"] = a + "::" + b elif "<" in b and ">" in b: - warning_print("WARN-can not return template: %s", b) meth["returns_unknown"] = True elif b in self.global_enums: enum = self.global_enums[b] @@ -2665,6 +2665,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): ): self.anon_union_counter[1] -= 1 tok.value = TagStr(tok.value, location=tok.location) + # debug_print("TOK: %s", tok) if tok.type == "NAME": if tok.value in self.IGNORE_NAMES: @@ -2755,7 +2756,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): else: self._evaluate_stack() self.braceDepth -= 1 - # self.stack = []; print 'BRACE DEPTH', self.braceDepth, 'NS', len(self.nameSpaces) + if self.curClass: debug_print( "CURBD %s", self._classes_brace_level[self.curClass] @@ -3030,13 +3031,13 @@ def _evaluate_stack(self, token=None): or self.stack[-1] == ";" ) ): - trace_print("STACK", self.stack) + debug_print("trace") + trace_print("typedef %s", self.stack) self._evaluate_typedef() return elif len(self.nameStack) == 0: - debug_print("trace") - debug_print("(Empty Stack)") + debug_print("trace (Empty Stack)") return elif self.nameStack[0] == "namespace": # Taken care of outside of here @@ -3140,6 +3141,8 @@ def _evaluate_stack(self, token=None): elif self.braceDepth > len(self.nameSpaces) + 1: debug_print("trace") self.nameStack = [] + else: + debug_print("Discarded statement %s" % (self.nameStack,)) try: self.nameStackHistory[self.braceDepth] = (nameStackCopy, self.curClass) @@ -3213,6 +3216,8 @@ def _parse_enum(self): enum_base: ":" type_specifier_seq """ + debug_print("parsing enum") + is_typedef = False self.lex.return_tokens(self.stmtTokens) @@ -3244,8 +3249,10 @@ def _parse_enum(self): name = "" if nametok.type == "NAME": name = nametok.value + debug_print("enum name is '%s'", name) tok = self.lex.token() else: + debug_print("anonymous enum") tok = nametok base = [] @@ -3336,6 +3343,8 @@ def _parse_enumerator_list(self, values): value["doxygen"] = doxygen values.append(value) + debug_print("enumerator value '%s'", value["name"]) + tok = self._next_token_must_be("}", ",", "=", "DBL_LBRACKET") if tok.type == "DBL_LBRACKET": self._parse_attribute_specifier_seq(tok) From 4a882ef73418328cd495df5e03216decc127c1b0 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Tue, 24 Dec 2019 02:40:14 -0500 Subject: [PATCH 14/14] Discard static_assert statements --- CppHeaderParser/CppHeaderParser.py | 13 +++++++++---- CppHeaderParser/test/test_CppHeaderParser.py | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index 77e5336..0b830b7 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -2018,7 +2018,7 @@ def parse_method_type(self, stack): if stack[-1] == "{": info["defined"] = True - self._discard_function_contents(stack) + self._discard_contents("{", "}") self.braceHandled = True elif stack[-1] == ";": info["defined"] = False @@ -2680,6 +2680,11 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): elif tok.value == "__attribute__": self._parse_gcc_attribute() continue + elif not self.stack and tok.value == "static_assert": + self._next_token_must_be("(") + self._discard_contents("(", ")") + continue + elif tok.type == "DBL_LBRACKET": self._parse_attribute_specifier_seq(tok) continue @@ -2967,16 +2972,16 @@ def _consume_balanced_tokens(self, *init_tokens): if next_end: match_stack.append(next_end) - def _discard_function_contents(self, stack): + def _discard_contents(self, start_type, end_type): # use this instead of consume_balanced_tokens because # we don't care at all about the internals level = 1 get_token = self.lex.token while True: tok = get_token() - if tok.type == "{": + if tok.type == start_type: level += 1 - elif tok.type == "}": + elif tok.type == end_type: level -= 1 if level == 0: break diff --git a/CppHeaderParser/test/test_CppHeaderParser.py b/CppHeaderParser/test/test_CppHeaderParser.py index 49d58d4..6cfc5a9 100644 --- a/CppHeaderParser/test/test_CppHeaderParser.py +++ b/CppHeaderParser/test/test_CppHeaderParser.py @@ -3351,5 +3351,20 @@ def test_enum_retval(self): self.assertEqual("enum E", fn["rtnType"]) +class StaticAssert_TestCase(unittest.TestCase): + def setUp(self): + self.cppHeader = CppHeaderParser.CppHeader( + """ +static_assert(sizeof(int) == 4, + "integer size is wrong" + "for some reason"); +""", + "string", + ) + + def test_nothing(self): + self.assertEqual(self.cppHeader.functions, []) + + if __name__ == "__main__": unittest.main()