From 212ae62dd14c0e2f9ba94b19070ffe5e03901784 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Tue, 24 Dec 2019 22:17:20 -0500 Subject: [PATCH 1/7] Introduce :: lexical token, should make parsing easier --- CppHeaderParser/CppHeaderParser.py | 31 +++++++++++++++--------------- CppHeaderParser/lexer.py | 2 ++ 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index 0b830b7..89f195a 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -255,11 +255,13 @@ def _split_namespace(namestack): :rtype: Tuple[str, list] """ + # TODO: this should be using tokens instead of nhack + last_colon = None for i, n in enumerate(namestack): - if n == ":": + if n == "::": last_colon = i - if i and n != ":" and not _nhack.match(n): + if i and n != "::" and not _nhack.match(n): break if last_colon: @@ -472,12 +474,8 @@ def _parse_cppclass_name(c, stack): if t == ":": if i >= sl: raise CppParseError("class decl ended with ':'") - t = stack[i] - if t != ":": - # reached the base declaration - break - - i += 1 + break + elif t == "::": name += "::" continue elif t == "final": @@ -954,7 +952,7 @@ def __init__(self, nameStack, curClass, methinfo, curTemplate, doxygen, location if len(self["rtnType"]) == 0 or self["name"] == curClass: self["rtnType"] = "void" - self["rtnType"] = self["rtnType"].replace(" : : ", "::") + self["rtnType"] = self["rtnType"].replace(" :: ", "::") self["rtnType"] = self["rtnType"].replace(" < ", "<") self["rtnType"] = self["rtnType"].replace(" > ", "> ").replace(">>", "> >") self["rtnType"] = self["rtnType"].replace(" ,", ",") @@ -1959,8 +1957,8 @@ def finalize(self): ) meth["returns_unknown"] = True - if meth["returns"].startswith(": : "): - meth["returns"] = meth["returns"].replace(": : ", "::") + if meth["returns"].startswith(":: "): + meth["returns"] = meth["returns"].replace(":: ", "::") for cls in list(self.classes.values()): methnames = cls.get_all_method_names() @@ -1996,7 +1994,7 @@ def parse_method_type(self, stack): stack = stack[1:] info = { "debug": " ".join(stack) - .replace(" : : ", "::") + .replace(" :: ", "::") .replace(" < ", "<") .replace(" > ", "> ") .replace(" >", ">") @@ -2010,7 +2008,7 @@ def parse_method_type(self, stack): header = stack[: stack.index("(")] header = " ".join(header) - header = header.replace(" : : ", "::") + header = header.replace(" :: ", "::") header = header.replace(" < ", "<") header = header.replace(" > ", "> ") header = header.replace("default ", "default") @@ -2452,6 +2450,7 @@ def evalute_forward_decl(self): "+", "STRING_LITERAL", "ELLIPSIS", + "DBL_COLON", "SHIFT_LEFT", } @@ -3164,7 +3163,7 @@ def _parse_template(self): consumed = self._consume_balanced_tokens(tok) tmpl = " ".join(tok.value for tok in consumed) tmpl = ( - tmpl.replace(" : : ", "::") + tmpl.replace(" :: ", "::") .replace(" <", "<") .replace("< ", "<") .replace(" >", ">") @@ -3364,10 +3363,10 @@ def _parse_enumerator_list(self, values): while True: tok = self.lex.token() if tok.type == "}": - value["value"] = (" ".join(v)).replace(": :", "::") + value["value"] = " ".join(v) return elif tok.type == ",": - value["value"] = (" ".join(v)).replace(": :", "::") + value["value"] = " ".join(v) break elif tok.type in self._balanced_token_map: v.extend(t.value for t in self._consume_balanced_tokens(tok)) diff --git a/CppHeaderParser/lexer.py b/CppHeaderParser/lexer.py index 1cd411e..4531ab2 100644 --- a/CppHeaderParser/lexer.py +++ b/CppHeaderParser/lexer.py @@ -22,6 +22,7 @@ class Lexer(object): "ELLIPSIS", "DBL_LBRACKET", "DBL_RBRACKET", + "DBL_COLON", "SHIFT_LEFT", ] @@ -85,6 +86,7 @@ def t_COMMENT_SINGLELINE(self, t): t_ELLIPSIS = r"\.\.\." t_DBL_LBRACKET = r"\[\[" t_DBL_RBRACKET = r"\]\]" + t_DBL_COLON = r"::" t_SHIFT_LEFT = r"<<" # SHIFT_RIGHT introduces ambiguity From 9607663d3015001fd06df4c1fd0b0e7a008079fe Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Tue, 24 Dec 2019 22:35:07 -0500 Subject: [PATCH 2/7] Remove function parsing special case that's no longer needed --- CppHeaderParser/CppHeaderParser.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index 89f195a..295c388 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -3075,20 +3075,7 @@ def _evaluate_stack(self, token=None): self.using[alias] = atype elif is_method_namestack(self.stack) and "(" in self.nameStack: debug_print("trace") - if self.braceDepth > 0: - if ( - "{" in self.stack - and self.stack[0] != "{" - and self.stack[-1] == ";" - and self.braceDepth == 1 - ): - # Special case of a method defined outside a class that has a body - pass - else: - self._evaluate_method_stack() - else: - # Free function - self._evaluate_method_stack() + self._evaluate_method_stack() elif is_enum_namestack(self.nameStack): debug_print("trace") self._parse_enum() From 33b7958576e916b939daef80ed634d6fa32f9404 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Tue, 24 Dec 2019 22:44:50 -0500 Subject: [PATCH 3/7] Remove more mess --- CppHeaderParser/CppHeaderParser.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index 295c388..95b192b 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -3079,7 +3079,6 @@ def _evaluate_stack(self, token=None): elif is_enum_namestack(self.nameStack): debug_print("trace") self._parse_enum() - self.nameStack = [] self.stack = [] self.stmtTokens = [] elif ( @@ -3123,15 +3122,12 @@ def _evaluate_stack(self, token=None): elif not self.curClass: debug_print("trace") - self.nameStack = [] elif self.braceDepth < 1: debug_print("trace") # Ignore global stuff for now debug_print("Global stuff: %s" % self.nameStack) - self.nameStack = [] elif self.braceDepth > len(self.nameSpaces) + 1: debug_print("trace") - self.nameStack = [] else: debug_print("Discarded statement %s" % (self.nameStack,)) @@ -3139,9 +3135,9 @@ def _evaluate_stack(self, token=None): self.nameStackHistory[self.braceDepth] = (nameStackCopy, self.curClass) except: self.nameStackHistory.append((nameStackCopy, self.curClass)) - self.nameStack = ( - [] - ) # its a little confusing to have some if/else above return and others not, and then clearning the nameStack down here + + # its a little confusing to have some if/else above return and others not, and then clearning the nameStack down here + self.nameStack = [] self.lex.doxygenCommentCache = "" self.curTemplate = None From 9b354bdcff58623e2196e6851408a026317ff0aa Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Tue, 24 Dec 2019 22:51:09 -0500 Subject: [PATCH 4/7] Remove unneeded method parsing special case --- CppHeaderParser/CppHeaderParser.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index 95b192b..53c589e 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -1990,8 +1990,6 @@ def finalize(self): def parse_method_type(self, stack): trace_print("meth type info", stack) - if stack[0] in ":;" and stack[1] != ":": - stack = stack[1:] info = { "debug": " ".join(stack) .replace(" :: ", "::") From 67a5956f4251141be30ce52d4639aed3fac0fadf Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Tue, 24 Dec 2019 23:46:40 -0500 Subject: [PATCH 5/7] More robust ctor initializer discard code --- CppHeaderParser/CppHeaderParser.py | 87 +++++++++++++++----- CppHeaderParser/test/test_CppHeaderParser.py | 53 ++++++++++++ 2 files changed, 121 insertions(+), 19 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index 53c589e..e940dd2 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -193,7 +193,9 @@ def is_method_namestack(stack): elif "{" in stack and stack.index("{") < stack.index("("): r = False # struct that looks like a method/class elif "(" in stack and ")" in stack: - if "{" in stack and "}" in stack: + if stack[-1] == ":": + r = True + elif "{" in stack and "}" in stack: r = True elif stack[-1] == ";": if is_function_pointer_stack(stack): @@ -994,22 +996,6 @@ def __init__(self, nameStack, curClass, methinfo, curTemplate, doxygen, location self.update(methinfo) set_location_info(self, location) - # Filter out initializer lists used in constructors - try: - paren_depth_counter = 0 - for i in range(0, len(nameStack)): - elm = nameStack[i] - if elm == "(": - paren_depth_counter += 1 - if elm == ")": - paren_depth_counter -= 1 - if paren_depth_counter == 0 and nameStack[i + 1] == ":": - debug_print("Stripping out initializer list") - nameStack = nameStack[: i + 1] - break - except: - pass - paramsStack = self._params_helper1(nameStack) debug_print("curTemplate: %s", curTemplate) @@ -2018,6 +2004,10 @@ def parse_method_type(self, stack): self.braceHandled = True elif stack[-1] == ";": info["defined"] = False + elif stack[-1] == ":": + info["defined"] = True + self._discard_ctor_initializer() + self.braceHandled = True else: assert 0 @@ -2806,6 +2796,12 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): self.nameStack = [] self.stack = [] self.stmtTokens = [] + elif is_method_namestack(self.stack): + debug_print("trace") + self._evaluate_method_stack() + self.nameStack = [] + self.stack = [] + self.stmtTokens = [] else: self.nameStack.append(tok.value) @@ -2923,7 +2919,7 @@ def _parse_error(self, tokens, expected): def _next_token_must_be(self, *tokenTypes): tok = self.lex.token() if tok.type not in tokenTypes: - raise self._parse_error((tok,), " or ".join(tokenTypes)) + raise self._parse_error((tok,), "' or '".join(tokenTypes)) return tok _end_balanced_tokens = {">", "}", "]", ")", "DBL_RBRACKET"} @@ -2983,6 +2979,59 @@ def _discard_contents(self, start_type, end_type): if level == 0: break + def _discard_ctor_initializer(self): + """ + ctor_initializer: ":" mem_initializer_list + + mem_initializer_list: mem_initializer ["..."] + | mem_initializer "," mem_initializer_list ["..."] + + mem_initializer: mem_initializer_id "(" [expression_list] ")" + | mem_initializer_id braced_init_list + + mem_initializer_id: class_or_decltype + | IDENTIFIER + """ + debug_print("discarding ctor intializer") + # all of this is discarded.. the challenge is to determine + # when the initializer ends and the function starts + while True: + tok = self.lex.token() + if tok.type == "DBL_COLON": + tok = self.lex.token() + + if tok.type == "decltype": + tok = self._next_token_must_be("(") + self._consume_balanced_tokens(tok) + tok = self.lex.token() + + # each initializer is either foo() or foo{}, so look for that + while True: + if tok.type not in ("{", "("): + tok = self.lex.token() + continue + + if tok.type == "{": + self._discard_contents("{", "}") + elif tok.type == "(": + self._discard_contents("(", ")") + + tok = self.lex.token() + break + + # at the end + if tok.type == "ELLIPSIS": + tok = self.lex.token() + + if tok.type == ",": + continue + elif tok.type == "{": + # reached the function + self._discard_contents("{", "}") + return + else: + raise self._parse_error((tok,), ",' or '{") + def _evaluate_stack(self, token=None): """Evaluates the current name stack""" @@ -3133,7 +3182,7 @@ def _evaluate_stack(self, token=None): self.nameStackHistory[self.braceDepth] = (nameStackCopy, self.curClass) except: self.nameStackHistory.append((nameStackCopy, self.curClass)) - + # its a little confusing to have some if/else above return and others not, and then clearning the nameStack down here self.nameStack = [] self.lex.doxygenCommentCache = "" diff --git a/CppHeaderParser/test/test_CppHeaderParser.py b/CppHeaderParser/test/test_CppHeaderParser.py index 6cfc5a9..ec52861 100644 --- a/CppHeaderParser/test/test_CppHeaderParser.py +++ b/CppHeaderParser/test/test_CppHeaderParser.py @@ -3366,5 +3366,58 @@ def test_nothing(self): self.assertEqual(self.cppHeader.functions, []) +class InitializerWithInitializerList_TestCase(unittest.TestCase): + def setUp(self): + self.cppHeader = CppHeaderParser.CppHeader( + """ +struct ComplexInit : SomeBase { + ComplexInit(int i) : + m_stuff{i,2} + { + auto i = something(); + } + + void fn(); + + std::vector m_stuff; +}; + +template +class future final { +public: + template + future(future&& oth) noexcept + : future(oth.then([](R&& val) -> T { return val; })) {} +}; + + +""", + "string", + ) + + def test_cls_props(self): + c = self.cppHeader.classes["ComplexInit"] + self.assertEqual(2, len(c["methods"]["public"])) + self.assertEqual(0, len(c["methods"]["private"])) + self.assertEqual(0, len(c["methods"]["private"])) + self.assertEqual(1, len(c["properties"]["public"])) + self.assertEqual(0, len(c["properties"]["private"])) + self.assertEqual(0, len(c["properties"]["protected"])) + + self.assertEqual(c["methods"]["public"][0]["name"], "ComplexInit") + self.assertEqual(c["methods"]["public"][1]["name"], "fn") + + self.assertEqual(c["properties"]["public"][0]["name"], "m_stuff") + + def test_future(self): + c = self.cppHeader.classes["future"] + self.assertEqual(1, len(c["methods"]["public"])) + self.assertEqual(0, len(c["methods"]["private"])) + self.assertEqual(0, len(c["methods"]["private"])) + self.assertEqual(0, len(c["properties"]["public"])) + self.assertEqual(0, len(c["properties"]["private"])) + self.assertEqual(0, len(c["properties"]["protected"])) + self.assertEqual(c["methods"]["public"][0]["name"], "future") + if __name__ == "__main__": unittest.main() From 6863871e8ca87bd856adb7fbda239e4466f1a0ae Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Wed, 25 Dec 2019 00:23:30 -0500 Subject: [PATCH 6/7] Remove temporary variable --- CppHeaderParser/CppHeaderParser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index e940dd2..e97a175 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -2638,6 +2638,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): # self._doxygen_cache = None + self.braceHandled = False tok = None self.stmtTokens = [] @@ -2867,6 +2868,7 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): for key in [ "_precomp_macro_buf", "_doxygen_cache", + "braceHandled", "lex", "nameStack", "nameSpaces", From f6109d2fe9a647bbbb36b1463fd3b74ebbb78fd8 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Wed, 25 Dec 2019 02:06:15 -0500 Subject: [PATCH 7/7] Fix formatting --- CppHeaderParser/test/test_CppHeaderParser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/CppHeaderParser/test/test_CppHeaderParser.py b/CppHeaderParser/test/test_CppHeaderParser.py index ec52861..2a048ce 100644 --- a/CppHeaderParser/test/test_CppHeaderParser.py +++ b/CppHeaderParser/test/test_CppHeaderParser.py @@ -3419,5 +3419,6 @@ def test_future(self): self.assertEqual(0, len(c["properties"]["protected"])) self.assertEqual(c["methods"]["public"][0]["name"], "future") + if __name__ == "__main__": unittest.main()