From a5dfe18e4c360bb6d49b141ad9470d5a49550916 Mon Sep 17 00:00:00 2001
From: Jose Fonseca
Date: Sat, 9 Nov 2024 12:09:05 +0000
Subject: [PATCH] Prevent long node IDs.

Fixes https://github.com/jrfonseca/gprof2dot/issues/99
---
 gprof2dot.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/gprof2dot.py b/gprof2dot.py
index 1fec633..074dc39 100755
--- a/gprof2dot.py
+++ b/gprof2dot.py
@@ -34,6 +34,7 @@
 import fnmatch
 import codecs
 import io
+import hashlib

 assert sys.version_info[0] >= 3

@@ -3535,15 +3536,15 @@ def attr(self, what, **attrs):

     def node(self, node, **attrs):
         self.write("\t")
-        self.id(node)
+        self.node_id(node)
         self.attr_list(attrs)
         self.write(";\n")

     def edge(self, src, dst, **attrs):
         self.write("\t")
-        self.id(src)
+        self.node_id(src)
         self.write(" -> ")
-        self.id(dst)
+        self.node_id(dst)
         self.attr_list(attrs)
         self.write(";\n")

@@ -3559,11 +3560,22 @@ def attr_list(self, attrs):
                 first = False
             else:
                 self.write(", ")
-            self.id(name)
+            assert isinstance(name, str)
+            assert name.isidentifier()
+            self.write(name)
             self.write('=')
             self.id(value)
         self.write(']')

+    def node_id(self, id):
+        # Node IDs need to be unique (can't be truncated) but dot doesn't allow
+        # IDs longer than 16384 characters, so use a hash instead for the huge
+        # C++ symbols that can arise, as seen in
+        # https://github.com/jrfonseca/gprof2dot/issues/99
+        if isinstance(id, str) and len(id) > 1024:
+            id = '_' + hashlib.sha1(id.encode('utf-8'), usedforsecurity=False).hexdigest()
+        self.id(id)
+
     def id(self, id):
         if isinstance(id, (int, float)):
             s = str(id)