Merge pull request #2 from idaholab/mandd/LML_graphdump
lml graph dump
wangcj05 authored Dec 4, 2024
2 parents 6e5dbb4 + c33fa96 commit 6d8baed
Showing 7 changed files with 124 additions and 75 deletions.
Binary file modified data/abbreviations.xlsx
10 changes: 7 additions & 3 deletions data/health_status_keywords_negative.csv
@@ -31,8 +31,8 @@ Breach,Alter,Incorrect,
Cessation,Regress ,Mistaken,
Disconnection,Decrease,Inaccurate,
Dissolution,Separate,Faulty,
Shock,(merge from verbs derived from noun list),Broken,
Tremor ,,Cracked,
Shock,harm,Broken,
Tremor ,(merge from verbs derived from noun list),Cracked,
Vibration,,Damaged,
Explosion,,Defective,
Termination,,Deficient ,
@@ -111,4 +111,8 @@ Interruption,,Blemished,
,,Exhausted,
,,Depleted,
,,Minimal,
,,Sparse,
,,Sparse,
,,inoperable,
,,unexpected,
,,unavailable,
,,neglected,
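
The negative-keyword columns above are consumed elsewhere in DACKAR; below is a minimal loading sketch, assuming pandas and the ragged multi-column layout shown in the diff (the helper name and header handling are illustrative, not part of this commit):

```python
# Hypothetical loader for data/health_status_keywords_negative.csv.
# The header row and the helper name are assumptions; the ragged,
# multi-column layout with blank cells matches the diff above.
import pandas as pd

def load_negative_keywords(path='data/health_status_keywords_negative.csv'):
    df = pd.read_csv(path)
    keywords = set()
    for col in df.columns:
        # Drop empty cells and normalize case/whitespace before collecting.
        keywords.update(str(v).strip().lower() for v in df[col].dropna()
                        if str(v).strip())
    return keywords
```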
Binary file modified data/tag_keywords_lists.xlsx
26 changes: 13 additions & 13 deletions others/tagKeywordListReader.py
@@ -92,7 +92,7 @@ def __init__(self,fileName):
Initialization method
Args:
fileName, string, file containing nuclear related entities
fileName, string, file containing nuclear related entities
Returns:
@@ -101,10 +101,10 @@ def __init__(self,fileName):
self.library = self.keyWordListGenerator(fileName)
self.cleanTagDict()
self.expander()

def checker(self):
"""
Method designed to check the structure of the set of nuclear related entities and identify entities
Method designed to check the structure of the set of nuclear related entities and identify entities
that might share multiple labels
Args:
@@ -120,7 +120,7 @@ def checker(self):
commonElements = list(set(self.library[key1]).intersection(self.library[key2]))
if key1!=key2 and commonElements:
print('Elements in common between ' +str(key1)+ ' and ' +str(key2)+ ' are:' + str(commonElements))

def getLibrary(self):
"""
Method designed to return self.library
@@ -134,7 +134,7 @@ def getLibrary(self):
self.library, dict, dictionary containing for each label a list of entities
"""
return self.library

def getAcronymsDict(self):
"""
Method designed to return self.acronymsDict
@@ -146,7 +146,7 @@ def getAcronymsDict(self):
Returns:
self.acronymsDict, dict, dictionary containing the acronyms contained in the library
"""
"""
return self.acronymsDict

def expander(self):
@@ -162,7 +162,7 @@ def expander(self):
Returns:
None
"""
"""
for key in self.library.keys():
for elem in self.library[key]:
if '-' in elem:
@@ -171,18 +171,18 @@


def keyWordListGenerator(self, fileName):
"""
"""
Method designed to read the file and generate a dictionary which contains, for each tag,
the set of keywords that should be associated with such a tag.
Args:
fileName, string, file containing nuclear related entities
Returns:
tagsDict, dict, dictionary containing for each label a list of entities
"""
"""

df = pd.read_excel(fileName, None)
# retrieve list of sheets in excel file
@@ -209,20 +209,20 @@ def keyWordListGenerator(self, fileName):


def cleanTagDict(self):
"""
"""
Method designed to clean the dictionary generated by the method keyWordListGenerator(.)
Here, specific characters or substrings are removed.
In addition, if an acronym is defined (within round parentheses), then the acronyms_dict is
populated {acronym: acronym_definition}
Args:
None
Returns:
None
"""
"""

self.acronymsDict = {}
n_keywords = 0
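
A usage sketch of the reader in others/tagKeywordListReader.py follows; the class name `TagKeywordListReader` is an assumption, while the workbook path and the methods (checker, getLibrary, getAcronymsDict) come from the diff above:

```python
# Hypothetical usage of the tag keyword list reader; the class name is an
# assumption, the method names and workbook file appear in this commit.
from tagKeywordListReader import TagKeywordListReader

reader = TagKeywordListReader('data/tag_keywords_lists.xlsx')
reader.checker()                     # report entities that share multiple labels
library = reader.getLibrary()        # {label: [entities]}
acronyms = reader.getAcronymsDict()  # {acronym: acronym definition}

for label, entities in library.items():
    print(label, len(entities))
```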
6 changes: 4 additions & 2 deletions pyproject.toml
@@ -31,7 +31,9 @@ dependencies = [
"jupyterlab",
"openpyxl",
"quantulum3",
"python>=3.9"
"python>=3.9",
"plotly",
"xlrd"
]
classifiers = [
"Programming Language :: Python :: 3",
@@ -41,4 +43,4 @@ classifiers = [
]

[project.urls]
"Homepage" = "https://github.inl.gov/congjian-wang/DACKAR"
"Homepage" = "https://github.com/idaholab/DACKAR"
107 changes: 70 additions & 37 deletions src/dackar/utils/mbse/LMLparser.py
@@ -11,6 +11,7 @@
import re
import networkx as nx
import pandas as pd
import csv

class LMLobject(object):
"""
@@ -191,7 +192,7 @@ def returnGraph(self):
Returns:
self.LMLgraph: networkx object, graph containing entities specified in the LML MBSE model
self.LMLgraph: networkx object, graph containing entities specified in the LML model
"""
return self.LMLgraph

@@ -254,65 +255,94 @@ def cleanedGraph(self):

return self.cleanedGraph

def printOnFile(self, name, csv=True):
"""
This method is designed to write the networkx graph to file.
This is to test a method to import a graph into neo4j as indicated in:
https://stackoverflow.com/questions/52210619/how-to-import-a-networkx-graph-to-neo4j
Args:
name, string, output file name (without extension)
csv, bool, if True write a CSV edge list, otherwise write a GraphML file
Returns:
None
"""
if csv:
name = name + ".csv"
nx.write_edgelist(self.LMLgraph, name, delimiter=',', data=True, encoding='utf-8')
else:
name = name + ".graphml"
nx.write_graphml(self.LMLgraph, name)
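
A usage sketch for the new export method; how an LMLobject is constructed is an assumption here, only printOnFile and its csv flag are taken from the code above:

```python
# Hypothetical driver for LMLobject.printOnFile; the constructor argument is
# an assumption, the csv flag behavior matches the method above.
from dackar.utils.mbse.LMLparser import LMLobject

lml = LMLobject('LML_model.xml')         # model file name is illustrative
lml.printOnFile('lml_graph', csv=True)   # writes lml_graph.csv (edge list)
lml.printOnFile('lml_graph', csv=False)  # writes lml_graph.graphml
```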

def createNeo4jGraph(self):

def dumpNodesEdgesFiles(self, name):
"""
This method is designed to dump the graph nodes and edges to CSV files that can be used to construct a gds graph entity
See Example 3.2 in https://neo4j.com/docs/graph-data-science-client/current/graph-object/
Args:
name, string, base name of the generated <name>_nodes.csv and <name>_edges.csv files
Returns:
None
"""
NXnodes = list(self.LMLgraph.nodes(data=True))
NXedges = list(self.LMLgraph.edges)

mapping = {}

nodes = {
"nodeId": [],
"labels": [],
"label": [],
"ID": [],
"type": []
}

for index,node in enumerate(NXnodes):
nodes['nodeId'].append(index)
nodeInfo = node

mapping[node] = index

if nodeInfo[0] is None:
nodes['labels'].append(nodeInfo[1])
nodes['ID'].append(nodeInfo[1])
mapping[index] = node[0]
nodeInfo = node[0]

if len(nodeInfo)==2:
if nodeInfo[0] == 'None':
nodes['label'].append(nodeInfo[1])
nodes['ID'].append(nodeInfo[1])

elif nodeInfo[1] is None:
nodes['labels'].append(nodeInfo[0])
nodes['ID'].append(nodeInfo[0])
elif nodeInfo[1] is None:
nodes['label'].append(nodeInfo[0])
nodes['ID'].append('None')

else:
nodes['label'].append(nodeInfo[0])
nodes['ID'].append(nodeInfo[1])

nodes['type'].append(node[1]['key'])
else:
nodes['labels'].append(nodeInfo[0])
nodes['ID'].append(nodeInfo[1])

nodes['type'].append(node[1]['key'])
nodes['label'].append('pipe')
nodes['ID'].append(nodeInfo)
nodes['type'].append('LML_link')

nodes = pd.DataFrame(nodes)

relationships = pd.DataFrame(
{
"sourceNodeId": [],
"targetNodeId": [],
"type": []
}
)
relationships = {
"sourceNodeId": [],
"targetNodeId": [],
"type" : []
}

for index,edge in enumerate(NXedges):
relationships['sourceNodeId'].append(mapping[edge[0]])
relationships['targetNodeId'].append(mapping[edge[1]])
father = [key for key, val in mapping.items() if val == edge[0]][0]
child = [key for key, val in mapping.items() if val == edge[1]][0]
relationships['sourceNodeId'].append(father)
relationships['targetNodeId'].append(child)
relationships['type'].append(edge[2])

'''
self.G = gds.graph.construct(
"my-graph", # Graph name
nodes, # One or more dataframes containing node data
relationships # One or more dataframes containing relationship data
)'''

return nodes, relationships
relationships = pd.DataFrame(relationships)

nodes.to_csv(name+'_nodes.csv',index=False, quoting=csv.QUOTE_NONE)
relationships.to_csv(name+'_edges.csv',index=False)
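
A sketch of consuming the two CSV dumps produced by dumpNodesEdgesFiles; the output file names follow the name + '_nodes.csv' / '_edges.csv' convention in the code above, while the base name and the gds client usage are assumptions based on the Neo4j example cited in the docstring:

```python
import pandas as pd

# Read back the files written by dumpNodesEdgesFiles('lml_graph'); the base
# name is illustrative, the column names match the dictionaries built above.
nodes = pd.read_csv('lml_graph_nodes.csv')  # nodeId, label, ID, type
edges = pd.read_csv('lml_graph_edges.csv')  # sourceNodeId, targetNodeId, type

# With the neo4j graph-data-science client, the frames could feed the call
# that is commented out in the diff, e.g.:
#   G = gds.graph.construct('lml-graph', nodes, edges)
print(nodes.head())
print(edges.head())
```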


def parseEntityDescription(text):
@@ -325,16 +355,19 @@ def parseEntityDescription(text):
text: str, text contained in the description node of the MBSE model
Returns:
out: tuple, tuple containing the list of elements specified in square brackets and separated
by commas (e.g., ['FV304','305']) and the link to an external MBSE model
(e.g., ('centrifugalPumpFull', 'body'))
"""

if '[' in text:
listOfElems = []
txtPortion1 = text[text.find("[")+1:text.find("]")]
listOfElems = txtPortion1.split(';')
listOfElemstemp = txtPortion1.split(';')
for elem in listOfElemstemp:
temp=elem[elem.find("(")+1:elem.find(")")].split(',')
listOfElems.append((temp[0],temp[1]))
else:
listOfElems = None

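
A worked example of the new bracket parsing in parseEntityDescription; the description string is illustrative (the exact LML description syntax is not shown in the diff), and the loop mirrors the code above:

```python
# Illustrative walk-through of the bracket parsing added in this commit; the
# description text is an assumption, the logic mirrors the diff.
text = 'Pump train [(FV304,1);(FV305,2)] (centrifugalPumpFull, body)'

listOfElems = []
txtPortion1 = text[text.find("[") + 1:text.find("]")]
for elem in txtPortion1.split(';'):
    # Each ';'-separated entry is a '(a,b)' pair that becomes a tuple.
    temp = elem[elem.find("(") + 1:elem.find(")")].split(',')
    listOfElems.append((temp[0], temp[1]))

print(listOfElems)  # [('FV304', '1'), ('FV305', '2')]
```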
50 changes: 30 additions & 20 deletions tests/LML_parser/LML_functionality_test.ipynb

Large diffs are not rendered by default.
