Merge pull request #158 from Public-Health-Bioinformatics/cancogen_NM…

…L_LIMS_v0.2 WIP Cancogen nml lims v0.2 export
cidgoh · Mar 8, 2021 · c5a360e · c5a360e
2 parents 9534809 + 8e28c1c
commit c5a360e
Show file tree

Hide file tree

Showing 10 changed files with 3,421 additions and 2,167 deletions.
diff --git a/script/export_utils.js b/script/export_utils.js
@@ -22,8 +22,8 @@ const getFieldNameMap = (fields) => {
  * This code works on exportHeaders as either a Map or an array of
  * [['field_name',[fields],...] 
  * @param {Array} exportHeaders See `export.js`.
- * @param {Array<Object>} array of all source fields.
- * @param {String} export column prefix
+ * @param {Array<Object>} fields array of all source fields.
+ * @param {String} prefix export column prefix
  */
 const getHeaderMap = (exportHeaders, fields, prefix) => {
 	var headerMap = {};
@@ -44,51 +44,153 @@ const getHeaderMap = (exportHeaders, fields, prefix) => {
 		}
     }
 
+    let field_message = [];
+    let field_export_message = [];
+
 	for (const [fieldIndex, field] of fields.entries()) {
 		if (field.exportField && prefix in field.exportField) {
 			for (const target of field.exportField[prefix]) {
 				if ('field' in target) {
-					if (target.field in headerMap) {
-						var sources;
-						if (exportHeaders instanceof Map) {
-							sources = exportHeaders.get(target.field);
-							// If given field isn't already mapped, add it.
-							if (sources.indexOf(field.fieldName) == -1) {
-								sources.push(field.fieldName);
-							};
-							exportHeaders.set(target.field, sources);
+					var sources;
+					if (exportHeaders instanceof Map) {
+						if (target.field in headerMap) {
+							field_export_message.push(target.field);
+						}
+						else {
+							if (!exportHeaders.has(target.field)) {
+								field_message.push(target.field);
+								// Issue: all template driven exportHeader fields are showing AFTER export.js mentioned ones.
+								headerMap[target.field] = exportHeaders.length;
+								exportHeaders.set(target.field, []);
+							}
 						}
-						else { // Save to array
-							sources = exportHeaders[headerMap[target.field]][1];
-							// As above
-							if (sources.indexOf(field.fieldName) == -1) {
-								sources.push(field.fieldName);
-							};
-							exportHeaders[headerMap[target.field]][1] = sources;
+						let sources = exportHeaders.get(target.field);
+						if (!sources)
+							console.log('Malformed export.js exportHeader field:', target.field)
+						// If given field isn't already mapped, add it.
+						if (sources.indexOf(field.fieldName) == -1) {
+							sources.push(field.fieldName);
 						};
+						exportHeaders.set(target.field, sources);
 					}
-					else {
-						const msg = 'The EXPORT_' + prefix + ' column for ' + field.fieldName +' requests a map to a non-existen export template field: ' + target.field;
-						console.log (msg);
+					else { // Save to array
+						if (target.field in headerMap) {
+							field_export_message.push(target.field);
+						}
+						else {
+							// Add field to exportHeaders
+							// Issue: can this handle many-to-one mapping?
+							field_message.push(target.field);
+							headerMap[target.field] = exportHeaders.length;
+							exportHeaders.push([target.field, []]);
+						}
+						sources = exportHeaders[headerMap[target.field]][1];
+						// As above
+						if (sources.indexOf(field.fieldName) == -1) {
+							sources.push(field.fieldName);
+						};
+						exportHeaders[headerMap[target.field]][1] = sources;
 					};
+
 				};
 			};
 		};
     };
+    // This will output a list of fields added to exportHeaders by way of template specification which haven't been included in export.js
+    if (field_message)
+    	console.log('Export fields added by template:', field_message)
+    if (field_export_message)
+    	console.log('Export fields stated in export.js):', field_export_message)
 };
 
-const getMappedField = (sourceRow, sourceFieldNames, fieldNameMap, delimiter) => {
-	// This provides an export field composed of one or more more input
-	// fields, separated by a ';' delimiter if not null.
+/**
+ * This provides an export field composed of one or more input fields,
+ * joined by the given delimiter; empty source values are skipped. nullOptionsMap
+ * converts "Missing" etc. metadata options to the target export system's version.
+ * @param {Object} sourceRow row of source values, indexed via fieldNameMap.
+ * @param {Array<String>} sourceFieldNames names of the source fields to combine.
+ * @param {Array<Object>} sourceFields array of all source fields.
+ * @param {Object} fieldNameMap maps a field name to its index in sourceRow and sourceFields.
+ * @param {String} delimiter to separate multi-source field values with
+ * @param {String} prefix of export format
+ * @param {Map} nullOptionsMap conversion of Missing etc. to export db equivalent.
+ * @return {String} Concatenated string of values.
+ */
+const getMappedField = (sourceRow, sourceFieldNames, sourceFields, fieldNameMap, delimiter, prefix, nullOptionsMap = null) => {
+
 	const mappedCell = [];
 	for (const fieldName of sourceFieldNames) {
-		const mappedCellVal = sourceRow[fieldNameMap[fieldName]];
+		let mappedCellVal = sourceRow[fieldNameMap[fieldName]];
 		if (!mappedCellVal) continue;
-		mappedCell.push(mappedCellVal);
+		mappedCellVal = mappedCellVal.trim();
+		if (mappedCellVal.length === 0) continue;
+		if (nullOptionsMap && nullOptionsMap.has(mappedCellVal)){
+			mappedCellVal = nullOptionsMap.get(mappedCellVal);
+		};
+		let field = sourceFields[fieldNameMap[fieldName]];
+		if (field.datatype === 'select') {
+			mappedCell.push( getTransformedField(mappedCellVal, field, prefix));
+		}
+		else if (field.datatype === 'multiple') {
+			// ISSUE: relying on semicolon delimiter in input
+			for (let cellVal of mappedCellVal.split(';')) {
+				mappedCell.push( getTransformedField(cellVal.trim(), field, prefix));
+			}
+		}
+		else {
+			mappedCell.push(mappedCellVal)
+		}
 	};
 	return mappedCell.join(delimiter);
 }
 
+/**
+ * Some vocabulary fields get mapped over to export format values.
+ * Returns the first mapping's value for the term, or the given value if none applies.
+ * @param {String} value to be exported.
+ * @param {Object} field source field whose 'schema:ItemList' is examined for mappings.
+ * @param {String} prefix of export format to examine.
+ */
+const getTransformedField = (value, field, prefix) => {
+
+ 	if (field['schema:ItemList']) {
+ 		const term = findById(field['schema:ItemList'], value);
+
+		// Looking for term.exportField['GRDI'] for example:
+		if (term && 'exportField' in term && prefix in term.exportField) {
+			// Here mapping involves a value substitution
+			// Note possible [target field]:[value] twist
+			for (let mapping of term.exportField[prefix]) {
+				return mapping.value;
+			};
+		};
+
+	};
+	return value;
+};
+
+/** Find key in nested object (nested dictionaries).
+ * Adapted from: https://codereview.stackexchange.com/questions/73714/find-a-nested-property-in-an-object
+ * @param {Dictionary<Dictionary>} o nested Dictionaries
+ * @param {String} key to find in dictionaries
+ * @return {Dictionary} value stored under key, or undefined if key is not found
+ */
+function findById(o, key) {
+	if (key in o)
+		return o[key];
+    var result, p; 
+    for (p in o) {
+        if( o.hasOwnProperty(p) && typeof o[p] === 'object' ) {
+            result = findById(o[p], key);
+            if(result){
+                return result;
+            }
+        }
+    }
+    return result;
+}
+
+
 /**
  * Get a dictionary of empty arrays for each ExportHeader field
  * FUTURE: enable it to work with hierarchic vocabulary lists
@@ -112,9 +214,9 @@ const getRowMap = (sourceRow, sourceFields, RuleDB, fields, fieldNameMap, prefix
     // has a mapping for export to a GRDI target field above, then set target
     // to value.
     if (value && value.length > 0) {
-      const vocabulary = fields[sourceIndex].vocabulary;
-      if (value in vocabulary) { 
-        const term = vocabulary[value];
+      const vocab_list = fields[sourceIndex]['schema:ItemList'];
+      if (value in vocab_list) { 
+        const term = vocab_list[value];
         // Looking for term.exportField['GRDI'] for example:
         if ('exportField' in term && prefix in term.exportField) {
           for (let mapping of term.exportField[prefix]) {

diff --git a/script/main.js b/script/main.js
@@ -76,15 +76,17 @@ const processData = (data) => {
   const flatVocabularies = {};
   const fields = getFields(data);
   for (const field of fields) {
-    if (field.vocabulary) {
+    if ('schema:ItemList' in field) {
       flatVocabularies[field.fieldName] =
-          stringifyNestedVocabulary(field.vocabulary);
+          stringifyNestedVocabulary(field['schema:ItemList']);
     }
   }
 
+  // parent is each data section
   for (const parent of data) {
+    // parent.children is list of fields
     for (const child of parent.children) {
-      if (child.vocabulary) {
+      if ('schema:ItemList' in child) {
         child.flatVocabulary = flatVocabularies[child.fieldName];
 
         if (child.source) {
@@ -257,7 +259,9 @@ const getColumns = (data) => {
   let ret = [];
   for (const field of getFields(data)) {
     const col = {};
-    if (field.requirement) col.requirement = field.requirement;
+    if (field.requirement) {
+      col.requirement = field.requirement;
+    }
     switch (field.datatype) {
       case 'xs:date': 
         col.type = 'date';
@@ -303,13 +307,14 @@ const getColumns = (data) => {
  *     processing.
  * @return {Array<String>} Flattened vocabulary.
  */
-const stringifyNestedVocabulary = (vocabulary, level=0) => {
+const stringifyNestedVocabulary = (vocab_list, level=0) => {
 
   let ret = [];
-  for (const val of Object.keys(vocabulary)) {
-    if (val != 'exportField') { // Ignore field map values used for export.
-      ret.push('  '.repeat(level) + val);
-      ret = ret.concat(stringifyNestedVocabulary(vocabulary[val], level+1));
+  for (const val of Object.keys(vocab_list)) {
+    //if (val != 'exportField') { // Ignore field map values used for export.
+    ret.push('  '.repeat(level) + val);
+    if ('schema:ItemList' in vocab_list[val]) {
+      ret = ret.concat(stringifyNestedVocabulary(vocab_list[val]['schema:ItemList'], level+1));
     }
   }
   return ret;

diff --git a/script/make_data.py b/script/make_data.py
@@ -22,28 +22,68 @@
 reference_html = ''; # Content of a report that details section fields
 search_root = '/';
 
-# Consolidates all EXPORT_XYZ terms into one data structure
-# exportField: {PREFIX:[[field name],[value rename],...]}
-def export_fields (EXPORT_FORMAT, field, row):
+# For a column in input spreadsheet named EXPORT_[EXPORT_FORMAT], add to
+# dictionary structure (field) a field.exportField datastructure containing
+# transforms to each EXPORT_FORMAT value, or column and value combination.
+# e.g.
+#	"Confusion": {
+#		"exportField": {
+#			"NML_LIMS": [
+#				{
+#					"field": "HC_SYMPTOMS",
+#                   "value": "CONFUSION"
+#               }
+#            ],
+#        },
+#		 ... other child terms
+#
+# exportField: {[PREFIX]:[{"field":[field name],"value":[value transform]},...]}
+# input spreadsheet EXPORT_[EXPORT_FORMAT] is coded as:
+#    [column1]:[value];[column2]:[value]; // multiple column targets
+#    or [value];[value]; // default column target
+#
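+# @param Array<String> EXPORT_FORMAT list of EXPORT_[format] column names to search for
+# @param Dict field Dictionary of vocabulary field details
+# @param Dict row containing all field data
+# @param Boolean as_field if True, an item without ":" names a target field rather than a value
+# @return Dict field modified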
+def export_fields (EXPORT_FORMAT, field, row, as_field = False):
 	if len(EXPORT_FORMAT) > 0:
 		formats = {};
 		for export_field in EXPORT_FORMAT:
 			prefix = export_field[7:]; # Get rid of "EXPORT_" part.
 			if row[export_field] == None:
 				print ('Error: ', export_field, 'not found in row with label [',row['label'], ']. Malformed text in row?');
 				continue;
+
+			# An export field may have one or more [field name]:[field value] transforms, separated by ";"
 			for item in row[export_field].split(";"):
-			# an export field may have one or more [field name]:[new field value] mapping.
 				item = item.strip();
-				if len(item.strip()) > 0:
-					binding = item.strip().split(":",1);
-					conversion = {}
-					if binding[0].strip() > '':
-						conversion['field'] = binding[0].strip();
-					if len (binding) > 1 and binding[1].strip() > '':
-						conversion['value'] = binding[1].strip();
+				if len(item) > 0:
+					conversion = {};
+					# We have a transform of some kind
 					if not prefix in formats:
 						formats[prefix] = [];
+
+					# A colon indicates a different target field is in play
+					if ":" in item:
+						binding = item.split(":",1);
+						binding[0] = binding[0].strip();
+						binding[1] = binding[1].strip();
+						if binding[0] > '':
+							conversion['field'] = binding[0];
+						if binding[1] > '':
+							conversion['value'] = binding[1];
+						else:
+							# A single ":" value enables clearing out of a value.
+							conversion['value'] = '';
+
+					# No colon
+					elif as_field == True:
+						conversion['field'] = item;
+					else:
+						conversion['value'] = item;	
+
 					formats[prefix].append(conversion);
 
 		if formats: # Only if some keys have been added.
@@ -109,7 +149,7 @@ def export_fields (EXPORT_FORMAT, field, row):
 							'examples':			row['examples']
 						}
 
-						export_fields (EXPORT_FORMAT, field, row);
+						export_fields (EXPORT_FORMAT, field, row, True);
 
 						reference_html += '''
 						<tr>
@@ -126,7 +166,7 @@ def export_fields (EXPORT_FORMAT, field, row):
 							choice = collections.OrderedDict(); 
 							# Top level case-sensitive field index, curators must be exact
 							CHOICE_INDEX[label] = choice; 
-							field['vocabulary'] = choice;
+							field['schema:ItemList'] = choice;
 
 						section['children'].append(field)
 						FIELD_INDEX[label.lower()] = field;
@@ -144,12 +184,12 @@ def export_fields (EXPORT_FORMAT, field, row):
 								search_root = parent_label;
 								print ('vocabulary field:', parent_label);
 
-							if not 'vocabulary' in FIELD_INDEX[parent_label_lc]:
+							if not 'schema:ItemList' in FIELD_INDEX[parent_label_lc]:
 								print ("error: field ",parent_label, "not marked as select or multiple but it has child term", label);
 							else:
 								# Basically top-level entries in field_map:
 								choice = collections.OrderedDict();
-								FIELD_INDEX[parent_label_lc]['vocabulary'][label] = choice;
+								FIELD_INDEX[parent_label_lc]['schema:ItemList'][label] = choice;
 
 								# Parent_label is top level field name:
 								CHOICE_INDEX[parent_label][label] = choice;
@@ -163,7 +203,11 @@ def export_fields (EXPORT_FORMAT, field, row):
 							# in parent label switches that to a wildcard.
 							try:
 								result = dpath.util.get(CHOICE_INDEX, '/' + search_root +'/**/' + parent_label.replace('/','?'), separator='/');
-								result[label] = collections.OrderedDict(); # new child {}
+								choice = collections.OrderedDict(); # new child {}
+								if not 'schema:ItemList' in result:
+									result['schema:ItemList'] = {};
+								result['schema:ItemList'][label] = choice; 
+								export_fields(EXPORT_FORMAT, choice, row);
 							except:
 								print ("Error: parent class ", parent_label, "doesn't exist as section or field for term. Make sure parent term is trimmed of whitespace.", label);
 								pass