Skip to content

Commit

Permalink
Merge pull request #158 from Public-Health-Bioinformatics/cancogen_NM…
Browse files Browse the repository at this point in the history
…L_LIMS_v0.2

WIP Cancogen nml lims v0.2 export
  • Loading branch information
ddooley authored Mar 8, 2021
2 parents 9534809 + 8e28c1c commit c5a360e
Show file tree
Hide file tree
Showing 10 changed files with 3,421 additions and 2,167 deletions.
160 changes: 131 additions & 29 deletions script/export_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ const getFieldNameMap = (fields) => {
* This code works on exportHeaders as either a Map or an array of
* [['field_name',[fields],...]
* @param {Array} exportHeaders See `export.js`.
* @param {Array<Object>} array of all source fields.
* @param {String} export column prefix
* @param {Array<Object>} fields array of all source fields.
* @param {String} prefix export column prefix
*/
const getHeaderMap = (exportHeaders, fields, prefix) => {
var headerMap = {};
Expand All @@ -44,51 +44,153 @@ const getHeaderMap = (exportHeaders, fields, prefix) => {
}
}

let field_message = [];
let field_export_message = [];

for (const [fieldIndex, field] of fields.entries()) {
if (field.exportField && prefix in field.exportField) {
for (const target of field.exportField[prefix]) {
if ('field' in target) {
if (target.field in headerMap) {
var sources;
if (exportHeaders instanceof Map) {
sources = exportHeaders.get(target.field);
// If given field isn't already mapped, add it.
if (sources.indexOf(field.fieldName) == -1) {
sources.push(field.fieldName);
};
exportHeaders.set(target.field, sources);
var sources;
if (exportHeaders instanceof Map) {
if (target.field in headerMap) {
field_export_message.push(target.field);
}
else {
if (!exportHeaders.has(target.field)) {
field_message.push(target.field);
// Issue: all template driven exportHeader fields are showing AFTER export.js mentioned ones.
headerMap[target.field] = exportHeaders.length;
exportHeaders.set(target.field, []);
}
}
else { // Save to array
sources = exportHeaders[headerMap[target.field]][1];
// As above
if (sources.indexOf(field.fieldName) == -1) {
sources.push(field.fieldName);
};
exportHeaders[headerMap[target.field]][1] = sources;
let sources = exportHeaders.get(target.field);
if (!sources)
console.log('Malformed export.js exportHeader field:', target.field)
// If given field isn't already mapped, add it.
if (sources.indexOf(field.fieldName) == -1) {
sources.push(field.fieldName);
};
exportHeaders.set(target.field, sources);
}
else {
const msg = 'The EXPORT_' + prefix + ' column for ' + field.fieldName +' requests a map to a non-existen export template field: ' + target.field;
console.log (msg);
else { // Save to array
if (target.field in headerMap) {
field_export_message.push(target.field);
}
else {
// Add field to exportHeaders
// Issue: can this handle many-to-one mapping?
field_message.push(target.field);
headerMap[target.field] = exportHeaders.length;
exportHeaders.push([target.field, []]);
}
sources = exportHeaders[headerMap[target.field]][1];
// As above
if (sources.indexOf(field.fieldName) == -1) {
sources.push(field.fieldName);
};
exportHeaders[headerMap[target.field]][1] = sources;
};

};
};
};
};
// This will output a list of fields added to exportHeaders by way of template specification which haven't been included in export.js
if (field_message)
console.log('Export fields added by template:', field_message)
if (field_export_message)
console.log('Export fields stated in export.js):', field_export_message)
};

const getMappedField = (sourceRow, sourceFieldNames, fieldNameMap, delimiter) => {
// This provides an export field composed of one or more more input
// fields, separated by a ';' delimiter if not null.
/**
* This provides an export field composed of one or more input
* fields, separated by a ';' delimiter if not null.
* nullOptionsDict allows conversion of "Missing" etc. metadata options to
* target export system's version of these.
* @param {Object} sourceRow
* @param {Array<Object>} sourceFieldNames array of all source fields.
* @param {Object} fieldNameMap
* @param {String} delimiter to separate multi-source field values with
* @param {String} prefix of export format
* @param {Map} nullOptionsMap conversion of Missing etc. to export db equivalent.
* @return {String} Concatenated string of values.
*/
const getMappedField = (sourceRow, sourceFieldNames, sourceFields, fieldNameMap, delimiter, prefix, nullOptionsMap = null) => {

const mappedCell = [];
for (const fieldName of sourceFieldNames) {
const mappedCellVal = sourceRow[fieldNameMap[fieldName]];
let mappedCellVal = sourceRow[fieldNameMap[fieldName]];
if (!mappedCellVal) continue;
mappedCell.push(mappedCellVal);
mappedCellVal = mappedCellVal.trim();
if (mappedCellVal.length === 0) continue;
if (nullOptionsMap && nullOptionsMap.has(mappedCellVal)){
mappedCellVal = nullOptionsMap.get(mappedCellVal);
};
let field = sourceFields[fieldNameMap[fieldName]];
if (field.datatype === 'select') {
mappedCell.push( getTransformedField(mappedCellVal, field, prefix));
}
else if (field.datatype === 'multiple') {
// ISSUE: relying on semicolon delimiter in input
for (let cellVal of mappedCellVal.split(';')) {
mappedCell.push( getTransformedField(cellVal.trim(), field, prefix));
}
}
else {
mappedCell.push(mappedCellVal)
}
};
return mappedCell.join(delimiter);
}

/**
 * Some vocabulary fields get mapped over to export format values.
 *
 * Looks `value` up in the field's 'schema:ItemList' vocabulary (via findById)
 * and, if the matching term carries an exportField[prefix] list of mappings,
 * returns the first mapping that supplies a `value`. An empty-string `value`
 * is a legitimate substitution and is returned as-is. In every other case
 * (no vocabulary, no matching term, no mapping for this prefix, or mappings
 * that name only a target `field`) the input value is returned unchanged.
 *
 * @param {String} value to be exported.
 * @param {Object} field source field specification; may hold a 'schema:ItemList' vocabulary.
 * @param {String} prefix of export format to examine, e.g. 'GRDI'.
 * @return {String} Substituted export value, or the original value.
 */
const getTransformedField = (value, field, prefix) => {

  const vocabulary = field && field['schema:ItemList'];
  if (!vocabulary) {
    return value;
  }

  const term = findById(vocabulary, value);
  // findById returns whatever is stored under the first matching key, which
  // can be a string or array from the export metadata itself; only an object
  // can carry an exportField dictionary, and `in` would throw on a string.
  if (!term || typeof term !== 'object') {
    return value;
  }

  // Looking for term.exportField['GRDI'] for example:
  const exportField = term.exportField;
  if (!exportField || typeof exportField !== 'object') {
    return value;
  }
  const mappings = exportField[prefix];
  if (!Array.isArray(mappings)) {
    return value;
  }

  // Here mapping involves a value substitution.
  // Note possible [target field]:[value] twist: a mapping may name only a
  // target field, in which case it offers no substitute value and is skipped
  // rather than turning the exported value into undefined.
  for (const mapping of mappings) {
    if (mapping && mapping.value !== undefined) {
      return mapping.value;
    }
  }
  return value;
};

/**
 * Find key in nested object (nested dictionaries), searching depth-first.
 * Adapted from: https://codereview.stackexchange.com/questions/73714/find-a-nested-property-in-an-object
 *
 * Only own properties are considered, so inherited names such as 'toString'
 * or 'constructor' never count as a match. If `o` itself owns `key`, its
 * value is returned even when falsy; matches found in nested dictionaries
 * are returned only when truthy, otherwise the search continues.
 *
 * @param {Object} o nested dictionaries; null or non-object input finds nothing.
 * @param {String} key to find in dictionaries.
 * @return {*} The value stored under the first match, or undefined when no
 *   truthy match exists.
 */
function findById(o, key) {
  // typeof null is 'object', so null must be excluded explicitly before
  // property lookups, which would otherwise throw.
  if (o === null || typeof o !== 'object') {
    return undefined;
  }
  // Borrow hasOwnProperty from Object.prototype so dictionaries without a
  // prototype, or with a key named 'hasOwnProperty', are still searchable.
  const hasOwn = Object.prototype.hasOwnProperty;
  if (hasOwn.call(o, key)) {
    return o[key];
  }
  let result;
  for (const p of Object.keys(o)) {
    const child = o[p];
    if (child !== null && typeof child === 'object') {
      result = findById(child, key);
      if (result) {
        return result;
      }
    }
  }
  return result;
}


/**
* Get a dictionary of empty arrays for each ExportHeader field
* FUTURE: enable it to work with hierarchic vocabulary lists
Expand All @@ -112,9 +214,9 @@ const getRowMap = (sourceRow, sourceFields, RuleDB, fields, fieldNameMap, prefix
// has a mapping for export to a GRDI target field above, then set target
// to value.
if (value && value.length > 0) {
const vocabulary = fields[sourceIndex].vocabulary;
if (value in vocabulary) {
const term = vocabulary[value];
const vocab_list = fields[sourceIndex]['schema:ItemList'];
if (value in vocab_list) {
const term = vocab_list[value];
// Looking for term.exportField['GRDI'] for example:
if ('exportField' in term && prefix in term.exportField) {
for (let mapping of term.exportField[prefix]) {
Expand Down
23 changes: 14 additions & 9 deletions script/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,17 @@ const processData = (data) => {
const flatVocabularies = {};
const fields = getFields(data);
for (const field of fields) {
if (field.vocabulary) {
if ('schema:ItemList' in field) {
flatVocabularies[field.fieldName] =
stringifyNestedVocabulary(field.vocabulary);
stringifyNestedVocabulary(field['schema:ItemList']);
}
}

// parent is each data section
for (const parent of data) {
// parent.children is list of fields
for (const child of parent.children) {
if (child.vocabulary) {
if ('schema:ItemList' in child) {
child.flatVocabulary = flatVocabularies[child.fieldName];

if (child.source) {
Expand Down Expand Up @@ -257,7 +259,9 @@ const getColumns = (data) => {
let ret = [];
for (const field of getFields(data)) {
const col = {};
if (field.requirement) col.requirement = field.requirement;
if (field.requirement) {
col.requirement = field.requirement;
}
switch (field.datatype) {
case 'xs:date':
col.type = 'date';
Expand Down Expand Up @@ -303,13 +307,14 @@ const getColumns = (data) => {
* processing.
* @return {Array<String>} Flattened vocabulary.
*/
const stringifyNestedVocabulary = (vocabulary, level=0) => {
const stringifyNestedVocabulary = (vocab_list, level=0) => {

let ret = [];
for (const val of Object.keys(vocabulary)) {
if (val != 'exportField') { // Ignore field map values used for export.
ret.push(' '.repeat(level) + val);
ret = ret.concat(stringifyNestedVocabulary(vocabulary[val], level+1));
for (const val of Object.keys(vocab_list)) {
//if (val != 'exportField') { // Ignore field map values used for export.
ret.push(' '.repeat(level) + val);
if ('schema:ItemList' in vocab_list[val]) {
ret = ret.concat(stringifyNestedVocabulary(vocab_list[val]['schema:ItemList'], level+1));
}
}
return ret;
Expand Down
76 changes: 60 additions & 16 deletions script/make_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,68 @@
reference_html = ''; # Content of a report that details section fields
search_root = '/';

# Consolidates all EXPORT_XYZ terms into one data structure
# exportField: {PREFIX:[[field name],[value rename],...]}
def export_fields (EXPORT_FORMAT, field, row):
# For a column in input spreadsheet named EXPORT_[EXPORT_FORMAT], add to
# dictionary structure (field) a field.exportField datastructure containing
# transforms to each EXPORT_FORMAT value, or column and value combination.
# e.g.
# "Confusion": {
# "exportField": {
# "NML_LIMS": [
# {
# "field": "HC_SYMPTOMS",
# "value": "CONFUSION"
# }
# ],
# },
# ... other child terms
#
# exportField: {[PREFIX]:[{"field":[value],"value":[value transform],...]}
# input spreadsheet EXPORT_[EXPORT_FORMAT] is coded as:
# [column1]:[value];[column2]:[value]; // multiple column targets
# or [value];[value]; // default column target
#
# @param Array<String> EXPORT_FORMAT list of export formats to search for
# @param Dict field Dictionary of vocabulary field details
# @param Dict row containing all field data
# @return Dict field modified

def export_fields (EXPORT_FORMAT, field, row, as_field = False):
if len(EXPORT_FORMAT) > 0:
formats = {};
for export_field in EXPORT_FORMAT:
prefix = export_field[7:]; # Get rid of "EXPORT_" part.
if row[export_field] == None:
print ('Error: ', export_field, 'not found in row with label [',row['label'], ']. Malformed text in row?');
continue;

# An export field may have one or more [field name]:[field value] transforms, separated by ";"
for item in row[export_field].split(";"):
# an export field may have one or more [field name]:[new field value] mapping.
item = item.strip();
if len(item.strip()) > 0:
binding = item.strip().split(":",1);
conversion = {}
if binding[0].strip() > '':
conversion['field'] = binding[0].strip();
if len (binding) > 1 and binding[1].strip() > '':
conversion['value'] = binding[1].strip();
if len(item) > 0:
conversion = {};
# We have a transform of some kind
if not prefix in formats:
formats[prefix] = [];

# A colon indicates a different target field is in play
if ":" in item:
binding = item.split(":",1);
binding[0] = binding[0].strip();
binding[1] = binding[1].strip();
if binding[0] > '':
conversion['field'] = binding[0];
if binding[1] > '':
conversion['value'] = binding[1];
else:
# A single ":" value enables clearing out of a value.
conversion['value'] = '';

# No colon
elif as_field == True:
conversion['field'] = item;
else:
conversion['value'] = item;

formats[prefix].append(conversion);

if formats: # Only if some keys have been added.
Expand Down Expand Up @@ -109,7 +149,7 @@ def export_fields (EXPORT_FORMAT, field, row):
'examples': row['examples']
}

export_fields (EXPORT_FORMAT, field, row);
export_fields (EXPORT_FORMAT, field, row, True);

reference_html += '''
<tr>
Expand All @@ -126,7 +166,7 @@ def export_fields (EXPORT_FORMAT, field, row):
choice = collections.OrderedDict();
# Top level case-sensitive field index, curators must be exact
CHOICE_INDEX[label] = choice;
field['vocabulary'] = choice;
field['schema:ItemList'] = choice;

section['children'].append(field)
FIELD_INDEX[label.lower()] = field;
Expand All @@ -144,12 +184,12 @@ def export_fields (EXPORT_FORMAT, field, row):
search_root = parent_label;
print ('vocabulary field:', parent_label);

if not 'vocabulary' in FIELD_INDEX[parent_label_lc]:
if not 'schema:ItemList' in FIELD_INDEX[parent_label_lc]:
print ("error: field ",parent_label, "not marked as select or multiple but it has child term", label);
else:
# Basically top-level entries in field_map:
choice = collections.OrderedDict();
FIELD_INDEX[parent_label_lc]['vocabulary'][label] = choice;
FIELD_INDEX[parent_label_lc]['schema:ItemList'][label] = choice;

# Parent_label is top level field name:
CHOICE_INDEX[parent_label][label] = choice;
Expand All @@ -163,7 +203,11 @@ def export_fields (EXPORT_FORMAT, field, row):
# in parent label switches that to a wildcard.
try:
result = dpath.util.get(CHOICE_INDEX, '/' + search_root +'/**/' + parent_label.replace('/','?'), separator='/');
result[label] = collections.OrderedDict(); # new child {}
choice = collections.OrderedDict(); # new child {}
if not 'schema:ItemList' in result:
result['schema:ItemList'] = {};
result['schema:ItemList'][label] = choice;
export_fields(EXPORT_FORMAT, choice, row);
except:
print ("Error: parent class ", parent_label, "doesn't exist as section or field for term. Make sure parent term is trimmed of whitespace.", label);
pass
Expand Down
Loading

0 comments on commit c5a360e

Please sign in to comment.