From ef2798240ac8912bfaa240c2368e4fead6523175 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Mon, 25 Dec 2023 18:50:10 +0000 Subject: [PATCH 01/21] convert js to python --- reproschema/redcap2reproschema.py | 385 ++++++++++++++++++++++++++++++ 1 file changed, 385 insertions(+) create mode 100644 reproschema/redcap2reproschema.py diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py new file mode 100644 index 0000000..1d2966c --- /dev/null +++ b/reproschema/redcap2reproschema.py @@ -0,0 +1,385 @@ +# User inputs: these are specific to your protocol, fill out before using the script + +# 1. your protocol id: use underscore for spaces, avoid special characters. +# The display name is the one that will show up in the app, this will be parsed as a string. +protocol_name = "sc_dd" + +# 2. your protocol display name: this will show up in the app and be parsed as a string +protocol_display_name = "Your protocol display name" + +# 3. create your raw GitHub repo URL +user_name = 'sanuann' +repo_name = 'reproschema' +branch_name = 'master' + +your_repo_url = f"https://raw.githubusercontent.com/{user_name}/{repo_name}/{branch_name}" + +# 4. add a description to your protocol +protocol_description = "Description for your protocol" + +# 5. where are you hosting your images? 
For example: openmoji +image_path = 'https://raw.githubusercontent.com/hfg-gmuend/openmoji/master/color/618x618/' + +import sys +import os +import csv +import json +import re +from collections import defaultdict +from bs4 import BeautifulSoup + +def create_form_context_schema(form, row_list): + item_obj = defaultdict(dict) + item_obj["@version"] = 1.1 + item_obj[form] = f"{your_repo_url}/activities/{form}/items/" + + for field in row_list: + field_name = field["Variable / Field Name"] + item_obj[field_name] = {"@id": f"{form}:{field_name}", "@type": "@id"} + + form_context = {"@context": item_obj} + fc = json.dumps(form_context, indent=4) + + try: + with open(f"activities/{form}/{form}_context", "w") as file: + file.write(fc) + print(f"Context created for form {form}") + except Exception as e: + print(e) + +def create_protocol_context(activity_list, your_repo_url, protocol_name): + # Create protocol context file + activity_obj = { + "@version": 1.1, + "activity_path": f"{your_repo_url}/activities/" + } + + for activity in activity_list: + # Define item_x urls to be inserted in context for the corresponding form + activity_obj[activity] = { + "@id": f"activity_path:{activity}/{activity}_schema", + "@type": "@id" + } + + protocol_context = { + "@context": activity_obj + } + + pc = json.dumps(protocol_context, indent=4) + + protocol_dir = f'protocols/{protocol_name}' + os.makedirs(protocol_dir, exist_ok=True) + + with open(f'{protocol_dir}/{protocol_name}_context', 'w') as file: + file.write(pc) + + print(f'Protocol context created for {protocol_name}') + + +def process_visibility(data): + condition = data.get('Branching Logic (Show field only if...)') + + if condition: + # Normalize the condition field to resemble a JavaScript-like condition + condition = re.sub(r"\(([0-9]*)\)", r"___\1", condition) + condition = re.sub(r"([^>|<])=", r"\1 ==", condition) + condition = condition.replace(" and ", " && ") + condition = condition.replace(" or ", " || ") + condition = 
re.sub(r"\[([^\]]*)\]", r" \1 ", condition) + + visibility_obj = { + "variableName": data['Variable / Field Name'], + "isAbout": f"items/{data['Variable / Field Name']}", + "isVis": condition if condition else True + } + return visibility_obj + +def parse_field_type_and_value(data, input_type_map): + field_type = data.get('Field Type', '') + + input_type = input_type_map.get(field_type, field_type) + + value_type_map = { + 'number': 'xsd:int', + 'date_': 'xsd:date', + 'datetime_': 'datetime', + 'time_': 'xsd:date', + 'email': 'email', + 'phone': 'phone' + } + validation_type = data.get('Text Validation Type OR Show Slider Number', '') + + value_type = value_type_map.get(validation_type, 'xsd:string') + + return input_type, value_type + +def process_choices(choices_str, image_path): + choices = [] + for choice in choices_str.split('|'): + parts = choice.split(', ') + choice_obj = {'schema:value': int(parts[0]), 'schema:name': parts[1]} + if len(parts) == 3: + choice_obj['schema:image'] = f"{image_path}{parts[2]}.png" + choices.append(choice_obj) + return choices + +def normalize_condition(condition_str): + condition_str = re.sub(r"\[([^\]]*)\]", r"\1", condition_str) + condition_str = re.sub(r"\(([0-9]*)\)", r"___\1", condition_str) + condition_str = condition_str.replace(" and ", " && ") + condition_str = condition_str.replace(" or ", " || ") + return condition_str + +def write_to_file(form, field_name, rowData): + try: + file_path = os.path.join('activities', form, 'items', f'{field_name}') + with open(file_path, 'w') as file: + json.dump(rowData, file, indent=4) + print(f"Item schema for {form} written successfully.") + except Exception as e: + print(f"Error in writing item schema for {form}: {e}") + +def parse_html(input_string, default_language='en'): + result = {} + soup = BeautifulSoup(input_string, 'html.parser') + + lang_elements = soup.find_all(True, {'lang': True}) + if lang_elements: + for element in lang_elements: + lang = element.get('lang', 
default_language) + text = element.get_text(strip=True) + if text: + result[lang] = text + if not result: + result[default_language] = soup.get_text(strip=True) + else: + result[default_language] = input_string + + return result + +def process_row(schema_context_url, form, field, schema_map, input_type_map, ui_list, response_list, additional_notes_list): + rowData = { + '@context': schema_context_url, + '@type': 'reproschema:Field', + } + + field_type = field.get('Field Type', '') + schema_map['Choices, Calculations, OR Slider Labels'] = 'scoringLogic' if field_type == 'calc' else 'choices' + + input_type, value_type = parse_field_type_and_value(field, input_type_map) + rowData['ui'] = {'inputType': input_type} + if value_type: + rowData['responseOptions'] = {'valueType': value_type} + + for key, value in field.items(): + if schema_map.get(key) == 'allow' and value: + rowData.setdefault('ui', {}).update({schema_map[key]: value.split(', ')}) + + elif key in ui_list and value: + rowData.setdefault('ui', {}).update({schema_map[key]: input_type_map.get(value, value)}) + + elif key in response_list and value: + if key == 'multipleChoice': + value = value == '1' + rowData.setdefault('responseOptions', {}).update({schema_map[key]: value}) + + elif schema_map.get(key) == 'choices' and value: + rowData.setdefault('responseOptions', {}).update({'choices': process_choices(value, image_path)}) + + elif schema_map.get(key) == 'scoringLogic' and value: + condition = normalize_condition(value) + rowData.setdefault('ui', {}).update({'hidden': True}) + rowData.setdefault('scoringLogic', []).append({"variableName": field['Variable / Field Name'], "jsExpression": condition}) + + elif schema_map.get(key) == 'visibility' and value: + condition = normalize_condition(value) + rowData.setdefault('visibility', []).append({"variableName": field['Variable / Field Name'], "isVis": condition}) + + elif key in ['question', 'schema:description', 'preamble'] and value: + 
rowData.update({schema_map[key]: parse_html(value)}) + + elif key == 'Identifier?' and value: + identifier_val = value.lower() == 'y' + rowData.update({schema_map[key]: [{"legalStandard": "unknown", "isIdentifier": identifier_val}]}) + + elif key in additional_notes_list and value: + notes_obj = {"source": "redcap", "column": key, "value": value} + rowData.setdefault('additionalNotesObj', []).append(notes_obj) + + write_to_file(form, field['Variable / Field Name'], rowData) + +def create_form_schema(schema_context_url, form, activity_display_name, activity_description, order, bl_list, matrix_list, scores_list): + # Construct the JSON-LD structure + json_ld = { + "@context": schema_context_url, + "@type": "reproschema:Activity", + "@id": f"{form}_schema", + "prefLabel": activity_display_name, + "description": activity_description, + "schemaVersion": "1.0.0-rc4", + "version": "0.0.1", + "ui": { + "order": order.get(form, []), + "addProperties": bl_list, + "shuffle": False + } + } + + if matrix_list: + json_ld['matrixInfo'] = matrix_list + if scores_list: + json_ld['scoringLogic'] = scores_list + + try: + path = f'activities/{form}' + os.makedirs(path, exist_ok=True) # Ensure the directory exists + filename = f'{form}_schema' + with open(os.path.join(path, filename), 'w') as file: + json.dump(json_ld, file, indent=4) + print(f"{form} Instrument schema created") + except Exception as err: + print(f"Error in writing {form} form schema:", err) + +def process_activities(activity_name, protocol_visibility_obj, protocol_variable_map, protocol_order): + # Set default visibility condition + protocol_visibility_obj[activity_name] = True + + # Add activity to variableMap and Order + protocol_variable_map.append({ + "variableName": activity_name, + "isAbout": f"items/{activity_name}" + }) + protocol_order.append(activity_name) + +def create_protocol_schema(schema_context_url, protocol_name, protocol_display_name, protocol_description, protocol_variable_map, protocol_order, 
protocol_visibility_obj): + # Construct the protocol schema + protocol_schema = { + "@context": schema_context_url, + "@type": "reproschema:ActivitySet", + "@id": f"{protocol_name}_schema", + "skos:prefLabel": protocol_display_name, + "skos:altLabel": f"{protocol_name}_schema", + "schema:description": protocol_description, + "schema:schemaVersion": "1.0.0-rc4", + "schema:version": "0.0.1", + "variableMap": protocol_variable_map, + "ui": { + "order": protocol_order, + "shuffle": False, + "visibility": protocol_visibility_obj + } + } + + # Write the protocol schema to a file + try: + os.makedirs(f'protocols/{protocol_name}', exist_ok=True) # Ensure the directory exists + with open(f'protocols/{protocol_name}/{protocol_name}_schema', 'w') as file: + json.dump(protocol_schema, file, indent=4) + print("Protocol schema created") + except Exception as err: + print("Error in writing protocol schema:", err) + +def parse_language_iso_codes(input_string): + soup = BeautifulSoup(input_string, 'lxml') + return [element.get('lang') for element in soup.find_all(True, {'lang': True})] + +def main(csv_path, schema_context_url): + # Initialize variables + schema_map = { + "Variable / Field Name": "@id", + "Item Display Name": "prefLabel", + "Field Annotation": "description", + "Section Header": "preamble", + "Field Label": "question", + "Field Type": "inputType", + "Allow": "allow", + "Required Field?": "requiredValue", + "Text Validation Min": "minValue", + "Text Validation Max": "maxValue", + "Choices, Calculations, OR Slider Labels": "choices", + "Branching Logic (Show field only if...)": "visibility", + "Custom Alignment": "customAlignment", + "Identifier?": "identifiable", + "multipleChoice": "multipleChoice", + "responseType": "@type" + } + + input_type_map = { + "calc": "number", + "checkbox": "radio", + "descriptive": "static", + "dropdown": "select", + "notes": "text" + } + + ui_list = ['inputType', 'shuffle', 'allow', 'customAlignment'] + response_list = ['valueType', 
'minValue', 'maxValue', 'requiredValue', 'multipleChoice'] + additional_notes_list = ['Field Note', 'Question Number (surveys only)'] + datas = {} + order = {} + bl_list = [] + sl_list = [] + visibility_obj = {} + scores_obj = {} + scores_list = [] + visibility_list = [] + languages = [] + variable_map = [] + matrix_list = [] + protocol_variable_map = [] + protocol_visibility_obj = {} + protocol_order = [] + + # Read and process the CSV file + with open(csv_path, mode='r', encoding='utf-8') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + form_name = row['Form Name'] + datas.setdefault(form_name, []).append(row) + os.makedirs(f'activities/{form_name}/items', exist_ok=True) + os.makedirs(f'protocols/{protocol_name}', exist_ok=True) + + scores_list = [] + order[form_name] = [] + bl_list = [] + visibility_list = [] + variable_map = [] + matrix_list = [] + activity_display_name = row['Form Name'] + activity_description = row['Form Note'] + + for field in datas[form_name]: + if not languages: + languages = parse_language_iso_codes(field['Field Label']) + + field_name = field['Variable / Field Name'] + visibility_obj = process_visibility(field) + bl_list.append(visibility_obj) + variable_map.append({"variableName": field_name, "isAbout": f"items/{field_name}"}) + + if field.get('Matrix Group Name') or field.get('Matrix Ranking?'): + matrix_list.append({"variableName": field_name, "matrixGroupName": field['Matrix Group Name'], "matrixRanking": field['Matrix Ranking?']}) + + order[form_name].append(f"items/{field_name}") + process_row(schema_context_url, form_name, field, schema_map, input_type_map, ui_list, response_list, additional_notes_list) + + create_form_schema(form_name, activity_display_name, activity_description, order[form_name], bl_list, matrix_list, scores_list) + + # Create protocol context and schema + activity_list = list(datas.keys()) + for activity_name in activity_list: + process_activities(activity_name, protocol_visibility_obj, 
protocol_variable_map, protocol_order) + + create_protocol_schema(schema_context_url, protocol_name, protocol_display_name, protocol_description, protocol_variable_map, protocol_order, protocol_visibility_obj) + +if __name__ == "__main__": + # Make sure we got a filename on the command line + if len(sys.argv) < 3: + print(f'Usage: {sys.argv[0]} your_data_dic.csv') + sys.exit(1) + + # Read the CSV file + csv_path = sys.argv[2] + schema_context_url = 'https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic' + + main(csv_path, schema_context_url) From 142089eeb4661f08ae11df4bc4055f5c5f2aea9f Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Mon, 25 Dec 2023 19:40:37 +0000 Subject: [PATCH 02/21] add redcap2rs yaml template --- templates/redcap2rs.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 templates/redcap2rs.yaml diff --git a/templates/redcap2rs.yaml b/templates/redcap2rs.yaml new file mode 100644 index 0000000..1faf87c --- /dev/null +++ b/templates/redcap2rs.yaml @@ -0,0 +1,19 @@ +# User inputs: these are specific to your protocol, fill out before using the script + +# 1. your protocol id: use underscore for spaces, avoid special characters. +# The display name is the one that will show up in the app, this will be parsed as a string. +protocol_name = "your_protocol_name" # e.g. "My_Protocol" + +# 2. your protocol display name: this will show up in the app and be parsed as a string +protocol_display_name = "Your protocol display name" + +# 3. create your raw GitHub repo URL +user_name = 'your_github_username' +repo_name = 'your_repo_name' +branch_name = 'main' + +repo_url = f"https://github.com/{user_name}/{repo_name}" # no need to change +raw_repo_url = f"https://raw.githubusercontent.com/{user_name}/{repo_name}/{branch_name}" # no need to change + +# 4. add a description to your protocol +protocol_description = "Description for your protocol" # e.g. "This is a protocol for ..." 
\ No newline at end of file From abd9c480db188e1b16bbfb3d3b19598331690170 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Mon, 25 Dec 2023 20:46:40 +0000 Subject: [PATCH 03/21] update yaml template --- templates/redcap2rs.yaml | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/templates/redcap2rs.yaml b/templates/redcap2rs.yaml index 1faf87c..4033ad4 100644 --- a/templates/redcap2rs.yaml +++ b/templates/redcap2rs.yaml @@ -1,19 +1,21 @@ -# User inputs: these are specific to your protocol, fill out before using the script +# Reproschema Protocol Configuration -# 1. your protocol id: use underscore for spaces, avoid special characters. -# The display name is the one that will show up in the app, this will be parsed as a string. -protocol_name = "your_protocol_name" # e.g. "My_Protocol" +# Protocol Name: +# Use underscores for spaces and avoid special characters. +# This is the unique identifier for your protocol. +protocol_name: "your_protocol_name" # Example: "My_Protocol" -# 2. your protocol display name: this will show up in the app and be parsed as a string -protocol_display_name = "Your protocol display name" +# Protocol Display Name: +# This name will be displayed in the application. +protocol_display_name: "Your protocol display name" -# 3. create your raw GitHub repo URL -user_name = 'your_github_username' -repo_name = 'your_repo_name' -branch_name = 'main' +# GitHub Repository Information: +# Create a GitHub repository named 'reproschema' to store your reproschema protocols. +# Replace 'your_github_username' with your actual GitHub username. +user_name: "your_github_username" +repo_name: "reproschema" # Recommended name; can be different if preferred. +repo_url: "https://github.com/your_github_username/reproschema" -repo_url = f"https://github.com/{user_name}/{repo_name}" # no need to change -raw_repo_url = f"https://raw.githubusercontent.com/{user_name}/{repo_name}/{branch_name}" # no need to change - -# 4. 
add a description to your protocol -protocol_description = "Description for your protocol" # e.g. "This is a protocol for ..." \ No newline at end of file +# Protocol Description: +# Provide a brief description of your protocol. +protocol_description: "Description for your protocol" # Example: "This protocol is for ..." \ No newline at end of file From 675a4b2ceac608b6d7ad1e4d5a37113c3002627b Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Mon, 25 Dec 2023 20:53:55 +0000 Subject: [PATCH 04/21] update instruction to use redcap2reproschema.py --- README.md | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/README.md b/README.md index bc6d120..bd26e1a 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,47 @@ Commands: validate ``` +## redcap2reproschema.py Usage + +### Prerequisites +Before using the conversion script, ensure you have the following: + +1. **GitHub Repository**: + - [Create a GitHub repository](https://docs.github.com/en/repositories/creating-and-managing-repositories/creating-a-new-repository) named `reproschema` to store all your reproschema protocols. + - This repository should be set up before converting any data using the script. + +2. **YAML Configuration File**: + - Fill out the `templates/redcap2rs.yaml` file with your protocol details. + +### YAML File Configuration +In the `templates/redcap2rs.yaml` file, provide the following information: + +- **protocol_name**: This is a unique identifier for your protocol. Use underscores for spaces and avoid special characters. +- **protocol_display_name**: The name that will appear in the application. +- **user_name**: Your GitHub username. +- **repo_name**: The repository name where your protocols are stored. It's recommended to use `reproschema`. +- **protocol_description**: A brief description of your protocol. 
+ +Example: +```yaml +protocol_name: "My_Protocol" +protocol_display_name: "Assessment Protocol" +user_name: "john_doe" +repo_name: "reproschema" +protocol_description: "This protocol is for assessing cognitive skills." +``` + +### Using the Script + +After configuring the YAML file: + +1. Run the Python script with the paths to your CSV file and the YAML file as arguments. +2. Command Format: `python reproschema/redcap2reproschema.py path/to/your_redcap_data_dic.csv path/to/your_redcap2rs.yaml` + +### Notes +1. The script requires an active internet connection to access the GitHub repository. +2. Afterwards, use `git add`, `git commit`, and `git push` to keep your converted data under version control. + ## Developer installation Install repo in developer mode: From 1f8746df197d067381de58069b31c8b3c294d1ee Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Mon, 25 Dec 2023 20:54:25 +0000 Subject: [PATCH 05/21] add additional python package --- setup.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.cfg b/setup.cfg index ef22d32..36fda82 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,6 +31,8 @@ install_requires = PyLD requests requests_cache + beautifulsoup4 + lxml test_requires = pytest >= 4.4.0 From 550db3564908d3122e1b3b12d4cee11d4c367280 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Mon, 25 Dec 2023 20:54:51 +0000 Subject: [PATCH 06/21] convert redcap2rs too from js to python --- reproschema/redcap2reproschema.py | 356 +++++++++++++++--------------- 1 file changed, 178 insertions(+), 178 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 1d2966c..b53a836 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -1,96 +1,42 @@ -# User inputs: these are specific to your protocol, fill out before using the script - -# 1. your protocol id: use underscore for spaces, avoid special characters. 
-# The display name is the one that will show up in the app, this will be parsed as a string. -protocol_name = "sc_dd" - -# 2. your protocol display name: this will show up in the app and be parsed as a string -protocol_display_name = "Your protocol display name" - -# 3. create your raw GitHub repo URL -user_name = 'sanuann' -repo_name = 'reproschema' -branch_name = 'master' - -your_repo_url = f"https://raw.githubusercontent.com/{user_name}/{repo_name}/{branch_name}" - -# 4. add a description to your protocol -protocol_description = "Description for your protocol" - -# 5. where are you hosting your images? For example: openmoji -image_path = 'https://raw.githubusercontent.com/hfg-gmuend/openmoji/master/color/618x618/' - import sys import os +import argparse +import subprocess import csv import json import re +import yaml from collections import defaultdict from bs4 import BeautifulSoup -def create_form_context_schema(form, row_list): - item_obj = defaultdict(dict) - item_obj["@version"] = 1.1 - item_obj[form] = f"{your_repo_url}/activities/{form}/items/" - - for field in row_list: - field_name = field["Variable / Field Name"] - item_obj[field_name] = {"@id": f"{form}:{field_name}", "@type": "@id"} - - form_context = {"@context": item_obj} - fc = json.dumps(form_context, indent=4) - - try: - with open(f"activities/{form}/{form}_context", "w") as file: - file.write(fc) - print(f"Context created for form {form}") - except Exception as e: - print(e) - -def create_protocol_context(activity_list, your_repo_url, protocol_name): - # Create protocol context file - activity_obj = { - "@version": 1.1, - "activity_path": f"{your_repo_url}/activities/" - } - - for activity in activity_list: - # Define item_x urls to be inserted in context for the corresponding form - activity_obj[activity] = { - "@id": f"activity_path:{activity}/{activity}_schema", - "@type": "@id" - } - - protocol_context = { - "@context": activity_obj - } - - pc = json.dumps(protocol_context, indent=4) - - 
protocol_dir = f'protocols/{protocol_name}' - os.makedirs(protocol_dir, exist_ok=True) - - with open(f'{protocol_dir}/{protocol_name}_context', 'w') as file: - file.write(pc) - - print(f'Protocol context created for {protocol_name}') +def parse_arguments(): + parser = argparse.ArgumentParser(description='Process REDCap data dictionary and reproschema protocol.') + parser.add_argument('csv_file', help='Path to the REDCap data dictionary CSV file.') + parser.add_argument('yaml_file', help='Path to the reproschema protocol YAML file.') + return parser.parse_args() +def normalize_condition(condition_str): + re_parentheses = re.compile(r"\(([0-9]*)\)") + re_non_gt_lt_equal = re.compile(r"([^>|<])=") + re_brackets = re.compile(r"\[([^\]]*)\]") + + condition_str = re_parentheses.sub(r"___\1", condition_str) + condition_str = re_non_gt_lt_equal.sub(r"\1 ==", condition_str) + condition_str = condition_str.replace(" and ", " && ").replace(" or ", " || ") + condition_str = re_brackets.sub(r" \1 ", condition_str) + return condition_str def process_visibility(data): condition = data.get('Branching Logic (Show field only if...)') - if condition: - # Normalize the condition field to resemble a JavaScript-like condition - condition = re.sub(r"\(([0-9]*)\)", r"___\1", condition) - condition = re.sub(r"([^>|<])=", r"\1 ==", condition) - condition = condition.replace(" and ", " && ") - condition = condition.replace(" or ", " || ") - condition = re.sub(r"\[([^\]]*)\]", r" \1 ", condition) + condition = normalize_condition(condition) + else: + condition = True visibility_obj = { "variableName": data['Variable / Field Name'], "isAbout": f"items/{data['Variable / Field Name']}", - "isVis": condition if condition else True + "isVis": condition } return visibility_obj @@ -113,31 +59,26 @@ def parse_field_type_and_value(data, input_type_map): return input_type, value_type -def process_choices(choices_str, image_path): +def process_choices(choices_str): choices = [] for choice in 
choices_str.split('|'): parts = choice.split(', ') choice_obj = {'schema:value': int(parts[0]), 'schema:name': parts[1]} if len(parts) == 3: - choice_obj['schema:image'] = f"{image_path}{parts[2]}.png" + # TODO: handle image url + choice_obj['schema:image'] = f"{parts[2]}.png" choices.append(choice_obj) return choices -def normalize_condition(condition_str): - condition_str = re.sub(r"\[([^\]]*)\]", r"\1", condition_str) - condition_str = re.sub(r"\(([0-9]*)\)", r"___\1", condition_str) - condition_str = condition_str.replace(" and ", " && ") - condition_str = condition_str.replace(" or ", " || ") - return condition_str - -def write_to_file(form, field_name, rowData): +def write_to_file(form_name, field_name, rowData): + file_path = os.path.join('activities', form_name, 'items', f'{field_name}') + os.makedirs(os.path.dirname(file_path), exist_ok=True) try: - file_path = os.path.join('activities', form, 'items', f'{field_name}') with open(file_path, 'w') as file: json.dump(rowData, file, indent=4) - print(f"Item schema for {form} written successfully.") + print(f"Item schema for {form_name} written successfully.") except Exception as e: - print(f"Error in writing item schema for {form}: {e}") + print(f"Error in writing item schema for {form_name}: {e}") def parse_html(input_string, default_language='en'): result = {} @@ -157,7 +98,7 @@ def parse_html(input_string, default_language='en'): return result -def process_row(schema_context_url, form, field, schema_map, input_type_map, ui_list, response_list, additional_notes_list): +def process_row(schema_context_url, form_name, field, schema_map, input_type_map, ui_list, response_list, additional_notes_list): rowData = { '@context': schema_context_url, '@type': 'reproschema:Field', @@ -184,7 +125,7 @@ def process_row(schema_context_url, form, field, schema_map, input_type_map, ui_ rowData.setdefault('responseOptions', {}).update({schema_map[key]: value}) elif schema_map.get(key) == 'choices' and value: - 
rowData.setdefault('responseOptions', {}).update({'choices': process_choices(value, image_path)}) + rowData.setdefault('responseOptions', {}).update({'choices': process_choices(value)}) elif schema_map.get(key) == 'scoringLogic' and value: condition = normalize_condition(value) @@ -206,20 +147,20 @@ def process_row(schema_context_url, form, field, schema_map, input_type_map, ui_ notes_obj = {"source": "redcap", "column": key, "value": value} rowData.setdefault('additionalNotesObj', []).append(notes_obj) - write_to_file(form, field['Variable / Field Name'], rowData) + write_to_file(form_name, field['Variable / Field Name'], rowData) -def create_form_schema(schema_context_url, form, activity_display_name, activity_description, order, bl_list, matrix_list, scores_list): +def create_form_schema(schema_context_url, form_name, activity_display_name, activity_description, order, bl_list, matrix_list, scores_list): # Construct the JSON-LD structure json_ld = { "@context": schema_context_url, "@type": "reproschema:Activity", - "@id": f"{form}_schema", + "@id": f"{form_name}_schema", "prefLabel": activity_display_name, "description": activity_description, "schemaVersion": "1.0.0-rc4", "version": "0.0.1", "ui": { - "order": order.get(form, []), + "order": order.get(form_name, []), "addProperties": bl_list, "shuffle": False } @@ -230,15 +171,18 @@ def create_form_schema(schema_context_url, form, activity_display_name, activity if scores_list: json_ld['scoringLogic'] = scores_list + path = os.path.join('activities', form_name) + filename = f'{form_name}_schema' + file_path = os.path.join(path, filename) try: - path = f'activities/{form}' - os.makedirs(path, exist_ok=True) # Ensure the directory exists - filename = f'{form}_schema' - with open(os.path.join(path, filename), 'w') as file: + os.makedirs(path, exist_ok=True) + with open(file_path, 'w') as file: json.dump(json_ld, file, indent=4) - print(f"{form} Instrument schema created") - except Exception as err: - print(f"Error 
in writing {form} form schema:", err) + print(f"{form_name} Instrument schema created") + except OSError as e: + print(f"Error creating directory {path}: {e}") + except IOError as e: + print(f"Error writing to file {file_path}: {e}") def process_activities(activity_name, protocol_visibility_obj, protocol_variable_map, protocol_order): # Set default visibility condition @@ -270,36 +214,68 @@ def create_protocol_schema(schema_context_url, protocol_name, protocol_display_n } } - # Write the protocol schema to a file + protocol_dir = f'{protocol_name}' + schema_file = f'{protocol_name}_schema' + file_path = os.path.join(protocol_dir, schema_file) + try: - os.makedirs(f'protocols/{protocol_name}', exist_ok=True) # Ensure the directory exists - with open(f'protocols/{protocol_name}/{protocol_name}_schema', 'w') as file: + os.makedirs(protocol_dir, exist_ok=True) + with open(file_path, 'w') as file: json.dump(protocol_schema, file, indent=4) print("Protocol schema created") - except Exception as err: - print("Error in writing protocol schema:", err) + except OSError as e: + print(f"Error creating directory {protocol_dir}: {e}") + except IOError as e: + print(f"Error writing to file {file_path}: {e}") def parse_language_iso_codes(input_string): soup = BeautifulSoup(input_string, 'lxml') return [element.get('lang') for element in soup.find_all(True, {'lang': True})] -def main(csv_path, schema_context_url): +def process_csv(csv_path, schema_context_url, schema_map, input_type_map, ui_list, response_list, additional_notes_list, protocol_name): + datas = {} + order = {} + languages = [] + + with open(csv_path, mode='r', encoding='utf-8') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + form_name = row['Form Name'] + if form_name not in datas: + datas[form_name] = [] + order[form_name] = [] + os.makedirs(f'activities/{form_name}/items', exist_ok=True) + + datas[form_name].append(row) + + if not languages: + languages = parse_language_iso_codes(row['Field 
Label']) + + for field in datas[form_name]: + field_name = field['Variable / Field Name'] + order[form_name].append(f"items/{field_name}") + process_row(schema_context_url, form_name, field, schema_map, input_type_map, ui_list, response_list, additional_notes_list) + + os.makedirs(f'protocols/{protocol_name}', exist_ok=True) + return datas, order, languages + +def main(csv_path, schema_context_url, protocol_name, protocol_display_name, protocol_description): # Initialize variables schema_map = { - "Variable / Field Name": "@id", + "Variable / Field Name": "@id", # column A "Item Display Name": "prefLabel", - "Field Annotation": "description", - "Section Header": "preamble", - "Field Label": "question", - "Field Type": "inputType", + "Field Annotation": "description", # column R + "Section Header": "preamble", # column C (need double-check) + "Field Label": "question", # column E + "Field Type": "inputType", # column D "Allow": "allow", - "Required Field?": "requiredValue", - "Text Validation Min": "minValue", - "Text Validation Max": "maxValue", - "Choices, Calculations, OR Slider Labels": "choices", - "Branching Logic (Show field only if...)": "visibility", - "Custom Alignment": "customAlignment", - "Identifier?": "identifiable", + "Required Field?": "requiredValue", # column M + "Text Validation Min": "minValue", # column I + "Text Validation Max": "maxValue", # column J + "Choices, Calculations, OR Slider Labels": "choices", # column F + "Branching Logic (Show field only if...)": "visibility", # column L + "Custom Alignment": "customAlignment", # column N + "Identifier?": "identifiable", # column K "multipleChoice": "multipleChoice", "responseType": "@type" } @@ -315,71 +291,95 @@ def main(csv_path, schema_context_url): ui_list = ['inputType', 'shuffle', 'allow', 'customAlignment'] response_list = ['valueType', 'minValue', 'maxValue', 'requiredValue', 'multipleChoice'] additional_notes_list = ['Field Note', 'Question Number (surveys only)'] - datas = {} - order 
= {} - bl_list = [] - sl_list = [] - visibility_obj = {} - scores_obj = {} - scores_list = [] - visibility_list = [] - languages = [] - variable_map = [] - matrix_list = [] + + # Process the CSV file + datas, order, languages = process_csv( + csv_path, schema_context_url, schema_map, + input_type_map, ui_list, response_list, additional_notes_list, + protocol_name + ) + # Initialize other variables for protocol context and schema protocol_variable_map = [] protocol_visibility_obj = {} protocol_order = [] - # Read and process the CSV file - with open(csv_path, mode='r', encoding='utf-8') as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - form_name = row['Form Name'] - datas.setdefault(form_name, []).append(row) - os.makedirs(f'activities/{form_name}/items', exist_ok=True) - os.makedirs(f'protocols/{protocol_name}', exist_ok=True) - - scores_list = [] - order[form_name] = [] - bl_list = [] - visibility_list = [] - variable_map = [] - matrix_list = [] - activity_display_name = row['Form Name'] - activity_description = row['Form Note'] - - for field in datas[form_name]: - if not languages: - languages = parse_language_iso_codes(field['Field Label']) - - field_name = field['Variable / Field Name'] - visibility_obj = process_visibility(field) - bl_list.append(visibility_obj) - variable_map.append({"variableName": field_name, "isAbout": f"items/{field_name}"}) - - if field.get('Matrix Group Name') or field.get('Matrix Ranking?'): - matrix_list.append({"variableName": field_name, "matrixGroupName": field['Matrix Group Name'], "matrixRanking": field['Matrix Ranking?']}) - - order[form_name].append(f"items/{field_name}") - process_row(schema_context_url, form_name, field, schema_map, input_type_map, ui_list, response_list, additional_notes_list) - - create_form_schema(form_name, activity_display_name, activity_description, order[form_name], bl_list, matrix_list, scores_list) + # Create form schemas and process activities + for form_name, rows in 
datas.items(): + bl_list = [] + scores_list = [] + matrix_list = [] + + for field in rows: + visibility_obj = process_visibility(field) + bl_list.append(visibility_obj) + + if field.get('Matrix Group Name') or field.get('Matrix Ranking?'): + matrix_list.append({ + "variableName": field['Variable / Field Name'], + "matrixGroupName": field['Matrix Group Name'], + "matrixRanking": field['Matrix Ranking?'] + }) + + activity_display_name = rows[0]['Form Name'] + activity_description = rows[0]['Form Note'] + create_form_schema( + schema_context_url, + form_name, + activity_display_name, + activity_description, + order.get(form_name, []), + bl_list, + matrix_list, + scores_list + ) + + process_activities( + form_name, + protocol_visibility_obj, + protocol_variable_map, + protocol_order + ) + + # Create protocol schema + create_protocol_schema( + schema_context_url, + protocol_name, + protocol_display_name, + protocol_description, + protocol_variable_map, + protocol_order, + protocol_visibility_obj + ) - # Create protocol context and schema - activity_list = list(datas.keys()) - for activity_name in activity_list: - process_activities(activity_name, protocol_visibility_obj, protocol_variable_map, protocol_order) +if __name__ == "__main__": + + schema_context_url = 'https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic' # we may also want to keep this schema version updated or in the yaml file + + args = parse_arguments() - create_protocol_schema(schema_context_url, protocol_name, protocol_display_name, protocol_description, protocol_variable_map, protocol_order, protocol_visibility_obj) + # Read the CSV file path + csv_path = args.csv_file -if __name__ == "__main__": - # Make sure we got a filename on the command line - if len(sys.argv) < 3: - print(f'Usage: {sys.argv[0]} your_data_dic.csv') + # Read the YAML configuration + yaml_path = args.yaml_file + try: + with open(yaml_path, 'r') as f: + protocol = yaml.safe_load(f) + except 
FileNotFoundError: + print(f"Error: YAML file '{yaml_path}' not found.") sys.exit(1) - # Read the CSV file - csv_path = sys.argv[2] - schema_context_url = 'https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic' + # Extract values from YAML file + protocol_name = protocol.get('protocol_name') + protocol_display_name = protocol.get('protocol_display_name') + protocol_description = protocol.get('protocol_description') + repo_url = protocol.get('repo_url') + + # git clone the repo + subprocess.run(['git', 'clone', repo_url]) + # set up branch and checkout + subprocess.run(['git', 'checkout', 'main']) + # cd to the repo + os.chdir(repo_url.split('/')[-1]) - main(csv_path, schema_context_url) + main(csv_path, schema_context_url, protocol_name, protocol_display_name, protocol_description) From 788130573214a45b71a3bdf266375c5c9a11f83c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 25 Dec 2023 21:18:32 +0000 Subject: [PATCH 07/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 4 +- reproschema/redcap2reproschema.py | 398 +++++++++++++++++++----------- templates/redcap2rs.yaml | 2 +- 3 files changed, 253 insertions(+), 151 deletions(-) diff --git a/README.md b/README.md index bd26e1a..7ba69ae 100644 --- a/README.md +++ b/README.md @@ -43,8 +43,8 @@ Commands: ### Prerequisites Before using the conversion script, ensure you have the following: -1. **GitHub Repository**: - - [Create a GitHub repository](https://docs.github.com/en/repositories/creating-and-managing-repositories/creating-a-new-repository) named `reproschema` to store all your reproschema protocols. +1. **GitHub Repository**: + - [Create a GitHub repository](https://docs.github.com/en/repositories/creating-and-managing-repositories/creating-a-new-repository) named `reproschema` to store all your reproschema protocols. 
- This repository should be set up before converting any data using the script. 2. **YAML Configuration File**: diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index b53a836..6ae975b 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -9,12 +9,16 @@ from collections import defaultdict from bs4 import BeautifulSoup + def parse_arguments(): - parser = argparse.ArgumentParser(description='Process REDCap data dictionary and reproschema protocol.') - parser.add_argument('csv_file', help='Path to the REDCap data dictionary CSV file.') - parser.add_argument('yaml_file', help='Path to the reproschema protocol YAML file.') + parser = argparse.ArgumentParser( + description="Process REDCap data dictionary and reproschema protocol." + ) + parser.add_argument("csv_file", help="Path to the REDCap data dictionary CSV file.") + parser.add_argument("yaml_file", help="Path to the reproschema protocol YAML file.") return parser.parse_args() + def normalize_condition(condition_str): re_parentheses = re.compile(r"\(([0-9]*)\)") re_non_gt_lt_equal = re.compile(r"([^>|<])=") @@ -26,68 +30,73 @@ def normalize_condition(condition_str): condition_str = re_brackets.sub(r" \1 ", condition_str) return condition_str + def process_visibility(data): - condition = data.get('Branching Logic (Show field only if...)') + condition = data.get("Branching Logic (Show field only if...)") if condition: condition = normalize_condition(condition) else: condition = True visibility_obj = { - "variableName": data['Variable / Field Name'], + "variableName": data["Variable / Field Name"], "isAbout": f"items/{data['Variable / Field Name']}", - "isVis": condition + "isVis": condition, } return visibility_obj + def parse_field_type_and_value(data, input_type_map): - field_type = data.get('Field Type', '') - + field_type = data.get("Field Type", "") + input_type = input_type_map.get(field_type, field_type) - + value_type_map = { - 'number': 
'xsd:int', - 'date_': 'xsd:date', - 'datetime_': 'datetime', - 'time_': 'xsd:date', - 'email': 'email', - 'phone': 'phone' + "number": "xsd:int", + "date_": "xsd:date", + "datetime_": "datetime", + "time_": "xsd:date", + "email": "email", + "phone": "phone", } - validation_type = data.get('Text Validation Type OR Show Slider Number', '') - - value_type = value_type_map.get(validation_type, 'xsd:string') + validation_type = data.get("Text Validation Type OR Show Slider Number", "") + + value_type = value_type_map.get(validation_type, "xsd:string") return input_type, value_type + def process_choices(choices_str): choices = [] - for choice in choices_str.split('|'): - parts = choice.split(', ') - choice_obj = {'schema:value': int(parts[0]), 'schema:name': parts[1]} + for choice in choices_str.split("|"): + parts = choice.split(", ") + choice_obj = {"schema:value": int(parts[0]), "schema:name": parts[1]} if len(parts) == 3: # TODO: handle image url - choice_obj['schema:image'] = f"{parts[2]}.png" + choice_obj["schema:image"] = f"{parts[2]}.png" choices.append(choice_obj) return choices + def write_to_file(form_name, field_name, rowData): - file_path = os.path.join('activities', form_name, 'items', f'{field_name}') + file_path = os.path.join("activities", form_name, "items", f"{field_name}") os.makedirs(os.path.dirname(file_path), exist_ok=True) try: - with open(file_path, 'w') as file: + with open(file_path, "w") as file: json.dump(rowData, file, indent=4) print(f"Item schema for {form_name} written successfully.") except Exception as e: print(f"Error in writing item schema for {form_name}: {e}") -def parse_html(input_string, default_language='en'): + +def parse_html(input_string, default_language="en"): result = {} - soup = BeautifulSoup(input_string, 'html.parser') + soup = BeautifulSoup(input_string, "html.parser") - lang_elements = soup.find_all(True, {'lang': True}) + lang_elements = soup.find_all(True, {"lang": True}) if lang_elements: for element in 
lang_elements: - lang = element.get('lang', default_language) + lang = element.get("lang", default_language) text = element.get_text(strip=True) if text: result[lang] = text @@ -98,58 +107,97 @@ def parse_html(input_string, default_language='en'): return result -def process_row(schema_context_url, form_name, field, schema_map, input_type_map, ui_list, response_list, additional_notes_list): + +def process_row( + schema_context_url, + form_name, + field, + schema_map, + input_type_map, + ui_list, + response_list, + additional_notes_list, +): rowData = { - '@context': schema_context_url, - '@type': 'reproschema:Field', + "@context": schema_context_url, + "@type": "reproschema:Field", } - field_type = field.get('Field Type', '') - schema_map['Choices, Calculations, OR Slider Labels'] = 'scoringLogic' if field_type == 'calc' else 'choices' + field_type = field.get("Field Type", "") + schema_map["Choices, Calculations, OR Slider Labels"] = ( + "scoringLogic" if field_type == "calc" else "choices" + ) input_type, value_type = parse_field_type_and_value(field, input_type_map) - rowData['ui'] = {'inputType': input_type} + rowData["ui"] = {"inputType": input_type} if value_type: - rowData['responseOptions'] = {'valueType': value_type} + rowData["responseOptions"] = {"valueType": value_type} for key, value in field.items(): - if schema_map.get(key) == 'allow' and value: - rowData.setdefault('ui', {}).update({schema_map[key]: value.split(', ')}) + if schema_map.get(key) == "allow" and value: + rowData.setdefault("ui", {}).update({schema_map[key]: value.split(", ")}) elif key in ui_list and value: - rowData.setdefault('ui', {}).update({schema_map[key]: input_type_map.get(value, value)}) + rowData.setdefault("ui", {}).update( + {schema_map[key]: input_type_map.get(value, value)} + ) elif key in response_list and value: - if key == 'multipleChoice': - value = value == '1' - rowData.setdefault('responseOptions', {}).update({schema_map[key]: value}) + if key == "multipleChoice": + 
value = value == "1" + rowData.setdefault("responseOptions", {}).update({schema_map[key]: value}) - elif schema_map.get(key) == 'choices' and value: - rowData.setdefault('responseOptions', {}).update({'choices': process_choices(value)}) + elif schema_map.get(key) == "choices" and value: + rowData.setdefault("responseOptions", {}).update( + {"choices": process_choices(value)} + ) - elif schema_map.get(key) == 'scoringLogic' and value: + elif schema_map.get(key) == "scoringLogic" and value: condition = normalize_condition(value) - rowData.setdefault('ui', {}).update({'hidden': True}) - rowData.setdefault('scoringLogic', []).append({"variableName": field['Variable / Field Name'], "jsExpression": condition}) - - elif schema_map.get(key) == 'visibility' and value: + rowData.setdefault("ui", {}).update({"hidden": True}) + rowData.setdefault("scoringLogic", []).append( + { + "variableName": field["Variable / Field Name"], + "jsExpression": condition, + } + ) + + elif schema_map.get(key) == "visibility" and value: condition = normalize_condition(value) - rowData.setdefault('visibility', []).append({"variableName": field['Variable / Field Name'], "isVis": condition}) + rowData.setdefault("visibility", []).append( + {"variableName": field["Variable / Field Name"], "isVis": condition} + ) - elif key in ['question', 'schema:description', 'preamble'] and value: + elif key in ["question", "schema:description", "preamble"] and value: rowData.update({schema_map[key]: parse_html(value)}) - elif key == 'Identifier?' and value: - identifier_val = value.lower() == 'y' - rowData.update({schema_map[key]: [{"legalStandard": "unknown", "isIdentifier": identifier_val}]}) + elif key == "Identifier?" 
and value: + identifier_val = value.lower() == "y" + rowData.update( + { + schema_map[key]: [ + {"legalStandard": "unknown", "isIdentifier": identifier_val} + ] + } + ) elif key in additional_notes_list and value: notes_obj = {"source": "redcap", "column": key, "value": value} - rowData.setdefault('additionalNotesObj', []).append(notes_obj) + rowData.setdefault("additionalNotesObj", []).append(notes_obj) - write_to_file(form_name, field['Variable / Field Name'], rowData) + write_to_file(form_name, field["Variable / Field Name"], rowData) -def create_form_schema(schema_context_url, form_name, activity_display_name, activity_description, order, bl_list, matrix_list, scores_list): + +def create_form_schema( + schema_context_url, + form_name, + activity_display_name, + activity_description, + order, + bl_list, + matrix_list, + scores_list, +): # Construct the JSON-LD structure json_ld = { "@context": schema_context_url, @@ -162,21 +210,21 @@ def create_form_schema(schema_context_url, form_name, activity_display_name, act "ui": { "order": order.get(form_name, []), "addProperties": bl_list, - "shuffle": False - } + "shuffle": False, + }, } if matrix_list: - json_ld['matrixInfo'] = matrix_list + json_ld["matrixInfo"] = matrix_list if scores_list: - json_ld['scoringLogic'] = scores_list + json_ld["scoringLogic"] = scores_list - path = os.path.join('activities', form_name) - filename = f'{form_name}_schema' + path = os.path.join("activities", form_name) + filename = f"{form_name}_schema" file_path = os.path.join(path, filename) try: os.makedirs(path, exist_ok=True) - with open(file_path, 'w') as file: + with open(file_path, "w") as file: json.dump(json_ld, file, indent=4) print(f"{form_name} Instrument schema created") except OSError as e: @@ -184,18 +232,29 @@ def create_form_schema(schema_context_url, form_name, activity_display_name, act except IOError as e: print(f"Error writing to file {file_path}: {e}") -def process_activities(activity_name, protocol_visibility_obj, 
protocol_variable_map, protocol_order): + +def process_activities( + activity_name, protocol_visibility_obj, protocol_variable_map, protocol_order +): # Set default visibility condition protocol_visibility_obj[activity_name] = True # Add activity to variableMap and Order - protocol_variable_map.append({ - "variableName": activity_name, - "isAbout": f"items/{activity_name}" - }) + protocol_variable_map.append( + {"variableName": activity_name, "isAbout": f"items/{activity_name}"} + ) protocol_order.append(activity_name) -def create_protocol_schema(schema_context_url, protocol_name, protocol_display_name, protocol_description, protocol_variable_map, protocol_order, protocol_visibility_obj): + +def create_protocol_schema( + schema_context_url, + protocol_name, + protocol_display_name, + protocol_description, + protocol_variable_map, + protocol_order, + protocol_visibility_obj, +): # Construct the protocol schema protocol_schema = { "@context": schema_context_url, @@ -210,17 +269,17 @@ def create_protocol_schema(schema_context_url, protocol_name, protocol_display_n "ui": { "order": protocol_order, "shuffle": False, - "visibility": protocol_visibility_obj - } + "visibility": protocol_visibility_obj, + }, } - protocol_dir = f'{protocol_name}' - schema_file = f'{protocol_name}_schema' + protocol_dir = f"{protocol_name}" + schema_file = f"{protocol_name}_schema" file_path = os.path.join(protocol_dir, schema_file) try: os.makedirs(protocol_dir, exist_ok=True) - with open(file_path, 'w') as file: + with open(file_path, "w") as file: json.dump(protocol_schema, file, indent=4) print("Protocol schema created") except OSError as e: @@ -228,56 +287,83 @@ def create_protocol_schema(schema_context_url, protocol_name, protocol_display_n except IOError as e: print(f"Error writing to file {file_path}: {e}") -def parse_language_iso_codes(input_string): - soup = BeautifulSoup(input_string, 'lxml') - return [element.get('lang') for element in soup.find_all(True, {'lang': True})] -def 
process_csv(csv_path, schema_context_url, schema_map, input_type_map, ui_list, response_list, additional_notes_list, protocol_name): +def parse_language_iso_codes(input_string): + soup = BeautifulSoup(input_string, "lxml") + return [element.get("lang") for element in soup.find_all(True, {"lang": True})] + + +def process_csv( + csv_path, + schema_context_url, + schema_map, + input_type_map, + ui_list, + response_list, + additional_notes_list, + protocol_name, +): datas = {} order = {} languages = [] - with open(csv_path, mode='r', encoding='utf-8') as csvfile: + with open(csv_path, mode="r", encoding="utf-8") as csvfile: reader = csv.DictReader(csvfile) for row in reader: - form_name = row['Form Name'] + form_name = row["Form Name"] if form_name not in datas: datas[form_name] = [] order[form_name] = [] - os.makedirs(f'activities/{form_name}/items', exist_ok=True) + os.makedirs(f"activities/{form_name}/items", exist_ok=True) datas[form_name].append(row) if not languages: - languages = parse_language_iso_codes(row['Field Label']) + languages = parse_language_iso_codes(row["Field Label"]) for field in datas[form_name]: - field_name = field['Variable / Field Name'] + field_name = field["Variable / Field Name"] order[form_name].append(f"items/{field_name}") - process_row(schema_context_url, form_name, field, schema_map, input_type_map, ui_list, response_list, additional_notes_list) - - os.makedirs(f'protocols/{protocol_name}', exist_ok=True) + process_row( + schema_context_url, + form_name, + field, + schema_map, + input_type_map, + ui_list, + response_list, + additional_notes_list, + ) + + os.makedirs(f"protocols/{protocol_name}", exist_ok=True) return datas, order, languages -def main(csv_path, schema_context_url, protocol_name, protocol_display_name, protocol_description): + +def main( + csv_path, + schema_context_url, + protocol_name, + protocol_display_name, + protocol_description, +): # Initialize variables schema_map = { - "Variable / Field Name": "@id", # column 
A + "Variable / Field Name": "@id", # column A "Item Display Name": "prefLabel", - "Field Annotation": "description", # column R - "Section Header": "preamble", # column C (need double-check) - "Field Label": "question", # column E - "Field Type": "inputType", # column D + "Field Annotation": "description", # column R + "Section Header": "preamble", # column C (need double-check) + "Field Label": "question", # column E + "Field Type": "inputType", # column D "Allow": "allow", - "Required Field?": "requiredValue", # column M - "Text Validation Min": "minValue", # column I - "Text Validation Max": "maxValue", # column J - "Choices, Calculations, OR Slider Labels": "choices", # column F - "Branching Logic (Show field only if...)": "visibility", # column L - "Custom Alignment": "customAlignment", # column N - "Identifier?": "identifiable", # column K + "Required Field?": "requiredValue", # column M + "Text Validation Min": "minValue", # column I + "Text Validation Max": "maxValue", # column J + "Choices, Calculations, OR Slider Labels": "choices", # column F + "Branching Logic (Show field only if...)": "visibility", # column L + "Custom Alignment": "customAlignment", # column N + "Identifier?": "identifiable", # column K "multipleChoice": "multipleChoice", - "responseType": "@type" + "responseType": "@type", } input_type_map = { @@ -285,18 +371,29 @@ def main(csv_path, schema_context_url, protocol_name, protocol_display_name, pro "checkbox": "radio", "descriptive": "static", "dropdown": "select", - "notes": "text" + "notes": "text", } - ui_list = ['inputType', 'shuffle', 'allow', 'customAlignment'] - response_list = ['valueType', 'minValue', 'maxValue', 'requiredValue', 'multipleChoice'] - additional_notes_list = ['Field Note', 'Question Number (surveys only)'] + ui_list = ["inputType", "shuffle", "allow", "customAlignment"] + response_list = [ + "valueType", + "minValue", + "maxValue", + "requiredValue", + "multipleChoice", + ] + additional_notes_list = ["Field Note", 
"Question Number (surveys only)"] # Process the CSV file datas, order, languages = process_csv( - csv_path, schema_context_url, schema_map, - input_type_map, ui_list, response_list, additional_notes_list, - protocol_name + csv_path, + schema_context_url, + schema_map, + input_type_map, + ui_list, + response_list, + additional_notes_list, + protocol_name, ) # Initialize other variables for protocol context and schema protocol_variable_map = [] @@ -313,48 +410,47 @@ def main(csv_path, schema_context_url, protocol_name, protocol_display_name, pro visibility_obj = process_visibility(field) bl_list.append(visibility_obj) - if field.get('Matrix Group Name') or field.get('Matrix Ranking?'): - matrix_list.append({ - "variableName": field['Variable / Field Name'], - "matrixGroupName": field['Matrix Group Name'], - "matrixRanking": field['Matrix Ranking?'] - }) - - activity_display_name = rows[0]['Form Name'] - activity_description = rows[0]['Form Note'] + if field.get("Matrix Group Name") or field.get("Matrix Ranking?"): + matrix_list.append( + { + "variableName": field["Variable / Field Name"], + "matrixGroupName": field["Matrix Group Name"], + "matrixRanking": field["Matrix Ranking?"], + } + ) + + activity_display_name = rows[0]["Form Name"] + activity_description = rows[0]["Form Note"] create_form_schema( - schema_context_url, - form_name, - activity_display_name, - activity_description, - order.get(form_name, []), - bl_list, - matrix_list, - scores_list + schema_context_url, + form_name, + activity_display_name, + activity_description, + order.get(form_name, []), + bl_list, + matrix_list, + scores_list, ) process_activities( - form_name, - protocol_visibility_obj, - protocol_variable_map, - protocol_order + form_name, protocol_visibility_obj, protocol_variable_map, protocol_order ) # Create protocol schema create_protocol_schema( - schema_context_url, - protocol_name, - protocol_display_name, - protocol_description, - protocol_variable_map, - protocol_order, - 
protocol_visibility_obj + schema_context_url, + protocol_name, + protocol_display_name, + protocol_description, + protocol_variable_map, + protocol_order, + protocol_visibility_obj, ) + if __name__ == "__main__": - - schema_context_url = 'https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic' # we may also want to keep this schema version updated or in the yaml file - + schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic" # we may also want to keep this schema version updated or in the yaml file + args = parse_arguments() # Read the CSV file path @@ -363,23 +459,29 @@ def main(csv_path, schema_context_url, protocol_name, protocol_display_name, pro # Read the YAML configuration yaml_path = args.yaml_file try: - with open(yaml_path, 'r') as f: + with open(yaml_path, "r") as f: protocol = yaml.safe_load(f) except FileNotFoundError: print(f"Error: YAML file '{yaml_path}' not found.") sys.exit(1) # Extract values from YAML file - protocol_name = protocol.get('protocol_name') - protocol_display_name = protocol.get('protocol_display_name') - protocol_description = protocol.get('protocol_description') - repo_url = protocol.get('repo_url') + protocol_name = protocol.get("protocol_name") + protocol_display_name = protocol.get("protocol_display_name") + protocol_description = protocol.get("protocol_description") + repo_url = protocol.get("repo_url") # git clone the repo - subprocess.run(['git', 'clone', repo_url]) + subprocess.run(["git", "clone", repo_url]) # set up branch and checkout - subprocess.run(['git', 'checkout', 'main']) + subprocess.run(["git", "checkout", "main"]) # cd to the repo - os.chdir(repo_url.split('/')[-1]) - - main(csv_path, schema_context_url, protocol_name, protocol_display_name, protocol_description) + os.chdir(repo_url.split("/")[-1]) + + main( + csv_path, + schema_context_url, + protocol_name, + protocol_display_name, + protocol_description, + ) diff --git 
a/templates/redcap2rs.yaml b/templates/redcap2rs.yaml index 4033ad4..b8bf76e 100644 --- a/templates/redcap2rs.yaml +++ b/templates/redcap2rs.yaml @@ -18,4 +18,4 @@ repo_url: "https://github.com/your_github_username/reproschema" # Protocol Description: # Provide a brief description of your protocol. -protocol_description: "Description for your protocol" # Example: "This protocol is for ..." \ No newline at end of file +protocol_description: "Description for your protocol" # Example: "This protocol is for ..." From a3606fb27aee9436ed39ef1e757bc03fa45bd374 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Tue, 26 Dec 2023 19:30:17 +0000 Subject: [PATCH 08/21] integrate redcap2rs in cli.py --- reproschema/cli.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/reproschema/cli.py b/reproschema/cli.py index adbf509..1a4385b 100644 --- a/reproschema/cli.py +++ b/reproschema/cli.py @@ -3,6 +3,7 @@ from . import get_logger, set_logger_level from . import __version__ +from .redcap2reproschema import main as redcap2rs lgr = get_logger() @@ -95,3 +96,24 @@ def serve(port): from .utils import start_server start_server(port=port) + +@main.command() +@click.argument('csv_path', type=click.Path(exists=True, dir_okay=False)) +@click.argument('yaml_path', type=click.Path(exists=True, dir_okay=False)) +@click.option('--schema-url', default='https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic', show_default=True, help='URL of the schema context') +def redcap2reproschema(csv_path, yaml_path, schema_url): + """ + Convert REDCap CSV files to Reproschema format. + + Provide the path to the REDCap CSV file and the YAML configuration file. 
+ """ + if not os.path.exists(csv_path): + raise click.ClickException(f"CSV file not found at {csv_path}") + if not os.path.exists(yaml_path): + raise click.ClickException(f"YAML file not found at {yaml_path}") + + try: + # Call the redcap2reproschema main function with provided arguments + redcap2rs(csv_path, schema_url, yaml_path) + except Exception as e: + raise click.ClickException(f"Error during conversion: {e}") \ No newline at end of file From 73da5714499f16c18164c464f83539c4afb61d0e Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Tue, 26 Dec 2023 20:11:51 +0000 Subject: [PATCH 09/21] rewrite main() for cli use --- README.md | 24 ++++++++-- reproschema/cli.py | 12 ++--- reproschema/redcap2reproschema.py | 80 ++++++++++++++----------------- 3 files changed, 63 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 7ba69ae..33b12cd 100644 --- a/README.md +++ b/README.md @@ -38,10 +38,11 @@ Commands: validate ``` -## redcap2reproschema.py Usage +## redcap2reproschema Usage +The `redcap2reproschema` function is designed to process a given REDCap CSV file and YAML configuration to generate the output in the reproschema format. ### Prerequisites -Before using the conversion script, ensure you have the following: +Before the conversion, ensure you have the following: 1. **GitHub Repository**: - [Create a GitHub repository](https://docs.github.com/en/repositories/creating-and-managing-repositories/creating-a-new-repository) named `reproschema` to store all your reproschema protocols. @@ -67,8 +68,25 @@ user_name: "john_doe" repo_name: "reproschema" protocol_description: "This protocol is for assessing cognitive skills." 
``` +### Command-Line Usage -### Using the Script +The `redcap2reproschema` function has been integrated into a CLI tool; use the following command: +```bash +reproschema redcap2reproschema path/to/your_redcap_data_dic.csv path/to/your_redcap2rs.yaml +``` + +### Python Function Usage + +You can also use the `redcap2reproschema` function from the `reproschema-py` package in your Python code. + +```python +from reproschema import redcap2reproschema + +csv_path = "path-to/your_redcap_data_dic.csv" +yaml_path = "path-to/your_redcap2rs.yaml" + +redcap2reproschema(csv_path, yaml_path) +``` After configuring the YAML file: diff --git a/reproschema/cli.py b/reproschema/cli.py index 1a4385b..88655e5 100644 --- a/reproschema/cli.py +++ b/reproschema/cli.py @@ -1,5 +1,6 @@ import os import click +import yaml from . import get_logger, set_logger_level from . import __version__ @@ -100,11 +101,10 @@ def serve(port): @main.command() @click.argument('csv_path', type=click.Path(exists=True, dir_okay=False)) @click.argument('yaml_path', type=click.Path(exists=True, dir_okay=False)) -@click.option('--schema-url', default='https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic', show_default=True, help='URL of the schema context') -def redcap2reproschema(csv_path, yaml_path, schema_url): +def redcap2reproschema(csv_path, yaml_path): """ Convert REDCap CSV files to Reproschema format. - + Provide the path to the REDCap CSV file and the YAML configuration file.
""" if not os.path.exists(csv_path): @@ -113,7 +113,7 @@ def redcap2reproschema(csv_path, yaml_path, schema_url): raise click.ClickException(f"YAML file not found at {yaml_path}") try: - # Call the redcap2reproschema main function with provided arguments - redcap2rs(csv_path, schema_url, yaml_path) + redcap2rs(csv_path, yaml_path) + click.echo("Converted REDCap data dictionary to Reproschema format.") except Exception as e: - raise click.ClickException(f"Error during conversion: {e}") \ No newline at end of file + raise click.ClickException(f"Error during conversion: {e}") diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 6ae975b..c08c6b1 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -6,19 +6,8 @@ import json import re import yaml -from collections import defaultdict from bs4 import BeautifulSoup - -def parse_arguments(): - parser = argparse.ArgumentParser( - description="Process REDCap data dictionary and reproschema protocol." - ) - parser.add_argument("csv_file", help="Path to the REDCap data dictionary CSV file.") - parser.add_argument("yaml_file", help="Path to the reproschema protocol YAML file.") - return parser.parse_args() - - def normalize_condition(condition_str): re_parentheses = re.compile(r"\(([0-9]*)\)") re_non_gt_lt_equal = re.compile(r"([^>|<])=") @@ -339,13 +328,28 @@ def process_csv( return datas, order, languages -def main( - csv_path, - schema_context_url, - protocol_name, - protocol_display_name, - protocol_description, -): +def redcap2reproschema(csv_path, yaml_path, schema_context_url=None): + """ + Convert a REDCap data dictionary to Reproschema format. + + :param csv_path: Path to the REDCap CSV file. + :param yaml_path: Path to the YAML configuration file. + :param schema_context_url: URL of the schema context. Optional. 
+ """ + if schema_context_url is None: + schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic" + + # Read YAML configuration + try: + with open(yaml_path, "r") as f: + protocol_info = yaml.safe_load(f) + except FileNotFoundError: + raise FileNotFoundError(f"YAML file '{yaml_path}' not found.") + + protocol_name = protocol.get("protocol_name") + protocol_display_name = protocol.get("protocol_display_name") + protocol_description = protocol.get("protocol_description") + # Initialize variables schema_map = { "Variable / Field Name": "@id", # column A @@ -447,41 +451,29 @@ def main( protocol_visibility_obj, ) - -if __name__ == "__main__": - schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic" # we may also want to keep this schema version updated or in the yaml file - - args = parse_arguments() - - # Read the CSV file path - csv_path = args.csv_file +def main(): + import argparse + parser = argparse.ArgumentParser(description="Convert REDCap data dictionary to Reproschema format.") + parser.add_argument("csv_file", help="Path to the REDCap data dictionary CSV file.") + parser.add_argument("yaml_file", help="Path to the Reproschema protocol YAML file.") + args = parser.parse_args() # Read the YAML configuration - yaml_path = args.yaml_file try: - with open(yaml_path, "r") as f: + with open(args.yaml_file, "r") as f: protocol = yaml.safe_load(f) except FileNotFoundError: - print(f"Error: YAML file '{yaml_path}' not found.") - sys.exit(1) + raise FileNotFoundError(f"YAML file '{args.yaml_file}' not found.") - # Extract values from YAML file - protocol_name = protocol.get("protocol_name") - protocol_display_name = protocol.get("protocol_display_name") - protocol_description = protocol.get("protocol_description") repo_url = protocol.get("repo_url") - # git clone the repo + # Git operations subprocess.run(["git", "clone", repo_url]) - # set up branch and checkout 
subprocess.run(["git", "checkout", "main"]) - # cd to the repo os.chdir(repo_url.split("/")[-1]) - main( - csv_path, - schema_context_url, - protocol_name, - protocol_display_name, - protocol_description, - ) + # Call the main conversion function + redcap2reproschema(args.csv_file, args.yaml_file) + +if __name__ == "__main__": + main() \ No newline at end of file From 4906d7606975b3ff2f4192e2983c2c9b8d2e623f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 26 Dec 2023 20:11:58 +0000 Subject: [PATCH 10/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- reproschema/cli.py | 5 +++-- reproschema/redcap2reproschema.py | 10 ++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/reproschema/cli.py b/reproschema/cli.py index 88655e5..663ce56 100644 --- a/reproschema/cli.py +++ b/reproschema/cli.py @@ -98,9 +98,10 @@ def serve(port): start_server(port=port) + @main.command() -@click.argument('csv_path', type=click.Path(exists=True, dir_okay=False)) -@click.argument('yaml_path', type=click.Path(exists=True, dir_okay=False)) +@click.argument("csv_path", type=click.Path(exists=True, dir_okay=False)) +@click.argument("yaml_path", type=click.Path(exists=True, dir_okay=False)) def redcap2reproschema(csv_path, yaml_path): """ Convert REDCap CSV files to Reproschema format. 
diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index c08c6b1..de24ae9 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -8,6 +8,7 @@ import yaml from bs4 import BeautifulSoup + def normalize_condition(condition_str): re_parentheses = re.compile(r"\(([0-9]*)\)") re_non_gt_lt_equal = re.compile(r"([^>|<])=") @@ -451,9 +452,13 @@ def redcap2reproschema(csv_path, yaml_path, schema_context_url=None): protocol_visibility_obj, ) + def main(): import argparse - parser = argparse.ArgumentParser(description="Convert REDCap data dictionary to Reproschema format.") + + parser = argparse.ArgumentParser( + description="Convert REDCap data dictionary to Reproschema format." + ) parser.add_argument("csv_file", help="Path to the REDCap data dictionary CSV file.") parser.add_argument("yaml_file", help="Path to the Reproschema protocol YAML file.") args = parser.parse_args() @@ -475,5 +480,6 @@ def main(): # Call the main conversion function redcap2reproschema(args.csv_file, args.yaml_file) + if __name__ == "__main__": - main() \ No newline at end of file + main() From 25932f8268fcc6a67c0ee313313f3c1171adf5fa Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Wed, 27 Dec 2023 20:39:12 +0000 Subject: [PATCH 11/21] update the template --- reproschema/redcap2reproschema.py | 4 +++- templates/redcap2rs.yaml | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index de24ae9..5dbce46 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -470,7 +470,9 @@ def main(): except FileNotFoundError: raise FileNotFoundError(f"YAML file '{args.yaml_file}' not found.") - repo_url = protocol.get("repo_url") + user_name = protocol.get("user_name") + repo_name = protocol.get("repo_name") + repo_url = f"https://github.com/{user_name}/{repo_name}" # Git operations subprocess.run(["git", "clone", repo_url]) diff 
--git a/templates/redcap2rs.yaml b/templates/redcap2rs.yaml index b8bf76e..ed56cd1 100644 --- a/templates/redcap2rs.yaml +++ b/templates/redcap2rs.yaml @@ -14,7 +14,6 @@ protocol_display_name: "Your protocol display name" # Replace 'your_github_username' with your actual GitHub username. user_name: "your_github_username" repo_name: "reproschema" # Recommended name; can be different if preferred. -repo_url: "https://github.com/your_github_username/reproschema" # Protocol Description: # Provide a brief description of your protocol. From 451af3cccf4300ba3a0cca5357111004d3cf5191 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Wed, 27 Dec 2023 20:39:31 +0000 Subject: [PATCH 12/21] major updates for converting --- reproschema/redcap2reproschema.py | 140 +++++++++++++++++------------- 1 file changed, 82 insertions(+), 58 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 5dbce46..da8880c 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -68,8 +68,8 @@ def process_choices(choices_str): return choices -def write_to_file(form_name, field_name, rowData): - file_path = os.path.join("activities", form_name, "items", f"{field_name}") +def write_to_file(abs_repo_path, form_name, field_name, rowData): + file_path = os.path.join(f"{abs_repo_path}", "activities", form_name, "items", f"{field_name}") os.makedirs(os.path.dirname(file_path), exist_ok=True) try: with open(file_path, "w") as file: @@ -99,15 +99,16 @@ def parse_html(input_string, default_language="en"): def process_row( - schema_context_url, - form_name, - field, - schema_map, - input_type_map, - ui_list, - response_list, - additional_notes_list, -): + abs_repo_path, + schema_context_url, + form_name, + field, + schema_map, + input_type_map, + ui_list, + response_list, + additional_notes_list, + ): rowData = { "@context": schema_context_url, "@type": "reproschema:Field", @@ -175,19 +176,20 @@ def process_row( notes_obj = {"source": 
"redcap", "column": key, "value": value} rowData.setdefault("additionalNotesObj", []).append(notes_obj) - write_to_file(form_name, field["Variable / Field Name"], rowData) + write_to_file(abs_repo_path, form_name, field["Variable / Field Name"], rowData) def create_form_schema( - schema_context_url, - form_name, - activity_display_name, - activity_description, - order, - bl_list, - matrix_list, - scores_list, -): + abs_repo_path, + schema_context_url, + form_name, + activity_display_name, + activity_description, + order, + bl_list, + matrix_list, + scores_list, + ): # Construct the JSON-LD structure json_ld = { "@context": schema_context_url, @@ -209,7 +211,7 @@ def create_form_schema( if scores_list: json_ld["scoringLogic"] = scores_list - path = os.path.join("activities", form_name) + path = os.path.join(f"{abs_repo_path}", "activities", form_name) filename = f"{form_name}_schema" file_path = os.path.join(path, filename) try: @@ -237,14 +239,15 @@ def process_activities( def create_protocol_schema( - schema_context_url, - protocol_name, - protocol_display_name, - protocol_description, - protocol_variable_map, - protocol_order, - protocol_visibility_obj, -): + abs_repo_path, + schema_context_url, + protocol_name, + protocol_display_name, + protocol_description, + protocol_variable_map, + protocol_order, + protocol_visibility_obj, + ): # Construct the protocol schema protocol_schema = { "@context": schema_context_url, @@ -257,13 +260,27 @@ def create_protocol_schema( "schema:version": "0.0.1", "variableMap": protocol_variable_map, "ui": { + "addProperties": [], "order": protocol_order, "shuffle": False, - "visibility": protocol_visibility_obj, }, } - - protocol_dir = f"{protocol_name}" + + # Populate addProperties list + for activity in protocol_order: + add_property = { + "isAbout": f"../activities/{activity}/{activity}_schema", + "variableName": f"{activity}_schema", + # Assuming activity name as prefLabel, update as needed + "prefLabel": activity.replace("_", " 
").title() + } + protocol_schema["ui"]["addProperties"].append(add_property) + + # Add visibility if needed + if protocol_visibility_obj: + protocol_schema["ui"]["visibility"] = protocol_visibility_obj + + protocol_dir = f"{abs_repo_path}/{protocol_name}" schema_file = f"{protocol_name}_schema" file_path = os.path.join(protocol_dir, schema_file) @@ -285,6 +302,7 @@ def parse_language_iso_codes(input_string): def process_csv( csv_path, + abs_repo_path, schema_context_url, schema_map, input_type_map, @@ -304,7 +322,7 @@ def process_csv( if form_name not in datas: datas[form_name] = [] order[form_name] = [] - os.makedirs(f"activities/{form_name}/items", exist_ok=True) + os.makedirs(f"{abs_repo_path}/activities/{form_name}/items", exist_ok=True) datas[form_name].append(row) @@ -315,6 +333,7 @@ def process_csv( field_name = field["Variable / Field Name"] order[form_name].append(f"items/{field_name}") process_row( + abs_repo_path, schema_context_url, form_name, field, @@ -325,11 +344,11 @@ def process_csv( additional_notes_list, ) - os.makedirs(f"protocols/{protocol_name}", exist_ok=True) + os.makedirs(f"{abs_repo_path}/protocols/{protocol_name}", exist_ok=True) return datas, order, languages -def redcap2reproschema(csv_path, yaml_path, schema_context_url=None): +def redcap2reproschema(csv_path, abs_repo_path, protocol_name, protocol_display_name, protocol_description, schema_context_url=None): """ Convert a REDCap data dictionary to Reproschema format. 
@@ -340,17 +359,6 @@ def redcap2reproschema(csv_path, yaml_path, schema_context_url=None): if schema_context_url is None: schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic" - # Read YAML configuration - try: - with open(yaml_path, "r") as f: - protocol_info = yaml.safe_load(f) - except FileNotFoundError: - raise FileNotFoundError(f"YAML file '{yaml_path}' not found.") - - protocol_name = protocol.get("protocol_name") - protocol_display_name = protocol.get("protocol_display_name") - protocol_description = protocol.get("protocol_description") - # Initialize variables schema_map = { "Variable / Field Name": "@id", # column A @@ -390,8 +398,9 @@ def redcap2reproschema(csv_path, yaml_path, schema_context_url=None): additional_notes_list = ["Field Note", "Question Number (surveys only)"] # Process the CSV file - datas, order, languages = process_csv( + datas, order, _ = process_csv( csv_path, + abs_repo_path, schema_context_url, schema_map, input_type_map, @@ -425,13 +434,15 @@ def redcap2reproschema(csv_path, yaml_path, schema_context_url=None): ) activity_display_name = rows[0]["Form Name"] - activity_description = rows[0]["Form Note"] + activity_description = rows[0].get("Form Note", "Default description") + create_form_schema( + abs_repo_path, schema_context_url, form_name, activity_display_name, activity_description, - order.get(form_name, []), + order, bl_list, matrix_list, scores_list, @@ -443,6 +454,7 @@ def redcap2reproschema(csv_path, yaml_path, schema_context_url=None): # Create protocol schema create_protocol_schema( + abs_repo_path, schema_context_url, protocol_name, protocol_display_name, @@ -452,7 +464,6 @@ def redcap2reproschema(csv_path, yaml_path, schema_context_url=None): protocol_visibility_obj, ) - def main(): import argparse @@ -472,16 +483,29 @@ def main(): user_name = protocol.get("user_name") repo_name = protocol.get("repo_name") - repo_url = f"https://github.com/{user_name}/{repo_name}" + 
protocol_name = protocol.get("protocol_name") + protocol_display_name = protocol.get("protocol_display_name") + protocol_description = protocol.get("protocol_description") - # Git operations - subprocess.run(["git", "clone", repo_url]) - subprocess.run(["git", "checkout", "main"]) - os.chdir(repo_url.split("/")[-1]) + if not user_name or not repo_name: + raise ValueError("User name and/or repo name not specified in the YAML file.") + repo_url = f"https://github.com/{user_name}/{repo_name}" + local_repo_path = repo_name # Assuming repo is cloned into a folder with the repo's name + + # Check if the directory already exists + if not os.path.exists(local_repo_path): + # Git operations + subprocess.run(["git", "clone", repo_url]) + + # Get absolute path of the local repository + abs_repo_path = os.path.abspath(local_repo_path) + print(f"Local repository path: {abs_repo_path}") + os.chdir(abs_repo_path) + subprocess.run(["git", "checkout", "main"]) + os.chdir('..') # Call the main conversion function - redcap2reproschema(args.csv_file, args.yaml_file) - + redcap2reproschema(args.csv_file, abs_repo_path, protocol_name, protocol_display_name, protocol_description) if __name__ == "__main__": main() From 5582c74c720003f20dc858792ae3d2bbe0336337 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 27 Dec 2023 20:39:49 +0000 Subject: [PATCH 13/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- reproschema/redcap2reproschema.py | 97 +++++++++++++++++++------------ 1 file changed, 59 insertions(+), 38 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index da8880c..20c5de3 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -69,7 +69,9 @@ def process_choices(choices_str): def write_to_file(abs_repo_path, form_name, field_name, rowData): - file_path = 
os.path.join(f"{abs_repo_path}", "activities", form_name, "items", f"{field_name}") + file_path = os.path.join( + f"{abs_repo_path}", "activities", form_name, "items", f"{field_name}" + ) os.makedirs(os.path.dirname(file_path), exist_ok=True) try: with open(file_path, "w") as file: @@ -99,16 +101,16 @@ def parse_html(input_string, default_language="en"): def process_row( - abs_repo_path, - schema_context_url, - form_name, - field, - schema_map, - input_type_map, - ui_list, - response_list, - additional_notes_list, - ): + abs_repo_path, + schema_context_url, + form_name, + field, + schema_map, + input_type_map, + ui_list, + response_list, + additional_notes_list, +): rowData = { "@context": schema_context_url, "@type": "reproschema:Field", @@ -180,16 +182,16 @@ def process_row( def create_form_schema( - abs_repo_path, - schema_context_url, - form_name, - activity_display_name, - activity_description, - order, - bl_list, - matrix_list, - scores_list, - ): + abs_repo_path, + schema_context_url, + form_name, + activity_display_name, + activity_description, + order, + bl_list, + matrix_list, + scores_list, +): # Construct the JSON-LD structure json_ld = { "@context": schema_context_url, @@ -239,15 +241,15 @@ def process_activities( def create_protocol_schema( - abs_repo_path, - schema_context_url, - protocol_name, - protocol_display_name, - protocol_description, - protocol_variable_map, - protocol_order, - protocol_visibility_obj, - ): + abs_repo_path, + schema_context_url, + protocol_name, + protocol_display_name, + protocol_description, + protocol_variable_map, + protocol_order, + protocol_visibility_obj, +): # Construct the protocol schema protocol_schema = { "@context": schema_context_url, @@ -265,14 +267,14 @@ def create_protocol_schema( "shuffle": False, }, } - + # Populate addProperties list for activity in protocol_order: add_property = { "isAbout": f"../activities/{activity}/{activity}_schema", "variableName": f"{activity}_schema", # Assuming activity name as 
prefLabel, update as needed - "prefLabel": activity.replace("_", " ").title() + "prefLabel": activity.replace("_", " ").title(), } protocol_schema["ui"]["addProperties"].append(add_property) @@ -322,7 +324,9 @@ def process_csv( if form_name not in datas: datas[form_name] = [] order[form_name] = [] - os.makedirs(f"{abs_repo_path}/activities/{form_name}/items", exist_ok=True) + os.makedirs( + f"{abs_repo_path}/activities/{form_name}/items", exist_ok=True + ) datas[form_name].append(row) @@ -348,7 +352,14 @@ def process_csv( return datas, order, languages -def redcap2reproschema(csv_path, abs_repo_path, protocol_name, protocol_display_name, protocol_description, schema_context_url=None): +def redcap2reproschema( + csv_path, + abs_repo_path, + protocol_name, + protocol_display_name, + protocol_description, + schema_context_url=None, +): """ Convert a REDCap data dictionary to Reproschema format. @@ -464,6 +475,7 @@ def redcap2reproschema(csv_path, abs_repo_path, protocol_name, protocol_display_ protocol_visibility_obj, ) + def main(): import argparse @@ -491,21 +503,30 @@ def main(): raise ValueError("User name and/or repo name not specified in the YAML file.") repo_url = f"https://github.com/{user_name}/{repo_name}" - local_repo_path = repo_name # Assuming repo is cloned into a folder with the repo's name + local_repo_path = ( + repo_name # Assuming repo is cloned into a folder with the repo's name + ) # Check if the directory already exists if not os.path.exists(local_repo_path): # Git operations subprocess.run(["git", "clone", repo_url]) - + # Get absolute path of the local repository abs_repo_path = os.path.abspath(local_repo_path) print(f"Local repository path: {abs_repo_path}") os.chdir(abs_repo_path) subprocess.run(["git", "checkout", "main"]) - os.chdir('..') + os.chdir("..") # Call the main conversion function - redcap2reproschema(args.csv_file, abs_repo_path, protocol_name, protocol_display_name, protocol_description) + redcap2reproschema( + args.csv_file, + 
abs_repo_path, + protocol_name, + protocol_display_name, + protocol_description, + ) + if __name__ == "__main__": main() From b6685e0ce1f0f2de354349b5b3c413cfbac3c6b6 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Thu, 28 Dec 2023 19:40:37 +0000 Subject: [PATCH 14/21] remove the github part --- README.md | 12 +----- reproschema/redcap2reproschema.py | 72 ++++++++++++------------------- templates/redcap2rs.yaml | 8 +--- 3 files changed, 30 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index 33b12cd..8eedf83 100644 --- a/README.md +++ b/README.md @@ -44,28 +44,20 @@ The `redcap2reproschema` function is designed to process a given REDCap CSV file ### Prerequisites Before the conversion, ensure you have the following: -1. **GitHub Repository**: - - [Create a GitHub repository](https://docs.github.com/en/repositories/creating-and-managing-repositories/creating-a-new-repository) named `reproschema` to store all your reproschema protocols. - - This repository should be set up before converting any data using the script. - -2. **YAML Configuration File**: - - Fill out the `templates/redcap2rs.yaml` file with your protocol details. +**YAML Configuration File**: + - Download [templates/redcap2rs.yaml](templates/redcap2rs.yaml) and fill it out with your protocol details. ### YAML File Configuration In the `templates/redcap2rs.yaml` file, provide the following information: - **protocol_name**: This is a unique identifier for your protocol. Use underscores for spaces and avoid special characters. - **protocol_display_name**: The name that will appear in the application. -- **user_name**: Your GitHub username. -- **repo_name**: The repository name where your protocols are stored. It's recommended to use `reproschema`. - **protocol_description**: A brief description of your protocol. 
Example: ```yaml protocol_name: "My_Protocol" protocol_display_name: "Assessment Protocol" -user_name: "john_doe" -repo_name: "reproschema" protocol_description: "This protocol is for assessing cognitive skills." ``` ### Command-Line Usage diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 20c5de3..03a8e54 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -68,9 +68,9 @@ def process_choices(choices_str): return choices -def write_to_file(abs_repo_path, form_name, field_name, rowData): +def write_to_file(abs_folder_path, form_name, field_name, rowData): file_path = os.path.join( - f"{abs_repo_path}", "activities", form_name, "items", f"{field_name}" + f"{abs_folder_path}", "activities", form_name, "items", f"{field_name}" ) os.makedirs(os.path.dirname(file_path), exist_ok=True) try: @@ -101,7 +101,7 @@ def parse_html(input_string, default_language="en"): def process_row( - abs_repo_path, + abs_folder_path, schema_context_url, form_name, field, @@ -178,11 +178,11 @@ def process_row( notes_obj = {"source": "redcap", "column": key, "value": value} rowData.setdefault("additionalNotesObj", []).append(notes_obj) - write_to_file(abs_repo_path, form_name, field["Variable / Field Name"], rowData) + write_to_file(abs_folder_path, form_name, field["Variable / Field Name"], rowData) def create_form_schema( - abs_repo_path, + abs_folder_path, schema_context_url, form_name, activity_display_name, @@ -213,7 +213,7 @@ def create_form_schema( if scores_list: json_ld["scoringLogic"] = scores_list - path = os.path.join(f"{abs_repo_path}", "activities", form_name) + path = os.path.join(f"{abs_folder_path}", "activities", form_name) filename = f"{form_name}_schema" file_path = os.path.join(path, filename) try: @@ -241,7 +241,7 @@ def process_activities( def create_protocol_schema( - abs_repo_path, + abs_folder_path, schema_context_url, protocol_name, protocol_display_name, @@ -282,7 +282,7 @@ def 
create_protocol_schema( if protocol_visibility_obj: protocol_schema["ui"]["visibility"] = protocol_visibility_obj - protocol_dir = f"{abs_repo_path}/{protocol_name}" + protocol_dir = f"{abs_folder_path}/{protocol_name}" schema_file = f"{protocol_name}_schema" file_path = os.path.join(protocol_dir, schema_file) @@ -304,7 +304,7 @@ def parse_language_iso_codes(input_string): def process_csv( csv_path, - abs_repo_path, + abs_folder_path, schema_context_url, schema_map, input_type_map, @@ -325,7 +325,7 @@ def process_csv( datas[form_name] = [] order[form_name] = [] os.makedirs( - f"{abs_repo_path}/activities/{form_name}/items", exist_ok=True + f"{abs_folder_path}/activities/{form_name}/items", exist_ok=True ) datas[form_name].append(row) @@ -337,7 +337,7 @@ def process_csv( field_name = field["Variable / Field Name"] order[form_name].append(f"items/{field_name}") process_row( - abs_repo_path, + abs_folder_path, schema_context_url, form_name, field, @@ -348,13 +348,13 @@ def process_csv( additional_notes_list, ) - os.makedirs(f"{abs_repo_path}/protocols/{protocol_name}", exist_ok=True) + os.makedirs(f"{abs_folder_path}/protocols/{protocol_name}", exist_ok=True) return datas, order, languages def redcap2reproschema( csv_path, - abs_repo_path, + abs_folder_path, protocol_name, protocol_display_name, protocol_description, @@ -411,7 +411,7 @@ def redcap2reproschema( # Process the CSV file datas, order, _ = process_csv( csv_path, - abs_repo_path, + abs_folder_path, schema_context_url, schema_map, input_type_map, @@ -448,7 +448,7 @@ def redcap2reproschema( activity_description = rows[0].get("Form Note", "Default description") create_form_schema( - abs_repo_path, + abs_folder_path, schema_context_url, form_name, activity_display_name, @@ -465,7 +465,7 @@ def redcap2reproschema( # Create protocol schema create_protocol_schema( - abs_repo_path, + abs_folder_path, schema_context_url, protocol_name, protocol_display_name, @@ -475,58 +475,40 @@ def redcap2reproschema( 
protocol_visibility_obj, ) - def main(): - import argparse - - parser = argparse.ArgumentParser( - description="Convert REDCap data dictionary to Reproschema format." - ) + parser = argparse.ArgumentParser(description="Convert REDCap data dictionary to Reproschema format.") parser.add_argument("csv_file", help="Path to the REDCap data dictionary CSV file.") parser.add_argument("yaml_file", help="Path to the Reproschema protocol YAML file.") args = parser.parse_args() # Read the YAML configuration - try: - with open(args.yaml_file, "r") as f: + with open(args.yaml_file, "r") as f: protocol = yaml.safe_load(f) - except FileNotFoundError: - raise FileNotFoundError(f"YAML file '{args.yaml_file}' not found.") - user_name = protocol.get("user_name") - repo_name = protocol.get("repo_name") protocol_name = protocol.get("protocol_name") protocol_display_name = protocol.get("protocol_display_name") protocol_description = protocol.get("protocol_description") - if not user_name or not repo_name: - raise ValueError("User name and/or repo name not specified in the YAML file.") + if not protocol_name: + raise ValueError("Protocol name not specified in the YAML file.") - repo_url = f"https://github.com/{user_name}/{repo_name}" - local_repo_path = ( - repo_name # Assuming repo is cloned into a folder with the repo's name - ) + protocol_name = protocol_name.replace(' ', '_') # Replacing spaces with underscores # Check if the directory already exists - if not os.path.exists(local_repo_path): - # Git operations - subprocess.run(["git", "clone", repo_url]) + if not os.path.exists(protocol_name): + os.mkdir(protocol_name) # Create the directory if it doesn't exist # Get absolute path of the local repository - abs_repo_path = os.path.abspath(local_repo_path) - print(f"Local repository path: {abs_repo_path}") - os.chdir(abs_repo_path) - subprocess.run(["git", "checkout", "main"]) - os.chdir("..") + abs_folder_path = os.path.abspath(protocol_name) + # Call the main conversion function 
redcap2reproschema( args.csv_file, - abs_repo_path, + abs_folder_path, protocol_name, protocol_display_name, protocol_description, ) - if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/templates/redcap2rs.yaml b/templates/redcap2rs.yaml index ed56cd1..262ee1c 100644 --- a/templates/redcap2rs.yaml +++ b/templates/redcap2rs.yaml @@ -9,12 +9,6 @@ protocol_name: "your_protocol_name" # Example: "My_Protocol" # This name will be displayed in the application. protocol_display_name: "Your protocol display name" -# GitHub Repository Information: -# Create a GitHub repository named 'reproschema' to store your reproschema protocols. -# Replace 'your_github_username' with your actual GitHub username. -user_name: "your_github_username" -repo_name: "reproschema" # Recommended name; can be different if preferred. - # Protocol Description: # Provide a brief description of your protocol. -protocol_description: "Description for your protocol" # Example: "This protocol is for ..." +protocol_description: "Description for your protocol" # Example: "This protocol is for ..." 
\ No newline at end of file From 3e72f256b69a969365a65bae1200975062b310ea Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Thu, 28 Dec 2023 20:20:23 +0000 Subject: [PATCH 15/21] add test for redcap2reproschema --- reproschema/tests/test_data/redcap2rs.yaml | 21 +++++++++++++ reproschema/tests/test_data/redcap_dict.csv | 31 ++++++++++++++++++++ reproschema/tests/test_redcap2reproschema.py | 28 ++++++++++++++++++ 3 files changed, 80 insertions(+) create mode 100644 reproschema/tests/test_data/redcap2rs.yaml create mode 100644 reproschema/tests/test_data/redcap_dict.csv create mode 100644 reproschema/tests/test_redcap2reproschema.py diff --git a/reproschema/tests/test_data/redcap2rs.yaml b/reproschema/tests/test_data/redcap2rs.yaml new file mode 100644 index 0000000..3330f3b --- /dev/null +++ b/reproschema/tests/test_data/redcap2rs.yaml @@ -0,0 +1,21 @@ +# Reproschema Protocol Configuration + +# Protocol Name: +# Use underscores for spaces and avoid special characters. +# This is the unique identifier for your protocol. +protocol_name: "test_redcap2rs" # Example: "My_Protocol" + +# Protocol Display Name: +# This name will be displayed in the application. +protocol_display_name: "redcap protocols" + +# GitHub Repository Information: +# Create a GitHub repository named 'reproschema' to store your reproschema protocols. +# Replace 'your_github_username' with your actual GitHub username. +user_name: "yibeichan" +repo_name: "redcap2reproschema" # Recommended name; can be different if preferred. +repo_url: "https://github.com/{{user_name}}/{{repo_name}}" + +# Protocol Description: +# Provide a brief description of your protocol. +protocol_description: "testing" # Example: "This protocol is for ..." 
diff --git a/reproschema/tests/test_data/redcap_dict.csv b/reproschema/tests/test_data/redcap_dict.csv new file mode 100644 index 0000000..6f48394 --- /dev/null +++ b/reproschema/tests/test_data/redcap_dict.csv @@ -0,0 +1,31 @@ +"Variable / Field Name","Form Name","Section Header","Field Type","Field Label","Choices, Calculations, OR Slider Labels","Field Note","Text Validation Type OR Show Slider Number","Text Validation Min","Text Validation Max",Identifier?,"Branching Logic (Show field only if...)","Required Field?","Custom Alignment","Question Number (surveys only)","Matrix Group Name","Matrix Ranking?","Field Annotation" +record_id,autism_parenting_stress_index_apsi,,text,"Record ID",,,,,,,,,,,,, +apsi_date,autism_parenting_stress_index_apsi,"Autism Parenting Stress Index for the Qigong Sensory Training Program Instructions: 1. Before beginning Qigong Sensory Training therapy with your child, complete the form on the following page. 2. Enter the date, name of your child, and who is completing the checklist. (It is very important that the same parent/caretaker complete the form each time the form is used.) 3. Choose the response for each item that most accurately describes your child. 4. Add all of the numbers chosen. 5. Enter total into the space provided. After using Qigong Sensory Training therapy on your child once a day for a five months, have the same parent complete the form again. Total numbers circled. Compare this number to the number at the beginning. 
If Qigong Sensory Training therapy is being implemented successfully, the total number should decrease over time.",text,Date:,,,date_ymd,,,,,,,,,, +apsi_name_of_child,autism_parenting_stress_index_apsi,,text,"Name of child:",,,,,,,,,,,,, +apsi_person_completing,autism_parenting_stress_index_apsi,,text,"Person completing checklist:",,,,,,,,,,,,, +apsi_social_dev,autism_parenting_stress_index_apsi,"Stress Ratings Please rate the following aspects of your child's health according to how much stress it causes you and/or your family by clicking on the button that best describes your situation.",radio,"Your child's social development ","0, 0 - Not stressful | 1, 1 - Sometimes creates stress | 2, 2 - Often creates stress | 3, 3 - Very stressful on a daily basis | 5, 5 - So stressful sometimes we feel we can't cope",,,,,,,,,,,, +apsi_communicate,autism_parenting_stress_index_apsi,,radio,"Your child's ability to communicate ","0, 0 - Not stressful | 1, 1 - Sometimes creates stress | 2, 2 - Often creates stress | 3, 3 - Very stressful on a daily basis | 5, 5 - So stressful sometimes we feel we can't cope",,,,,,,,,,,, +apsi_tantrums,autism_parenting_stress_index_apsi,,radio,"Tantrums/meltdowns ","0, 0 - Not stressful | 1, 1 - Sometimes creates stress | 2, 2 - Often creates stress | 3, 3 - Very stressful on a daily basis | 5, 5 - So stressful sometimes we feel we can't cope",,,,,,,,,,,, +apsi_agressive,autism_parenting_stress_index_apsi,,radio,"Aggressive behavior (siblings, peers) ","0, 0 - Not stressful | 1, 1 - Sometimes creates stress | 2, 2 - Often creates stress | 3, 3 - Very stressful on a daily basis | 5, 5 - So stressful sometimes we feel we can't cope",,,,,,,,,,,, +apsi_self_injure,autism_parenting_stress_index_apsi,,radio,"Self-injurious behavior ","0, 0 - Not stressful | 1, 1 - Sometimes creates stress | 2, 2 - Often creates stress | 3, 3 - Very stressful on a daily basis | 5, 5 - So stressful sometimes we feel we can't cope",,,,,,,,,,,, 
+apsi_transitions,autism_parenting_stress_index_apsi,,radio,"Difficulty making transitions from one activity to another ","0, 0 - Not stressful | 1, 1 - Sometimes creates stress | 2, 2 - Often creates stress | 3, 3 - Very stressful on a daily basis | 5, 5 - So stressful sometimes we feel we can't cope",,,,,,,,,,,, +apsi_sleep,autism_parenting_stress_index_apsi,,radio,"Sleep problems ","0, 0 - Not stressful | 1, 1 - Sometimes creates stress | 2, 2 - Often creates stress | 3, 3 - Very stressful on a daily basis | 5, 5 - So stressful sometimes we feel we can't cope",,,,,,,,,,,, +apsi_diet,autism_parenting_stress_index_apsi,,radio,"Your child's diet ","0, 0 - Not stressful | 1, 1 - Sometimes creates stress | 2, 2 - Often creates stress | 3, 3 - Very stressful on a daily basis | 5, 5 - So stressful sometimes we feel we can't cope",,,,,,,,,,,, +apsi_bowel,autism_parenting_stress_index_apsi,,radio,"Bowel problems (diarrhea, constipation) ","0, 0 - Not stressful | 1, 1 - Sometimes creates stress | 2, 2 - Often creates stress | 3, 3 - Very stressful on a daily basis | 5, 5 - So stressful sometimes we feel we can't cope",,,,,,,,,,,, +apsi_potty,autism_parenting_stress_index_apsi,,radio,"Potty training ","0, 0 - Not stressful | 1, 1 - Sometimes creates stress | 2, 2 - Often creates stress | 3, 3 - Very stressful on a daily basis | 5, 5 - So stressful sometimes we feel we can't cope",,,,,,,,,,,, +apsi_not_close,autism_parenting_stress_index_apsi,,radio,"Not feeling close to your child ","0, 0 - Not stressful | 1, 1 - Sometimes creates stress | 2, 2 - Often creates stress | 3, 3 - Very stressful on a daily basis | 5, 5 - So stressful sometimes we feel we can't cope",,,,,,,,,,,, +apsi_accepted,autism_parenting_stress_index_apsi,,radio,"Concern for the future of your child being accepted by others ","0, 0 - Not stressful | 1, 1 - Sometimes creates stress | 2, 2 - Often creates stress | 3, 3 - Very stressful on a daily basis | 5, 5 - So stressful sometimes we feel we can't 
cope",,,,,,,,,,,, +apsi_independently,autism_parenting_stress_index_apsi,,radio,"Concern for the future of your child living independently ","0, 0 - Not stressful | 1, 1 - Sometimes creates stress | 2, 2 - Often creates stress | 3, 3 - Very stressful on a daily basis | 5, 5 - So stressful sometimes we feel we can't cope",,,,,,,,,,,, +apsi_total,autism_parenting_stress_index_apsi,,text,Total,,,integer,,,,,,,,,, +cams_r_1,cognitive_and_affective_mindfulness_scalerevised_c,"Instructions: People have a variety of ways of relating to their thoughts and feelings. For each of the items below, rate how much each of these ways applies to you.",radio,"1. It is easy for me to concentrate on what I am doing.","1, 1 - Rarely/Not at all | 2, 2 - Sometimes | 3, 3 - Often | 4, 4 - Almost Always",,,,,,,,,,,, +cams_r_2,cognitive_and_affective_mindfulness_scalerevised_c,,radio,"2. I am preoccupied by the future.","1, 1 - Rarely/Not at all | 2, 2 - Sometimes | 3, 3 - Often | 4, 4 - Almost Always",,,,,,,,,,,, +cams_r_3,cognitive_and_affective_mindfulness_scalerevised_c,,radio,"3. I can tolerate emotional pain.","1, 1 - Rarely/Not at all | 2, 2 - Sometimes | 3, 3 - Often | 4, 4 - Almost Always",,,,,,,,,,,, +cams_r_4,cognitive_and_affective_mindfulness_scalerevised_c,,radio,"4. I can accept things I cannot change.","1, 1 - Rarely/Not at all | 2, 2 - Sometimes | 3, 3 - Often | 4, 4 - Almost Always",,,,,,,,,,,, +cams_r_5,cognitive_and_affective_mindfulness_scalerevised_c,,radio,"5. I can usually describe how I feel at the moment in considerable detail.","1, 1 - Rarely/Not at all | 2, 2 - Sometimes | 3, 3 - Often | 4, 4 - Almost Always",,,,,,,,,,,, +cams_r_6,cognitive_and_affective_mindfulness_scalerevised_c,,radio,"6. I am easily distracted.","1, 1 - Rarely/Not at all | 2, 2 - Sometimes | 3, 3 - Often | 4, 4 - Almost Always",,,,,,,,,,,, +cams_r_7,cognitive_and_affective_mindfulness_scalerevised_c,,radio,"7. 
I am preoccupied by the past.","1, 1 - Rarely/Not at all | 2, 2 - Sometimes | 3, 3 - Often | 4, 4 - Almost Always",,,,,,,,,,,, +cams_r_8,cognitive_and_affective_mindfulness_scalerevised_c,,radio,"8. It's easy for me to keep track of my thoughts and feelings.","1, 1 - Rarely/Not at all | 2, 2 - Sometimes | 3, 3 - Often | 4, 4 - Almost Always",,,,,,,,,,,, +cams_r_9,cognitive_and_affective_mindfulness_scalerevised_c,,radio,"9. I try to notice my thoughts without judging them.","1, 1 - Rarely/Not at all | 2, 2 - Sometimes | 3, 3 - Often | 4, 4 - Almost Always",,,,,,,,,,,, +cams_r_10,cognitive_and_affective_mindfulness_scalerevised_c,,radio,"10. I am able to accept the thoughts and feelings I have.","1, 1 - Rarely/Not at all | 2, 2 - Sometimes | 3, 3 - Often | 4, 4 - Almost Always",,,,,,,,,,,, +cams_r_11,cognitive_and_affective_mindfulness_scalerevised_c,,radio,"11. I am able to focus on the present moment.","1, 1 - Rarely/Not at all | 2, 2 - Sometimes | 3, 3 - Often | 4, 4 - Almost Always",,,,,,,,,,,, +cams_r_12,cognitive_and_affective_mindfulness_scalerevised_c,,radio,"12. 
I am able to pay close attention to one thing for a long period of time.","1, 1 - Rarely/Not at all | 2, 2 - Sometimes | 3, 3 - Often | 4, 4 - Almost Always",,,,,,,,,,,, diff --git a/reproschema/tests/test_redcap2reproschema.py b/reproschema/tests/test_redcap2reproschema.py new file mode 100644 index 0000000..776f0e6 --- /dev/null +++ b/reproschema/tests/test_redcap2reproschema.py @@ -0,0 +1,28 @@ +import os +import shutil +import pytest +from click.testing import CliRunner +from ..cli import main # Import the Click group + +# Assuming your test files are located in a 'tests' directory +CSV_FILE_NAME = "redcap_dict.csv" +YAML_FILE_NAME = "redcap2rs.yaml" +CSV_TEST_FILE = os.path.join( + os.path.dirname(__file__), "test_data", CSV_FILE_NAME +) +YAML_TEST_FILE = os.path.join( + os.path.dirname(__file__), "test_data", YAML_FILE_NAME +) + +def test_redcap2reproschema_success(): + runner = CliRunner() + + with runner.isolated_filesystem(): + # Copy the test files to the isolated filesystem + shutil.copy(CSV_TEST_FILE, CSV_FILE_NAME) + shutil.copy(YAML_TEST_FILE, YAML_FILE_NAME) + + # Run the command within the isolated filesystem + result = runner.invoke(main, ['redcap2reproschema', CSV_FILE_NAME, YAML_FILE_NAME]) + print(result.output) + assert result.exit_code == 0 \ No newline at end of file From 587b388cb61929f103e10e625b3eb8d49d2279de Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Dec 2023 20:20:36 +0000 Subject: [PATCH 16/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- reproschema/redcap2reproschema.py | 12 ++++++++---- reproschema/tests/test_redcap2reproschema.py | 15 +++++++-------- templates/redcap2rs.yaml | 2 +- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 03a8e54..fe8a80e 100644 --- a/reproschema/redcap2reproschema.py +++ 
b/reproschema/redcap2reproschema.py @@ -475,15 +475,18 @@ def redcap2reproschema( protocol_visibility_obj, ) + def main(): - parser = argparse.ArgumentParser(description="Convert REDCap data dictionary to Reproschema format.") + parser = argparse.ArgumentParser( + description="Convert REDCap data dictionary to Reproschema format." + ) parser.add_argument("csv_file", help="Path to the REDCap data dictionary CSV file.") parser.add_argument("yaml_file", help="Path to the Reproschema protocol YAML file.") args = parser.parse_args() # Read the YAML configuration with open(args.yaml_file, "r") as f: - protocol = yaml.safe_load(f) + protocol = yaml.safe_load(f) protocol_name = protocol.get("protocol_name") protocol_display_name = protocol.get("protocol_display_name") @@ -492,7 +495,7 @@ def main(): if not protocol_name: raise ValueError("Protocol name not specified in the YAML file.") - protocol_name = protocol_name.replace(' ', '_') # Replacing spaces with underscores + protocol_name = protocol_name.replace(" ", "_") # Replacing spaces with underscores # Check if the directory already exists if not os.path.exists(protocol_name): @@ -510,5 +513,6 @@ def main(): protocol_description, ) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/reproschema/tests/test_redcap2reproschema.py b/reproschema/tests/test_redcap2reproschema.py index 776f0e6..de2f630 100644 --- a/reproschema/tests/test_redcap2reproschema.py +++ b/reproschema/tests/test_redcap2reproschema.py @@ -7,12 +7,9 @@ # Assuming your test files are located in a 'tests' directory CSV_FILE_NAME = "redcap_dict.csv" YAML_FILE_NAME = "redcap2rs.yaml" -CSV_TEST_FILE = os.path.join( - os.path.dirname(__file__), "test_data", CSV_FILE_NAME -) -YAML_TEST_FILE = os.path.join( - os.path.dirname(__file__), "test_data", YAML_FILE_NAME -) +CSV_TEST_FILE = os.path.join(os.path.dirname(__file__), "test_data", CSV_FILE_NAME) +YAML_TEST_FILE = os.path.join(os.path.dirname(__file__), "test_data", 
YAML_FILE_NAME) + def test_redcap2reproschema_success(): runner = CliRunner() @@ -23,6 +20,8 @@ def test_redcap2reproschema_success(): shutil.copy(YAML_TEST_FILE, YAML_FILE_NAME) # Run the command within the isolated filesystem - result = runner.invoke(main, ['redcap2reproschema', CSV_FILE_NAME, YAML_FILE_NAME]) + result = runner.invoke( + main, ["redcap2reproschema", CSV_FILE_NAME, YAML_FILE_NAME] + ) print(result.output) - assert result.exit_code == 0 \ No newline at end of file + assert result.exit_code == 0 diff --git a/templates/redcap2rs.yaml b/templates/redcap2rs.yaml index 262ee1c..1e1dbc3 100644 --- a/templates/redcap2rs.yaml +++ b/templates/redcap2rs.yaml @@ -11,4 +11,4 @@ protocol_display_name: "Your protocol display name" # Protocol Description: # Provide a brief description of your protocol. -protocol_description: "Description for your protocol" # Example: "This protocol is for ..." \ No newline at end of file +protocol_description: "Description for your protocol" # Example: "This protocol is for ..." From a4e21a1212a142ce01e86a2710a5fa6f66607136 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Thu, 28 Dec 2023 20:23:22 +0000 Subject: [PATCH 17/21] missing commits fpr cli --- reproschema/cli.py | 10 +---- reproschema/redcap2reproschema.py | 66 +++++++++++++------------------ 2 files changed, 28 insertions(+), 48 deletions(-) diff --git a/reproschema/cli.py b/reproschema/cli.py index 663ce56..317d64f 100644 --- a/reproschema/cli.py +++ b/reproschema/cli.py @@ -1,10 +1,9 @@ import os import click -import yaml from . import get_logger, set_logger_level from . import __version__ -from .redcap2reproschema import main as redcap2rs +from .redcap2reproschema import redcap2reproschema as redcap2rs lgr = get_logger() @@ -105,14 +104,7 @@ def serve(port): def redcap2reproschema(csv_path, yaml_path): """ Convert REDCap CSV files to Reproschema format. - - Provide the path to the REDCap CSV file and the YAML configuration file. 
""" - if not os.path.exists(csv_path): - raise click.ClickException(f"CSV file not found at {csv_path}") - if not os.path.exists(yaml_path): - raise click.ClickException(f"YAML file not found at {yaml_path}") - try: redcap2rs(csv_path, yaml_path) click.echo("Converted REDCap data dictionary to Reproschema format.") diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 03a8e54..f1a1d6e 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -303,7 +303,7 @@ def parse_language_iso_codes(input_string): def process_csv( - csv_path, + csv_file, abs_folder_path, schema_context_url, schema_map, @@ -317,7 +317,7 @@ def process_csv( order = {} languages = [] - with open(csv_path, mode="r", encoding="utf-8") as csvfile: + with open(csv_file, mode="r", encoding="utf-8") as csvfile: reader = csv.DictReader(csvfile) for row in reader: form_name = row["Form Name"] @@ -352,21 +352,35 @@ def process_csv( return datas, order, languages -def redcap2reproschema( - csv_path, - abs_folder_path, - protocol_name, - protocol_display_name, - protocol_description, - schema_context_url=None, -): +def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): """ Convert a REDCap data dictionary to Reproschema format. - :param csv_path: Path to the REDCap CSV file. + :param csv_file: Path to the REDCap CSV file. :param yaml_path: Path to the YAML configuration file. :param schema_context_url: URL of the schema context. Optional. 
""" + + # Read the YAML configuration + with open(yaml_file, "r") as f: + protocol = yaml.safe_load(f) + + protocol_name = protocol.get("protocol_name") + protocol_display_name = protocol.get("protocol_display_name") + protocol_description = protocol.get("protocol_description") + + if not protocol_name: + raise ValueError("Protocol name not specified in the YAML file.") + + protocol_name = protocol_name.replace(' ', '_') # Replacing spaces with underscores + + # Check if the directory already exists + if not os.path.exists(protocol_name): + os.mkdir(protocol_name) # Create the directory if it doesn't exist + + # Get absolute path of the local repository + abs_folder_path = os.path.abspath(protocol_name) + if schema_context_url is None: schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic" @@ -410,7 +424,7 @@ def redcap2reproschema( # Process the CSV file datas, order, _ = process_csv( - csv_path, + csv_file, abs_folder_path, schema_context_url, schema_map, @@ -481,34 +495,8 @@ def main(): parser.add_argument("yaml_file", help="Path to the Reproschema protocol YAML file.") args = parser.parse_args() - # Read the YAML configuration - with open(args.yaml_file, "r") as f: - protocol = yaml.safe_load(f) - - protocol_name = protocol.get("protocol_name") - protocol_display_name = protocol.get("protocol_display_name") - protocol_description = protocol.get("protocol_description") - - if not protocol_name: - raise ValueError("Protocol name not specified in the YAML file.") - - protocol_name = protocol_name.replace(' ', '_') # Replacing spaces with underscores - - # Check if the directory already exists - if not os.path.exists(protocol_name): - os.mkdir(protocol_name) # Create the directory if it doesn't exist - - # Get absolute path of the local repository - abs_folder_path = os.path.abspath(protocol_name) - # Call the main conversion function - redcap2reproschema( - args.csv_file, - abs_folder_path, - protocol_name, - 
protocol_display_name, - protocol_description, - ) + redcap2reproschema(args.csv_file, args.yaml_file) if __name__ == "__main__": main() \ No newline at end of file From 8838e0620fedb061177a85efac72688852126163 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Dec 2023 20:28:14 +0000 Subject: [PATCH 18/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- reproschema/redcap2reproschema.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 85c1bbb..54ac316 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -349,6 +349,7 @@ def process_csv( os.makedirs(f"{abs_folder_path}/protocols/{protocol_name}", exist_ok=True) return datas, order, languages + def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): """ Convert a REDCap data dictionary to Reproschema format. 
@@ -360,7 +361,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): # Read the YAML configuration with open(yaml_file, "r") as f: - protocol = yaml.safe_load(f) + protocol = yaml.safe_load(f) protocol_name = protocol.get("protocol_name") protocol_display_name = protocol.get("protocol_display_name") @@ -369,7 +370,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): if not protocol_name: raise ValueError("Protocol name not specified in the YAML file.") - protocol_name = protocol_name.replace(' ', '_') # Replacing spaces with underscores + protocol_name = protocol_name.replace(" ", "_") # Replacing spaces with underscores # Check if the directory already exists if not os.path.exists(protocol_name): @@ -486,6 +487,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): protocol_visibility_obj, ) + def main(): parser = argparse.ArgumentParser( description="Convert REDCap data dictionary to Reproschema format." @@ -497,5 +499,6 @@ def main(): # Call the main conversion function redcap2reproschema(args.csv_file, args.yaml_file) + if __name__ == "__main__": main() From ad7191a728db6f1c9038b2624c2406179661deb4 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Thu, 28 Dec 2023 21:12:04 +0000 Subject: [PATCH 19/21] remove protocol_variable_map --- reproschema/redcap2reproschema.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 85c1bbb..d71f15c 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -226,15 +226,11 @@ def create_form_schema( def process_activities( - activity_name, protocol_visibility_obj, protocol_variable_map, protocol_order + activity_name, protocol_visibility_obj, protocol_order ): # Set default visibility condition protocol_visibility_obj[activity_name] = True - # Add activity to variableMap and Order - protocol_variable_map.append( - {"variableName": 
activity_name, "isAbout": f"items/{activity_name}"} - ) protocol_order.append(activity_name) @@ -244,21 +240,19 @@ def create_protocol_schema( protocol_name, protocol_display_name, protocol_description, - protocol_variable_map, protocol_order, protocol_visibility_obj, ): # Construct the protocol schema protocol_schema = { "@context": schema_context_url, - "@type": "reproschema:ActivitySet", + "@type": "reproschema:Protocol", "@id": f"{protocol_name}_schema", "skos:prefLabel": protocol_display_name, "skos:altLabel": f"{protocol_name}_schema", "schema:description": protocol_description, "schema:schemaVersion": "1.0.0-rc4", "schema:version": "0.0.1", - "variableMap": protocol_variable_map, "ui": { "addProperties": [], "order": protocol_order, @@ -432,7 +426,6 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): protocol_name, ) # Initialize other variables for protocol context and schema - protocol_variable_map = [] protocol_visibility_obj = {} protocol_order = [] @@ -471,7 +464,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): ) process_activities( - form_name, protocol_visibility_obj, protocol_variable_map, protocol_order + form_name, protocol_visibility_obj, protocol_order ) # Create protocol schema @@ -481,7 +474,6 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): protocol_name, protocol_display_name, protocol_description, - protocol_variable_map, protocol_order, protocol_visibility_obj, ) From e5e60915c5235914bf17964176745d065d77484c Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Thu, 28 Dec 2023 21:13:55 +0000 Subject: [PATCH 20/21] fix protocol path --- reproschema/redcap2reproschema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 07543a1..1111f9a 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -340,7 +340,7 @@ def process_csv( additional_notes_list, ) - 
os.makedirs(f"{abs_folder_path}/protocols/{protocol_name}", exist_ok=True) + os.makedirs(f"{abs_folder_path}/{protocol_name}", exist_ok=True) return datas, order, languages From f5dd9c5b646e09a0b2ff6f11218696d88ce00ff3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Dec 2023 21:14:16 +0000 Subject: [PATCH 21/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- reproschema/redcap2reproschema.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 1111f9a..16b80c7 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -225,9 +225,7 @@ def create_form_schema( print(f"Error writing to file {file_path}: {e}") -def process_activities( - activity_name, protocol_visibility_obj, protocol_order -): +def process_activities(activity_name, protocol_visibility_obj, protocol_order): # Set default visibility condition protocol_visibility_obj[activity_name] = True @@ -464,9 +462,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): scores_list, ) - process_activities( - form_name, protocol_visibility_obj, protocol_order - ) + process_activities(form_name, protocol_visibility_obj, protocol_order) # Create protocol schema create_protocol_schema(