Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove useless DB call for performance. #193

Open
wants to merge 2 commits into
base: 3.2.0
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions src/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def __init__(self, model, batch, mongo_dao, bucket, root_path, data_common, subm
self.main_nodes = self.model.get_main_nodes()
self.errors = None
self.submission = submission
self.ORCID = submission.get(ORCID)

"""
param: file_path_list downloaded from s3 bucket
Expand Down Expand Up @@ -60,6 +61,7 @@ def load_data(self, file_path_list):
for index, row in df.iterrows():
type = row[TYPE]
node_id = self.get_node_id(type, row)
crdc_id = None
exist_node = self.mongo_dao.get_dataRecord_by_node(node_id, type, self.batch[SUBMISSION_ID])
# 2. construct dataRecord
rawData = df.loc[index].to_dict()
Expand All @@ -71,17 +73,14 @@ def load_data(self, file_path_list):
id = self.get_record_id(exist_node)
# only generating CRDC ID for valid nodes
valid_crdc_id_nodes = type in main_node_types
crdc_id = self.get_crdc_id(exist_node, type, node_id, self.submission.get(STUDY_ID)) if valid_crdc_id_nodes else None
if valid_crdc_id_nodes:
crdc_id = self.get_crdc_id(exist_node, type, node_id, self.submission.get(STUDY_ID)) if type != PRINCIPAL_INVESTIGATOR else self.ORCID
# file nodes
if valid_crdc_id_nodes and type in file_types:
id_field = self.file_nodes.get(type, {}).get(ID_FIELD)
file_id_val = row.get(id_field)
if file_id_val:
crdc_id = file_id_val if file_id_val.startswith(DCF_PREFIX) else DCF_PREFIX + file_id_val
# principal investigator node
if type == PRINCIPAL_INVESTIGATOR and PRINCIPAL_INVESTIGATOR in main_node_types:
submission = self.mongo_dao.get_submission(self.batch[SUBMISSION_ID])
crdc_id = submission.get(ORCID) if submission and submission.get(ORCID) else None

if index == 0 or not self.process_m2m_rel(records, node_id, rawData, relation_fields):
dataRecord = {
Expand Down Expand Up @@ -110,7 +109,7 @@ def load_data(self, file_path_list):
STUDY_ID: self.submission.get(STUDY_ID)
}
if crdc_id:
dataRecord["CRDC_ID"] = crdc_id
dataRecord[CRDC_ID] = crdc_id
if type in file_types:
dataRecord[S3_FILE_INFO] = self.get_file_info(type, prop_names, row)
records.append(dataRecord)
Expand Down