-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdimpleflow.py
82 lines (69 loc) · 2.04 KB
/
dimpleflow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# -*- coding: utf-8 -*-
"""
Run DIMPLE over a batch of crystals using Prefect.
One DIMPLE job is run for each row of a filtered CSV file.
"""
import os
import tempfile
import subprocess
import shutil
import csv
import asyncio
from prefect import task, flow
from prefect.task_runners import ConcurrentTaskRunner
from pathlib import Path
import pandas
import multiprocessing
"""
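INSTRUCTIONS:
Make a processing directory, e.g. processing-dir.
Within the processing directory, add:
- the reference PDB model (see REFERENCE_PDB)
- a subdirectory for the DIMPLE outputs, e.g. models (see MODELS_DIRECTORY)
- the filtered CSV file produced by the gather script (see FILTERED_XRAY_CSV)
Then set PROCESSING_DATA_DIRECTORY and FILTERED_XRAY_CSV below.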
"""
# --- User configuration ------------------------------------------------------
# Directory that holds the reference model, the CSV file and the models folder.
PROCESSING_DATA_DIRECTORY = ""
# Name of the sub-directory (inside the processing directory) for DIMPLE output.
MODELS_DIRECTORY = "models"
# File name of the reference PDB model inside the processing directory.
REFERENCE_PDB = "reference.pdb"
# File name of the filtered CSV file inside the processing directory.
FILTERED_XRAY_CSV = ""

# Anchor every derived path to an absolute location. The DIMPLE subprocess is
# started with cwd=models_dir, so a relative reference model or sample
# directory would otherwise be resolved against the wrong directory.
root_dir = Path(PROCESSING_DATA_DIRECTORY).absolute()
models_dir = root_dir / MODELS_DIRECTORY
reference_model = str(root_dir / REFERENCE_PDB)
jobs_csv = root_dir / FILTERED_XRAY_CSV

# The CSV must provide an "xtal_id" and a "filepath" column.
jobs_df = pandas.read_csv(jobs_csv)

# One parameter dict per CSV row; the keys match the placeholders of the
# dimple command line assembled in run_dimple.
# NOTE(review): "filepath" values are passed through unchanged, so a relative
# value is still interpreted relative to models_dir by the subprocess --
# confirm the CSV holds absolute paths.
jobs_list = [
    {
        "hklout": f"{row['xtal_id']}.dimple.mtz",
        "xyzout": f"{row['xtal_id']}.dimple.pdb",
        "xyzin": reference_model,
        "hklin": row["filepath"],
        "sample_dir": str(models_dir / f"{row['xtal_id']}"),
        "xtal_id": row["xtal_id"],
    }
    for _, row in jobs_df.iterrows()
]
@task(name="run_dimple", tags=["dimple_job"])
def run_dimple(dimple_params: dict):
    """Run one DIMPLE job as a subprocess and wait for it to finish.

    Args:
        dimple_params: Mapping providing the keys ``hklout``, ``xyzout``,
            ``xyzin``, ``hklin`` and ``sample_dir`` that make up the
            ``dimple`` command line. Extra keys are ignored.

    Returns:
        The exit status of the ``dimple`` process.

    Raises:
        KeyError: If one of the required keys is missing.
    """
    # Build argv directly instead of splitting a formatted string, so that a
    # path containing whitespace stays a single argument.
    cmd = [
        "dimple",
        "--hklout",
        str(dimple_params["hklout"]),
        "--xyzout",
        str(dimple_params["xyzout"]),
        str(dimple_params["xyzin"]),
        str(dimple_params["hklin"]),
        str(dimple_params["sample_dir"]),
    ]
    completed = subprocess.run(
        cmd,
        cwd=models_dir,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,
    )
    if completed.returncode != 0:
        # Report the failure instead of dropping it silently. Do not raise:
        # one bad dataset should not stop the remaining jobs from running.
        stderr_text = completed.stderr.decode("utf-8", errors="replace").strip()
        print(
            f"dimple exited with status {completed.returncode} "
            f"for {dimple_params['sample_dir']}: {stderr_text}"
        )
    return completed.returncode
@flow(name="dimple_flow", task_runner=ConcurrentTaskRunner)
def dimple_flow(jobs, **kwargs):
    """Map the ``run_dimple`` task over a batch of job parameter dicts.

    Args:
        jobs: Collection of parameter dicts; each one is handed to
            ``run_dimple`` as its ``dimple_params`` argument.
        **kwargs: Accepted but not used by this flow.
    """
    run_dimple.map(dimple_params=jobs)
if __name__ == "__main__":
    # A single flow run receives at most 30 jobs, or fewer when the machine
    # reports fewer than 30 CPUs.
    batch_size = min(multiprocessing.cpu_count(), 30)
    # Walk through the job list in consecutive batches, one flow run each.
    for start in range(0, len(jobs_list), batch_size):
        dimple_flow(jobs_list[start : start + batch_size])