diff --git a/devtools/conda-envs/beta_rc_env.yaml b/devtools/conda-envs/beta_rc_env.yaml index 2e451d4be..803c7d351 100644 --- a/devtools/conda-envs/beta_rc_env.yaml +++ b/devtools/conda-envs/beta_rc_env.yaml @@ -37,7 +37,7 @@ dependencies: - bson - msgpack-python - qcelemental - - qcportal >=0.15, <0.50.0a0 + - qcportal >=0.50 - qcengine - nglview - mdtraj diff --git a/devtools/conda-envs/openeye-examples.yaml b/devtools/conda-envs/openeye-examples.yaml index d2dd7984e..7d116bb7c 100644 --- a/devtools/conda-envs/openeye-examples.yaml +++ b/devtools/conda-envs/openeye-examples.yaml @@ -35,7 +35,7 @@ dependencies: - bson - msgpack-python - qcelemental - - qcportal >=0.15, <0.50.0a0 + - qcportal >=0.50 - qcengine - mdtraj - parmed =3 diff --git a/devtools/conda-envs/openeye.yaml b/devtools/conda-envs/openeye.yaml index cd2e229c0..1a33faa83 100644 --- a/devtools/conda-envs/openeye.yaml +++ b/devtools/conda-envs/openeye.yaml @@ -35,7 +35,7 @@ dependencies: - bson - msgpack-python - qcelemental - - qcportal >=0.15, <0.50.0a0 + - qcportal >=0.50 - qcengine - mdtraj - nglview diff --git a/devtools/conda-envs/rdkit-examples.yaml b/devtools/conda-envs/rdkit-examples.yaml index b23d65c75..58cb892fd 100644 --- a/devtools/conda-envs/rdkit-examples.yaml +++ b/devtools/conda-envs/rdkit-examples.yaml @@ -37,7 +37,7 @@ dependencies: - bson - msgpack-python - qcelemental - - qcportal >=0.15, <0.50.0a0 + - qcportal >=0.50 - qcengine - nbval - mdtraj diff --git a/devtools/conda-envs/rdkit.yaml b/devtools/conda-envs/rdkit.yaml index 670821474..c59184cb7 100644 --- a/devtools/conda-envs/rdkit.yaml +++ b/devtools/conda-envs/rdkit.yaml @@ -35,6 +35,6 @@ dependencies: - bson - msgpack-python - qcelemental - - qcportal >=0.15, <0.50.0a0 + - qcportal >=0.50 - qcengine - nglview diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index ba9ae17a3..9debbe0e5 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -36,7 +36,7 @@ 
dependencies: - bson - msgpack-python - qcelemental - - qcportal >=0.15, <0.50.0a0 + - qcportal >=0.50 - qcengine - nglview - mdtraj diff --git a/examples/QCArchive_interface/QCarchive_interface.ipynb b/examples/QCArchive_interface/QCarchive_interface.ipynb index 9fdd4b830..aec8207cc 100644 --- a/examples/QCArchive_interface/QCarchive_interface.ipynb +++ b/examples/QCArchive_interface/QCarchive_interface.ipynb @@ -6,7 +6,7 @@ "source": [ "# Using QCArchive with the OpenFF Toolkit\n", "\n", - "Here we show how to create OpenFF molecules safely from data in the QCArchive using the cmiles entries, specifically we want to use the canonical_isomeric_explicit_hydrogen_mapped_smiles data which is metadata stored at the entry-level of a collection.\n", + "Here we show how to create OpenFF molecules safely from data in the QCArchive using the CMILES entries. This transformation relies on the `\"canonical_isomeric_explicit_hydrogen_mapped_smiles\"`.\n", "\n", "First load up the client you wish to connect to, in this case, we use the public instance." ] @@ -25,103 +25,571 @@ "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
tagline
collectionname
DatasetA Benchmark Data Set for Hydrogen CombustionA Benchmark Data Set for Hydrogen Combustion
ANI-122 million off-equilibrium conformations and e...
ANI-1ccxCoupled cluster properties for molecules
ANI-1xDensity functional theory properties for molec...
COMP6 ANI-MDBenchmark containing MD trajectories from the ...
.........
TorsionDriveDatasetOpenFF-benchmark-ligand-fragments-v2.0Ligand fragments from the JACS benchmark systems.
Pfizer Discrepancy Torsion Dataset 1None
SMIRNOFF Coverage Torsion Set 1None
SiliconTX Torsion Benchmark Set 1None
TorsionDrive PaperNone
\n", - "

207 rows × 1 columns

\n", - "
" - ], "text/plain": [ - " tagline\n", - "collection name \n", - "Dataset A Benchmark Data Set for Hydrogen Combustion A Benchmark Data Set for Hydrogen Combustion\n", - " ANI-1 22 million off-equilibrium conformations and e...\n", - " ANI-1ccx Coupled cluster properties for molecules\n", - " ANI-1x Density functional theory properties for molec...\n", - " COMP6 ANI-MD Benchmark containing MD trajectories from the ...\n", - "... ...\n", - "TorsionDriveDataset OpenFF-benchmark-ligand-fragments-v2.0 Ligand fragments from the JACS benchmark systems.\n", - " Pfizer Discrepancy Torsion Dataset 1 None\n", - " SMIRNOFF Coverage Torsion Set 1 None\n", - " SiliconTX Torsion Benchmark Set 1 None\n", - " TorsionDrive Paper None\n", - "\n", - "[207 rows x 1 columns]" + "[{'id': 35,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Fragmenter Phenyl Benchmark',\n", + " 'record_count': 454},\n", + " {'id': 36,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Group1 Torsions',\n", + " 'record_count': 820},\n", + " {'id': 41,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Optimization Set 1',\n", + " 'record_count': 937},\n", + " {'id': 42,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'Fragment Stability Benchmark',\n", + " 'record_count': 86},\n", + " {'id': 43,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'SMIRNOFF Coverage Set 1',\n", + " 'record_count': 1132},\n", + " {'id': 45,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF VEHICLe Set 1',\n", + " 'record_count': 25500},\n", + " {'id': 48,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'SMIRNOFF Coverage Torsion Set 1',\n", + " 'record_count': 585},\n", + " {'id': 49,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF NCI250K Boron 1',\n", + " 'record_count': 189},\n", + " {'id': 50,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF 
Discrepancy Benchmark 1',\n", + " 'record_count': 19714},\n", + " {'id': 57,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Substituted Phenyl Set 1',\n", + " 'record_count': 795},\n", + " {'id': 68,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'Pfizer Discrepancy Optimization Dataset 1',\n", + " 'record_count': 352},\n", + " {'id': 69,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'FDA Optimization Dataset 1',\n", + " 'record_count': 6670},\n", + " {'id': 70,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'Pfizer Discrepancy Torsion Dataset 1',\n", + " 'record_count': 227},\n", + " {'id': 71,\n", + " 'dataset_type': 'gridoptimization',\n", + " 'dataset_name': 'OpenFF Trivalent Nitrogen Set 1 (deprecated)',\n", + " 'record_count': 311},\n", + " {'id': 148,\n", + " 'dataset_type': 'reaction',\n", + " 'dataset_name': 'AlkIsod14',\n", + " 'record_count': 994},\n", + " {'id': 149,\n", + " 'dataset_type': 'reaction',\n", + " 'dataset_name': 'BHPERI26',\n", + " 'record_count': 1846},\n", + " {'id': 151,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'OpenFF Optimization Set 1',\n", + " 'record_count': 937},\n", + " {'id': 152,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'OpenFF VEHICLe Set 1',\n", + " 'record_count': 48280},\n", + " {'id': 153,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'OpenFF NCI250K Boron 1',\n", + " 'record_count': 189},\n", + " {'id': 156,\n", + " 'dataset_type': 'reaction',\n", + " 'dataset_name': 'CYCONF',\n", + " 'record_count': 710},\n", + " {'id': 157,\n", + " 'dataset_type': 'manybody',\n", + " 'dataset_name': 'DS14',\n", + " 'record_count': 1946},\n", + " {'id': 158,\n", + " 'dataset_type': 'manybody',\n", + " 'dataset_name': 'FmH2O10',\n", + " 'record_count': 1390},\n", + " {'id': 159,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'OpenFF Discrepancy Benchmark 1',\n", + " 'record_count': 
18864},\n", + " {'id': 160,\n", + " 'dataset_type': 'manybody',\n", + " 'dataset_name': 'NC15',\n", + " 'record_count': 2919},\n", + " {'id': 161,\n", + " 'dataset_type': 'reaction',\n", + " 'dataset_name': 'Butanediol65',\n", + " 'record_count': 4544},\n", + " {'id': 162,\n", + " 'dataset_type': 'manybody',\n", + " 'dataset_name': 'HB15',\n", + " 'record_count': 2085},\n", + " {'id': 163,\n", + " 'dataset_type': 'reaction',\n", + " 'dataset_name': 'ACONF',\n", + " 'record_count': 1065},\n", + " {'id': 164,\n", + " 'dataset_type': 'reaction',\n", + " 'dataset_name': 'HNBrBDE18',\n", + " 'record_count': 1278},\n", + " {'id': 165,\n", + " 'dataset_type': 'reaction',\n", + " 'dataset_name': 'MPCONF196',\n", + " 'record_count': 12936},\n", + " {'id': 167,\n", + " 'dataset_type': 'reaction',\n", + " 'dataset_name': 'AlkIsomer11',\n", + " 'record_count': 781},\n", + " {'id': 168,\n", + " 'dataset_type': 'manybody',\n", + " 'dataset_name': 'A21',\n", + " 'record_count': 2919},\n", + " {'id': 169,\n", + " 'dataset_type': 'manybody',\n", + " 'dataset_name': 'A24',\n", + " 'record_count': 3336},\n", + " {'id': 170,\n", + " 'dataset_type': 'reaction',\n", + " 'dataset_name': 'BSR36',\n", + " 'record_count': 2556},\n", + " {'id': 171,\n", + " 'dataset_type': 'reaction',\n", + " 'dataset_name': 'BH76RC',\n", + " 'record_count': 2130},\n", + " {'id': 172,\n", + " 'dataset_type': 'manybody',\n", + " 'dataset_name': 'AlkBind12',\n", + " 'record_count': 1668},\n", + " {'id': 173,\n", + " 'dataset_type': 'reaction',\n", + " 'dataset_name': 'C20C24',\n", + " 'record_count': 426},\n", + " {'id': 174,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'SMIRNOFF Coverage Set 1',\n", + " 'record_count': 1109},\n", + " {'id': 194,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Primary TorsionDrive Benchmark 1',\n", + " 'record_count': 31},\n", + " {'id': 195,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Full TorsionDrive 
Benchmark 1',\n", + " 'record_count': 227},\n", + " {'id': 196,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Primary Optimization Benchmark 1',\n", + " 'record_count': 1885},\n", + " {'id': 197,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Full Optimization Benchmark 1',\n", + " 'record_count': 26736},\n", + " {'id': 213,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'Solvated Protein Fragments',\n", + " 'record_count': 0},\n", + " {'id': 217,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'TorsionDrive Paper',\n", + " 'record_count': 4},\n", + " {'id': 231,\n", + " 'dataset_type': 'gridoptimization',\n", + " 'dataset_name': 'OpenFF Trivalent Nitrogen Set 1',\n", + " 'record_count': 311},\n", + " {'id': 232,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'Kinase Inhibitors: WBO Distributions',\n", + " 'record_count': 6567},\n", + " {'id': 235,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Primary Benchmark 1 Torsion Set',\n", + " 'record_count': 259},\n", + " {'id': 237,\n", + " 'dataset_type': 'gridoptimization',\n", + " 'dataset_name': 'OpenFF Trivalent Nitrogen Set 2',\n", + " 'record_count': 311},\n", + " {'id': 239,\n", + " 'dataset_type': 'gridoptimization',\n", + " 'dataset_name': 'OpenFF Trivalent Nitrogen Set 3',\n", + " 'record_count': 126},\n", + " {'id': 241,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Primary Benchmark 2 Torsion Set',\n", + " 'record_count': 595},\n", + " {'id': 242,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Group1 Torsions 2',\n", + " 'record_count': 19},\n", + " {'id': 243,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Group1 Torsions 3',\n", + " 'record_count': 6},\n", + " {'id': 245,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Gen 2 Torsion Set 1 Roche',\n", + " 'record_count': 65},\n", + " 
{'id': 246,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Gen 2 Torsion Set 2 Coverage',\n", + " 'record_count': 93},\n", + " {'id': 247,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Gen 2 Torsion Set 3 Pfizer Discrepancy',\n", + " 'record_count': 38},\n", + " {'id': 248,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Gen 2 Torsion Set 4 eMolecules Discrepancy',\n", + " 'record_count': 106},\n", + " {'id': 249,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Gen 2 Torsion Set 5 Bayer',\n", + " 'record_count': 100},\n", + " {'id': 250,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Gen 2 Torsion Set 6 Supplemental',\n", + " 'record_count': 7},\n", + " {'id': 251,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Gen 2 Opt Set 1 Roche',\n", + " 'record_count': 298},\n", + " {'id': 253,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Gen 2 Opt Set 2 Coverage',\n", + " 'record_count': 373},\n", + " {'id': 254,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Gen 2 Opt Set 4 eMolecules Discrepancy',\n", + " 'record_count': 2201},\n", + " {'id': 255,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Gen 2 Opt Set 3 Pfizer Discrepancy',\n", + " 'record_count': 197},\n", + " {'id': 256,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Gen 2 Torsion Set 1 Roche 2',\n", + " 'record_count': 142},\n", + " {'id': 257,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Gen 2 Torsion Set 2 Coverage 2',\n", + " 'record_count': 157},\n", + " {'id': 258,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Gen 2 Torsion Set 3 Pfizer Discrepancy 2',\n", + " 'record_count': 82},\n", + " {'id': 259,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Gen 2 Torsion Set 4 eMolecules 
Discrepancy 2',\n", + " 'record_count': 272},\n", + " {'id': 260,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'OpenFF Gen 2 Opt Set 1 Roche',\n", + " 'record_count': 298},\n", + " {'id': 262,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'OpenFF Gen 2 Opt Set 2 Coverage',\n", + " 'record_count': 352},\n", + " {'id': 263,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'OpenFF Gen 2 Opt Set 3 Pfizer Discrepancy',\n", + " 'record_count': 197},\n", + " {'id': 264,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'OpenFF Gen 2 Opt Set 4 eMolecules Discrepancy',\n", + " 'record_count': 2181},\n", + " {'id': 265,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Gen 2 Torsion Set 5 Bayer 2',\n", + " 'record_count': 219},\n", + " {'id': 266,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Gen 2 Torsion Set 6 Supplemental 2',\n", + " 'record_count': 22},\n", + " {'id': 268,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'SiliconTX Torsion Benchmark Set 1',\n", + " 'record_count': 1459},\n", + " {'id': 270,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Gen 2 Opt Set 5 Bayer',\n", + " 'record_count': 1850},\n", + " {'id': 272,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'OpenFF Gen 2 Opt Set 5 Bayer',\n", + " 'record_count': 1772},\n", + " {'id': 275,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'Fragmenter paper',\n", + " 'record_count': 462},\n", + " {'id': 278,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Fragmenter Validation 1.0',\n", + " 'record_count': 440},\n", + " {'id': 279,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Ehrman Informative Optimization v0.1',\n", + " 'record_count': 3485},\n", + " {'id': 281,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Ehrman Informative Optimization v0.2',\n", + " 
'record_count': 3485},\n", + " {'id': 282,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF DANCE 1 eMolecules t142 v1.0',\n", + " 'record_count': 20},\n", + " {'id': 283,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Rowley Biaryl v1.0',\n", + " 'record_count': 87},\n", + " {'id': 284,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Protein Fragments v1.0',\n", + " 'record_count': 576},\n", + " {'id': 285,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Theory Benchmarking Set B3LYP-D3BJ DZVP v1.0',\n", + " 'record_count': 36},\n", + " {'id': 286,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVP v1.0',\n", + " 'record_count': 36},\n", + " {'id': 287,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVPD v1.0',\n", + " 'record_count': 36},\n", + " {'id': 288,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVPP v1.0',\n", + " 'record_count': 36},\n", + " {'id': 289,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF-benchmark-ligand-fragments-v1.0',\n", + " 'record_count': 3848},\n", + " {'id': 290,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Protein Fragments v2.0',\n", + " 'record_count': 6716},\n", + " {'id': 291,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Protein Fragments TorsionDrives v1.0',\n", + " 'record_count': 845},\n", + " {'id': 296,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Sandbox CHO PhAlkEthOH v1.0',\n", + " 'record_count': 85897},\n", + " {'id': 297,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Protein Peptide Fragments constrained v1.0',\n", + " 'record_count': 6716},\n", + " {'id': 298,\n", + " 'dataset_type': 
'optimization',\n", + " 'dataset_name': 'OpenFF Protein Peptide Fragments unconstrained v1.0',\n", + " 'record_count': 6709},\n", + " {'id': 299,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Theory Benchmarking Set B3LYP-D3BJ 6-31+Gss',\n", + " 'record_count': 36},\n", + " {'id': 300,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVPPD',\n", + " 'record_count': 36},\n", + " {'id': 301,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF PEPCONF OptimizationDataset v1.0',\n", + " 'record_count': 22680},\n", + " {'id': 302,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Substituted Phenyl Set 1 v2.0',\n", + " 'record_count': 780},\n", + " {'id': 303,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'OpenFF BCC Refit Study COH v1.0',\n", + " 'record_count': 382},\n", + " {'id': 304,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Roche Opt Set With Protomers and Tautomers v1.0',\n", + " 'record_count': 1376},\n", + " {'id': 305,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'Genentech PDB Ligand Expo whole optimization neutral v1.0',\n", + " 'record_count': 445},\n", + " {'id': 307,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Roche Opt Set With Protomers and Tautomers v1.1',\n", + " 'record_count': 1376},\n", + " {'id': 308,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF WBO Conjugated Series v1.0',\n", + " 'record_count': 787},\n", + " {'id': 309,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Theory Benchmarking Set v1.0',\n", + " 'record_count': 767},\n", + " {'id': 310,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Theory Benchmarking Constrained Optimization Set B3LYP-NL def2-TZVPD v1.0',\n", + " 'record_count': 864},\n", + " {'id': 311,\n", + " 'dataset_type': 
'optimization',\n", + " 'dataset_name': 'OpenFF Theory Benchmarking Constrained Optimization Set MP2 heavy-aug-cc-pVTZ v1.0',\n", + " 'record_count': 864},\n", + " {'id': 312,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Theory Benchmarking Constrained Optimization Set WB97X-V def2-TZVPD v1.0',\n", + " 'record_count': 864},\n", + " {'id': 313,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'Genentech PDB Ligand Expo fragment optimization neutral v1.0',\n", + " 'record_count': 2363},\n", + " {'id': 314,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Amide Torsion Set v1.0',\n", + " 'record_count': 260},\n", + " {'id': 315,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Aniline Para Opt v1.0',\n", + " 'record_count': 223},\n", + " {'id': 316,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Industry Benchmark Season 1 v1.0',\n", + " 'record_count': 69672},\n", + " {'id': 317,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Gen3 Torsion Set v1.0',\n", + " 'record_count': 888},\n", + " {'id': 318,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Aniline 2D Impropers v1.0',\n", + " 'record_count': 24},\n", + " {'id': 319,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Industry Benchmark Season 1 v1.1',\n", + " 'record_count': 539385},\n", + " {'id': 320,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF BCC Refit Study COH v2.0',\n", + " 'record_count': 4650},\n", + " {'id': 321,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Theory Benchmarking Constrained Optimization Set MP2 heavy-aug-cc-pVTZ v1.1',\n", + " 'record_count': 1416},\n", + " {'id': 322,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Industry Benchmark Season 1 - MM v1.1',\n", + " 'record_count': 501585},\n", + " {'id': 323,\n", + " 'dataset_type': 
'singlepoint',\n", + " 'dataset_name': 'OpenFF Theory Benchmarking Single Point Energies v1.0',\n", + " 'record_count': 66552},\n", + " {'id': 324,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF-benchmark-ligand-fragments-v2.0',\n", + " 'record_count': 8052},\n", + " {'id': 326,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF RESP Polarizability Optimizations v1.0',\n", + " 'record_count': 735},\n", + " {'id': 327,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF RESP Polarizability Optimizations v1.1',\n", + " 'record_count': 735},\n", + " {'id': 329,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'TorsionNet500 Single Points Dataset v1.0',\n", + " 'record_count': 24000},\n", + " {'id': 333,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Protein Dipeptide 2-D TorsionDrive v1.0',\n", + " 'record_count': 5},\n", + " {'id': 344,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Protein Dipeptide 2-D TorsionDrive v1.1',\n", + " 'record_count': 5},\n", + " {'id': 345,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Gen2 Optimization Dataset Protomers v1.0',\n", + " 'record_count': 600},\n", + " {'id': 346,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Protein Dipeptide 2-D TorsionDrive v2.0',\n", + " 'record_count': 26},\n", + " {'id': 347,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'OpenFF ESP Fragment Conformers v1.0',\n", + " 'record_count': 65116},\n", + " {'id': 351,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF ESP Industry Benchmark Set v1.0',\n", + " 'record_count': 56054},\n", + " {'id': 352,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Protein Capped 1-mer Sidechains v1.0',\n", + " 'record_count': 46},\n", + " {'id': 353,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Protein 
Dipeptide 2-D TorsionDrive v2.1',\n", + " 'record_count': 26},\n", + " {'id': 357,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'OpenFF ESP Industry Benchmark Set v1.1',\n", + " 'record_count': 39983},\n", + " {'id': 358,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Protein Capped 1-mer Sidechains v1.1',\n", + " 'record_count': 46},\n", + " {'id': 359,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Protein Capped 1-mer Sidechains v1.2',\n", + " 'record_count': 46},\n", + " {'id': 360,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Protein Capped 1-mer Sidechains v1.3',\n", + " 'record_count': 61},\n", + " {'id': 363,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF multiplicity correction optimization set v1.0',\n", + " 'record_count': 400},\n", + " {'id': 364,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF multiplicity correction torsion drive data v1.0',\n", + " 'record_count': 99},\n", + " {'id': 365,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Protein Capped 1-mers 3-mers Optimization Dataset v1.0',\n", + " 'record_count': 759},\n", + " {'id': 366,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Protein Capped 3-mer Backbones v1.0',\n", + " 'record_count': 54},\n", + " {'id': 370,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF multiplicity correction torsion drive data v1.1',\n", + " 'record_count': 131},\n", + " {'id': 371,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'RNA Single Point Dataset v1.0',\n", + " 'record_count': 13467},\n", + " {'id': 372,\n", + " 'dataset_type': 'optimization',\n", + " 'dataset_name': 'OpenFF Iodine Chemistry Optimization Dataset v1.0',\n", + " 'record_count': 327},\n", + " {'id': 373,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'RNA Trinucleotide Single Point Dataset v1.0',\n", 
+ " 'record_count': 81670},\n", + " {'id': 374,\n", + " 'dataset_type': 'torsiondrive',\n", + " 'dataset_name': 'OpenFF Protein Capped 3-mer Omega v1.0',\n", + " 'record_count': 26},\n", + " {'id': 375,\n", + " 'dataset_type': 'singlepoint',\n", + " 'dataset_name': 'RNA Nucleoside Single Point Dataset v1.0',\n", + " 'record_count': 19110}]" ] }, "execution_count": 1, @@ -130,20 +598,21 @@ } ], "source": [ - "import qcportal as ptl\n", + "import qcelemental\n", + "import qcportal\n", "\n", "from openff.toolkit import Molecule\n", "\n", - "client = ptl.FractalClient()\n", - "# list the collections available\n", - "client.list_collections()" + "client = qcportal.PortalClient(\"https://api.qcarchive.molssi.org:443\")\n", + "\n", + "client.list_datasets()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Now let us grab a molecule from an optimization dataset" + "Data in the QCArchive is organized into [datasets](https://molssi.github.io/QCFractal/user_guide/datasets.html#using-datasets). Now let us grab a molecule from an optimization dataset." ] }, { @@ -159,8 +628,9 @@ }, "outputs": [], "source": [ - "ds = client.get_collection(\n", - " \"OptimizationDataset\", \"Kinase Inhibitors: WBO Distributions\"\n", + "dataset = client.get_dataset(\n", + " dataset_type=\"optimization\",\n", + " dataset_name=\"Kinase Inhibitors: WBO Distributions\",\n", ")" ] }, @@ -168,7 +638,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Take the first entry from the collection. " + "Take an arbitrary entry from the collection."
] }, { @@ -184,7 +654,7 @@ }, "outputs": [], "source": [ - "entry = ds.get_entry(ds.df.index[0])" + "entry = dataset.get_entry(entry_name=dataset.entry_names[-1])" ] }, { @@ -209,21 +679,184 @@ { "data": { "text/plain": [ - "{'name': 'Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)CN5CCN(CC5)C-0',\n", - " 'initial_molecule': '9589274',\n", + "{'name': 'c1cc(c(cc1f)[c@h]2ccc[n@@]2c3ccn4c(n3)c(cn4)nc(=o)[n@@]5cc[c@@h](c5)o)f-77',\n", + " 'initial_molecule': {'schema_name': 'qcschema_molecule',\n", + " 'schema_version': 2,\n", + " 'validated': True,\n", + " 'symbols': array(['C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C',\n", + " 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'N', 'N', 'N', 'N', 'N',\n", + " 'N', 'O', 'O', 'F', 'F', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',\n", + " 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',\n", + " 'H'], dtype='