diff --git a/devtools/conda-envs/beta_rc_env.yaml b/devtools/conda-envs/beta_rc_env.yaml
index 2e451d4be..803c7d351 100644
--- a/devtools/conda-envs/beta_rc_env.yaml
+++ b/devtools/conda-envs/beta_rc_env.yaml
@@ -37,7 +37,7 @@ dependencies:
- bson
- msgpack-python
- qcelemental
- - qcportal >=0.15, <0.50.0a0
+ - qcportal >=0.50
- qcengine
- nglview
- mdtraj
diff --git a/devtools/conda-envs/openeye-examples.yaml b/devtools/conda-envs/openeye-examples.yaml
index d2dd7984e..7d116bb7c 100644
--- a/devtools/conda-envs/openeye-examples.yaml
+++ b/devtools/conda-envs/openeye-examples.yaml
@@ -35,7 +35,7 @@ dependencies:
- bson
- msgpack-python
- qcelemental
- - qcportal >=0.15, <0.50.0a0
+ - qcportal >=0.50
- qcengine
- mdtraj
- parmed =3
diff --git a/devtools/conda-envs/openeye.yaml b/devtools/conda-envs/openeye.yaml
index cd2e229c0..1a33faa83 100644
--- a/devtools/conda-envs/openeye.yaml
+++ b/devtools/conda-envs/openeye.yaml
@@ -35,7 +35,7 @@ dependencies:
- bson
- msgpack-python
- qcelemental
- - qcportal >=0.15, <0.50.0a0
+ - qcportal >=0.50
- qcengine
- mdtraj
- nglview
diff --git a/devtools/conda-envs/rdkit-examples.yaml b/devtools/conda-envs/rdkit-examples.yaml
index b23d65c75..58cb892fd 100644
--- a/devtools/conda-envs/rdkit-examples.yaml
+++ b/devtools/conda-envs/rdkit-examples.yaml
@@ -37,7 +37,7 @@ dependencies:
- bson
- msgpack-python
- qcelemental
- - qcportal >=0.15, <0.50.0a0
+ - qcportal >=0.50
- qcengine
- nbval
- mdtraj
diff --git a/devtools/conda-envs/rdkit.yaml b/devtools/conda-envs/rdkit.yaml
index 670821474..c59184cb7 100644
--- a/devtools/conda-envs/rdkit.yaml
+++ b/devtools/conda-envs/rdkit.yaml
@@ -35,6 +35,6 @@ dependencies:
- bson
- msgpack-python
- qcelemental
- - qcportal >=0.15, <0.50.0a0
+ - qcportal >=0.50
- qcengine
- nglview
diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml
index ba9ae17a3..9debbe0e5 100644
--- a/devtools/conda-envs/test_env.yaml
+++ b/devtools/conda-envs/test_env.yaml
@@ -36,7 +36,7 @@ dependencies:
- bson
- msgpack-python
- qcelemental
- - qcportal >=0.15, <0.50.0a0
+ - qcportal >=0.50
- qcengine
- nglview
- mdtraj
diff --git a/examples/QCArchive_interface/QCarchive_interface.ipynb b/examples/QCArchive_interface/QCarchive_interface.ipynb
index 9fdd4b830..aec8207cc 100644
--- a/examples/QCArchive_interface/QCarchive_interface.ipynb
+++ b/examples/QCArchive_interface/QCarchive_interface.ipynb
@@ -6,7 +6,7 @@
"source": [
"# Using QCArchive with the OpenFF Toolkit\n",
"\n",
- "Here we show how to create OpenFF molecules safely from data in the QCArchive using the cmiles entries, specifically we want to use the canonical_isomeric_explicit_hydrogen_mapped_smiles data which is metadata stored at the entry-level of a collection.\n",
+ "Here we show how to create OpenFF molecules safely from data in the QCArchive using the CMILES entries. This transformation relies on the `\"canonical_isomeric_explicit_hydrogen_mapped_smiles\"` metadata stored with each entry.\n",
"\n",
"First load up the client you wish to connect to, in this case, we use the public instance."
]
@@ -25,103 +25,571 @@
"outputs": [
{
"data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " | \n",
- " tagline | \n",
- "
\n",
- " \n",
- " collection | \n",
- " name | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Dataset | \n",
- " A Benchmark Data Set for Hydrogen Combustion | \n",
- " A Benchmark Data Set for Hydrogen Combustion | \n",
- "
\n",
- " \n",
- " ANI-1 | \n",
- " 22 million off-equilibrium conformations and e... | \n",
- "
\n",
- " \n",
- " ANI-1ccx | \n",
- " Coupled cluster properties for molecules | \n",
- "
\n",
- " \n",
- " ANI-1x | \n",
- " Density functional theory properties for molec... | \n",
- "
\n",
- " \n",
- " COMP6 ANI-MD | \n",
- " Benchmark containing MD trajectories from the ... | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " TorsionDriveDataset | \n",
- " OpenFF-benchmark-ligand-fragments-v2.0 | \n",
- " Ligand fragments from the JACS benchmark systems. | \n",
- "
\n",
- " \n",
- " Pfizer Discrepancy Torsion Dataset 1 | \n",
- " None | \n",
- "
\n",
- " \n",
- " SMIRNOFF Coverage Torsion Set 1 | \n",
- " None | \n",
- "
\n",
- " \n",
- " SiliconTX Torsion Benchmark Set 1 | \n",
- " None | \n",
- "
\n",
- " \n",
- " TorsionDrive Paper | \n",
- " None | \n",
- "
\n",
- " \n",
- "
\n",
- "
207 rows × 1 columns
\n",
- "
"
- ],
"text/plain": [
- " tagline\n",
- "collection name \n",
- "Dataset A Benchmark Data Set for Hydrogen Combustion A Benchmark Data Set for Hydrogen Combustion\n",
- " ANI-1 22 million off-equilibrium conformations and e...\n",
- " ANI-1ccx Coupled cluster properties for molecules\n",
- " ANI-1x Density functional theory properties for molec...\n",
- " COMP6 ANI-MD Benchmark containing MD trajectories from the ...\n",
- "... ...\n",
- "TorsionDriveDataset OpenFF-benchmark-ligand-fragments-v2.0 Ligand fragments from the JACS benchmark systems.\n",
- " Pfizer Discrepancy Torsion Dataset 1 None\n",
- " SMIRNOFF Coverage Torsion Set 1 None\n",
- " SiliconTX Torsion Benchmark Set 1 None\n",
- " TorsionDrive Paper None\n",
- "\n",
- "[207 rows x 1 columns]"
+ "[{'id': 35,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Fragmenter Phenyl Benchmark',\n",
+ " 'record_count': 454},\n",
+ " {'id': 36,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Group1 Torsions',\n",
+ " 'record_count': 820},\n",
+ " {'id': 41,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Optimization Set 1',\n",
+ " 'record_count': 937},\n",
+ " {'id': 42,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'Fragment Stability Benchmark',\n",
+ " 'record_count': 86},\n",
+ " {'id': 43,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'SMIRNOFF Coverage Set 1',\n",
+ " 'record_count': 1132},\n",
+ " {'id': 45,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF VEHICLe Set 1',\n",
+ " 'record_count': 25500},\n",
+ " {'id': 48,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'SMIRNOFF Coverage Torsion Set 1',\n",
+ " 'record_count': 585},\n",
+ " {'id': 49,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF NCI250K Boron 1',\n",
+ " 'record_count': 189},\n",
+ " {'id': 50,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Discrepancy Benchmark 1',\n",
+ " 'record_count': 19714},\n",
+ " {'id': 57,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Substituted Phenyl Set 1',\n",
+ " 'record_count': 795},\n",
+ " {'id': 68,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'Pfizer Discrepancy Optimization Dataset 1',\n",
+ " 'record_count': 352},\n",
+ " {'id': 69,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'FDA Optimization Dataset 1',\n",
+ " 'record_count': 6670},\n",
+ " {'id': 70,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'Pfizer Discrepancy Torsion Dataset 1',\n",
+ " 'record_count': 227},\n",
+ " {'id': 71,\n",
+ " 'dataset_type': 'gridoptimization',\n",
+ " 'dataset_name': 'OpenFF Trivalent Nitrogen Set 1 (deprecated)',\n",
+ " 'record_count': 311},\n",
+ " {'id': 148,\n",
+ " 'dataset_type': 'reaction',\n",
+ " 'dataset_name': 'AlkIsod14',\n",
+ " 'record_count': 994},\n",
+ " {'id': 149,\n",
+ " 'dataset_type': 'reaction',\n",
+ " 'dataset_name': 'BHPERI26',\n",
+ " 'record_count': 1846},\n",
+ " {'id': 151,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'OpenFF Optimization Set 1',\n",
+ " 'record_count': 937},\n",
+ " {'id': 152,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'OpenFF VEHICLe Set 1',\n",
+ " 'record_count': 48280},\n",
+ " {'id': 153,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'OpenFF NCI250K Boron 1',\n",
+ " 'record_count': 189},\n",
+ " {'id': 156,\n",
+ " 'dataset_type': 'reaction',\n",
+ " 'dataset_name': 'CYCONF',\n",
+ " 'record_count': 710},\n",
+ " {'id': 157,\n",
+ " 'dataset_type': 'manybody',\n",
+ " 'dataset_name': 'DS14',\n",
+ " 'record_count': 1946},\n",
+ " {'id': 158,\n",
+ " 'dataset_type': 'manybody',\n",
+ " 'dataset_name': 'FmH2O10',\n",
+ " 'record_count': 1390},\n",
+ " {'id': 159,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'OpenFF Discrepancy Benchmark 1',\n",
+ " 'record_count': 18864},\n",
+ " {'id': 160,\n",
+ " 'dataset_type': 'manybody',\n",
+ " 'dataset_name': 'NC15',\n",
+ " 'record_count': 2919},\n",
+ " {'id': 161,\n",
+ " 'dataset_type': 'reaction',\n",
+ " 'dataset_name': 'Butanediol65',\n",
+ " 'record_count': 4544},\n",
+ " {'id': 162,\n",
+ " 'dataset_type': 'manybody',\n",
+ " 'dataset_name': 'HB15',\n",
+ " 'record_count': 2085},\n",
+ " {'id': 163,\n",
+ " 'dataset_type': 'reaction',\n",
+ " 'dataset_name': 'ACONF',\n",
+ " 'record_count': 1065},\n",
+ " {'id': 164,\n",
+ " 'dataset_type': 'reaction',\n",
+ " 'dataset_name': 'HNBrBDE18',\n",
+ " 'record_count': 1278},\n",
+ " {'id': 165,\n",
+ " 'dataset_type': 'reaction',\n",
+ " 'dataset_name': 'MPCONF196',\n",
+ " 'record_count': 12936},\n",
+ " {'id': 167,\n",
+ " 'dataset_type': 'reaction',\n",
+ " 'dataset_name': 'AlkIsomer11',\n",
+ " 'record_count': 781},\n",
+ " {'id': 168,\n",
+ " 'dataset_type': 'manybody',\n",
+ " 'dataset_name': 'A21',\n",
+ " 'record_count': 2919},\n",
+ " {'id': 169,\n",
+ " 'dataset_type': 'manybody',\n",
+ " 'dataset_name': 'A24',\n",
+ " 'record_count': 3336},\n",
+ " {'id': 170,\n",
+ " 'dataset_type': 'reaction',\n",
+ " 'dataset_name': 'BSR36',\n",
+ " 'record_count': 2556},\n",
+ " {'id': 171,\n",
+ " 'dataset_type': 'reaction',\n",
+ " 'dataset_name': 'BH76RC',\n",
+ " 'record_count': 2130},\n",
+ " {'id': 172,\n",
+ " 'dataset_type': 'manybody',\n",
+ " 'dataset_name': 'AlkBind12',\n",
+ " 'record_count': 1668},\n",
+ " {'id': 173,\n",
+ " 'dataset_type': 'reaction',\n",
+ " 'dataset_name': 'C20C24',\n",
+ " 'record_count': 426},\n",
+ " {'id': 174,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'SMIRNOFF Coverage Set 1',\n",
+ " 'record_count': 1109},\n",
+ " {'id': 194,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Primary TorsionDrive Benchmark 1',\n",
+ " 'record_count': 31},\n",
+ " {'id': 195,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Full TorsionDrive Benchmark 1',\n",
+ " 'record_count': 227},\n",
+ " {'id': 196,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Primary Optimization Benchmark 1',\n",
+ " 'record_count': 1885},\n",
+ " {'id': 197,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Full Optimization Benchmark 1',\n",
+ " 'record_count': 26736},\n",
+ " {'id': 213,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'Solvated Protein Fragments',\n",
+ " 'record_count': 0},\n",
+ " {'id': 217,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'TorsionDrive Paper',\n",
+ " 'record_count': 4},\n",
+ " {'id': 231,\n",
+ " 'dataset_type': 'gridoptimization',\n",
+ " 'dataset_name': 'OpenFF Trivalent Nitrogen Set 1',\n",
+ " 'record_count': 311},\n",
+ " {'id': 232,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'Kinase Inhibitors: WBO Distributions',\n",
+ " 'record_count': 6567},\n",
+ " {'id': 235,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Primary Benchmark 1 Torsion Set',\n",
+ " 'record_count': 259},\n",
+ " {'id': 237,\n",
+ " 'dataset_type': 'gridoptimization',\n",
+ " 'dataset_name': 'OpenFF Trivalent Nitrogen Set 2',\n",
+ " 'record_count': 311},\n",
+ " {'id': 239,\n",
+ " 'dataset_type': 'gridoptimization',\n",
+ " 'dataset_name': 'OpenFF Trivalent Nitrogen Set 3',\n",
+ " 'record_count': 126},\n",
+ " {'id': 241,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Primary Benchmark 2 Torsion Set',\n",
+ " 'record_count': 595},\n",
+ " {'id': 242,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Group1 Torsions 2',\n",
+ " 'record_count': 19},\n",
+ " {'id': 243,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Group1 Torsions 3',\n",
+ " 'record_count': 6},\n",
+ " {'id': 245,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Torsion Set 1 Roche',\n",
+ " 'record_count': 65},\n",
+ " {'id': 246,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Torsion Set 2 Coverage',\n",
+ " 'record_count': 93},\n",
+ " {'id': 247,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Torsion Set 3 Pfizer Discrepancy',\n",
+ " 'record_count': 38},\n",
+ " {'id': 248,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Torsion Set 4 eMolecules Discrepancy',\n",
+ " 'record_count': 106},\n",
+ " {'id': 249,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Torsion Set 5 Bayer',\n",
+ " 'record_count': 100},\n",
+ " {'id': 250,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Torsion Set 6 Supplemental',\n",
+ " 'record_count': 7},\n",
+ " {'id': 251,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Opt Set 1 Roche',\n",
+ " 'record_count': 298},\n",
+ " {'id': 253,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Opt Set 2 Coverage',\n",
+ " 'record_count': 373},\n",
+ " {'id': 254,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Opt Set 4 eMolecules Discrepancy',\n",
+ " 'record_count': 2201},\n",
+ " {'id': 255,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Opt Set 3 Pfizer Discrepancy',\n",
+ " 'record_count': 197},\n",
+ " {'id': 256,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Torsion Set 1 Roche 2',\n",
+ " 'record_count': 142},\n",
+ " {'id': 257,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Torsion Set 2 Coverage 2',\n",
+ " 'record_count': 157},\n",
+ " {'id': 258,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Torsion Set 3 Pfizer Discrepancy 2',\n",
+ " 'record_count': 82},\n",
+ " {'id': 259,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Torsion Set 4 eMolecules Discrepancy 2',\n",
+ " 'record_count': 272},\n",
+ " {'id': 260,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Opt Set 1 Roche',\n",
+ " 'record_count': 298},\n",
+ " {'id': 262,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Opt Set 2 Coverage',\n",
+ " 'record_count': 352},\n",
+ " {'id': 263,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Opt Set 3 Pfizer Discrepancy',\n",
+ " 'record_count': 197},\n",
+ " {'id': 264,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Opt Set 4 eMolecules Discrepancy',\n",
+ " 'record_count': 2181},\n",
+ " {'id': 265,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Torsion Set 5 Bayer 2',\n",
+ " 'record_count': 219},\n",
+ " {'id': 266,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Torsion Set 6 Supplemental 2',\n",
+ " 'record_count': 22},\n",
+ " {'id': 268,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'SiliconTX Torsion Benchmark Set 1',\n",
+ " 'record_count': 1459},\n",
+ " {'id': 270,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Opt Set 5 Bayer',\n",
+ " 'record_count': 1850},\n",
+ " {'id': 272,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'OpenFF Gen 2 Opt Set 5 Bayer',\n",
+ " 'record_count': 1772},\n",
+ " {'id': 275,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'Fragmenter paper',\n",
+ " 'record_count': 462},\n",
+ " {'id': 278,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Fragmenter Validation 1.0',\n",
+ " 'record_count': 440},\n",
+ " {'id': 279,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Ehrman Informative Optimization v0.1',\n",
+ " 'record_count': 3485},\n",
+ " {'id': 281,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Ehrman Informative Optimization v0.2',\n",
+ " 'record_count': 3485},\n",
+ " {'id': 282,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF DANCE 1 eMolecules t142 v1.0',\n",
+ " 'record_count': 20},\n",
+ " {'id': 283,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Rowley Biaryl v1.0',\n",
+ " 'record_count': 87},\n",
+ " {'id': 284,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Protein Fragments v1.0',\n",
+ " 'record_count': 576},\n",
+ " {'id': 285,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Theory Benchmarking Set B3LYP-D3BJ DZVP v1.0',\n",
+ " 'record_count': 36},\n",
+ " {'id': 286,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVP v1.0',\n",
+ " 'record_count': 36},\n",
+ " {'id': 287,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVPD v1.0',\n",
+ " 'record_count': 36},\n",
+ " {'id': 288,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVPP v1.0',\n",
+ " 'record_count': 36},\n",
+ " {'id': 289,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF-benchmark-ligand-fragments-v1.0',\n",
+ " 'record_count': 3848},\n",
+ " {'id': 290,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Protein Fragments v2.0',\n",
+ " 'record_count': 6716},\n",
+ " {'id': 291,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Protein Fragments TorsionDrives v1.0',\n",
+ " 'record_count': 845},\n",
+ " {'id': 296,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Sandbox CHO PhAlkEthOH v1.0',\n",
+ " 'record_count': 85897},\n",
+ " {'id': 297,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Protein Peptide Fragments constrained v1.0',\n",
+ " 'record_count': 6716},\n",
+ " {'id': 298,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Protein Peptide Fragments unconstrained v1.0',\n",
+ " 'record_count': 6709},\n",
+ " {'id': 299,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Theory Benchmarking Set B3LYP-D3BJ 6-31+Gss',\n",
+ " 'record_count': 36},\n",
+ " {'id': 300,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVPPD',\n",
+ " 'record_count': 36},\n",
+ " {'id': 301,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF PEPCONF OptimizationDataset v1.0',\n",
+ " 'record_count': 22680},\n",
+ " {'id': 302,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Substituted Phenyl Set 1 v2.0',\n",
+ " 'record_count': 780},\n",
+ " {'id': 303,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'OpenFF BCC Refit Study COH v1.0',\n",
+ " 'record_count': 382},\n",
+ " {'id': 304,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Roche Opt Set With Protomers and Tautomers v1.0',\n",
+ " 'record_count': 1376},\n",
+ " {'id': 305,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'Genentech PDB Ligand Expo whole optimization neutral v1.0',\n",
+ " 'record_count': 445},\n",
+ " {'id': 307,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Roche Opt Set With Protomers and Tautomers v1.1',\n",
+ " 'record_count': 1376},\n",
+ " {'id': 308,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF WBO Conjugated Series v1.0',\n",
+ " 'record_count': 787},\n",
+ " {'id': 309,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Theory Benchmarking Set v1.0',\n",
+ " 'record_count': 767},\n",
+ " {'id': 310,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Theory Benchmarking Constrained Optimization Set B3LYP-NL def2-TZVPD v1.0',\n",
+ " 'record_count': 864},\n",
+ " {'id': 311,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Theory Benchmarking Constrained Optimization Set MP2 heavy-aug-cc-pVTZ v1.0',\n",
+ " 'record_count': 864},\n",
+ " {'id': 312,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Theory Benchmarking Constrained Optimization Set WB97X-V def2-TZVPD v1.0',\n",
+ " 'record_count': 864},\n",
+ " {'id': 313,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'Genentech PDB Ligand Expo fragment optimization neutral v1.0',\n",
+ " 'record_count': 2363},\n",
+ " {'id': 314,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Amide Torsion Set v1.0',\n",
+ " 'record_count': 260},\n",
+ " {'id': 315,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Aniline Para Opt v1.0',\n",
+ " 'record_count': 223},\n",
+ " {'id': 316,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Industry Benchmark Season 1 v1.0',\n",
+ " 'record_count': 69672},\n",
+ " {'id': 317,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Gen3 Torsion Set v1.0',\n",
+ " 'record_count': 888},\n",
+ " {'id': 318,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Aniline 2D Impropers v1.0',\n",
+ " 'record_count': 24},\n",
+ " {'id': 319,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Industry Benchmark Season 1 v1.1',\n",
+ " 'record_count': 539385},\n",
+ " {'id': 320,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF BCC Refit Study COH v2.0',\n",
+ " 'record_count': 4650},\n",
+ " {'id': 321,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Theory Benchmarking Constrained Optimization Set MP2 heavy-aug-cc-pVTZ v1.1',\n",
+ " 'record_count': 1416},\n",
+ " {'id': 322,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Industry Benchmark Season 1 - MM v1.1',\n",
+ " 'record_count': 501585},\n",
+ " {'id': 323,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'OpenFF Theory Benchmarking Single Point Energies v1.0',\n",
+ " 'record_count': 66552},\n",
+ " {'id': 324,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF-benchmark-ligand-fragments-v2.0',\n",
+ " 'record_count': 8052},\n",
+ " {'id': 326,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF RESP Polarizability Optimizations v1.0',\n",
+ " 'record_count': 735},\n",
+ " {'id': 327,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF RESP Polarizability Optimizations v1.1',\n",
+ " 'record_count': 735},\n",
+ " {'id': 329,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'TorsionNet500 Single Points Dataset v1.0',\n",
+ " 'record_count': 24000},\n",
+ " {'id': 333,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Protein Dipeptide 2-D TorsionDrive v1.0',\n",
+ " 'record_count': 5},\n",
+ " {'id': 344,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Protein Dipeptide 2-D TorsionDrive v1.1',\n",
+ " 'record_count': 5},\n",
+ " {'id': 345,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Gen2 Optimization Dataset Protomers v1.0',\n",
+ " 'record_count': 600},\n",
+ " {'id': 346,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Protein Dipeptide 2-D TorsionDrive v2.0',\n",
+ " 'record_count': 26},\n",
+ " {'id': 347,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'OpenFF ESP Fragment Conformers v1.0',\n",
+ " 'record_count': 65116},\n",
+ " {'id': 351,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF ESP Industry Benchmark Set v1.0',\n",
+ " 'record_count': 56054},\n",
+ " {'id': 352,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Protein Capped 1-mer Sidechains v1.0',\n",
+ " 'record_count': 46},\n",
+ " {'id': 353,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Protein Dipeptide 2-D TorsionDrive v2.1',\n",
+ " 'record_count': 26},\n",
+ " {'id': 357,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'OpenFF ESP Industry Benchmark Set v1.1',\n",
+ " 'record_count': 39983},\n",
+ " {'id': 358,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Protein Capped 1-mer Sidechains v1.1',\n",
+ " 'record_count': 46},\n",
+ " {'id': 359,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Protein Capped 1-mer Sidechains v1.2',\n",
+ " 'record_count': 46},\n",
+ " {'id': 360,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Protein Capped 1-mer Sidechains v1.3',\n",
+ " 'record_count': 61},\n",
+ " {'id': 363,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF multiplicity correction optimization set v1.0',\n",
+ " 'record_count': 400},\n",
+ " {'id': 364,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF multiplicity correction torsion drive data v1.0',\n",
+ " 'record_count': 99},\n",
+ " {'id': 365,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Protein Capped 1-mers 3-mers Optimization Dataset v1.0',\n",
+ " 'record_count': 759},\n",
+ " {'id': 366,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Protein Capped 3-mer Backbones v1.0',\n",
+ " 'record_count': 54},\n",
+ " {'id': 370,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF multiplicity correction torsion drive data v1.1',\n",
+ " 'record_count': 131},\n",
+ " {'id': 371,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'RNA Single Point Dataset v1.0',\n",
+ " 'record_count': 13467},\n",
+ " {'id': 372,\n",
+ " 'dataset_type': 'optimization',\n",
+ " 'dataset_name': 'OpenFF Iodine Chemistry Optimization Dataset v1.0',\n",
+ " 'record_count': 327},\n",
+ " {'id': 373,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'RNA Trinucleotide Single Point Dataset v1.0',\n",
+ " 'record_count': 81670},\n",
+ " {'id': 374,\n",
+ " 'dataset_type': 'torsiondrive',\n",
+ " 'dataset_name': 'OpenFF Protein Capped 3-mer Omega v1.0',\n",
+ " 'record_count': 26},\n",
+ " {'id': 375,\n",
+ " 'dataset_type': 'singlepoint',\n",
+ " 'dataset_name': 'RNA Nucleoside Single Point Dataset v1.0',\n",
+ " 'record_count': 19110}]"
]
},
"execution_count": 1,
@@ -130,20 +598,21 @@
}
],
"source": [
- "import qcportal as ptl\n",
+ "import qcelemental\n",
+ "import qcportal\n",
"\n",
"from openff.toolkit import Molecule\n",
"\n",
- "client = ptl.FractalClient()\n",
- "# list the collections available\n",
- "client.list_collections()"
+ "client = qcportal.PortalClient(\"https://api.qcarchive.molssi.org:443\")\n",
+ "\n",
+ "client.list_datasets()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Now let us grab a molecule from an optimization dataset"
+ "Data in the QCArchive is organized into [datasets](https://molssi.github.io/QCFractal/user_guide/datasets.html#using-datasets). Now let us grab a molecule from an optimization dataset."
]
},
{
@@ -159,8 +628,9 @@
},
"outputs": [],
"source": [
- "ds = client.get_collection(\n",
- " \"OptimizationDataset\", \"Kinase Inhibitors: WBO Distributions\"\n",
+ "dataset = client.get_dataset(\n",
+ " dataset_type=\"optimization\",\n",
+ " dataset_name=\"Kinase Inhibitors: WBO Distributions\",\n",
")"
]
},
@@ -168,7 +638,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Take the first entry from the collection. "
+ "Take an arbitrary entry from the dataset (here, the last entry name listed)."
]
},
{
@@ -184,7 +654,7 @@
},
"outputs": [],
"source": [
- "entry = ds.get_entry(ds.df.index[0])"
+ "entry = dataset.get_entry(entry_name=dataset.entry_names[-1])"
]
},
{
@@ -209,21 +679,184 @@
{
"data": {
"text/plain": [
- "{'name': 'Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)CN5CCN(CC5)C-0',\n",
- " 'initial_molecule': '9589274',\n",
+ "{'name': 'c1cc(c(cc1f)[c@h]2ccc[n@@]2c3ccn4c(n3)c(cn4)nc(=o)[n@@]5cc[c@@h](c5)o)f-77',\n",
+ " 'initial_molecule': {'schema_name': 'qcschema_molecule',\n",
+ " 'schema_version': 2,\n",
+ " 'validated': True,\n",
+ " 'symbols': array(['C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C',\n",
+ " 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'N', 'N', 'N', 'N', 'N',\n",
+ " 'N', 'O', 'O', 'F', 'F', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',\n",
+ " 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',\n",
+ " 'H'], dtype='