Skip to content

Commit

Permalink
Merge pull request #94 from monarch-initiative/add_bootstrap
Browse files Browse the repository at this point in the history
Add Bootstrap function to app; fix some parsing bugs
  • Loading branch information
caufieldjh authored Oct 9, 2024
2 parents 1a9c248 + 79c643f commit 6795765
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 6 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ sphinx-click = {version = ">=4.3.0"}
myst-parser = {version = ">=0.18.1"}

[tool.poetry.scripts]
curategpt = "curate_gpt.cli:main"
gocampr = "curate_gpt.adhoc.gocam_predictor:main"
curategpt = "curategpt.cli:main"
gocampr = "curategpt.adhoc.gocam_predictor:main"

[tool.poetry.extras]
docs = [
Expand Down
48 changes: 48 additions & 0 deletions src/curategpt/app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from curategpt import BasicExtractor
from curategpt.agents import MappingAgent
from curategpt.agents.bootstrap_agent import BootstrapAgent, KnowledgeBaseSpecification
from curategpt.agents.chat_agent import ChatAgent, ChatResponse
from curategpt.agents.dase_agent import DatabaseAugmentedStructuredExtraction
from curategpt.agents.dragon_agent import DragonAgent
Expand All @@ -37,6 +38,7 @@
SEARCH = "Search"
CLUSTER_SEARCH = "Cluster Search"
MATCH = "Match"
BOOTSTRAP = "Bootstrap"
CURATE = "Curate"
ADD_TO_CART = "Add to Cart"
# EXTRACT = "Extract"
Expand Down Expand Up @@ -77,6 +79,7 @@
EXTRACT,
CITESEEK,
MATCH,
BOOTSTRAP,
CART,
ABOUT,
HELP,
Expand Down Expand Up @@ -664,6 +667,51 @@ def _flat(obj: dict, limit=40) -> dict:
st.subheader(f"Reference {ref}", anchor=f"ref-{ref}")
st.code(text, language="yaml")

elif option == BOOTSTRAP:
page_state = state.get_page_state(BOOTSTRAP)
st.subheader("Generate a schema and data for a new knowledge base.")

extractor = BasicExtractor()
extractor.model_name = model_name
bootstrap_agent = BootstrapAgent(extractor=extractor)

kb_name = st.text_input("KB Name", help="Name of the knowledge base, without spaces (e.g. 'ice_cream_kb')")
description = st.text_input(
"Description",
help="Description of the knowledge base (e.g. 'A knowledge base for ice cream')",
)
attributes = st.text_input(
"Attributes",
help="Attributes of the knowledge base (e.g. 'flavor, viscosity, color')",
)
main_class = st.text_input(
"Main Class",
help="Main class of the knowledge base, without spaces (e.g. 'IceCreamType')",
)
generate_data = st.checkbox(
"Generate data",
help="""
If checked, after generating the schema, generate example data.
""",
)
if st.button("Make Schema"):
st.write(f"Generating schema for *{kb_name}*")
config_dict = {
"kb_name": kb_name,
"description": description,
"attributes": attributes,
"main_class": main_class,
}
config = KnowledgeBaseSpecification(**config_dict)
ao = bootstrap_agent.bootstrap_schema(config)
schema_dict = ao.model_dump()
st.write(schema_dict)

if generate_data:
st.write(f"Generating data for *{kb_name}*")
data = bootstrap_agent.bootstrap_data(schema=schema_dict)
st.code(data, language="yaml")

elif option == CART:
page_state = state.get_page_state(CART)
st.subheader("Your items")
Expand Down
31 changes: 28 additions & 3 deletions src/curategpt/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -829,7 +829,32 @@ def extract_from_pubmed(

@main.group()
def bootstrap():
"Bootstrap schema or data."
"""Bootstrap schema or data.
Starting with a general description or a LinkML schema,
generate an initial version of a knowledge base.
The config should be a yaml file with the following fields:
kb_name: str
description: str
attributes: str
main_class: str
For example, this is a valid config:
kb_name: lumber_kb
description: A knowledge base for lumber
attributes: source_tree
main_class: Lumber_Type
Examples:
curategpt bootstrap schema -C config.yaml
(This will generate a LinkML schema, based on the provided config.)
curategpt bootstrap data -s schema.yaml
(This will generate data based on the provided schema.
The output of the previous command can be used as input for this command.)
"""


@bootstrap.command(name="schema")
Expand All @@ -841,7 +866,7 @@ def bootstrap():
help="path to yaml config",
)
def bootstrap_schema(config, model):
"""Bootstrap a knowledge base."""
"""Bootstrap a knowledge base with LinkML schema."""
extractor = BasicExtractor()
if model:
extractor.model_name = model
Expand All @@ -865,7 +890,7 @@ def bootstrap_schema(config, model):
help="path to yaml linkml schema",
)
def bootstrap_data(config, schema, model):
"""Bootstrap a knowledge base."""
"""Bootstrap a knowledge base with initial data."""
extractor = BasicExtractor()
if model:
extractor.model_name = model
Expand Down
5 changes: 4 additions & 1 deletion src/curategpt/conf/prompts/bootstrap-schema.j2
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ classes:

The goal is to have a tree like model with {{ main_class }} as the root of the tree,
and then lists of rich objects as the children of the root. Each rich object may need
its own class to describe it
its own class to describe it. Attributes of the main class should include {{ attributes }}.

## Examples

Expand Down Expand Up @@ -151,6 +151,9 @@ classes:

## General Tips

- Do not include any markdown formatting such as code fences, except as specified above
- Do not include any commentary preceding the YAML
- Do not include any "`" characters within the YAML
- Don't include base types such as string under `types`; they are already imported
- Give {{ main_class }} a mixture of simple string metadata and lists of rich objects for associated data that might require provenance or other metadata.
- Make sure the YAML is complete. Even though I provide placeholder "..." in the examples, the YAML you provide must parse
Expand Down

0 comments on commit 6795765

Please sign in to comment.