monarch-initiative · ptgolden · Dec 12, 2024 · Dec 12, 2024 · Dec 12, 2024 · Dec 12, 2024
diff --git a/examples/maps/custom-entrez-2-string.py b/examples/maps/custom-entrez-2-string.py
@@ -1,16 +1,7 @@
-from koza.cli_utils import get_koza_app
+from koza.runner import KozaTransform
 
-source_name = 'custom-map-protein-links-detailed'
-map_name = 'custom-entrez-2-string'
-
-koza_app = get_koza_app(source_name)
-
-row = koza_app.get_row(map_name)
-
-map = koza_app.get_map(map_name)
-
-entry = dict()
-
-entry["entrez"] = row["entrez"]
-
-map[row["STRING"]] = entry
+def transform_record(koza: KozaTransform, record: dict):
+    koza.write({
+        "STRING": record['STRING'],
+        "entrez": record["entrez"],
+    })
diff --git a/examples/maps/custom-entrez-2-string.yaml b/examples/maps/custom-entrez-2-string.yaml
@@ -3,23 +3,25 @@ name: 'custom-entrez-2-string'
 metadata:
   description: 'Mapping file provided by StringDB that contains entrez to protein ID mappings'
 
-delimiter: '\t'
-header_delimiter: '/'
+reader:
+  delimiter: '\t'
+  header_prefix: '#'
+  header_delimiter: '/'
 
-# Assumes that no identifiers are overlapping
-# otherwise these should go into separate configs
-files:
-  - './examples/data/entrez-2-string.tsv'
-  - './examples/data/additional-entrez-2-string.tsv'
+  # Assumes that identifiers do not overlap between the files listed below;
+  # otherwise, the files should go into separate configs.
+  files:
+    - './examples/data/entrez-2-string.tsv'
+    - './examples/data/additional-entrez-2-string.tsv'
 
-header: 0
+  header_mode: 0
 
-columns:
+  columns:
   - 'NCBI taxid'
   - 'entrez'
   - 'STRING'
 
-key: 'STRING'
-
-values:
-  - 'entrez'
+transform:
+  key: 'STRING'
+  values:
+    - 'entrez'
diff --git a/examples/maps/entrez-2-string.yaml b/examples/maps/entrez-2-string.yaml
@@ -3,23 +3,25 @@ name: 'entrez-2-string'
 metadata:
   description: 'Mapping file provided by StringDB that contains entrez to protein ID mappings'
 
-delimiter: '\t'
-header_delimiter: '/'
-header: 0
-comment_char: '#'
+reader:
+  delimiter: '\t'
+  header_delimiter: '/'
+  header_mode: 0
+  header_prefix: '#'
+  comment_char: '#'
 
-# Assumes that no identifiers are overlapping
-# otherwise these should go into separate configs
-files:
-  - './examples/data/entrez-2-string.tsv'
-  - './examples/data/additional-entrez-2-string.tsv'
+  # Assumes that identifiers do not overlap between the files listed below;
+  # otherwise, the files should go into separate configs.
+  files:
+    - './examples/data/entrez-2-string.tsv'
+    - './examples/data/additional-entrez-2-string.tsv'
 
-columns:
-  - 'NCBI taxid'
-  - 'entrez'
-  - 'STRING'
+  columns:
+    - 'NCBI taxid'
+    - 'entrez'
+    - 'STRING'
 
-key: 'STRING'
-
-values:
-  - 'entrez'
+transform:
+  key: 'STRING'
+  values:
+    - 'entrez'
diff --git a/examples/minimal.py b/examples/minimal.py
@@ -0,0 +1,4 @@
+from koza.runner import KozaTransform
+
+def transform(koza: KozaTransform):
+    pass
diff --git a/examples/string-declarative/declarative-protein-links-detailed.py b/examples/string-declarative/declarative-protein-links-detailed.py
@@ -1,24 +1,22 @@
 import re
+from typing import Any
 import uuid
 
 from biolink_model.datamodel.pydanticmodel_v2 import PairwiseGeneToGeneInteraction, Protein
 
-from koza.cli_utils import get_koza_app
+from koza.runner import KozaTransform
 
-koza_app = get_koza_app("declarative-protein-links-detailed")
+def transform_record(koza: KozaTransform, record: dict[str, Any]):
+    protein_a = Protein(id="ENSEMBL:" + re.sub(r"\d+\.", "", record["protein1"]))
+    protein_b = Protein(id="ENSEMBL:" + re.sub(r"\d+\.", "", record["protein2"]))
 
-row = koza_app.get_row()
+    pairwise_gene_to_gene_interaction = PairwiseGeneToGeneInteraction(
+        id="uuid:" + str(uuid.uuid1()),
+        subject=protein_a.id,
+        object=protein_b.id,
+        predicate="biolink:interacts_with",
+        knowledge_level="not_provided",
+        agent_type="not_provided",
+    )
 
-protein_a = Protein(id="ENSEMBL:" + re.sub(r"\d+\.", "", row["protein1"]))
-protein_b = Protein(id="ENSEMBL:" + re.sub(r"\d+\.", "", row["protein2"]))
-
-pairwise_gene_to_gene_interaction = PairwiseGeneToGeneInteraction(
-    id="uuid:" + str(uuid.uuid1()),
-    subject=protein_a.id,
-    object=protein_b.id,
-    predicate="biolink:interacts_with",
-    knowledge_level="not_provided",
-    agent_type="not_provided",
-)
-
-koza_app.write(protein_a, protein_b, pairwise_gene_to_gene_interaction)
+    koza.write(protein_a, protein_b, pairwise_gene_to_gene_interaction)
diff --git a/examples/string-declarative/declarative-protein-links-detailed.yaml b/examples/string-declarative/declarative-protein-links-detailed.yaml
@@ -1,49 +1,51 @@
 name: 'declarative-protein-links-detailed'
 
-delimiter: ' '
-
-files:
-  - './examples/data/string.tsv'
-  - './examples/data/string2.tsv'
-
 metadata:
   ingest_title: 'String DB'
   ingest_url: 'https://string-db.org'
   description: 'STRING: functional protein association networks'
   rights: 'https://string-db.org/cgi/access.pl?footer_active_subpage=licensing'
 
-global_table: './examples/translation_table.yaml'
-
-columns:
-  - 'protein1'
-  - 'protein2'
-  - 'neighborhood'
-  - 'fusion'
-  - 'cooccurence'
-  - 'coexpression'
-  - 'experimental'
-  - 'database'
-  - 'textmining'
-  - 'combined_score' : 'int'
-
-filters:
-  - inclusion: 'include'
-    column: 'combined_score'
-    filter_code: 'lt'
-    value: 700
-
-transform_mode: 'flat'
-
-node_properties:
-  - 'id'
-  - 'category'
-  - 'provided_by'
-
-edge_properties:
-  - 'id'
-  - 'subject'
-  - 'predicate'
-  - 'object'
-  - 'category'
-  - 'relation'
-  - 'provided_by'
+reader:
+  format: csv
+
+  delimiter: ' '
+
+  files:
+    - './examples/data/string.tsv'
+    - './examples/data/string2.tsv'
+
+  columns:
+    - 'protein1'
+    - 'protein2'
+    - 'neighborhood'
+    - 'fusion'
+    - 'cooccurence'
+    - 'coexpression'
+    - 'experimental'
+    - 'database'
+    - 'textmining'
+    - 'combined_score' : 'int'
+
+
+transform:
+  filters:
+    - inclusion: 'include'
+      column: 'combined_score'
+      filter_code: 'lt'
+      value: 700
+
+writer:
+  node_properties:
+    - 'id'
+    - 'category'
+    - 'provided_by'
+
+  edge_properties:
+    - 'id'
+    - 'subject'
+    - 'predicate'
+    - 'object'
+    - 'category'
+    - 'relation'
+    - 'provided_by'
diff --git a/examples/string-w-custom-map/custom-map-protein-links-detailed.py b/examples/string-w-custom-map/custom-map-protein-links-detailed.py
@@ -2,23 +2,23 @@
 
 from biolink_model.datamodel.pydanticmodel_v2 import Gene, PairwiseGeneToGeneInteraction
 
-from koza.cli_utils import get_koza_app
+from koza.runner import KozaTransform
 
-source_name = "custom-map-protein-links-detailed"
-koza_app = get_koza_app(source_name)
-row = koza_app.get_row()
-entrez_2_string = koza_app.get_map("custom-entrez-2-string")
+def transform_record(koza: KozaTransform, record: dict):
+    a = record["protein1"]
+    b = record["protein2"]
+    mapped_a = koza.lookup(a, "entrez")
+    mapped_b = koza.lookup(b, "entrez")
+    gene_a = Gene(id="NCBIGene:" + mapped_a)
+    gene_b = Gene(id="NCBIGene:" + mapped_b)
 
-gene_a = Gene(id="NCBIGene:" + entrez_2_string[row["protein1"]]["entrez"])
-gene_b = Gene(id="NCBIGene:" + entrez_2_string[row["protein2"]]["entrez"])
+    pairwise_gene_to_gene_interaction = PairwiseGeneToGeneInteraction(
+        id="uuid:" + str(uuid.uuid1()),
+        subject=gene_a.id,
+        object=gene_b.id,
+        predicate="biolink:interacts_with",
+        knowledge_level="not_provided",
+        agent_type="not_provided",
+    )
 
-pairwise_gene_to_gene_interaction = PairwiseGeneToGeneInteraction(
-    id="uuid:" + str(uuid.uuid1()),
-    subject=gene_a.id,
-    object=gene_b.id,
-    predicate="biolink:interacts_with",
-    knowledge_level="not_provided",
-    agent_type="not_provided",
-)
-
-koza_app.write(gene_a, gene_b, pairwise_gene_to_gene_interaction)
+    koza.write(gene_a, gene_b, pairwise_gene_to_gene_interaction)
diff --git a/examples/string-w-custom-map/custom-map-protein-links-detailed.yaml b/examples/string-w-custom-map/custom-map-protein-links-detailed.yaml
@@ -1,46 +1,47 @@
 name: 'custom-map-protein-links-detailed'
 
-delimiter: ' '
-
-files:
-  - './examples/data/string.tsv'
-  - './examples/data/string2.tsv'
-
 metadata: !include './examples/string-w-custom-map/metadata.yaml'
 
-columns:
-  - 'protein1'
-  - 'protein2'
-  - 'neighborhood'
-  - 'fusion'
-  - 'cooccurence'
-  - 'coexpression'
-  - 'experimental'
-  - 'database'
-  - 'textmining'
-  - 'combined_score' : 'int'
-
-filters:
-  - inclusion: 'include'
-    column: 'combined_score'
-    filter_code: 'lt'
-    value: 700
-
-depends_on:
-  - 'examples/maps/custom-entrez-2-string.yaml'
-
-transform_mode: 'flat'
-
-node_properties:
-  - 'id'
-  - 'category'
-  - 'provided_by'
-
-edge_properties:
-  - 'id'
-  - 'subject'
-  - 'predicate'
-  - 'object'
-  - 'category'
-  - 'relation'
-  - 'provided_by'
+reader:
+  delimiter: ' '
+
+  files:
+    - './examples/data/string.tsv'
+    - './examples/data/string2.tsv'
+
+  columns:
+    - 'protein1'
+    - 'protein2'
+    - 'neighborhood'
+    - 'fusion'
+    - 'cooccurence'
+    - 'coexpression'
+    - 'experimental'
+    - 'database'
+    - 'textmining'
+    - 'combined_score' : 'int'
+
+transform:
+  filters:
+    - inclusion: 'include'
+      column: 'combined_score'
+      filter_code: 'lt'
+      value: 700
+
+  mappings:
+    - 'examples/maps/custom-entrez-2-string.yaml'
+
+writer:
+  node_properties:
+    - 'id'
+    - 'category'
+    - 'provided_by'
+
+  edge_properties:
+    - 'id'
+    - 'subject'
+    - 'predicate'
+    - 'object'
+    - 'category'
+    - 'relation'
+    - 'provided_by'