Commit 32fee4c

Merge branch 'master' into signor_development_1

EvanDietzMorris authored Dec 23, 2024
2 parents 27ab8c9 + 502581d
Showing 41 changed files with 1,074 additions and 355 deletions.
46 changes: 46 additions & 0 deletions .github/scripts/Bio_QC_check.py
@@ -0,0 +1,46 @@
import os
import requests

PREDICATE_KEYWORDS = ["predicate", "biolink:", "edges"]
LABEL_NAME = "Biological Context QC"  # Label to add if keywords are found

# GitHub API variables
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
REPO_NAME = os.getenv("GITHUB_REPOSITORY")
ISSUE_NUMBER = os.getenv("ISSUE_NUMBER")
# Don't echo GITHUB_TOKEN: Actions masks secrets in logs, but printing
# credentials to stdout is an unnecessary risk.
print("REPO_NAME:", REPO_NAME)
print("ISSUE_NUMBER:", ISSUE_NUMBER)

headers = {"Authorization": f"Bearer {GITHUB_TOKEN}"}
api_url = f"https://api.github.com/repos/{REPO_NAME}"


def get_issue_details(issue_number):
    response = requests.get(f"{api_url}/issues/{issue_number}", headers=headers)
    response.raise_for_status()
    return response.json()


def add_label(issue_number, label_name):
    response = requests.post(
        f"{api_url}/issues/{issue_number}/labels",
        headers=headers,
        json={"labels": [label_name]}
    )
    response.raise_for_status()
    print(f"Label '{label_name}' added to issue/PR #{issue_number}")


def check_keywords_in_text(text, keywords):
    # The API returns None for an empty issue/PR body, so guard before iterating.
    return any(keyword in text for keyword in keywords) if text else False


def main():
    issue_details = get_issue_details(ISSUE_NUMBER)
    title = issue_details["title"]
    body = issue_details["body"]

    if check_keywords_in_text(title, PREDICATE_KEYWORDS) or check_keywords_in_text(body, PREDICATE_KEYWORDS):
        add_label(ISSUE_NUMBER, LABEL_NAME)
    else:
        print("No predicate keywords found.")


if __name__ == "__main__":
    main()
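A note on local testing: the script reads its configuration from environment variables at module import time, so it can be smoke-tested outside of Actions by setting those variables first. A minimal sketch, assuming it is run from the repository root; the token, repository, and issue number below are placeholders, not values from this commit:

# Hypothetical local smoke test for Bio_QC_check.py (makes real API calls).
import os
import sys

os.environ["GITHUB_TOKEN"] = "ghp_xxxxxxxx"      # placeholder token
os.environ["GITHUB_REPOSITORY"] = "owner/repo"   # placeholder repository
os.environ["ISSUE_NUMBER"] = "123"               # placeholder issue number

# Import after setting the env vars, since the module reads them at import time.
sys.path.insert(0, ".github/scripts")
import Bio_QC_check

Bio_QC_check.main()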
32 changes: 32 additions & 0 deletions .github/workflows/label-predicate-changes.yml
@@ -0,0 +1,32 @@
name: 'Label Predicate Changes'

on:
  pull_request:
    types: [opened, edited, synchronize]
  issues:
    types: [opened, edited]

jobs:
  label_check:
    runs-on: ubuntu-latest

    steps:
      - name: Check out code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: 3.9

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install PyGithub
      - name: Run predicate check
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          ISSUE_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }}
        run: |
          python .github/scripts/Bio_QC_check.py
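The ISSUE_NUMBER expression relies on the `||` operator to cover both trigger types: `github.event.pull_request.number` is set for pull_request events, while only `github.event.issue.number` exists for issues events. A rough Python equivalent of that fallback, for illustration only:

# Illustrative-only equivalent of the workflow expression
# ${{ github.event.pull_request.number || github.event.issue.number }}
def resolve_issue_number(event: dict):
    pull_request = event.get("pull_request") or {}
    issue = event.get("issue") or {}
    return pull_request.get("number") or issue.get("number")

assert resolve_issue_number({"pull_request": {"number": 7}}) == 7  # PR event
assert resolve_issue_number({"issue": {"number": 42}}) == 42       # issue event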
20 changes: 10 additions & 10 deletions .github/workflows/release.yml
@@ -13,27 +13,27 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out the repo
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
       - name: Get the version
         id: get_version
         run: echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\//}
-      - name: Extract metadata (tags, labels) for Docker
-        id: meta
-        uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
-        with:
-          images:
-            ghcr.io/${{ github.repository }}
       - name: Login to ghcr
-        uses: docker/login-action@v1
+        uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a
         with:
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+        with:
+          images:
+            ghcr.io/${{ github.repository }}
       - name: Push to GitHub Packages
-        uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
+        uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
         with:
           context: .
           push: true
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
-          build-args: VERSION=${{ steps.get_version.outputs.VERSION }}
+          build-args: VERSION=${{ steps.get_version.outputs.VERSION }}
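One caveat worth noting: the `::set-output` command in the 'Get the version' step is deprecated on current runners in favor of appending to the file named by `$GITHUB_OUTPUT`. A sketch of the same version extraction using that mechanism, shown in Python for consistency with the rest of this page (the shell equivalent is a one-line echo append):

# Sketch: modern replacement for `echo ::set-output name=VERSION::...`,
# writing the step output to the file that $GITHUB_OUTPUT points at.
import os

def set_step_output(name: str, value: str) -> None:
    with open(os.environ["GITHUB_OUTPUT"], "a") as fh:
        fh.write(f"{name}={value}\n")

ref = os.environ.get("GITHUB_REF", "")  # e.g. 'refs/tags/v1.2.3'
set_step_output("VERSION", ref.removeprefix("refs/tags/"))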
6 changes: 6 additions & 0 deletions Common/biolink_constants.py
@@ -48,10 +48,13 @@
 PREDICATE = 'predicate'
 PRIMARY_KNOWLEDGE_SOURCE = 'primary_knowledge_source'
 AGGREGATOR_KNOWLEDGE_SOURCES = 'aggregator_knowledge_source'
+SUPPORTING_DATA_SOURCE = 'supporting_data_source'
 P_VALUE = 'p_value'
 ADJUSTED_P_VALUE = 'adjusted_p_value'
 AGENT_TYPE = 'agent_type'
 KNOWLEDGE_LEVEL = 'knowledge_level'
+MAX_RESEARCH_PHASE = 'max_research_phase'
+HAS_SUPPORTING_STUDY_RESULT = 'has_supporting_study_result'
 
 # enums for knowledge level
 KNOWLEDGE_ASSERTION = 'knowledge_assertion'
@@ -137,6 +140,7 @@
     PREDICATE,
     PRIMARY_KNOWLEDGE_SOURCE,
     AGGREGATOR_KNOWLEDGE_SOURCES,
+    SUPPORTING_DATA_SOURCE,
     PUBLICATIONS,
     SYNONYMS,
     DESCRIPTION,
@@ -147,6 +151,8 @@
     FDA_APPROVAL_STATUS,
     KNOWLEDGE_LEVEL,
     MECHANISM_OF_ACTION,
+    MAX_RESEARCH_PHASE,
+    HAS_SUPPORTING_STUDY_RESULT,
     # qualifiers
     ANATOMICAL_CONTEXT_QUALIFIER,
     CAUSAL_MECHANISM_QUALIFIER,
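For context, these constants are the property keys that the pipeline attaches to KGX edge records. A hypothetical edge using the newly added keys; the values are illustrative placeholders, not taken from this commit:

# Hypothetical edge record using the constants added above; values are made up.
from Common.biolink_constants import (SUPPORTING_DATA_SOURCE,
                                      MAX_RESEARCH_PHASE,
                                      HAS_SUPPORTING_STUDY_RESULT)

edge = {
    'predicate': 'biolink:affects',
    SUPPORTING_DATA_SOURCE: 'infores:example-source',  # placeholder infores
    MAX_RESEARCH_PHASE: 'clinical_trial_phase_2',      # illustrative enum value
    HAS_SUPPORTING_STUDY_RESULT: [],                   # no supporting studies here
}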
57 changes: 48 additions & 9 deletions Common/build_manager.py
@@ -12,17 +12,19 @@
 from Common.load_manager import SourceDataManager
 from Common.kgx_file_merger import KGXFileMerger
 from Common.neo4j_tools import create_neo4j_dump
-from Common.kgxmodel import GraphSpec, SubGraphSource, DataSource, NormalizationScheme
-from Common.normalization import NORMALIZATION_CODE_VERSION
+from Common.kgxmodel import GraphSpec, SubGraphSource, DataSource
+from Common.normalization import NORMALIZATION_CODE_VERSION, NormalizationScheme
 from Common.metadata import Metadata, GraphMetadata, SourceMetadata
 from Common.supplementation import SequenceVariantSupplementation
 from Common.biolink_constants import PRIMARY_KNOWLEDGE_SOURCE, AGGREGATOR_KNOWLEDGE_SOURCES, PREDICATE, PUBLICATIONS
 from Common.meta_kg import MetaKnowledgeGraphBuilder, META_KG_FILENAME, TEST_DATA_FILENAME
 from Common.redundant_kg import generate_redundant_kg
+from Common.collapse_qualifiers import generate_collapsed_qualifiers_kg
 
 NODES_FILENAME = 'nodes.jsonl'
 EDGES_FILENAME = 'edges.jsonl'
 REDUNDANT_EDGES_FILENAME = 'redundant_edges.jsonl'
+COLLAPSED_QUALIFIERS_FILENAME = 'collapsed_qualifier_edges.jsonl'
 
 
 class GraphBuilder:
@@ -115,6 +117,49 @@ def build_graph(self, graph_id: str):
         output_formats = graph_spec.graph_output_format.lower().split('+') if graph_spec.graph_output_format else []
         nodes_filepath = os.path.join(graph_output_dir, NODES_FILENAME)
         edges_filepath = os.path.join(graph_output_dir, EDGES_FILENAME)
+
+        if 'redundant_jsonl' in output_formats:
+            self.logger.info(f'Generating redundant edge KG for {graph_id}...')
+            redundant_filepath = edges_filepath.replace(EDGES_FILENAME, REDUNDANT_EDGES_FILENAME)
+            generate_redundant_kg(edges_filepath, redundant_filepath)
+
+        if 'redundant_neo4j' in output_formats:
+            self.logger.info(f'Generating redundant edge KG for {graph_id}...')
+            redundant_filepath = edges_filepath.replace(EDGES_FILENAME, REDUNDANT_EDGES_FILENAME)
+            generate_redundant_kg(edges_filepath, redundant_filepath)
+            self.logger.info(f'Starting Neo4j dump pipeline for redundant {graph_id}...')
+            dump_success = create_neo4j_dump(nodes_filepath=nodes_filepath,
+                                             edges_filepath=redundant_filepath,
+                                             output_directory=graph_output_dir,
+                                             graph_id=graph_id,
+                                             graph_version=graph_version,
+                                             logger=self.logger)
+
+            if dump_success:
+                graph_output_url = self.get_graph_output_URL(graph_id, graph_version)
+                graph_metadata.set_dump_url(f'{graph_output_url}graph_{graph_version}_redundant.db.dump')
+
+        if 'collapsed_qualifiers_jsonl' in output_formats:
+            self.logger.info(f'Generating collapsed qualifier predicates KG for {graph_id}...')
+            collapsed_qualifiers_filepath = edges_filepath.replace(EDGES_FILENAME, COLLAPSED_QUALIFIERS_FILENAME)
+            generate_collapsed_qualifiers_kg(edges_filepath, collapsed_qualifiers_filepath)
+
+        if 'collapsed_qualifiers_neo4j' in output_formats:
+            self.logger.info(f'Generating collapsed qualifier predicates KG for {graph_id}...')
+            collapsed_qualifiers_filepath = edges_filepath.replace(EDGES_FILENAME, COLLAPSED_QUALIFIERS_FILENAME)
+            generate_collapsed_qualifiers_kg(edges_filepath, collapsed_qualifiers_filepath)
+            self.logger.info(f'Starting Neo4j dump pipeline for {graph_id} with collapsed qualifiers...')
+            dump_success = create_neo4j_dump(nodes_filepath=nodes_filepath,
+                                             edges_filepath=collapsed_qualifiers_filepath,
+                                             output_directory=graph_output_dir,
+                                             graph_id=graph_id,
+                                             graph_version=graph_version,
+                                             logger=self.logger)
+
+            if dump_success:
+                graph_output_url = self.get_graph_output_URL(graph_id, graph_version)
+                graph_metadata.set_dump_url(f'{graph_output_url}graph_{graph_version}_collapsed_qualifiers.db.dump')
+
         if 'neo4j' in output_formats:
             self.logger.info(f'Starting Neo4j dump pipeline for {graph_id}...')
             dump_success = create_neo4j_dump(nodes_filepath=nodes_filepath,
@@ -128,19 +173,13 @@ def build_graph(self, graph_id: str):
                 graph_output_url = self.get_graph_output_URL(graph_id, graph_version)
                 graph_metadata.set_dump_url(f'{graph_output_url}graph_{graph_version}.db.dump')
 
-        if 'redundant_jsonl' in output_formats:
-            self.logger.info(f'Generating redundant edge KG for {graph_id}...')
-            redundant_filepath = edges_filepath.replace(EDGES_FILENAME, REDUNDANT_EDGES_FILENAME)
-            generate_redundant_kg(edges_filepath, redundant_filepath)
 
     def build_dependencies(self, graph_spec: GraphSpec):
         for subgraph_source in graph_spec.subgraphs:
             subgraph_id = subgraph_source.id
             subgraph_version = subgraph_source.version
             if self.check_for_existing_graph_dir(subgraph_id, subgraph_version):
-                # load previous metadata
-                graph_metadata = self.get_graph_metadata(subgraph_id, subgraph_version)
-                subgraph_source.graph_metadata = graph_metadata.metadata
+                subgraph_source.graph_metadata = self.get_graph_metadata(subgraph_id, subgraph_version)
             elif self.current_graph_versions[subgraph_id] == subgraph_version:
                 self.logger.warning(f'For graph {graph_spec.graph_id} subgraph dependency '
                                     f'{subgraph_id} version {subgraph_version} is not ready. Building now...')
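The branching above is driven by graph_output_format, a '+'-delimited string from the graph spec; each token selects one output flavor, so a single build can emit several artifacts. A small illustration of how the tokens are derived, using an invented format string:

# Illustration of the output format parsing in build_graph;
# 'jsonl+redundant_jsonl+neo4j' is a made-up example value.
graph_output_format = 'jsonl+redundant_jsonl+neo4j'
output_formats = graph_output_format.lower().split('+') if graph_output_format else []

assert output_formats == ['jsonl', 'redundant_jsonl', 'neo4j']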