Skip to content

Commit

Permalink
Add variants application
Browse files Browse the repository at this point in the history
  • Loading branch information
gregorjerse committed May 8, 2024
1 parent 5e9068b commit 86594bb
Show file tree
Hide file tree
Showing 10 changed files with 1,655 additions and 0 deletions.
7 changes: 7 additions & 0 deletions resolwe_bio/variants/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
""".. Ignore pydocstyle D400.
===================================
Resolwe Bioinformatics Variants App
===================================
"""
16 changes: 16 additions & 0 deletions resolwe_bio/variants/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
""".. Ignore pydocstyle D400.
===============================
Variants Base App Configuration
===============================
"""
from django.apps import AppConfig


class VariantsConfig(AppConfig):
    """Django application configuration for the variants app."""

    # Human readable name of the application.
    verbose_name = "Resolwe Bioinformatics Variants Base"
    # Short application label; the migrations refer to the models of this app
    # through it (e.g. "resolwe_bio_variants.variant").
    label = "resolwe_bio_variants"
    # Full Python path of the application package.
    name = "resolwe_bio.variants"
75 changes: 75 additions & 0 deletions resolwe_bio/variants/listener_plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""Handle variants related commands."""

import logging
from typing import TYPE_CHECKING

from resolwe.flow.executors.socket_utils import Message, Response
from resolwe.flow.managers.listener.plugin import (
ListenerPlugin,
listener_plugin_manager,
)

from .models import Variant, VariantCall, VariantExperiment

if TYPE_CHECKING:
from resolwe.flow.managers.listener.listener import Processor

logger = logging.getLogger(__name__)


class VariantCommands(ListenerPlugin):
    """Listener handlers related to the variants application."""

    plugin_manager = listener_plugin_manager

    def add_variants(
        self, data_id: int, message: Message[dict], manager: "Processor"
    ) -> Response[int]:
        """Handle connecting variants with the samples.

        If the reported variant does not exist in the database it is created.

        The message data is unpacked into a ``(metadata, variants_data)`` pair:

        - ``metadata`` is read for the keys ``species``, ``genome_assembly``
          and ``variant_data_source``,
        - every item of ``variants_data`` is read for the keys ``chromosome``,
          ``position``, ``reference``, ``alternative``, ``quality``, ``depth``,
          ``genotype`` and ``filter``.

        One ``VariantExperiment`` is created per call and all the created
        ``VariantCall`` objects are attached to it.

        :param data_id: id of the data object reporting the variants.
        :param message: the received message carrying the variants.
        :param manager: the processor used to retrieve the data object.
        :return: the OK response containing the number of created variant
            calls.
        :raises KeyError: when one of the keys listed above is missing.
        """
        data = manager.data(data_id)
        sample = data.entity
        metadata, variants_data = message.message_data
        species, genome_assembly = metadata["species"], metadata["genome_assembly"]

        variant_calls = []
        variant_cache = {}
        experiment = VariantExperiment.objects.create(
            variant_data_source=metadata["variant_data_source"],
            contributor=data.contributor,
        )

        # Variants are retrieved (or created when missing) one at a time, since
        # the variant calls created below must reference saved variants. The
        # variant calls themselves are inserted with a single bulk_create after
        # the loop.
        for variant_data in variants_data:
            key = {
                "species": species,
                "genome_assembly": genome_assembly,
                "chromosome": variant_data["chromosome"],
                "position": variant_data["position"],
                "reference": variant_data["reference"],
                "alternative": variant_data["alternative"],
            }
            # To reduce the hits to the database use cache for variants: the
            # same variant reported more than once is looked up only once.
            key_tuple = tuple(key.values())
            if key_tuple not in variant_cache:
                variant_cache[key_tuple] = Variant.objects.get_or_create(**key)[0]
            variant = variant_cache[key_tuple]

            variant_calls.append(
                VariantCall(
                    variant=variant,
                    data=data,
                    sample=sample,
                    quality=variant_data["quality"],
                    depth=variant_data["depth"],
                    genotype=variant_data["genotype"],
                    filter=variant_data["filter"],
                    experiment=experiment,
                )
            )

        VariantCall.objects.bulk_create(variant_calls)
        # The handler is declared to return a response; previously it fell off
        # the end and implicitly returned None.
        return message.respond_ok(len(variant_calls))
192 changes: 192 additions & 0 deletions resolwe_bio/variants/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
# Generated by Django 4.2.11 on 2024-03-25 10:07

from django.conf import settings
import django.contrib.postgres.fields
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    # Initial migration of the variants application: creates the models
    # Variant, VariantExperiment, VariantCall and VariantAnnotation and adds
    # the composite unique constraint on Variant.

    initial = True

    # The user model is referenced by VariantExperiment.contributor, the "flow"
    # application by the foreign keys to "flow.data" and "flow.entity".
    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ("flow", "0021_annotationvalue_modified"),
    ]

    # The order matters: Variant and VariantExperiment are created before the
    # models holding foreign keys to them.
    operations = [
        # Variant: identified by species, genome assembly, chromosome,
        # position, reference and alternative (see the unique constraint added
        # by the last operation).
        migrations.CreateModel(
            name="Variant",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("species", models.CharField(max_length=50)),
                ("genome_assembly", models.CharField(max_length=20)),
                ("chromosome", models.CharField(max_length=20)),
                ("position", models.PositiveBigIntegerField()),
                ("reference", models.CharField(max_length=100)),
                ("alternative", models.CharField(max_length=100)),
            ],
        ),
        # VariantExperiment: the data source, an indexed timestamp set on
        # creation (auto_now_add) and the contributing user (on_delete=PROTECT).
        migrations.CreateModel(
            name="VariantExperiment",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("variant_data_source", models.CharField(max_length=100)),
                ("date", models.DateTimeField(auto_now_add=True, db_index=True)),
                (
                    "contributor",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.PROTECT,
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
        ),
        # VariantCall: links a variant with data, sample (flow.entity) and
        # experiment. All four foreign keys are nullable, cascade on delete and
        # share the reverse name "variant_calls".
        migrations.CreateModel(
            name="VariantCall",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("quality", models.FloatField()),
                ("depth", models.PositiveIntegerField()),
                ("filter", models.CharField(max_length=20)),
                ("genotype", models.CharField(blank=True, max_length=100, null=True)),
                (
                    "data",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="variant_calls",
                        to="flow.data",
                    ),
                ),
                (
                    "experiment",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="variant_calls",
                        to="resolwe_bio_variants.variantexperiment",
                    ),
                ),
                (
                    "sample",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="variant_calls",
                        to="flow.entity",
                    ),
                ),
                (
                    "variant",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="variant_calls",
                        to="resolwe_bio_variants.variant",
                    ),
                ),
            ],
        ),
        # VariantAnnotation: at most one annotation per variant (one-to-one
        # relation, reverse name "annotation"), optionally linked to the data
        # object through a nullable foreign key.
        migrations.CreateModel(
            name="VariantAnnotation",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("type", models.CharField(blank=True, max_length=100, null=True)),
                ("annotation", models.CharField(max_length=200)),
                ("annotation_impact", models.CharField(max_length=20)),
                ("gene", models.CharField(max_length=100)),
                ("protein_impact", models.CharField(max_length=100)),
                # PostgreSQL array of strings without a size limit, empty list
                # by default.
                (
                    "feature_id",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.CharField(max_length=200),
                        default=list,
                        size=None,
                    ),
                ),
                (
                    "clinical_diagnosis",
                    models.CharField(blank=True, max_length=200, null=True),
                ),
                (
                    "clinical_significance",
                    models.CharField(blank=True, max_length=100, null=True),
                ),
                ("dbsnp_id", models.CharField(blank=True, max_length=20, null=True)),
                (
                    "clinical_var_id",
                    models.CharField(blank=True, max_length=20, null=True),
                ),
                (
                    "data",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="variant_annotations",
                        to="flow.data",
                    ),
                ),
                (
                    "variant",
                    models.OneToOneField(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="annotation",
                        to="resolwe_bio_variants.variant",
                    ),
                ),
            ],
        ),
        # Two variants may not share all six identifying fields.
        migrations.AddConstraint(
            model_name="variant",
            constraint=models.UniqueConstraint(
                fields=(
                    "species",
                    "genome_assembly",
                    "chromosome",
                    "position",
                    "reference",
                    "alternative",
                ),
                name="uniq_composite_key_variants",
            ),
        ),
    ]
Empty file.
Loading

0 comments on commit 86594bb

Please sign in to comment.