From 48998ca103d32bc5ddf168fffbbb3cfb55a503df Mon Sep 17 00:00:00 2001 From: Jack Tierney Date: Sat, 21 Oct 2023 12:53:46 +0100 Subject: [PATCH] sorted genome.gtf and removed invalid entries --- data/genomics/homo_sapiens/genome/genome.gtf | 26 ++++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/data/genomics/homo_sapiens/genome/genome.gtf b/data/genomics/homo_sapiens/genome/genome.gtf index 857300a6b..d86d95647 100644 --- a/data/genomics/homo_sapiens/genome/genome.gtf +++ b/data/genomics/homo_sapiens/genome/genome.gtf @@ -5,36 +5,36 @@ chr22 havana exon 682 727 . + . gene_id "ENSG00000233995"; gene_version "1"; tra chr22 havana exon 1018 1186 . + . gene_id "ENSG00000233995"; gene_version "1"; transcript_id "ENST00000454360"; transcript_version "1"; exon_number "3"; gene_name "AP000547.1"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "AP000547.1-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00001597534"; exon_version "1"; tag "basic"; transcript_support_level "NA"; chr22 havana exon 1396 1501 . + . gene_id "ENSG00000233995"; gene_version "1"; transcript_id "ENST00000454360"; transcript_version "1"; exon_number "4"; gene_name "AP000547.1"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "AP000547.1-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00001739921"; exon_version "1"; tag "basic"; transcript_support_level "NA"; chr22 havana exon 2440 2611 . + . gene_id "ENSG00000233995"; gene_version "1"; transcript_id "ENST00000454360"; transcript_version "1"; exon_number "5"; gene_name "AP000547.1"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "AP000547.1-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00001596670"; exon_version "1"; tag "basic"; transcript_support_level "NA"; +chr22 havana gene 3337 10681 . - . gene_id "ENSG00000239435"; gene_version "2"; gene_name "KCNMB3P1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; +chr22 havana transcript 3337 10681 . - . gene_id "ENSG00000239435"; gene_version "2"; transcript_id "ENST00000493696"; transcript_version "2"; gene_name "KCNMB3P1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "KCNMB3P1-202"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA"; chr22 havana exon 3337 6047 . - . gene_id "ENSG00000239435"; gene_version "2"; transcript_id "ENST00000493696"; transcript_version "2"; exon_number "3"; gene_name "KCNMB3P1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "KCNMB3P1-202"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00003755408"; exon_version "1"; tag "basic"; transcript_support_level "NA"; -chr22 havana exon 10481 10681 . - . gene_id "ENSG00000239435"; gene_version "2"; transcript_id "ENST00000493696"; transcript_version "2"; exon_number "1"; gene_name "KCNMB3P1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "KCNMB3P1-202"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001930505"; exon_version "2"; tag "basic"; transcript_support_level "NA"; chr22 havana exon 8785 9261 . - . gene_id "ENSG00000239435"; gene_version "2"; transcript_id "ENST00000493696"; transcript_version "2"; exon_number "2"; gene_name "KCNMB3P1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "KCNMB3P1-202"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001839032"; exon_version "2"; tag "basic"; transcript_support_level "NA"; chr22 havana transcript 9051 10576 . - . gene_id "ENSG00000239435"; gene_version "2"; transcript_id "ENST00000472972"; transcript_version "1"; gene_name "KCNMB3P1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "KCNMB3P1-201"; transcript_source "havana"; transcript_biotype "processed_transcript"; transcript_support_level "2"; -chr22 havana exon 10481 10576 . - . gene_id "ENSG00000239435"; gene_version "2"; transcript_id "ENST00000472972"; transcript_version "1"; exon_number "1"; gene_name "KCNMB3P1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "KCNMB3P1-201"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00001937172"; exon_version "1"; transcript_support_level "2"; chr22 havana exon 9051 9261 . - . gene_id "ENSG00000239435"; gene_version "2"; transcript_id "ENST00000472972"; transcript_version "1"; exon_number "2"; gene_name "KCNMB3P1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "KCNMB3P1-201"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00001878881"; exon_version "1"; transcript_support_level "2"; +chr22 havana exon 10481 10576 . - . gene_id "ENSG00000239435"; gene_version "2"; transcript_id "ENST00000472972"; transcript_version "1"; exon_number "1"; gene_name "KCNMB3P1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "KCNMB3P1-201"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00001937172"; exon_version "1"; transcript_support_level "2"; +chr22 havana exon 10481 10681 . - . gene_id "ENSG00000239435"; gene_version "2"; transcript_id "ENST00000493696"; transcript_version "2"; exon_number "1"; gene_name "KCNMB3P1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "KCNMB3P1-202"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001930505"; exon_version "2"; tag "basic"; transcript_support_level "NA"; chr22 ensembl_havana gene 18725 20784 . - . gene_id "ENSG00000198445"; gene_version "4"; gene_name "CCT8L2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; chr22 ensembl_havana transcript 18725 20784 . - . gene_id "ENSG00000198445"; gene_version "4"; transcript_id "ENST00000359963"; transcript_version "4"; gene_name "CCT8L2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "CCT8L2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS13738"; tag "basic"; transcript_support_level "NA"; chr22 ensembl_havana exon 18725 20784 . - . gene_id "ENSG00000198445"; gene_version "4"; transcript_id "ENST00000359963"; transcript_version "4"; exon_number "1"; gene_name "CCT8L2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "CCT8L2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS13738"; exon_id "ENSE00001806026"; exon_version "2"; tag "basic"; transcript_support_level "NA"; -chr22 ensembl_havana CDS 18854 20524 . - 0 gene_id "ENSG00000198445"; gene_version "4"; transcript_id "ENST00000359963"; transcript_version "4"; exon_number "1"; gene_name "CCT8L2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "CCT8L2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS13738"; protein_id "ENSP00000353048"; protein_version "3"; tag "basic"; transcript_support_level "NA"; +chr22 ensembl_havana three_prime_utr 18725 18850 . - . gene_id "ENSG00000198445"; gene_version "4"; transcript_id "ENST00000359963"; transcript_version "4"; gene_name "CCT8L2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "CCT8L2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS13738"; tag "basic"; transcript_support_level "NA"; chr22 ensembl_havana stop_codon 18851 18853 . - 0 gene_id "ENSG00000198445"; gene_version "4"; transcript_id "ENST00000359963"; transcript_version "4"; exon_number "1"; gene_name "CCT8L2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "CCT8L2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS13738"; tag "basic"; transcript_support_level "NA"; +chr22 ensembl_havana CDS 18854 20524 . - 0 gene_id "ENSG00000198445"; gene_version "4"; transcript_id "ENST00000359963"; transcript_version "4"; exon_number "1"; gene_name "CCT8L2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "CCT8L2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS13738"; protein_id "ENSP00000353048"; protein_version "3"; tag "basic"; transcript_support_level "NA"; chr22 ensembl_havana start_codon 20522 20524 . - 0 gene_id "ENSG00000198445"; gene_version "4"; transcript_id "ENST00000359963"; transcript_version "4"; exon_number "1"; gene_name "CCT8L2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "CCT8L2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS13738"; tag "basic"; transcript_support_level "NA"; chr22 ensembl_havana five_prime_utr 20525 20784 . - . gene_id "ENSG00000198445"; gene_version "4"; transcript_id "ENST00000359963"; transcript_version "4"; gene_name "CCT8L2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "CCT8L2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS13738"; tag "basic"; transcript_support_level "NA"; -chr22 ensembl_havana three_prime_utr 18725 18850 . - . gene_id "ENSG00000198445"; gene_version "4"; transcript_id "ENST00000359963"; transcript_version "4"; gene_name "CCT8L2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "CCT8L2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS13738"; tag "basic"; transcript_support_level "NA"; +chr22 havana_tagene transcript 20869 28084 . + . gene_id "ENSG00000287285"; gene_version "1"; transcript_id "ENST00000656324"; transcript_version "1"; gene_name "AP000547.4"; gene_source "havana_tagene"; gene_biotype "lncRNA"; transcript_name "AP000547.4-201"; transcript_source "havana_tagene"; transcript_biotype "lncRNA"; tag "basic"; chr22 havana_tagene exon 20869 20949 . + . gene_id "ENSG00000287285"; gene_version "1"; transcript_id "ENST00000656324"; transcript_version "1"; exon_number "1"; gene_name "AP000547.4"; gene_source "havana_tagene"; gene_biotype "lncRNA"; transcript_name "AP000547.4-201"; transcript_source "havana_tagene"; transcript_biotype "lncRNA"; exon_id "ENSE00003870517"; exon_version "1"; tag "basic"; +chr22 havana_tagene gene 20869 28084 . + . gene_id "ENSG00000287285"; gene_version "1"; gene_name "AP000547.4"; gene_source "havana_tagene"; gene_biotype "lncRNA"; chr22 havana gene 23063 23459 . - . gene_id "ENSG00000240122"; gene_version "1"; gene_name "FABP5P11"; gene_source "havana"; gene_biotype "processed_pseudogene"; chr22 havana transcript 23063 23459 . - . gene_id "ENSG00000240122"; gene_version "1"; transcript_id "ENST00000430910"; transcript_version "1"; gene_name "FABP5P11"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "FABP5P11-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA"; chr22 havana exon 23063 23459 . - . gene_id "ENSG00000240122"; gene_version "1"; transcript_id "ENST00000430910"; transcript_version "1"; exon_number "1"; gene_name "FABP5P11"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "FABP5P11-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001596581"; exon_version "1"; tag "basic"; transcript_support_level "NA"; chr22 havana_tagene exon 27394 28084 . + . gene_id "ENSG00000287285"; gene_version "1"; transcript_id "ENST00000656324"; transcript_version "1"; exon_number "2"; gene_name "AP000547.4"; gene_source "havana_tagene"; gene_biotype "lncRNA"; transcript_name "AP000547.4-201"; transcript_source "havana_tagene"; transcript_biotype "lncRNA"; exon_id "ENSE00003867293"; exon_version "1"; tag "basic"; chr22 havana exon 29861 30189 . + . gene_id "ENSG00000100181"; gene_version "22"; transcript_id "ENST00000558085"; transcript_version "6"; exon_number "1"; gene_name "TPTEP1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "TPTEP1-205"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00002570186"; exon_version "1"; tag "basic"; transcript_support_level "2"; -chr22 havana exon 29885 30189 . + . gene_id "ENSG00000100181"; gene_version "22"; transcript_id "ENST00000400593"; transcript_version "6"; exon_number "1"; gene_name "TPTEP1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "TPTEP1-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003795523"; exon_version "1"; transcript_support_level "1"; -chr22 havana exon 30018 30189 . + . gene_id "ENSG00000100181"; gene_version "22"; transcript_id "ENST00000426585"; transcript_version "5"; exon_number "1"; gene_name "TPTEP1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "TPTEP1-204"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00001675045"; exon_version "1"; transcript_support_level "1"; -chr22 havana exon 29885 30189 . + . gene_id "ENSG00000283633"; gene_version "1"; transcript_id "ENST00000592918"; transcript_version "5"; exon_number "1"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "AP000547.3-201"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00003792889"; exon_version "1"; tag "basic"; transcript_support_level "1"; -chr22 havana exon 29922 30189 . + . gene_id "ENSG00000283633"; gene_version "1"; transcript_id "ENST00000592107"; transcript_version "5"; exon_number "1"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "AP000547.3-202"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00002911043"; exon_version "1"; tag "basic"; transcript_support_level "1"; -chr22 havana exon 30024 30189 . + . gene_id "ENSG00000283633"; gene_version "1"; transcript_id "ENST00000591299"; transcript_version "1"; exon_number "1"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "AP000547.3-203"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00002800237"; exon_version "1"; tag "basic"; transcript_support_level "2"; -chr22 havana gene 3337 10681 . - . gene_id "ENSG00000239435"; gene_version "2"; gene_name "KCNMB3P1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; -chr22 havana transcript 3337 10681 . - . gene_id "ENSG00000239435"; gene_version "2"; transcript_id "ENST00000493696"; transcript_version "2"; gene_name "KCNMB3P1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "KCNMB3P1-202"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA"; -chr22 havana_tagene gene 20869 28084 . + . gene_id "ENSG00000287285"; gene_version "1"; gene_name "AP000547.4"; gene_source "havana_tagene"; gene_biotype "lncRNA"; -chr22 havana_tagene transcript 20869 28084 . + . gene_id "ENSG00000287285"; gene_version "1"; transcript_id "ENST00000656324"; transcript_version "1"; gene_name "AP000547.4"; gene_source "havana_tagene"; gene_biotype "lncRNA"; transcript_name "AP000547.4-201"; transcript_source "havana_tagene"; transcript_biotype "lncRNA"; tag "basic"; chr22 havana gene 29885 40000 . + . gene_id "ENSG00000283633"; gene_version "1"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; chr22 havana transcript 29885 40000 . + . gene_id "ENSG00000283633"; gene_version "1"; transcript_id "ENST00000592918"; transcript_version "5"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "AP000547.3-201"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "1"; +chr22 havana exon 29885 30189 . + . gene_id "ENSG00000100181"; gene_version "22"; transcript_id "ENST00000400593"; transcript_version "6"; exon_number "1"; gene_name "TPTEP1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "TPTEP1-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003795523"; exon_version "1"; transcript_support_level "1"; +chr22 havana exon 29885 30189 . + . gene_id "ENSG00000283633"; gene_version "1"; transcript_id "ENST00000592918"; transcript_version "5"; exon_number "1"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "AP000547.3-201"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00003792889"; exon_version "1"; tag "basic"; transcript_support_level "1"; chr22 havana transcript 29922 40000 . + . gene_id "ENSG00000283633"; gene_version "1"; transcript_id "ENST00000592107"; transcript_version "5"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "AP000547.3-202"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "1"; +chr22 havana exon 29922 30189 . + . gene_id "ENSG00000283633"; gene_version "1"; transcript_id "ENST00000592107"; transcript_version "5"; exon_number "1"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "AP000547.3-202"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00002911043"; exon_version "1"; tag "basic"; transcript_support_level "1"; +chr22 havana exon 30018 30189 . + . gene_id "ENSG00000100181"; gene_version "22"; transcript_id "ENST00000426585"; transcript_version "5"; exon_number "1"; gene_name "TPTEP1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "TPTEP1-204"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00001675045"; exon_version "1"; transcript_support_level "1"; chr22 havana transcript 30024 40000 . + . gene_id "ENSG00000283633"; gene_version "1"; transcript_id "ENST00000591299"; transcript_version "1"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "AP000547.3-203"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "2"; +chr22 havana exon 30024 30189 . + . gene_id "ENSG00000283633"; gene_version "1"; transcript_id "ENST00000591299"; transcript_version "1"; exon_number "1"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "AP000547.3-203"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00002800237"; exon_version "1"; tag "basic"; transcript_support_level "2";