-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathold_locus_rename.py
36 lines (28 loc) · 2.13 KB
/
old_locus_rename.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Hard-coded absolute paths and identifiers for one specific run.
# None of them is referenced by parse_genebank below, which takes its input
# path as an argument.
# NOTE(review): the roles described here are inferred from the variable names
# and file extensions only -- confirm against the code that consumes them.

# Presumably the genome/protein FASTA used as input (".fasta").
input_file = '/ibex/user/niuk0a/CLEAN/app/data/inputs/NC_000853.1.fasta'
# Presumably a CSV of results for the same accession ("_maxsep.csv").
cleanfile = '/ibex/user/niuk0a/CLEAN/app/results/inputs/NC_000853.1_maxsep.csv'
# Pickle file (".pkl"); contents are not visible from here.
test_predscorefile = '/ibex/user/niuk0a/funcarve/cobra/iCN718_R_updateprs_10.pkl'
# NOTE(review): this id ('iLJ478_R') differs from the one embedded in the
# pickle filename above ('iCN718_R') -- confirm which one is intended.
name = 'iLJ478_R'
# GenBank flat file (".gb") for accession NC_000853.1.
gbf='/ibex/user/niuk0a/funcarve/cobra/NC_000853.1.gb'
import pandas as pd
import numpy as np
import pickle
import os
from Bio import SeqIO
def parse_genebank(f):
    """Collect EC numbers and old locus tags from the CDS features of a GenBank file.

    Only the first record yielded by ``SeqIO.parse(f, "genbank")`` is
    inspected; any further records in the file are parsed but ignored.

    Args:
        f: Whatever ``Bio.SeqIO.parse`` accepts as its first argument for the
            ``"genbank"`` format (it is passed through unchanged).

    Returns:
        A ``(lt2ec, lt2oldlt)`` tuple of dicts:

        * ``lt2ec`` maps the first ``locus_tag`` value of a CDS to that CDS's
          list of ``EC_number`` values. CDS features without a ``locus_tag``
          or without an ``EC_number`` qualifier are left out.
        * ``lt2oldlt`` maps every ``old_locus_tag`` value of a CDS to the full
          list of ``old_locus_tag`` values of that same CDS. CDS features
          without that qualifier contribute nothing.

        Both dicts are empty when the file contains no records.
    """
    lt2ec = {}
    lt2oldlt = {}

    records = list(SeqIO.parse(f, "genbank"))
    if not records:
        # Nothing to index; avoid an IndexError on records[0].
        return lt2ec, lt2oldlt

    for feat in records[0].features:
        if feat.type != "CDS":
            continue

        # Qualifier values are lists of strings, e.g.
        #   {'locus_tag': ['TM_RS00005'],
        #    'old_locus_tag': ['TM0005', 'TM_0005'],
        #    'EC_number': ['3.6.4.12'],
        #    'product': ['IGHMBP2 family helicase'], ...}
        # Any of them may be absent on a given CDS, so never subscript the
        # qualifier dict directly.
        quals = feat.qualifiers
        locus_tag = quals.get('locus_tag')
        ec_number = quals.get('EC_number')
        old_locus_tag = quals.get('old_locus_tag', [])

        if locus_tag and ec_number:
            lt2ec[locus_tag[0]] = ec_number

        # NOTE(review): each old tag is mapped to the list of old tags of the
        # same CDS (so the value always contains the key), and the current
        # locus_tag never appears in this dict. That is the original
        # behaviour and is kept for compatibility -- confirm whether an
        # old-tag -> locus_tag mapping was intended instead.
        for old in old_locus_tag:
            lt2oldlt[old] = old_locus_tag

    return lt2ec, lt2oldlt