Skip to content

Commit

Permalink
Merge pull request #34 from hgb-bin-proteomics/develop
Browse files Browse the repository at this point in the history
Use biopython for fasta reading in xiNET and xiVIEW exporters
  • Loading branch information
michabirklbauer authored Jan 21, 2025
2 parents 389afed + b3c8fb9 commit 81222d8
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 56 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ xiNetExporter_msannika.py f [f ...]
[--version]
positional arguments:
f MS Annika crosslink result files in Microsoft Excel
- format (.xlsx) to process.
+ format (.xlsx) to process. Decoys should be excluded!
required arguments:
-fasta FASTAFILE, --fasta FASTAFILE
Fasta file used for crosslink search. Must contain
Expand Down Expand Up @@ -139,7 +139,7 @@ xiViewExporter_msannika.py f [f ...]
[--version]
positional arguments:
f MS Annika crosslink result files in Microsoft Excel
- format (.xlsx) to process.
+ format (.xlsx) to process. Decoys should be excluded!
required arguments:
-fasta FASTAFILE, --fasta FASTAFILE
Fasta file used for crosslink search. Must contain
Expand Down
Binary file modified binaries/windows/xiNetExporter_msannika.exe
Binary file not shown.
Binary file modified binaries/windows/xiViewExporter_msannika.exe
Binary file not shown.
38 changes: 11 additions & 27 deletions xiNetExporter_msannika.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@

import argparse
import pandas as pd
+ from Bio import SeqIO
from typing import List

- __version = "1.0.0"
- __date = "20221018"
+ __version = "1.1.0"
+ __date = "20250121"

"""
DESCRIPTION:
Expand All @@ -24,7 +25,7 @@
[--version]
positional arguments:
f MS Annika crosslink result files in Microsoft Excel
- format (.xlsx) to process.
+ format (.xlsx) to process. Decoys should be excluded!
required arguments:
-fasta FASTAFILE, --fasta FASTAFILE
Fasta file used for crosslink search. Must contain
Expand Down Expand Up @@ -52,20 +53,13 @@ def __init__(self, input_files: List[str], fasta_file: str, ignore_list: List[st
# read fasta file
sequences = dict()

- with open(fasta_file, "r", encoding = "utf-8") as f:
- fasta_data = f.read()
- f.close()

- for entry in fasta_data.split(">"):
- lines = entry.split("\n")
- if lines[0].strip() != "":
- description = lines[0].strip()
- identifier = lines[0].strip().split("|")[1].strip()
- sequence = "".join([x.strip() for x in lines[1:]])
- if identifier not in sequences:
- sequences[identifier] = {"description": description, "sequence": sequence}
- else:
- print("WARNING: Identifier " + identifier + " is not unique!")
+ for entry in SeqIO.parse(fasta_file, "fasta"):
+ identifier = str(entry.id).split("|")[1].strip()
+ sequence = str(entry.seq)
+ if identifier not in sequences:
+ sequences[identifier] = {"sequence": sequence}
+ else:
+ print("WARNING: Identifier " + identifier + " is not unique!")

self.database = sequences

Expand Down Expand Up @@ -136,25 +130,15 @@ def __generate_csv_df(self) -> pd.DataFrame:

return result

- def __generate_fasta_str(self) -> str:
- fasta_str = ""
- for key in self.database:
- fasta_str = fasta_str + ">" + key + " " + self.database[key]["description"] + "\n" + self.database[key]["sequence"] + "\n"
- return fasta_str

# export function, takes one argument "output_file" which sets the prefix
# of generated output files
def export(self, output_file = None, format = "xiNET") -> None:
csv = self.__generate_csv_df()
- fasta = self.__generate_fasta_str()

if output_file == None:
output_file = self.input_files[0].split(".")[0]

csv.to_csv(output_file + ".csv", index = False)
- with open(output_file + ".fasta", "w", encoding = "utf-8") as f:
- f.write(fasta)
- f.close()

# initialize exporter and export xiNET files
def main() -> None:
Expand Down
38 changes: 11 additions & 27 deletions xiViewExporter_msannika.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@

import argparse
import pandas as pd
+ from Bio import SeqIO
from typing import List

- __version = "1.0.0"
- __date = "20221020"
+ __version = "1.1.0"
+ __date = "20250116"

"""
DESCRIPTION:
Expand All @@ -24,7 +25,7 @@
[--version]
positional arguments:
f MS Annika crosslink result files in Microsoft Excel
- format (.xlsx) to process.
+ format (.xlsx) to process. Decoys should be excluded!
required arguments:
-fasta FASTAFILE, --fasta FASTAFILE
Fasta file used for crosslink search. Must contain
Expand Down Expand Up @@ -52,20 +53,13 @@ def __init__(self, input_files: List[str], fasta_file: str, ignore_list: List[st
# read fasta file
sequences = dict()

- with open(fasta_file, "r", encoding = "utf-8") as f:
- fasta_data = f.read()
- f.close()

- for entry in fasta_data.split(">"):
- lines = entry.split("\n")
- if lines[0].strip() != "":
- description = lines[0].strip()
- identifier = lines[0].strip().split("|")[1].strip()
- sequence = "".join([x.strip() for x in lines[1:]])
- if identifier not in sequences:
- sequences[identifier] = {"description": description, "sequence": sequence}
- else:
- print("WARNING: Identifier " + identifier + " is not unique!")
+ for entry in SeqIO.parse(fasta_file, "fasta"):
+ identifier = str(entry.id).split("|")[1].strip()
+ sequence = str(entry.seq)
+ if identifier not in sequences:
+ sequences[identifier] = {"sequence": sequence}
+ else:
+ print("WARNING: Identifier " + identifier + " is not unique!")

self.database = sequences

Expand Down Expand Up @@ -135,25 +129,15 @@ def __generate_csv_df(self) -> pd.DataFrame:

return result

- def __generate_fasta_str(self) -> str:
- fasta_str = ""
- for key in self.database:
- fasta_str = fasta_str + ">" + self.database[key]["description"] + "\n" + self.database[key]["sequence"] + "\n"
- return fasta_str

# export function, takes one argument "output_file" which sets the prefix
# of generated output files
def export(self, output_file = None, format = "xiVIEW") -> None:
csv = self.__generate_csv_df()
- fasta = self.__generate_fasta_str()

if output_file == None:
output_file = self.input_files[0].split(".")[0]

csv.to_csv(output_file + ".csv", index = False)
- with open(output_file + ".fasta", "w", encoding = "utf-8") as f:
- f.write(fasta)
- f.close()

# initialize exporter and export xiVIEW files
def main() -> None:
Expand Down

0 comments on commit 81222d8

Please sign in to comment.