Skip to content

Commit

Permalink
optimize FDR calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
michabirklbauer committed Jan 24, 2024
1 parent 9592795 commit 15a6b9a
Showing 1 changed file with 18 additions and 24 deletions.
42 changes: 18 additions & 24 deletions msannika_fdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
# micha.birklbauer@gmail.com

# version tracking
# NOTE(review): the next two assignments are overwritten immediately by the
# pair that follows, so they have no effect at runtime. They look like the
# removed side of the diff -- confirm and drop them.
__version = "1.0.0"
__date = "2024-01-09"
# Effective values: these are the last assignments to __version / __date.
__version = "1.1.0"
__date = "2024-01-24"

# REQUIREMENTS
# pip install numpy
# pip install pandas
# pip install openpyxl

Expand Down Expand Up @@ -41,6 +42,7 @@
######################

import argparse
import numpy as np
import pandas as pd

from typing import List
Expand Down Expand Up @@ -138,20 +140,16 @@ class MSAnnika_CSM_Validator:
def get_class(row: pd.Series) -> str:
    """Classify a row as "Decoy" or "Target".

    The row is a decoy as soon as its "Alpha T/D" value or, failing that,
    its "Beta T/D" value contains "D"; otherwise it is a target.
    """
    # Checked in this order so "Beta T/D" is only read when "Alpha T/D"
    # does not already mark the row as a decoy.
    for column in ("Alpha T/D", "Beta T/D"):
        if "D" in row[column]:
            return "Decoy"
    return "Target"

@staticmethod
def get_fdr(data: pd.DataFrame, score: float) -> float:
    """Return the decoy/target ratio of all rows scoring strictly above ``score``.

    Parameters:
        data: frame with the columns "Combined Score", "Alpha T/D" and
            "Beta T/D".
        score: threshold; only rows with "Combined Score" > score count.

    Returns:
        Number of decoy rows divided by number of target rows. If no target
        row is above the threshold the ratio is undefined, so this returns
        ``float("inf")`` when decoys remain and ``0.0`` when nothing remains
        (previously this case raised ZeroDivisionError).
    """
    # Boolean indexing already yields a new frame and nothing is written to
    # it, so no explicit copy is needed.
    above = data[data["Combined Score"] > score]

    # Same rule as get_class: a row is a decoy if either T/D value contains "D".
    n_decoys = sum(
        1
        for alpha, beta in zip(above["Alpha T/D"], above["Beta T/D"])
        if "D" in alpha or "D" in beta
    )
    # Every row is either decoy or target, so targets are the remainder.
    n_targets = above.shape[0] - n_decoys

    if n_targets == 0:
        return float("inf") if n_decoys else 0.0
    return n_decoys / n_targets

@staticmethod
def get_cutoff(data: pd.DataFrame, fdr: float) -> float:
    """Return the lowest "Combined Score" whose decoy/target ratio is below ``fdr``.

    Rows are ordered by ascending "Combined Score". For each position the
    ratio (decoy rows) / (target rows) is taken over that row and every row
    ordered after it; the score at the first position whose ratio is
    strictly below ``fdr`` is returned.

    Parameters:
        data: frame with the columns "Combined Score", "Alpha T/D" and
            "Beta T/D". It is not modified.
        fdr: ratio threshold.

    Returns:
        The selected score as a float. If no position satisfies the
        threshold, the lowest score in ``data`` is returned.

    Raises:
        IndexError: if ``data`` has no rows.
    """
    scores = data["Combined Score"].to_numpy()
    # Same rule as get_class: a row is a decoy if either T/D value contains "D".
    is_decoy = np.fromiter(
        (
            "D" in alpha or "D" in beta
            for alpha, beta in zip(data["Alpha T/D"], data["Beta T/D"])
        ),
        dtype=bool,
    )

    # Stable sort keeps tied scores in input order, so results are reproducible.
    order = scores.argsort(kind="stable")
    scores_sorted = scores[order]
    decoys_sorted = is_decoy[order]

    # Suffix counts: entry i covers rows i..end of the sorted order. A reversed
    # cumulative sum gives all of them in one pass instead of re-summing a
    # slice for every candidate score.
    n_decoys = decoys_sorted[::-1].cumsum()[::-1]
    n_targets = np.arange(scores_sorted.shape[0], 0, -1) - n_decoys

    # A suffix without targets yields inf/nan, which never compares below fdr.
    with np.errstate(divide="ignore", invalid="ignore"):
        passing = np.flatnonzero(n_decoys / n_targets < fdr)

    if passing.size:
        return float(scores_sorted[passing[0]])

    # NOTE(review): falling back to the lowest score presumably lets every row
    # pass a later score filter -- confirm this is the intended behavior when
    # the requested FDR cannot be reached.
    return float(scores_sorted[0])
Expand All @@ -175,20 +173,16 @@ class MSAnnika_Crosslink_Validator:
def get_class(row: pd.Series) -> str:
    """Classify a row as "Decoy" when its "Decoy" value is truthy, else "Target"."""
    if row["Decoy"]:
        return "Decoy"
    return "Target"

@staticmethod
def get_fdr(data: pd.DataFrame, score: float) -> float:
    """Return the decoy/target ratio of all rows scoring strictly above ``score``.

    Parameters:
        data: frame with the columns "Best CSM Score" and "Decoy".
        score: threshold; only rows with "Best CSM Score" > score count.

    Returns:
        Number of decoy rows divided by number of target rows. If no target
        row is above the threshold the ratio is undefined, so this returns
        ``float("inf")`` when decoys remain and ``0.0`` when nothing remains
        (previously this case raised ZeroDivisionError).
    """
    # Boolean indexing already yields a new frame and nothing is written to
    # it, so no explicit copy is needed.
    above = data[data["Best CSM Score"] > score]

    # Same rule as get_class: a truthy "Decoy" value marks a decoy row.
    n_decoys = sum(1 for flag in above["Decoy"] if flag)
    # Every row is either decoy or target, so targets are the remainder.
    n_targets = above.shape[0] - n_decoys

    if n_targets == 0:
        return float("inf") if n_decoys else 0.0
    return n_decoys / n_targets

@staticmethod
def get_cutoff(data: pd.DataFrame, fdr: float) -> float:
    """Return the lowest "Best CSM Score" whose decoy/target ratio is below ``fdr``.

    Rows are ordered by ascending "Best CSM Score". For each position the
    ratio (decoy rows) / (target rows) is taken over that row and every row
    ordered after it; the score at the first position whose ratio is
    strictly below ``fdr`` is returned.

    Parameters:
        data: frame with the columns "Best CSM Score" and "Decoy". It is
            not modified.
        fdr: ratio threshold.

    Returns:
        The selected score as a float. If no position satisfies the
        threshold, the lowest score in ``data`` is returned.

    Raises:
        IndexError: if ``data`` has no rows.
    """
    scores = data["Best CSM Score"].to_numpy()
    # Classify with this validator's own rule (truthy "Decoy" column), not the
    # CSM validator's "Alpha T/D" / "Beta T/D" rule.
    is_decoy = np.fromiter((bool(flag) for flag in data["Decoy"]), dtype=bool)

    # Stable sort keeps tied scores in input order, so results are reproducible.
    order = scores.argsort(kind="stable")
    scores_sorted = scores[order]
    decoys_sorted = is_decoy[order]

    # Suffix counts: entry i covers rows i..end of the sorted order. A reversed
    # cumulative sum gives all of them in one pass instead of re-summing a
    # slice for every candidate score.
    n_decoys = decoys_sorted[::-1].cumsum()[::-1]
    n_targets = np.arange(scores_sorted.shape[0], 0, -1) - n_decoys

    # A suffix without targets yields inf/nan, which never compares below fdr.
    with np.errstate(divide="ignore", invalid="ignore"):
        passing = np.flatnonzero(n_decoys / n_targets < fdr)

    if passing.size:
        return float(scores_sorted[passing[0]])

    # NOTE(review): falling back to the lowest score presumably lets every row
    # pass a later score filter -- confirm this is the intended behavior when
    # the requested FDR cannot be reached.
    return float(scores_sorted[0])
Expand Down

0 comments on commit 15a6b9a

Please sign in to comment.