Skip to content

Commit

Permalink
MERGE #19
Browse files Browse the repository at this point in the history
  • Loading branch information
eboileau committed May 22, 2023
2 parents a5366fd + e08adb3 commit d18ec22
Show file tree
Hide file tree
Showing 22 changed files with 17 additions and 68 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ repos:
- id: check-toml

- repo: https://github.com/psf/black
rev: 22.12.0
rev: 23.3.0
hooks:
- id: black
- id: black-jupyter
Expand All @@ -21,15 +21,15 @@ repos:
- id: nbstripout

- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.0.0-alpha.0
rev: v3.0.0-alpha.6
hooks:
- id: prettier
additional_dependencies:
- prettier@2.3.2
- "prettier-plugin-toml"

- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.19.2
rev: 0.22.0
hooks:
- id: check-github-workflows
- id: check-readthedocs
Expand Down
3 changes: 0 additions & 3 deletions src/pbiotools/misc/dask_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ def connect(args):
cluster = None

if args.cluster_location == "LOCAL":

msg = "[dask_utils]: starting local dask cluster"
logger.info(msg)

Expand Down Expand Up @@ -73,7 +72,6 @@ def connect(args):
def add_dask_options(
parser, num_cpus=1, num_threads_per_cpu=1, cluster_location="LOCAL"
):

"""Add options for connecting to and/or controlling a local dask cluster
Parameters
Expand Down Expand Up @@ -131,7 +129,6 @@ def add_dask_values_to_args(
cluster_location="LOCAL",
client_restart=False,
):

"""Add the options for a dask cluster to the given argparse namespace
This function is mostly intended as a helper for use in ipython notebooks.
Expand Down
2 changes: 0 additions & 2 deletions src/pbiotools/misc/logging_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ def get_logging_options_string(args):
def update_logging(
args, logger=None, format_str="%(levelname)-8s %(name)-8s %(asctime)s : %(message)s"
):

"""This function interprets the logging options in args. Presumably, these
were added to an argument parser using add_logging_options.
Expand Down Expand Up @@ -197,7 +196,6 @@ def update_logging(
def get_ipython_logger(
logging_level="DEBUG", format_str="%(levelname)-8s : %(message)s"
):

level = logging.getLevelName(logging_level)
formatter = logging.Formatter(format_str)

Expand Down
2 changes: 0 additions & 2 deletions src/pbiotools/misc/math_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,6 @@ def check_range(
raise_on_invalid=True,
logger=logger,
):

"""This function checks whether the given value falls within the
specified range. If not, either an exception is raised or a
warning is logged.
Expand Down Expand Up @@ -606,7 +605,6 @@ def matrix_multiply(m1, m2, m3):


def fit_bayesian_gaussian_mixture(X, n_components=100, seed=8675309, **kwargs):

"""Fit a sklearn.mixture.BayesianGaussianMixture with the parameters.
This function is mostly used to give slightly more reasonable defaults for
Expand Down
5 changes: 0 additions & 5 deletions src/pbiotools/misc/mpl_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,6 @@ def plot_roc_curve(
xlabel="False positive rate",
ylabel="True positive rate",
):

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
Expand Down Expand Up @@ -407,7 +406,6 @@ def plot_confusion_matrix(
predicted_tick_rotation=None,
out=None,
):

"""Plot the given confusion matrix"""
if ax is None:
fig, ax = plt.subplots()
Expand Down Expand Up @@ -456,7 +454,6 @@ def plot_confusion_matrix(
s = confusion_matrix.shape
it = itertools.product(range(s[0]), range(s[1]))
for i, j in it:

val = confusion_matrix[i, j]
cell_color = cmap(mappable.norm(val))

Expand Down Expand Up @@ -498,7 +495,6 @@ def plot_venn_diagram(
counts_fontsize=12,
sci_notation_limit=999,
):

"""This function is a wrapper around matplotlib_venn. It mostly just makes
setting the fonts and label formatting a bit easier.
Expand Down Expand Up @@ -592,7 +588,6 @@ def create_simple_bar_chart(
legend_fontsize=12,
title_fontsize=12,
):

import numpy as np
import matplotlib.colors
import matplotlib.pyplot as plt
Expand Down
2 changes: 1 addition & 1 deletion src/pbiotools/misc/pandas_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def write_df(
filetype="AUTO",
sheet="Sheet_1",
do_not_compress=False,
**kwargs
**kwargs,
):
"""This function writes a data frame to a file of the specified type.
Unless otherwise specified, csv files are gzipped when written. By
Expand Down
6 changes: 3 additions & 3 deletions src/pbiotools/misc/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def apply_parallel_iter(
progress_bar=False,
total=None,
num_groups=None,
backend="loky"
backend="loky",
):
"""This function parallelizes applying a function to all items in an iterator using the
joblib library. In particular, func is called for each of the items in the list. (Unless
Expand Down Expand Up @@ -171,7 +171,7 @@ def apply_parallel_split(
*args,
progress_bar=False,
num_groups=None,
backend="loky"
backend="loky",
):
"""This function parallelizes applying a function to the rows of a data frame using the
joblib library. The data frame is first split into num_procs equal-sized groups, and
Expand Down Expand Up @@ -343,5 +343,5 @@ def apply_iter_simple(
progress_bar=progress_bar,
total=total,
num_groups=num_groups,
backend=backend
backend=backend,
)
4 changes: 0 additions & 4 deletions src/pbiotools/misc/shell_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ def download_file(url, local_filename=None, chunk_size=1024, overwrite=False):
def check_programs_exist(
programs, raise_on_error=True, package_name=None, logger=logger
):

"""This function checks that all of the programs in the list can be
called from python. After checking all of the programs, an exception
is raised if any of them are not callable. Optionally, only a warning
Expand Down Expand Up @@ -172,7 +171,6 @@ def check_programs_exist(


def check_call_step(cmd, current_step=-1, init_step=-1, call=True, raise_on_error=True):

logging.info(cmd)
ret_code = 0

Expand Down Expand Up @@ -205,7 +203,6 @@ def check_call(cmd, call=True, raise_on_error=True):


def check_output_step(cmd, current_step=0, init_step=0, raise_on_error=True):

logging.info(cmd)
if current_step >= init_step:
logging.info("calling")
Expand Down Expand Up @@ -240,7 +237,6 @@ def call_if_not_exists(
to_delete=[],
keep_delete_files=False,
):

"""This function checks if out_file exists. If it does not, or if overwrite
is true, then the command is executed, according to the call flag.
Otherwise, a warning is issued stating that the file already exists
Expand Down
2 changes: 0 additions & 2 deletions src/pbiotools/misc/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def check_sbatch(
stderr_file=None,
args=None,
):

"""This function wraps calls to sbatch. It adds the relevant command line
options based on the parameters (either specified or extracted from
args, if args is not None).
Expand Down Expand Up @@ -234,7 +233,6 @@ def add_sbatch_options(
mail_type=["FAIL", "TIME_LIMIT"],
mail_user=None,
):

"""This function adds the options for calling sbatch to the given parser.
The provided arguments are used as defaults for the options.
Expand Down
8 changes: 3 additions & 5 deletions src/pbiotools/misc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,14 +222,13 @@ def get_vars_to_save(to_save, to_remove=["parser", "args"]):
import types

# remove the system variables, modules and functions
for (var_name, value) in to_save.items():
for var_name, value in to_save.items():
if var_name.startswith("__"):
to_remove.append(var_name)

elif isinstance(value, types.FunctionType) or isinstance(
value, types.ModuleType
):

to_remove.append(var_name)

for var_name in to_remove:
Expand Down Expand Up @@ -755,7 +754,7 @@ def write_df(
filetype="AUTO",
sheet="Sheet_1",
do_not_compress=False,
**kwargs
**kwargs,
):
"""This function writes a data frame to a file of the specified type.
Unless otherwise specified, csv files are gzipped when written. By
Expand Down Expand Up @@ -1290,9 +1289,8 @@ def call_func_if_not_exists(
file_checkers=None,
to_delete=[],
keep_delete_files=False,
**kwargs
**kwargs,
):

"""Call a python function with extra checks on input/output files, etc.
This is adapted from shell_utils.call_if_not_exists, see this function
for more details.
Expand Down
20 changes: 3 additions & 17 deletions src/pbiotools/utils/bed_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def read_bed(
comment=None,
header=None,
use_default_field_names=False,
**kwargs
**kwargs,
):
"""This function reads a bed file into a pandas data frame. By default, it
assumes the first line of the bed file actually gives the field names,
Expand Down Expand Up @@ -133,7 +133,7 @@ def write_bed(data_frame, filename, compress=True, **kwargs):
header=header,
do_not_compress=do_not_compress,
quoting=csv.QUOTE_NONE,
**kwargs
**kwargs,
)


Expand Down Expand Up @@ -1306,7 +1306,6 @@ def merge_intervals(interval_starts, interval_ends, interval_info=None):
# and advance
next_interval += 1
if next_interval < num_intervals:

next_interval_start = interval_starts[next_interval]
next_interval_end = interval_ends[next_interval]

Expand Down Expand Up @@ -1430,7 +1429,6 @@ def merge_all_intervals(bed, split=False):
def get_position_intersections(
positions, interval_starts, interval_ends, interval_info=None, position_info=None
):

"""This function finds the intersections of a set of (1bp) points and a
set of intervals, specified by (inclusive) start and (exclusive) end
positions. Furthermore, it allows arbitrary information to be attached
Expand Down Expand Up @@ -1530,10 +1528,8 @@ def get_position_intersections(
next_exon_start = interval_starts[0]

while next_p_site_position != np.inf:

# do we grab the p_site or the exon
if next_p_site_position < next_exon_start:

# then we take the p_site

# first, remove everything from the cache which ends before this
Expand Down Expand Up @@ -1742,10 +1738,8 @@ def get_exact_interval_matches(a_starts, a_ends, a_info, b_starts, b_ends, b_inf

matches = []
while next_a_interval < num_a_intervals:

# get whichever interval comes next
if next_a_start < next_b_start:

# check if this exactly matches anything in the cache
for c in cache:
starts = b_starts[c] == next_a_start
Expand All @@ -1768,7 +1762,6 @@ def get_exact_interval_matches(a_starts, a_ends, a_info, b_starts, b_ends, b_inf
next_a_end = a_ends[next_a_interval]

else:

# just add it to the cache
cache.append(next_b_interval)

Expand All @@ -1787,7 +1780,6 @@ def get_exact_interval_matches(a_starts, a_ends, a_info, b_starts, b_ends, b_inf
def get_exact_block_matches(
matches, block_counts_a, block_counts_b=None, block_id_index=None
):

"""This function finds pairs of transcripts (or whatever outer-level
object is considered) which have exact interval matches for all of
their blocks (i.e., exons). Roughly, it does this by counting the
Expand Down Expand Up @@ -2135,10 +2127,8 @@ def get_interval_overlaps(a_starts, a_ends, a_info, b_starts, b_ends, b_info):

matches = []
while (next_a_interval < num_a_intervals) or (len(a_cache) != 0):

# get whichever interval comes next
if next_a_start < next_b_start:

# a is first

# remove everything in the b_cache which ends before this starts
Expand Down Expand Up @@ -2211,7 +2201,6 @@ def get_interval_overlaps(a_starts, a_ends, a_info, b_starts, b_ends, b_info):


def get_transcript_overlaps(interval_overlaps):

"""This function finds pairs of transcripts (or whatever outer-level
object is considered) which have interval matches across multiple
blocks and counts the total overlap.
Expand Down Expand Up @@ -2409,7 +2398,6 @@ def get_bed_overlaps(

for seqname in seqnames:
for strand in strands:

m_bed_a_seqname = bed_a["seqname"] == seqname
m_bed_b_seqname = bed_b["seqname"] == seqname

Expand Down Expand Up @@ -2548,7 +2536,6 @@ def get_entries_with_upstream_overlaps(
exons_a=None,
exons_b=None,
):

"""This function finds all intervals of A which have upstream intervals of B.
It always takes the strand of the intervals into account. By default, the
function looks for intervals in B which are strictly upstream of the
Expand Down Expand Up @@ -2721,7 +2708,6 @@ def get_bed_sequence(bed_entry, seq_sequence, split_exons=True):
transcript_sequence = seq_sequence[genomic_start:genomic_end]

else:

exon_starts = np.fromstring(
bed_entry["exon_genomic_relative_starts"], sep=",", dtype=int
)
Expand Down Expand Up @@ -2819,7 +2805,7 @@ def get_all_bed_sequences(

all_transcript_sequences = []

for (seqname, sequence) in fasta:
for seqname, sequence in fasta:
msg = "Processing seqname: {}".format(seqname)
logger.debug(msg)

Expand Down
3 changes: 0 additions & 3 deletions src/pbiotools/utils/bio.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ def read_bitseq_tr_file(
comment="#",
sep=" ",
):

"""This function reads the BitSeq transcript_info file into a data frame.
The file is assumed to contain four columns: source_name,
transcript_name, length and effective_length.
Expand Down Expand Up @@ -114,7 +113,6 @@ def read_bitseq_tr_file(
def read_maxquant_peptides_file(
filename, names=None, header="infer", comment="#", sep="\t"
):

"""This function reads the peptides.txt file produced by MaxQuant into a
data frame. By default, the file is assumed to be tab-delimited, and
the first row is used as the column names.
Expand Down Expand Up @@ -143,7 +141,6 @@ def read_maxquant_peptides_file(
def read_protein_digestion_simulator_file(
filename, names=None, header="infer", comment="#", sep="\t"
):

"""This function reads the output of the Protein Digestion Simulator program
(https://omics.pnl.gov/software/protein-digestion-simulator). By default,
the file is assumed to be tab-delimited and the first row is used as
Expand Down
Loading

0 comments on commit d18ec22

Please sign in to comment.