Skip to content

Commit

Permalink
add doc strings
Browse files Browse the repository at this point in the history
  • Loading branch information
endast committed Nov 21, 2023
1 parent b45037b commit 3f216bd
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 1 deletion.
6 changes: 6 additions & 0 deletions fake_vcf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@


def get_version() -> str:
"""
Retrieve the version of the package.
Returns:
str: The version of the package, or 'unknown' if the version cannot be found.
"""
try:
return importlib_metadata.version(__name__)
except importlib_metadata.PackageNotFoundError: # pragma: no cover
Expand Down
21 changes: 20 additions & 1 deletion fake_vcf/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@


def version_callback(print_version: bool) -> None:
    """
    Callback function to print the version of the package.

    When the flag is set, the version banner is written through the
    module-level ``console`` and the command is ended by raising
    ``typer.Exit``. When the flag is falsy the callback does nothing.

    Args:
        print_version (bool): Flag to print the version.

    Raises:
        typer.Exit: After the version banner has been printed.
    """
    if print_version:
        console.print(f"[yellow]fake-vcf[/] version: [bold blue]{version}[/]")
        raise typer.Exit()
Expand Down Expand Up @@ -56,6 +61,20 @@ def main(
help="Prints the version of the fake-vcf package.",
),
) -> None:
"""
Main function to generate fake VCF data using Typer CLI.
Args:
fake_vcf_path (Path): Path to fake VCF file or None to write to standard output.
num_rows (int): Number of rows.
num_samples (int): Number of samples.
chromosome (str): Chromosome identifier.
seed (int): Random seed for reproducibility.
sample_prefix (str): Prefix for sample names.
phased (bool): Simulate phased genotypes.
large_format (bool): Write large format VCF.
print_version (bool): Flag to print the version of the fake-vcf package.
"""
fake_vcf_data(
fake_vcf_path=fake_vcf_path,
num_rows=num_rows,
Expand Down
34 changes: 34 additions & 0 deletions fake_vcf/vcf_faker.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@ def __init__(
phased: bool = True,
large_format: bool = True,
):
"""
Initialize VirtualVCF object.
Args:
num_rows (int): Number of rows.
num_samples (int): Number of samples.
chromosome (str): Chromosome identifier.
sample_prefix (str, optional): Prefix for sample names. Defaults to "SAMPLES".
random_seed (int, optional): Random seed for reproducibility. Defaults to None.
phased (bool, optional): Phased or unphased genotypes. Defaults to True.
large_format (bool, optional): Use large format VCF. Defaults to True.
"""
self.num_rows = num_rows
self.rows_remaining = num_rows + 1 # One for the header
self.num_samples = num_samples
Expand Down Expand Up @@ -111,16 +123,25 @@ def __init__(
self.current_pos = 0

def __iter__(self):
"""
Iterates over VirtualVCF object.
"""
return self

def __next__(self):
"""
Retrieves the next VCF data.
"""
if self.rows_remaining <= 0:
raise StopIteration
vcf_data = self._generate_vcf_data()
self.rows_remaining -= 1
return vcf_data

def _generate_vcf_header(self):
"""
Generates the VCF header.
"""
# Create a list of column names
columns = [
"#CHROM",
Expand All @@ -143,6 +164,9 @@ def _generate_vcf_header(self):
return self.header

def _generate_vcf_row(self):
"""
Generates a VCF row.
"""
ref_index = self.random.randint(0, 3)

position = self.positions[self.current_pos]
Expand Down Expand Up @@ -177,14 +201,24 @@ def _generate_vcf_row(self):
return row

def _generate_vcf_data(self):
"""
Generates VCF data.
"""
if self.rows_remaining == self.num_rows + 1:
vcf_row = self._generate_vcf_header()
else:
vcf_row = self._generate_vcf_row()
return vcf_row

def __enter__(self):
"""
Enters the context.
"""

return self

def __exit__(self, exc_type, exc_value, traceback):
"""
Exits the context.
"""
pass
27 changes: 27 additions & 0 deletions fake_vcf/vcf_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,26 @@


def to_std_out(virtual_vcf: VirtualVCF) -> None:
    """
    Writes VirtualVCF data to standard output.

    The object is entered as a context manager and every item it yields is
    written to ``sys.stdout`` in iteration order. Nothing is appended to the
    items, so any line terminators must already be part of each one.

    Args:
        virtual_vcf (VirtualVCF): VirtualVCF object containing the data.
    """
    with virtual_vcf as v_vcf:
        # writelines consumes the iterator lazily and writes each item
        # unchanged, matching a manual write-per-line loop.
        sys.stdout.writelines(v_vcf)


def to_vcf_file(virtual_vcf: VirtualVCF, fake_vcf_path: Path, num_rows: int) -> None:
"""
Writes VirtualVCF data to a VCF file.
Args:
virtual_vcf (VirtualVCF): VirtualVCF object containing the data.
fake_vcf_path (Path): Path to the fake VCF file.
num_rows (int): Number of rows.
"""
print(f"Writing to file {fake_vcf_path}")

if fake_vcf_path.suffix == ".gz":
Expand Down Expand Up @@ -42,6 +56,19 @@ def fake_vcf_data(
phased,
large_format,
):
"""
Generates fake VCF data and writes it to either a file or standard output.
Args:
fake_vcf_path (str or None): Path to the fake VCF file or None to write to standard output.
num_rows (int): Number of rows.
num_samples (int): Number of samples.
chromosome (str): Chromosome identifier.
seed (int): Random seed for reproducibility.
sample_prefix (str): Prefix for sample names.
phased (bool): Phased or unphased genotypes.
large_format (bool): Use large format VCF.
"""
virtual_vcf = VirtualVCF(
num_rows=num_rows,
num_samples=num_samples,
Expand Down

0 comments on commit 3f216bd

Please sign in to comment.