diff --git a/fake_vcf/__init__.py b/fake_vcf/__init__.py index 93ba6c0..1ecde1a 100644 --- a/fake_vcf/__init__.py +++ b/fake_vcf/__init__.py @@ -4,6 +4,12 @@ def get_version() -> str: + """ + Retrieve the version of the package. + + Returns: + str: The version of the package, or 'unknown' if the version cannot be found. + """ try: return importlib_metadata.version(__name__) except importlib_metadata.PackageNotFoundError: # pragma: no cover diff --git a/fake_vcf/__main__.py b/fake_vcf/__main__.py index 938c002..a286947 100644 --- a/fake_vcf/__main__.py +++ b/fake_vcf/__main__.py @@ -15,7 +15,12 @@ def version_callback(print_version: bool) -> None: - """Print the version of the package.""" + """ + Callback function to print the version of the package. + + Args: + print_version (bool): Flag to print the version. + """ if print_version: console.print(f"[yellow]fake-vcf[/] version: [bold blue]{version}[/]") raise typer.Exit() @@ -56,6 +61,20 @@ def main( help="Prints the version of the fake-vcf package.", ), ) -> None: + """ + Main function to generate fake VCF data using Typer CLI. + + Args: + fake_vcf_path (Path): Path to fake VCF file or None to write to standard output. + num_rows (int): Number of rows. + num_samples (int): Number of samples. + chromosome (str): Chromosome identifier. + seed (int): Random seed for reproducibility. + sample_prefix (str): Prefix for sample names. + phased (bool): Simulate phased genotypes. + large_format (bool): Write large format VCF. + print_version (bool): Flag to print the version of the fake-vcf package. + """ fake_vcf_data( fake_vcf_path=fake_vcf_path, num_rows=num_rows, diff --git a/fake_vcf/vcf_faker.py b/fake_vcf/vcf_faker.py index f744a01..80cad66 100644 --- a/fake_vcf/vcf_faker.py +++ b/fake_vcf/vcf_faker.py @@ -17,6 +17,18 @@ def __init__( phased: bool = True, large_format: bool = True, ): + """ + Initialize VirtualVCF object. + + Args: + num_rows (int): Number of rows. + num_samples (int): Number of samples. 
+ chromosome (str): Chromosome identifier. + sample_prefix (str, optional): Prefix for sample names. Defaults to "SAMPLES". + random_seed (int, optional): Random seed for reproducibility. Defaults to None. + phased (bool, optional): Phased or unphased genotypes. Defaults to True. + large_format (bool, optional): Use large format VCF. Defaults to True. + """ self.num_rows = num_rows self.rows_remaining = num_rows + 1 # One for the header self.num_samples = num_samples @@ -111,9 +123,15 @@ def __init__( self.current_pos = 0 def __iter__(self): + """ + Iterates over VirtualVCF object. + """ return self def __next__(self): + """ + Retrieves the next VCF data. + """ if self.rows_remaining <= 0: raise StopIteration vcf_data = self._generate_vcf_data() @@ -121,6 +139,9 @@ def __next__(self): return vcf_data def _generate_vcf_header(self): + """ + Generates the VCF header. + """ # Create a list of column names columns = [ "#CHROM", @@ -143,6 +164,9 @@ def _generate_vcf_header(self): return self.header def _generate_vcf_row(self): + """ + Generates a VCF row. + """ ref_index = self.random.randint(0, 3) position = self.positions[self.current_pos] @@ -177,6 +201,9 @@ def _generate_vcf_row(self): return row def _generate_vcf_data(self): + """ + Generates VCF data. + """ if self.rows_remaining == self.num_rows + 1: vcf_row = self._generate_vcf_header() else: @@ -184,7 +211,14 @@ def _generate_vcf_data(self): return vcf_row def __enter__(self): + """ + Enters the context. + """ + return self def __exit__(self, exc_type, exc_value, traceback): + """ + Exits the context. + """ pass diff --git a/fake_vcf/vcf_generator.py b/fake_vcf/vcf_generator.py index 98f82f8..660b150 100644 --- a/fake_vcf/vcf_generator.py +++ b/fake_vcf/vcf_generator.py @@ -8,12 +8,26 @@ def to_std_out(virtual_vcf: VirtualVCF) -> None: + """ + Writes VirtualVCF data to standard output. + + Args: + virtual_vcf (VirtualVCF): VirtualVCF object containing the data. 
+ """ with virtual_vcf as v_vcf: for line in v_vcf: sys.stdout.write(line) def to_vcf_file(virtual_vcf: VirtualVCF, fake_vcf_path: Path, num_rows: int) -> None: + """ + Writes VirtualVCF data to a VCF file. + + Args: + virtual_vcf (VirtualVCF): VirtualVCF object containing the data. + fake_vcf_path (Path): Path to the fake VCF file. + num_rows (int): Number of rows. + """ print(f"Writing to file {fake_vcf_path}") if fake_vcf_path.suffix == ".gz": @@ -42,6 +56,19 @@ def fake_vcf_data( phased, large_format, ): + """ + Generates fake VCF data and writes it to either a file or standard output. + + Args: + fake_vcf_path (Path or None): Path to the fake VCF file or None to write to standard output. + num_rows (int): Number of rows. + num_samples (int): Number of samples. + chromosome (str): Chromosome identifier. + seed (int): Random seed for reproducibility. + sample_prefix (str): Prefix for sample names. + phased (bool): Phased or unphased genotypes. + large_format (bool): Use large format VCF. + """ virtual_vcf = VirtualVCF( num_rows=num_rows, num_samples=num_samples,