diff --git a/genomkit/regions/gregion.py b/genomkit/regions/gregion.py index 4afd75a..e01862b 100644 --- a/genomkit/regions/gregion.py +++ b/genomkit/regions/gregion.py @@ -236,7 +236,7 @@ def resize(self, extend_upstream: int, extend_downstream: int, :rtype: GRegion """ if center == "mid_point": - center = int(0.5*(self.end-self.start)) + center = self.start + int(0.5*(self.end-self.start)) if self.orientation == "-": start = center-extend_downstream end = center+extend_upstream @@ -261,6 +261,10 @@ def resize(self, extend_upstream: int, extend_downstream: int, center = self.end start = center-extend_upstream end = center+extend_downstream + if start < 0: + start = 0 + if end < 0: + end = 0 res = GRegion(sequence=self.sequence, start=start, end=end, orientation=self.orientation, score=self.score, name=self.name, data=self.data) diff --git a/genomkit/regions/gregions.py b/genomkit/regions/gregions.py index 3021df0..5550186 100644 --- a/genomkit/regions/gregions.py +++ b/genomkit/regions/gregions.py @@ -958,12 +958,12 @@ def get_GSequences(self, FASTA_file): print(FASTA_file + " is not found.") sys.exit() res = GSequences(name=self.name) - for region in self.elements: + for region in tqdm(self.elements, desc="Get GSequences"): seq = fasta.get_sequence(name=region.sequence, start=region.start, end=region.end) if seq: - seq.name = region.name + seq.name = str(region) seq.data = region.data if region.orientation == "-": seq.reverse_complement() diff --git a/genomkit/sequences/gsequences.py b/genomkit/sequences/gsequences.py index 267a020..3faefe0 100644 --- a/genomkit/sequences/gsequences.py +++ b/genomkit/sequences/gsequences.py @@ -130,7 +130,7 @@ def get_sequence(self, name, start, end): return seq.slice_sequence(start, end) def write_FASTA(self, filename: str, data: bool = False, - gz: bool = True): + gz: bool = False): write_FASTA(seqs=self, filename=filename, data=data, gz=gz) def write_FASTQ(self, filename: str, data: bool = False, diff --git a/genomkit/sequences/io.py b/genomkit/sequences/io.py index f2a2f3f..ffa1534 100644 --- a/genomkit/sequences/io.py +++ b/genomkit/sequences/io.py @@ -30,7 +30,7 @@ def load_FASTA_from_file(file): # If there was a previously stored sequence, store it if current_sequence_id is not None: infos = re.split(r'[ |;,-]', current_sequence_id) - name = infos[0] + name = infos[0].split(".")[0] data = infos[1:] res.add(GSequence(sequence=current_sequence, name=name, data=data)) @@ -45,7 +45,7 @@ def load_FASTA_from_file(file): # Store the last sequence if current_sequence_id is not None: infos = re.split(r'[ |;,-]', current_sequence_id) - name = infos[0] + name = infos[0].split(".")[0] data = infos[1:] res.add(GSequence(sequence=current_sequence, name=name, data=data)) @@ -106,7 +106,7 @@ def load_FASTQ_from_file(file): def write_FASTA(seqs, filename: str, data: bool = False, - gz: bool = True): + gz: bool = False): if gz: with gzip.open(filename, "wt") as fasta_file: write_fasta_content(seqs, fasta_file, data)