Skip to content

Commit

Permalink
version 0.5.3
Browse files Browse the repository at this point in the history
  • Loading branch information
liguowang committed Apr 17, 2021
1 parent dd97a82 commit 736624b
Show file tree
Hide file tree
Showing 36 changed files with 358 additions and 373 deletions.
Binary file modified .DS_Store
Binary file not shown.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
scripts/*
test/test.sh
dist/*
build/*
118 changes: 18 additions & 100 deletions bin/CrossMap.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
import sys
import optparse
import pyBigWig
#import logging
from textwrap import wrap
import logging
#from textwrap import wrap
from cmmodule.utils import read_chain_file
from cmmodule.mapvcf import crossmap_vcf_file
from cmmodule.mapgvcf import crossmap_gvcf_file
Expand All @@ -20,114 +20,24 @@
from cmmodule.mapbam import crossmap_bam_file
from cmmodule.mapgff import crossmap_gff_file
from cmmodule.mapwig import crossmap_wig_file
from cmmodule.helpinfo import *

__author__ = "Liguo Wang, Hao Zhao"
__contributor__="Liguo Wang, Hao Zhao"
__copyright__ = "Copyleft"
__credits__ = []
__license__ = "GPLv2"
__version__="0.5.2"
__version__="0.5.3"
__maintainer__ = "Liguo Wang"
__email__ = "wangliguo78@gmail.com"
__status__ = "Production"

debug = False
if debug:
logging.basicConfig(format = "%(asctime)s [%(levelname)s] %(message)s",datefmt='%Y-%m-%d %I:%M:%S', level=logging.DEBUG)
else:
logging.basicConfig(format = "%(asctime)s [%(levelname)s] %(message)s",datefmt='%Y-%m-%d %I:%M:%S', level=logging.INFO)

def general_help(cmds):
desc=("CrossMap is a program to convert genome coordinates between different reference assemblies"
"(e.g. from human hg19 to hg38 or vice versa). The supported file formats include BAM, BED, "
"BigWig, CRAM, GFF, GTF, GVCF, MAF (mutation annotation format), SAM, Wiggle, and VCF.")

print("Program: %s (v%s)" % ("CrossMap", __version__), file=sys.stderr)
print("\nDescription: \n%s" % '\n'.join(' '+i for i in wrap(desc,width=80)), file=sys.stderr)
print("\nUsage: CrossMap.py <command> [options]\n", file=sys.stderr)
for k in sorted(cmds):
print(' ' + k + '\t' + cmds[k], file=sys.stderr)
print(file=sys.stderr)

def bed_help():
msg =[
('Usage', "CrossMap.py bed <chain_file> <input.bed> [output_file]"),
('Description', ("Convert BED format file. The \"chain_file\" and \"input.bed\" can be regular or compressed"
"(*.gz, *.Z, *.z, *.bz, *.bz2, *.bzip2) file, local file or URL (http://, https://, ftp://) "
"pointing to remote file. BED format file must have at least 3 columns (chrom, start, end). "
"If no \"output_file\" is specified, output will be directed to the screen (console).")),
('Example1 (write output to file)', "CrossMap.py bed hg18ToHg19.over.chain.gz test.hg18.bed test.hg19.bed"),
('Example2 (write output to screen)', "CrossMap.py bed hg18ToHg19.over.chain.gz test.hg18.bed"),
]
for i,j in msg:
print('\n' + i + '\n' + '-'*len(i) + '\n' + '\n'.join([' ' + k for k in wrap(j,width=80)]), file=sys.stderr)

def gff_help():
msg =[
('Usage', "CrossMap.py gff <chain_file> <input.gff> <output_file>"),
('Description', ("Convert GFF or GTF format file. The\"chain_file\" can be regular or compressed "
"(*.gz, *.Z, *.z, *.bz, *.bz2, *.bzip2) file, local file or URL (http://, https://, ftp://)"
" pointing to remote file. Input file must be in GFF or GTF format. GFF format: "
"http://genome.ucsc.edu/FAQ/FAQformat.html#format3 GTF format: http://genome.ucsc.edu/FAQ/"
"FAQformat.html#format4")),
('Example1 (write output to file)', "CrossMap.py gff hg19ToHg18.over.chain.gz test.hg19.gtf test.hg18.gtf"),
('Example2 (write output to screen)', "CrossMap.py gff hg19ToHg18.over.chain.gz test.hg19.gtf"),
]
for i,j in msg:
print('\n' + i + '\n' + '-'*len(i) + '\n' + '\n'.join([' ' + k for k in wrap(j,width=80)]), file=sys.stderr)

def wig_help():
msg =[
('Usage', "CrossMap.py wig <chain_file> <input.wig> <output_prefix>"),
('Description', ("Convert wiggle format file. The \"chain_file\" can be regular or compressed (*.gz, *.Z, *.z, "
"*.bz, *.bz2, *.bzip2) file, local file or URL (http://, https://, ftp://) pointing to remote file. "
"Both \"variableStep\" and \"fixedStep\" wiggle lines are supported. Wiggle format: "
"http://genome.ucsc.edu/goldenPath/help/wiggle.html")),
('Example', "CrossMap.py wig hg18ToHg19.over.chain.gz test.hg18.wig test.hg19"),
]
for i,j in msg:
print('\n' + i + '\n' + '-'*len(i) + '\n' + '\n'.join([' ' + k for k in wrap(j,width=80)]), file=sys.stderr)

def bigwig_help():
msg =[
('Usage', "CrossMap.py bigwig <chain_file> <input.bigwig> <output_prefix>"),
('Description', ("Convert BigWig format file. The \"chain_file\" can be regular or compressed "
"(*.gz, *.Z, *.z, *.bz, *.bz2, *.bzip2) file, local file or URL (http://, https://, ftp://) "
"pointing to remote file. Bigwig format: http://genome.ucsc.edu/goldenPath/help/bigWig.html")),
('Example', "CrossMap.py bigwig hg18ToHg19.over.chain.gz test.hg18.bw test.hg19"),
]
for i,j in msg:
print('\n' + i + '\n' + '-'*len(i) + '\n' + '\n'.join([' ' + k for k in wrap(j,width=80)]), file=sys.stderr)

#def vcf_help():
# msg =[
# ("usage","CrossMap.py vcf <chain_file> <input.vcf> <refGenome.fa> <output_file>"),
# ("Description", ("Convert VCF format file. The \"chain_file\" and \"input.vcf\" can be regular or compressed "
# "(*.gz, *.Z, *.z, *.bz, *.bz2, *.bzip2) file, local file or URL (http://, https://, ftp://) "
# "pointing to remote file. \"refGenome.fa\" is genome sequence file of the *target assembly*.")),
# ("Example", " CrossMap.py vcf hg19ToHg18.over.chain.gz test.hg19.vcf hg18.fa test.hg18.vcf"),
# ]
# for i,j in msg:
# print('\n' + i + '\n' + '-'*len(i) + '\n' + '\n'.join([' ' + k for k in wrap(j,width=80)]), file=sys.stderr)

#def gvcf_help():
# msg =[
# ("usage","CrossMap.py gvcf <chain_file> <input.gvcf> <refGenome.fa> <output_file>"),
# ("Description", ("Convert GVCF format file. The \"chain_file\" and \"input.gvcf\" can be regular or compressed "
# "(*.gz, *.Z, *.z, *.bz, *.bz2, *.bzip2) file, local file or URL (http://, https://, ftp://) "
# "pointing to remote file. \"refGenome.fa\" is genome sequence file of the *target assembly*.")),
# ("Example", " CrossMap.py gvcf hg19ToHg18.over.chain.gz test.hg19.gvcf hg18.fa test.hg18.gvcf"),
# ]
# for i,j in msg:
# print('\n' + i + '\n' + '-'*len(i) + '\n' + '\n'.join([' ' + k for k in wrap(j,width=80)]), file=sys.stderr)


def maf_help():
msg =[
("usage","CrossMap.py maf <chain_file> <input.maf> <refGenome.fa> <build_name> <output_file>"),
("Description", ("Convert MAF format file. The \"chain_file\" and \"input.maf\" can be regular or compressed "
"(*.gz, *.Z, *.z, *.bz, *.bz2, *.bzip2) file, local file or URL (http://, https://, ftp://) "
"pointing to remote file. \"refGenome.fa\" is genome sequence file of *target assembly*. "
"\"build_name\" is the name of the *target_assembly* (eg \"GRCh38\")")),
("Example", " CrossMap.py maf hg19ToHg38.over.chain.gz test.hg19.maf hg38.fa GRCh38 test.hg38.maf"),
]
for i,j in msg:
print('\n' + i + '\n' + '-'*len(i) + '\n' + '\n'.join([' ' + k for k in wrap(j,width=80)]), file=sys.stderr)


if __name__=='__main__':
Expand All @@ -141,7 +51,8 @@ def maf_help():
'vcf':'convert VCF file.',
'gvcf':'convert GVCF file.',
'maf':'convert MAF (mutation annotation format) file.',
'region':'convert big genomic regions (in BED format) such as CNV blocks.'
'region':'convert big genomic regions (in BED format) such as CNV blocks.',
'viewchain':'print chain file into human readable, block-to-block format.'
}

kwds = list(commands.keys())
Expand Down Expand Up @@ -337,6 +248,13 @@ def maf_help():
else:
maf_help()
sys.exit(0)
elif sys.argv[1].lower() == 'viewchain': #mapping, infile, outfile, liftoverfile, refgenome, ref_name
if len(sys.argv) == 3:
chain_file = sys.argv[2]
read_chain_file(chain_file, print_table=True)
else:
viewchain_help()
sys.exit(0)
else:
general_help(commands)
sys.exit(0)
File renamed without changes.
32 changes: 12 additions & 20 deletions build/lib/cmmodule/bgrMerge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,28 @@
import subprocess
import random
import string
from time import strftime

import logging
from cmmodule import wig_reader
from cmmodule import myutils

def randomword(length):
return ''.join(random.choice(string.ascii_uppercase) for _ in range(length))

def printlog (mesg_lst):
'''print progress into stderr'''
if len(mesg_lst)==1:
msg = "@ " + strftime("%Y-%m-%d %H:%M:%S") + ": " + mesg_lst[0]
else:
msg = "@ " + strftime("%Y-%m-%d %H:%M:%S") + ": " + ' '.join(mesg_lst)
print(msg, file=sys.stderr)

def read_bed_by_chr(f):
'''input bed file'''

# sort input file
tmp_file_name = randomword(10)
printlog (["Sorting bedGraph file:" + f])
logging.info("Sorting bedGraph file: %s" % f)
TMP = open(tmp_file_name,'w')
sort_cmd = myutils.which('sort')
try:
subprocess.call([sort_cmd, "-k1,1", "-k2,2n", f], stdout= TMP)
except:
raise Exception("Cannot find GNU \"sort\" command")
TMP.close()

# generate list
ret_list=[]
line_num = 0
Expand All @@ -40,12 +32,12 @@ def read_bed_by_chr(f):
continue
line_num += 1
line = line.strip()

if line_num == 1:
chrom = line.split()[0]
ret_list.append(line)
continue

if line.split()[0] != chrom:
yield ret_list
ret_list=[]
Expand All @@ -68,11 +60,11 @@ def merge(infile):
score = float(score)
if start < 0 or end < 0 :
continue

if start >= top_marker and end <= int(lines[i+1].split()[1]):
if len(overlap_pos2val) !=0:
for m,n,p in wig_reader.wig_to_bgr2(overlap_pos2val):
yield((chr, m, n, p))
yield((chr, m, n, p))
yield((chr, start, end, score))
overlap_pos2val = {}
else:
Expand All @@ -82,7 +74,7 @@ def merge(infile):
else:
overlap_pos2val[ind] = score
top_marker = max(top_marker, end)

# deal with last line
else:
(last_chr, last_start, last_end, last_score) = lines[-1].split()
Expand All @@ -93,7 +85,7 @@ def merge(infile):
except:
print(last_chr, last_start,last_end)
pass

if last_start >= top_marker:
if len(overlap_pos2val) !=0:
for m,n,p in wig_reader.wig_to_bgr2(overlap_pos2val):
Expand All @@ -106,8 +98,8 @@ def merge(infile):
overlap_pos2val[ind] += last_score
else:
overlap_pos2val[ind] = last_score

if len(overlap_pos2val) !=0:
for m,n,p in wig_reader.wig_to_bgr2(overlap_pos2val):
yield((last_chr, m, n, p))

7 changes: 4 additions & 3 deletions build/lib/cmmodule/ireader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
"""
#!/usr/bin/env python
# encoding: utf-8

import sys
import bz2
import gzip
import urllib
from subprocess import Popen

def nopen(f, mode="rb"):
if not isinstance(f, str):
Expand All @@ -21,8 +22,8 @@ def nopen(f, mode="rb"):
else urllib.urlopen(f) if f.startswith(("http://", "https://","ftp://")) \
else open(f, mode)


def reader(fname):
for l in nopen(fname):
yield l.decode('utf8').strip().replace("\r", "")

Binary file modified docs/_build/doctrees/environment.pickle
Binary file not shown.
Binary file modified docs/_build/doctrees/index.doctree
Binary file not shown.
37 changes: 37 additions & 0 deletions docs/_build/html/_sources/index.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ How CrossMap works?
Release history
===================

**4/16/2021: Release version 0.5.3**

Add :code:`CrossMap.py viewchain` to convert chain file into block-to-block, more readable format.

**12/08/2020: Release version 0.5.2**

Add '--no-comp-alleles' flag to :code:`CrossMap.py vcf` and :code:`CrossMap.py gvcf`. If set, CrossMap does not check if the "reference allele" is different from the "alternative allele".
Expand Down Expand Up @@ -783,7 +787,40 @@ Example::

1. Input BED file should have at least 3 columns (chrom, start, end). Additional columns will be kept as is.


Convert chain file
-------------------

Typing :code:`CrossMap.py viewchain` without any arguments will print a help message::

Usage
-----
CrossMap.py viewchain <chain_file>
Description
-----------
print chain file into a human readable, tab-separated, 8-column file. The first
4 columns represent 'chrom','start','end','strand' of the source genome
assembly, and the last 4 columns represent 'chrom','start','end','strand' of
the target genome assembly.


Example::

$CrossMap.py viewchain ../data/human/GRCh37_to_GRCh38.chain.gz >chain.tab
$head chain.tab
1 10000 177417 + 1 10000 177417 +
1 227417 267719 + 1 257666 297968 +
1 317719 471368 + 1 347968 501617 -
1 521368 1566075 + 1 585988 1630695 +
1 1566075 1569784 + 1 1630696 1634405 +
1 1569784 1570918 + 1 1634408 1635542 +
1 1570918 1570922 + 1 1635546 1635550 +
1 1570922 1574299 + 1 1635560 1638937 +
1 1574299 1583669 + 1 1638938 1648308 +
1 1583669 1583878 + 1 1648309 1648518 +


Compare to UCSC liftover tool
==============================

Expand Down
34 changes: 34 additions & 0 deletions docs/_build/html/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ <h1>How CrossMap works?<a class="headerlink" href="#how-crossmap-works" title="P
</div>
<div class="section" id="release-history">
<h1>Release history<a class="headerlink" href="#release-history" title="Permalink to this headline"></a></h1>
<p><strong>4/16/2021: Release version 0.5.3</strong></p>
<p>Add <code class="code docutils literal notranslate"><span class="pre">CrossMap.py</span> <span class="pre">viewchain</span></code> to convert chain file into block-to-block, more readable format.</p>
<p><strong>12/08/2020: Release version 0.5.2</strong></p>
<p>Add ‘–no-comp-alleles’ flag to <code class="code docutils literal notranslate"><span class="pre">CrossMap.py</span> <span class="pre">vcf</span></code> and <code class="code docutils literal notranslate"><span class="pre">CrossMap.py</span> <span class="pre">gvcf</span></code>. If set, CrossMap does not check if the “reference allele” is different from the “alternative allele”.</p>
<p><strong>08/19/2020: Release version 0.5.1</strong></p>
Expand Down Expand Up @@ -780,6 +782,37 @@ <h2>Convert large genomic regions<a class="headerlink" href="#convert-large-geno
</ol>
</div>
</div>
<div class="section" id="convert-chain-file">
<h2>Convert chain file<a class="headerlink" href="#convert-chain-file" title="Permalink to this headline"></a></h2>
<p>Typing <code class="code docutils literal notranslate"><span class="pre">CrossMap.py</span> <span class="pre">viewchain</span></code> without any arguments will print a help message:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">Usage</span>
<span class="o">-----</span>
<span class="n">CrossMap</span><span class="o">.</span><span class="n">py</span> <span class="n">viewchain</span> <span class="o">&lt;</span><span class="n">chain_file</span><span class="o">&gt;</span>

<span class="n">Description</span>
<span class="o">-----------</span>
<span class="nb">print</span> <span class="n">chain</span> <span class="n">file</span> <span class="n">into</span> <span class="n">a</span> <span class="n">human</span> <span class="n">readable</span><span class="p">,</span> <span class="n">tab</span><span class="o">-</span><span class="n">separated</span><span class="p">,</span> <span class="mi">8</span><span class="o">-</span><span class="n">column</span> <span class="n">file</span><span class="o">.</span> <span class="n">The</span> <span class="n">first</span>
<span class="mi">4</span> <span class="n">columns</span> <span class="n">represent</span> <span class="s1">&#39;chrom&#39;</span><span class="p">,</span><span class="s1">&#39;start&#39;</span><span class="p">,</span><span class="s1">&#39;end&#39;</span><span class="p">,</span><span class="s1">&#39;strand&#39;</span> <span class="n">of</span> <span class="n">the</span> <span class="n">source</span> <span class="n">genome</span>
<span class="n">assembly</span><span class="p">,</span> <span class="ow">and</span> <span class="n">the</span> <span class="n">last</span> <span class="mi">4</span> <span class="n">columns</span> <span class="n">represent</span> <span class="s1">&#39;chrom&#39;</span><span class="p">,</span><span class="s1">&#39;start&#39;</span><span class="p">,</span><span class="s1">&#39;end&#39;</span><span class="p">,</span><span class="s1">&#39;strand&#39;</span> <span class="n">of</span>
<span class="n">the</span> <span class="n">target</span> <span class="n">genome</span> <span class="n">assembly</span><span class="o">.</span>
</pre></div>
</div>
<p>Example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$CrossMap.py viewchain ../data/human/GRCh37_to_GRCh38.chain.gz &gt;chain.tab
$head chain.tab
1 10000 177417 + 1 10000 177417 +
1 227417 267719 + 1 257666 297968 +
1 317719 471368 + 1 347968 501617 -
1 521368 1566075 + 1 585988 1630695 +
1 1566075 1569784 + 1 1630696 1634405 +
1 1569784 1570918 + 1 1634408 1635542 +
1 1570918 1570922 + 1 1635546 1635550 +
1 1570922 1574299 + 1 1635560 1638937 +
1 1574299 1583669 + 1 1638938 1648308 +
1 1583669 1583878 + 1 1648309 1648518 +
</pre></div>
</div>
</div>
</div>
<div class="section" id="compare-to-ucsc-liftover-tool">
<h1>Compare to UCSC liftover tool<a class="headerlink" href="#compare-to-ucsc-liftover-tool" title="Permalink to this headline"></a></h1>
Expand Down Expand Up @@ -876,6 +909,7 @@ <h3><a href="#">Table of Contents</a></h3>
<li><a class="reference internal" href="#convert-maf-format-files">Convert MAF format files</a></li>
<li><a class="reference internal" href="#convert-gvcf-format-files">Convert GVCF format files</a></li>
<li><a class="reference internal" href="#convert-large-genomic-regions">Convert large genomic regions</a></li>
<li><a class="reference internal" href="#convert-chain-file">Convert chain file</a></li>
</ul>
</li>
<li><a class="reference internal" href="#compare-to-ucsc-liftover-tool">Compare to UCSC liftover tool</a></li>
Expand Down
Loading

0 comments on commit 736624b

Please sign in to comment.