Added sequence-choosing options to last-dotplot.
authorMartin C. Frith
Tue Oct 20 16:38:42 2015 +0900 (2015-10-20)
changeset 651fbb4c3b60998
parent 650 157301c5c6fd
child 652 2538ba367b8e
Added sequence-choosing options to last-dotplot.
scripts/last-dotplot
     1.1 --- a/scripts/last-dotplot	Tue Oct 20 16:26:13 2015 +0900
     1.2 +++ b/scripts/last-dotplot	Tue Oct 20 16:38:42 2015 +0900
     1.3 @@ -9,7 +9,7 @@
     1.4  # according to the number of aligned nt-pairs within it, but the
     1.5  # result is too faint.  How can this be done better?
     1.6  
     1.7 -import fileinput, itertools, optparse, os, re, sys
     1.8 +import fileinput, fnmatch, itertools, optparse, os, re, sys
     1.9  
    1.10  # Try to make PIL/PILLOW work:
    1.11  try: from PIL import Image, ImageDraw, ImageFont, ImageColor
    1.12 @@ -78,12 +78,23 @@
    1.13                  yield chr1, seqlen1, chr2, seqlen2, blocks
    1.14                  mafCount = 0
    1.15  
    1.16 -def readAlignments(lines):
    1.17 +def isWantedSequenceName(name, patterns):
    1.18 +    if not patterns: return True
    1.19 +    base = name.split(".")[-1]  # allow for names like hg19.chr7
    1.20 +    for i in patterns:
    1.21 +        if fnmatch.fnmatchcase(name, i): return True
    1.22 +        if fnmatch.fnmatchcase(base, i): return True
    1.23 +    return False
    1.24 +
    1.25 +def readAlignments(fileName, opts):
    1.26      '''Get alignments and sequence lengths, from MAF or tabular format.'''
    1.27      alignments = []
    1.28      seqLengths1 = {}
    1.29      seqLengths2 = {}
    1.30 +    lines = fileinput.input(fileName)
    1.31      for chr1, seqlen1, chr2, seqlen2, blocks in alignmentInput(lines):
    1.32 +        if not isWantedSequenceName(chr1, opts.seq1): continue
    1.33 +        if not isWantedSequenceName(chr2, opts.seq2): continue
    1.34          aln = chr1, chr2, blocks
    1.35          alignments.append(aln)
    1.36          seqLengths1[chr1] = seqlen1
    1.37 @@ -279,8 +290,7 @@
    1.38      overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
    1.39  
    1.40      warn("reading alignments...")
    1.41 -    input = fileinput.input(args[0])
    1.42 -    alignments, seq_size_dic1, seq_size_dic2 = readAlignments(input)
    1.43 +    alignments, seq_size_dic1, seq_size_dic2 = readAlignments(args[0], opts)
    1.44      warn("done")
    1.45  
    1.46      if not alignments: raise Exception("there are no alignments")
    1.47 @@ -361,6 +371,10 @@
    1.48     or: ..."""
    1.49      description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
    1.50      op = optparse.OptionParser(usage=usage, description=description)
    1.51 +    op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
    1.52 +                  help="which sequences to show from the 1st genome")
    1.53 +    op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
    1.54 +                  help="which sequences to show from the 2nd genome")
    1.55      # Replace "width" & "height" with a single "length" option?
    1.56      op.add_option("-x", "--width", type="int", default=1000,
    1.57                    help="maximum width in pixels (default: %default)")