scripts/last-dotplot
author Martin C. Frith
Wed Aug 21 14:32:10 2019 +0900 (14 months ago)
changeset 984 fb43f850eba2
parent 981 0e184d90d5da
child 1062 6d8d56a74b14
permissions -rwxr-xr-x
dotplot: change satellite-repeat color to purple
     1 #! /usr/bin/env python
     2 
     3 # Read pair-wise alignments in MAF or LAST tabular format: write an
     4 # "Oxford grid", a.k.a. dotplot.
     5 
     6 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
     7 # pixels with one or more aligned nt-pairs are black.  This can look
     8 # too crowded for large genome alignments.  I tried shading each pixel
     9 # according to the number of aligned nt-pairs within it, but the
    10 # result is too faint.  How can this be done better?
    11 
    12 import collections
    13 import functools
    14 import gzip
    15 from fnmatch import fnmatchcase
    16 import logging
    17 from operator import itemgetter
    18 import subprocess
    19 import itertools, optparse, os, re, sys
    20 
    21 # Try to make PIL/PILLOW work:
    22 try:
    23     from PIL import Image, ImageDraw, ImageFont, ImageColor
    24 except ImportError:
    25     import Image, ImageDraw, ImageFont, ImageColor
    26 
    27 try:
    28     from future_builtins import zip
    29 except ImportError:
    30     pass
    31 
    32 def myOpen(fileName):  # faster than fileinput
    33     if fileName is None:
    34         return []
    35     if fileName == "-":
    36         return sys.stdin
    37     if fileName.endswith(".gz"):
    38         return gzip.open(fileName, "rt")  # xxx dubious for Python2
    39     return open(fileName)
    40 
    41 def groupByFirstItem(things):
    42     for k, v in itertools.groupby(things, itemgetter(0)):
    43         yield k, [i[1:] for i in v]
    44 
    45 def croppedBlocks(blocks, ranges1, ranges2):
    46     headBeg1, headBeg2, headSize = blocks[0]
    47     for r1 in ranges1:
    48         for r2 in ranges2:
    49             cropBeg1, cropEnd1 = r1
    50             if headBeg1 < 0:
    51                 cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
    52             cropBeg2, cropEnd2 = r2
    53             if headBeg2 < 0:
    54                 cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
    55             for beg1, beg2, size in blocks:
    56                 b1 = max(cropBeg1, beg1)
    57                 e1 = min(cropEnd1, beg1 + size)
    58                 if b1 >= e1: continue
    59                 offset = beg2 - beg1
    60                 b2 = max(cropBeg2, b1 + offset)
    61                 e2 = min(cropEnd2, e1 + offset)
    62                 if b2 >= e2: continue
    63                 yield b2 - offset, b2, e2 - b2
    64 
    65 def tabBlocks(beg1, beg2, blocks):
    66     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
    67     for i in blocks.split(","):
    68         if ":" in i:
    69             x, y = i.split(":")
    70             beg1 += int(x)
    71             beg2 += int(y)
    72         else:
    73             size = int(i)
    74             yield beg1, beg2, size
    75             beg1 += size
    76             beg2 += size
    77 
    78 def mafBlocks(beg1, beg2, seq1, seq2):
    79     '''Get the gapless blocks of an alignment, from MAF format.'''
    80     size = 0
    81     for x, y in zip(seq1, seq2):
    82         if x == "-":
    83             if size:
    84                 yield beg1, beg2, size
    85                 beg1 += size
    86                 beg2 += size
    87                 size = 0
    88             beg2 += 1
    89         elif y == "-":
    90             if size:
    91                 yield beg1, beg2, size
    92                 beg1 += size
    93                 beg2 += size
    94                 size = 0
    95             beg1 += 1
    96         else:
    97             size += 1
    98     if size: yield beg1, beg2, size
    99 
   100 def alignmentFromSegment(qrySeqName, qrySeqLen, segment):
   101     refSeqLen = sys.maxsize  # XXX
   102     refSeqName, refSeqBeg, qrySeqBeg, size = segment
   103     block = refSeqBeg, qrySeqBeg, size
   104     return refSeqName, refSeqLen, qrySeqName, qrySeqLen, [block]
   105 
   106 def alignmentInput(lines):
   107     '''Get alignments and sequence lengths, from MAF or tabular format.'''
   108     mafCount = 0
   109     qrySeqName = ""
   110     segments = []
   111     for line in lines:
   112         w = line.split()
   113         if line[0] == "#":
   114             pass
   115         elif len(w) == 1:
   116             for i in segments:
   117                 yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
   118             qrySeqName = w[0]
   119             qrySeqLen = 0
   120             segments = []
   121         elif len(w) == 2 and qrySeqName and w[1].isdigit():
   122             qrySeqLen += int(w[1])
   123         elif len(w) == 4 and qrySeqName and w[1].isdigit() and w[3].isdigit():
   124             refSeqName, refSeqBeg, refSeqEnd = w[0], int(w[1]), int(w[3])
   125             size = abs(refSeqEnd - refSeqBeg)
   126             if refSeqBeg > refSeqEnd:
   127                 refSeqBeg = -refSeqBeg
   128             segments.append((refSeqName, refSeqBeg, qrySeqLen, size))
   129             qrySeqLen += size
   130         elif line[0].isdigit():  # tabular format
   131             chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
   132             if w[4] == "-": beg1 -= seqlen1
   133             chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
   134             if w[9] == "-": beg2 -= seqlen2
   135             blocks = tabBlocks(beg1, beg2, w[11])
   136             yield chr1, seqlen1, chr2, seqlen2, blocks
   137         elif line[0] == "s":  # MAF format
   138             if mafCount == 0:
   139                 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
   140                 if w[4] == "-": beg1 -= seqlen1
   141                 mafCount = 1
   142             else:
   143                 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
   144                 if w[4] == "-": beg2 -= seqlen2
   145                 blocks = mafBlocks(beg1, beg2, seq1, seq2)
   146                 yield chr1, seqlen1, chr2, seqlen2, blocks
   147                 mafCount = 0
   148     for i in segments:
   149         yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
   150 
   151 def seqRequestFromText(text):
   152     if ":" in text:
   153         pattern, interval = text.rsplit(":", 1)
   154         if "-" in interval:
   155             beg, end = interval.rsplit("-", 1)
   156             return pattern, int(beg), int(end)  # beg may be negative
   157     return text, 0, sys.maxsize
   158 
   159 def rangesFromSeqName(seqRequests, name, seqLen):
   160     if seqRequests:
   161         base = name.split(".")[-1]  # allow for names like hg19.chr7
   162         for pat, beg, end in seqRequests:
   163             if fnmatchcase(name, pat) or fnmatchcase(base, pat):
   164                 yield max(beg, 0), min(end, seqLen)
   165     else:
   166         yield 0, seqLen
   167 
   168 def updateSeqs(coverDict, seqRanges, seqName, ranges, coveredRange):
   169     beg, end = coveredRange
   170     if beg < 0:
   171         coveredRange = -end, -beg
   172     if seqName in coverDict:
   173         coverDict[seqName].append(coveredRange)
   174     else:
   175         coverDict[seqName] = [coveredRange]
   176         for beg, end in ranges:
   177             r = seqName, beg, end
   178             seqRanges.append(r)
   179 
   180 def readAlignments(fileName, opts):
   181     '''Get alignments and sequence limits, from MAF or tabular format.'''
   182     seqRequests1 = [seqRequestFromText(i) for i in opts.seq1]
   183     seqRequests2 = [seqRequestFromText(i) for i in opts.seq2]
   184 
   185     alignments = []
   186     seqRanges1 = []
   187     seqRanges2 = []
   188     coverDict1 = {}
   189     coverDict2 = {}
   190     lines = myOpen(fileName)
   191     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   192         ranges1 = sorted(rangesFromSeqName(seqRequests1, seqName1, seqLen1))
   193         if not ranges1: continue
   194         ranges2 = sorted(rangesFromSeqName(seqRequests2, seqName2, seqLen2))
   195         if not ranges2: continue
   196         b = list(croppedBlocks(list(blocks), ranges1, ranges2))
   197         if not b: continue
   198         aln = seqName1, seqName2, b
   199         alignments.append(aln)
   200         coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
   201         updateSeqs(coverDict1, seqRanges1, seqName1, ranges1, coveredRange1)
   202         coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
   203         updateSeqs(coverDict2, seqRanges2, seqName2, ranges2, coveredRange2)
   204     return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
   205 
   206 def nameAndRangesFromDict(cropDict, seqName):
   207     if seqName in cropDict:
   208         return seqName, cropDict[seqName]
   209     n = seqName.split(".")[-1]
   210     if n in cropDict:
   211         return n, cropDict[n]
   212     return seqName, []
   213 
   214 def rangesForSecondaryAlignments(primaryRanges, seqLen):
   215     if primaryRanges:
   216         return primaryRanges
   217     return [(0, seqLen)]
   218 
   219 def readSecondaryAlignments(opts, cropRanges1, cropRanges2):
   220     cropDict1 = dict(groupByFirstItem(cropRanges1))
   221     cropDict2 = dict(groupByFirstItem(cropRanges2))
   222 
   223     alignments = []
   224     seqRanges1 = []
   225     seqRanges2 = []
   226     coverDict1 = {}
   227     coverDict2 = {}
   228     lines = myOpen(opts.alignments)
   229     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   230         seqName1, ranges1 = nameAndRangesFromDict(cropDict1, seqName1)
   231         seqName2, ranges2 = nameAndRangesFromDict(cropDict2, seqName2)
   232         if not ranges1 and not ranges2:
   233             continue
   234         r1 = rangesForSecondaryAlignments(ranges1, seqLen1)
   235         r2 = rangesForSecondaryAlignments(ranges2, seqLen2)
   236         b = list(croppedBlocks(list(blocks), r1, r2))
   237         if not b: continue
   238         aln = seqName1, seqName2, b
   239         alignments.append(aln)
   240         if not ranges1:
   241             coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
   242             updateSeqs(coverDict1, seqRanges1, seqName1, r1, coveredRange1)
   243         if not ranges2:
   244             coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
   245             updateSeqs(coverDict2, seqRanges2, seqName2, r2, coveredRange2)
   246     return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
   247 
   248 def twoValuesFromOption(text, separator):
   249     if separator in text:
   250         return text.split(separator)
   251     return text, text
   252 
   253 def mergedRanges(ranges):
   254     oldBeg, maxEnd = ranges[0]
   255     for beg, end in ranges:
   256         if beg > maxEnd:
   257             yield oldBeg, maxEnd
   258             oldBeg = beg
   259             maxEnd = end
   260         elif end > maxEnd:
   261             maxEnd = end
   262     yield oldBeg, maxEnd
   263 
   264 def mergedRangesPerSeq(coverDict):
   265     for k, v in coverDict.items():
   266         v.sort()
   267         yield k, list(mergedRanges(v))
   268 
   269 def coveredLength(mergedCoverDict):
   270     return sum(sum(e - b for b, e in v) for v in mergedCoverDict.values())
   271 
   272 def trimmed(seqRanges, coverDict, minAlignedBases, maxGapFrac, endPad, midPad):
   273     maxEndGapFrac, maxMidGapFrac = twoValuesFromOption(maxGapFrac, ",")
   274     maxEndGap = max(float(maxEndGapFrac) * minAlignedBases, endPad * 1.0)
   275     maxMidGap = max(float(maxMidGapFrac) * minAlignedBases, midPad * 2.0)
   276 
   277     for seqName, rangeBeg, rangeEnd in seqRanges:
   278         seqBlocks = coverDict[seqName]
   279         blocks = [i for i in seqBlocks if i[0] < rangeEnd and i[1] > rangeBeg]
   280         if blocks[0][0] - rangeBeg > maxEndGap:
   281             rangeBeg = blocks[0][0] - endPad
   282         for j, y in enumerate(blocks):
   283             if j:
   284                 x = blocks[j - 1]
   285                 if y[0] - x[1] > maxMidGap:
   286                     yield seqName, rangeBeg, x[1] + midPad
   287                     rangeBeg = y[0] - midPad
   288         if rangeEnd - blocks[-1][1] > maxEndGap:
   289             rangeEnd = blocks[-1][1] + endPad
   290         yield seqName, rangeBeg, rangeEnd
   291 
   292 def rangesWithStrandInfo(seqRanges, strandOpt, alignments, seqIndex):
   293     if strandOpt == "1":
   294         forwardMinusReverse = collections.defaultdict(int)
   295         for i in alignments:
   296             blocks = i[2]
   297             beg1, beg2, size = blocks[0]
   298             numOfAlignedLetterPairs = sum(i[2] for i in blocks)
   299             if (beg1 < 0) != (beg2 < 0):  # opposite-strand alignment
   300                 numOfAlignedLetterPairs *= -1
   301             forwardMinusReverse[i[seqIndex]] += numOfAlignedLetterPairs
   302     strandNum = 0
   303     for seqName, beg, end in seqRanges:
   304         if strandOpt == "1":
   305             strandNum = 1 if forwardMinusReverse[seqName] >= 0 else 2
   306         yield seqName, beg, end, strandNum
   307 
   308 def natural_sort_key(my_string):
   309     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
   310     parts = re.split(r'(\d+)', my_string)
   311     parts[1::2] = map(int, parts[1::2])
   312     return parts
   313 
   314 def nameKey(oneSeqRanges):
   315     return natural_sort_key(oneSeqRanges[0][0])
   316 
   317 def sizeKey(oneSeqRanges):
   318     return sum(b - e for n, b, e, s in oneSeqRanges), nameKey(oneSeqRanges)
   319 
   320 def alignmentKey(seqNamesToLists, oneSeqRanges):
   321     seqName = oneSeqRanges[0][0]
   322     alignmentsOfThisSequence = seqNamesToLists[seqName]
   323     numOfAlignedLetterPairs = sum(i[3] for i in alignmentsOfThisSequence)
   324     toMiddle = numOfAlignedLetterPairs // 2
   325     for i in alignmentsOfThisSequence:
   326         toMiddle -= i[3]
   327         if toMiddle < 0:
   328             return i[1:3]  # sequence-rank and "position" of this alignment
   329 
   330 def rankAndFlipPerSeq(seqRanges):
   331     rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
   332     for rank, group in enumerate(rangesGroupedBySeqName):
   333         seqName, ranges = group
   334         strandNum = next(ranges)[3]
   335         flip = 1 if strandNum < 2 else -1
   336         yield seqName, (rank, flip)
   337 
   338 def alignmentSortData(alignments, seqIndex, otherNamesToRanksAndFlips):
   339     otherIndex = 1 - seqIndex
   340     for i in alignments:
   341         blocks = i[2]
   342         otherRank, otherFlip = otherNamesToRanksAndFlips[i[otherIndex]]
   343         otherPos = otherFlip * abs(blocks[0][otherIndex] +
   344                                    blocks[-1][otherIndex] + blocks[-1][2])
   345         numOfAlignedLetterPairs = sum(i[2] for i in blocks)
   346         yield i[seqIndex], otherRank, otherPos, numOfAlignedLetterPairs
   347 
   348 def mySortedRanges(seqRanges, sortOpt, seqIndex, alignments, otherRanges):
   349     rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
   350     g = [list(ranges) for seqName, ranges in rangesGroupedBySeqName]
   351     for i in g:
   352         if i[0][3] > 1:
   353             i.reverse()
   354     if sortOpt == "1":
   355         g.sort(key=nameKey)
   356     if sortOpt == "2":
   357         g.sort(key=sizeKey)
   358     if sortOpt == "3":
   359         otherNamesToRanksAndFlips = dict(rankAndFlipPerSeq(otherRanges))
   360         alns = sorted(alignmentSortData(alignments, seqIndex,
   361                                         otherNamesToRanksAndFlips))
   362         alnsGroupedBySeqName = itertools.groupby(alns, itemgetter(0))
   363         seqNamesToLists = dict((k, list(v)) for k, v in alnsGroupedBySeqName)
   364         g.sort(key=functools.partial(alignmentKey, seqNamesToLists))
   365     return [j for i in g for j in i]
   366 
   367 def allSortedRanges(opts, alignments, alignmentsB,
   368                     seqRanges1, seqRangesB1, seqRanges2, seqRangesB2):
   369     o1, oB1 = twoValuesFromOption(opts.strands1, ":")
   370     o2, oB2 = twoValuesFromOption(opts.strands2, ":")
   371     if o1 == "1" and o2 == "1":
   372         raise Exception("the strand options have circular dependency")
   373     seqRanges1 = list(rangesWithStrandInfo(seqRanges1, o1, alignments, 0))
   374     seqRanges2 = list(rangesWithStrandInfo(seqRanges2, o2, alignments, 1))
   375     seqRangesB1 = list(rangesWithStrandInfo(seqRangesB1, oB1, alignmentsB, 0))
   376     seqRangesB2 = list(rangesWithStrandInfo(seqRangesB2, oB2, alignmentsB, 1))
   377 
   378     o1, oB1 = twoValuesFromOption(opts.sort1, ":")
   379     o2, oB2 = twoValuesFromOption(opts.sort2, ":")
   380     if o1 == "3" and o2 == "3":
   381         raise Exception("the sort options have circular dependency")
   382     if o1 != "3":
   383         s1 = mySortedRanges(seqRanges1, o1, None, None, None)
   384     if o2 != "3":
   385         s2 = mySortedRanges(seqRanges2, o2, None, None, None)
   386     if o1 == "3":
   387         s1 = mySortedRanges(seqRanges1, o1, 0, alignments, s2)
   388     if o2 == "3":
   389         s2 = mySortedRanges(seqRanges2, o2, 1, alignments, s1)
   390     t1 = mySortedRanges(seqRangesB1, oB1, 0, alignmentsB, s2)
   391     t2 = mySortedRanges(seqRangesB2, oB2, 1, alignmentsB, s1)
   392     return s1 + t1, s2 + t2
   393 
   394 def prettyNum(n):
   395     t = str(n)
   396     groups = []
   397     while t:
   398         groups.append(t[-3:])
   399         t = t[:-3]
   400     return ",".join(reversed(groups))
   401 
   402 def sizeText(size):
   403     suffixes = "bp", "kb", "Mb", "Gb"
   404     for i, x in enumerate(suffixes):
   405         j = 10 ** (i * 3)
   406         if size < j * 10:
   407             return "%.2g" % (1.0 * size / j) + x
   408         if size < j * 1000 or i == len(suffixes) - 1:
   409             return "%.0f" % (1.0 * size / j) + x
   410 
   411 def labelText(seqRange, labelOpt):
   412     seqName, beg, end, strandNum = seqRange
   413     if labelOpt == 1:
   414         return seqName + ": " + sizeText(end - beg)
   415     if labelOpt == 2:
   416         return seqName + ":" + prettyNum(beg) + ": " + sizeText(end - beg)
   417     if labelOpt == 3:
   418         return seqName + ":" + prettyNum(beg) + "-" + prettyNum(end)
   419     return seqName
   420 
   421 def rangeLabels(seqRanges, labelOpt, font, fontsize, image_mode, textRot):
   422     if fontsize:
   423         image_size = 1, 1
   424         im = Image.new(image_mode, image_size)
   425         draw = ImageDraw.Draw(im)
   426     x = y = 0
   427     for r in seqRanges:
   428         text = labelText(r, labelOpt)
   429         if fontsize:
   430             x, y = draw.textsize(text, font=font)
   431             if textRot:
   432                 x, y = y, x
   433         yield text, x, y, r[3]
   434 
   435 def dataFromRanges(sortedRanges, font, fontSize, imageMode, labelOpt, textRot):
   436     for seqName, rangeBeg, rangeEnd, strandNum in sortedRanges:
   437         out = [seqName, str(rangeBeg), str(rangeEnd)]
   438         if strandNum > 0:
   439             out.append(".+-"[strandNum])
   440         logging.info("\t".join(out))
   441     logging.info("")
   442     rangeSizes = [e - b for n, b, e, s in sortedRanges]
   443     labs = list(rangeLabels(sortedRanges, labelOpt, font, fontSize,
   444                             imageMode, textRot))
   445     margin = max(i[2] for i in labs)
   446     # xxx the margin may be too big, because some labels may get omitted
   447     return rangeSizes, labs, margin
   448 
   449 def div_ceil(x, y):
   450     '''Return x / y rounded up.'''
   451     q, r = divmod(x, y)
   452     return q + (r != 0)
   453 
   454 def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
   455     '''Get the minimum bp-per-pixel that fits in the size limit.'''
   456     logging.info("choosing bp per pixel...")
   457     numOfRanges = len(rangeSizes)
   458     maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
   459     if maxPixelsInRanges < numOfRanges:
   460         raise Exception("can't fit the image: too many sequences?")
   461     negLimit = -maxPixelsInRanges
   462     negBpPerPix = sum(rangeSizes) // negLimit
   463     while True:
   464         if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
   465             return -negBpPerPix
   466         negBpPerPix -= 1
   467 
   468 def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
   469     '''Get the start pixel for each range.'''
   470     rangePixBegs = []
   471     pix_tot = margin - pixTweenRanges
   472     for i in rangePixLens:
   473         pix_tot += pixTweenRanges
   474         rangePixBegs.append(pix_tot)
   475         pix_tot += i
   476     return rangePixBegs
   477 
   478 def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
   479     '''Return pixel information about the ranges.'''
   480     rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
   481     rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
   482     tot_pix = rangePixBegs[-1] + rangePixLens[-1]
   483     return rangePixBegs, rangePixLens, tot_pix
   484 
   485 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
   486     while True:
   487         q1, r1 = divmod(beg1, bp_per_pix)
   488         q2, r2 = divmod(beg2, bp_per_pix)
   489         hits[q2 * width + q1] |= 1
   490         next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
   491         if next_pix >= size: break
   492         beg1 += next_pix
   493         beg2 += next_pix
   494         size -= next_pix
   495 
   496 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
   497     while True:
   498         q1, r1 = divmod(beg1, bp_per_pix)
   499         q2, r2 = divmod(beg2, bp_per_pix)
   500         hits[q2 * width + q1] |= 2
   501         next_pix = min(bp_per_pix - r1, r2 + 1)
   502         if next_pix >= size: break
   503         beg1 += next_pix
   504         beg2 -= next_pix
   505         size -= next_pix
   506 
   507 def strandAndOrigin(ranges, beg, size):
   508     isReverseStrand = (beg < 0)
   509     if isReverseStrand:
   510         beg = -(beg + size)
   511     for rangeBeg, rangeEnd, isReverseRange, origin in ranges:
   512         if rangeEnd > beg:  # assumes the ranges are sorted
   513             return (isReverseStrand != isReverseRange), origin
   514 
   515 def alignmentPixels(width, height, alignments, bp_per_pix,
   516                     rangeDict1, rangeDict2):
   517     hits = [0] * (width * height)  # the image data
   518     for seq1, seq2, blocks in alignments:
   519         beg1, beg2, size = blocks[0]
   520         isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
   521         isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
   522         for beg1, beg2, size in blocks:
   523             if isReverse1:
   524                 beg1 = -(beg1 + size)
   525                 beg2 = -(beg2 + size)
   526             if isReverse1 == isReverse2:
   527                 drawLineForward(hits, width, bp_per_pix,
   528                                 ori1 + beg1, ori2 + beg2, size)
   529             else:
   530                 drawLineReverse(hits, width, bp_per_pix,
   531                                 ori1 + beg1, ori2 - beg2 - 1, size)
   532     return hits
   533 
   534 def orientedBlocks(alignments, seqIndex):
   535     otherIndex = 1 - seqIndex
   536     for a in alignments:
   537         seq1, seq2, blocks = a
   538         for b in blocks:
   539             beg1, beg2, size = b
   540             if b[seqIndex] < 0:
   541                 b = -(beg1 + size), -(beg2 + size), size
   542             yield a[seqIndex], b[seqIndex], a[otherIndex], b[otherIndex], size
   543 
   544 def drawJoins(im, alignments, bpPerPix, seqIndex, rangeDict1, rangeDict2):
   545     blocks = orientedBlocks(alignments, seqIndex)
   546     oldSeq1 = ""
   547     for seq1, beg1, seq2, beg2, size in sorted(blocks):
   548         isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
   549         isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
   550         end1 = beg1 + size - 1
   551         end2 = beg2 + size - 1
   552         if isReverse1:
   553             beg1 = -(beg1 + 1)
   554             end1 = -(end1 + 1)
   555         if isReverse2:
   556             beg2 = -(beg2 + 1)
   557             end2 = -(end2 + 1)
   558         newPix1 = (ori1 + beg1) // bpPerPix
   559         newPix2 = (ori2 + beg2) // bpPerPix
   560         if seq1 == oldSeq1:
   561             lowerPix2 = min(oldPix2, newPix2)
   562             upperPix2 = max(oldPix2, newPix2)
   563             midPix1 = (oldPix1 + newPix1) // 2
   564             if isReverse1:
   565                 midPix1 = (oldPix1 + newPix1 + 1) // 2
   566                 oldPix1, newPix1 = newPix1, oldPix1
   567             if upperPix2 - lowerPix2 > 1 and oldPix1 <= newPix1 <= oldPix1 + 1:
   568                 if seqIndex == 0:
   569                     box = midPix1, lowerPix2, midPix1 + 1, upperPix2 + 1
   570                 else:
   571                     box = lowerPix2, midPix1, upperPix2 + 1, midPix1 + 1
   572                 im.paste("lightgray", box)
   573         oldPix1 = (ori1 + end1) // bpPerPix
   574         oldPix2 = (ori2 + end2) // bpPerPix
   575         oldSeq1 = seq1
   576 
   577 def expandedSeqDict(seqDict):
   578     '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   579     newDict = seqDict.copy()
   580     for name, x in seqDict.items():
   581         if "." in name:
   582             base = name.split(".")[-1]
   583             if base in newDict:  # an ambiguous case was found:
   584                 return seqDict   # so give up completely
   585             newDict[base] = x
   586     return newDict
   587 
   588 def readBed(fileName, rangeDict):
   589     for line in myOpen(fileName):
   590         w = line.split()
   591         if not w: continue
   592         seqName = w[0]
   593         if seqName not in rangeDict: continue
   594         beg = int(w[1])
   595         end = int(w[2])
   596         layer = 900
   597         color = "#fbf"
   598         if len(w) > 4:
   599             if w[4] != ".":
   600                 layer = float(w[4])
   601             if len(w) > 5:
   602                 if len(w) > 8 and w[8].count(",") == 2:
   603                     color = "rgb(" + w[8] + ")"
   604                 else:
   605                     strand = w[5]
   606                     isRev = rangeDict[seqName][0][2]
   607                     if strand == "+" and not isRev or strand == "-" and isRev:
   608                         color = "#ffe8e8"
   609                     if strand == "-" and not isRev or strand == "+" and isRev:
   610                         color = "#e8e8ff"
   611         yield layer, color, seqName, beg, end
   612 
   613 def commaSeparatedInts(text):
   614     return map(int, text.rstrip(",").split(","))
   615 
   616 def readGenePred(opts, fileName, rangeDict):
   617     for line in myOpen(fileName):
   618         fields = line.split()
   619         if not fields: continue
   620         if fields[2] not in "+-": fields = fields[1:]
   621         seqName = fields[1]
   622         if seqName not in rangeDict: continue
   623         #strand = fields[2]
   624         cdsBeg = int(fields[5])
   625         cdsEnd = int(fields[6])
   626         exonBegs = commaSeparatedInts(fields[8])
   627         exonEnds = commaSeparatedInts(fields[9])
   628         for beg, end in zip(exonBegs, exonEnds):
   629             yield 300, opts.exon_color, seqName, beg, end
   630             b = max(beg, cdsBeg)
   631             e = min(end, cdsEnd)
   632             if b < e: yield 400, opts.cds_color, seqName, b, e
   633 
   634 def readRmsk(fileName, rangeDict):
   635     for line in myOpen(fileName):
   636         fields = line.split()
   637         if len(fields) == 17:  # rmsk.txt
   638             seqName = fields[5]
   639             if seqName not in rangeDict: continue  # do this ASAP for speed
   640             beg = int(fields[6])
   641             end = int(fields[7])
   642             strand = fields[9]
   643             repeatClass = fields[11]
   644         elif len(fields) == 15:  # .out
   645             seqName = fields[4]
   646             if seqName not in rangeDict: continue
   647             beg = int(fields[5]) - 1
   648             end = int(fields[6])
   649             strand = fields[8]
   650             repeatClass = fields[10].split("/")[0]
   651         else:
   652             continue
   653         if repeatClass in ("Low_complexity", "Simple_repeat", "Satellite"):
   654             yield 200, "#fbf", seqName, beg, end
   655         elif (strand == "+") != rangeDict[seqName][0][2]:
   656             yield 100, "#ffe8e8", seqName, beg, end
   657         else:
   658             yield 100, "#e8e8ff", seqName, beg, end
   659 
   660 def isExtraFirstGapField(fields):
   661     return fields[4].isdigit()
   662 
   663 def readGaps(opts, fileName, rangeDict):
   664     '''Read locations of unsequenced gaps, from an agp or gap file.'''
   665     for line in myOpen(fileName):
   666         w = line.split()
   667         if not w or w[0][0] == "#": continue
   668         if isExtraFirstGapField(w): w = w[1:]
   669         if w[4] not in "NU": continue
   670         seqName = w[0]
   671         if seqName not in rangeDict: continue
   672         end = int(w[2])
   673         beg = end - int(w[5])  # zero-based coordinate
   674         if w[7] == "yes":
   675             yield 3000, opts.bridged_color, seqName, beg, end
   676         else:
   677             yield 2000, opts.unbridged_color, seqName, beg, end
   678 
   679 def bedBoxes(beds, rangeDict, margin, edge, isTop, bpPerPix):
   680     for layer, color, seqName, bedBeg, bedEnd in beds:
   681         for rangeBeg, rangeEnd, isReverseRange, origin in rangeDict[seqName]:
   682             beg = max(bedBeg, rangeBeg)
   683             end = min(bedEnd, rangeEnd)
   684             if beg >= end: continue
   685             if isReverseRange:
   686                 beg, end = -end, -beg
   687             if layer <= 1000:
   688                 # include partly-covered pixels
   689                 b = (origin + beg) // bpPerPix
   690                 e = div_ceil(origin + end, bpPerPix)
   691             else:
   692                 # exclude partly-covered pixels
   693                 b = div_ceil(origin + beg, bpPerPix)
   694                 e = (origin + end) // bpPerPix
   695                 if e <= b: continue
   696                 if bedEnd >= rangeEnd:  # include partly-covered end pixels
   697                     if isReverseRange:
   698                         b = (origin + beg) // bpPerPix
   699                     else:
   700                         e = div_ceil(origin + end, bpPerPix)
   701             if isTop:
   702                 box = b, margin, e, edge
   703             else:
   704                 box = margin, b, edge, e
   705             yield layer, color, box
   706 
   707 def drawAnnotations(im, boxes):
   708     # xxx use partial transparency for different-color overlaps?
   709     for layer, color, box in boxes:
   710         im.paste(color, box)
   711 
   712 def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
   713     '''Return axis labels with endpoint & sort-order information.'''
   714     maxWidth = end - beg
   715     for i, j, k in zip(labels, rangePixBegs, rangePixLens):
   716         text, textWidth, textHeight, strandNum = i
   717         if textWidth > maxWidth:
   718             continue
   719         labelBeg = j + (k - textWidth) // 2
   720         labelEnd = labelBeg + textWidth
   721         sortKey = textWidth - k
   722         if labelBeg < beg:
   723             sortKey += maxWidth * (beg - labelBeg)
   724             labelBeg = beg
   725             labelEnd = beg + textWidth
   726         if labelEnd > end:
   727             sortKey += maxWidth * (labelEnd - end)
   728             labelEnd = end
   729             labelBeg = end - textWidth
   730         yield sortKey, labelBeg, labelEnd, text, textHeight, strandNum
   731 
   732 def nonoverlappingLabels(labels, minPixTweenLabels):
   733     '''Get a subset of non-overlapping axis labels, greedily.'''
   734     out = []
   735     for i in labels:
   736         beg = i[1] - minPixTweenLabels
   737         end = i[2] + minPixTweenLabels
   738         if all(j[2] <= beg or j[1] >= end for j in out):
   739             out.append(i)
   740     return out
   741 
   742 def axisImage(labels, rangePixBegs, rangePixLens, textRot,
   743               textAln, font, image_mode, opts):
   744     '''Make an image of axis labels.'''
   745     beg = rangePixBegs[0]
   746     end = rangePixBegs[-1] + rangePixLens[-1]
   747     margin = max(i[2] for i in labels)
   748     labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
   749     minPixTweenLabels = 0 if textRot else opts.label_space
   750     labels = nonoverlappingLabels(labels, minPixTweenLabels)
   751     image_size = (margin, end) if textRot else (end, margin)
   752     im = Image.new(image_mode, image_size, opts.margin_color)
   753     draw = ImageDraw.Draw(im)
   754     for sortKey, labelBeg, labelEnd, text, textHeight, strandNum in labels:
   755         base = margin - textHeight if textAln else 0
   756         position = (base, labelBeg) if textRot else (labelBeg, base)
   757         fill = ("black", opts.forwardcolor, opts.reversecolor)[strandNum]
   758         draw.text(position, text, font=font, fill=fill)
   759     return im
   760 
   761 def rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
   762     for i, j, k in zip(sortedRanges, rangePixBegs, rangePixLens):
   763         seqName, rangeBeg, rangeEnd, strandNum = i
   764         isReverseRange = (strandNum > 1)
   765         if isReverseRange:
   766             origin = bpPerPix * (j + k) + rangeBeg
   767         else:
   768             origin = bpPerPix * j - rangeBeg
   769         yield seqName, (rangeBeg, rangeEnd, isReverseRange, origin)
   770 
   771 def rangesPerSeq(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
   772     a = rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix)
   773     for k, v in itertools.groupby(a, itemgetter(0)):
   774         yield k, sorted(i[1] for i in v)
   775 
   776 def getFont(opts):
   777     if opts.fontfile:
   778         return ImageFont.truetype(opts.fontfile, opts.fontsize)
   779     fileNames = []
   780     try:
   781         x = ["fc-match", "-f%{file}", "arial"]
   782         p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
   783                              universal_newlines=True)
   784         out, err = p.communicate()
   785         fileNames.append(out)
   786     except OSError as e:
   787         logging.info("fc-match error: " + str(e))
   788     fileNames.append("/Library/Fonts/Arial.ttf")  # for Mac
   789     for i in fileNames:
   790         try:
   791             font = ImageFont.truetype(i, opts.fontsize)
   792             logging.info("font: " + i)
   793             return font
   794         except IOError as e:
   795             logging.info("font load error: " + str(e))
   796     return ImageFont.load_default()
   797 
   798 def sequenceSizesAndNames(seqRanges):
   799     for seqName, ranges in itertools.groupby(seqRanges, itemgetter(0)):
   800         size = sum(e - b for n, b, e in ranges)
   801         yield size, seqName
   802 
   803 def biggestSequences(seqRanges, maxNumOfSequences):
   804     s = sorted(sequenceSizesAndNames(seqRanges), reverse=True)
   805     if len(s) > maxNumOfSequences:
   806         logging.warning("too many sequences - discarding the smallest ones")
   807         s = s[:maxNumOfSequences]
   808     return set(i[1] for i in s)
   809 
   810 def remainingSequenceRanges(seqRanges, alignments, seqIndex):
   811     remainingSequences = set(i[seqIndex] for i in alignments)
   812     return [i for i in seqRanges if i[0] in remainingSequences]
   813 
   814 def lastDotplot(opts, args):
   815     logLevel = logging.INFO if opts.verbose else logging.WARNING
   816     logging.basicConfig(format="%(filename)s: %(message)s", level=logLevel)
   817 
   818     font = getFont(opts)
   819     image_mode = 'RGB'
   820     forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
   821     reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
   822     zipped_colors = zip(forward_color, reverse_color)
   823     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
   824 
   825     maxGap1, maxGapB1 = twoValuesFromOption(opts.max_gap1, ":")
   826     maxGap2, maxGapB2 = twoValuesFromOption(opts.max_gap2, ":")
   827 
   828     logging.info("reading alignments...")
   829     alnData = readAlignments(args[0], opts)
   830     alignments, seqRanges1, coverDict1, seqRanges2, coverDict2 = alnData
   831     if not alignments: raise Exception("there are no alignments")
   832     logging.info("cutting...")
   833     coverDict1 = dict(mergedRangesPerSeq(coverDict1))
   834     coverDict2 = dict(mergedRangesPerSeq(coverDict2))
   835     minAlignedBases = min(coveredLength(coverDict1), coveredLength(coverDict2))
   836     pad = int(opts.pad * minAlignedBases)
   837     cutRanges1 = list(trimmed(seqRanges1, coverDict1, minAlignedBases,
   838                               maxGap1, pad, pad))
   839     cutRanges2 = list(trimmed(seqRanges2, coverDict2, minAlignedBases,
   840                               maxGap2, pad, pad))
   841 
   842     biggestSeqs1 = biggestSequences(cutRanges1, opts.maxseqs)
   843     cutRanges1 = [i for i in cutRanges1 if i[0] in biggestSeqs1]
   844     alignments = [i for i in alignments if i[0] in biggestSeqs1]
   845     cutRanges2 = remainingSequenceRanges(cutRanges2, alignments, 1)
   846 
   847     biggestSeqs2 = biggestSequences(cutRanges2, opts.maxseqs)
   848     cutRanges2 = [i for i in cutRanges2 if i[0] in biggestSeqs2]
   849     alignments = [i for i in alignments if i[1] in biggestSeqs2]
   850     cutRanges1 = remainingSequenceRanges(cutRanges1, alignments, 0)
   851 
   852     logging.info("reading secondary alignments...")
   853     alnDataB = readSecondaryAlignments(opts, cutRanges1, cutRanges2)
   854     alignmentsB, seqRangesB1, coverDictB1, seqRangesB2, coverDictB2 = alnDataB
   855     logging.info("cutting...")
   856     coverDictB1 = dict(mergedRangesPerSeq(coverDictB1))
   857     coverDictB2 = dict(mergedRangesPerSeq(coverDictB2))
   858     cutRangesB1 = trimmed(seqRangesB1, coverDictB1, minAlignedBases,
   859                           maxGapB1, 0, 0)
   860     cutRangesB2 = trimmed(seqRangesB2, coverDictB2, minAlignedBases,
   861                           maxGapB2, 0, 0)
   862 
   863     logging.info("sorting...")
   864     sortOut = allSortedRanges(opts, alignments, alignmentsB,
   865                               cutRanges1, cutRangesB1, cutRanges2, cutRangesB2)
   866     sortedRanges1, sortedRanges2 = sortOut
   867 
   868     textRot1 = "vertical".startswith(opts.rot1)
   869     i1 = dataFromRanges(sortedRanges1, font,
   870                         opts.fontsize, image_mode, opts.labels1, textRot1)
   871     rangeSizes1, labelData1, tMargin = i1
   872 
   873     textRot2 = "horizontal".startswith(opts.rot2)
   874     i2 = dataFromRanges(sortedRanges2, font,
   875                         opts.fontsize, image_mode, opts.labels2, textRot2)
   876     rangeSizes2, labelData2, lMargin = i2
   877 
   878     maxPixels1 = opts.width  - lMargin
   879     maxPixels2 = opts.height - tMargin
   880     bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
   881     bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
   882     bpPerPix = max(bpPerPix1, bpPerPix2)
   883     logging.info("bp per pixel = " + str(bpPerPix))
   884 
   885     p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
   886     rangePixBegs1, rangePixLens1, width = p1
   887     rangeDict1 = dict(rangesPerSeq(sortedRanges1, rangePixBegs1,
   888                                    rangePixLens1, bpPerPix))
   889 
   890     p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
   891     rangePixBegs2, rangePixLens2, height = p2
   892     rangeDict2 = dict(rangesPerSeq(sortedRanges2, rangePixBegs2,
   893                                    rangePixLens2, bpPerPix))
   894 
   895     logging.info("width:  " + str(width))
   896     logging.info("height: " + str(height))
   897 
   898     logging.info("processing alignments...")
   899     allAlignments = alignments + alignmentsB
   900     hits = alignmentPixels(width, height, allAlignments, bpPerPix,
   901                            rangeDict1, rangeDict2)
   902 
   903     logging.info("reading annotations...")
   904 
   905     rangeDict1 = expandedSeqDict(rangeDict1)
   906     beds1 = itertools.chain(readBed(opts.bed1, rangeDict1),
   907                             readRmsk(opts.rmsk1, rangeDict1),
   908                             readGenePred(opts, opts.genePred1, rangeDict1),
   909                             readGaps(opts, opts.gap1, rangeDict1))
   910     b1 = bedBoxes(beds1, rangeDict1, tMargin, height, True, bpPerPix)
   911 
   912     rangeDict2 = expandedSeqDict(rangeDict2)
   913     beds2 = itertools.chain(readBed(opts.bed2, rangeDict2),
   914                             readRmsk(opts.rmsk2, rangeDict2),
   915                             readGenePred(opts, opts.genePred2, rangeDict2),
   916                             readGaps(opts, opts.gap2, rangeDict2))
   917     b2 = bedBoxes(beds2, rangeDict2, lMargin, width, False, bpPerPix)
   918 
   919     boxes = sorted(itertools.chain(b1, b2))
   920 
   921     logging.info("drawing...")
   922 
   923     image_size = width, height
   924     im = Image.new(image_mode, image_size, opts.background_color)
   925 
   926     drawAnnotations(im, boxes)
   927 
   928     joinA, joinB = twoValuesFromOption(opts.join, ":")
   929     if joinA in "13":
   930         drawJoins(im, alignments, bpPerPix, 0, rangeDict1, rangeDict2)
   931     if joinB in "13":
   932         drawJoins(im, alignmentsB, bpPerPix, 0, rangeDict1, rangeDict2)
   933     if joinA in "23":
   934         drawJoins(im, alignments, bpPerPix, 1, rangeDict2, rangeDict1)
   935     if joinB in "23":
   936         drawJoins(im, alignmentsB, bpPerPix, 1, rangeDict2, rangeDict1)
   937 
   938     for i in range(height):
   939         for j in range(width):
   940             store_value = hits[i * width + j]
   941             xy = j, i
   942             if   store_value == 1: im.putpixel(xy, forward_color)
   943             elif store_value == 2: im.putpixel(xy, reverse_color)
   944             elif store_value == 3: im.putpixel(xy, overlap_color)
   945 
   946     if opts.fontsize != 0:
   947         axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
   948                           textRot1, False, font, image_mode, opts)
   949         if textRot1:
   950             axis1 = axis1.transpose(Image.ROTATE_90)
   951         axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
   952                           textRot2, textRot2, font, image_mode, opts)
   953         if not textRot2:
   954             axis2 = axis2.transpose(Image.ROTATE_270)
   955         im.paste(axis1, (0, 0))
   956         im.paste(axis2, (0, 0))
   957 
   958     for i in rangePixBegs1[1:]:
   959         box = i - opts.border_pixels, tMargin, i, height
   960         im.paste(opts.border_color, box)
   961 
   962     for i in rangePixBegs2[1:]:
   963         box = lMargin, i - opts.border_pixels, width, i
   964         im.paste(opts.border_color, box)
   965 
   966     im.save(args[1])
   967 
   968 if __name__ == "__main__":
   969     usage = """%prog --help
   970    or: %prog [options] maf-or-tab-alignments dotplot.png
   971    or: %prog [options] maf-or-tab-alignments dotplot.gif
   972    or: ..."""
   973     description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
   974     op = optparse.OptionParser(usage=usage, description=description)
   975     op.add_option("-v", "--verbose", action="count",
   976                   help="show progress messages & data about the plot")
   977     # Replace "width" & "height" with a single "length" option?
   978     op.add_option("-x", "--width", metavar="INT", type="int", default=1000,
   979                   help="maximum width in pixels (default: %default)")
   980     op.add_option("-y", "--height", metavar="INT", type="int", default=1000,
   981                   help="maximum height in pixels (default: %default)")
   982     op.add_option("-m", "--maxseqs", type="int", default=100, metavar="M",
   983                   help="maximum number of horizontal or vertical sequences "
   984                   "(default=%default)")
   985     op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
   986                   default=[],
   987                   help="which sequences to show from the 1st genome")
   988     op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
   989                   default=[],
   990                   help="which sequences to show from the 2nd genome")
   991     op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
   992                   help="color for forward alignments (default: %default)")
   993     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
   994                   help="color for reverse alignments (default: %default)")
   995     op.add_option("--alignments", metavar="FILE", help="secondary alignments")
   996     op.add_option("--sort1", default="1", metavar="N",
   997                   help="genome1 sequence order: 0=input order, 1=name order, "
   998                   "2=length order, 3=alignment order (default=%default)")
   999     op.add_option("--sort2", default="1", metavar="N",
  1000                   help="genome2 sequence order: 0=input order, 1=name order, "
  1001                   "2=length order, 3=alignment order (default=%default)")
  1002     op.add_option("--strands1", default="0", metavar="N", help=
  1003                   "genome1 sequence orientation: 0=forward orientation, "
  1004                   "1=alignment orientation (default=%default)")
  1005     op.add_option("--strands2", default="0", metavar="N", help=
  1006                   "genome2 sequence orientation: 0=forward orientation, "
  1007                   "1=alignment orientation (default=%default)")
  1008     op.add_option("--max-gap1", metavar="FRAC", default="0.5,2", help=
  1009                   "maximum unaligned (end,mid) gap in genome1: "
  1010                   "fraction of aligned length (default=%default)")
  1011     op.add_option("--max-gap2", metavar="FRAC", default="0.5,2", help=
  1012                   "maximum unaligned (end,mid) gap in genome2: "
  1013                   "fraction of aligned length (default=%default)")
  1014     op.add_option("--pad", metavar="FRAC", type="float", default=0.04, help=
  1015                   "pad length when cutting unaligned gaps: "
  1016                   "fraction of aligned length (default=%default)")
  1017     op.add_option("-j", "--join", default="0", metavar="N", help=
  1018                   "join: 0=nothing, 1=alignments adjacent in genome1, "
  1019                   "2=alignments adjacent in genome2 (default=%default)")
  1020     op.add_option("--border-pixels", metavar="INT", type="int", default=1,
  1021                   help="number of pixels between sequences (default=%default)")
  1022     op.add_option("--border-color", metavar="COLOR", default="black",
  1023                   help="color for pixels between sequences (default=%default)")
  1024     # --break-color and/or --break-pixels for intra-sequence breaks?
  1025     op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
  1026                   help="margin color")
  1027 
  1028     og = optparse.OptionGroup(op, "Text options")
  1029     og.add_option("-f", "--fontfile", metavar="FILE",
  1030                   help="TrueType or OpenType font file")
  1031     og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
  1032                   help="TrueType or OpenType font size (default: %default)")
  1033     og.add_option("--labels1", type="int", default=0, metavar="N", help=
  1034                   "genome1 labels: 0=name, 1=name:length, "
  1035                   "2=name:start:length, 3=name:start-end (default=%default)")
  1036     og.add_option("--labels2", type="int", default=0, metavar="N", help=
  1037                   "genome2 labels: 0=name, 1=name:length, "
  1038                   "2=name:start:length, 3=name:start-end (default=%default)")
  1039     og.add_option("--rot1", metavar="ROT", default="h",
  1040                   help="text rotation for the 1st genome (default=%default)")
  1041     og.add_option("--rot2", metavar="ROT", default="v",
  1042                   help="text rotation for the 2nd genome (default=%default)")
  1043     op.add_option_group(og)
  1044 
  1045     og = optparse.OptionGroup(op, "Annotation options")
  1046     og.add_option("--bed1", metavar="FILE",
  1047                   help="read genome1 annotations from BED file")
  1048     og.add_option("--bed2", metavar="FILE",
  1049                   help="read genome2 annotations from BED file")
  1050     og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
  1051                   "RepeatMasker .out or rmsk.txt file")
  1052     og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
  1053                   "RepeatMasker .out or rmsk.txt file")
  1054     op.add_option_group(og)
  1055 
  1056     og = optparse.OptionGroup(op, "Gene options")
  1057     og.add_option("--genePred1", metavar="FILE",
  1058                   help="read genome1 genes from genePred file")
  1059     og.add_option("--genePred2", metavar="FILE",
  1060                   help="read genome2 genes from genePred file")
  1061     og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
  1062                   help="color for exons (default=%default)")
  1063     og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
  1064                   help="color for protein-coding regions (default=%default)")
  1065     op.add_option_group(og)
  1066 
  1067     og = optparse.OptionGroup(op, "Unsequenced gap options")
  1068     og.add_option("--gap1", metavar="FILE",
  1069                   help="read genome1 unsequenced gaps from agp or gap file")
  1070     og.add_option("--gap2", metavar="FILE",
  1071                   help="read genome2 unsequenced gaps from agp or gap file")
  1072     og.add_option("--bridged-color", metavar="COLOR", default="yellow",
  1073                   help="color for bridged gaps (default: %default)")
  1074     og.add_option("--unbridged-color", metavar="COLOR", default="orange",
  1075                   help="color for unbridged gaps (default: %default)")
  1076     op.add_option_group(og)
  1077     (opts, args) = op.parse_args()
  1078     if len(args) != 2: op.error("2 arguments needed")
  1079 
  1080     opts.background_color = "white"
  1081     opts.label_space = 5     # minimum number of pixels between axis labels
  1082 
  1083     try: lastDotplot(opts, args)
  1084     except KeyboardInterrupt: pass  # avoid silly error message
  1085     except Exception as e:
  1086         prog = os.path.basename(sys.argv[0])
  1087         sys.exit(prog + ": error: " + str(e))