scripts/last-dotplot
author Martin C. Frith
Fri Jul 31 14:18:38 2020 +0900 (21 months ago)
changeset 1073 8ec299b42e28
parent 1072 e43ecf1c3fb6
child 1074 170ab6c64728
permissions -rwxr-xr-x
last-dotplot: read annotations a bit faster
     1 #! /usr/bin/env python
     2 # Author: Martin C. Frith 2008
     3 # SPDX-License-Identifier: GPL-3.0-or-later
     4 
     5 # Read pair-wise alignments in MAF or LAST tabular format: write an
     6 # "Oxford grid", a.k.a. dotplot.
     7 
     8 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
     9 # pixels with one or more aligned nt-pairs are black.  This can look
    10 # too crowded for large genome alignments.  I tried shading each pixel
    11 # according to the number of aligned nt-pairs within it, but the
    12 # result is too faint.  How can this be done better?
    13 
    14 import collections
    15 import functools
    16 import gzip
    17 from fnmatch import fnmatchcase
    18 import logging
    19 from operator import itemgetter
    20 import subprocess
    21 import itertools, optparse, os, re, sys
    22 
    23 # Try to make PIL/PILLOW work:
    24 try:
    25     from PIL import Image, ImageDraw, ImageFont, ImageColor
    26 except ImportError:
    27     import Image, ImageDraw, ImageFont, ImageColor
    28 
    29 try:
    30     from future_builtins import zip
    31 except ImportError:
    32     pass
    33 
    34 def myOpen(fileName):  # faster than fileinput
    35     if fileName is None:
    36         return []
    37     if fileName == "-":
    38         return sys.stdin
    39     if fileName.endswith(".gz"):
    40         return gzip.open(fileName, "rt")  # xxx dubious for Python2
    41     return open(fileName)
    42 
    43 def groupByFirstItem(things):
    44     for k, v in itertools.groupby(things, itemgetter(0)):
    45         yield k, [i[1:] for i in v]
    46 
    47 def croppedBlocks(blocks, ranges1, ranges2):
    48     headBeg1, headBeg2, headSize = blocks[0]
    49     for r1 in ranges1:
    50         for r2 in ranges2:
    51             cropBeg1, cropEnd1 = r1
    52             if headBeg1 < 0:
    53                 cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
    54             cropBeg2, cropEnd2 = r2
    55             if headBeg2 < 0:
    56                 cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
    57             for beg1, beg2, size in blocks:
    58                 b1 = max(cropBeg1, beg1)
    59                 e1 = min(cropEnd1, beg1 + size)
    60                 if b1 >= e1: continue
    61                 offset = beg2 - beg1
    62                 b2 = max(cropBeg2, b1 + offset)
    63                 e2 = min(cropEnd2, e1 + offset)
    64                 if b2 >= e2: continue
    65                 yield b2 - offset, b2, e2 - b2
    66 
    67 def tabBlocks(beg1, beg2, blocks):
    68     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
    69     for i in blocks.split(","):
    70         if ":" in i:
    71             x, y = i.split(":")
    72             beg1 += int(x)
    73             beg2 += int(y)
    74         else:
    75             size = int(i)
    76             yield beg1, beg2, size
    77             beg1 += size
    78             beg2 += size
    79 
    80 def mafBlocks(beg1, beg2, seq1, seq2):
    81     '''Get the gapless blocks of an alignment, from MAF format.'''
    82     size = 0
    83     for x, y in zip(seq1, seq2):
    84         if x == "-":
    85             if size:
    86                 yield beg1, beg2, size
    87                 beg1 += size
    88                 beg2 += size
    89                 size = 0
    90             beg2 += 1
    91         elif y == "-":
    92             if size:
    93                 yield beg1, beg2, size
    94                 beg1 += size
    95                 beg2 += size
    96                 size = 0
    97             beg1 += 1
    98         else:
    99             size += 1
   100     if size: yield beg1, beg2, size
   101 
   102 def alignmentFromSegment(qrySeqName, qrySeqLen, segment):
   103     refSeqLen = sys.maxsize  # XXX
   104     refSeqName, refSeqBeg, qrySeqBeg, size = segment
   105     block = refSeqBeg, qrySeqBeg, size
   106     return refSeqName, refSeqLen, qrySeqName, qrySeqLen, [block]
   107 
   108 def alignmentInput(lines):
   109     '''Get alignments and sequence lengths, from MAF or tabular format.'''
   110     mafCount = 0
   111     qrySeqName = ""
   112     segments = []
   113     for line in lines:
   114         w = line.split()
   115         if line[0] == "#":
   116             pass
   117         elif len(w) == 1:
   118             for i in segments:
   119                 yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
   120             qrySeqName = w[0]
   121             qrySeqLen = 0
   122             segments = []
   123         elif len(w) == 2 and qrySeqName and w[1].isdigit():
   124             qrySeqLen += int(w[1])
   125         elif len(w) == 4 and qrySeqName and w[1].isdigit() and w[3].isdigit():
   126             refSeqName, refSeqBeg, refSeqEnd = w[0], int(w[1]), int(w[3])
   127             size = abs(refSeqEnd - refSeqBeg)
   128             if refSeqBeg > refSeqEnd:
   129                 refSeqBeg = -refSeqBeg
   130             segments.append((refSeqName, refSeqBeg, qrySeqLen, size))
   131             qrySeqLen += size
   132         elif line[0].isdigit():  # tabular format
   133             chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
   134             if w[4] == "-": beg1 -= seqlen1
   135             chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
   136             if w[9] == "-": beg2 -= seqlen2
   137             blocks = tabBlocks(beg1, beg2, w[11])
   138             yield chr1, seqlen1, chr2, seqlen2, blocks
   139         elif line[0] == "s":  # MAF format
   140             if mafCount == 0:
   141                 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
   142                 if w[4] == "-": beg1 -= seqlen1
   143                 mafCount = 1
   144             else:
   145                 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
   146                 if w[4] == "-": beg2 -= seqlen2
   147                 blocks = mafBlocks(beg1, beg2, seq1, seq2)
   148                 yield chr1, seqlen1, chr2, seqlen2, blocks
   149                 mafCount = 0
   150     for i in segments:
   151         yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
   152 
   153 def seqRequestFromText(text):
   154     if ":" in text:
   155         pattern, interval = text.rsplit(":", 1)
   156         if "-" in interval:
   157             beg, end = interval.rsplit("-", 1)
   158             return pattern, int(beg), int(end)  # beg may be negative
   159     return text, 0, sys.maxsize
   160 
   161 def rangesFromSeqName(seqRequests, name, seqLen):
   162     if seqRequests:
   163         base = name.split(".")[-1]  # allow for names like hg19.chr7
   164         for pat, beg, end in seqRequests:
   165             if fnmatchcase(name, pat) or fnmatchcase(base, pat):
   166                 yield max(beg, 0), min(end, seqLen)
   167     else:
   168         yield 0, seqLen
   169 
   170 def updateSeqs(coverDict, seqRanges, seqName, ranges, coveredRange):
   171     beg, end = coveredRange
   172     if beg < 0:
   173         coveredRange = -end, -beg
   174     if seqName in coverDict:
   175         coverDict[seqName].append(coveredRange)
   176     else:
   177         coverDict[seqName] = [coveredRange]
   178         for beg, end in ranges:
   179             r = seqName, beg, end
   180             seqRanges.append(r)
   181 
   182 def readAlignments(fileName, opts):
   183     '''Get alignments and sequence limits, from MAF or tabular format.'''
   184     seqRequests1 = [seqRequestFromText(i) for i in opts.seq1]
   185     seqRequests2 = [seqRequestFromText(i) for i in opts.seq2]
   186 
   187     alignments = []
   188     seqRanges1 = []
   189     seqRanges2 = []
   190     coverDict1 = {}
   191     coverDict2 = {}
   192     lines = myOpen(fileName)
   193     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   194         ranges1 = sorted(rangesFromSeqName(seqRequests1, seqName1, seqLen1))
   195         if not ranges1: continue
   196         ranges2 = sorted(rangesFromSeqName(seqRequests2, seqName2, seqLen2))
   197         if not ranges2: continue
   198         b = list(croppedBlocks(list(blocks), ranges1, ranges2))
   199         if not b: continue
   200         aln = seqName1, seqName2, b
   201         alignments.append(aln)
   202         coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
   203         updateSeqs(coverDict1, seqRanges1, seqName1, ranges1, coveredRange1)
   204         coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
   205         updateSeqs(coverDict2, seqRanges2, seqName2, ranges2, coveredRange2)
   206     return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
   207 
   208 def nameAndRangesFromDict(cropDict, seqName):
   209     if seqName in cropDict:
   210         return seqName, cropDict[seqName]
   211     n = seqName.split(".")[-1]
   212     if n in cropDict:
   213         return n, cropDict[n]
   214     return seqName, []
   215 
   216 def rangesForSecondaryAlignments(primaryRanges, seqLen):
   217     if primaryRanges:
   218         return primaryRanges
   219     return [(0, seqLen)]
   220 
   221 def readSecondaryAlignments(opts, cropRanges1, cropRanges2):
   222     cropDict1 = dict(groupByFirstItem(cropRanges1))
   223     cropDict2 = dict(groupByFirstItem(cropRanges2))
   224 
   225     alignments = []
   226     seqRanges1 = []
   227     seqRanges2 = []
   228     coverDict1 = {}
   229     coverDict2 = {}
   230     lines = myOpen(opts.alignments)
   231     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   232         seqName1, ranges1 = nameAndRangesFromDict(cropDict1, seqName1)
   233         seqName2, ranges2 = nameAndRangesFromDict(cropDict2, seqName2)
   234         if not ranges1 and not ranges2:
   235             continue
   236         r1 = rangesForSecondaryAlignments(ranges1, seqLen1)
   237         r2 = rangesForSecondaryAlignments(ranges2, seqLen2)
   238         b = list(croppedBlocks(list(blocks), r1, r2))
   239         if not b: continue
   240         aln = seqName1, seqName2, b
   241         alignments.append(aln)
   242         if not ranges1:
   243             coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
   244             updateSeqs(coverDict1, seqRanges1, seqName1, r1, coveredRange1)
   245         if not ranges2:
   246             coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
   247             updateSeqs(coverDict2, seqRanges2, seqName2, r2, coveredRange2)
   248     return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
   249 
   250 def twoValuesFromOption(text, separator):
   251     if separator in text:
   252         return text.split(separator)
   253     return text, text
   254 
   255 def mergedRanges(ranges):
   256     oldBeg, maxEnd = ranges[0]
   257     for beg, end in ranges:
   258         if beg > maxEnd:
   259             yield oldBeg, maxEnd
   260             oldBeg = beg
   261             maxEnd = end
   262         elif end > maxEnd:
   263             maxEnd = end
   264     yield oldBeg, maxEnd
   265 
   266 def mergedRangesPerSeq(coverDict):
   267     for k, v in coverDict.items():
   268         v.sort()
   269         yield k, list(mergedRanges(v))
   270 
   271 def coveredLength(mergedCoverDict):
   272     return sum(sum(e - b for b, e in v) for v in mergedCoverDict.values())
   273 
   274 def trimmed(seqRanges, coverDict, minAlignedBases, maxGapFrac, endPad, midPad):
   275     maxEndGapFrac, maxMidGapFrac = twoValuesFromOption(maxGapFrac, ",")
   276     maxEndGap = max(float(maxEndGapFrac) * minAlignedBases, endPad * 1.0)
   277     maxMidGap = max(float(maxMidGapFrac) * minAlignedBases, midPad * 2.0)
   278 
   279     for seqName, rangeBeg, rangeEnd in seqRanges:
   280         seqBlocks = coverDict[seqName]
   281         blocks = [i for i in seqBlocks if i[0] < rangeEnd and i[1] > rangeBeg]
   282         if blocks[0][0] - rangeBeg > maxEndGap:
   283             rangeBeg = blocks[0][0] - endPad
   284         for j, y in enumerate(blocks):
   285             if j:
   286                 x = blocks[j - 1]
   287                 if y[0] - x[1] > maxMidGap:
   288                     yield seqName, rangeBeg, x[1] + midPad
   289                     rangeBeg = y[0] - midPad
   290         if rangeEnd - blocks[-1][1] > maxEndGap:
   291             rangeEnd = blocks[-1][1] + endPad
   292         yield seqName, rangeBeg, rangeEnd
   293 
   294 def rangesWithStrandInfo(seqRanges, strandOpt, alignments, seqIndex):
   295     if strandOpt == "1":
   296         forwardMinusReverse = collections.defaultdict(int)
   297         for i in alignments:
   298             blocks = i[2]
   299             beg1, beg2, size = blocks[0]
   300             numOfAlignedLetterPairs = sum(i[2] for i in blocks)
   301             if (beg1 < 0) != (beg2 < 0):  # opposite-strand alignment
   302                 numOfAlignedLetterPairs *= -1
   303             forwardMinusReverse[i[seqIndex]] += numOfAlignedLetterPairs
   304     strandNum = 0
   305     for seqName, beg, end in seqRanges:
   306         if strandOpt == "1":
   307             strandNum = 1 if forwardMinusReverse[seqName] >= 0 else 2
   308         yield seqName, beg, end, strandNum
   309 
   310 def natural_sort_key(my_string):
   311     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
   312     parts = re.split(r'(\d+)', my_string)
   313     parts[1::2] = map(int, parts[1::2])
   314     return parts
   315 
   316 def nameKey(oneSeqRanges):
   317     return natural_sort_key(oneSeqRanges[0][0])
   318 
   319 def sizeKey(oneSeqRanges):
   320     return sum(b - e for n, b, e, s in oneSeqRanges), nameKey(oneSeqRanges)
   321 
   322 def alignmentKey(seqNamesToLists, oneSeqRanges):
   323     seqName = oneSeqRanges[0][0]
   324     alignmentsOfThisSequence = seqNamesToLists[seqName]
   325     numOfAlignedLetterPairs = sum(i[3] for i in alignmentsOfThisSequence)
   326     toMiddle = numOfAlignedLetterPairs // 2
   327     for i in alignmentsOfThisSequence:
   328         toMiddle -= i[3]
   329         if toMiddle < 0:
   330             return i[1:3]  # sequence-rank and "position" of this alignment
   331 
   332 def rankAndFlipPerSeq(seqRanges):
   333     rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
   334     for rank, group in enumerate(rangesGroupedBySeqName):
   335         seqName, ranges = group
   336         strandNum = next(ranges)[3]
   337         flip = 1 if strandNum < 2 else -1
   338         yield seqName, (rank, flip)
   339 
   340 def alignmentSortData(alignments, seqIndex, otherNamesToRanksAndFlips):
   341     otherIndex = 1 - seqIndex
   342     for i in alignments:
   343         blocks = i[2]
   344         otherRank, otherFlip = otherNamesToRanksAndFlips[i[otherIndex]]
   345         otherPos = otherFlip * abs(blocks[0][otherIndex] +
   346                                    blocks[-1][otherIndex] + blocks[-1][2])
   347         numOfAlignedLetterPairs = sum(i[2] for i in blocks)
   348         yield i[seqIndex], otherRank, otherPos, numOfAlignedLetterPairs
   349 
   350 def mySortedRanges(seqRanges, sortOpt, seqIndex, alignments, otherRanges):
   351     rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
   352     g = [list(ranges) for seqName, ranges in rangesGroupedBySeqName]
   353     for i in g:
   354         if i[0][3] > 1:
   355             i.reverse()
   356     if sortOpt == "1":
   357         g.sort(key=nameKey)
   358     if sortOpt == "2":
   359         g.sort(key=sizeKey)
   360     if sortOpt == "3":
   361         otherNamesToRanksAndFlips = dict(rankAndFlipPerSeq(otherRanges))
   362         alns = sorted(alignmentSortData(alignments, seqIndex,
   363                                         otherNamesToRanksAndFlips))
   364         alnsGroupedBySeqName = itertools.groupby(alns, itemgetter(0))
   365         seqNamesToLists = dict((k, list(v)) for k, v in alnsGroupedBySeqName)
   366         g.sort(key=functools.partial(alignmentKey, seqNamesToLists))
   367     return [j for i in g for j in i]
   368 
   369 def allSortedRanges(opts, alignments, alignmentsB,
   370                     seqRanges1, seqRangesB1, seqRanges2, seqRangesB2):
   371     o1, oB1 = twoValuesFromOption(opts.strands1, ":")
   372     o2, oB2 = twoValuesFromOption(opts.strands2, ":")
   373     if o1 == "1" and o2 == "1":
   374         raise Exception("the strand options have circular dependency")
   375     seqRanges1 = list(rangesWithStrandInfo(seqRanges1, o1, alignments, 0))
   376     seqRanges2 = list(rangesWithStrandInfo(seqRanges2, o2, alignments, 1))
   377     seqRangesB1 = list(rangesWithStrandInfo(seqRangesB1, oB1, alignmentsB, 0))
   378     seqRangesB2 = list(rangesWithStrandInfo(seqRangesB2, oB2, alignmentsB, 1))
   379 
   380     o1, oB1 = twoValuesFromOption(opts.sort1, ":")
   381     o2, oB2 = twoValuesFromOption(opts.sort2, ":")
   382     if o1 == "3" and o2 == "3":
   383         raise Exception("the sort options have circular dependency")
   384     if o1 != "3":
   385         s1 = mySortedRanges(seqRanges1, o1, None, None, None)
   386     if o2 != "3":
   387         s2 = mySortedRanges(seqRanges2, o2, None, None, None)
   388     if o1 == "3":
   389         s1 = mySortedRanges(seqRanges1, o1, 0, alignments, s2)
   390     if o2 == "3":
   391         s2 = mySortedRanges(seqRanges2, o2, 1, alignments, s1)
   392     t1 = mySortedRanges(seqRangesB1, oB1, 0, alignmentsB, s2)
   393     t2 = mySortedRanges(seqRangesB2, oB2, 1, alignmentsB, s1)
   394     return s1 + t1, s2 + t2
   395 
   396 def prettyNum(n):
   397     t = str(n)
   398     groups = []
   399     while t:
   400         groups.append(t[-3:])
   401         t = t[:-3]
   402     return ",".join(reversed(groups))
   403 
   404 def sizeText(size):
   405     suffixes = "bp", "kb", "Mb", "Gb"
   406     for i, x in enumerate(suffixes):
   407         j = 10 ** (i * 3)
   408         if size < j * 10:
   409             return "%.2g" % (1.0 * size / j) + x
   410         if size < j * 1000 or i == len(suffixes) - 1:
   411             return "%.0f" % (1.0 * size / j) + x
   412 
   413 def labelText(seqRange, labelOpt):
   414     seqName, beg, end, strandNum = seqRange
   415     if labelOpt == 1:
   416         return seqName + ": " + sizeText(end - beg)
   417     if labelOpt == 2:
   418         return seqName + ":" + prettyNum(beg) + ": " + sizeText(end - beg)
   419     if labelOpt == 3:
   420         return seqName + ":" + prettyNum(beg) + "-" + prettyNum(end)
   421     return seqName
   422 
   423 def rangeLabels(seqRanges, labelOpt, font, textDraw, textRot):
   424     x = y = 0
   425     for r in seqRanges:
   426         text = labelText(r, labelOpt)
   427         if textDraw is not None:
   428             x, y = textDraw.textsize(text, font=font)
   429             if textRot:
   430                 x, y = y, x
   431         yield text, x, y, r[3]
   432 
   433 def dataFromRanges(sortedRanges, font, textDraw, labelOpt, textRot):
   434     for seqName, rangeBeg, rangeEnd, strandNum in sortedRanges:
   435         out = [seqName, str(rangeBeg), str(rangeEnd)]
   436         if strandNum > 0:
   437             out.append(".+-"[strandNum])
   438         logging.info("\t".join(out))
   439     logging.info("")
   440     rangeSizes = [e - b for n, b, e, s in sortedRanges]
   441     labs = list(rangeLabels(sortedRanges, labelOpt, font, textDraw, textRot))
   442     margin = max(i[2] for i in labs)
   443     # xxx the margin may be too big, because some labels may get omitted
   444     return rangeSizes, labs, margin
   445 
   446 def div_ceil(x, y):
   447     '''Return x / y rounded up.'''
   448     q, r = divmod(x, y)
   449     return q + (r != 0)
   450 
   451 def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
   452     '''Get the minimum bp-per-pixel that fits in the size limit.'''
   453     logging.info("choosing bp per pixel...")
   454     numOfRanges = len(rangeSizes)
   455     maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
   456     if maxPixelsInRanges < numOfRanges:
   457         raise Exception("can't fit the image: too many sequences?")
   458     negLimit = -maxPixelsInRanges
   459     negBpPerPix = sum(rangeSizes) // negLimit
   460     while True:
   461         if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
   462             return -negBpPerPix
   463         negBpPerPix -= 1
   464 
   465 def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
   466     '''Get the start pixel for each range.'''
   467     rangePixBegs = []
   468     pix_tot = margin - pixTweenRanges
   469     for i in rangePixLens:
   470         pix_tot += pixTweenRanges
   471         rangePixBegs.append(pix_tot)
   472         pix_tot += i
   473     return rangePixBegs
   474 
   475 def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
   476     '''Return pixel information about the ranges.'''
   477     rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
   478     rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
   479     tot_pix = rangePixBegs[-1] + rangePixLens[-1]
   480     return rangePixBegs, rangePixLens, tot_pix
   481 
   482 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
   483     while True:
   484         q1, r1 = divmod(beg1, bp_per_pix)
   485         q2, r2 = divmod(beg2, bp_per_pix)
   486         hits[q2 * width + q1] |= 1
   487         next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
   488         if next_pix >= size: break
   489         beg1 += next_pix
   490         beg2 += next_pix
   491         size -= next_pix
   492 
   493 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
   494     while True:
   495         q1, r1 = divmod(beg1, bp_per_pix)
   496         q2, r2 = divmod(beg2, bp_per_pix)
   497         hits[q2 * width + q1] |= 2
   498         next_pix = min(bp_per_pix - r1, r2 + 1)
   499         if next_pix >= size: break
   500         beg1 += next_pix
   501         beg2 -= next_pix
   502         size -= next_pix
   503 
   504 def strandAndOrigin(ranges, beg, size):
   505     isReverseStrand = (beg < 0)
   506     if isReverseStrand:
   507         beg = -(beg + size)
   508     for rangeBeg, rangeEnd, isReverseRange, origin in ranges:
   509         if rangeEnd > beg:  # assumes the ranges are sorted
   510             return (isReverseStrand != isReverseRange), origin
   511 
   512 def alignmentPixels(width, height, alignments, bp_per_pix,
   513                     rangeDict1, rangeDict2):
   514     hits = [0] * (width * height)  # the image data
   515     for seq1, seq2, blocks in alignments:
   516         beg1, beg2, size = blocks[0]
   517         isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
   518         isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
   519         for beg1, beg2, size in blocks:
   520             if isReverse1:
   521                 beg1 = -(beg1 + size)
   522                 beg2 = -(beg2 + size)
   523             if isReverse1 == isReverse2:
   524                 drawLineForward(hits, width, bp_per_pix,
   525                                 ori1 + beg1, ori2 + beg2, size)
   526             else:
   527                 drawLineReverse(hits, width, bp_per_pix,
   528                                 ori1 + beg1, ori2 - beg2 - 1, size)
   529     return hits
   530 
   531 def orientedBlocks(alignments, seqIndex):
   532     otherIndex = 1 - seqIndex
   533     for a in alignments:
   534         seq1, seq2, blocks = a
   535         for b in blocks:
   536             beg1, beg2, size = b
   537             if b[seqIndex] < 0:
   538                 b = -(beg1 + size), -(beg2 + size), size
   539             yield a[seqIndex], b[seqIndex], a[otherIndex], b[otherIndex], size
   540 
   541 def drawJoins(im, alignments, bpPerPix, seqIndex, rangeDict1, rangeDict2):
   542     blocks = orientedBlocks(alignments, seqIndex)
   543     oldSeq1 = ""
   544     for seq1, beg1, seq2, beg2, size in sorted(blocks):
   545         isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
   546         isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
   547         end1 = beg1 + size - 1
   548         end2 = beg2 + size - 1
   549         if isReverse1:
   550             beg1 = -(beg1 + 1)
   551             end1 = -(end1 + 1)
   552         if isReverse2:
   553             beg2 = -(beg2 + 1)
   554             end2 = -(end2 + 1)
   555         newPix1 = (ori1 + beg1) // bpPerPix
   556         newPix2 = (ori2 + beg2) // bpPerPix
   557         if seq1 == oldSeq1:
   558             lowerPix2 = min(oldPix2, newPix2)
   559             upperPix2 = max(oldPix2, newPix2)
   560             midPix1 = (oldPix1 + newPix1) // 2
   561             if isReverse1:
   562                 midPix1 = (oldPix1 + newPix1 + 1) // 2
   563                 oldPix1, newPix1 = newPix1, oldPix1
   564             if upperPix2 - lowerPix2 > 1 and oldPix1 <= newPix1 <= oldPix1 + 1:
   565                 if seqIndex == 0:
   566                     box = midPix1, lowerPix2, midPix1 + 1, upperPix2 + 1
   567                 else:
   568                     box = lowerPix2, midPix1, upperPix2 + 1, midPix1 + 1
   569                 im.paste("lightgray", box)
   570         oldPix1 = (ori1 + end1) // bpPerPix
   571         oldPix2 = (ori2 + end2) // bpPerPix
   572         oldSeq1 = seq1
   573 
   574 def expandedSeqDict(seqDict):
   575     '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   576     newDict = seqDict.copy()
   577     for name, x in seqDict.items():
   578         if "." in name:
   579             base = name.split(".")[-1]
   580             if base in newDict:  # an ambiguous case was found:
   581                 return seqDict   # so give up completely
   582             newDict[base] = x
   583     return newDict
   584 
   585 def readBed(fileName, rangeDict):
   586     for line in myOpen(fileName):
   587         w = line.split()
   588         if not w: continue
   589         seqName = w[0]
   590         if seqName not in rangeDict: continue
   591         seqRanges = rangeDict[seqName]
   592         beg = int(w[1])
   593         end = int(w[2])
   594         if all(beg >= i[2] or end <= i[1] for i in seqRanges):
   595             continue
   596         layer = 900
   597         color = "#fbf"
   598         if len(w) > 4:
   599             if w[4] != ".":
   600                 layer = float(w[4])
   601             if len(w) > 5:
   602                 if len(w) > 8 and w[8].count(",") == 2:
   603                     color = "rgb(" + w[8] + ")"
   604                 else:
   605                     strand = w[5]
   606                     isRev = (seqRanges[0][3] > 1)
   607                     if strand == "+" and not isRev or strand == "-" and isRev:
   608                         color = "#ffe8e8"
   609                     if strand == "-" and not isRev or strand == "+" and isRev:
   610                         color = "#e8e8ff"
   611         yield layer, color, seqName, beg, end
   612 
   613 def commaSeparatedInts(text):
   614     return map(int, text.rstrip(",").split(","))
   615 
   616 def readGenePred(opts, fileName, rangeDict):
   617     for line in myOpen(fileName):
   618         fields = line.split()
   619         if not fields: continue
   620         if fields[2] not in "+-":
   621             fields = fields[1:]
   622         seqName = fields[1]
   623         if seqName not in rangeDict: continue
   624         seqRanges = rangeDict[seqName]
   625         #strand = fields[2]
   626         cdsBeg = int(fields[5])
   627         cdsEnd = int(fields[6])
   628         exonBegs = commaSeparatedInts(fields[8])
   629         exonEnds = commaSeparatedInts(fields[9])
   630         for beg, end in zip(exonBegs, exonEnds):
   631             if all(beg >= i[2] or end <= i[1] for i in seqRanges):
   632                 continue
   633             yield 300, opts.exon_color, seqName, beg, end
   634             b = max(beg, cdsBeg)
   635             e = min(end, cdsEnd)
   636             if b < e: yield 400, opts.cds_color, seqName, b, e
   637 
   638 def readRmsk(fileName, rangeDict):
   639     for line in myOpen(fileName):
   640         fields = line.split()
   641         if len(fields) == 17:  # rmsk.txt
   642             seqName = fields[5]
   643             if seqName not in rangeDict: continue  # do this ASAP for speed
   644             beg = int(fields[6])
   645             end = int(fields[7])
   646             strand = fields[9]
   647             repeatClass = fields[11]
   648         elif len(fields) == 15:  # .out
   649             seqName = fields[4]
   650             if seqName not in rangeDict: continue
   651             beg = int(fields[5]) - 1
   652             end = int(fields[6])
   653             strand = fields[8]
   654             repeatClass = fields[10].split("/")[0]
   655         else:
   656             continue
   657         seqRanges = rangeDict[seqName]
   658         if all(beg >= i[2] or end <= i[1] for i in seqRanges):
   659             continue
   660         if repeatClass in ("Low_complexity", "Simple_repeat", "Satellite"):
   661             yield 200, "#fbf", seqName, beg, end
   662         elif (strand == "+") != (seqRanges[0][3] > 1):
   663             yield 100, "#ffe8e8", seqName, beg, end
   664         else:
   665             yield 100, "#e8e8ff", seqName, beg, end
   666 
   667 def isExtraFirstGapField(fields):
   668     return fields[4].isdigit()
   669 
   670 def readGaps(opts, fileName, rangeDict):
   671     '''Read locations of unsequenced gaps, from an agp or gap file.'''
   672     for line in myOpen(fileName):
   673         w = line.split()
   674         if not w or w[0][0] == "#": continue
   675         if isExtraFirstGapField(w): w = w[1:]
   676         if w[4] not in "NU": continue
   677         seqName = w[0]
   678         if seqName not in rangeDict: continue
   679         end = int(w[2])
   680         beg = end - int(w[5])  # zero-based coordinate
   681         if w[7] == "yes":
   682             yield 3000, opts.bridged_color, seqName, beg, end
   683         else:
   684             yield 2000, opts.unbridged_color, seqName, beg, end
   685 
   686 def bedBoxes(beds, rangeDict, margin, edge, isTop, bpPerPix):
   687     for layer, color, seqName, bedBeg, bedEnd in beds:
   688         for rangeBeg, rangeEnd, isReverseRange, origin in rangeDict[seqName]:
   689             beg = max(bedBeg, rangeBeg)
   690             end = min(bedEnd, rangeEnd)
   691             if beg >= end: continue
   692             if isReverseRange:
   693                 beg, end = -end, -beg
   694             if layer <= 1000:
   695                 # include partly-covered pixels
   696                 pixBeg = (origin + beg) // bpPerPix
   697                 pixEnd = div_ceil(origin + end, bpPerPix)
   698             else:
   699                 # exclude partly-covered pixels
   700                 pixBeg = div_ceil(origin + beg, bpPerPix)
   701                 pixEnd = (origin + end) // bpPerPix
   702                 if pixEnd <= pixBeg: continue
   703                 if bedEnd >= rangeEnd:  # include partly-covered end pixels
   704                     if isReverseRange:
   705                         pixBeg = (origin + beg) // bpPerPix
   706                     else:
   707                         pixEnd = div_ceil(origin + end, bpPerPix)
   708             if isTop:
   709                 box = pixBeg, margin, pixEnd, edge
   710             else:
   711                 box = margin, pixBeg, edge, pixEnd
   712             yield layer, color, box
   713 
   714 def drawAnnotations(im, boxes):
   715     # xxx use partial transparency for different-color overlaps?
   716     for layer, color, box in boxes:
   717         im.paste(color, box)
   718 
   719 def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
   720     '''Return axis labels with endpoint & sort-order information.'''
   721     maxWidth = end - beg
   722     for i, j, k in zip(labels, rangePixBegs, rangePixLens):
   723         text, textWidth, textHeight, strandNum = i
   724         if textWidth > maxWidth:
   725             continue
   726         labelBeg = j + (k - textWidth) // 2
   727         labelEnd = labelBeg + textWidth
   728         sortKey = textWidth - k
   729         if labelBeg < beg:
   730             sortKey += maxWidth * (beg - labelBeg)
   731             labelBeg = beg
   732             labelEnd = beg + textWidth
   733         if labelEnd > end:
   734             sortKey += maxWidth * (labelEnd - end)
   735             labelEnd = end
   736             labelBeg = end - textWidth
   737         yield sortKey, labelBeg, labelEnd, text, textHeight, strandNum
   738 
   739 def nonoverlappingLabels(labels, minPixTweenLabels):
   740     '''Get a subset of non-overlapping axis labels, greedily.'''
   741     out = []
   742     for i in labels:
   743         beg = i[1] - minPixTweenLabels
   744         end = i[2] + minPixTweenLabels
   745         if all(j[2] <= beg or j[1] >= end for j in out):
   746             out.append(i)
   747     return out
   748 
   749 def axisImage(labels, rangePixBegs, rangePixLens, textRot,
   750               textAln, font, image_mode, opts):
   751     '''Make an image of axis labels.'''
   752     beg = rangePixBegs[0]
   753     end = rangePixBegs[-1] + rangePixLens[-1]
   754     margin = max(i[2] for i in labels)
   755     labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
   756     minPixTweenLabels = 0 if textRot else opts.label_space
   757     labels = nonoverlappingLabels(labels, minPixTweenLabels)
   758     image_size = (margin, end) if textRot else (end, margin)
   759     im = Image.new(image_mode, image_size, opts.margin_color)
   760     draw = ImageDraw.Draw(im)
   761     for sortKey, labelBeg, labelEnd, text, textHeight, strandNum in labels:
   762         base = margin - textHeight if textAln else 0
   763         position = (base, labelBeg) if textRot else (labelBeg, base)
   764         fill = ("black", opts.forwardcolor, opts.reversecolor)[strandNum]
   765         draw.text(position, text, font=font, fill=fill)
   766     return im
   767 
   768 def rangesPerSeq(sortedRanges):
   769     for seqName, group in itertools.groupby(sortedRanges, itemgetter(0)):
   770         yield seqName, sorted(group)
   771 
   772 def rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
   773     for i, j, k in zip(sortedRanges, rangePixBegs, rangePixLens):
   774         seqName, rangeBeg, rangeEnd, strandNum = i
   775         isReverseRange = (strandNum > 1)
   776         if isReverseRange:
   777             origin = bpPerPix * (j + k) + rangeBeg
   778         else:
   779             origin = bpPerPix * j - rangeBeg
   780         yield seqName, (rangeBeg, rangeEnd, isReverseRange, origin)
   781 
   782 def rangesAndOriginsPerSeq(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
   783     a = rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix)
   784     for seqName, group in itertools.groupby(a, itemgetter(0)):
   785         yield seqName, sorted(i[1] for i in group)
   786 
   787 def getFont(opts):
   788     if opts.fontfile:
   789         return ImageFont.truetype(opts.fontfile, opts.fontsize)
   790     fileNames = []
   791     try:
   792         x = ["fc-match", "-f%{file}", "arial"]
   793         p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
   794                              universal_newlines=True)
   795         out, err = p.communicate()
   796         fileNames.append(out)
   797     except OSError as e:
   798         logging.info("fc-match error: " + str(e))
   799     fileNames.append("/Library/Fonts/Arial.ttf")  # for Mac
   800     for i in fileNames:
   801         try:
   802             font = ImageFont.truetype(i, opts.fontsize)
   803             logging.info("font: " + i)
   804             return font
   805         except IOError as e:
   806             logging.info("font load error: " + str(e))
   807     return ImageFont.load_default()
   808 
   809 def sequenceSizesAndNames(seqRanges):
   810     for seqName, ranges in itertools.groupby(seqRanges, itemgetter(0)):
   811         size = sum(e - b for n, b, e in ranges)
   812         yield size, seqName
   813 
   814 def biggestSequences(seqRanges, maxNumOfSequences):
   815     s = sorted(sequenceSizesAndNames(seqRanges), reverse=True)
   816     if len(s) > maxNumOfSequences:
   817         logging.warning("too many sequences - discarding the smallest ones")
   818         s = s[:maxNumOfSequences]
   819     return set(i[1] for i in s)
   820 
   821 def remainingSequenceRanges(seqRanges, alignments, seqIndex):
   822     remainingSequences = set(i[seqIndex] for i in alignments)
   823     return [i for i in seqRanges if i[0] in remainingSequences]
   824 
   825 def readAnnotations(opts, sortedRanges, bedFile, repFile, geneFile, gapFile):
   826     rangeDict = expandedSeqDict(dict(rangesPerSeq(sortedRanges)))
   827     annots = itertools.chain(readBed(bedFile, rangeDict),
   828                              readRmsk(repFile, rangeDict),
   829                              readGenePred(opts, geneFile, rangeDict),
   830                              readGaps(opts, gapFile, rangeDict))
   831     return annots
   832 
   833 def lastDotplot(opts, args):
   834     logLevel = logging.INFO if opts.verbose else logging.WARNING
   835     logging.basicConfig(format="%(filename)s: %(message)s", level=logLevel)
   836 
   837     font = getFont(opts)
   838     image_mode = 'RGB'
   839     forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
   840     reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
   841     zipped_colors = zip(forward_color, reverse_color)
   842     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
   843 
   844     maxGap1, maxGapB1 = twoValuesFromOption(opts.max_gap1, ":")
   845     maxGap2, maxGapB2 = twoValuesFromOption(opts.max_gap2, ":")
   846 
   847     logging.info("reading alignments...")
   848     alnData = readAlignments(args[0], opts)
   849     alignments, seqRanges1, coverDict1, seqRanges2, coverDict2 = alnData
   850     if not alignments: raise Exception("there are no alignments")
   851     logging.info("cutting...")
   852     coverDict1 = dict(mergedRangesPerSeq(coverDict1))
   853     coverDict2 = dict(mergedRangesPerSeq(coverDict2))
   854     minAlignedBases = min(coveredLength(coverDict1), coveredLength(coverDict2))
   855     pad = int(opts.pad * minAlignedBases)
   856     cutRanges1 = list(trimmed(seqRanges1, coverDict1, minAlignedBases,
   857                               maxGap1, pad, pad))
   858     cutRanges2 = list(trimmed(seqRanges2, coverDict2, minAlignedBases,
   859                               maxGap2, pad, pad))
   860 
   861     biggestSeqs1 = biggestSequences(cutRanges1, opts.maxseqs)
   862     cutRanges1 = [i for i in cutRanges1 if i[0] in biggestSeqs1]
   863     alignments = [i for i in alignments if i[0] in biggestSeqs1]
   864     cutRanges2 = remainingSequenceRanges(cutRanges2, alignments, 1)
   865 
   866     biggestSeqs2 = biggestSequences(cutRanges2, opts.maxseqs)
   867     cutRanges2 = [i for i in cutRanges2 if i[0] in biggestSeqs2]
   868     alignments = [i for i in alignments if i[1] in biggestSeqs2]
   869     cutRanges1 = remainingSequenceRanges(cutRanges1, alignments, 0)
   870 
   871     logging.info("reading secondary alignments...")
   872     alnDataB = readSecondaryAlignments(opts, cutRanges1, cutRanges2)
   873     alignmentsB, seqRangesB1, coverDictB1, seqRangesB2, coverDictB2 = alnDataB
   874     logging.info("cutting...")
   875     coverDictB1 = dict(mergedRangesPerSeq(coverDictB1))
   876     coverDictB2 = dict(mergedRangesPerSeq(coverDictB2))
   877     cutRangesB1 = trimmed(seqRangesB1, coverDictB1, minAlignedBases,
   878                           maxGapB1, 0, 0)
   879     cutRangesB2 = trimmed(seqRangesB2, coverDictB2, minAlignedBases,
   880                           maxGapB2, 0, 0)
   881 
   882     logging.info("sorting...")
   883     sortOut = allSortedRanges(opts, alignments, alignmentsB,
   884                               cutRanges1, cutRangesB1, cutRanges2, cutRangesB2)
   885     sortedRanges1, sortedRanges2 = sortOut
   886 
   887     textDraw = None
   888     if opts.fontsize:
   889         textDraw = ImageDraw.Draw(Image.new(image_mode, (1, 1)))
   890 
   891     textRot1 = "vertical".startswith(opts.rot1)
   892     i1 = dataFromRanges(sortedRanges1, font, textDraw, opts.labels1, textRot1)
   893     rangeSizes1, labelData1, tMargin = i1
   894 
   895     textRot2 = "horizontal".startswith(opts.rot2)
   896     i2 = dataFromRanges(sortedRanges2, font, textDraw, opts.labels2, textRot2)
   897     rangeSizes2, labelData2, lMargin = i2
   898 
   899     bMargin = rMargin = 0  # xxx
   900 
   901     maxPixels1 = opts.width  - lMargin - rMargin
   902     maxPixels2 = opts.height - tMargin - bMargin
   903     bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
   904     bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
   905     bpPerPix = max(bpPerPix1, bpPerPix2)
   906     logging.info("bp per pixel = " + str(bpPerPix))
   907 
   908     p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
   909     rangePixBegs1, rangePixLens1, rMarginBeg = p1
   910     width = rMarginBeg + rMargin
   911     rangeDict1 = dict(rangesAndOriginsPerSeq(sortedRanges1, rangePixBegs1,
   912                                              rangePixLens1, bpPerPix))
   913 
   914     p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
   915     rangePixBegs2, rangePixLens2, bMarginBeg = p2
   916     height = bMarginBeg + bMargin
   917     rangeDict2 = dict(rangesAndOriginsPerSeq(sortedRanges2, rangePixBegs2,
   918                                              rangePixLens2, bpPerPix))
   919 
   920     logging.info("width:  " + str(width))
   921     logging.info("height: " + str(height))
   922 
   923     logging.info("processing alignments...")
   924     allAlignments = alignments + alignmentsB
   925     hits = alignmentPixels(width, height, allAlignments, bpPerPix,
   926                            rangeDict1, rangeDict2)
   927 
   928     logging.info("reading annotations...")
   929 
   930     rangeDict1 = expandedSeqDict(rangeDict1)
   931     annots1 = readAnnotations(opts, sortedRanges1,
   932                               opts.bed1, opts.rmsk1, opts.genePred1, opts.gap1)
   933     boxes1 = bedBoxes(annots1, rangeDict1, tMargin, bMarginBeg, 1, bpPerPix)
   934 
   935     rangeDict2 = expandedSeqDict(rangeDict2)
   936     annots2 = readAnnotations(opts, sortedRanges2,
   937                               opts.bed2, opts.rmsk2, opts.genePred2, opts.gap2)
   938     boxes2 = bedBoxes(annots2, rangeDict2, lMargin, rMarginBeg, 0, bpPerPix)
   939 
   940     boxes = sorted(itertools.chain(boxes1, boxes2))
   941 
   942     logging.info("drawing...")
   943 
   944     image_size = width, height
   945     im = Image.new(image_mode, image_size, opts.background_color)
   946 
   947     drawAnnotations(im, boxes)
   948 
   949     joinA, joinB = twoValuesFromOption(opts.join, ":")
   950     if joinA in "13":
   951         drawJoins(im, alignments, bpPerPix, 0, rangeDict1, rangeDict2)
   952     if joinB in "13":
   953         drawJoins(im, alignmentsB, bpPerPix, 0, rangeDict1, rangeDict2)
   954     if joinA in "23":
   955         drawJoins(im, alignments, bpPerPix, 1, rangeDict2, rangeDict1)
   956     if joinB in "23":
   957         drawJoins(im, alignmentsB, bpPerPix, 1, rangeDict2, rangeDict1)
   958 
   959     for i in range(height):
   960         for j in range(width):
   961             store_value = hits[i * width + j]
   962             xy = j, i
   963             if   store_value == 1: im.putpixel(xy, forward_color)
   964             elif store_value == 2: im.putpixel(xy, reverse_color)
   965             elif store_value == 3: im.putpixel(xy, overlap_color)
   966 
   967     if opts.fontsize != 0:
   968         axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
   969                           textRot1, False, font, image_mode, opts)
   970         if textRot1:
   971             axis1 = axis1.transpose(Image.ROTATE_90)
   972         axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
   973                           textRot2, textRot2, font, image_mode, opts)
   974         if not textRot2:
   975             axis2 = axis2.transpose(Image.ROTATE_270)
   976         im.paste(axis1, (0, 0))
   977         im.paste(axis2, (0, 0))
   978 
   979     for i in rangePixBegs1[1:]:
   980         box = i - opts.border_pixels, tMargin, i, bMarginBeg
   981         im.paste(opts.border_color, box)
   982 
   983     for i in rangePixBegs2[1:]:
   984         box = lMargin, i - opts.border_pixels, rMarginBeg, i
   985         im.paste(opts.border_color, box)
   986 
   987     im.save(args[1])
   988 
   989 if __name__ == "__main__":
   990     usage = """%prog --help
   991    or: %prog [options] maf-or-tab-alignments dotplot.png
   992    or: %prog [options] maf-or-tab-alignments dotplot.gif
   993    or: ..."""
   994     description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
   995     op = optparse.OptionParser(usage=usage, description=description)
   996     op.add_option("-v", "--verbose", action="count",
   997                   help="show progress messages & data about the plot")
   998     # Replace "width" & "height" with a single "length" option?
   999     op.add_option("-x", "--width", metavar="INT", type="int", default=1000,
  1000                   help="maximum width in pixels (default: %default)")
  1001     op.add_option("-y", "--height", metavar="INT", type="int", default=1000,
  1002                   help="maximum height in pixels (default: %default)")
  1003     op.add_option("-m", "--maxseqs", type="int", default=100, metavar="M",
  1004                   help="maximum number of horizontal or vertical sequences "
  1005                   "(default=%default)")
  1006     op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
  1007                   default=[],
  1008                   help="which sequences to show from the 1st genome")
  1009     op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
  1010                   default=[],
  1011                   help="which sequences to show from the 2nd genome")
  1012     op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
  1013                   help="color for forward alignments (default: %default)")
  1014     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
  1015                   help="color for reverse alignments (default: %default)")
  1016     op.add_option("--alignments", metavar="FILE", help="secondary alignments")
  1017     op.add_option("--sort1", default="1", metavar="N",
  1018                   help="genome1 sequence order: 0=input order, 1=name order, "
  1019                   "2=length order, 3=alignment order (default=%default)")
  1020     op.add_option("--sort2", default="1", metavar="N",
  1021                   help="genome2 sequence order: 0=input order, 1=name order, "
  1022                   "2=length order, 3=alignment order (default=%default)")
  1023     op.add_option("--strands1", default="0", metavar="N", help=
  1024                   "genome1 sequence orientation: 0=forward orientation, "
  1025                   "1=alignment orientation (default=%default)")
  1026     op.add_option("--strands2", default="0", metavar="N", help=
  1027                   "genome2 sequence orientation: 0=forward orientation, "
  1028                   "1=alignment orientation (default=%default)")
  1029     op.add_option("--max-gap1", metavar="FRAC", default="0.5,2", help=
  1030                   "maximum unaligned (end,mid) gap in genome1: "
  1031                   "fraction of aligned length (default=%default)")
  1032     op.add_option("--max-gap2", metavar="FRAC", default="0.5,2", help=
  1033                   "maximum unaligned (end,mid) gap in genome2: "
  1034                   "fraction of aligned length (default=%default)")
  1035     op.add_option("--pad", metavar="FRAC", type="float", default=0.04, help=
  1036                   "pad length when cutting unaligned gaps: "
  1037                   "fraction of aligned length (default=%default)")
  1038     op.add_option("-j", "--join", default="0", metavar="N", help=
  1039                   "join: 0=nothing, 1=alignments adjacent in genome1, "
  1040                   "2=alignments adjacent in genome2 (default=%default)")
  1041     op.add_option("--border-pixels", metavar="INT", type="int", default=1,
  1042                   help="number of pixels between sequences (default=%default)")
  1043     op.add_option("--border-color", metavar="COLOR", default="black",
  1044                   help="color for pixels between sequences (default=%default)")
  1045     # --break-color and/or --break-pixels for intra-sequence breaks?
  1046     op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
  1047                   help="margin color")
  1048 
  1049     og = optparse.OptionGroup(op, "Text options")
  1050     og.add_option("-f", "--fontfile", metavar="FILE",
  1051                   help="TrueType or OpenType font file")
  1052     og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
  1053                   help="TrueType or OpenType font size (default: %default)")
  1054     og.add_option("--labels1", type="int", default=0, metavar="N", help=
  1055                   "genome1 labels: 0=name, 1=name:length, "
  1056                   "2=name:start:length, 3=name:start-end (default=%default)")
  1057     og.add_option("--labels2", type="int", default=0, metavar="N", help=
  1058                   "genome2 labels: 0=name, 1=name:length, "
  1059                   "2=name:start:length, 3=name:start-end (default=%default)")
  1060     og.add_option("--rot1", metavar="ROT", default="h",
  1061                   help="text rotation for the 1st genome (default=%default)")
  1062     og.add_option("--rot2", metavar="ROT", default="v",
  1063                   help="text rotation for the 2nd genome (default=%default)")
  1064     op.add_option_group(og)
  1065 
  1066     og = optparse.OptionGroup(op, "Annotation options")
  1067     og.add_option("--bed1", metavar="FILE",
  1068                   help="read genome1 annotations from BED file")
  1069     og.add_option("--bed2", metavar="FILE",
  1070                   help="read genome2 annotations from BED file")
  1071     og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
  1072                   "RepeatMasker .out or rmsk.txt file")
  1073     og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
  1074                   "RepeatMasker .out or rmsk.txt file")
  1075     op.add_option_group(og)
  1076 
  1077     og = optparse.OptionGroup(op, "Gene options")
  1078     og.add_option("--genePred1", metavar="FILE",
  1079                   help="read genome1 genes from genePred file")
  1080     og.add_option("--genePred2", metavar="FILE",
  1081                   help="read genome2 genes from genePred file")
  1082     og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
  1083                   help="color for exons (default=%default)")
  1084     og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
  1085                   help="color for protein-coding regions (default=%default)")
  1086     op.add_option_group(og)
  1087 
  1088     og = optparse.OptionGroup(op, "Unsequenced gap options")
  1089     og.add_option("--gap1", metavar="FILE",
  1090                   help="read genome1 unsequenced gaps from agp or gap file")
  1091     og.add_option("--gap2", metavar="FILE",
  1092                   help="read genome2 unsequenced gaps from agp or gap file")
  1093     og.add_option("--bridged-color", metavar="COLOR", default="yellow",
  1094                   help="color for bridged gaps (default: %default)")
  1095     og.add_option("--unbridged-color", metavar="COLOR", default="orange",
  1096                   help="color for unbridged gaps (default: %default)")
  1097     op.add_option_group(og)
  1098     (opts, args) = op.parse_args()
  1099     if len(args) != 2: op.error("2 arguments needed")
  1100 
  1101     opts.background_color = "white"
  1102     opts.label_space = 5     # minimum number of pixels between axis labels
  1103 
  1104     try: lastDotplot(opts, args)
  1105     except KeyboardInterrupt: pass  # avoid silly error message
  1106     except Exception as e:
  1107         prog = os.path.basename(sys.argv[0])
  1108         sys.exit(prog + ": error: " + str(e))