scripts/last-dotplot
author Martin C. Frith
Fri Jul 31 14:36:00 2020 +0900 (21 months ago)
changeset 1074 170ab6c64728
parent 1073 8ec299b42e28
child 1075 70b0e512d696
permissions -rwxr-xr-x
last-dotplot: implementing annotation names...
     1 #! /usr/bin/env python
     2 # Author: Martin C. Frith 2008
     3 # SPDX-License-Identifier: GPL-3.0-or-later
     4 
     5 # Read pair-wise alignments in MAF or LAST tabular format: write an
     6 # "Oxford grid", a.k.a. dotplot.
     7 
     8 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
     9 # pixels with one or more aligned nt-pairs are black.  This can look
    10 # too crowded for large genome alignments.  I tried shading each pixel
    11 # according to the number of aligned nt-pairs within it, but the
    12 # result is too faint.  How can this be done better?
    13 
    14 import collections
    15 import functools
    16 import gzip
    17 from fnmatch import fnmatchcase
    18 import logging
    19 from operator import itemgetter
    20 import subprocess
    21 import itertools, optparse, os, re, sys
    22 
    23 # Try to make PIL/PILLOW work:
    24 try:
    25     from PIL import Image, ImageDraw, ImageFont, ImageColor
    26 except ImportError:
    27     import Image, ImageDraw, ImageFont, ImageColor
    28 
    29 try:
    30     from future_builtins import zip
    31 except ImportError:
    32     pass
    33 
    34 def myOpen(fileName):  # faster than fileinput
    35     if fileName is None:
    36         return []
    37     if fileName == "-":
    38         return sys.stdin
    39     if fileName.endswith(".gz"):
    40         return gzip.open(fileName, "rt")  # xxx dubious for Python2
    41     return open(fileName)
    42 
    43 def groupByFirstItem(things):
    44     for k, v in itertools.groupby(things, itemgetter(0)):
    45         yield k, [i[1:] for i in v]
    46 
    47 def croppedBlocks(blocks, ranges1, ranges2):
    48     headBeg1, headBeg2, headSize = blocks[0]
    49     for r1 in ranges1:
    50         for r2 in ranges2:
    51             cropBeg1, cropEnd1 = r1
    52             if headBeg1 < 0:
    53                 cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
    54             cropBeg2, cropEnd2 = r2
    55             if headBeg2 < 0:
    56                 cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
    57             for beg1, beg2, size in blocks:
    58                 b1 = max(cropBeg1, beg1)
    59                 e1 = min(cropEnd1, beg1 + size)
    60                 if b1 >= e1: continue
    61                 offset = beg2 - beg1
    62                 b2 = max(cropBeg2, b1 + offset)
    63                 e2 = min(cropEnd2, e1 + offset)
    64                 if b2 >= e2: continue
    65                 yield b2 - offset, b2, e2 - b2
    66 
    67 def tabBlocks(beg1, beg2, blocks):
    68     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
    69     for i in blocks.split(","):
    70         if ":" in i:
    71             x, y = i.split(":")
    72             beg1 += int(x)
    73             beg2 += int(y)
    74         else:
    75             size = int(i)
    76             yield beg1, beg2, size
    77             beg1 += size
    78             beg2 += size
    79 
    80 def mafBlocks(beg1, beg2, seq1, seq2):
    81     '''Get the gapless blocks of an alignment, from MAF format.'''
    82     size = 0
    83     for x, y in zip(seq1, seq2):
    84         if x == "-":
    85             if size:
    86                 yield beg1, beg2, size
    87                 beg1 += size
    88                 beg2 += size
    89                 size = 0
    90             beg2 += 1
    91         elif y == "-":
    92             if size:
    93                 yield beg1, beg2, size
    94                 beg1 += size
    95                 beg2 += size
    96                 size = 0
    97             beg1 += 1
    98         else:
    99             size += 1
   100     if size: yield beg1, beg2, size
   101 
   102 def alignmentFromSegment(qrySeqName, qrySeqLen, segment):
   103     refSeqLen = sys.maxsize  # XXX
   104     refSeqName, refSeqBeg, qrySeqBeg, size = segment
   105     block = refSeqBeg, qrySeqBeg, size
   106     return refSeqName, refSeqLen, qrySeqName, qrySeqLen, [block]
   107 
   108 def alignmentInput(lines):
   109     '''Get alignments and sequence lengths, from MAF or tabular format.'''
   110     mafCount = 0
   111     qrySeqName = ""
   112     segments = []
   113     for line in lines:
   114         w = line.split()
   115         if line[0] == "#":
   116             pass
   117         elif len(w) == 1:
   118             for i in segments:
   119                 yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
   120             qrySeqName = w[0]
   121             qrySeqLen = 0
   122             segments = []
   123         elif len(w) == 2 and qrySeqName and w[1].isdigit():
   124             qrySeqLen += int(w[1])
   125         elif len(w) == 4 and qrySeqName and w[1].isdigit() and w[3].isdigit():
   126             refSeqName, refSeqBeg, refSeqEnd = w[0], int(w[1]), int(w[3])
   127             size = abs(refSeqEnd - refSeqBeg)
   128             if refSeqBeg > refSeqEnd:
   129                 refSeqBeg = -refSeqBeg
   130             segments.append((refSeqName, refSeqBeg, qrySeqLen, size))
   131             qrySeqLen += size
   132         elif line[0].isdigit():  # tabular format
   133             chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
   134             if w[4] == "-": beg1 -= seqlen1
   135             chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
   136             if w[9] == "-": beg2 -= seqlen2
   137             blocks = tabBlocks(beg1, beg2, w[11])
   138             yield chr1, seqlen1, chr2, seqlen2, blocks
   139         elif line[0] == "s":  # MAF format
   140             if mafCount == 0:
   141                 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
   142                 if w[4] == "-": beg1 -= seqlen1
   143                 mafCount = 1
   144             else:
   145                 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
   146                 if w[4] == "-": beg2 -= seqlen2
   147                 blocks = mafBlocks(beg1, beg2, seq1, seq2)
   148                 yield chr1, seqlen1, chr2, seqlen2, blocks
   149                 mafCount = 0
   150     for i in segments:
   151         yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
   152 
   153 def seqRequestFromText(text):
   154     if ":" in text:
   155         pattern, interval = text.rsplit(":", 1)
   156         if "-" in interval:
   157             beg, end = interval.rsplit("-", 1)
   158             return pattern, int(beg), int(end)  # beg may be negative
   159     return text, 0, sys.maxsize
   160 
   161 def rangesFromSeqName(seqRequests, name, seqLen):
   162     if seqRequests:
   163         base = name.split(".")[-1]  # allow for names like hg19.chr7
   164         for pat, beg, end in seqRequests:
   165             if fnmatchcase(name, pat) or fnmatchcase(base, pat):
   166                 yield max(beg, 0), min(end, seqLen)
   167     else:
   168         yield 0, seqLen
   169 
   170 def updateSeqs(coverDict, seqRanges, seqName, ranges, coveredRange):
   171     beg, end = coveredRange
   172     if beg < 0:
   173         coveredRange = -end, -beg
   174     if seqName in coverDict:
   175         coverDict[seqName].append(coveredRange)
   176     else:
   177         coverDict[seqName] = [coveredRange]
   178         for beg, end in ranges:
   179             r = seqName, beg, end
   180             seqRanges.append(r)
   181 
   182 def readAlignments(fileName, opts):
   183     '''Get alignments and sequence limits, from MAF or tabular format.'''
   184     seqRequests1 = [seqRequestFromText(i) for i in opts.seq1]
   185     seqRequests2 = [seqRequestFromText(i) for i in opts.seq2]
   186 
   187     alignments = []
   188     seqRanges1 = []
   189     seqRanges2 = []
   190     coverDict1 = {}
   191     coverDict2 = {}
   192     lines = myOpen(fileName)
   193     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   194         ranges1 = sorted(rangesFromSeqName(seqRequests1, seqName1, seqLen1))
   195         if not ranges1: continue
   196         ranges2 = sorted(rangesFromSeqName(seqRequests2, seqName2, seqLen2))
   197         if not ranges2: continue
   198         b = list(croppedBlocks(list(blocks), ranges1, ranges2))
   199         if not b: continue
   200         aln = seqName1, seqName2, b
   201         alignments.append(aln)
   202         coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
   203         updateSeqs(coverDict1, seqRanges1, seqName1, ranges1, coveredRange1)
   204         coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
   205         updateSeqs(coverDict2, seqRanges2, seqName2, ranges2, coveredRange2)
   206     return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
   207 
   208 def nameAndRangesFromDict(cropDict, seqName):
   209     if seqName in cropDict:
   210         return seqName, cropDict[seqName]
   211     n = seqName.split(".")[-1]
   212     if n in cropDict:
   213         return n, cropDict[n]
   214     return seqName, []
   215 
   216 def rangesForSecondaryAlignments(primaryRanges, seqLen):
   217     if primaryRanges:
   218         return primaryRanges
   219     return [(0, seqLen)]
   220 
   221 def readSecondaryAlignments(opts, cropRanges1, cropRanges2):
   222     cropDict1 = dict(groupByFirstItem(cropRanges1))
   223     cropDict2 = dict(groupByFirstItem(cropRanges2))
   224 
   225     alignments = []
   226     seqRanges1 = []
   227     seqRanges2 = []
   228     coverDict1 = {}
   229     coverDict2 = {}
   230     lines = myOpen(opts.alignments)
   231     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   232         seqName1, ranges1 = nameAndRangesFromDict(cropDict1, seqName1)
   233         seqName2, ranges2 = nameAndRangesFromDict(cropDict2, seqName2)
   234         if not ranges1 and not ranges2:
   235             continue
   236         r1 = rangesForSecondaryAlignments(ranges1, seqLen1)
   237         r2 = rangesForSecondaryAlignments(ranges2, seqLen2)
   238         b = list(croppedBlocks(list(blocks), r1, r2))
   239         if not b: continue
   240         aln = seqName1, seqName2, b
   241         alignments.append(aln)
   242         if not ranges1:
   243             coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
   244             updateSeqs(coverDict1, seqRanges1, seqName1, r1, coveredRange1)
   245         if not ranges2:
   246             coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
   247             updateSeqs(coverDict2, seqRanges2, seqName2, r2, coveredRange2)
   248     return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
   249 
   250 def twoValuesFromOption(text, separator):
   251     if separator in text:
   252         return text.split(separator)
   253     return text, text
   254 
   255 def mergedRanges(ranges):
   256     oldBeg, maxEnd = ranges[0]
   257     for beg, end in ranges:
   258         if beg > maxEnd:
   259             yield oldBeg, maxEnd
   260             oldBeg = beg
   261             maxEnd = end
   262         elif end > maxEnd:
   263             maxEnd = end
   264     yield oldBeg, maxEnd
   265 
   266 def mergedRangesPerSeq(coverDict):
   267     for k, v in coverDict.items():
   268         v.sort()
   269         yield k, list(mergedRanges(v))
   270 
   271 def coveredLength(mergedCoverDict):
   272     return sum(sum(e - b for b, e in v) for v in mergedCoverDict.values())
   273 
   274 def trimmed(seqRanges, coverDict, minAlignedBases, maxGapFrac, endPad, midPad):
   275     maxEndGapFrac, maxMidGapFrac = twoValuesFromOption(maxGapFrac, ",")
   276     maxEndGap = max(float(maxEndGapFrac) * minAlignedBases, endPad * 1.0)
   277     maxMidGap = max(float(maxMidGapFrac) * minAlignedBases, midPad * 2.0)
   278 
   279     for seqName, rangeBeg, rangeEnd in seqRanges:
   280         seqBlocks = coverDict[seqName]
   281         blocks = [i for i in seqBlocks if i[0] < rangeEnd and i[1] > rangeBeg]
   282         if blocks[0][0] - rangeBeg > maxEndGap:
   283             rangeBeg = blocks[0][0] - endPad
   284         for j, y in enumerate(blocks):
   285             if j:
   286                 x = blocks[j - 1]
   287                 if y[0] - x[1] > maxMidGap:
   288                     yield seqName, rangeBeg, x[1] + midPad
   289                     rangeBeg = y[0] - midPad
   290         if rangeEnd - blocks[-1][1] > maxEndGap:
   291             rangeEnd = blocks[-1][1] + endPad
   292         yield seqName, rangeBeg, rangeEnd
   293 
   294 def rangesWithStrandInfo(seqRanges, strandOpt, alignments, seqIndex):
   295     if strandOpt == "1":
   296         forwardMinusReverse = collections.defaultdict(int)
   297         for i in alignments:
   298             blocks = i[2]
   299             beg1, beg2, size = blocks[0]
   300             numOfAlignedLetterPairs = sum(i[2] for i in blocks)
   301             if (beg1 < 0) != (beg2 < 0):  # opposite-strand alignment
   302                 numOfAlignedLetterPairs *= -1
   303             forwardMinusReverse[i[seqIndex]] += numOfAlignedLetterPairs
   304     strandNum = 0
   305     for seqName, beg, end in seqRanges:
   306         if strandOpt == "1":
   307             strandNum = 1 if forwardMinusReverse[seqName] >= 0 else 2
   308         yield seqName, beg, end, strandNum
   309 
   310 def natural_sort_key(my_string):
   311     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
   312     parts = re.split(r'(\d+)', my_string)
   313     parts[1::2] = map(int, parts[1::2])
   314     return parts
   315 
   316 def nameKey(oneSeqRanges):
   317     return natural_sort_key(oneSeqRanges[0][0])
   318 
   319 def sizeKey(oneSeqRanges):
   320     return sum(b - e for n, b, e, s in oneSeqRanges), nameKey(oneSeqRanges)
   321 
   322 def alignmentKey(seqNamesToLists, oneSeqRanges):
   323     seqName = oneSeqRanges[0][0]
   324     alignmentsOfThisSequence = seqNamesToLists[seqName]
   325     numOfAlignedLetterPairs = sum(i[3] for i in alignmentsOfThisSequence)
   326     toMiddle = numOfAlignedLetterPairs // 2
   327     for i in alignmentsOfThisSequence:
   328         toMiddle -= i[3]
   329         if toMiddle < 0:
   330             return i[1:3]  # sequence-rank and "position" of this alignment
   331 
   332 def rankAndFlipPerSeq(seqRanges):
   333     rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
   334     for rank, group in enumerate(rangesGroupedBySeqName):
   335         seqName, ranges = group
   336         strandNum = next(ranges)[3]
   337         flip = 1 if strandNum < 2 else -1
   338         yield seqName, (rank, flip)
   339 
   340 def alignmentSortData(alignments, seqIndex, otherNamesToRanksAndFlips):
   341     otherIndex = 1 - seqIndex
   342     for i in alignments:
   343         blocks = i[2]
   344         otherRank, otherFlip = otherNamesToRanksAndFlips[i[otherIndex]]
   345         otherPos = otherFlip * abs(blocks[0][otherIndex] +
   346                                    blocks[-1][otherIndex] + blocks[-1][2])
   347         numOfAlignedLetterPairs = sum(i[2] for i in blocks)
   348         yield i[seqIndex], otherRank, otherPos, numOfAlignedLetterPairs
   349 
   350 def mySortedRanges(seqRanges, sortOpt, seqIndex, alignments, otherRanges):
   351     rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
   352     g = [list(ranges) for seqName, ranges in rangesGroupedBySeqName]
   353     for i in g:
   354         if i[0][3] > 1:
   355             i.reverse()
   356     if sortOpt == "1":
   357         g.sort(key=nameKey)
   358     if sortOpt == "2":
   359         g.sort(key=sizeKey)
   360     if sortOpt == "3":
   361         otherNamesToRanksAndFlips = dict(rankAndFlipPerSeq(otherRanges))
   362         alns = sorted(alignmentSortData(alignments, seqIndex,
   363                                         otherNamesToRanksAndFlips))
   364         alnsGroupedBySeqName = itertools.groupby(alns, itemgetter(0))
   365         seqNamesToLists = dict((k, list(v)) for k, v in alnsGroupedBySeqName)
   366         g.sort(key=functools.partial(alignmentKey, seqNamesToLists))
   367     return [j for i in g for j in i]
   368 
   369 def allSortedRanges(opts, alignments, alignmentsB,
   370                     seqRanges1, seqRangesB1, seqRanges2, seqRangesB2):
   371     o1, oB1 = twoValuesFromOption(opts.strands1, ":")
   372     o2, oB2 = twoValuesFromOption(opts.strands2, ":")
   373     if o1 == "1" and o2 == "1":
   374         raise Exception("the strand options have circular dependency")
   375     seqRanges1 = list(rangesWithStrandInfo(seqRanges1, o1, alignments, 0))
   376     seqRanges2 = list(rangesWithStrandInfo(seqRanges2, o2, alignments, 1))
   377     seqRangesB1 = list(rangesWithStrandInfo(seqRangesB1, oB1, alignmentsB, 0))
   378     seqRangesB2 = list(rangesWithStrandInfo(seqRangesB2, oB2, alignmentsB, 1))
   379 
   380     o1, oB1 = twoValuesFromOption(opts.sort1, ":")
   381     o2, oB2 = twoValuesFromOption(opts.sort2, ":")
   382     if o1 == "3" and o2 == "3":
   383         raise Exception("the sort options have circular dependency")
   384     if o1 != "3":
   385         s1 = mySortedRanges(seqRanges1, o1, None, None, None)
   386     if o2 != "3":
   387         s2 = mySortedRanges(seqRanges2, o2, None, None, None)
   388     if o1 == "3":
   389         s1 = mySortedRanges(seqRanges1, o1, 0, alignments, s2)
   390     if o2 == "3":
   391         s2 = mySortedRanges(seqRanges2, o2, 1, alignments, s1)
   392     t1 = mySortedRanges(seqRangesB1, oB1, 0, alignmentsB, s2)
   393     t2 = mySortedRanges(seqRangesB2, oB2, 1, alignmentsB, s1)
   394     return s1 + t1, s2 + t2
   395 
   396 def prettyNum(n):
   397     t = str(n)
   398     groups = []
   399     while t:
   400         groups.append(t[-3:])
   401         t = t[:-3]
   402     return ",".join(reversed(groups))
   403 
   404 def sizeText(size):
   405     suffixes = "bp", "kb", "Mb", "Gb"
   406     for i, x in enumerate(suffixes):
   407         j = 10 ** (i * 3)
   408         if size < j * 10:
   409             return "%.2g" % (1.0 * size / j) + x
   410         if size < j * 1000 or i == len(suffixes) - 1:
   411             return "%.0f" % (1.0 * size / j) + x
   412 
   413 def labelText(seqRange, labelOpt):
   414     seqName, beg, end, strandNum = seqRange
   415     if labelOpt == 1:
   416         return seqName + ": " + sizeText(end - beg)
   417     if labelOpt == 2:
   418         return seqName + ":" + prettyNum(beg) + ": " + sizeText(end - beg)
   419     if labelOpt == 3:
   420         return seqName + ":" + prettyNum(beg) + "-" + prettyNum(end)
   421     return seqName
   422 
   423 def rangeLabels(seqRanges, labelOpt, font, textDraw, textRot):
   424     x = y = 0
   425     for r in seqRanges:
   426         text = labelText(r, labelOpt)
   427         if textDraw is not None:
   428             x, y = textDraw.textsize(text, font=font)
   429             if textRot:
   430                 x, y = y, x
   431         yield text, x, y, r[3]
   432 
   433 def dataFromRanges(sortedRanges, font, textDraw, labelOpt, textRot):
   434     for seqName, rangeBeg, rangeEnd, strandNum in sortedRanges:
   435         out = [seqName, str(rangeBeg), str(rangeEnd)]
   436         if strandNum > 0:
   437             out.append(".+-"[strandNum])
   438         logging.info("\t".join(out))
   439     logging.info("")
   440     rangeSizes = [e - b for n, b, e, s in sortedRanges]
   441     labs = list(rangeLabels(sortedRanges, labelOpt, font, textDraw, textRot))
   442     margin = max(i[2] for i in labs)
   443     # xxx the margin may be too big, because some labels may get omitted
   444     return rangeSizes, labs, margin
   445 
   446 def div_ceil(x, y):
   447     '''Return x / y rounded up.'''
   448     q, r = divmod(x, y)
   449     return q + (r != 0)
   450 
   451 def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
   452     '''Get the minimum bp-per-pixel that fits in the size limit.'''
   453     logging.info("choosing bp per pixel...")
   454     numOfRanges = len(rangeSizes)
   455     maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
   456     if maxPixelsInRanges < numOfRanges:
   457         raise Exception("can't fit the image: too many sequences?")
   458     negLimit = -maxPixelsInRanges
   459     negBpPerPix = sum(rangeSizes) // negLimit
   460     while True:
   461         if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
   462             return -negBpPerPix
   463         negBpPerPix -= 1
   464 
   465 def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
   466     '''Get the start pixel for each range.'''
   467     rangePixBegs = []
   468     pix_tot = margin - pixTweenRanges
   469     for i in rangePixLens:
   470         pix_tot += pixTweenRanges
   471         rangePixBegs.append(pix_tot)
   472         pix_tot += i
   473     return rangePixBegs
   474 
   475 def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
   476     '''Return pixel information about the ranges.'''
   477     rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
   478     rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
   479     tot_pix = rangePixBegs[-1] + rangePixLens[-1]
   480     return rangePixBegs, rangePixLens, tot_pix
   481 
   482 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
   483     while True:
   484         q1, r1 = divmod(beg1, bp_per_pix)
   485         q2, r2 = divmod(beg2, bp_per_pix)
   486         hits[q2 * width + q1] |= 1
   487         next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
   488         if next_pix >= size: break
   489         beg1 += next_pix
   490         beg2 += next_pix
   491         size -= next_pix
   492 
   493 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
   494     while True:
   495         q1, r1 = divmod(beg1, bp_per_pix)
   496         q2, r2 = divmod(beg2, bp_per_pix)
   497         hits[q2 * width + q1] |= 2
   498         next_pix = min(bp_per_pix - r1, r2 + 1)
   499         if next_pix >= size: break
   500         beg1 += next_pix
   501         beg2 -= next_pix
   502         size -= next_pix
   503 
   504 def strandAndOrigin(ranges, beg, size):
   505     isReverseStrand = (beg < 0)
   506     if isReverseStrand:
   507         beg = -(beg + size)
   508     for rangeBeg, rangeEnd, isReverseRange, origin in ranges:
   509         if rangeEnd > beg:  # assumes the ranges are sorted
   510             return (isReverseStrand != isReverseRange), origin
   511 
   512 def alignmentPixels(width, height, alignments, bp_per_pix,
   513                     rangeDict1, rangeDict2):
   514     hits = [0] * (width * height)  # the image data
   515     for seq1, seq2, blocks in alignments:
   516         beg1, beg2, size = blocks[0]
   517         isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
   518         isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
   519         for beg1, beg2, size in blocks:
   520             if isReverse1:
   521                 beg1 = -(beg1 + size)
   522                 beg2 = -(beg2 + size)
   523             if isReverse1 == isReverse2:
   524                 drawLineForward(hits, width, bp_per_pix,
   525                                 ori1 + beg1, ori2 + beg2, size)
   526             else:
   527                 drawLineReverse(hits, width, bp_per_pix,
   528                                 ori1 + beg1, ori2 - beg2 - 1, size)
   529     return hits
   530 
   531 def orientedBlocks(alignments, seqIndex):
   532     otherIndex = 1 - seqIndex
   533     for a in alignments:
   534         seq1, seq2, blocks = a
   535         for b in blocks:
   536             beg1, beg2, size = b
   537             if b[seqIndex] < 0:
   538                 b = -(beg1 + size), -(beg2 + size), size
   539             yield a[seqIndex], b[seqIndex], a[otherIndex], b[otherIndex], size
   540 
   541 def drawJoins(im, alignments, bpPerPix, seqIndex, rangeDict1, rangeDict2):
   542     blocks = orientedBlocks(alignments, seqIndex)
   543     oldSeq1 = ""
   544     for seq1, beg1, seq2, beg2, size in sorted(blocks):
   545         isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
   546         isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
   547         end1 = beg1 + size - 1
   548         end2 = beg2 + size - 1
   549         if isReverse1:
   550             beg1 = -(beg1 + 1)
   551             end1 = -(end1 + 1)
   552         if isReverse2:
   553             beg2 = -(beg2 + 1)
   554             end2 = -(end2 + 1)
   555         newPix1 = (ori1 + beg1) // bpPerPix
   556         newPix2 = (ori2 + beg2) // bpPerPix
   557         if seq1 == oldSeq1:
   558             lowerPix2 = min(oldPix2, newPix2)
   559             upperPix2 = max(oldPix2, newPix2)
   560             midPix1 = (oldPix1 + newPix1) // 2
   561             if isReverse1:
   562                 midPix1 = (oldPix1 + newPix1 + 1) // 2
   563                 oldPix1, newPix1 = newPix1, oldPix1
   564             if upperPix2 - lowerPix2 > 1 and oldPix1 <= newPix1 <= oldPix1 + 1:
   565                 if seqIndex == 0:
   566                     box = midPix1, lowerPix2, midPix1 + 1, upperPix2 + 1
   567                 else:
   568                     box = lowerPix2, midPix1, upperPix2 + 1, midPix1 + 1
   569                 im.paste("lightgray", box)
   570         oldPix1 = (ori1 + end1) // bpPerPix
   571         oldPix2 = (ori2 + end2) // bpPerPix
   572         oldSeq1 = seq1
   573 
   574 def expandedSeqDict(seqDict):
   575     '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   576     newDict = seqDict.copy()
   577     for name, x in seqDict.items():
   578         if "." in name:
   579             base = name.split(".")[-1]
   580             if base in newDict:  # an ambiguous case was found:
   581                 return seqDict   # so give up completely
   582             newDict[base] = x
   583     return newDict
   584 
   585 def readBed(fileName, rangeDict):
   586     for line in myOpen(fileName):
   587         w = line.split()
   588         if not w: continue
   589         seqName = w[0]
   590         if seqName not in rangeDict: continue
   591         seqRanges = rangeDict[seqName]
   592         beg = int(w[1])
   593         end = int(w[2])
   594         if all(beg >= i[2] or end <= i[1] for i in seqRanges):
   595             continue
   596         itemName = w[3] if len(w) > 3 and w[3] != "." else ""
   597         layer = 900
   598         color = "#fbf"
   599         if len(w) > 4:
   600             if w[4] != ".":
   601                 layer = float(w[4])
   602             if len(w) > 5:
   603                 if len(w) > 8 and w[8].count(",") == 2:
   604                     color = "rgb(" + w[8] + ")"
   605                 else:
   606                     strand = w[5]
   607                     isRev = (seqRanges[0][3] > 1)
   608                     if strand == "+" and not isRev or strand == "-" and isRev:
   609                         color = "#ffe8e8"
   610                     if strand == "-" and not isRev or strand == "+" and isRev:
   611                         color = "#e8e8ff"
   612         yield layer, color, seqName, beg, end, itemName
   613 
   614 def commaSeparatedInts(text):
   615     return map(int, text.rstrip(",").split(","))
   616 
   617 def readGenePred(opts, fileName, rangeDict):
   618     for line in myOpen(fileName):
   619         fields = line.split()  # xxx tab ???
   620         if not fields: continue
   621         geneName = fields[12 if len(fields) > 12 else 0]  # XXX ???
   622         if fields[2] not in "+-":
   623             fields = fields[1:]
   624         seqName = fields[1]
   625         if seqName not in rangeDict: continue
   626         seqRanges = rangeDict[seqName]
   627         #strand = fields[2]
   628         cdsBeg = int(fields[5])
   629         cdsEnd = int(fields[6])
   630         exonBegs = commaSeparatedInts(fields[8])
   631         exonEnds = commaSeparatedInts(fields[9])
   632         for beg, end in zip(exonBegs, exonEnds):
   633             if all(beg >= i[2] or end <= i[1] for i in seqRanges):
   634                 continue
   635             yield 300, opts.exon_color, seqName, beg, end, geneName
   636             b = max(beg, cdsBeg)
   637             e = min(end, cdsEnd)
   638             if b < e: yield 400, opts.cds_color, seqName, b, e, ""
   639 
   640 def readRmsk(fileName, rangeDict):
   641     for line in myOpen(fileName):
   642         fields = line.split()
   643         if len(fields) == 17:  # rmsk.txt
   644             seqName = fields[5]
   645             if seqName not in rangeDict: continue  # do this ASAP for speed
   646             beg = int(fields[6])
   647             end = int(fields[7])
   648             strand = fields[9]
   649             repeatName = fields[10]
   650             repeatClass = fields[11]
   651         elif len(fields) == 15:  # .out
   652             seqName = fields[4]
   653             if seqName not in rangeDict: continue
   654             beg = int(fields[5]) - 1
   655             end = int(fields[6])
   656             strand = fields[8]
   657             repeatName = fields[9]
   658             repeatClass = fields[10].split("/")[0]
   659         else:
   660             continue
   661         seqRanges = rangeDict[seqName]
   662         if all(beg >= i[2] or end <= i[1] for i in seqRanges):
   663             continue
   664         if repeatClass in ("Low_complexity", "Simple_repeat", "Satellite"):
   665             yield 200, "#fbf", seqName, beg, end, repeatName
   666         elif (strand == "+") != (seqRanges[0][3] > 1):
   667             yield 100, "#ffe8e8", seqName, beg, end, repeatName
   668         else:
   669             yield 100, "#e8e8ff", seqName, beg, end, repeatName
   670 
   671 def isExtraFirstGapField(fields):
   672     return fields[4].isdigit()
   673 
   674 def readGaps(opts, fileName, rangeDict):
   675     '''Read locations of unsequenced gaps, from an agp or gap file.'''
   676     for line in myOpen(fileName):
   677         w = line.split()
   678         if not w or w[0][0] == "#": continue
   679         if isExtraFirstGapField(w): w = w[1:]
   680         if w[4] not in "NU": continue
   681         seqName = w[0]
   682         if seqName not in rangeDict: continue
   683         end = int(w[2])
   684         beg = end - int(w[5])  # zero-based coordinate
   685         if w[7] == "yes":
   686             yield 3000, opts.bridged_color, seqName, beg, end, ""
   687         else:
   688             yield 2000, opts.unbridged_color, seqName, beg, end, ""
   689 
   690 def bedBoxes(beds, rangeDict, margin, edge, isTop, bpPerPix):
   691     for layer, color, seqName, bedBeg, bedEnd, name in beds:
   692         for rangeBeg, rangeEnd, isReverseRange, origin in rangeDict[seqName]:
   693             beg = max(bedBeg, rangeBeg)
   694             end = min(bedEnd, rangeEnd)
   695             if beg >= end: continue
   696             if isReverseRange:
   697                 beg, end = -end, -beg
   698             if layer <= 1000:
   699                 # include partly-covered pixels
   700                 pixBeg = (origin + beg) // bpPerPix
   701                 pixEnd = div_ceil(origin + end, bpPerPix)
   702             else:
   703                 # exclude partly-covered pixels
   704                 pixBeg = div_ceil(origin + beg, bpPerPix)
   705                 pixEnd = (origin + end) // bpPerPix
   706                 if pixEnd <= pixBeg: continue
   707                 if bedEnd >= rangeEnd:  # include partly-covered end pixels
   708                     if isReverseRange:
   709                         pixBeg = (origin + beg) // bpPerPix
   710                     else:
   711                         pixEnd = div_ceil(origin + end, bpPerPix)
   712             if isTop:
   713                 box = pixBeg, margin, pixEnd, edge
   714             else:
   715                 box = margin, pixBeg, edge, pixEnd
   716             yield layer, color, box
   717 
   718 def drawAnnotations(im, boxes):
   719     # xxx use partial transparency for different-color overlaps?
   720     for layer, color, box in boxes:
   721         im.paste(color, box)
   722 
   723 def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
   724     '''Return axis labels with endpoint & sort-order information.'''
   725     maxWidth = end - beg
   726     for i, j, k in zip(labels, rangePixBegs, rangePixLens):
   727         text, textWidth, textHeight, strandNum = i
   728         if textWidth > maxWidth:
   729             continue
   730         labelBeg = j + (k - textWidth) // 2
   731         labelEnd = labelBeg + textWidth
   732         sortKey = textWidth - k
   733         if labelBeg < beg:
   734             sortKey += maxWidth * (beg - labelBeg)
   735             labelBeg = beg
   736             labelEnd = beg + textWidth
   737         if labelEnd > end:
   738             sortKey += maxWidth * (labelEnd - end)
   739             labelEnd = end
   740             labelBeg = end - textWidth
   741         yield sortKey, labelBeg, labelEnd, text, textHeight, strandNum
   742 
   743 def nonoverlappingLabels(labels, minPixTweenLabels):
   744     '''Get a subset of non-overlapping axis labels, greedily.'''
   745     out = []
   746     for i in labels:
   747         beg = i[1] - minPixTweenLabels
   748         end = i[2] + minPixTweenLabels
   749         if all(j[2] <= beg or j[1] >= end for j in out):
   750             out.append(i)
   751     return out
   752 
   753 def axisImage(labels, rangePixBegs, rangePixLens, textRot,
   754               textAln, font, image_mode, opts):
   755     '''Make an image of axis labels.'''
   756     beg = rangePixBegs[0]
   757     end = rangePixBegs[-1] + rangePixLens[-1]
   758     margin = max(i[2] for i in labels)
   759     labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
   760     minPixTweenLabels = 0 if textRot else opts.label_space
   761     labels = nonoverlappingLabels(labels, minPixTweenLabels)
   762     image_size = (margin, end) if textRot else (end, margin)
   763     im = Image.new(image_mode, image_size, opts.margin_color)
   764     draw = ImageDraw.Draw(im)
   765     for sortKey, labelBeg, labelEnd, text, textHeight, strandNum in labels:
   766         base = margin - textHeight if textAln else 0
   767         position = (base, labelBeg) if textRot else (labelBeg, base)
   768         fill = ("black", opts.forwardcolor, opts.reversecolor)[strandNum]
   769         draw.text(position, text, font=font, fill=fill)
   770     return im
   771 
   772 def rangesPerSeq(sortedRanges):
   773     for seqName, group in itertools.groupby(sortedRanges, itemgetter(0)):
   774         yield seqName, sorted(group)
   775 
   776 def rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
   777     for i, j, k in zip(sortedRanges, rangePixBegs, rangePixLens):
   778         seqName, rangeBeg, rangeEnd, strandNum = i
   779         isReverseRange = (strandNum > 1)
   780         if isReverseRange:
   781             origin = bpPerPix * (j + k) + rangeBeg
   782         else:
   783             origin = bpPerPix * j - rangeBeg
   784         yield seqName, (rangeBeg, rangeEnd, isReverseRange, origin)
   785 
   786 def rangesAndOriginsPerSeq(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
   787     a = rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix)
   788     for seqName, group in itertools.groupby(a, itemgetter(0)):
   789         yield seqName, sorted(i[1] for i in group)
   790 
   791 def getFont(opts):
   792     if opts.fontfile:
   793         return ImageFont.truetype(opts.fontfile, opts.fontsize)
   794     fileNames = []
   795     try:
   796         x = ["fc-match", "-f%{file}", "arial"]
   797         p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
   798                              universal_newlines=True)
   799         out, err = p.communicate()
   800         fileNames.append(out)
   801     except OSError as e:
   802         logging.info("fc-match error: " + str(e))
   803     fileNames.append("/Library/Fonts/Arial.ttf")  # for Mac
   804     for i in fileNames:
   805         try:
   806             font = ImageFont.truetype(i, opts.fontsize)
   807             logging.info("font: " + i)
   808             return font
   809         except IOError as e:
   810             logging.info("font load error: " + str(e))
   811     return ImageFont.load_default()
   812 
   813 def sequenceSizesAndNames(seqRanges):
   814     for seqName, ranges in itertools.groupby(seqRanges, itemgetter(0)):
   815         size = sum(e - b for n, b, e in ranges)
   816         yield size, seqName
   817 
   818 def biggestSequences(seqRanges, maxNumOfSequences):
   819     s = sorted(sequenceSizesAndNames(seqRanges), reverse=True)
   820     if len(s) > maxNumOfSequences:
   821         logging.warning("too many sequences - discarding the smallest ones")
   822         s = s[:maxNumOfSequences]
   823     return set(i[1] for i in s)
   824 
   825 def remainingSequenceRanges(seqRanges, alignments, seqIndex):
   826     remainingSequences = set(i[seqIndex] for i in alignments)
   827     return [i for i in seqRanges if i[0] in remainingSequences]
   828 
   829 def readAnnotations(opts, sortedRanges, bedFile, repFile, geneFile, gapFile):
   830     rangeDict = expandedSeqDict(dict(rangesPerSeq(sortedRanges)))
   831     annots = itertools.chain(readBed(bedFile, rangeDict),
   832                              readRmsk(repFile, rangeDict),
   833                              readGenePred(opts, geneFile, rangeDict),
   834                              readGaps(opts, gapFile, rangeDict))
   835     return annots
   836 
   837 def lastDotplot(opts, args):
   838     logLevel = logging.INFO if opts.verbose else logging.WARNING
   839     logging.basicConfig(format="%(filename)s: %(message)s", level=logLevel)
   840 
   841     font = getFont(opts)
   842     image_mode = 'RGB'
   843     forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
   844     reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
   845     zipped_colors = zip(forward_color, reverse_color)
   846     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
   847 
   848     maxGap1, maxGapB1 = twoValuesFromOption(opts.max_gap1, ":")
   849     maxGap2, maxGapB2 = twoValuesFromOption(opts.max_gap2, ":")
   850 
   851     logging.info("reading alignments...")
   852     alnData = readAlignments(args[0], opts)
   853     alignments, seqRanges1, coverDict1, seqRanges2, coverDict2 = alnData
   854     if not alignments: raise Exception("there are no alignments")
   855     logging.info("cutting...")
   856     coverDict1 = dict(mergedRangesPerSeq(coverDict1))
   857     coverDict2 = dict(mergedRangesPerSeq(coverDict2))
   858     minAlignedBases = min(coveredLength(coverDict1), coveredLength(coverDict2))
   859     pad = int(opts.pad * minAlignedBases)
   860     cutRanges1 = list(trimmed(seqRanges1, coverDict1, minAlignedBases,
   861                               maxGap1, pad, pad))
   862     cutRanges2 = list(trimmed(seqRanges2, coverDict2, minAlignedBases,
   863                               maxGap2, pad, pad))
   864 
   865     biggestSeqs1 = biggestSequences(cutRanges1, opts.maxseqs)
   866     cutRanges1 = [i for i in cutRanges1 if i[0] in biggestSeqs1]
   867     alignments = [i for i in alignments if i[0] in biggestSeqs1]
   868     cutRanges2 = remainingSequenceRanges(cutRanges2, alignments, 1)
   869 
   870     biggestSeqs2 = biggestSequences(cutRanges2, opts.maxseqs)
   871     cutRanges2 = [i for i in cutRanges2 if i[0] in biggestSeqs2]
   872     alignments = [i for i in alignments if i[1] in biggestSeqs2]
   873     cutRanges1 = remainingSequenceRanges(cutRanges1, alignments, 0)
   874 
   875     logging.info("reading secondary alignments...")
   876     alnDataB = readSecondaryAlignments(opts, cutRanges1, cutRanges2)
   877     alignmentsB, seqRangesB1, coverDictB1, seqRangesB2, coverDictB2 = alnDataB
   878     logging.info("cutting...")
   879     coverDictB1 = dict(mergedRangesPerSeq(coverDictB1))
   880     coverDictB2 = dict(mergedRangesPerSeq(coverDictB2))
   881     cutRangesB1 = trimmed(seqRangesB1, coverDictB1, minAlignedBases,
   882                           maxGapB1, 0, 0)
   883     cutRangesB2 = trimmed(seqRangesB2, coverDictB2, minAlignedBases,
   884                           maxGapB2, 0, 0)
   885 
   886     logging.info("sorting...")
   887     sortOut = allSortedRanges(opts, alignments, alignmentsB,
   888                               cutRanges1, cutRangesB1, cutRanges2, cutRangesB2)
   889     sortedRanges1, sortedRanges2 = sortOut
   890 
   891     textDraw = None
   892     if opts.fontsize:
   893         textDraw = ImageDraw.Draw(Image.new(image_mode, (1, 1)))
   894 
   895     textRot1 = "vertical".startswith(opts.rot1)
   896     i1 = dataFromRanges(sortedRanges1, font, textDraw, opts.labels1, textRot1)
   897     rangeSizes1, labelData1, tMargin = i1
   898 
   899     textRot2 = "horizontal".startswith(opts.rot2)
   900     i2 = dataFromRanges(sortedRanges2, font, textDraw, opts.labels2, textRot2)
   901     rangeSizes2, labelData2, lMargin = i2
   902 
   903     bMargin = rMargin = 0  # xxx
   904 
   905     maxPixels1 = opts.width  - lMargin - rMargin
   906     maxPixels2 = opts.height - tMargin - bMargin
   907     bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
   908     bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
   909     bpPerPix = max(bpPerPix1, bpPerPix2)
   910     logging.info("bp per pixel = " + str(bpPerPix))
   911 
   912     p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
   913     rangePixBegs1, rangePixLens1, rMarginBeg = p1
   914     width = rMarginBeg + rMargin
   915     rangeDict1 = dict(rangesAndOriginsPerSeq(sortedRanges1, rangePixBegs1,
   916                                              rangePixLens1, bpPerPix))
   917 
   918     p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
   919     rangePixBegs2, rangePixLens2, bMarginBeg = p2
   920     height = bMarginBeg + bMargin
   921     rangeDict2 = dict(rangesAndOriginsPerSeq(sortedRanges2, rangePixBegs2,
   922                                              rangePixLens2, bpPerPix))
   923 
   924     logging.info("width:  " + str(width))
   925     logging.info("height: " + str(height))
   926 
   927     logging.info("processing alignments...")
   928     allAlignments = alignments + alignmentsB
   929     hits = alignmentPixels(width, height, allAlignments, bpPerPix,
   930                            rangeDict1, rangeDict2)
   931 
   932     logging.info("reading annotations...")
   933 
   934     rangeDict1 = expandedSeqDict(rangeDict1)
   935     annots1 = readAnnotations(opts, sortedRanges1,
   936                               opts.bed1, opts.rmsk1, opts.genePred1, opts.gap1)
   937     boxes1 = bedBoxes(annots1, rangeDict1, tMargin, bMarginBeg, 1, bpPerPix)
   938 
   939     rangeDict2 = expandedSeqDict(rangeDict2)
   940     annots2 = readAnnotations(opts, sortedRanges2,
   941                               opts.bed2, opts.rmsk2, opts.genePred2, opts.gap2)
   942     boxes2 = bedBoxes(annots2, rangeDict2, lMargin, rMarginBeg, 0, bpPerPix)
   943 
   944     boxes = sorted(itertools.chain(boxes1, boxes2))
   945 
   946     logging.info("drawing...")
   947 
   948     image_size = width, height
   949     im = Image.new(image_mode, image_size, opts.background_color)
   950 
   951     drawAnnotations(im, boxes)
   952 
   953     joinA, joinB = twoValuesFromOption(opts.join, ":")
   954     if joinA in "13":
   955         drawJoins(im, alignments, bpPerPix, 0, rangeDict1, rangeDict2)
   956     if joinB in "13":
   957         drawJoins(im, alignmentsB, bpPerPix, 0, rangeDict1, rangeDict2)
   958     if joinA in "23":
   959         drawJoins(im, alignments, bpPerPix, 1, rangeDict2, rangeDict1)
   960     if joinB in "23":
   961         drawJoins(im, alignmentsB, bpPerPix, 1, rangeDict2, rangeDict1)
   962 
   963     for i in range(height):
   964         for j in range(width):
   965             store_value = hits[i * width + j]
   966             xy = j, i
   967             if   store_value == 1: im.putpixel(xy, forward_color)
   968             elif store_value == 2: im.putpixel(xy, reverse_color)
   969             elif store_value == 3: im.putpixel(xy, overlap_color)
   970 
   971     if opts.fontsize != 0:
   972         axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
   973                           textRot1, False, font, image_mode, opts)
   974         if textRot1:
   975             axis1 = axis1.transpose(Image.ROTATE_90)
   976         axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
   977                           textRot2, textRot2, font, image_mode, opts)
   978         if not textRot2:
   979             axis2 = axis2.transpose(Image.ROTATE_270)
   980         im.paste(axis1, (0, 0))
   981         im.paste(axis2, (0, 0))
   982 
   983     for i in rangePixBegs1[1:]:
   984         box = i - opts.border_pixels, tMargin, i, bMarginBeg
   985         im.paste(opts.border_color, box)
   986 
   987     for i in rangePixBegs2[1:]:
   988         box = lMargin, i - opts.border_pixels, rMarginBeg, i
   989         im.paste(opts.border_color, box)
   990 
   991     im.save(args[1])
   992 
   993 if __name__ == "__main__":
   994     usage = """%prog --help
   995    or: %prog [options] maf-or-tab-alignments dotplot.png
   996    or: %prog [options] maf-or-tab-alignments dotplot.gif
   997    or: ..."""
   998     description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
   999     op = optparse.OptionParser(usage=usage, description=description)
  1000     op.add_option("-v", "--verbose", action="count",
  1001                   help="show progress messages & data about the plot")
  1002     # Replace "width" & "height" with a single "length" option?
  1003     op.add_option("-x", "--width", metavar="INT", type="int", default=1000,
  1004                   help="maximum width in pixels (default: %default)")
  1005     op.add_option("-y", "--height", metavar="INT", type="int", default=1000,
  1006                   help="maximum height in pixels (default: %default)")
  1007     op.add_option("-m", "--maxseqs", type="int", default=100, metavar="M",
  1008                   help="maximum number of horizontal or vertical sequences "
  1009                   "(default=%default)")
  1010     op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
  1011                   default=[],
  1012                   help="which sequences to show from the 1st genome")
  1013     op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
  1014                   default=[],
  1015                   help="which sequences to show from the 2nd genome")
  1016     op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
  1017                   help="color for forward alignments (default: %default)")
  1018     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
  1019                   help="color for reverse alignments (default: %default)")
  1020     op.add_option("--alignments", metavar="FILE", help="secondary alignments")
  1021     op.add_option("--sort1", default="1", metavar="N",
  1022                   help="genome1 sequence order: 0=input order, 1=name order, "
  1023                   "2=length order, 3=alignment order (default=%default)")
  1024     op.add_option("--sort2", default="1", metavar="N",
  1025                   help="genome2 sequence order: 0=input order, 1=name order, "
  1026                   "2=length order, 3=alignment order (default=%default)")
  1027     op.add_option("--strands1", default="0", metavar="N", help=
  1028                   "genome1 sequence orientation: 0=forward orientation, "
  1029                   "1=alignment orientation (default=%default)")
  1030     op.add_option("--strands2", default="0", metavar="N", help=
  1031                   "genome2 sequence orientation: 0=forward orientation, "
  1032                   "1=alignment orientation (default=%default)")
  1033     op.add_option("--max-gap1", metavar="FRAC", default="0.5,2", help=
  1034                   "maximum unaligned (end,mid) gap in genome1: "
  1035                   "fraction of aligned length (default=%default)")
  1036     op.add_option("--max-gap2", metavar="FRAC", default="0.5,2", help=
  1037                   "maximum unaligned (end,mid) gap in genome2: "
  1038                   "fraction of aligned length (default=%default)")
  1039     op.add_option("--pad", metavar="FRAC", type="float", default=0.04, help=
  1040                   "pad length when cutting unaligned gaps: "
  1041                   "fraction of aligned length (default=%default)")
  1042     op.add_option("-j", "--join", default="0", metavar="N", help=
  1043                   "join: 0=nothing, 1=alignments adjacent in genome1, "
  1044                   "2=alignments adjacent in genome2 (default=%default)")
  1045     op.add_option("--border-pixels", metavar="INT", type="int", default=1,
  1046                   help="number of pixels between sequences (default=%default)")
  1047     op.add_option("--border-color", metavar="COLOR", default="black",
  1048                   help="color for pixels between sequences (default=%default)")
  1049     # --break-color and/or --break-pixels for intra-sequence breaks?
  1050     op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
  1051                   help="margin color")
  1052 
  1053     og = optparse.OptionGroup(op, "Text options")
  1054     og.add_option("-f", "--fontfile", metavar="FILE",
  1055                   help="TrueType or OpenType font file")
  1056     og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
  1057                   help="TrueType or OpenType font size (default: %default)")
  1058     og.add_option("--labels1", type="int", default=0, metavar="N", help=
  1059                   "genome1 labels: 0=name, 1=name:length, "
  1060                   "2=name:start:length, 3=name:start-end (default=%default)")
  1061     og.add_option("--labels2", type="int", default=0, metavar="N", help=
  1062                   "genome2 labels: 0=name, 1=name:length, "
  1063                   "2=name:start:length, 3=name:start-end (default=%default)")
  1064     og.add_option("--rot1", metavar="ROT", default="h",
  1065                   help="text rotation for the 1st genome (default=%default)")
  1066     og.add_option("--rot2", metavar="ROT", default="v",
  1067                   help="text rotation for the 2nd genome (default=%default)")
  1068     op.add_option_group(og)
  1069 
  1070     og = optparse.OptionGroup(op, "Annotation options")
  1071     og.add_option("--bed1", metavar="FILE",
  1072                   help="read genome1 annotations from BED file")
  1073     og.add_option("--bed2", metavar="FILE",
  1074                   help="read genome2 annotations from BED file")
  1075     og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
  1076                   "RepeatMasker .out or rmsk.txt file")
  1077     og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
  1078                   "RepeatMasker .out or rmsk.txt file")
  1079     op.add_option_group(og)
  1080 
  1081     og = optparse.OptionGroup(op, "Gene options")
  1082     og.add_option("--genePred1", metavar="FILE",
  1083                   help="read genome1 genes from genePred file")
  1084     og.add_option("--genePred2", metavar="FILE",
  1085                   help="read genome2 genes from genePred file")
  1086     og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
  1087                   help="color for exons (default=%default)")
  1088     og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
  1089                   help="color for protein-coding regions (default=%default)")
  1090     op.add_option_group(og)
  1091 
  1092     og = optparse.OptionGroup(op, "Unsequenced gap options")
  1093     og.add_option("--gap1", metavar="FILE",
  1094                   help="read genome1 unsequenced gaps from agp or gap file")
  1095     og.add_option("--gap2", metavar="FILE",
  1096                   help="read genome2 unsequenced gaps from agp or gap file")
  1097     og.add_option("--bridged-color", metavar="COLOR", default="yellow",
  1098                   help="color for bridged gaps (default: %default)")
  1099     og.add_option("--unbridged-color", metavar="COLOR", default="orange",
  1100                   help="color for unbridged gaps (default: %default)")
  1101     op.add_option_group(og)
  1102     (opts, args) = op.parse_args()
  1103     if len(args) != 2: op.error("2 arguments needed")
  1104 
  1105     opts.background_color = "white"
  1106     opts.label_space = 5     # minimum number of pixels between axis labels
  1107 
  1108     try: lastDotplot(opts, args)
  1109     except KeyboardInterrupt: pass  # avoid silly error message
  1110     except Exception as e:
  1111         prog = os.path.basename(sys.argv[0])
  1112         sys.exit(prog + ": error: " + str(e))