scripts/last-dotplot
author Martin C. Frith
Mon Jun 08 14:28:46 2020 +0900 (23 months ago)
changeset 1062 6d8d56a74b14
parent 984 fb43f850eba2
child 1068 a29f9cec16e1
permissions -rwxr-xr-x
Add maf-convert to gff (with help from C. Plessy)
     1 #! /usr/bin/env python
     2 # Author: Martin C. Frith 2008
     3 # SPDX-License-Identifier: GPL-3.0-or-later
     4 
     5 # Read pair-wise alignments in MAF or LAST tabular format: write an
     6 # "Oxford grid", a.k.a. dotplot.
     7 
     8 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
     9 # pixels with one or more aligned nt-pairs are black.  This can look
    10 # too crowded for large genome alignments.  I tried shading each pixel
    11 # according to the number of aligned nt-pairs within it, but the
    12 # result is too faint.  How can this be done better?
    13 
    14 import collections
    15 import functools
    16 import gzip
    17 from fnmatch import fnmatchcase
    18 import logging
    19 from operator import itemgetter
    20 import subprocess
    21 import itertools, optparse, os, re, sys
    22 
    23 # Try to make PIL/PILLOW work:
    24 try:
    25     from PIL import Image, ImageDraw, ImageFont, ImageColor
    26 except ImportError:
    27     import Image, ImageDraw, ImageFont, ImageColor
    28 
    29 try:
    30     from future_builtins import zip
    31 except ImportError:
    32     pass
    33 
    34 def myOpen(fileName):  # faster than fileinput
    35     if fileName is None:
    36         return []
    37     if fileName == "-":
    38         return sys.stdin
    39     if fileName.endswith(".gz"):
    40         return gzip.open(fileName, "rt")  # xxx dubious for Python2
    41     return open(fileName)
    42 
    43 def groupByFirstItem(things):
    44     for k, v in itertools.groupby(things, itemgetter(0)):
    45         yield k, [i[1:] for i in v]
    46 
    47 def croppedBlocks(blocks, ranges1, ranges2):
    48     headBeg1, headBeg2, headSize = blocks[0]
    49     for r1 in ranges1:
    50         for r2 in ranges2:
    51             cropBeg1, cropEnd1 = r1
    52             if headBeg1 < 0:
    53                 cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
    54             cropBeg2, cropEnd2 = r2
    55             if headBeg2 < 0:
    56                 cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
    57             for beg1, beg2, size in blocks:
    58                 b1 = max(cropBeg1, beg1)
    59                 e1 = min(cropEnd1, beg1 + size)
    60                 if b1 >= e1: continue
    61                 offset = beg2 - beg1
    62                 b2 = max(cropBeg2, b1 + offset)
    63                 e2 = min(cropEnd2, e1 + offset)
    64                 if b2 >= e2: continue
    65                 yield b2 - offset, b2, e2 - b2
    66 
    67 def tabBlocks(beg1, beg2, blocks):
    68     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
    69     for i in blocks.split(","):
    70         if ":" in i:
    71             x, y = i.split(":")
    72             beg1 += int(x)
    73             beg2 += int(y)
    74         else:
    75             size = int(i)
    76             yield beg1, beg2, size
    77             beg1 += size
    78             beg2 += size
    79 
    80 def mafBlocks(beg1, beg2, seq1, seq2):
    81     '''Get the gapless blocks of an alignment, from MAF format.'''
    82     size = 0
    83     for x, y in zip(seq1, seq2):
    84         if x == "-":
    85             if size:
    86                 yield beg1, beg2, size
    87                 beg1 += size
    88                 beg2 += size
    89                 size = 0
    90             beg2 += 1
    91         elif y == "-":
    92             if size:
    93                 yield beg1, beg2, size
    94                 beg1 += size
    95                 beg2 += size
    96                 size = 0
    97             beg1 += 1
    98         else:
    99             size += 1
   100     if size: yield beg1, beg2, size
   101 
   102 def alignmentFromSegment(qrySeqName, qrySeqLen, segment):
   103     refSeqLen = sys.maxsize  # XXX
   104     refSeqName, refSeqBeg, qrySeqBeg, size = segment
   105     block = refSeqBeg, qrySeqBeg, size
   106     return refSeqName, refSeqLen, qrySeqName, qrySeqLen, [block]
   107 
   108 def alignmentInput(lines):
   109     '''Get alignments and sequence lengths, from MAF or tabular format.'''
   110     mafCount = 0
   111     qrySeqName = ""
   112     segments = []
   113     for line in lines:
   114         w = line.split()
   115         if line[0] == "#":
   116             pass
   117         elif len(w) == 1:
   118             for i in segments:
   119                 yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
   120             qrySeqName = w[0]
   121             qrySeqLen = 0
   122             segments = []
   123         elif len(w) == 2 and qrySeqName and w[1].isdigit():
   124             qrySeqLen += int(w[1])
   125         elif len(w) == 4 and qrySeqName and w[1].isdigit() and w[3].isdigit():
   126             refSeqName, refSeqBeg, refSeqEnd = w[0], int(w[1]), int(w[3])
   127             size = abs(refSeqEnd - refSeqBeg)
   128             if refSeqBeg > refSeqEnd:
   129                 refSeqBeg = -refSeqBeg
   130             segments.append((refSeqName, refSeqBeg, qrySeqLen, size))
   131             qrySeqLen += size
   132         elif line[0].isdigit():  # tabular format
   133             chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
   134             if w[4] == "-": beg1 -= seqlen1
   135             chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
   136             if w[9] == "-": beg2 -= seqlen2
   137             blocks = tabBlocks(beg1, beg2, w[11])
   138             yield chr1, seqlen1, chr2, seqlen2, blocks
   139         elif line[0] == "s":  # MAF format
   140             if mafCount == 0:
   141                 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
   142                 if w[4] == "-": beg1 -= seqlen1
   143                 mafCount = 1
   144             else:
   145                 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
   146                 if w[4] == "-": beg2 -= seqlen2
   147                 blocks = mafBlocks(beg1, beg2, seq1, seq2)
   148                 yield chr1, seqlen1, chr2, seqlen2, blocks
   149                 mafCount = 0
   150     for i in segments:
   151         yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
   152 
   153 def seqRequestFromText(text):
   154     if ":" in text:
   155         pattern, interval = text.rsplit(":", 1)
   156         if "-" in interval:
   157             beg, end = interval.rsplit("-", 1)
   158             return pattern, int(beg), int(end)  # beg may be negative
   159     return text, 0, sys.maxsize
   160 
   161 def rangesFromSeqName(seqRequests, name, seqLen):
   162     if seqRequests:
   163         base = name.split(".")[-1]  # allow for names like hg19.chr7
   164         for pat, beg, end in seqRequests:
   165             if fnmatchcase(name, pat) or fnmatchcase(base, pat):
   166                 yield max(beg, 0), min(end, seqLen)
   167     else:
   168         yield 0, seqLen
   169 
   170 def updateSeqs(coverDict, seqRanges, seqName, ranges, coveredRange):
   171     beg, end = coveredRange
   172     if beg < 0:
   173         coveredRange = -end, -beg
   174     if seqName in coverDict:
   175         coverDict[seqName].append(coveredRange)
   176     else:
   177         coverDict[seqName] = [coveredRange]
   178         for beg, end in ranges:
   179             r = seqName, beg, end
   180             seqRanges.append(r)
   181 
   182 def readAlignments(fileName, opts):
   183     '''Get alignments and sequence limits, from MAF or tabular format.'''
   184     seqRequests1 = [seqRequestFromText(i) for i in opts.seq1]
   185     seqRequests2 = [seqRequestFromText(i) for i in opts.seq2]
   186 
   187     alignments = []
   188     seqRanges1 = []
   189     seqRanges2 = []
   190     coverDict1 = {}
   191     coverDict2 = {}
   192     lines = myOpen(fileName)
   193     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   194         ranges1 = sorted(rangesFromSeqName(seqRequests1, seqName1, seqLen1))
   195         if not ranges1: continue
   196         ranges2 = sorted(rangesFromSeqName(seqRequests2, seqName2, seqLen2))
   197         if not ranges2: continue
   198         b = list(croppedBlocks(list(blocks), ranges1, ranges2))
   199         if not b: continue
   200         aln = seqName1, seqName2, b
   201         alignments.append(aln)
   202         coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
   203         updateSeqs(coverDict1, seqRanges1, seqName1, ranges1, coveredRange1)
   204         coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
   205         updateSeqs(coverDict2, seqRanges2, seqName2, ranges2, coveredRange2)
   206     return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
   207 
   208 def nameAndRangesFromDict(cropDict, seqName):
   209     if seqName in cropDict:
   210         return seqName, cropDict[seqName]
   211     n = seqName.split(".")[-1]
   212     if n in cropDict:
   213         return n, cropDict[n]
   214     return seqName, []
   215 
   216 def rangesForSecondaryAlignments(primaryRanges, seqLen):
   217     if primaryRanges:
   218         return primaryRanges
   219     return [(0, seqLen)]
   220 
   221 def readSecondaryAlignments(opts, cropRanges1, cropRanges2):
   222     cropDict1 = dict(groupByFirstItem(cropRanges1))
   223     cropDict2 = dict(groupByFirstItem(cropRanges2))
   224 
   225     alignments = []
   226     seqRanges1 = []
   227     seqRanges2 = []
   228     coverDict1 = {}
   229     coverDict2 = {}
   230     lines = myOpen(opts.alignments)
   231     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   232         seqName1, ranges1 = nameAndRangesFromDict(cropDict1, seqName1)
   233         seqName2, ranges2 = nameAndRangesFromDict(cropDict2, seqName2)
   234         if not ranges1 and not ranges2:
   235             continue
   236         r1 = rangesForSecondaryAlignments(ranges1, seqLen1)
   237         r2 = rangesForSecondaryAlignments(ranges2, seqLen2)
   238         b = list(croppedBlocks(list(blocks), r1, r2))
   239         if not b: continue
   240         aln = seqName1, seqName2, b
   241         alignments.append(aln)
   242         if not ranges1:
   243             coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
   244             updateSeqs(coverDict1, seqRanges1, seqName1, r1, coveredRange1)
   245         if not ranges2:
   246             coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
   247             updateSeqs(coverDict2, seqRanges2, seqName2, r2, coveredRange2)
   248     return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
   249 
   250 def twoValuesFromOption(text, separator):
   251     if separator in text:
   252         return text.split(separator)
   253     return text, text
   254 
   255 def mergedRanges(ranges):
   256     oldBeg, maxEnd = ranges[0]
   257     for beg, end in ranges:
   258         if beg > maxEnd:
   259             yield oldBeg, maxEnd
   260             oldBeg = beg
   261             maxEnd = end
   262         elif end > maxEnd:
   263             maxEnd = end
   264     yield oldBeg, maxEnd
   265 
   266 def mergedRangesPerSeq(coverDict):
   267     for k, v in coverDict.items():
   268         v.sort()
   269         yield k, list(mergedRanges(v))
   270 
   271 def coveredLength(mergedCoverDict):
   272     return sum(sum(e - b for b, e in v) for v in mergedCoverDict.values())
   273 
   274 def trimmed(seqRanges, coverDict, minAlignedBases, maxGapFrac, endPad, midPad):
   275     maxEndGapFrac, maxMidGapFrac = twoValuesFromOption(maxGapFrac, ",")
   276     maxEndGap = max(float(maxEndGapFrac) * minAlignedBases, endPad * 1.0)
   277     maxMidGap = max(float(maxMidGapFrac) * minAlignedBases, midPad * 2.0)
   278 
   279     for seqName, rangeBeg, rangeEnd in seqRanges:
   280         seqBlocks = coverDict[seqName]
   281         blocks = [i for i in seqBlocks if i[0] < rangeEnd and i[1] > rangeBeg]
   282         if blocks[0][0] - rangeBeg > maxEndGap:
   283             rangeBeg = blocks[0][0] - endPad
   284         for j, y in enumerate(blocks):
   285             if j:
   286                 x = blocks[j - 1]
   287                 if y[0] - x[1] > maxMidGap:
   288                     yield seqName, rangeBeg, x[1] + midPad
   289                     rangeBeg = y[0] - midPad
   290         if rangeEnd - blocks[-1][1] > maxEndGap:
   291             rangeEnd = blocks[-1][1] + endPad
   292         yield seqName, rangeBeg, rangeEnd
   293 
   294 def rangesWithStrandInfo(seqRanges, strandOpt, alignments, seqIndex):
   295     if strandOpt == "1":
   296         forwardMinusReverse = collections.defaultdict(int)
   297         for i in alignments:
   298             blocks = i[2]
   299             beg1, beg2, size = blocks[0]
   300             numOfAlignedLetterPairs = sum(i[2] for i in blocks)
   301             if (beg1 < 0) != (beg2 < 0):  # opposite-strand alignment
   302                 numOfAlignedLetterPairs *= -1
   303             forwardMinusReverse[i[seqIndex]] += numOfAlignedLetterPairs
   304     strandNum = 0
   305     for seqName, beg, end in seqRanges:
   306         if strandOpt == "1":
   307             strandNum = 1 if forwardMinusReverse[seqName] >= 0 else 2
   308         yield seqName, beg, end, strandNum
   309 
   310 def natural_sort_key(my_string):
   311     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
   312     parts = re.split(r'(\d+)', my_string)
   313     parts[1::2] = map(int, parts[1::2])
   314     return parts
   315 
   316 def nameKey(oneSeqRanges):
   317     return natural_sort_key(oneSeqRanges[0][0])
   318 
   319 def sizeKey(oneSeqRanges):
   320     return sum(b - e for n, b, e, s in oneSeqRanges), nameKey(oneSeqRanges)
   321 
   322 def alignmentKey(seqNamesToLists, oneSeqRanges):
   323     seqName = oneSeqRanges[0][0]
   324     alignmentsOfThisSequence = seqNamesToLists[seqName]
   325     numOfAlignedLetterPairs = sum(i[3] for i in alignmentsOfThisSequence)
   326     toMiddle = numOfAlignedLetterPairs // 2
   327     for i in alignmentsOfThisSequence:
   328         toMiddle -= i[3]
   329         if toMiddle < 0:
   330             return i[1:3]  # sequence-rank and "position" of this alignment
   331 
   332 def rankAndFlipPerSeq(seqRanges):
   333     rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
   334     for rank, group in enumerate(rangesGroupedBySeqName):
   335         seqName, ranges = group
   336         strandNum = next(ranges)[3]
   337         flip = 1 if strandNum < 2 else -1
   338         yield seqName, (rank, flip)
   339 
   340 def alignmentSortData(alignments, seqIndex, otherNamesToRanksAndFlips):
   341     otherIndex = 1 - seqIndex
   342     for i in alignments:
   343         blocks = i[2]
   344         otherRank, otherFlip = otherNamesToRanksAndFlips[i[otherIndex]]
   345         otherPos = otherFlip * abs(blocks[0][otherIndex] +
   346                                    blocks[-1][otherIndex] + blocks[-1][2])
   347         numOfAlignedLetterPairs = sum(i[2] for i in blocks)
   348         yield i[seqIndex], otherRank, otherPos, numOfAlignedLetterPairs
   349 
   350 def mySortedRanges(seqRanges, sortOpt, seqIndex, alignments, otherRanges):
   351     rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
   352     g = [list(ranges) for seqName, ranges in rangesGroupedBySeqName]
   353     for i in g:
   354         if i[0][3] > 1:
   355             i.reverse()
   356     if sortOpt == "1":
   357         g.sort(key=nameKey)
   358     if sortOpt == "2":
   359         g.sort(key=sizeKey)
   360     if sortOpt == "3":
   361         otherNamesToRanksAndFlips = dict(rankAndFlipPerSeq(otherRanges))
   362         alns = sorted(alignmentSortData(alignments, seqIndex,
   363                                         otherNamesToRanksAndFlips))
   364         alnsGroupedBySeqName = itertools.groupby(alns, itemgetter(0))
   365         seqNamesToLists = dict((k, list(v)) for k, v in alnsGroupedBySeqName)
   366         g.sort(key=functools.partial(alignmentKey, seqNamesToLists))
   367     return [j for i in g for j in i]
   368 
   369 def allSortedRanges(opts, alignments, alignmentsB,
   370                     seqRanges1, seqRangesB1, seqRanges2, seqRangesB2):
   371     o1, oB1 = twoValuesFromOption(opts.strands1, ":")
   372     o2, oB2 = twoValuesFromOption(opts.strands2, ":")
   373     if o1 == "1" and o2 == "1":
   374         raise Exception("the strand options have circular dependency")
   375     seqRanges1 = list(rangesWithStrandInfo(seqRanges1, o1, alignments, 0))
   376     seqRanges2 = list(rangesWithStrandInfo(seqRanges2, o2, alignments, 1))
   377     seqRangesB1 = list(rangesWithStrandInfo(seqRangesB1, oB1, alignmentsB, 0))
   378     seqRangesB2 = list(rangesWithStrandInfo(seqRangesB2, oB2, alignmentsB, 1))
   379 
   380     o1, oB1 = twoValuesFromOption(opts.sort1, ":")
   381     o2, oB2 = twoValuesFromOption(opts.sort2, ":")
   382     if o1 == "3" and o2 == "3":
   383         raise Exception("the sort options have circular dependency")
   384     if o1 != "3":
   385         s1 = mySortedRanges(seqRanges1, o1, None, None, None)
   386     if o2 != "3":
   387         s2 = mySortedRanges(seqRanges2, o2, None, None, None)
   388     if o1 == "3":
   389         s1 = mySortedRanges(seqRanges1, o1, 0, alignments, s2)
   390     if o2 == "3":
   391         s2 = mySortedRanges(seqRanges2, o2, 1, alignments, s1)
   392     t1 = mySortedRanges(seqRangesB1, oB1, 0, alignmentsB, s2)
   393     t2 = mySortedRanges(seqRangesB2, oB2, 1, alignmentsB, s1)
   394     return s1 + t1, s2 + t2
   395 
   396 def prettyNum(n):
   397     t = str(n)
   398     groups = []
   399     while t:
   400         groups.append(t[-3:])
   401         t = t[:-3]
   402     return ",".join(reversed(groups))
   403 
   404 def sizeText(size):
   405     suffixes = "bp", "kb", "Mb", "Gb"
   406     for i, x in enumerate(suffixes):
   407         j = 10 ** (i * 3)
   408         if size < j * 10:
   409             return "%.2g" % (1.0 * size / j) + x
   410         if size < j * 1000 or i == len(suffixes) - 1:
   411             return "%.0f" % (1.0 * size / j) + x
   412 
   413 def labelText(seqRange, labelOpt):
   414     seqName, beg, end, strandNum = seqRange
   415     if labelOpt == 1:
   416         return seqName + ": " + sizeText(end - beg)
   417     if labelOpt == 2:
   418         return seqName + ":" + prettyNum(beg) + ": " + sizeText(end - beg)
   419     if labelOpt == 3:
   420         return seqName + ":" + prettyNum(beg) + "-" + prettyNum(end)
   421     return seqName
   422 
   423 def rangeLabels(seqRanges, labelOpt, font, fontsize, image_mode, textRot):
   424     if fontsize:
   425         image_size = 1, 1
   426         im = Image.new(image_mode, image_size)
   427         draw = ImageDraw.Draw(im)
   428     x = y = 0
   429     for r in seqRanges:
   430         text = labelText(r, labelOpt)
   431         if fontsize:
   432             x, y = draw.textsize(text, font=font)
   433             if textRot:
   434                 x, y = y, x
   435         yield text, x, y, r[3]
   436 
   437 def dataFromRanges(sortedRanges, font, fontSize, imageMode, labelOpt, textRot):
   438     for seqName, rangeBeg, rangeEnd, strandNum in sortedRanges:
   439         out = [seqName, str(rangeBeg), str(rangeEnd)]
   440         if strandNum > 0:
   441             out.append(".+-"[strandNum])
   442         logging.info("\t".join(out))
   443     logging.info("")
   444     rangeSizes = [e - b for n, b, e, s in sortedRanges]
   445     labs = list(rangeLabels(sortedRanges, labelOpt, font, fontSize,
   446                             imageMode, textRot))
   447     margin = max(i[2] for i in labs)
   448     # xxx the margin may be too big, because some labels may get omitted
   449     return rangeSizes, labs, margin
   450 
   451 def div_ceil(x, y):
   452     '''Return x / y rounded up.'''
   453     q, r = divmod(x, y)
   454     return q + (r != 0)
   455 
   456 def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
   457     '''Get the minimum bp-per-pixel that fits in the size limit.'''
   458     logging.info("choosing bp per pixel...")
   459     numOfRanges = len(rangeSizes)
   460     maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
   461     if maxPixelsInRanges < numOfRanges:
   462         raise Exception("can't fit the image: too many sequences?")
   463     negLimit = -maxPixelsInRanges
   464     negBpPerPix = sum(rangeSizes) // negLimit
   465     while True:
   466         if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
   467             return -negBpPerPix
   468         negBpPerPix -= 1
   469 
   470 def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
   471     '''Get the start pixel for each range.'''
   472     rangePixBegs = []
   473     pix_tot = margin - pixTweenRanges
   474     for i in rangePixLens:
   475         pix_tot += pixTweenRanges
   476         rangePixBegs.append(pix_tot)
   477         pix_tot += i
   478     return rangePixBegs
   479 
   480 def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
   481     '''Return pixel information about the ranges.'''
   482     rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
   483     rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
   484     tot_pix = rangePixBegs[-1] + rangePixLens[-1]
   485     return rangePixBegs, rangePixLens, tot_pix
   486 
   487 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
   488     while True:
   489         q1, r1 = divmod(beg1, bp_per_pix)
   490         q2, r2 = divmod(beg2, bp_per_pix)
   491         hits[q2 * width + q1] |= 1
   492         next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
   493         if next_pix >= size: break
   494         beg1 += next_pix
   495         beg2 += next_pix
   496         size -= next_pix
   497 
   498 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
   499     while True:
   500         q1, r1 = divmod(beg1, bp_per_pix)
   501         q2, r2 = divmod(beg2, bp_per_pix)
   502         hits[q2 * width + q1] |= 2
   503         next_pix = min(bp_per_pix - r1, r2 + 1)
   504         if next_pix >= size: break
   505         beg1 += next_pix
   506         beg2 -= next_pix
   507         size -= next_pix
   508 
   509 def strandAndOrigin(ranges, beg, size):
   510     isReverseStrand = (beg < 0)
   511     if isReverseStrand:
   512         beg = -(beg + size)
   513     for rangeBeg, rangeEnd, isReverseRange, origin in ranges:
   514         if rangeEnd > beg:  # assumes the ranges are sorted
   515             return (isReverseStrand != isReverseRange), origin
   516 
   517 def alignmentPixels(width, height, alignments, bp_per_pix,
   518                     rangeDict1, rangeDict2):
   519     hits = [0] * (width * height)  # the image data
   520     for seq1, seq2, blocks in alignments:
   521         beg1, beg2, size = blocks[0]
   522         isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
   523         isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
   524         for beg1, beg2, size in blocks:
   525             if isReverse1:
   526                 beg1 = -(beg1 + size)
   527                 beg2 = -(beg2 + size)
   528             if isReverse1 == isReverse2:
   529                 drawLineForward(hits, width, bp_per_pix,
   530                                 ori1 + beg1, ori2 + beg2, size)
   531             else:
   532                 drawLineReverse(hits, width, bp_per_pix,
   533                                 ori1 + beg1, ori2 - beg2 - 1, size)
   534     return hits
   535 
   536 def orientedBlocks(alignments, seqIndex):
   537     otherIndex = 1 - seqIndex
   538     for a in alignments:
   539         seq1, seq2, blocks = a
   540         for b in blocks:
   541             beg1, beg2, size = b
   542             if b[seqIndex] < 0:
   543                 b = -(beg1 + size), -(beg2 + size), size
   544             yield a[seqIndex], b[seqIndex], a[otherIndex], b[otherIndex], size
   545 
   546 def drawJoins(im, alignments, bpPerPix, seqIndex, rangeDict1, rangeDict2):
   547     blocks = orientedBlocks(alignments, seqIndex)
   548     oldSeq1 = ""
   549     for seq1, beg1, seq2, beg2, size in sorted(blocks):
   550         isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
   551         isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
   552         end1 = beg1 + size - 1
   553         end2 = beg2 + size - 1
   554         if isReverse1:
   555             beg1 = -(beg1 + 1)
   556             end1 = -(end1 + 1)
   557         if isReverse2:
   558             beg2 = -(beg2 + 1)
   559             end2 = -(end2 + 1)
   560         newPix1 = (ori1 + beg1) // bpPerPix
   561         newPix2 = (ori2 + beg2) // bpPerPix
   562         if seq1 == oldSeq1:
   563             lowerPix2 = min(oldPix2, newPix2)
   564             upperPix2 = max(oldPix2, newPix2)
   565             midPix1 = (oldPix1 + newPix1) // 2
   566             if isReverse1:
   567                 midPix1 = (oldPix1 + newPix1 + 1) // 2
   568                 oldPix1, newPix1 = newPix1, oldPix1
   569             if upperPix2 - lowerPix2 > 1 and oldPix1 <= newPix1 <= oldPix1 + 1:
   570                 if seqIndex == 0:
   571                     box = midPix1, lowerPix2, midPix1 + 1, upperPix2 + 1
   572                 else:
   573                     box = lowerPix2, midPix1, upperPix2 + 1, midPix1 + 1
   574                 im.paste("lightgray", box)
   575         oldPix1 = (ori1 + end1) // bpPerPix
   576         oldPix2 = (ori2 + end2) // bpPerPix
   577         oldSeq1 = seq1
   578 
   579 def expandedSeqDict(seqDict):
   580     '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   581     newDict = seqDict.copy()
   582     for name, x in seqDict.items():
   583         if "." in name:
   584             base = name.split(".")[-1]
   585             if base in newDict:  # an ambiguous case was found:
   586                 return seqDict   # so give up completely
   587             newDict[base] = x
   588     return newDict
   589 
   590 def readBed(fileName, rangeDict):
   591     for line in myOpen(fileName):
   592         w = line.split()
   593         if not w: continue
   594         seqName = w[0]
   595         if seqName not in rangeDict: continue
   596         beg = int(w[1])
   597         end = int(w[2])
   598         layer = 900
   599         color = "#fbf"
   600         if len(w) > 4:
   601             if w[4] != ".":
   602                 layer = float(w[4])
   603             if len(w) > 5:
   604                 if len(w) > 8 and w[8].count(",") == 2:
   605                     color = "rgb(" + w[8] + ")"
   606                 else:
   607                     strand = w[5]
   608                     isRev = rangeDict[seqName][0][2]
   609                     if strand == "+" and not isRev or strand == "-" and isRev:
   610                         color = "#ffe8e8"
   611                     if strand == "-" and not isRev or strand == "+" and isRev:
   612                         color = "#e8e8ff"
   613         yield layer, color, seqName, beg, end
   614 
   615 def commaSeparatedInts(text):
   616     return map(int, text.rstrip(",").split(","))
   617 
   618 def readGenePred(opts, fileName, rangeDict):
   619     for line in myOpen(fileName):
   620         fields = line.split()
   621         if not fields: continue
   622         if fields[2] not in "+-": fields = fields[1:]
   623         seqName = fields[1]
   624         if seqName not in rangeDict: continue
   625         #strand = fields[2]
   626         cdsBeg = int(fields[5])
   627         cdsEnd = int(fields[6])
   628         exonBegs = commaSeparatedInts(fields[8])
   629         exonEnds = commaSeparatedInts(fields[9])
   630         for beg, end in zip(exonBegs, exonEnds):
   631             yield 300, opts.exon_color, seqName, beg, end
   632             b = max(beg, cdsBeg)
   633             e = min(end, cdsEnd)
   634             if b < e: yield 400, opts.cds_color, seqName, b, e
   635 
   636 def readRmsk(fileName, rangeDict):
   637     for line in myOpen(fileName):
   638         fields = line.split()
   639         if len(fields) == 17:  # rmsk.txt
   640             seqName = fields[5]
   641             if seqName not in rangeDict: continue  # do this ASAP for speed
   642             beg = int(fields[6])
   643             end = int(fields[7])
   644             strand = fields[9]
   645             repeatClass = fields[11]
   646         elif len(fields) == 15:  # .out
   647             seqName = fields[4]
   648             if seqName not in rangeDict: continue
   649             beg = int(fields[5]) - 1
   650             end = int(fields[6])
   651             strand = fields[8]
   652             repeatClass = fields[10].split("/")[0]
   653         else:
   654             continue
   655         if repeatClass in ("Low_complexity", "Simple_repeat", "Satellite"):
   656             yield 200, "#fbf", seqName, beg, end
   657         elif (strand == "+") != rangeDict[seqName][0][2]:
   658             yield 100, "#ffe8e8", seqName, beg, end
   659         else:
   660             yield 100, "#e8e8ff", seqName, beg, end
   661 
   662 def isExtraFirstGapField(fields):
   663     return fields[4].isdigit()
   664 
   665 def readGaps(opts, fileName, rangeDict):
   666     '''Read locations of unsequenced gaps, from an agp or gap file.'''
   667     for line in myOpen(fileName):
   668         w = line.split()
   669         if not w or w[0][0] == "#": continue
   670         if isExtraFirstGapField(w): w = w[1:]
   671         if w[4] not in "NU": continue
   672         seqName = w[0]
   673         if seqName not in rangeDict: continue
   674         end = int(w[2])
   675         beg = end - int(w[5])  # zero-based coordinate
   676         if w[7] == "yes":
   677             yield 3000, opts.bridged_color, seqName, beg, end
   678         else:
   679             yield 2000, opts.unbridged_color, seqName, beg, end
   680 
   681 def bedBoxes(beds, rangeDict, margin, edge, isTop, bpPerPix):
   682     for layer, color, seqName, bedBeg, bedEnd in beds:
   683         for rangeBeg, rangeEnd, isReverseRange, origin in rangeDict[seqName]:
   684             beg = max(bedBeg, rangeBeg)
   685             end = min(bedEnd, rangeEnd)
   686             if beg >= end: continue
   687             if isReverseRange:
   688                 beg, end = -end, -beg
   689             if layer <= 1000:
   690                 # include partly-covered pixels
   691                 b = (origin + beg) // bpPerPix
   692                 e = div_ceil(origin + end, bpPerPix)
   693             else:
   694                 # exclude partly-covered pixels
   695                 b = div_ceil(origin + beg, bpPerPix)
   696                 e = (origin + end) // bpPerPix
   697                 if e <= b: continue
   698                 if bedEnd >= rangeEnd:  # include partly-covered end pixels
   699                     if isReverseRange:
   700                         b = (origin + beg) // bpPerPix
   701                     else:
   702                         e = div_ceil(origin + end, bpPerPix)
   703             if isTop:
   704                 box = b, margin, e, edge
   705             else:
   706                 box = margin, b, edge, e
   707             yield layer, color, box
   708 
   709 def drawAnnotations(im, boxes):
   710     # xxx use partial transparency for different-color overlaps?
   711     for layer, color, box in boxes:
   712         im.paste(color, box)
   713 
   714 def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
   715     '''Return axis labels with endpoint & sort-order information.'''
   716     maxWidth = end - beg
   717     for i, j, k in zip(labels, rangePixBegs, rangePixLens):
   718         text, textWidth, textHeight, strandNum = i
   719         if textWidth > maxWidth:
   720             continue
   721         labelBeg = j + (k - textWidth) // 2
   722         labelEnd = labelBeg + textWidth
   723         sortKey = textWidth - k
   724         if labelBeg < beg:
   725             sortKey += maxWidth * (beg - labelBeg)
   726             labelBeg = beg
   727             labelEnd = beg + textWidth
   728         if labelEnd > end:
   729             sortKey += maxWidth * (labelEnd - end)
   730             labelEnd = end
   731             labelBeg = end - textWidth
   732         yield sortKey, labelBeg, labelEnd, text, textHeight, strandNum
   733 
   734 def nonoverlappingLabels(labels, minPixTweenLabels):
   735     '''Get a subset of non-overlapping axis labels, greedily.'''
   736     out = []
   737     for i in labels:
   738         beg = i[1] - minPixTweenLabels
   739         end = i[2] + minPixTweenLabels
   740         if all(j[2] <= beg or j[1] >= end for j in out):
   741             out.append(i)
   742     return out
   743 
   744 def axisImage(labels, rangePixBegs, rangePixLens, textRot,
   745               textAln, font, image_mode, opts):
   746     '''Make an image of axis labels.'''
   747     beg = rangePixBegs[0]
   748     end = rangePixBegs[-1] + rangePixLens[-1]
   749     margin = max(i[2] for i in labels)
   750     labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
   751     minPixTweenLabels = 0 if textRot else opts.label_space
   752     labels = nonoverlappingLabels(labels, minPixTweenLabels)
   753     image_size = (margin, end) if textRot else (end, margin)
   754     im = Image.new(image_mode, image_size, opts.margin_color)
   755     draw = ImageDraw.Draw(im)
   756     for sortKey, labelBeg, labelEnd, text, textHeight, strandNum in labels:
   757         base = margin - textHeight if textAln else 0
   758         position = (base, labelBeg) if textRot else (labelBeg, base)
   759         fill = ("black", opts.forwardcolor, opts.reversecolor)[strandNum]
   760         draw.text(position, text, font=font, fill=fill)
   761     return im
   762 
   763 def rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
   764     for i, j, k in zip(sortedRanges, rangePixBegs, rangePixLens):
   765         seqName, rangeBeg, rangeEnd, strandNum = i
   766         isReverseRange = (strandNum > 1)
   767         if isReverseRange:
   768             origin = bpPerPix * (j + k) + rangeBeg
   769         else:
   770             origin = bpPerPix * j - rangeBeg
   771         yield seqName, (rangeBeg, rangeEnd, isReverseRange, origin)
   772 
   773 def rangesPerSeq(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
   774     a = rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix)
   775     for k, v in itertools.groupby(a, itemgetter(0)):
   776         yield k, sorted(i[1] for i in v)
   777 
   778 def getFont(opts):
   779     if opts.fontfile:
   780         return ImageFont.truetype(opts.fontfile, opts.fontsize)
   781     fileNames = []
   782     try:
   783         x = ["fc-match", "-f%{file}", "arial"]
   784         p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
   785                              universal_newlines=True)
   786         out, err = p.communicate()
   787         fileNames.append(out)
   788     except OSError as e:
   789         logging.info("fc-match error: " + str(e))
   790     fileNames.append("/Library/Fonts/Arial.ttf")  # for Mac
   791     for i in fileNames:
   792         try:
   793             font = ImageFont.truetype(i, opts.fontsize)
   794             logging.info("font: " + i)
   795             return font
   796         except IOError as e:
   797             logging.info("font load error: " + str(e))
   798     return ImageFont.load_default()
   799 
   800 def sequenceSizesAndNames(seqRanges):
   801     for seqName, ranges in itertools.groupby(seqRanges, itemgetter(0)):
   802         size = sum(e - b for n, b, e in ranges)
   803         yield size, seqName
   804 
   805 def biggestSequences(seqRanges, maxNumOfSequences):
   806     s = sorted(sequenceSizesAndNames(seqRanges), reverse=True)
   807     if len(s) > maxNumOfSequences:
   808         logging.warning("too many sequences - discarding the smallest ones")
   809         s = s[:maxNumOfSequences]
   810     return set(i[1] for i in s)
   811 
   812 def remainingSequenceRanges(seqRanges, alignments, seqIndex):
   813     remainingSequences = set(i[seqIndex] for i in alignments)
   814     return [i for i in seqRanges if i[0] in remainingSequences]
   815 
   816 def lastDotplot(opts, args):
   817     logLevel = logging.INFO if opts.verbose else logging.WARNING
   818     logging.basicConfig(format="%(filename)s: %(message)s", level=logLevel)
   819 
   820     font = getFont(opts)
   821     image_mode = 'RGB'
   822     forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
   823     reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
   824     zipped_colors = zip(forward_color, reverse_color)
   825     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
   826 
   827     maxGap1, maxGapB1 = twoValuesFromOption(opts.max_gap1, ":")
   828     maxGap2, maxGapB2 = twoValuesFromOption(opts.max_gap2, ":")
   829 
   830     logging.info("reading alignments...")
   831     alnData = readAlignments(args[0], opts)
   832     alignments, seqRanges1, coverDict1, seqRanges2, coverDict2 = alnData
   833     if not alignments: raise Exception("there are no alignments")
   834     logging.info("cutting...")
   835     coverDict1 = dict(mergedRangesPerSeq(coverDict1))
   836     coverDict2 = dict(mergedRangesPerSeq(coverDict2))
   837     minAlignedBases = min(coveredLength(coverDict1), coveredLength(coverDict2))
   838     pad = int(opts.pad * minAlignedBases)
   839     cutRanges1 = list(trimmed(seqRanges1, coverDict1, minAlignedBases,
   840                               maxGap1, pad, pad))
   841     cutRanges2 = list(trimmed(seqRanges2, coverDict2, minAlignedBases,
   842                               maxGap2, pad, pad))
   843 
   844     biggestSeqs1 = biggestSequences(cutRanges1, opts.maxseqs)
   845     cutRanges1 = [i for i in cutRanges1 if i[0] in biggestSeqs1]
   846     alignments = [i for i in alignments if i[0] in biggestSeqs1]
   847     cutRanges2 = remainingSequenceRanges(cutRanges2, alignments, 1)
   848 
   849     biggestSeqs2 = biggestSequences(cutRanges2, opts.maxseqs)
   850     cutRanges2 = [i for i in cutRanges2 if i[0] in biggestSeqs2]
   851     alignments = [i for i in alignments if i[1] in biggestSeqs2]
   852     cutRanges1 = remainingSequenceRanges(cutRanges1, alignments, 0)
   853 
   854     logging.info("reading secondary alignments...")
   855     alnDataB = readSecondaryAlignments(opts, cutRanges1, cutRanges2)
   856     alignmentsB, seqRangesB1, coverDictB1, seqRangesB2, coverDictB2 = alnDataB
   857     logging.info("cutting...")
   858     coverDictB1 = dict(mergedRangesPerSeq(coverDictB1))
   859     coverDictB2 = dict(mergedRangesPerSeq(coverDictB2))
   860     cutRangesB1 = trimmed(seqRangesB1, coverDictB1, minAlignedBases,
   861                           maxGapB1, 0, 0)
   862     cutRangesB2 = trimmed(seqRangesB2, coverDictB2, minAlignedBases,
   863                           maxGapB2, 0, 0)
   864 
   865     logging.info("sorting...")
   866     sortOut = allSortedRanges(opts, alignments, alignmentsB,
   867                               cutRanges1, cutRangesB1, cutRanges2, cutRangesB2)
   868     sortedRanges1, sortedRanges2 = sortOut
   869 
   870     textRot1 = "vertical".startswith(opts.rot1)
   871     i1 = dataFromRanges(sortedRanges1, font,
   872                         opts.fontsize, image_mode, opts.labels1, textRot1)
   873     rangeSizes1, labelData1, tMargin = i1
   874 
   875     textRot2 = "horizontal".startswith(opts.rot2)
   876     i2 = dataFromRanges(sortedRanges2, font,
   877                         opts.fontsize, image_mode, opts.labels2, textRot2)
   878     rangeSizes2, labelData2, lMargin = i2
   879 
   880     maxPixels1 = opts.width  - lMargin
   881     maxPixels2 = opts.height - tMargin
   882     bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
   883     bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
   884     bpPerPix = max(bpPerPix1, bpPerPix2)
   885     logging.info("bp per pixel = " + str(bpPerPix))
   886 
   887     p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
   888     rangePixBegs1, rangePixLens1, width = p1
   889     rangeDict1 = dict(rangesPerSeq(sortedRanges1, rangePixBegs1,
   890                                    rangePixLens1, bpPerPix))
   891 
   892     p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
   893     rangePixBegs2, rangePixLens2, height = p2
   894     rangeDict2 = dict(rangesPerSeq(sortedRanges2, rangePixBegs2,
   895                                    rangePixLens2, bpPerPix))
   896 
   897     logging.info("width:  " + str(width))
   898     logging.info("height: " + str(height))
   899 
   900     logging.info("processing alignments...")
   901     allAlignments = alignments + alignmentsB
   902     hits = alignmentPixels(width, height, allAlignments, bpPerPix,
   903                            rangeDict1, rangeDict2)
   904 
   905     logging.info("reading annotations...")
   906 
   907     rangeDict1 = expandedSeqDict(rangeDict1)
   908     beds1 = itertools.chain(readBed(opts.bed1, rangeDict1),
   909                             readRmsk(opts.rmsk1, rangeDict1),
   910                             readGenePred(opts, opts.genePred1, rangeDict1),
   911                             readGaps(opts, opts.gap1, rangeDict1))
   912     b1 = bedBoxes(beds1, rangeDict1, tMargin, height, True, bpPerPix)
   913 
   914     rangeDict2 = expandedSeqDict(rangeDict2)
   915     beds2 = itertools.chain(readBed(opts.bed2, rangeDict2),
   916                             readRmsk(opts.rmsk2, rangeDict2),
   917                             readGenePred(opts, opts.genePred2, rangeDict2),
   918                             readGaps(opts, opts.gap2, rangeDict2))
   919     b2 = bedBoxes(beds2, rangeDict2, lMargin, width, False, bpPerPix)
   920 
   921     boxes = sorted(itertools.chain(b1, b2))
   922 
   923     logging.info("drawing...")
   924 
   925     image_size = width, height
   926     im = Image.new(image_mode, image_size, opts.background_color)
   927 
   928     drawAnnotations(im, boxes)
   929 
   930     joinA, joinB = twoValuesFromOption(opts.join, ":")
   931     if joinA in "13":
   932         drawJoins(im, alignments, bpPerPix, 0, rangeDict1, rangeDict2)
   933     if joinB in "13":
   934         drawJoins(im, alignmentsB, bpPerPix, 0, rangeDict1, rangeDict2)
   935     if joinA in "23":
   936         drawJoins(im, alignments, bpPerPix, 1, rangeDict2, rangeDict1)
   937     if joinB in "23":
   938         drawJoins(im, alignmentsB, bpPerPix, 1, rangeDict2, rangeDict1)
   939 
   940     for i in range(height):
   941         for j in range(width):
   942             store_value = hits[i * width + j]
   943             xy = j, i
   944             if   store_value == 1: im.putpixel(xy, forward_color)
   945             elif store_value == 2: im.putpixel(xy, reverse_color)
   946             elif store_value == 3: im.putpixel(xy, overlap_color)
   947 
   948     if opts.fontsize != 0:
   949         axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
   950                           textRot1, False, font, image_mode, opts)
   951         if textRot1:
   952             axis1 = axis1.transpose(Image.ROTATE_90)
   953         axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
   954                           textRot2, textRot2, font, image_mode, opts)
   955         if not textRot2:
   956             axis2 = axis2.transpose(Image.ROTATE_270)
   957         im.paste(axis1, (0, 0))
   958         im.paste(axis2, (0, 0))
   959 
   960     for i in rangePixBegs1[1:]:
   961         box = i - opts.border_pixels, tMargin, i, height
   962         im.paste(opts.border_color, box)
   963 
   964     for i in rangePixBegs2[1:]:
   965         box = lMargin, i - opts.border_pixels, width, i
   966         im.paste(opts.border_color, box)
   967 
   968     im.save(args[1])
   969 
   970 if __name__ == "__main__":
   971     usage = """%prog --help
   972    or: %prog [options] maf-or-tab-alignments dotplot.png
   973    or: %prog [options] maf-or-tab-alignments dotplot.gif
   974    or: ..."""
   975     description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
   976     op = optparse.OptionParser(usage=usage, description=description)
   977     op.add_option("-v", "--verbose", action="count",
   978                   help="show progress messages & data about the plot")
   979     # Replace "width" & "height" with a single "length" option?
   980     op.add_option("-x", "--width", metavar="INT", type="int", default=1000,
   981                   help="maximum width in pixels (default: %default)")
   982     op.add_option("-y", "--height", metavar="INT", type="int", default=1000,
   983                   help="maximum height in pixels (default: %default)")
   984     op.add_option("-m", "--maxseqs", type="int", default=100, metavar="M",
   985                   help="maximum number of horizontal or vertical sequences "
   986                   "(default=%default)")
   987     op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
   988                   default=[],
   989                   help="which sequences to show from the 1st genome")
   990     op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
   991                   default=[],
   992                   help="which sequences to show from the 2nd genome")
   993     op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
   994                   help="color for forward alignments (default: %default)")
   995     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
   996                   help="color for reverse alignments (default: %default)")
   997     op.add_option("--alignments", metavar="FILE", help="secondary alignments")
   998     op.add_option("--sort1", default="1", metavar="N",
   999                   help="genome1 sequence order: 0=input order, 1=name order, "
  1000                   "2=length order, 3=alignment order (default=%default)")
  1001     op.add_option("--sort2", default="1", metavar="N",
  1002                   help="genome2 sequence order: 0=input order, 1=name order, "
  1003                   "2=length order, 3=alignment order (default=%default)")
  1004     op.add_option("--strands1", default="0", metavar="N", help=
  1005                   "genome1 sequence orientation: 0=forward orientation, "
  1006                   "1=alignment orientation (default=%default)")
  1007     op.add_option("--strands2", default="0", metavar="N", help=
  1008                   "genome2 sequence orientation: 0=forward orientation, "
  1009                   "1=alignment orientation (default=%default)")
  1010     op.add_option("--max-gap1", metavar="FRAC", default="0.5,2", help=
  1011                   "maximum unaligned (end,mid) gap in genome1: "
  1012                   "fraction of aligned length (default=%default)")
  1013     op.add_option("--max-gap2", metavar="FRAC", default="0.5,2", help=
  1014                   "maximum unaligned (end,mid) gap in genome2: "
  1015                   "fraction of aligned length (default=%default)")
  1016     op.add_option("--pad", metavar="FRAC", type="float", default=0.04, help=
  1017                   "pad length when cutting unaligned gaps: "
  1018                   "fraction of aligned length (default=%default)")
  1019     op.add_option("-j", "--join", default="0", metavar="N", help=
  1020                   "join: 0=nothing, 1=alignments adjacent in genome1, "
  1021                   "2=alignments adjacent in genome2 (default=%default)")
  1022     op.add_option("--border-pixels", metavar="INT", type="int", default=1,
  1023                   help="number of pixels between sequences (default=%default)")
  1024     op.add_option("--border-color", metavar="COLOR", default="black",
  1025                   help="color for pixels between sequences (default=%default)")
  1026     # --break-color and/or --break-pixels for intra-sequence breaks?
  1027     op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
  1028                   help="margin color")
  1029 
  1030     og = optparse.OptionGroup(op, "Text options")
  1031     og.add_option("-f", "--fontfile", metavar="FILE",
  1032                   help="TrueType or OpenType font file")
  1033     og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
  1034                   help="TrueType or OpenType font size (default: %default)")
  1035     og.add_option("--labels1", type="int", default=0, metavar="N", help=
  1036                   "genome1 labels: 0=name, 1=name:length, "
  1037                   "2=name:start:length, 3=name:start-end (default=%default)")
  1038     og.add_option("--labels2", type="int", default=0, metavar="N", help=
  1039                   "genome2 labels: 0=name, 1=name:length, "
  1040                   "2=name:start:length, 3=name:start-end (default=%default)")
  1041     og.add_option("--rot1", metavar="ROT", default="h",
  1042                   help="text rotation for the 1st genome (default=%default)")
  1043     og.add_option("--rot2", metavar="ROT", default="v",
  1044                   help="text rotation for the 2nd genome (default=%default)")
  1045     op.add_option_group(og)
  1046 
  1047     og = optparse.OptionGroup(op, "Annotation options")
  1048     og.add_option("--bed1", metavar="FILE",
  1049                   help="read genome1 annotations from BED file")
  1050     og.add_option("--bed2", metavar="FILE",
  1051                   help="read genome2 annotations from BED file")
  1052     og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
  1053                   "RepeatMasker .out or rmsk.txt file")
  1054     og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
  1055                   "RepeatMasker .out or rmsk.txt file")
  1056     op.add_option_group(og)
  1057 
  1058     og = optparse.OptionGroup(op, "Gene options")
  1059     og.add_option("--genePred1", metavar="FILE",
  1060                   help="read genome1 genes from genePred file")
  1061     og.add_option("--genePred2", metavar="FILE",
  1062                   help="read genome2 genes from genePred file")
  1063     og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
  1064                   help="color for exons (default=%default)")
  1065     og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
  1066                   help="color for protein-coding regions (default=%default)")
  1067     op.add_option_group(og)
  1068 
  1069     og = optparse.OptionGroup(op, "Unsequenced gap options")
  1070     og.add_option("--gap1", metavar="FILE",
  1071                   help="read genome1 unsequenced gaps from agp or gap file")
  1072     og.add_option("--gap2", metavar="FILE",
  1073                   help="read genome2 unsequenced gaps from agp or gap file")
  1074     og.add_option("--bridged-color", metavar="COLOR", default="yellow",
  1075                   help="color for bridged gaps (default: %default)")
  1076     og.add_option("--unbridged-color", metavar="COLOR", default="orange",
  1077                   help="color for unbridged gaps (default: %default)")
  1078     op.add_option_group(og)
  1079     (opts, args) = op.parse_args()
  1080     if len(args) != 2: op.error("2 arguments needed")
  1081 
  1082     opts.background_color = "white"
  1083     opts.label_space = 5     # minimum number of pixels between axis labels
  1084 
  1085     try: lastDotplot(opts, args)
  1086     except KeyboardInterrupt: pass  # avoid silly error message
  1087     except Exception as e:
  1088         prog = os.path.basename(sys.argv[0])
  1089         sys.exit(prog + ": error: " + str(e))