scripts/last-dotplot
author Martin C. Frith
Wed Oct 14 20:45:14 2020 +0900 (19 months ago)
changeset 1133 04663552433d
parent 1132 2b38a62095d1
permissions -rwxr-xr-x
last-dotplot: fix crash on long annotation names
     1 #! /usr/bin/env python
     2 # Author: Martin C. Frith 2008
     3 # SPDX-License-Identifier: GPL-3.0-or-later
     4 
     5 # Read pair-wise alignments in MAF or LAST tabular format: write an
     6 # "Oxford grid", a.k.a. dotplot.
     7 
     8 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
     9 # pixels with one or more aligned nt-pairs are black.  This can look
    10 # too crowded for large genome alignments.  I tried shading each pixel
    11 # according to the number of aligned nt-pairs within it, but the
    12 # result is too faint.  How can this be done better?
    13 
    14 import collections
    15 import functools
    16 import gzip
    17 from fnmatch import fnmatchcase
    18 import logging
    19 from operator import itemgetter
    20 import subprocess
    21 import itertools, optparse, os, re, sys
    22 
    23 # Try to make PIL/PILLOW work:
    24 try:
    25     from PIL import Image, ImageDraw, ImageFont, ImageColor
    26 except ImportError:
    27     import Image, ImageDraw, ImageFont, ImageColor
    28 
    29 try:
    30     from future_builtins import zip
    31 except ImportError:
    32     pass
    33 
    34 def myOpen(fileName):  # faster than fileinput
    35     if fileName is None:
    36         return []
    37     if fileName == "-":
    38         return sys.stdin
    39     if fileName.endswith(".gz"):
    40         return gzip.open(fileName, "rt")  # xxx dubious for Python2
    41     return open(fileName)
    42 
    43 def groupByFirstItem(things):
    44     for k, v in itertools.groupby(things, itemgetter(0)):
    45         yield k, [i[1:] for i in v]
    46 
    47 def croppedBlocks(blocks, ranges1, ranges2):
    48     headBeg1, headBeg2, headSize = blocks[0]
    49     for r1 in ranges1:
    50         for r2 in ranges2:
    51             cropBeg1, cropEnd1 = r1
    52             if headBeg1 < 0:
    53                 cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
    54             cropBeg2, cropEnd2 = r2
    55             if headBeg2 < 0:
    56                 cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
    57             for beg1, beg2, size in blocks:
    58                 b1 = max(cropBeg1, beg1)
    59                 e1 = min(cropEnd1, beg1 + size)
    60                 if b1 >= e1: continue
    61                 offset = beg2 - beg1
    62                 b2 = max(cropBeg2, b1 + offset)
    63                 e2 = min(cropEnd2, e1 + offset)
    64                 if b2 >= e2: continue
    65                 yield b2 - offset, b2, e2 - b2
    66 
    67 def tabBlocks(beg1, beg2, blocks):
    68     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
    69     for i in blocks.split(","):
    70         if ":" in i:
    71             x, y = i.split(":")
    72             beg1 += int(x)
    73             beg2 += int(y)
    74         else:
    75             size = int(i)
    76             yield beg1, beg2, size
    77             beg1 += size
    78             beg2 += size
    79 
    80 def mafBlocks(beg1, beg2, seq1, seq2):
    81     '''Get the gapless blocks of an alignment, from MAF format.'''
    82     size = 0
    83     for x, y in zip(seq1, seq2):
    84         if x == "-":
    85             if size:
    86                 yield beg1, beg2, size
    87                 beg1 += size
    88                 beg2 += size
    89                 size = 0
    90             beg2 += 1
    91         elif y == "-":
    92             if size:
    93                 yield beg1, beg2, size
    94                 beg1 += size
    95                 beg2 += size
    96                 size = 0
    97             beg1 += 1
    98         else:
    99             size += 1
   100     if size: yield beg1, beg2, size
   101 
   102 def alignmentFromSegment(qrySeqName, qrySeqLen, segment):
   103     refSeqLen = sys.maxsize  # XXX
   104     refSeqName, refSeqBeg, qrySeqBeg, size = segment
   105     block = refSeqBeg, qrySeqBeg, size
   106     return refSeqName, refSeqLen, qrySeqName, qrySeqLen, [block]
   107 
   108 def alignmentInput(lines):
   109     '''Get alignments and sequence lengths, from MAF or tabular format.'''
   110     mafCount = 0
   111     qrySeqName = ""
   112     segments = []
   113     for line in lines:
   114         w = line.split()
   115         if line[0] == "#":
   116             pass
   117         elif len(w) == 1:
   118             for i in segments:
   119                 yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
   120             qrySeqName = w[0]
   121             qrySeqLen = 0
   122             segments = []
   123         elif len(w) == 2 and qrySeqName and w[1].isdigit():
   124             qrySeqLen += int(w[1])
   125         elif len(w) == 4 and qrySeqName and w[1].isdigit() and w[3].isdigit():
   126             refSeqName, refSeqBeg, refSeqEnd = w[0], int(w[1]), int(w[3])
   127             size = abs(refSeqEnd - refSeqBeg)
   128             if refSeqBeg > refSeqEnd:
   129                 refSeqBeg = -refSeqBeg
   130             segments.append((refSeqName, refSeqBeg, qrySeqLen, size))
   131             qrySeqLen += size
   132         elif line[0].isdigit():  # tabular format
   133             chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
   134             if w[4] == "-": beg1 -= seqlen1
   135             chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
   136             if w[9] == "-": beg2 -= seqlen2
   137             blocks = tabBlocks(beg1, beg2, w[11])
   138             yield chr1, seqlen1, chr2, seqlen2, blocks
   139         elif line[0] == "s":  # MAF format
   140             if mafCount == 0:
   141                 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
   142                 if w[4] == "-": beg1 -= seqlen1
   143                 mafCount = 1
   144             else:
   145                 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
   146                 if w[4] == "-": beg2 -= seqlen2
   147                 blocks = mafBlocks(beg1, beg2, seq1, seq2)
   148                 yield chr1, seqlen1, chr2, seqlen2, blocks
   149                 mafCount = 0
   150     for i in segments:
   151         yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
   152 
   153 def seqRequestFromText(text):
   154     if ":" in text:
   155         pattern, interval = text.rsplit(":", 1)
   156         if "-" in interval:
   157             beg, end = interval.rsplit("-", 1)
   158             return pattern, int(beg), int(end)  # beg may be negative
   159     return text, 0, sys.maxsize
   160 
   161 def rangesFromSeqName(seqRequests, name, seqLen):
   162     if seqRequests:
   163         base = name.split(".")[-1]  # allow for names like hg19.chr7
   164         for pat, beg, end in seqRequests:
   165             if fnmatchcase(name, pat) or fnmatchcase(base, pat):
   166                 yield max(beg, 0), min(end, seqLen)
   167     else:
   168         yield 0, seqLen
   169 
   170 def updateSeqs(coverDict, seqRanges, seqName, ranges, coveredRange):
   171     beg, end = coveredRange
   172     if beg < 0:
   173         coveredRange = -end, -beg
   174     if seqName in coverDict:
   175         coverDict[seqName].append(coveredRange)
   176     else:
   177         coverDict[seqName] = [coveredRange]
   178         for beg, end in ranges:
   179             r = seqName, beg, end
   180             seqRanges.append(r)
   181 
   182 def readAlignments(fileName, opts):
   183     '''Get alignments and sequence limits, from MAF or tabular format.'''
   184     seqRequests1 = [seqRequestFromText(i) for i in opts.seq1]
   185     seqRequests2 = [seqRequestFromText(i) for i in opts.seq2]
   186 
   187     alignments = []
   188     seqRanges1 = []
   189     seqRanges2 = []
   190     coverDict1 = {}
   191     coverDict2 = {}
   192     lines = myOpen(fileName)
   193     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   194         ranges1 = sorted(rangesFromSeqName(seqRequests1, seqName1, seqLen1))
   195         if not ranges1: continue
   196         ranges2 = sorted(rangesFromSeqName(seqRequests2, seqName2, seqLen2))
   197         if not ranges2: continue
   198         b = list(croppedBlocks(list(blocks), ranges1, ranges2))
   199         if not b: continue
   200         aln = seqName1, seqName2, b
   201         alignments.append(aln)
   202         coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
   203         updateSeqs(coverDict1, seqRanges1, seqName1, ranges1, coveredRange1)
   204         coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
   205         updateSeqs(coverDict2, seqRanges2, seqName2, ranges2, coveredRange2)
   206     return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
   207 
   208 def nameAndRangesFromDict(cropDict, seqName):
   209     if seqName in cropDict:
   210         return seqName, cropDict[seqName]
   211     n = seqName.split(".")[-1]
   212     if n in cropDict:
   213         return n, cropDict[n]
   214     return seqName, []
   215 
   216 def rangesForSecondaryAlignments(primaryRanges, seqLen):
   217     if primaryRanges:
   218         return primaryRanges
   219     return [(0, seqLen)]
   220 
   221 def readSecondaryAlignments(opts, cropRanges1, cropRanges2):
   222     cropDict1 = dict(groupByFirstItem(cropRanges1))
   223     cropDict2 = dict(groupByFirstItem(cropRanges2))
   224 
   225     alignments = []
   226     seqRanges1 = []
   227     seqRanges2 = []
   228     coverDict1 = {}
   229     coverDict2 = {}
   230     lines = myOpen(opts.alignments)
   231     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   232         seqName1, ranges1 = nameAndRangesFromDict(cropDict1, seqName1)
   233         seqName2, ranges2 = nameAndRangesFromDict(cropDict2, seqName2)
   234         if not ranges1 and not ranges2:
   235             continue
   236         r1 = rangesForSecondaryAlignments(ranges1, seqLen1)
   237         r2 = rangesForSecondaryAlignments(ranges2, seqLen2)
   238         b = list(croppedBlocks(list(blocks), r1, r2))
   239         if not b: continue
   240         aln = seqName1, seqName2, b
   241         alignments.append(aln)
   242         if not ranges1:
   243             coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
   244             updateSeqs(coverDict1, seqRanges1, seqName1, r1, coveredRange1)
   245         if not ranges2:
   246             coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
   247             updateSeqs(coverDict2, seqRanges2, seqName2, r2, coveredRange2)
   248     return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
   249 
   250 def twoValuesFromOption(text, separator):
   251     if separator in text:
   252         return text.split(separator)
   253     return text, text
   254 
   255 def mergedRanges(ranges):
   256     oldBeg, maxEnd = ranges[0]
   257     for beg, end in ranges:
   258         if beg > maxEnd:
   259             yield oldBeg, maxEnd
   260             oldBeg = beg
   261             maxEnd = end
   262         elif end > maxEnd:
   263             maxEnd = end
   264     yield oldBeg, maxEnd
   265 
   266 def mergedRangesPerSeq(coverDict):
   267     for k, v in coverDict.items():
   268         v.sort()
   269         yield k, list(mergedRanges(v))
   270 
   271 def coveredLength(mergedCoverDict):
   272     return sum(sum(e - b for b, e in v) for v in mergedCoverDict.values())
   273 
   274 def trimmed(seqRanges, coverDict, minAlignedBases, maxGapFrac, endPad, midPad):
   275     maxEndGapFrac, maxMidGapFrac = twoValuesFromOption(maxGapFrac, ",")
   276     maxEndGap = max(float(maxEndGapFrac) * minAlignedBases, endPad * 1.0)
   277     maxMidGap = max(float(maxMidGapFrac) * minAlignedBases, midPad * 2.0)
   278 
   279     for seqName, rangeBeg, rangeEnd in seqRanges:
   280         seqBlocks = coverDict[seqName]
   281         blocks = [i for i in seqBlocks if i[0] < rangeEnd and i[1] > rangeBeg]
   282         if blocks[0][0] - rangeBeg > maxEndGap:
   283             rangeBeg = blocks[0][0] - endPad
   284         for j, y in enumerate(blocks):
   285             if j:
   286                 x = blocks[j - 1]
   287                 if y[0] - x[1] > maxMidGap:
   288                     yield seqName, rangeBeg, x[1] + midPad
   289                     rangeBeg = y[0] - midPad
   290         if rangeEnd - blocks[-1][1] > maxEndGap:
   291             rangeEnd = blocks[-1][1] + endPad
   292         yield seqName, rangeBeg, rangeEnd
   293 
   294 def rangesWithStrandInfo(seqRanges, strandOpt, alignments, seqIndex):
   295     if strandOpt == "1":
   296         forwardMinusReverse = collections.defaultdict(int)
   297         for i in alignments:
   298             blocks = i[2]
   299             beg1, beg2, size = blocks[0]
   300             numOfAlignedLetterPairs = sum(i[2] for i in blocks)
   301             if (beg1 < 0) != (beg2 < 0):  # opposite-strand alignment
   302                 numOfAlignedLetterPairs *= -1
   303             forwardMinusReverse[i[seqIndex]] += numOfAlignedLetterPairs
   304     strandNum = 0
   305     for seqName, beg, end in seqRanges:
   306         if strandOpt == "1":
   307             strandNum = 1 if forwardMinusReverse[seqName] >= 0 else 2
   308         yield seqName, beg, end, strandNum
   309 
   310 def natural_sort_key(my_string):
   311     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
   312     parts = re.split(r'(\d+)', my_string)
   313     parts[1::2] = map(int, parts[1::2])
   314     return parts
   315 
   316 def nameKey(oneSeqRanges):
   317     return natural_sort_key(oneSeqRanges[0][0])
   318 
   319 def sizeKey(oneSeqRanges):
   320     return sum(b - e for n, b, e, s in oneSeqRanges), nameKey(oneSeqRanges)
   321 
   322 def alignmentKey(seqNamesToLists, oneSeqRanges):
   323     seqName = oneSeqRanges[0][0]
   324     alignmentsOfThisSequence = seqNamesToLists[seqName]
   325     numOfAlignedLetterPairs = sum(i[3] for i in alignmentsOfThisSequence)
   326     toMiddle = numOfAlignedLetterPairs // 2
   327     for i in alignmentsOfThisSequence:
   328         toMiddle -= i[3]
   329         if toMiddle < 0:
   330             return i[1:3]  # sequence-rank and "position" of this alignment
   331 
   332 def rankAndFlipPerSeq(seqRanges):
   333     rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
   334     for rank, group in enumerate(rangesGroupedBySeqName):
   335         seqName, ranges = group
   336         strandNum = next(ranges)[3]
   337         flip = 1 if strandNum < 2 else -1
   338         yield seqName, (rank, flip)
   339 
   340 def alignmentSortData(alignments, seqIndex, otherNamesToRanksAndFlips):
   341     otherIndex = 1 - seqIndex
   342     for i in alignments:
   343         blocks = i[2]
   344         otherRank, otherFlip = otherNamesToRanksAndFlips[i[otherIndex]]
   345         otherPos = otherFlip * abs(blocks[0][otherIndex] +
   346                                    blocks[-1][otherIndex] + blocks[-1][2])
   347         numOfAlignedLetterPairs = sum(i[2] for i in blocks)
   348         yield i[seqIndex], otherRank, otherPos, numOfAlignedLetterPairs
   349 
   350 def mySortedRanges(seqRanges, sortOpt, seqIndex, alignments, otherRanges):
   351     rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
   352     g = [list(ranges) for seqName, ranges in rangesGroupedBySeqName]
   353     for i in g:
   354         if i[0][3] > 1:
   355             i.reverse()
   356     if sortOpt == "1":
   357         g.sort(key=nameKey)
   358     if sortOpt == "2":
   359         g.sort(key=sizeKey)
   360     if sortOpt == "3":
   361         otherNamesToRanksAndFlips = dict(rankAndFlipPerSeq(otherRanges))
   362         alns = sorted(alignmentSortData(alignments, seqIndex,
   363                                         otherNamesToRanksAndFlips))
   364         alnsGroupedBySeqName = itertools.groupby(alns, itemgetter(0))
   365         seqNamesToLists = dict((k, list(v)) for k, v in alnsGroupedBySeqName)
   366         g.sort(key=functools.partial(alignmentKey, seqNamesToLists))
   367     return [j for i in g for j in i]
   368 
   369 def allSortedRanges(opts, alignments, alignmentsB,
   370                     seqRanges1, seqRangesB1, seqRanges2, seqRangesB2):
   371     o1, oB1 = twoValuesFromOption(opts.strands1, ":")
   372     o2, oB2 = twoValuesFromOption(opts.strands2, ":")
   373     if o1 == "1" and o2 == "1":
   374         raise RuntimeError("the strand options have circular dependency")
   375     seqRanges1 = list(rangesWithStrandInfo(seqRanges1, o1, alignments, 0))
   376     seqRanges2 = list(rangesWithStrandInfo(seqRanges2, o2, alignments, 1))
   377     seqRangesB1 = list(rangesWithStrandInfo(seqRangesB1, oB1, alignmentsB, 0))
   378     seqRangesB2 = list(rangesWithStrandInfo(seqRangesB2, oB2, alignmentsB, 1))
   379 
   380     o1, oB1 = twoValuesFromOption(opts.sort1, ":")
   381     o2, oB2 = twoValuesFromOption(opts.sort2, ":")
   382     if o1 == "3" and o2 == "3":
   383         raise RuntimeError("the sort options have circular dependency")
   384     if o1 != "3":
   385         s1 = mySortedRanges(seqRanges1, o1, None, None, None)
   386     if o2 != "3":
   387         s2 = mySortedRanges(seqRanges2, o2, None, None, None)
   388     if o1 == "3":
   389         s1 = mySortedRanges(seqRanges1, o1, 0, alignments, s2)
   390     if o2 == "3":
   391         s2 = mySortedRanges(seqRanges2, o2, 1, alignments, s1)
   392     t1 = mySortedRanges(seqRangesB1, oB1, 0, alignmentsB, s2)
   393     t2 = mySortedRanges(seqRangesB2, oB2, 1, alignmentsB, s1)
   394     return s1 + t1, s2 + t2
   395 
   396 def sizesPerText(texts, font, textDraw):
   397     sizes = 0, 0
   398     for t in texts:
   399         if textDraw is not None:
   400             sizes = textDraw.textsize(t, font=font)
   401         yield t, sizes
   402 
   403 def prettyNum(n):
   404     t = str(n)
   405     groups = []
   406     while t:
   407         groups.append(t[-3:])
   408         t = t[:-3]
   409     return ",".join(reversed(groups))
   410 
   411 def sizeText(size):
   412     suffixes = "bp", "kb", "Mb", "Gb"
   413     for i, x in enumerate(suffixes):
   414         j = 10 ** (i * 3)
   415         if size < j * 10:
   416             return "%.2g" % (1.0 * size / j) + x
   417         if size < j * 1000 or i == len(suffixes) - 1:
   418             return "%.0f" % (1.0 * size / j) + x
   419 
   420 def labelText(seqRange, labelOpt):
   421     seqName, beg, end, strandNum = seqRange
   422     if labelOpt == 1:
   423         return seqName + ": " + sizeText(end - beg)
   424     if labelOpt == 2:
   425         return seqName + ":" + prettyNum(beg) + ": " + sizeText(end - beg)
   426     if labelOpt == 3:
   427         return seqName + ":" + prettyNum(beg) + "-" + prettyNum(end)
   428     return seqName
   429 
   430 def rangeLabels(seqRanges, labelOpt, font, textDraw, textRot):
   431     x = y = 0
   432     for r in seqRanges:
   433         text = labelText(r, labelOpt)
   434         if textDraw is not None:
   435             x, y = textDraw.textsize(text, font=font)
   436             if textRot:
   437                 x, y = y, x
   438         yield text, x, y, r[3]
   439 
   440 def dataFromRanges(sortedRanges, font, textDraw, labelOpt, textRot):
   441     for seqName, rangeBeg, rangeEnd, strandNum in sortedRanges:
   442         out = [seqName, str(rangeBeg), str(rangeEnd)]
   443         if strandNum > 0:
   444             out.append(".+-"[strandNum])
   445         logging.info("\t".join(out))
   446     logging.info("")
   447     rangeSizes = [e - b for n, b, e, s in sortedRanges]
   448     labs = list(rangeLabels(sortedRanges, labelOpt, font, textDraw, textRot))
   449     margin = max(i[2] for i in labs)
   450     # xxx the margin may be too big, because some labels may get omitted
   451     return rangeSizes, labs, margin
   452 
   453 def div_ceil(x, y):
   454     '''Return x / y rounded up.'''
   455     q, r = divmod(x, y)
   456     return q + (r != 0)
   457 
   458 def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
   459     '''Get the minimum bp-per-pixel that fits in the size limit.'''
   460     logging.info("choosing bp per pixel...")
   461     numOfRanges = len(rangeSizes)
   462     maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
   463     if maxPixelsInRanges < numOfRanges:
   464         raise RuntimeError("can't fit the image: too many sequences?")
   465     negLimit = -maxPixelsInRanges
   466     negBpPerPix = sum(rangeSizes) // negLimit
   467     while True:
   468         if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
   469             return -negBpPerPix
   470         negBpPerPix -= 1
   471 
   472 def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
   473     '''Get the start pixel for each range.'''
   474     rangePixBegs = []
   475     pix_tot = margin - pixTweenRanges
   476     for i in rangePixLens:
   477         pix_tot += pixTweenRanges
   478         rangePixBegs.append(pix_tot)
   479         pix_tot += i
   480     return rangePixBegs
   481 
   482 def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
   483     '''Return pixel information about the ranges.'''
   484     rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
   485     rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
   486     tot_pix = rangePixBegs[-1] + rangePixLens[-1]
   487     return rangePixBegs, rangePixLens, tot_pix
   488 
   489 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
   490     while True:
   491         q1, r1 = divmod(beg1, bp_per_pix)
   492         q2, r2 = divmod(beg2, bp_per_pix)
   493         hits[q2 * width + q1] |= 1
   494         next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
   495         if next_pix >= size: break
   496         beg1 += next_pix
   497         beg2 += next_pix
   498         size -= next_pix
   499 
   500 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
   501     while True:
   502         q1, r1 = divmod(beg1, bp_per_pix)
   503         q2, r2 = divmod(beg2, bp_per_pix)
   504         hits[q2 * width + q1] |= 2
   505         next_pix = min(bp_per_pix - r1, r2 + 1)
   506         if next_pix >= size: break
   507         beg1 += next_pix
   508         beg2 -= next_pix
   509         size -= next_pix
   510 
   511 def strandAndOrigin(ranges, beg, size):
   512     isReverseStrand = (beg < 0)
   513     if isReverseStrand:
   514         beg = -(beg + size)
   515     for rangeBeg, rangeEnd, isReverseRange, origin in ranges:
   516         if rangeEnd > beg:  # assumes the ranges are sorted
   517             return (isReverseStrand != isReverseRange), origin
   518 
   519 def alignmentPixels(width, height, alignments, bp_per_pix,
   520                     rangeDict1, rangeDict2):
   521     hits = [0] * (width * height)  # the image data
   522     for seq1, seq2, blocks in alignments:
   523         beg1, beg2, size = blocks[0]
   524         isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
   525         isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
   526         for beg1, beg2, size in blocks:
   527             if isReverse1:
   528                 beg1 = -(beg1 + size)
   529                 beg2 = -(beg2 + size)
   530             if isReverse1 == isReverse2:
   531                 drawLineForward(hits, width, bp_per_pix,
   532                                 ori1 + beg1, ori2 + beg2, size)
   533             else:
   534                 drawLineReverse(hits, width, bp_per_pix,
   535                                 ori1 + beg1, ori2 - beg2 - 1, size)
   536     return hits
   537 
   538 def orientedBlocks(alignments, seqIndex):
   539     otherIndex = 1 - seqIndex
   540     for a in alignments:
   541         seq1, seq2, blocks = a
   542         for b in blocks:
   543             beg1, beg2, size = b
   544             if b[seqIndex] < 0:
   545                 b = -(beg1 + size), -(beg2 + size), size
   546             yield a[seqIndex], b[seqIndex], a[otherIndex], b[otherIndex], size
   547 
   548 def drawJoins(im, alignments, bpPerPix, seqIndex, rangeDict1, rangeDict2):
   549     blocks = orientedBlocks(alignments, seqIndex)
   550     oldSeq1 = ""
   551     for seq1, beg1, seq2, beg2, size in sorted(blocks):
   552         isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
   553         isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
   554         end1 = beg1 + size - 1
   555         end2 = beg2 + size - 1
   556         if isReverse1:
   557             beg1 = -(beg1 + 1)
   558             end1 = -(end1 + 1)
   559         if isReverse2:
   560             beg2 = -(beg2 + 1)
   561             end2 = -(end2 + 1)
   562         newPix1 = (ori1 + beg1) // bpPerPix
   563         newPix2 = (ori2 + beg2) // bpPerPix
   564         if seq1 == oldSeq1:
   565             lowerPix2 = min(oldPix2, newPix2)
   566             upperPix2 = max(oldPix2, newPix2)
   567             midPix1 = (oldPix1 + newPix1) // 2
   568             if isReverse1:
   569                 midPix1 = (oldPix1 + newPix1 + 1) // 2
   570                 oldPix1, newPix1 = newPix1, oldPix1
   571             if upperPix2 - lowerPix2 > 1 and oldPix1 <= newPix1 <= oldPix1 + 1:
   572                 if seqIndex == 0:
   573                     box = midPix1, lowerPix2, midPix1 + 1, upperPix2 + 1
   574                 else:
   575                     box = lowerPix2, midPix1, upperPix2 + 1, midPix1 + 1
   576                 im.paste("lightgray", box)
   577         oldPix1 = (ori1 + end1) // bpPerPix
   578         oldPix2 = (ori2 + end2) // bpPerPix
   579         oldSeq1 = seq1
   580 
   581 def expandedSeqDict(seqDict):
   582     '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   583     newDict = seqDict.copy()
   584     for name, x in seqDict.items():
   585         if "." in name:
   586             base = name.split(".")[-1]
   587             if base in newDict:  # an ambiguous case was found:
   588                 return seqDict   # so give up completely
   589             newDict[base] = x
   590     return newDict
   591 
   592 def readBed(fileName, rangeDict):
   593     for line in myOpen(fileName):
   594         w = line.split()
   595         if not w: continue
   596         if not w[1].isdigit():
   597             w = w[1:]
   598         seqName = w[0]
   599         if seqName not in rangeDict: continue
   600         seqRanges = rangeDict[seqName]
   601         beg = int(w[1])
   602         end = int(w[2])
   603         if all(beg >= i[2] or end <= i[1] for i in seqRanges):
   604             continue
   605         itemName = w[3] if len(w) > 3 and w[3] != "." else ""
   606         layer = 900
   607         color = "#fbf"
   608         if len(w) > 4:
   609             if w[4] != ".":
   610                 layer = float(w[4])
   611             if len(w) > 5:
   612                 if len(w) > 8 and w[8].count(",") == 2:
   613                     color = "rgb(" + w[8] + ")"
   614                 else:
   615                     strand = w[5]
   616                     isRev = (seqRanges[0][3] > 1)
   617                     if strand == "+" and not isRev or strand == "-" and isRev:
   618                         color = "#ffe8e8"
   619                     if strand == "-" and not isRev or strand == "+" and isRev:
   620                         color = "#e8e8ff"
   621         yield layer, color, seqName, beg, end, itemName
   622 
   623 def commaSeparatedInts(text):
   624     return map(int, text.rstrip(",").split(","))
   625 
   626 def readGenePred(opts, fileName, rangeDict):
   627     for line in myOpen(fileName):
   628         fields = line.split()  # xxx tab ???
   629         if not fields: continue
   630         geneName = fields[12 if len(fields) > 12 else 0]  # XXX ???
   631         if fields[2] not in "+-":
   632             fields = fields[1:]
   633         seqName = fields[1]
   634         if seqName not in rangeDict: continue
   635         seqRanges = rangeDict[seqName]
   636         #strand = fields[2]
   637         cdsBeg = int(fields[5])
   638         cdsEnd = int(fields[6])
   639         exonBegs = commaSeparatedInts(fields[8])
   640         exonEnds = commaSeparatedInts(fields[9])
   641         for beg, end in zip(exonBegs, exonEnds):
   642             if all(beg >= i[2] or end <= i[1] for i in seqRanges):
   643                 continue
   644             yield 300, opts.exon_color, seqName, beg, end, geneName
   645             b = max(beg, cdsBeg)
   646             e = min(end, cdsEnd)
   647             if b < e: yield 400, opts.cds_color, seqName, b, e, ""
   648 
   649 def readRmsk(fileName, rangeDict):
   650     for line in myOpen(fileName):
   651         fields = line.split()
   652         if len(fields) == 17:  # rmsk.txt
   653             seqName = fields[5]
   654             if seqName not in rangeDict: continue  # do this ASAP for speed
   655             beg = int(fields[6])
   656             end = int(fields[7])
   657             strand = fields[9]
   658             repeatName = fields[10]
   659             repeatClass = fields[11]
   660         elif len(fields) == 15:  # .out
   661             seqName = fields[4]
   662             if seqName not in rangeDict: continue
   663             beg = int(fields[5]) - 1
   664             end = int(fields[6])
   665             strand = fields[8]
   666             repeatName = fields[9]
   667             repeatClass = fields[10].split("/")[0]
   668         else:
   669             continue
   670         seqRanges = rangeDict[seqName]
   671         if all(beg >= i[2] or end <= i[1] for i in seqRanges):
   672             continue
   673         if repeatClass in ("Low_complexity", "Simple_repeat", "Satellite"):
   674             yield 200, "#fbf", seqName, beg, end, repeatName
   675         elif (strand == "+") != (seqRanges[0][3] > 1):
   676             yield 100, "#ffe8e8", seqName, beg, end, repeatName
   677         else:
   678             yield 100, "#e8e8ff", seqName, beg, end, repeatName
   679 
   680 def isExtraFirstGapField(fields):
   681     return fields[4].isdigit()
   682 
   683 def readGaps(opts, fileName, rangeDict):
   684     '''Read locations of unsequenced gaps, from an agp or gap file.'''
   685     for line in myOpen(fileName):
   686         w = line.split()
   687         if not w or w[0][0] == "#": continue
   688         if isExtraFirstGapField(w): w = w[1:]
   689         if w[4] not in "NU": continue
   690         seqName = w[0]
   691         if seqName not in rangeDict: continue
   692         end = int(w[2])
   693         beg = end - int(w[5])  # zero-based coordinate
   694         if w[7] == "yes":
   695             yield 3000, opts.bridged_color, seqName, beg, end, ""
   696         else:
   697             yield 2000, opts.unbridged_color, seqName, beg, end, ""
   698 
   699 def bedBoxes(annots, rangeDict, limit, isTop, bpPerPix):
   700     beds, textSizes, margin = annots
   701     cover = [(limit, limit)]
   702     for layer, color, seqName, bedBeg, bedEnd, name in reversed(beds):
   703         textWidth, textHeight = textSizes[name]
   704         for rangeBeg, rangeEnd, isReverseRange, origin in rangeDict[seqName]:
   705             beg = max(bedBeg, rangeBeg)
   706             end = min(bedEnd, rangeEnd)
   707             if beg >= end: continue
   708             if isReverseRange:
   709                 beg, end = -end, -beg
   710             if layer <= 1000:
   711                 # include partly-covered pixels
   712                 pixBeg = (origin + beg) // bpPerPix
   713                 pixEnd = div_ceil(origin + end, bpPerPix)
   714             else:
   715                 # exclude partly-covered pixels
   716                 pixBeg = div_ceil(origin + beg, bpPerPix)
   717                 pixEnd = (origin + end) // bpPerPix
   718                 if pixEnd <= pixBeg: continue
   719                 if bedEnd >= rangeEnd:  # include partly-covered end pixels
   720                     if isReverseRange:
   721                         pixBeg = (origin + beg) // bpPerPix
   722                     else:
   723                         pixEnd = div_ceil(origin + end, bpPerPix)
   724             nameBeg = (pixBeg + pixEnd - textHeight) // 2
   725             nameEnd = nameBeg + textHeight
   726             n = ""
   727             if name and all(e <= nameBeg or b >= nameEnd for b, e in cover):
   728                 if textWidth <= margin:
   729                     cover.append((nameBeg, nameEnd))
   730                     n = name
   731             yield layer, color, isTop, pixBeg, pixEnd, n, nameBeg, textWidth
   732 
   733 def drawAnnotations(im, boxes, tMargin, bMarginBeg, lMargin, rMarginBeg):
   734     # xxx use partial transparency for different-color overlaps?
   735     for layer, color, isTop, beg, end, name, nameBeg, nameLen in boxes:
   736         if isTop:
   737             box = beg, tMargin, end, bMarginBeg
   738         else:
   739             box = lMargin, beg, rMarginBeg, end
   740         im.paste(color, box)
   741 
   742 def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
   743     '''Return axis labels with endpoint & sort-order information.'''
   744     maxWidth = end - beg
   745     for i, j, k in zip(labels, rangePixBegs, rangePixLens):
   746         text, textWidth, textHeight, strandNum = i
   747         if textWidth > maxWidth:
   748             continue
   749         labelBeg = j + (k - textWidth) // 2
   750         labelEnd = labelBeg + textWidth
   751         sortKey = textWidth - k
   752         if labelBeg < beg:
   753             sortKey += maxWidth * (beg - labelBeg)
   754             labelBeg = beg
   755             labelEnd = beg + textWidth
   756         if labelEnd > end:
   757             sortKey += maxWidth * (labelEnd - end)
   758             labelEnd = end
   759             labelBeg = end - textWidth
   760         yield sortKey, labelBeg, labelEnd, text, textHeight, strandNum
   761 
   762 def nonoverlappingLabels(labels, minPixTweenLabels):
   763     '''Get a subset of non-overlapping axis labels, greedily.'''
   764     out = []
   765     for i in labels:
   766         beg = i[1] - minPixTweenLabels
   767         end = i[2] + minPixTweenLabels
   768         if all(j[2] <= beg or j[1] >= end for j in out):
   769             out.append(i)
   770     return out
   771 
   772 def axisImage(labels, rangePixBegs, rangePixLens, textRot,
   773               textAln, font, image_mode, opts):
   774     '''Make an image of axis labels.'''
   775     beg = rangePixBegs[0]
   776     end = rangePixBegs[-1] + rangePixLens[-1]
   777     margin = max(i[2] for i in labels)
   778     labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
   779     minPixTweenLabels = 0 if textRot else opts.label_space
   780     labels = nonoverlappingLabels(labels, minPixTweenLabels)
   781     image_size = (margin, end) if textRot else (end, margin)
   782     im = Image.new(image_mode, image_size, opts.margin_color)
   783     draw = ImageDraw.Draw(im)
   784     for sortKey, labelBeg, labelEnd, text, textHeight, strandNum in labels:
   785         base = margin - textHeight if textAln else 0
   786         position = (base, labelBeg) if textRot else (labelBeg, base)
   787         fill = ("black", opts.forwardcolor, opts.reversecolor)[strandNum]
   788         draw.text(position, text, font=font, fill=fill)
   789     return im
   790 
   791 def annoTextImage(opts, image_mode, font, margin, length, boxes, isLeftAlign):
   792     image_size = margin, length
   793     im = Image.new(image_mode, image_size, opts.margin_color)
   794     draw = ImageDraw.Draw(im)
   795     for layer, color, isTop, beg, end, name, nameBeg, nameLen in boxes:
   796         xPosition = 0 if isLeftAlign else margin - nameLen
   797         position = xPosition, nameBeg
   798         draw.text(position, name, font=font, fill="black")
   799     return im
   800 
   801 def rangesPerSeq(sortedRanges):
   802     for seqName, group in itertools.groupby(sortedRanges, itemgetter(0)):
   803         yield seqName, sorted(group)
   804 
   805 def rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
   806     for i, j, k in zip(sortedRanges, rangePixBegs, rangePixLens):
   807         seqName, rangeBeg, rangeEnd, strandNum = i
   808         isReverseRange = (strandNum > 1)
   809         if isReverseRange:
   810             origin = bpPerPix * (j + k) + rangeBeg
   811         else:
   812             origin = bpPerPix * j - rangeBeg
   813         yield seqName, (rangeBeg, rangeEnd, isReverseRange, origin)
   814 
   815 def rangesAndOriginsPerSeq(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
   816     a = rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix)
   817     for seqName, group in itertools.groupby(a, itemgetter(0)):
   818         yield seqName, sorted(i[1] for i in group)
   819 
   820 def getFont(opts):
   821     if opts.fontfile:
   822         return ImageFont.truetype(opts.fontfile, opts.fontsize)
   823     fileNames = []
   824     try:
   825         x = ["fc-match", "-f%{file}", "arial"]
   826         p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
   827                              universal_newlines=True)
   828         out, err = p.communicate()
   829         fileNames.append(out)
   830     except OSError as e:
   831         logging.info("fc-match error: " + str(e))
   832     fileNames.append("/Library/Fonts/Arial.ttf")  # for Mac
   833     for i in fileNames:
   834         try:
   835             font = ImageFont.truetype(i, opts.fontsize)
   836             logging.info("font: " + i)
   837             return font
   838         except IOError as e:
   839             logging.info("font load error: " + str(e))
   840     return ImageFont.load_default()
   841 
   842 def sequenceSizesAndNames(seqRanges):
   843     for seqName, ranges in itertools.groupby(seqRanges, itemgetter(0)):
   844         size = sum(e - b for n, b, e in ranges)
   845         yield size, seqName
   846 
   847 def biggestSequences(seqRanges, maxNumOfSequences):
   848     s = sorted(sequenceSizesAndNames(seqRanges), reverse=True)
   849     if len(s) > maxNumOfSequences:
   850         logging.warning("too many sequences - discarding the smallest ones")
   851         s = s[:maxNumOfSequences]
   852     return set(i[1] for i in s)
   853 
   854 def remainingSequenceRanges(seqRanges, alignments, seqIndex):
   855     remainingSequences = set(i[seqIndex] for i in alignments)
   856     return [i for i in seqRanges if i[0] in remainingSequences]
   857 
   858 def readAnnotations(opts, font, textDraw, sortedRanges, totalLength,
   859                     bedFile, repFile, geneFile, gapFile):
   860     rangeDict = expandedSeqDict(dict(rangesPerSeq(sortedRanges)))
   861     annots = sorted(itertools.chain(readBed(bedFile, rangeDict),
   862                                     readRmsk(repFile, rangeDict),
   863                                     readGenePred(opts, geneFile, rangeDict),
   864                                     readGaps(opts, gapFile, rangeDict)))
   865     names = set(i[5] for i in annots)
   866     textSizes = dict(sizesPerText(names, font, textDraw))
   867     maxTextLength = totalLength // 2
   868     okLengths = [i[0] for i in textSizes.values() if i[0] <= maxTextLength]
   869     margin = max(okLengths) if okLengths else 0
   870     return annots, textSizes, margin
   871 
   872 def lastDotplot(opts, args):
   873     logLevel = logging.INFO if opts.verbose else logging.WARNING
   874     logging.basicConfig(format="%(filename)s: %(message)s", level=logLevel)
   875 
   876     font = getFont(opts)
   877     image_mode = 'RGB'
   878     forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
   879     reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
   880     zipped_colors = zip(forward_color, reverse_color)
   881     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
   882 
   883     maxGap1, maxGapB1 = twoValuesFromOption(opts.max_gap1, ":")
   884     maxGap2, maxGapB2 = twoValuesFromOption(opts.max_gap2, ":")
   885 
   886     logging.info("reading alignments...")
   887     alnData = readAlignments(args[0], opts)
   888     alignments, seqRanges1, coverDict1, seqRanges2, coverDict2 = alnData
   889     if not alignments: raise RuntimeError("there are no alignments")
   890     logging.info("cutting...")
   891     coverDict1 = dict(mergedRangesPerSeq(coverDict1))
   892     coverDict2 = dict(mergedRangesPerSeq(coverDict2))
   893     minAlignedBases = min(coveredLength(coverDict1), coveredLength(coverDict2))
   894     pad = int(opts.pad * minAlignedBases)
   895     cutRanges1 = list(trimmed(seqRanges1, coverDict1, minAlignedBases,
   896                               maxGap1, pad, pad))
   897     cutRanges2 = list(trimmed(seqRanges2, coverDict2, minAlignedBases,
   898                               maxGap2, pad, pad))
   899 
   900     biggestSeqs1 = biggestSequences(cutRanges1, opts.maxseqs)
   901     cutRanges1 = [i for i in cutRanges1 if i[0] in biggestSeqs1]
   902     alignments = [i for i in alignments if i[0] in biggestSeqs1]
   903     cutRanges2 = remainingSequenceRanges(cutRanges2, alignments, 1)
   904 
   905     biggestSeqs2 = biggestSequences(cutRanges2, opts.maxseqs)
   906     cutRanges2 = [i for i in cutRanges2 if i[0] in biggestSeqs2]
   907     alignments = [i for i in alignments if i[1] in biggestSeqs2]
   908     cutRanges1 = remainingSequenceRanges(cutRanges1, alignments, 0)
   909 
   910     logging.info("reading secondary alignments...")
   911     alnDataB = readSecondaryAlignments(opts, cutRanges1, cutRanges2)
   912     alignmentsB, seqRangesB1, coverDictB1, seqRangesB2, coverDictB2 = alnDataB
   913     logging.info("cutting...")
   914     coverDictB1 = dict(mergedRangesPerSeq(coverDictB1))
   915     coverDictB2 = dict(mergedRangesPerSeq(coverDictB2))
   916     cutRangesB1 = trimmed(seqRangesB1, coverDictB1, minAlignedBases,
   917                           maxGapB1, 0, 0)
   918     cutRangesB2 = trimmed(seqRangesB2, coverDictB2, minAlignedBases,
   919                           maxGapB2, 0, 0)
   920 
   921     logging.info("sorting...")
   922     sortOut = allSortedRanges(opts, alignments, alignmentsB,
   923                               cutRanges1, cutRangesB1, cutRanges2, cutRangesB2)
   924     sortedRanges1, sortedRanges2 = sortOut
   925 
   926     textDraw = None
   927     if opts.fontsize:
   928         textDraw = ImageDraw.Draw(Image.new(image_mode, (1, 1)))
   929 
   930     textRot1 = "vertical".startswith(opts.rot1)
   931     i1 = dataFromRanges(sortedRanges1, font, textDraw, opts.labels1, textRot1)
   932     rangeSizes1, labelData1, tMargin = i1
   933 
   934     textRot2 = "horizontal".startswith(opts.rot2)
   935     i2 = dataFromRanges(sortedRanges2, font, textDraw, opts.labels2, textRot2)
   936     rangeSizes2, labelData2, lMargin = i2
   937 
   938     logging.info("reading annotations...")
   939 
   940     annots1 = readAnnotations(opts, font, textDraw, sortedRanges1, opts.height,
   941                               opts.bed1, opts.rmsk1, opts.genePred1, opts.gap1)
   942     bMargin = annots1[-1]
   943 
   944     annots2 = readAnnotations(opts, font, textDraw, sortedRanges2, opts.width,
   945                               opts.bed2, opts.rmsk2, opts.genePred2, opts.gap2)
   946     rMargin = annots2[-1]
   947 
   948     maxPixels1 = opts.width  - lMargin - rMargin
   949     maxPixels2 = opts.height - tMargin - bMargin
   950     bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
   951     bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
   952     bpPerPix = max(bpPerPix1, bpPerPix2)
   953     logging.info("bp per pixel = " + str(bpPerPix))
   954 
   955     p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
   956     rangePixBegs1, rangePixLens1, rMarginBeg = p1
   957     width = rMarginBeg + rMargin
   958     rangeDict1 = dict(rangesAndOriginsPerSeq(sortedRanges1, rangePixBegs1,
   959                                              rangePixLens1, bpPerPix))
   960 
   961     p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
   962     rangePixBegs2, rangePixLens2, bMarginBeg = p2
   963     height = bMarginBeg + bMargin
   964     rangeDict2 = dict(rangesAndOriginsPerSeq(sortedRanges2, rangePixBegs2,
   965                                              rangePixLens2, bpPerPix))
   966 
   967     logging.info("width:  " + str(width))
   968     logging.info("height: " + str(height))
   969 
   970     logging.info("processing alignments...")
   971     allAlignments = alignments + alignmentsB
   972     hits = alignmentPixels(width, height, allAlignments, bpPerPix,
   973                            rangeDict1, rangeDict2)
   974 
   975     rangeDict1 = expandedSeqDict(rangeDict1)
   976     rangeDict2 = expandedSeqDict(rangeDict2)
   977 
   978     boxes1 = list(bedBoxes(annots1, rangeDict1, rMarginBeg, True, bpPerPix))
   979     boxes2 = list(bedBoxes(annots2, rangeDict2, bMarginBeg, False, bpPerPix))
   980     boxes = sorted(itertools.chain(boxes1, boxes2))
   981 
   982     logging.info("drawing...")
   983 
   984     image_size = width, height
   985     im = Image.new(image_mode, image_size, opts.background_color)
   986 
   987     drawAnnotations(im, boxes, tMargin, bMarginBeg, lMargin, rMarginBeg)
   988 
   989     joinA, joinB = twoValuesFromOption(opts.join, ":")
   990     if joinA in "13":
   991         drawJoins(im, alignments, bpPerPix, 0, rangeDict1, rangeDict2)
   992     if joinB in "13":
   993         drawJoins(im, alignmentsB, bpPerPix, 0, rangeDict1, rangeDict2)
   994     if joinA in "23":
   995         drawJoins(im, alignments, bpPerPix, 1, rangeDict2, rangeDict1)
   996     if joinB in "23":
   997         drawJoins(im, alignmentsB, bpPerPix, 1, rangeDict2, rangeDict1)
   998 
   999     for i in range(height):
  1000         for j in range(width):
  1001             store_value = hits[i * width + j]
  1002             xy = j, i
  1003             if   store_value == 1: im.putpixel(xy, forward_color)
  1004             elif store_value == 2: im.putpixel(xy, reverse_color)
  1005             elif store_value == 3: im.putpixel(xy, overlap_color)
  1006 
  1007     if opts.fontsize != 0:
  1008         axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
  1009                           textRot1, False, font, image_mode, opts)
  1010         if textRot1:
  1011             axis1 = axis1.transpose(Image.ROTATE_90)
  1012         axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
  1013                           textRot2, textRot2, font, image_mode, opts)
  1014         if not textRot2:
  1015             axis2 = axis2.transpose(Image.ROTATE_270)
  1016         im.paste(axis1, (0, 0))
  1017         im.paste(axis2, (0, 0))
  1018 
  1019         annoImage1 = annoTextImage(opts, image_mode, font, bMargin, width,
  1020                                    boxes1, False)
  1021         annoImage1 = annoImage1.transpose(Image.ROTATE_90)
  1022         annoImage2 = annoTextImage(opts, image_mode, font, rMargin, height,
  1023                                    boxes2, True)
  1024         im.paste(annoImage1, (0, bMarginBeg))
  1025         im.paste(annoImage2, (rMarginBeg, 0))
  1026 
  1027     for i in rangePixBegs1[1:]:
  1028         box = i - opts.border_pixels, tMargin, i, bMarginBeg
  1029         im.paste(opts.border_color, box)
  1030 
  1031     for i in rangePixBegs2[1:]:
  1032         box = lMargin, i - opts.border_pixels, rMarginBeg, i
  1033         im.paste(opts.border_color, box)
  1034 
  1035     im.save(args[1])
  1036 
  1037 if __name__ == "__main__":
  1038     usage = """%prog --help
  1039    or: %prog [options] maf-or-tab-alignments dotplot.png
  1040    or: %prog [options] maf-or-tab-alignments dotplot.gif
  1041    or: ..."""
  1042     description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
  1043     op = optparse.OptionParser(usage=usage, description=description)
  1044     op.add_option("-v", "--verbose", action="count",
  1045                   help="show progress messages & data about the plot")
  1046     # Replace "width" & "height" with a single "length" option?
  1047     op.add_option("-x", "--width", metavar="INT", type="int", default=1000,
  1048                   help="maximum width in pixels (default: %default)")
  1049     op.add_option("-y", "--height", metavar="INT", type="int", default=1000,
  1050                   help="maximum height in pixels (default: %default)")
  1051     op.add_option("-m", "--maxseqs", type="int", default=100, metavar="M",
  1052                   help="maximum number of horizontal or vertical sequences "
  1053                   "(default=%default)")
  1054     op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
  1055                   default=[],
  1056                   help="which sequences to show from the 1st genome")
  1057     op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
  1058                   default=[],
  1059                   help="which sequences to show from the 2nd genome")
  1060     op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
  1061                   help="color for forward alignments (default: %default)")
  1062     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
  1063                   help="color for reverse alignments (default: %default)")
  1064     op.add_option("--alignments", metavar="FILE", help="secondary alignments")
  1065     op.add_option("--sort1", default="1", metavar="N",
  1066                   help="genome1 sequence order: 0=input order, 1=name order, "
  1067                   "2=length order, 3=alignment order (default=%default)")
  1068     op.add_option("--sort2", default="1", metavar="N",
  1069                   help="genome2 sequence order: 0=input order, 1=name order, "
  1070                   "2=length order, 3=alignment order (default=%default)")
  1071     op.add_option("--strands1", default="0", metavar="N", help=
  1072                   "genome1 sequence orientation: 0=forward orientation, "
  1073                   "1=alignment orientation (default=%default)")
  1074     op.add_option("--strands2", default="0", metavar="N", help=
  1075                   "genome2 sequence orientation: 0=forward orientation, "
  1076                   "1=alignment orientation (default=%default)")
  1077     op.add_option("--max-gap1", metavar="FRAC", default="0.5,2", help=
  1078                   "maximum unaligned (end,mid) gap in genome1: "
  1079                   "fraction of aligned length (default=%default)")
  1080     op.add_option("--max-gap2", metavar="FRAC", default="0.5,2", help=
  1081                   "maximum unaligned (end,mid) gap in genome2: "
  1082                   "fraction of aligned length (default=%default)")
  1083     op.add_option("--pad", metavar="FRAC", type="float", default=0.04, help=
  1084                   "pad length when cutting unaligned gaps: "
  1085                   "fraction of aligned length (default=%default)")
  1086     op.add_option("-j", "--join", default="0", metavar="N", help=
  1087                   "join: 0=nothing, 1=alignments adjacent in genome1, "
  1088                   "2=alignments adjacent in genome2 (default=%default)")
  1089     op.add_option("--border-pixels", metavar="INT", type="int", default=1,
  1090                   help="number of pixels between sequences (default=%default)")
  1091     op.add_option("--border-color", metavar="COLOR", default="black",
  1092                   help="color for pixels between sequences (default=%default)")
  1093     # --break-color and/or --break-pixels for intra-sequence breaks?
  1094     op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
  1095                   help="margin color")
  1096 
  1097     og = optparse.OptionGroup(op, "Text options")
  1098     og.add_option("-f", "--fontfile", metavar="FILE",
  1099                   help="TrueType or OpenType font file")
  1100     og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
  1101                   help="TrueType or OpenType font size (default: %default)")
  1102     og.add_option("--labels1", type="int", default=0, metavar="N", help=
  1103                   "genome1 labels: 0=name, 1=name:length, "
  1104                   "2=name:start:length, 3=name:start-end (default=%default)")
  1105     og.add_option("--labels2", type="int", default=0, metavar="N", help=
  1106                   "genome2 labels: 0=name, 1=name:length, "
  1107                   "2=name:start:length, 3=name:start-end (default=%default)")
  1108     og.add_option("--rot1", metavar="ROT", default="h",
  1109                   help="text rotation for the 1st genome (default=%default)")
  1110     og.add_option("--rot2", metavar="ROT", default="v",
  1111                   help="text rotation for the 2nd genome (default=%default)")
  1112     op.add_option_group(og)
  1113 
  1114     og = optparse.OptionGroup(op, "Annotation options")
  1115     og.add_option("--bed1", metavar="FILE",
  1116                   help="read genome1 annotations from BED file")
  1117     og.add_option("--bed2", metavar="FILE",
  1118                   help="read genome2 annotations from BED file")
  1119     og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
  1120                   "RepeatMasker .out or rmsk.txt file")
  1121     og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
  1122                   "RepeatMasker .out or rmsk.txt file")
  1123     op.add_option_group(og)
  1124 
  1125     og = optparse.OptionGroup(op, "Gene options")
  1126     og.add_option("--genePred1", metavar="FILE",
  1127                   help="read genome1 genes from genePred file")
  1128     og.add_option("--genePred2", metavar="FILE",
  1129                   help="read genome2 genes from genePred file")
  1130     og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
  1131                   help="color for exons (default=%default)")
  1132     og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
  1133                   help="color for protein-coding regions (default=%default)")
  1134     op.add_option_group(og)
  1135 
  1136     og = optparse.OptionGroup(op, "Unsequenced gap options")
  1137     og.add_option("--gap1", metavar="FILE",
  1138                   help="read genome1 unsequenced gaps from agp or gap file")
  1139     og.add_option("--gap2", metavar="FILE",
  1140                   help="read genome2 unsequenced gaps from agp or gap file")
  1141     og.add_option("--bridged-color", metavar="COLOR", default="yellow",
  1142                   help="color for bridged gaps (default: %default)")
  1143     og.add_option("--unbridged-color", metavar="COLOR", default="orange",
  1144                   help="color for unbridged gaps (default: %default)")
  1145     op.add_option_group(og)
  1146     (opts, args) = op.parse_args()
  1147     if len(args) != 2: op.error("2 arguments needed")
  1148 
  1149     opts.background_color = "white"
  1150     opts.label_space = 5     # minimum number of pixels between axis labels
  1151 
  1152     try: lastDotplot(opts, args)
  1153     except KeyboardInterrupt: pass  # avoid silly error message
  1154     except RuntimeError as e:
  1155         prog = os.path.basename(sys.argv[0])
  1156         sys.exit(prog + ": error: " + str(e))