scripts/last-dotplot
author Martin C. Frith
Wed Nov 08 16:06:17 2017 +0900 (2017-11-08)
changeset 909 92dfad507286
parent 908 6840398323e1
child 910 42653029d900
permissions -rwxr-xr-x
Remove last-dotplot trim options
     1 #! /usr/bin/env python
     2 
     3 # Read pair-wise alignments in MAF or LAST tabular format: write an
     4 # "Oxford grid", a.k.a. dotplot.
     5 
     6 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
     7 # pixels with one or more aligned nt-pairs are black.  This can look
     8 # too crowded for large genome alignments.  I tried shading each pixel
     9 # according to the number of aligned nt-pairs within it, but the
    10 # result is too faint.  How can this be done better?
    11 
    12 import gzip
    13 from fnmatch import fnmatchcase
    14 from operator import itemgetter
    15 import subprocess
    16 import itertools, optparse, os, re, sys
    17 
    18 # Try to make PIL/PILLOW work:
    19 try: from PIL import Image, ImageDraw, ImageFont, ImageColor
    20 except ImportError: import Image, ImageDraw, ImageFont, ImageColor
    21 
    22 def myOpen(fileName):  # faster than fileinput
    23     if fileName is None:
    24         return []
    25     if fileName == "-":
    26         return sys.stdin
    27     if fileName.endswith(".gz"):
    28         return gzip.open(fileName)
    29     return open(fileName)
    30 
    31 def warn(message):
    32     if opts.verbose:
    33         prog = os.path.basename(sys.argv[0])
    34         sys.stderr.write(prog + ": " + message + "\n")
    35 
    36 def croppedBlocks(blocks, ranges1, ranges2):
    37     headBeg1, headBeg2, headSize = blocks[0]
    38     for r1 in ranges1:
    39         for r2 in ranges2:
    40             cropBeg1, cropEnd1 = r1
    41             if headBeg1 < 0:
    42                 cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
    43             cropBeg2, cropEnd2 = r2
    44             if headBeg2 < 0:
    45                 cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
    46             for beg1, beg2, size in blocks:
    47                 b1 = max(cropBeg1, beg1)
    48                 e1 = min(cropEnd1, beg1 + size)
    49                 if b1 >= e1: continue
    50                 offset = beg2 - beg1
    51                 b2 = max(cropBeg2, b1 + offset)
    52                 e2 = min(cropEnd2, e1 + offset)
    53                 if b2 >= e2: continue
    54                 yield b2 - offset, b2, e2 - b2
    55 
    56 def tabBlocks(beg1, beg2, blocks):
    57     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
    58     for i in blocks.split(","):
    59         if ":" in i:
    60             x, y = i.split(":")
    61             beg1 += int(x)
    62             beg2 += int(y)
    63         else:
    64             size = int(i)
    65             yield beg1, beg2, size
    66             beg1 += size
    67             beg2 += size
    68 
    69 def mafBlocks(beg1, beg2, seq1, seq2):
    70     '''Get the gapless blocks of an alignment, from MAF format.'''
    71     size = 0
    72     for x, y in itertools.izip(seq1, seq2):
    73         if x == "-":
    74             if size:
    75                 yield beg1, beg2, size
    76                 beg1 += size
    77                 beg2 += size
    78                 size = 0
    79             beg2 += 1
    80         elif y == "-":
    81             if size:
    82                 yield beg1, beg2, size
    83                 beg1 += size
    84                 beg2 += size
    85                 size = 0
    86             beg1 += 1
    87         else:
    88             size += 1
    89     if size: yield beg1, beg2, size
    90 
    91 def alignmentInput(lines):
    92     '''Get alignments and sequence lengths, from MAF or tabular format.'''
    93     mafCount = 0
    94     for line in lines:
    95         w = line.split()
    96         if line[0].isdigit():  # tabular format
    97             chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
    98             if w[4] == "-": beg1 -= seqlen1
    99             chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
   100             if w[9] == "-": beg2 -= seqlen2
   101             blocks = tabBlocks(beg1, beg2, w[11])
   102             yield chr1, seqlen1, chr2, seqlen2, blocks
   103         elif line[0] == "s":  # MAF format
   104             if mafCount == 0:
   105                 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
   106                 if w[4] == "-": beg1 -= seqlen1
   107                 mafCount = 1
   108             else:
   109                 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
   110                 if w[4] == "-": beg2 -= seqlen2
   111                 blocks = mafBlocks(beg1, beg2, seq1, seq2)
   112                 yield chr1, seqlen1, chr2, seqlen2, blocks
   113                 mafCount = 0
   114 
   115 def seqRequestFromText(text):
   116     if ":" in text:
   117         pattern, interval = text.rsplit(":", 1)
   118         if "-" in interval:
   119             beg, end = interval.rsplit("-", 1)
   120             return pattern, int(beg), int(end)  # beg may be negative
   121     return text, 0, sys.maxsize
   122 
   123 def rangesFromSeqName(seqRequests, name, seqLen):
   124     if seqRequests:
   125         base = name.split(".")[-1]  # allow for names like hg19.chr7
   126         for pat, beg, end in seqRequests:
   127             if fnmatchcase(name, pat) or fnmatchcase(base, pat):
   128                 yield max(beg, 0), min(end, seqLen)
   129     else:
   130         yield 0, seqLen
   131 
   132 def updateSeqs(coverDict, seqRanges, seqName, ranges, coveredRange):
   133     beg, end = coveredRange
   134     if beg < 0:
   135         coveredRange = -end, -beg
   136     if seqName in coverDict:
   137         coverDict[seqName].append(coveredRange)
   138     else:
   139         coverDict[seqName] = [coveredRange]
   140         for beg, end in ranges:
   141             r = seqName, beg, end
   142             seqRanges.append(r)
   143 
   144 def readAlignments(fileName, opts):
   145     '''Get alignments and sequence limits, from MAF or tabular format.'''
   146     seqRequests1 = map(seqRequestFromText, opts.seq1)
   147     seqRequests2 = map(seqRequestFromText, opts.seq2)
   148 
   149     alignments = []
   150     seqRanges1 = []
   151     seqRanges2 = []
   152     coverDict1 = {}
   153     coverDict2 = {}
   154     lines = myOpen(fileName)
   155     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   156         ranges1 = sorted(rangesFromSeqName(seqRequests1, seqName1, seqLen1))
   157         if not ranges1: continue
   158         ranges2 = sorted(rangesFromSeqName(seqRequests2, seqName2, seqLen2))
   159         if not ranges2: continue
   160         b = list(croppedBlocks(list(blocks), ranges1, ranges2))
   161         if not b: continue
   162         aln = seqName1, seqName2, b
   163         alignments.append(aln)
   164         coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
   165         updateSeqs(coverDict1, seqRanges1, seqName1, ranges1, coveredRange1)
   166         coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
   167         updateSeqs(coverDict2, seqRanges2, seqName2, ranges2, coveredRange2)
   168     return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
   169 
   170 def natural_sort_key(my_string):
   171     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
   172     parts = re.split(r'(\d+)', my_string)
   173     parts[1::2] = map(int, parts[1::2])
   174     return parts
   175 
   176 def nameKey(oneSeqRanges):
   177     return natural_sort_key(oneSeqRanges[0][0])
   178 
   179 def sizeKey(oneSeqRanges):
   180     return sum(b - e for n, b, e in oneSeqRanges), nameKey(oneSeqRanges)
   181 
   182 def getSortedRanges(seqRanges, sortOpt):
   183     g = [list(v) for k, v in itertools.groupby(seqRanges, itemgetter(0))]
   184     if sortOpt == "1":
   185         g.sort(key=nameKey)
   186     if sortOpt == "2":
   187         g.sort(key=sizeKey)
   188     return [j for i in g for j in i]
   189 
   190 def prettyNum(n):
   191     t = str(n)
   192     groups = []
   193     while t:
   194         groups.append(t[-3:])
   195         t = t[:-3]
   196     return ",".join(reversed(groups))
   197 
   198 def sizeText(size):
   199     suffixes = "bp", "kb", "Mb", "Gb"
   200     for i, x in enumerate(suffixes):
   201         j = 10 ** (i * 3)
   202         if size < j * 10:
   203             return "%.2g" % (1.0 * size / j) + x
   204         if size < j * 1000 or i == len(suffixes) - 1:
   205             return "%.0f" % (1.0 * size / j) + x
   206 
   207 def labelText(seqRange, labelOpt):
   208     seqName, beg, end = seqRange
   209     if labelOpt == 1:
   210         return seqName + ": " + sizeText(end - beg)
   211     if labelOpt == 2:
   212         return seqName + ":" + prettyNum(beg) + ": " + sizeText(end - beg)
   213     if labelOpt == 3:
   214         return seqName + ":" + prettyNum(beg) + "-" + prettyNum(end)
   215     return seqName
   216 
   217 def rangeLabels(seqRanges, labelOpt, font, fontsize, image_mode, textRot):
   218     if fontsize:
   219         image_size = 1, 1
   220         im = Image.new(image_mode, image_size)
   221         draw = ImageDraw.Draw(im)
   222     x = y = 0
   223     for r in seqRanges:
   224         text = labelText(r, labelOpt)
   225         if fontsize:
   226             x, y = draw.textsize(text, font=font)
   227             if textRot:
   228                 x, y = y, x
   229         yield text, x, y
   230 
   231 def dataFromRanges(seqRanges, sortOpt, font,
   232                    fontsize, image_mode, labelOpt, textRot):
   233     s = getSortedRanges(seqRanges, sortOpt)
   234     for i in s:
   235         warn("\t".join(map(str, i)))
   236     warn("")
   237     rangeSizes = [e - b for n, b, e in s]
   238     labs = list(rangeLabels(s, labelOpt, font, fontsize, image_mode, textRot))
   239     margin = max(i[2] for i in labs)
   240     # xxx the margin may be too big, because some labels may get omitted
   241     return s, rangeSizes, labs, margin
   242 
   243 def div_ceil(x, y):
   244     '''Return x / y rounded up.'''
   245     q, r = divmod(x, y)
   246     return q + (r != 0)
   247 
   248 def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
   249     '''Get the minimum bp-per-pixel that fits in the size limit.'''
   250     warn("choosing bp per pixel...")
   251     numOfRanges = len(rangeSizes)
   252     maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
   253     if maxPixelsInRanges < numOfRanges:
   254         raise Exception("can't fit the image: too many sequences?")
   255     negLimit = -maxPixelsInRanges
   256     negBpPerPix = sum(rangeSizes) // negLimit
   257     while True:
   258         if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
   259             return -negBpPerPix
   260         negBpPerPix -= 1
   261 
   262 def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
   263     '''Get the start pixel for each range.'''
   264     rangePixBegs = []
   265     pix_tot = margin - pixTweenRanges
   266     for i in rangePixLens:
   267         pix_tot += pixTweenRanges
   268         rangePixBegs.append(pix_tot)
   269         pix_tot += i
   270     return rangePixBegs
   271 
   272 def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
   273     '''Return pixel information about the ranges.'''
   274     rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
   275     rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
   276     tot_pix = rangePixBegs[-1] + rangePixLens[-1]
   277     return rangePixBegs, rangePixLens, tot_pix
   278 
   279 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
   280     while True:
   281         q1, r1 = divmod(beg1, bp_per_pix)
   282         q2, r2 = divmod(beg2, bp_per_pix)
   283         hits[q2 * width + q1] |= 1
   284         next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
   285         if next_pix >= size: break
   286         beg1 += next_pix
   287         beg2 += next_pix
   288         size -= next_pix
   289 
   290 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
   291     beg2 = -1 - beg2
   292     while True:
   293         q1, r1 = divmod(beg1, bp_per_pix)
   294         q2, r2 = divmod(beg2, bp_per_pix)
   295         hits[q2 * width + q1] |= 2
   296         next_pix = min(bp_per_pix - r1, r2 + 1)
   297         if next_pix >= size: break
   298         beg1 += next_pix
   299         beg2 -= next_pix
   300         size -= next_pix
   301 
   302 def findOrigin(ranges, beg, size):
   303     if beg < 0:
   304         beg = -(beg + size)
   305     for rangeBeg, rangeEnd, origin in ranges:
   306         if rangeEnd > beg:
   307             return origin
   308 
   309 def alignmentPixels(width, height, alignments, bp_per_pix,
   310                     rangeDict1, rangeDict2):
   311     hits = [0] * (width * height)  # the image data
   312     for seq1, seq2, blocks in alignments:
   313         beg1, beg2, size = blocks[0]
   314         ori1 = findOrigin(rangeDict1[seq1], beg1, size)
   315         ori2 = findOrigin(rangeDict2[seq2], beg2, size)
   316         for beg1, beg2, size in blocks:
   317             if beg1 < 0:
   318                 beg1 = -(beg1 + size)
   319                 beg2 = -(beg2 + size)
   320             if beg2 >= 0:
   321                 drawLineForward(hits, width, bp_per_pix,
   322                                 beg1 + ori1, beg2 + ori2, size)
   323             else:
   324                 drawLineReverse(hits, width, bp_per_pix,
   325                                 beg1 + ori1, beg2 - ori2, size)
   326     return hits
   327 
   328 def expandedSeqDict(seqDict):
   329     '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   330     newDict = seqDict.copy()
   331     for name, x in seqDict.items():
   332         if "." in name:
   333             base = name.split(".")[-1]
   334             if base in newDict:  # an ambiguous case was found:
   335                 return seqDict   # so give up completely
   336             newDict[base] = x
   337     return newDict
   338 
   339 def readBed(fileName, rangeDict):
   340     for line in myOpen(fileName):
   341         w = line.split()
   342         if not w: continue
   343         seqName = w[0]
   344         if seqName not in rangeDict: continue
   345         beg = int(w[1])
   346         end = int(w[2])
   347         layer = 900
   348         color = "#fbf"
   349         if len(w) > 4:
   350             if w[4] != ".":
   351                 layer = float(w[4])
   352             if len(w) > 5:
   353                 if len(w) > 8 and w[8].count(",") == 2:
   354                     color = "rgb(" + w[8] + ")"
   355                 elif w[5] == "+":
   356                     color = "#ffe8e8"
   357                 elif w[5] == "-":
   358                     color = "#e8e8ff"
   359         yield layer, color, seqName, beg, end
   360 
   361 def commaSeparatedInts(text):
   362     return map(int, text.rstrip(",").split(","))
   363 
   364 def readGenePred(opts, fileName, rangeDict):
   365     for line in myOpen(fileName):
   366         fields = line.split()
   367         if not fields: continue
   368         if fields[2] not in "+-": fields = fields[1:]
   369         seqName = fields[1]
   370         if seqName not in rangeDict: continue
   371         #strand = fields[2]
   372         cdsBeg = int(fields[5])
   373         cdsEnd = int(fields[6])
   374         exonBegs = commaSeparatedInts(fields[8])
   375         exonEnds = commaSeparatedInts(fields[9])
   376         for beg, end in zip(exonBegs, exonEnds):
   377             yield 300, opts.exon_color, seqName, beg, end
   378             b = max(beg, cdsBeg)
   379             e = min(end, cdsEnd)
   380             if b < e: yield 400, opts.cds_color, seqName, b, e
   381 
   382 def readRmsk(fileName, rangeDict):
   383     for line in myOpen(fileName):
   384         fields = line.split()
   385         if len(fields) == 17:  # rmsk.txt
   386             seqName = fields[5]
   387             if seqName not in rangeDict: continue  # do this ASAP for speed
   388             beg = int(fields[6])
   389             end = int(fields[7])
   390             strand = fields[9]
   391             repeatClass = fields[11]
   392         elif len(fields) == 15:  # .out
   393             seqName = fields[4]
   394             if seqName not in rangeDict: continue
   395             beg = int(fields[5]) - 1
   396             end = int(fields[6])
   397             strand = fields[8]
   398             repeatClass = fields[10]
   399         else:
   400             continue
   401         if repeatClass in ("Low_complexity", "Simple_repeat"):
   402             yield 200, "#fbf", seqName, beg, end
   403         elif strand == "+":
   404             yield 100, "#ffe8e8", seqName, beg, end
   405         else:
   406             yield 100, "#e8e8ff", seqName, beg, end
   407 
   408 def isExtraFirstGapField(fields):
   409     return fields[4].isdigit()
   410 
   411 def readGaps(opts, fileName, rangeDict):
   412     '''Read locations of unsequenced gaps, from an agp or gap file.'''
   413     for line in myOpen(fileName):
   414         w = line.split()
   415         if not w or w[0][0] == "#": continue
   416         if isExtraFirstGapField(w): w = w[1:]
   417         if w[4] not in "NU": continue
   418         seqName = w[0]
   419         if seqName not in rangeDict: continue
   420         end = int(w[2])
   421         beg = end - int(w[5])  # zero-based coordinate
   422         if w[7] == "yes":
   423             yield 3000, opts.bridged_color, seqName, beg, end
   424         else:
   425             yield 2000, opts.unbridged_color, seqName, beg, end
   426 
   427 def bedBoxes(beds, rangeDict, margin, edge, isTop, bpPerPix):
   428     for layer, color, seqName, bedBeg, bedEnd in beds:
   429         for rangeBeg, rangeEnd, origin in rangeDict[seqName]:
   430             beg = max(bedBeg, rangeBeg)
   431             end = min(bedEnd, rangeEnd)
   432             if beg >= end: continue
   433             if layer <= 1000:
   434                 # include partly-covered pixels
   435                 b = (origin + beg) // bpPerPix
   436                 e = div_ceil(origin + end, bpPerPix)
   437             else:
   438                 # exclude partly-covered pixels
   439                 b = div_ceil(origin + beg, bpPerPix)
   440                 e = (origin + end) // bpPerPix
   441                 if e <= b: continue
   442             if isTop:
   443                 box = b, margin, e, edge
   444             else:
   445                 box = margin, b, edge, e
   446             yield layer, color, box
   447 
   448 def drawAnnotations(im, boxes):
   449     # xxx use partial transparency for different-color overlaps?
   450     for layer, color, box in boxes:
   451         im.paste(color, box)
   452 
   453 def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
   454     '''Return axis labels with endpoint & sort-order information.'''
   455     maxWidth = end - beg
   456     for i, j, k in zip(labels, rangePixBegs, rangePixLens):
   457         text, textWidth, textHeight = i
   458         if textWidth > maxWidth:
   459             continue
   460         labelBeg = j + (k - textWidth) // 2
   461         labelEnd = labelBeg + textWidth
   462         sortKey = textWidth - k
   463         if labelBeg < beg:
   464             sortKey += maxWidth * (beg - labelBeg)
   465             labelBeg = beg
   466             labelEnd = beg + textWidth
   467         if labelEnd > end:
   468             sortKey += maxWidth * (labelEnd - end)
   469             labelEnd = end
   470             labelBeg = end - textWidth
   471         yield sortKey, labelBeg, labelEnd, text, textHeight
   472 
   473 def nonoverlappingLabels(labels, minPixTweenLabels):
   474     '''Get a subset of non-overlapping axis labels, greedily.'''
   475     out = []
   476     for i in labels:
   477         beg = i[1] - minPixTweenLabels
   478         end = i[2] + minPixTweenLabels
   479         if all(j[2] <= beg or j[1] >= end for j in out):
   480             out.append(i)
   481     return out
   482 
   483 def axisImage(labels, rangePixBegs, rangePixLens, textRot,
   484               textAln, font, image_mode, opts):
   485     '''Make an image of axis labels.'''
   486     beg = rangePixBegs[0]
   487     end = rangePixBegs[-1] + rangePixLens[-1]
   488     margin = max(i[2] for i in labels)
   489     labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
   490     minPixTweenLabels = 0 if textRot else opts.label_space
   491     labels = nonoverlappingLabels(labels, minPixTweenLabels)
   492     image_size = (margin, end) if textRot else (end, margin)
   493     im = Image.new(image_mode, image_size, opts.margin_color)
   494     draw = ImageDraw.Draw(im)
   495     for i in labels:
   496         base = margin - i[4] if textAln else 0
   497         position = (base, i[1]) if textRot else (i[1], base)
   498         draw.text(position, i[3], font=font, fill=opts.text_color)
   499     return im
   500 
   501 def rangesWithOrigins(sortedRanges, rangePixBegs, bpPerPix):
   502     for i, j in zip(sortedRanges, rangePixBegs):
   503         seqName, rangeBeg, rangeEnd = i
   504         origin = bpPerPix * j - rangeBeg
   505         yield seqName, (rangeBeg, rangeEnd, origin)
   506 
   507 def rangesPerSeq(sortedRanges, rangePixBegs, bpPerPix):
   508     a = rangesWithOrigins(sortedRanges, rangePixBegs, bpPerPix)
   509     for k, v in itertools.groupby(a, itemgetter(0)):
   510         yield k, [i[1] for i in v]
   511 
   512 def getFont(opts):
   513     if opts.fontfile:
   514         return ImageFont.truetype(opts.fontfile, opts.fontsize)
   515     fileNames = []
   516     try:
   517         x = ["fc-match", "-f%{file}", "arial"]
   518         p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
   519         out, err = p.communicate()
   520         fileNames.append(out)
   521     except OSError as e:
   522         warn("fc-match error: " + str(e))
   523     fileNames.append("/Library/Fonts/Arial.ttf")  # for Mac
   524     for i in fileNames:
   525         try:
   526             font = ImageFont.truetype(i, opts.fontsize)
   527             warn("font: " + i)
   528             return font
   529         except IOError as e:
   530             warn("font load error: " + str(e))
   531     return ImageFont.load_default()
   532 
   533 def lastDotplot(opts, args):
   534     font = getFont(opts)
   535     image_mode = 'RGB'
   536     forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
   537     reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
   538     zipped_colors = zip(forward_color, reverse_color)
   539     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
   540 
   541     warn("reading alignments...")
   542     alnData = readAlignments(args[0], opts)
   543     alignments, seqRanges1, coverDict1, seqRanges2, coverDict2 = alnData
   544     warn("done")
   545     if not alignments: raise Exception("there are no alignments")
   546 
   547     textRot1 = "vertical".startswith(opts.rot1)
   548     i1 = dataFromRanges(seqRanges1, opts.sort1, font,
   549                         opts.fontsize, image_mode, opts.labels1, textRot1)
   550     sortedRanges1, rangeSizes1, labelData1, tMargin = i1
   551 
   552     textRot2 = "horizontal".startswith(opts.rot2)
   553     i2 = dataFromRanges(seqRanges2, opts.sort2, font,
   554                         opts.fontsize, image_mode, opts.labels2, textRot2)
   555     sortedRanges2, rangeSizes2, labelData2, lMargin = i2
   556 
   557     maxPixels1 = opts.width  - lMargin
   558     maxPixels2 = opts.height - tMargin
   559     bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
   560     bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
   561     bpPerPix = max(bpPerPix1, bpPerPix2)
   562     warn("bp per pixel = " + str(bpPerPix))
   563 
   564     p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
   565     rangePixBegs1, rangePixLens1, width = p1
   566     rangeDict1 = dict(rangesPerSeq(sortedRanges1, rangePixBegs1, bpPerPix))
   567 
   568     p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
   569     rangePixBegs2, rangePixLens2, height = p2
   570     rangeDict2 = dict(rangesPerSeq(sortedRanges2, rangePixBegs2, bpPerPix))
   571 
   572     warn("width:  " + str(width))
   573     warn("height: " + str(height))
   574 
   575     warn("processing alignments...")
   576     hits = alignmentPixels(width, height, alignments, bpPerPix,
   577                            rangeDict1, rangeDict2)
   578 
   579     warn("reading annotations...")
   580 
   581     rangeDict1 = expandedSeqDict(rangeDict1)
   582     beds1 = itertools.chain(readBed(opts.bed1, rangeDict1),
   583                             readRmsk(opts.rmsk1, rangeDict1),
   584                             readGenePred(opts, opts.genePred1, rangeDict1),
   585                             readGaps(opts, opts.gap1, rangeDict1))
   586     b1 = bedBoxes(beds1, rangeDict1, tMargin, height, True, bpPerPix)
   587 
   588     rangeDict2 = expandedSeqDict(rangeDict2)
   589     beds2 = itertools.chain(readBed(opts.bed2, rangeDict2),
   590                             readRmsk(opts.rmsk2, rangeDict2),
   591                             readGenePred(opts, opts.genePred2, rangeDict2),
   592                             readGaps(opts, opts.gap2, rangeDict2))
   593     b2 = bedBoxes(beds2, rangeDict2, lMargin, width, False, bpPerPix)
   594 
   595     boxes = sorted(itertools.chain(b1, b2))
   596 
   597     warn("drawing...")
   598 
   599     image_size = width, height
   600     im = Image.new(image_mode, image_size, opts.background_color)
   601 
   602     drawAnnotations(im, boxes)
   603 
   604     for i in range(height):
   605         for j in range(width):
   606             store_value = hits[i * width + j]
   607             xy = j, i
   608             if   store_value == 1: im.putpixel(xy, forward_color)
   609             elif store_value == 2: im.putpixel(xy, reverse_color)
   610             elif store_value == 3: im.putpixel(xy, overlap_color)
   611 
   612     if opts.fontsize != 0:
   613         axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
   614                           textRot1, False, font, image_mode, opts)
   615         if textRot1:
   616             axis1 = axis1.transpose(Image.ROTATE_90)
   617         axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
   618                           textRot2, textRot2, font, image_mode, opts)
   619         if not textRot2:
   620             axis2 = axis2.transpose(Image.ROTATE_270)
   621         im.paste(axis1, (0, 0))
   622         im.paste(axis2, (0, 0))
   623 
   624     for i in rangePixBegs1[1:]:
   625         box = i - opts.border_pixels, tMargin, i, height
   626         im.paste(opts.border_color, box)
   627 
   628     for i in rangePixBegs2[1:]:
   629         box = lMargin, i - opts.border_pixels, width, i
   630         im.paste(opts.border_color, box)
   631 
   632     im.save(args[1])
   633 
   634 if __name__ == "__main__":
   635     usage = """%prog --help
   636    or: %prog [options] maf-or-tab-alignments dotplot.png
   637    or: %prog [options] maf-or-tab-alignments dotplot.gif
   638    or: ..."""
   639     description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
   640     op = optparse.OptionParser(usage=usage, description=description)
   641     op.add_option("-v", "--verbose", action="count",
   642                   help="show progress messages & data about the plot")
   643     op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
   644                   default=[],
   645                   help="which sequences to show from the 1st genome")
   646     op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
   647                   default=[],
   648                   help="which sequences to show from the 2nd genome")
   649     # Replace "width" & "height" with a single "length" option?
   650     op.add_option("-x", "--width", type="int", default=1000,
   651                   help="maximum width in pixels (default: %default)")
   652     op.add_option("-y", "--height", type="int", default=1000,
   653                   help="maximum height in pixels (default: %default)")
   654     op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
   655                   help="color for forward alignments (default: %default)")
   656     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
   657                   help="color for reverse alignments (default: %default)")
   658     op.add_option("--sort1", default="1", metavar="N",
   659                   help="genome1 sequence order: 0=input order, 1=name order, "
   660                   "2=length order (default=%default)")
   661     op.add_option("--sort2", default="1", metavar="N",
   662                   help="genome2 sequence order: 0=input order, 1=name order, "
   663                   "2=length order (default=%default)")
   664     op.add_option("--border-pixels", metavar="INT", type="int", default=1,
   665                   help="number of pixels between sequences (default=%default)")
   666     op.add_option("--border-color", metavar="COLOR", default="black",
   667                   help="color for pixels between sequences (default=%default)")
   668     op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
   669                   help="margin color")
   670 
   671     og = optparse.OptionGroup(op, "Text options")
   672     og.add_option("-f", "--fontfile", metavar="FILE",
   673                   help="TrueType or OpenType font file")
   674     og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
   675                   help="TrueType or OpenType font size (default: %default)")
   676     og.add_option("--labels1", type="int", default=0, metavar="N", help=
   677                   "genome1 labels: 0=name, 1=name:length, "
   678                   "2=name:start:length, 3=name:start-end (default=%default)")
   679     og.add_option("--labels2", type="int", default=0, metavar="N", help=
   680                   "genome2 labels: 0=name, 1=name:length, "
   681                   "2=name:start:length, 3=name:start-end (default=%default)")
   682     og.add_option("--rot1", metavar="ROT", default="h",
   683                   help="text rotation for the 1st genome (default=%default)")
   684     og.add_option("--rot2", metavar="ROT", default="v",
   685                   help="text rotation for the 2nd genome (default=%default)")
   686     op.add_option_group(og)
   687 
   688     og = optparse.OptionGroup(op, "Annotation options")
   689     og.add_option("--bed1", metavar="FILE",
   690                   help="read genome1 annotations from BED file")
   691     og.add_option("--bed2", metavar="FILE",
   692                   help="read genome2 annotations from BED file")
   693     og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
   694                   "RepeatMasker .out or rmsk.txt file")
   695     og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
   696                   "RepeatMasker .out or rmsk.txt file")
   697     op.add_option_group(og)
   698 
   699     og = optparse.OptionGroup(op, "Gene options")
   700     og.add_option("--genePred1", metavar="FILE",
   701                   help="read genome1 genes from genePred file")
   702     og.add_option("--genePred2", metavar="FILE",
   703                   help="read genome2 genes from genePred file")
   704     og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
   705                   help="color for exons (default=%default)")
   706     og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
   707                   help="color for protein-coding regions (default=%default)")
   708     op.add_option_group(og)
   709 
   710     og = optparse.OptionGroup(op, "Unsequenced gap options")
   711     og.add_option("--gap1", metavar="FILE",
   712                   help="read genome1 unsequenced gaps from agp or gap file")
   713     og.add_option("--gap2", metavar="FILE",
   714                   help="read genome2 unsequenced gaps from agp or gap file")
   715     og.add_option("--bridged-color", metavar="COLOR", default="yellow",
   716                   help="color for bridged gaps (default: %default)")
   717     og.add_option("--unbridged-color", metavar="COLOR", default="orange",
   718                   help="color for unbridged gaps (default: %default)")
   719     op.add_option_group(og)
   720     (opts, args) = op.parse_args()
   721     if len(args) != 2: op.error("2 arguments needed")
   722 
   723     opts.text_color = "black"
   724     opts.background_color = "white"
   725     opts.label_space = 5     # minimum number of pixels between axis labels
   726 
   727     try: lastDotplot(opts, args)
   728     except KeyboardInterrupt: pass  # avoid silly error message
   729     except Exception as e:
   730         prog = os.path.basename(sys.argv[0])
   731         sys.exit(prog + ": error: " + str(e))