scripts/last-dotplot
author Martin C. Frith
Wed Nov 08 13:46:08 2017 +0900 (2017-11-08)
changeset 906 21fbf377641f
parent 905 8d52cef22afa
child 907 87991b57ed04
permissions -rwxr-xr-x
Refactor last-dotplot
     1 #! /usr/bin/env python
     2 
     3 # Read pair-wise alignments in MAF or LAST tabular format: write an
     4 # "Oxford grid", a.k.a. dotplot.
     5 
     6 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
     7 # pixels with one or more aligned nt-pairs are black.  This can look
     8 # too crowded for large genome alignments.  I tried shading each pixel
     9 # according to the number of aligned nt-pairs within it, but the
    10 # result is too faint.  How can this be done better?
    11 
    12 import gzip
    13 from fnmatch import fnmatchcase
    14 from operator import itemgetter
    15 import subprocess
    16 import itertools, optparse, os, re, sys
    17 
    18 # Try to make PIL/PILLOW work:
    19 try: from PIL import Image, ImageDraw, ImageFont, ImageColor
    20 except ImportError: import Image, ImageDraw, ImageFont, ImageColor
    21 
    22 def myOpen(fileName):  # faster than fileinput
    23     if fileName is None:
    24         return []
    25     if fileName == "-":
    26         return sys.stdin
    27     if fileName.endswith(".gz"):
    28         return gzip.open(fileName)
    29     return open(fileName)
    30 
    31 def warn(message):
    32     if opts.verbose:
    33         prog = os.path.basename(sys.argv[0])
    34         sys.stderr.write(prog + ": " + message + "\n")
    35 
    36 def croppedBlocks(blocks, range1, range2):
    37     cropBeg1, cropEnd1 = range1
    38     cropBeg2, cropEnd2 = range2
    39     if blocks[0][0] < 0: cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
    40     if blocks[0][1] < 0: cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
    41     for beg1, beg2, size in blocks:
    42         b1 = max(cropBeg1, beg1)
    43         e1 = min(cropEnd1, beg1 + size)
    44         if b1 >= e1: continue
    45         offset = beg2 - beg1
    46         b2 = max(cropBeg2, b1 + offset)
    47         e2 = min(cropEnd2, e1 + offset)
    48         if b2 >= e2: continue
    49         yield b2 - offset, b2, e2 - b2
    50 
    51 def tabBlocks(beg1, beg2, blocks):
    52     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
    53     for i in blocks.split(","):
    54         if ":" in i:
    55             x, y = i.split(":")
    56             beg1 += int(x)
    57             beg2 += int(y)
    58         else:
    59             size = int(i)
    60             yield beg1, beg2, size
    61             beg1 += size
    62             beg2 += size
    63 
    64 def mafBlocks(beg1, beg2, seq1, seq2):
    65     '''Get the gapless blocks of an alignment, from MAF format.'''
    66     size = 0
    67     for x, y in itertools.izip(seq1, seq2):
    68         if x == "-":
    69             if size:
    70                 yield beg1, beg2, size
    71                 beg1 += size
    72                 beg2 += size
    73                 size = 0
    74             beg2 += 1
    75         elif y == "-":
    76             if size:
    77                 yield beg1, beg2, size
    78                 beg1 += size
    79                 beg2 += size
    80                 size = 0
    81             beg1 += 1
    82         else:
    83             size += 1
    84     if size: yield beg1, beg2, size
    85 
    86 def alignmentInput(lines):
    87     '''Get alignments and sequence lengths, from MAF or tabular format.'''
    88     mafCount = 0
    89     for line in lines:
    90         w = line.split()
    91         if line[0].isdigit():  # tabular format
    92             chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
    93             if w[4] == "-": beg1 -= seqlen1
    94             chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
    95             if w[9] == "-": beg2 -= seqlen2
    96             blocks = tabBlocks(beg1, beg2, w[11])
    97             yield chr1, seqlen1, chr2, seqlen2, blocks
    98         elif line[0] == "s":  # MAF format
    99             if mafCount == 0:
   100                 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
   101                 if w[4] == "-": beg1 -= seqlen1
   102                 mafCount = 1
   103             else:
   104                 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
   105                 if w[4] == "-": beg2 -= seqlen2
   106                 blocks = mafBlocks(beg1, beg2, seq1, seq2)
   107                 yield chr1, seqlen1, chr2, seqlen2, blocks
   108                 mafCount = 0
   109 
   110 def seqRequestFromText(text):
   111     if ":" in text:
   112         pattern, interval = text.rsplit(":", 1)
   113         if "-" in interval:
   114             beg, end = interval.rsplit("-", 1)
   115             return pattern, int(beg), int(end)  # beg may be negative
   116     return text, 0, sys.maxsize
   117 
   118 def rangeFromSeqName(seqRequests, name, seqLen):
   119     if not seqRequests: return 0, seqLen
   120     base = name.split(".")[-1]  # allow for names like hg19.chr7
   121     for pat, beg, end in seqRequests:
   122         if fnmatchcase(name, pat) or fnmatchcase(base, pat):
   123             return max(beg, 0), min(end, seqLen)
   124     return None
   125 
   126 def updateSeqs(isTrim, seqNames, seqLimits, seqName, seqRange, blocks, index):
   127     if seqName not in seqLimits:
   128         seqNames.append(seqName)
   129     if isTrim:
   130         beg = blocks[0][index]
   131         end = blocks[-1][index] + blocks[-1][2]
   132         if beg < 0: beg, end = -end, -beg
   133         if seqName in seqLimits:
   134             b, e = seqLimits[seqName]
   135             seqLimits[seqName] = min(b, beg), max(e, end)
   136         else:
   137             seqLimits[seqName] = beg, end
   138     else:
   139         seqLimits[seqName] = seqRange
   140 
   141 def readAlignments(fileName, opts):
   142     '''Get alignments and sequence limits, from MAF or tabular format.'''
   143     seqRequests1 = map(seqRequestFromText, opts.seq1)
   144     seqRequests2 = map(seqRequestFromText, opts.seq2)
   145 
   146     alignments = []
   147     seqNames1 = []
   148     seqNames2 = []
   149     seqLimits1 = {}
   150     seqLimits2 = {}
   151     lines = myOpen(fileName)
   152     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   153         range1 = rangeFromSeqName(seqRequests1, seqName1, seqLen1)
   154         if not range1: continue
   155         range2 = rangeFromSeqName(seqRequests2, seqName2, seqLen2)
   156         if not range2: continue
   157         b = list(croppedBlocks(list(blocks), range1, range2))
   158         if not b: continue
   159         aln = seqName1, seqName2, b
   160         alignments.append(aln)
   161         updateSeqs(opts.trim1, seqNames1, seqLimits1, seqName1, range1, b, 0)
   162         updateSeqs(opts.trim2, seqNames2, seqLimits2, seqName2, range2, b, 1)
   163     return alignments, seqNames1, seqNames2, seqLimits1, seqLimits2
   164 
   165 def natural_sort_key(my_string):
   166     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
   167     parts = re.split(r'(\d+)', my_string)
   168     parts[1::2] = map(int, parts[1::2])
   169     return parts
   170 
   171 def prettyNum(n):
   172     t = str(n)
   173     groups = []
   174     while t:
   175         groups.append(t[-3:])
   176         t = t[:-3]
   177     return ",".join(reversed(groups))
   178 
   179 def sizeText(size):
   180     suffixes = "bp", "kb", "Mb", "Gb"
   181     for i, x in enumerate(suffixes):
   182         j = 10 ** (i * 3)
   183         if size < j * 10:
   184             return "%.2g" % (1.0 * size / j) + x
   185         if size < j * 1000 or i == len(suffixes) - 1:
   186             return "%.0f" % (1.0 * size / j) + x
   187 
   188 def labelText(seqRange, labelOpt):
   189     seqName, beg, end = seqRange
   190     if labelOpt == 1:
   191         return seqName + ": " + sizeText(end - beg)
   192     if labelOpt == 2:
   193         return seqName + ":" + prettyNum(beg) + ": " + sizeText(end - beg)
   194     if labelOpt == 3:
   195         return seqName + ":" + prettyNum(beg) + "-" + prettyNum(end)
   196     return seqName
   197 
   198 def rangeLabels(seqRanges, labelOpt, font, fontsize, image_mode, textRot):
   199     if fontsize:
   200         image_size = 1, 1
   201         im = Image.new(image_mode, image_size)
   202         draw = ImageDraw.Draw(im)
   203     x = y = 0
   204     for r in seqRanges:
   205         text = labelText(r, labelOpt)
   206         if fontsize:
   207             x, y = draw.textsize(text, font=font)
   208             if textRot:
   209                 x, y = y, x
   210         yield text, x, y
   211 
   212 def getSeqInfo(sortOpt, seqNames, seqLimits,
   213                font, fontsize, image_mode, labelOpt, textRot):
   214     '''Return miscellaneous information about the sequences.'''
   215     if sortOpt == 1:
   216         seqNames.sort(key=natural_sort_key)
   217     rangeSizes = [seqLimits[i][1] - seqLimits[i][0] for i in seqNames]
   218     for i in seqNames:
   219         r = seqLimits[i]
   220         out = i, str(r[0]), str(r[1])
   221         warn("\t".join(out))
   222     warn("")
   223     if sortOpt == 2:
   224         seqRecords = sorted(zip(rangeSizes, seqNames), reverse=True)
   225         rangeSizes = [i[0] for i in seqRecords]
   226         seqNames = [i[1] for i in seqRecords]
   227     seqRanges = [(i, seqLimits[i][0], seqLimits[i][1]) for i in seqNames]
   228     labelData = list(rangeLabels(seqRanges, labelOpt,
   229                                  font, fontsize, image_mode, textRot))
   230     margin = max(i[2] for i in labelData)
   231     # xxx the margin may be too big, because some labels may get omitted
   232     return seqRanges, rangeSizes, labelData, margin
   233 
   234 def div_ceil(x, y):
   235     '''Return x / y rounded up.'''
   236     q, r = divmod(x, y)
   237     return q + (r != 0)
   238 
   239 def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
   240     '''Get the minimum bp-per-pixel that fits in the size limit.'''
   241     warn("choosing bp per pixel...")
   242     numOfRanges = len(rangeSizes)
   243     maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
   244     if maxPixelsInRanges < numOfRanges:
   245         raise Exception("can't fit the image: too many sequences?")
   246     negLimit = -maxPixelsInRanges
   247     negBpPerPix = sum(rangeSizes) // negLimit
   248     while True:
   249         if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
   250             return -negBpPerPix
   251         negBpPerPix -= 1
   252 
   253 def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
   254     '''Get the start pixel for each range.'''
   255     rangePixBegs = []
   256     pix_tot = margin - pixTweenRanges
   257     for i in rangePixLens:
   258         pix_tot += pixTweenRanges
   259         rangePixBegs.append(pix_tot)
   260         pix_tot += i
   261     return rangePixBegs
   262 
   263 def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
   264     '''Return pixel information about the ranges.'''
   265     rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
   266     rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
   267     tot_pix = rangePixBegs[-1] + rangePixLens[-1]
   268     return rangePixBegs, rangePixLens, tot_pix
   269 
   270 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
   271     while True:
   272         q1, r1 = divmod(beg1, bp_per_pix)
   273         q2, r2 = divmod(beg2, bp_per_pix)
   274         hits[q2 * width + q1] |= 1
   275         next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
   276         if next_pix >= size: break
   277         beg1 += next_pix
   278         beg2 += next_pix
   279         size -= next_pix
   280 
   281 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
   282     beg2 = -1 - beg2
   283     while True:
   284         q1, r1 = divmod(beg1, bp_per_pix)
   285         q2, r2 = divmod(beg2, bp_per_pix)
   286         hits[q2 * width + q1] |= 2
   287         next_pix = min(bp_per_pix - r1, r2 + 1)
   288         if next_pix >= size: break
   289         beg1 += next_pix
   290         beg2 -= next_pix
   291         size -= next_pix
   292 
   293 def findOrigin(ranges, beg, size):
   294     if beg < 0:
   295         beg = -(beg + size)
   296     for rangeBeg, rangeEnd, origin in ranges:
   297         if rangeEnd > beg:
   298             return origin
   299 
   300 def alignmentPixels(width, height, alignments, bp_per_pix,
   301                     rangeDict1, rangeDict2):
   302     hits = [0] * (width * height)  # the image data
   303     for seq1, seq2, blocks in alignments:
   304         beg1, beg2, size = blocks[0]
   305         ori1 = findOrigin(rangeDict1[seq1], beg1, size)
   306         ori2 = findOrigin(rangeDict2[seq2], beg2, size)
   307         for beg1, beg2, size in blocks:
   308             if beg1 < 0:
   309                 beg1 = -(beg1 + size)
   310                 beg2 = -(beg2 + size)
   311             if beg2 >= 0:
   312                 drawLineForward(hits, width, bp_per_pix,
   313                                 beg1 + ori1, beg2 + ori2, size)
   314             else:
   315                 drawLineReverse(hits, width, bp_per_pix,
   316                                 beg1 + ori1, beg2 - ori2, size)
   317     return hits
   318 
   319 def expandedSeqDict(seqDict):
   320     '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   321     newDict = seqDict.copy()
   322     for name, x in seqDict.items():
   323         if "." in name:
   324             base = name.split(".")[-1]
   325             if base in newDict:  # an ambiguous case was found:
   326                 return seqDict   # so give up completely
   327             newDict[base] = x
   328     return newDict
   329 
   330 def readBed(fileName, rangeDict):
   331     for line in myOpen(fileName):
   332         w = line.split()
   333         if not w: continue
   334         seqName = w[0]
   335         if seqName not in rangeDict: continue
   336         beg = int(w[1])
   337         end = int(w[2])
   338         layer = 900
   339         color = "#fbf"
   340         if len(w) > 4:
   341             if w[4] != ".":
   342                 layer = float(w[4])
   343             if len(w) > 5:
   344                 if len(w) > 8 and w[8].count(",") == 2:
   345                     color = "rgb(" + w[8] + ")"
   346                 elif w[5] == "+":
   347                     color = "#ffe8e8"
   348                 elif w[5] == "-":
   349                     color = "#e8e8ff"
   350         yield layer, color, seqName, beg, end
   351 
   352 def commaSeparatedInts(text):
   353     return map(int, text.rstrip(",").split(","))
   354 
   355 def readGenePred(opts, fileName, rangeDict):
   356     for line in myOpen(fileName):
   357         fields = line.split()
   358         if not fields: continue
   359         if fields[2] not in "+-": fields = fields[1:]
   360         seqName = fields[1]
   361         if seqName not in rangeDict: continue
   362         #strand = fields[2]
   363         cdsBeg = int(fields[5])
   364         cdsEnd = int(fields[6])
   365         exonBegs = commaSeparatedInts(fields[8])
   366         exonEnds = commaSeparatedInts(fields[9])
   367         for beg, end in zip(exonBegs, exonEnds):
   368             yield 300, opts.exon_color, seqName, beg, end
   369             b = max(beg, cdsBeg)
   370             e = min(end, cdsEnd)
   371             if b < e: yield 400, opts.cds_color, seqName, b, e
   372 
   373 def readRmsk(fileName, rangeDict):
   374     for line in myOpen(fileName):
   375         fields = line.split()
   376         if len(fields) == 17:  # rmsk.txt
   377             seqName = fields[5]
   378             if seqName not in rangeDict: continue  # do this ASAP for speed
   379             beg = int(fields[6])
   380             end = int(fields[7])
   381             strand = fields[9]
   382             repeatClass = fields[11]
   383         elif len(fields) == 15:  # .out
   384             seqName = fields[4]
   385             if seqName not in rangeDict: continue
   386             beg = int(fields[5]) - 1
   387             end = int(fields[6])
   388             strand = fields[8]
   389             repeatClass = fields[10]
   390         else:
   391             continue
   392         if repeatClass in ("Low_complexity", "Simple_repeat"):
   393             yield 200, "#fbf", seqName, beg, end
   394         elif strand == "+":
   395             yield 100, "#ffe8e8", seqName, beg, end
   396         else:
   397             yield 100, "#e8e8ff", seqName, beg, end
   398 
   399 def isExtraFirstGapField(fields):
   400     return fields[4].isdigit()
   401 
   402 def readGaps(opts, fileName, rangeDict):
   403     '''Read locations of unsequenced gaps, from an agp or gap file.'''
   404     for line in myOpen(fileName):
   405         w = line.split()
   406         if not w or w[0][0] == "#": continue
   407         if isExtraFirstGapField(w): w = w[1:]
   408         if w[4] not in "NU": continue
   409         seqName = w[0]
   410         if seqName not in rangeDict: continue
   411         end = int(w[2])
   412         beg = end - int(w[5])  # zero-based coordinate
   413         if w[7] == "yes":
   414             yield 3000, opts.bridged_color, seqName, beg, end
   415         else:
   416             yield 2000, opts.unbridged_color, seqName, beg, end
   417 
   418 def bedBoxes(beds, rangeDict, margin, edge, isTop, bpPerPix):
   419     for layer, color, seqName, bedBeg, bedEnd in beds:
   420         for rangeBeg, rangeEnd, origin in rangeDict[seqName]:
   421             beg = max(bedBeg, rangeBeg)
   422             end = min(bedEnd, rangeEnd)
   423             if beg >= end: continue
   424             if layer <= 1000:
   425                 # include partly-covered pixels
   426                 b = (origin + beg) // bpPerPix
   427                 e = div_ceil(origin + end, bpPerPix)
   428             else:
   429                 # exclude partly-covered pixels
   430                 b = div_ceil(origin + beg, bpPerPix)
   431                 e = (origin + end) // bpPerPix
   432                 if e <= b: continue
   433             if isTop:
   434                 box = b, margin, e, edge
   435             else:
   436                 box = margin, b, edge, e
   437             yield layer, color, box
   438 
   439 def drawAnnotations(im, boxes):
   440     # xxx use partial transparency for different-color overlaps?
   441     for layer, color, box in boxes:
   442         im.paste(color, box)
   443 
   444 def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
   445     '''Return axis labels with endpoint & sort-order information.'''
   446     maxWidth = end - beg
   447     for i, j, k in zip(labels, rangePixBegs, rangePixLens):
   448         text, textWidth, textHeight = i
   449         if textWidth > maxWidth:
   450             continue
   451         labelBeg = j + (k - textWidth) // 2
   452         labelEnd = labelBeg + textWidth
   453         sortKey = textWidth - k
   454         if labelBeg < beg:
   455             sortKey += maxWidth * (beg - labelBeg)
   456             labelBeg = beg
   457             labelEnd = beg + textWidth
   458         if labelEnd > end:
   459             sortKey += maxWidth * (labelEnd - end)
   460             labelEnd = end
   461             labelBeg = end - textWidth
   462         yield sortKey, labelBeg, labelEnd, text, textHeight
   463 
   464 def nonoverlappingLabels(labels, minPixTweenLabels):
   465     '''Get a subset of non-overlapping axis labels, greedily.'''
   466     out = []
   467     for i in labels:
   468         beg = i[1] - minPixTweenLabels
   469         end = i[2] + minPixTweenLabels
   470         if all(j[2] <= beg or j[1] >= end for j in out):
   471             out.append(i)
   472     return out
   473 
   474 def axisImage(labels, rangePixBegs, rangePixLens, textRot,
   475               textAln, font, image_mode, opts):
   476     '''Make an image of axis labels.'''
   477     beg = rangePixBegs[0]
   478     end = rangePixBegs[-1] + rangePixLens[-1]
   479     margin = max(i[2] for i in labels)
   480     labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
   481     minPixTweenLabels = 0 if textRot else opts.label_space
   482     labels = nonoverlappingLabels(labels, minPixTweenLabels)
   483     image_size = (margin, end) if textRot else (end, margin)
   484     im = Image.new(image_mode, image_size, opts.margin_color)
   485     draw = ImageDraw.Draw(im)
   486     for i in labels:
   487         base = margin - i[4] if textAln else 0
   488         position = (base, i[1]) if textRot else (i[1], base)
   489         draw.text(position, i[3], font=font, fill=opts.text_color)
   490     return im
   491 
   492 def rangesWithOrigins(sortedRanges, rangePixBegs, bpPerPix):
   493     for i, j in zip(sortedRanges, rangePixBegs):
   494         seqName, rangeBeg, rangeEnd = i
   495         origin = bpPerPix * j - rangeBeg
   496         yield seqName, (rangeBeg, rangeEnd, origin)
   497 
   498 def rangesPerSeq(sortedRanges, rangePixBegs, bpPerPix):
   499     a = rangesWithOrigins(sortedRanges, rangePixBegs, bpPerPix)
   500     for k, v in itertools.groupby(a, itemgetter(0)):
   501         yield k, [i[1] for i in v]
   502 
   503 def getFont(opts):
   504     if opts.fontfile:
   505         return ImageFont.truetype(opts.fontfile, opts.fontsize)
   506     fileNames = []
   507     try:
   508         x = ["fc-match", "-f%{file}", "arial"]
   509         p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
   510         out, err = p.communicate()
   511         fileNames.append(out)
   512     except OSError as e:
   513         warn("fc-match error: " + str(e))
   514     fileNames.append("/Library/Fonts/Arial.ttf")  # for Mac
   515     for i in fileNames:
   516         try:
   517             font = ImageFont.truetype(i, opts.fontsize)
   518             warn("font: " + i)
   519             return font
   520         except IOError as e:
   521             warn("font load error: " + str(e))
   522     return ImageFont.load_default()
   523 
   524 def lastDotplot(opts, args):
   525     font = getFont(opts)
   526     image_mode = 'RGB'
   527     forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
   528     reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
   529     zipped_colors = zip(forward_color, reverse_color)
   530     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
   531 
   532     warn("reading alignments...")
   533     alnData = readAlignments(args[0], opts)
   534     alignments, seqNames1, seqNames2, seqLimits1, seqLimits2 = alnData
   535     warn("done")
   536     if not alignments: raise Exception("there are no alignments")
   537 
   538     textRot1 = "vertical".startswith(opts.rot1)
   539     i1 = getSeqInfo(opts.sort1, seqNames1, seqLimits1,
   540                     font, opts.fontsize, image_mode, opts.labels1, textRot1)
   541     sortedRanges1, rangeSizes1, labelData1, tMargin = i1
   542 
   543     textRot2 = "horizontal".startswith(opts.rot2)
   544     i2 = getSeqInfo(opts.sort2, seqNames2, seqLimits2,
   545                     font, opts.fontsize, image_mode, opts.labels2, textRot2)
   546     sortedRanges2, rangeSizes2, labelData2, lMargin = i2
   547 
   548     maxPixels1 = opts.width  - lMargin
   549     maxPixels2 = opts.height - tMargin
   550     bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
   551     bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
   552     bpPerPix = max(bpPerPix1, bpPerPix2)
   553     warn("bp per pixel = " + str(bpPerPix))
   554 
   555     p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
   556     rangePixBegs1, rangePixLens1, width = p1
   557     rangeDict1 = dict(rangesPerSeq(sortedRanges1, rangePixBegs1, bpPerPix))
   558 
   559     p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
   560     rangePixBegs2, rangePixLens2, height = p2
   561     rangeDict2 = dict(rangesPerSeq(sortedRanges2, rangePixBegs2, bpPerPix))
   562 
   563     warn("width:  " + str(width))
   564     warn("height: " + str(height))
   565 
   566     warn("processing alignments...")
   567     hits = alignmentPixels(width, height, alignments, bpPerPix,
   568                            rangeDict1, rangeDict2)
   569 
   570     warn("reading annotations...")
   571 
   572     rangeDict1 = expandedSeqDict(rangeDict1)
   573     beds1 = itertools.chain(readBed(opts.bed1, rangeDict1),
   574                             readRmsk(opts.rmsk1, rangeDict1),
   575                             readGenePred(opts, opts.genePred1, rangeDict1),
   576                             readGaps(opts, opts.gap1, rangeDict1))
   577     b1 = bedBoxes(beds1, rangeDict1, tMargin, height, True, bpPerPix)
   578 
   579     rangeDict2 = expandedSeqDict(rangeDict2)
   580     beds2 = itertools.chain(readBed(opts.bed2, rangeDict2),
   581                             readRmsk(opts.rmsk2, rangeDict2),
   582                             readGenePred(opts, opts.genePred2, rangeDict2),
   583                             readGaps(opts, opts.gap2, rangeDict2))
   584     b2 = bedBoxes(beds2, rangeDict2, lMargin, width, False, bpPerPix)
   585 
   586     boxes = sorted(itertools.chain(b1, b2))
   587 
   588     warn("drawing...")
   589 
   590     image_size = width, height
   591     im = Image.new(image_mode, image_size, opts.background_color)
   592 
   593     drawAnnotations(im, boxes)
   594 
   595     for i in range(height):
   596         for j in range(width):
   597             store_value = hits[i * width + j]
   598             xy = j, i
   599             if   store_value == 1: im.putpixel(xy, forward_color)
   600             elif store_value == 2: im.putpixel(xy, reverse_color)
   601             elif store_value == 3: im.putpixel(xy, overlap_color)
   602 
   603     if opts.fontsize != 0:
   604         axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
   605                           textRot1, False, font, image_mode, opts)
   606         if textRot1:
   607             axis1 = axis1.transpose(Image.ROTATE_90)
   608         axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
   609                           textRot2, textRot2, font, image_mode, opts)
   610         if not textRot2:
   611             axis2 = axis2.transpose(Image.ROTATE_270)
   612         im.paste(axis1, (0, 0))
   613         im.paste(axis2, (0, 0))
   614 
   615     for i in rangePixBegs1[1:]:
   616         box = i - opts.border_pixels, tMargin, i, height
   617         im.paste(opts.border_color, box)
   618 
   619     for i in rangePixBegs2[1:]:
   620         box = lMargin, i - opts.border_pixels, width, i
   621         im.paste(opts.border_color, box)
   622 
   623     im.save(args[1])
   624 
   625 if __name__ == "__main__":
   626     usage = """%prog --help
   627    or: %prog [options] maf-or-tab-alignments dotplot.png
   628    or: %prog [options] maf-or-tab-alignments dotplot.gif
   629    or: ..."""
   630     description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
   631     op = optparse.OptionParser(usage=usage, description=description)
   632     op.add_option("-v", "--verbose", action="count",
   633                   help="show progress messages & data about the plot")
   634     op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
   635                   default=[],
   636                   help="which sequences to show from the 1st genome")
   637     op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
   638                   default=[],
   639                   help="which sequences to show from the 2nd genome")
   640     # Replace "width" & "height" with a single "length" option?
   641     op.add_option("-x", "--width", type="int", default=1000,
   642                   help="maximum width in pixels (default: %default)")
   643     op.add_option("-y", "--height", type="int", default=1000,
   644                   help="maximum height in pixels (default: %default)")
   645     op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
   646                   help="color for forward alignments (default: %default)")
   647     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
   648                   help="color for reverse alignments (default: %default)")
   649     op.add_option("--sort1", type="int", default=1, metavar="N",
   650                   help="genome1 sequence order: 0=input order, 1=name order, "
   651                   "2=length order (default=%default)")
   652     op.add_option("--sort2", type="int", default=1, metavar="N",
   653                   help="genome2 sequence order: 0=input order, 1=name order, "
   654                   "2=length order (default=%default)")
   655     op.add_option("--trim1", action="store_true",
   656                   help="trim unaligned sequence flanks from the 1st genome")
   657     op.add_option("--trim2", action="store_true",
   658                   help="trim unaligned sequence flanks from the 2nd genome")
   659     op.add_option("--border-pixels", metavar="INT", type="int", default=1,
   660                   help="number of pixels between sequences (default=%default)")
   661     op.add_option("--border-color", metavar="COLOR", default="black",
   662                   help="color for pixels between sequences (default=%default)")
   663     op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
   664                   help="margin color")
   665 
   666     og = optparse.OptionGroup(op, "Text options")
   667     og.add_option("-f", "--fontfile", metavar="FILE",
   668                   help="TrueType or OpenType font file")
   669     og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
   670                   help="TrueType or OpenType font size (default: %default)")
   671     og.add_option("--labels1", type="int", default=0, metavar="N", help=
   672                   "genome1 labels: 0=name, 1=name:length, "
   673                   "2=name:start:length, 3=name:start-end (default=%default)")
   674     og.add_option("--labels2", type="int", default=0, metavar="N", help=
   675                   "genome2 labels: 0=name, 1=name:length, "
   676                   "2=name:start:length, 3=name:start-end (default=%default)")
   677     og.add_option("--rot1", metavar="ROT", default="h",
   678                   help="text rotation for the 1st genome (default=%default)")
   679     og.add_option("--rot2", metavar="ROT", default="v",
   680                   help="text rotation for the 2nd genome (default=%default)")
   681     op.add_option_group(og)
   682 
   683     og = optparse.OptionGroup(op, "Annotation options")
   684     og.add_option("--bed1", metavar="FILE",
   685                   help="read genome1 annotations from BED file")
   686     og.add_option("--bed2", metavar="FILE",
   687                   help="read genome2 annotations from BED file")
   688     og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
   689                   "RepeatMasker .out or rmsk.txt file")
   690     og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
   691                   "RepeatMasker .out or rmsk.txt file")
   692     op.add_option_group(og)
   693 
   694     og = optparse.OptionGroup(op, "Gene options")
   695     og.add_option("--genePred1", metavar="FILE",
   696                   help="read genome1 genes from genePred file")
   697     og.add_option("--genePred2", metavar="FILE",
   698                   help="read genome2 genes from genePred file")
   699     og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
   700                   help="color for exons (default=%default)")
   701     og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
   702                   help="color for protein-coding regions (default=%default)")
   703     op.add_option_group(og)
   704 
   705     og = optparse.OptionGroup(op, "Unsequenced gap options")
   706     og.add_option("--gap1", metavar="FILE",
   707                   help="read genome1 unsequenced gaps from agp or gap file")
   708     og.add_option("--gap2", metavar="FILE",
   709                   help="read genome2 unsequenced gaps from agp or gap file")
   710     og.add_option("--bridged-color", metavar="COLOR", default="yellow",
   711                   help="color for bridged gaps (default: %default)")
   712     og.add_option("--unbridged-color", metavar="COLOR", default="orange",
   713                   help="color for unbridged gaps (default: %default)")
   714     op.add_option_group(og)
   715     (opts, args) = op.parse_args()
   716     if len(args) != 2: op.error("2 arguments needed")
   717 
   718     opts.text_color = "black"
   719     opts.background_color = "white"
   720     opts.label_space = 5     # minimum number of pixels between axis labels
   721 
   722     try: lastDotplot(opts, args)
   723     except KeyboardInterrupt: pass  # avoid silly error message
   724     except Exception as e:
   725         prog = os.path.basename(sys.argv[0])
   726         sys.exit(prog + ": error: " + str(e))