scripts/last-dotplot
author Martin C. Frith
Wed Nov 08 14:22:00 2017 +0900 (2017-11-08)
changeset 907 87991b57ed04
parent 906 21fbf377641f
child 908 6840398323e1
permissions -rwxr-xr-x
Tweak last-dotplot's sort options
     1 #! /usr/bin/env python
     2 
     3 # Read pair-wise alignments in MAF or LAST tabular format: write an
     4 # "Oxford grid", a.k.a. dotplot.
     5 
     6 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
     7 # pixels with one or more aligned nt-pairs are black.  This can look
     8 # too crowded for large genome alignments.  I tried shading each pixel
     9 # according to the number of aligned nt-pairs within it, but the
    10 # result is too faint.  How can this be done better?
    11 
    12 import gzip
    13 from fnmatch import fnmatchcase
    14 from operator import itemgetter
    15 import subprocess
    16 import itertools, optparse, os, re, sys
    17 
    18 # Try to make PIL/PILLOW work:
    19 try: from PIL import Image, ImageDraw, ImageFont, ImageColor
    20 except ImportError: import Image, ImageDraw, ImageFont, ImageColor
    21 
    22 def myOpen(fileName):  # faster than fileinput
    23     if fileName is None:
    24         return []
    25     if fileName == "-":
    26         return sys.stdin
    27     if fileName.endswith(".gz"):
    28         return gzip.open(fileName)
    29     return open(fileName)
    30 
    31 def warn(message):
    32     if opts.verbose:
    33         prog = os.path.basename(sys.argv[0])
    34         sys.stderr.write(prog + ": " + message + "\n")
    35 
    36 def croppedBlocks(blocks, range1, range2):
    37     cropBeg1, cropEnd1 = range1
    38     cropBeg2, cropEnd2 = range2
    39     if blocks[0][0] < 0: cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
    40     if blocks[0][1] < 0: cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
    41     for beg1, beg2, size in blocks:
    42         b1 = max(cropBeg1, beg1)
    43         e1 = min(cropEnd1, beg1 + size)
    44         if b1 >= e1: continue
    45         offset = beg2 - beg1
    46         b2 = max(cropBeg2, b1 + offset)
    47         e2 = min(cropEnd2, e1 + offset)
    48         if b2 >= e2: continue
    49         yield b2 - offset, b2, e2 - b2
    50 
    51 def tabBlocks(beg1, beg2, blocks):
    52     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
    53     for i in blocks.split(","):
    54         if ":" in i:
    55             x, y = i.split(":")
    56             beg1 += int(x)
    57             beg2 += int(y)
    58         else:
    59             size = int(i)
    60             yield beg1, beg2, size
    61             beg1 += size
    62             beg2 += size
    63 
    64 def mafBlocks(beg1, beg2, seq1, seq2):
    65     '''Get the gapless blocks of an alignment, from MAF format.'''
    66     size = 0
    67     for x, y in itertools.izip(seq1, seq2):
    68         if x == "-":
    69             if size:
    70                 yield beg1, beg2, size
    71                 beg1 += size
    72                 beg2 += size
    73                 size = 0
    74             beg2 += 1
    75         elif y == "-":
    76             if size:
    77                 yield beg1, beg2, size
    78                 beg1 += size
    79                 beg2 += size
    80                 size = 0
    81             beg1 += 1
    82         else:
    83             size += 1
    84     if size: yield beg1, beg2, size
    85 
    86 def alignmentInput(lines):
    87     '''Get alignments and sequence lengths, from MAF or tabular format.'''
    88     mafCount = 0
    89     for line in lines:
    90         w = line.split()
    91         if line[0].isdigit():  # tabular format
    92             chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
    93             if w[4] == "-": beg1 -= seqlen1
    94             chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
    95             if w[9] == "-": beg2 -= seqlen2
    96             blocks = tabBlocks(beg1, beg2, w[11])
    97             yield chr1, seqlen1, chr2, seqlen2, blocks
    98         elif line[0] == "s":  # MAF format
    99             if mafCount == 0:
   100                 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
   101                 if w[4] == "-": beg1 -= seqlen1
   102                 mafCount = 1
   103             else:
   104                 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
   105                 if w[4] == "-": beg2 -= seqlen2
   106                 blocks = mafBlocks(beg1, beg2, seq1, seq2)
   107                 yield chr1, seqlen1, chr2, seqlen2, blocks
   108                 mafCount = 0
   109 
   110 def seqRequestFromText(text):
   111     if ":" in text:
   112         pattern, interval = text.rsplit(":", 1)
   113         if "-" in interval:
   114             beg, end = interval.rsplit("-", 1)
   115             return pattern, int(beg), int(end)  # beg may be negative
   116     return text, 0, sys.maxsize
   117 
   118 def rangeFromSeqName(seqRequests, name, seqLen):
   119     if not seqRequests: return 0, seqLen
   120     base = name.split(".")[-1]  # allow for names like hg19.chr7
   121     for pat, beg, end in seqRequests:
   122         if fnmatchcase(name, pat) or fnmatchcase(base, pat):
   123             return max(beg, 0), min(end, seqLen)
   124     return None
   125 
   126 def updateSeqs(isTrim, seqNames, seqLimits, seqName, seqRange, blocks, index):
   127     if seqName not in seqLimits:
   128         seqNames.append(seqName)
   129     if isTrim:
   130         beg = blocks[0][index]
   131         end = blocks[-1][index] + blocks[-1][2]
   132         if beg < 0: beg, end = -end, -beg
   133         if seqName in seqLimits:
   134             b, e = seqLimits[seqName]
   135             seqLimits[seqName] = min(b, beg), max(e, end)
   136         else:
   137             seqLimits[seqName] = beg, end
   138     else:
   139         seqLimits[seqName] = seqRange
   140 
   141 def readAlignments(fileName, opts):
   142     '''Get alignments and sequence limits, from MAF or tabular format.'''
   143     seqRequests1 = map(seqRequestFromText, opts.seq1)
   144     seqRequests2 = map(seqRequestFromText, opts.seq2)
   145 
   146     alignments = []
   147     seqNames1 = []
   148     seqNames2 = []
   149     seqLimits1 = {}
   150     seqLimits2 = {}
   151     lines = myOpen(fileName)
   152     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   153         range1 = rangeFromSeqName(seqRequests1, seqName1, seqLen1)
   154         if not range1: continue
   155         range2 = rangeFromSeqName(seqRequests2, seqName2, seqLen2)
   156         if not range2: continue
   157         b = list(croppedBlocks(list(blocks), range1, range2))
   158         if not b: continue
   159         aln = seqName1, seqName2, b
   160         alignments.append(aln)
   161         updateSeqs(opts.trim1, seqNames1, seqLimits1, seqName1, range1, b, 0)
   162         updateSeqs(opts.trim2, seqNames2, seqLimits2, seqName2, range2, b, 1)
   163     return alignments, seqNames1, seqNames2, seqLimits1, seqLimits2
   164 
   165 def natural_sort_key(my_string):
   166     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
   167     parts = re.split(r'(\d+)', my_string)
   168     parts[1::2] = map(int, parts[1::2])
   169     return parts
   170 
   171 def nameKey(oneSeqRanges):
   172     return natural_sort_key(oneSeqRanges[0][0])
   173 
   174 def sizeKey(oneSeqRanges):
   175     return sum(b - e for n, b, e in oneSeqRanges), nameKey(oneSeqRanges)
   176 
   177 def getSortedRanges(seqRanges, sortOpt):
   178     g = [list(v) for k, v in itertools.groupby(seqRanges, itemgetter(0))]
   179     if sortOpt == "1":
   180         g.sort(key=nameKey)
   181     if sortOpt == "2":
   182         g.sort(key=sizeKey)
   183     return [j for i in g for j in i]
   184 
   185 def prettyNum(n):
   186     t = str(n)
   187     groups = []
   188     while t:
   189         groups.append(t[-3:])
   190         t = t[:-3]
   191     return ",".join(reversed(groups))
   192 
   193 def sizeText(size):
   194     suffixes = "bp", "kb", "Mb", "Gb"
   195     for i, x in enumerate(suffixes):
   196         j = 10 ** (i * 3)
   197         if size < j * 10:
   198             return "%.2g" % (1.0 * size / j) + x
   199         if size < j * 1000 or i == len(suffixes) - 1:
   200             return "%.0f" % (1.0 * size / j) + x
   201 
   202 def labelText(seqRange, labelOpt):
   203     seqName, beg, end = seqRange
   204     if labelOpt == 1:
   205         return seqName + ": " + sizeText(end - beg)
   206     if labelOpt == 2:
   207         return seqName + ":" + prettyNum(beg) + ": " + sizeText(end - beg)
   208     if labelOpt == 3:
   209         return seqName + ":" + prettyNum(beg) + "-" + prettyNum(end)
   210     return seqName
   211 
   212 def rangeLabels(seqRanges, labelOpt, font, fontsize, image_mode, textRot):
   213     if fontsize:
   214         image_size = 1, 1
   215         im = Image.new(image_mode, image_size)
   216         draw = ImageDraw.Draw(im)
   217     x = y = 0
   218     for r in seqRanges:
   219         text = labelText(r, labelOpt)
   220         if fontsize:
   221             x, y = draw.textsize(text, font=font)
   222             if textRot:
   223                 x, y = y, x
   224         yield text, x, y
   225 
   226 def getSeqInfo(sortOpt, seqNames, seqLimits,
   227                font, fontsize, image_mode, labelOpt, textRot):
   228     seqRanges = [(i, seqLimits[i][0], seqLimits[i][1]) for i in seqNames]
   229     s = getSortedRanges(seqRanges, sortOpt)
   230     for i in s:
   231         warn("\t".join(map(str, i)))
   232     warn("")
   233     rangeSizes = [e - b for n, b, e in s]
   234     labs = list(rangeLabels(s, labelOpt, font, fontsize, image_mode, textRot))
   235     margin = max(i[2] for i in labs)
   236     # xxx the margin may be too big, because some labels may get omitted
   237     return s, rangeSizes, labs, margin
   238 
   239 def div_ceil(x, y):
   240     '''Return x / y rounded up.'''
   241     q, r = divmod(x, y)
   242     return q + (r != 0)
   243 
   244 def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
   245     '''Get the minimum bp-per-pixel that fits in the size limit.'''
   246     warn("choosing bp per pixel...")
   247     numOfRanges = len(rangeSizes)
   248     maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
   249     if maxPixelsInRanges < numOfRanges:
   250         raise Exception("can't fit the image: too many sequences?")
   251     negLimit = -maxPixelsInRanges
   252     negBpPerPix = sum(rangeSizes) // negLimit
   253     while True:
   254         if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
   255             return -negBpPerPix
   256         negBpPerPix -= 1
   257 
   258 def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
   259     '''Get the start pixel for each range.'''
   260     rangePixBegs = []
   261     pix_tot = margin - pixTweenRanges
   262     for i in rangePixLens:
   263         pix_tot += pixTweenRanges
   264         rangePixBegs.append(pix_tot)
   265         pix_tot += i
   266     return rangePixBegs
   267 
   268 def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
   269     '''Return pixel information about the ranges.'''
   270     rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
   271     rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
   272     tot_pix = rangePixBegs[-1] + rangePixLens[-1]
   273     return rangePixBegs, rangePixLens, tot_pix
   274 
   275 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
   276     while True:
   277         q1, r1 = divmod(beg1, bp_per_pix)
   278         q2, r2 = divmod(beg2, bp_per_pix)
   279         hits[q2 * width + q1] |= 1
   280         next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
   281         if next_pix >= size: break
   282         beg1 += next_pix
   283         beg2 += next_pix
   284         size -= next_pix
   285 
   286 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
   287     beg2 = -1 - beg2
   288     while True:
   289         q1, r1 = divmod(beg1, bp_per_pix)
   290         q2, r2 = divmod(beg2, bp_per_pix)
   291         hits[q2 * width + q1] |= 2
   292         next_pix = min(bp_per_pix - r1, r2 + 1)
   293         if next_pix >= size: break
   294         beg1 += next_pix
   295         beg2 -= next_pix
   296         size -= next_pix
   297 
   298 def findOrigin(ranges, beg, size):
   299     if beg < 0:
   300         beg = -(beg + size)
   301     for rangeBeg, rangeEnd, origin in ranges:
   302         if rangeEnd > beg:
   303             return origin
   304 
   305 def alignmentPixels(width, height, alignments, bp_per_pix,
   306                     rangeDict1, rangeDict2):
   307     hits = [0] * (width * height)  # the image data
   308     for seq1, seq2, blocks in alignments:
   309         beg1, beg2, size = blocks[0]
   310         ori1 = findOrigin(rangeDict1[seq1], beg1, size)
   311         ori2 = findOrigin(rangeDict2[seq2], beg2, size)
   312         for beg1, beg2, size in blocks:
   313             if beg1 < 0:
   314                 beg1 = -(beg1 + size)
   315                 beg2 = -(beg2 + size)
   316             if beg2 >= 0:
   317                 drawLineForward(hits, width, bp_per_pix,
   318                                 beg1 + ori1, beg2 + ori2, size)
   319             else:
   320                 drawLineReverse(hits, width, bp_per_pix,
   321                                 beg1 + ori1, beg2 - ori2, size)
   322     return hits
   323 
   324 def expandedSeqDict(seqDict):
   325     '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   326     newDict = seqDict.copy()
   327     for name, x in seqDict.items():
   328         if "." in name:
   329             base = name.split(".")[-1]
   330             if base in newDict:  # an ambiguous case was found:
   331                 return seqDict   # so give up completely
   332             newDict[base] = x
   333     return newDict
   334 
   335 def readBed(fileName, rangeDict):
   336     for line in myOpen(fileName):
   337         w = line.split()
   338         if not w: continue
   339         seqName = w[0]
   340         if seqName not in rangeDict: continue
   341         beg = int(w[1])
   342         end = int(w[2])
   343         layer = 900
   344         color = "#fbf"
   345         if len(w) > 4:
   346             if w[4] != ".":
   347                 layer = float(w[4])
   348             if len(w) > 5:
   349                 if len(w) > 8 and w[8].count(",") == 2:
   350                     color = "rgb(" + w[8] + ")"
   351                 elif w[5] == "+":
   352                     color = "#ffe8e8"
   353                 elif w[5] == "-":
   354                     color = "#e8e8ff"
   355         yield layer, color, seqName, beg, end
   356 
   357 def commaSeparatedInts(text):
   358     return map(int, text.rstrip(",").split(","))
   359 
   360 def readGenePred(opts, fileName, rangeDict):
   361     for line in myOpen(fileName):
   362         fields = line.split()
   363         if not fields: continue
   364         if fields[2] not in "+-": fields = fields[1:]
   365         seqName = fields[1]
   366         if seqName not in rangeDict: continue
   367         #strand = fields[2]
   368         cdsBeg = int(fields[5])
   369         cdsEnd = int(fields[6])
   370         exonBegs = commaSeparatedInts(fields[8])
   371         exonEnds = commaSeparatedInts(fields[9])
   372         for beg, end in zip(exonBegs, exonEnds):
   373             yield 300, opts.exon_color, seqName, beg, end
   374             b = max(beg, cdsBeg)
   375             e = min(end, cdsEnd)
   376             if b < e: yield 400, opts.cds_color, seqName, b, e
   377 
   378 def readRmsk(fileName, rangeDict):
   379     for line in myOpen(fileName):
   380         fields = line.split()
   381         if len(fields) == 17:  # rmsk.txt
   382             seqName = fields[5]
   383             if seqName not in rangeDict: continue  # do this ASAP for speed
   384             beg = int(fields[6])
   385             end = int(fields[7])
   386             strand = fields[9]
   387             repeatClass = fields[11]
   388         elif len(fields) == 15:  # .out
   389             seqName = fields[4]
   390             if seqName not in rangeDict: continue
   391             beg = int(fields[5]) - 1
   392             end = int(fields[6])
   393             strand = fields[8]
   394             repeatClass = fields[10]
   395         else:
   396             continue
   397         if repeatClass in ("Low_complexity", "Simple_repeat"):
   398             yield 200, "#fbf", seqName, beg, end
   399         elif strand == "+":
   400             yield 100, "#ffe8e8", seqName, beg, end
   401         else:
   402             yield 100, "#e8e8ff", seqName, beg, end
   403 
   404 def isExtraFirstGapField(fields):
   405     return fields[4].isdigit()
   406 
   407 def readGaps(opts, fileName, rangeDict):
   408     '''Read locations of unsequenced gaps, from an agp or gap file.'''
   409     for line in myOpen(fileName):
   410         w = line.split()
   411         if not w or w[0][0] == "#": continue
   412         if isExtraFirstGapField(w): w = w[1:]
   413         if w[4] not in "NU": continue
   414         seqName = w[0]
   415         if seqName not in rangeDict: continue
   416         end = int(w[2])
   417         beg = end - int(w[5])  # zero-based coordinate
   418         if w[7] == "yes":
   419             yield 3000, opts.bridged_color, seqName, beg, end
   420         else:
   421             yield 2000, opts.unbridged_color, seqName, beg, end
   422 
   423 def bedBoxes(beds, rangeDict, margin, edge, isTop, bpPerPix):
   424     for layer, color, seqName, bedBeg, bedEnd in beds:
   425         for rangeBeg, rangeEnd, origin in rangeDict[seqName]:
   426             beg = max(bedBeg, rangeBeg)
   427             end = min(bedEnd, rangeEnd)
   428             if beg >= end: continue
   429             if layer <= 1000:
   430                 # include partly-covered pixels
   431                 b = (origin + beg) // bpPerPix
   432                 e = div_ceil(origin + end, bpPerPix)
   433             else:
   434                 # exclude partly-covered pixels
   435                 b = div_ceil(origin + beg, bpPerPix)
   436                 e = (origin + end) // bpPerPix
   437                 if e <= b: continue
   438             if isTop:
   439                 box = b, margin, e, edge
   440             else:
   441                 box = margin, b, edge, e
   442             yield layer, color, box
   443 
   444 def drawAnnotations(im, boxes):
   445     # xxx use partial transparency for different-color overlaps?
   446     for layer, color, box in boxes:
   447         im.paste(color, box)
   448 
   449 def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
   450     '''Return axis labels with endpoint & sort-order information.'''
   451     maxWidth = end - beg
   452     for i, j, k in zip(labels, rangePixBegs, rangePixLens):
   453         text, textWidth, textHeight = i
   454         if textWidth > maxWidth:
   455             continue
   456         labelBeg = j + (k - textWidth) // 2
   457         labelEnd = labelBeg + textWidth
   458         sortKey = textWidth - k
   459         if labelBeg < beg:
   460             sortKey += maxWidth * (beg - labelBeg)
   461             labelBeg = beg
   462             labelEnd = beg + textWidth
   463         if labelEnd > end:
   464             sortKey += maxWidth * (labelEnd - end)
   465             labelEnd = end
   466             labelBeg = end - textWidth
   467         yield sortKey, labelBeg, labelEnd, text, textHeight
   468 
   469 def nonoverlappingLabels(labels, minPixTweenLabels):
   470     '''Get a subset of non-overlapping axis labels, greedily.'''
   471     out = []
   472     for i in labels:
   473         beg = i[1] - minPixTweenLabels
   474         end = i[2] + minPixTweenLabels
   475         if all(j[2] <= beg or j[1] >= end for j in out):
   476             out.append(i)
   477     return out
   478 
   479 def axisImage(labels, rangePixBegs, rangePixLens, textRot,
   480               textAln, font, image_mode, opts):
   481     '''Make an image of axis labels.'''
   482     beg = rangePixBegs[0]
   483     end = rangePixBegs[-1] + rangePixLens[-1]
   484     margin = max(i[2] for i in labels)
   485     labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
   486     minPixTweenLabels = 0 if textRot else opts.label_space
   487     labels = nonoverlappingLabels(labels, minPixTweenLabels)
   488     image_size = (margin, end) if textRot else (end, margin)
   489     im = Image.new(image_mode, image_size, opts.margin_color)
   490     draw = ImageDraw.Draw(im)
   491     for i in labels:
   492         base = margin - i[4] if textAln else 0
   493         position = (base, i[1]) if textRot else (i[1], base)
   494         draw.text(position, i[3], font=font, fill=opts.text_color)
   495     return im
   496 
   497 def rangesWithOrigins(sortedRanges, rangePixBegs, bpPerPix):
   498     for i, j in zip(sortedRanges, rangePixBegs):
   499         seqName, rangeBeg, rangeEnd = i
   500         origin = bpPerPix * j - rangeBeg
   501         yield seqName, (rangeBeg, rangeEnd, origin)
   502 
   503 def rangesPerSeq(sortedRanges, rangePixBegs, bpPerPix):
   504     a = rangesWithOrigins(sortedRanges, rangePixBegs, bpPerPix)
   505     for k, v in itertools.groupby(a, itemgetter(0)):
   506         yield k, [i[1] for i in v]
   507 
   508 def getFont(opts):
   509     if opts.fontfile:
   510         return ImageFont.truetype(opts.fontfile, opts.fontsize)
   511     fileNames = []
   512     try:
   513         x = ["fc-match", "-f%{file}", "arial"]
   514         p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
   515         out, err = p.communicate()
   516         fileNames.append(out)
   517     except OSError as e:
   518         warn("fc-match error: " + str(e))
   519     fileNames.append("/Library/Fonts/Arial.ttf")  # for Mac
   520     for i in fileNames:
   521         try:
   522             font = ImageFont.truetype(i, opts.fontsize)
   523             warn("font: " + i)
   524             return font
   525         except IOError as e:
   526             warn("font load error: " + str(e))
   527     return ImageFont.load_default()
   528 
   529 def lastDotplot(opts, args):
   530     font = getFont(opts)
   531     image_mode = 'RGB'
   532     forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
   533     reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
   534     zipped_colors = zip(forward_color, reverse_color)
   535     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
   536 
   537     warn("reading alignments...")
   538     alnData = readAlignments(args[0], opts)
   539     alignments, seqNames1, seqNames2, seqLimits1, seqLimits2 = alnData
   540     warn("done")
   541     if not alignments: raise Exception("there are no alignments")
   542 
   543     textRot1 = "vertical".startswith(opts.rot1)
   544     i1 = getSeqInfo(opts.sort1, seqNames1, seqLimits1,
   545                     font, opts.fontsize, image_mode, opts.labels1, textRot1)
   546     sortedRanges1, rangeSizes1, labelData1, tMargin = i1
   547 
   548     textRot2 = "horizontal".startswith(opts.rot2)
   549     i2 = getSeqInfo(opts.sort2, seqNames2, seqLimits2,
   550                     font, opts.fontsize, image_mode, opts.labels2, textRot2)
   551     sortedRanges2, rangeSizes2, labelData2, lMargin = i2
   552 
   553     maxPixels1 = opts.width  - lMargin
   554     maxPixels2 = opts.height - tMargin
   555     bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
   556     bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
   557     bpPerPix = max(bpPerPix1, bpPerPix2)
   558     warn("bp per pixel = " + str(bpPerPix))
   559 
   560     p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
   561     rangePixBegs1, rangePixLens1, width = p1
   562     rangeDict1 = dict(rangesPerSeq(sortedRanges1, rangePixBegs1, bpPerPix))
   563 
   564     p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
   565     rangePixBegs2, rangePixLens2, height = p2
   566     rangeDict2 = dict(rangesPerSeq(sortedRanges2, rangePixBegs2, bpPerPix))
   567 
   568     warn("width:  " + str(width))
   569     warn("height: " + str(height))
   570 
   571     warn("processing alignments...")
   572     hits = alignmentPixels(width, height, alignments, bpPerPix,
   573                            rangeDict1, rangeDict2)
   574 
   575     warn("reading annotations...")
   576 
   577     rangeDict1 = expandedSeqDict(rangeDict1)
   578     beds1 = itertools.chain(readBed(opts.bed1, rangeDict1),
   579                             readRmsk(opts.rmsk1, rangeDict1),
   580                             readGenePred(opts, opts.genePred1, rangeDict1),
   581                             readGaps(opts, opts.gap1, rangeDict1))
   582     b1 = bedBoxes(beds1, rangeDict1, tMargin, height, True, bpPerPix)
   583 
   584     rangeDict2 = expandedSeqDict(rangeDict2)
   585     beds2 = itertools.chain(readBed(opts.bed2, rangeDict2),
   586                             readRmsk(opts.rmsk2, rangeDict2),
   587                             readGenePred(opts, opts.genePred2, rangeDict2),
   588                             readGaps(opts, opts.gap2, rangeDict2))
   589     b2 = bedBoxes(beds2, rangeDict2, lMargin, width, False, bpPerPix)
   590 
   591     boxes = sorted(itertools.chain(b1, b2))
   592 
   593     warn("drawing...")
   594 
   595     image_size = width, height
   596     im = Image.new(image_mode, image_size, opts.background_color)
   597 
   598     drawAnnotations(im, boxes)
   599 
   600     for i in range(height):
   601         for j in range(width):
   602             store_value = hits[i * width + j]
   603             xy = j, i
   604             if   store_value == 1: im.putpixel(xy, forward_color)
   605             elif store_value == 2: im.putpixel(xy, reverse_color)
   606             elif store_value == 3: im.putpixel(xy, overlap_color)
   607 
   608     if opts.fontsize != 0:
   609         axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
   610                           textRot1, False, font, image_mode, opts)
   611         if textRot1:
   612             axis1 = axis1.transpose(Image.ROTATE_90)
   613         axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
   614                           textRot2, textRot2, font, image_mode, opts)
   615         if not textRot2:
   616             axis2 = axis2.transpose(Image.ROTATE_270)
   617         im.paste(axis1, (0, 0))
   618         im.paste(axis2, (0, 0))
   619 
   620     for i in rangePixBegs1[1:]:
   621         box = i - opts.border_pixels, tMargin, i, height
   622         im.paste(opts.border_color, box)
   623 
   624     for i in rangePixBegs2[1:]:
   625         box = lMargin, i - opts.border_pixels, width, i
   626         im.paste(opts.border_color, box)
   627 
   628     im.save(args[1])
   629 
   630 if __name__ == "__main__":
   631     usage = """%prog --help
   632    or: %prog [options] maf-or-tab-alignments dotplot.png
   633    or: %prog [options] maf-or-tab-alignments dotplot.gif
   634    or: ..."""
   635     description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
   636     op = optparse.OptionParser(usage=usage, description=description)
   637     op.add_option("-v", "--verbose", action="count",
   638                   help="show progress messages & data about the plot")
   639     op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
   640                   default=[],
   641                   help="which sequences to show from the 1st genome")
   642     op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
   643                   default=[],
   644                   help="which sequences to show from the 2nd genome")
   645     # Replace "width" & "height" with a single "length" option?
   646     op.add_option("-x", "--width", type="int", default=1000,
   647                   help="maximum width in pixels (default: %default)")
   648     op.add_option("-y", "--height", type="int", default=1000,
   649                   help="maximum height in pixels (default: %default)")
   650     op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
   651                   help="color for forward alignments (default: %default)")
   652     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
   653                   help="color for reverse alignments (default: %default)")
   654     op.add_option("--sort1", default="1", metavar="N",
   655                   help="genome1 sequence order: 0=input order, 1=name order, "
   656                   "2=length order (default=%default)")
   657     op.add_option("--sort2", default="1", metavar="N",
   658                   help="genome2 sequence order: 0=input order, 1=name order, "
   659                   "2=length order (default=%default)")
   660     op.add_option("--trim1", action="store_true",
   661                   help="trim unaligned sequence flanks from the 1st genome")
   662     op.add_option("--trim2", action="store_true",
   663                   help="trim unaligned sequence flanks from the 2nd genome")
   664     op.add_option("--border-pixels", metavar="INT", type="int", default=1,
   665                   help="number of pixels between sequences (default=%default)")
   666     op.add_option("--border-color", metavar="COLOR", default="black",
   667                   help="color for pixels between sequences (default=%default)")
   668     op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
   669                   help="margin color")
   670 
   671     og = optparse.OptionGroup(op, "Text options")
   672     og.add_option("-f", "--fontfile", metavar="FILE",
   673                   help="TrueType or OpenType font file")
   674     og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
   675                   help="TrueType or OpenType font size (default: %default)")
   676     og.add_option("--labels1", type="int", default=0, metavar="N", help=
   677                   "genome1 labels: 0=name, 1=name:length, "
   678                   "2=name:start:length, 3=name:start-end (default=%default)")
   679     og.add_option("--labels2", type="int", default=0, metavar="N", help=
   680                   "genome2 labels: 0=name, 1=name:length, "
   681                   "2=name:start:length, 3=name:start-end (default=%default)")
   682     og.add_option("--rot1", metavar="ROT", default="h",
   683                   help="text rotation for the 1st genome (default=%default)")
   684     og.add_option("--rot2", metavar="ROT", default="v",
   685                   help="text rotation for the 2nd genome (default=%default)")
   686     op.add_option_group(og)
   687 
   688     og = optparse.OptionGroup(op, "Annotation options")
   689     og.add_option("--bed1", metavar="FILE",
   690                   help="read genome1 annotations from BED file")
   691     og.add_option("--bed2", metavar="FILE",
   692                   help="read genome2 annotations from BED file")
   693     og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
   694                   "RepeatMasker .out or rmsk.txt file")
   695     og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
   696                   "RepeatMasker .out or rmsk.txt file")
   697     op.add_option_group(og)
   698 
   699     og = optparse.OptionGroup(op, "Gene options")
   700     og.add_option("--genePred1", metavar="FILE",
   701                   help="read genome1 genes from genePred file")
   702     og.add_option("--genePred2", metavar="FILE",
   703                   help="read genome2 genes from genePred file")
   704     og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
   705                   help="color for exons (default=%default)")
   706     og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
   707                   help="color for protein-coding regions (default=%default)")
   708     op.add_option_group(og)
   709 
   710     og = optparse.OptionGroup(op, "Unsequenced gap options")
   711     og.add_option("--gap1", metavar="FILE",
   712                   help="read genome1 unsequenced gaps from agp or gap file")
   713     og.add_option("--gap2", metavar="FILE",
   714                   help="read genome2 unsequenced gaps from agp or gap file")
   715     og.add_option("--bridged-color", metavar="COLOR", default="yellow",
   716                   help="color for bridged gaps (default: %default)")
   717     og.add_option("--unbridged-color", metavar="COLOR", default="orange",
   718                   help="color for unbridged gaps (default: %default)")
   719     op.add_option_group(og)
   720     (opts, args) = op.parse_args()
   721     if len(args) != 2: op.error("2 arguments needed")
   722 
   723     opts.text_color = "black"
   724     opts.background_color = "white"
   725     opts.label_space = 5     # minimum number of pixels between axis labels
   726 
   727     try: lastDotplot(opts, args)
   728     except KeyboardInterrupt: pass  # avoid silly error message
   729     except Exception as e:
   730         prog = os.path.basename(sys.argv[0])
   731         sys.exit(prog + ": error: " + str(e))