scripts/last-dotplot
author Martin C. Frith
Wed Nov 08 15:45:08 2017 +0900 (2017-11-08)
changeset 908 6840398323e1
parent 907 87991b57ed04
child 909 92dfad507286
permissions -rwxr-xr-x
Refactor last-dotplot
     1 #! /usr/bin/env python
     2 
     3 # Read pair-wise alignments in MAF or LAST tabular format: write an
     4 # "Oxford grid", a.k.a. dotplot.
     5 
     6 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
     7 # pixels with one or more aligned nt-pairs are black.  This can look
     8 # too crowded for large genome alignments.  I tried shading each pixel
     9 # according to the number of aligned nt-pairs within it, but the
    10 # result is too faint.  How can this be done better?
    11 
    12 import gzip
    13 from fnmatch import fnmatchcase
    14 from operator import itemgetter
    15 import subprocess
    16 import itertools, optparse, os, re, sys
    17 
    18 # Try to make PIL/PILLOW work:
    19 try: from PIL import Image, ImageDraw, ImageFont, ImageColor
    20 except ImportError: import Image, ImageDraw, ImageFont, ImageColor
    21 
    22 def myOpen(fileName):  # faster than fileinput
    23     if fileName is None:
    24         return []
    25     if fileName == "-":
    26         return sys.stdin
    27     if fileName.endswith(".gz"):
    28         return gzip.open(fileName)
    29     return open(fileName)
    30 
    31 def warn(message):
    32     if opts.verbose:
    33         prog = os.path.basename(sys.argv[0])
    34         sys.stderr.write(prog + ": " + message + "\n")
    35 
    36 def croppedBlocks(blocks, ranges1, ranges2):
    37     headBeg1, headBeg2, headSize = blocks[0]
    38     for r1 in ranges1:
    39         for r2 in ranges2:
    40             cropBeg1, cropEnd1 = r1
    41             if headBeg1 < 0:
    42                 cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
    43             cropBeg2, cropEnd2 = r2
    44             if headBeg2 < 0:
    45                 cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
    46             for beg1, beg2, size in blocks:
    47                 b1 = max(cropBeg1, beg1)
    48                 e1 = min(cropEnd1, beg1 + size)
    49                 if b1 >= e1: continue
    50                 offset = beg2 - beg1
    51                 b2 = max(cropBeg2, b1 + offset)
    52                 e2 = min(cropEnd2, e1 + offset)
    53                 if b2 >= e2: continue
    54                 yield b2 - offset, b2, e2 - b2
    55 
    56 def tabBlocks(beg1, beg2, blocks):
    57     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
    58     for i in blocks.split(","):
    59         if ":" in i:
    60             x, y = i.split(":")
    61             beg1 += int(x)
    62             beg2 += int(y)
    63         else:
    64             size = int(i)
    65             yield beg1, beg2, size
    66             beg1 += size
    67             beg2 += size
    68 
    69 def mafBlocks(beg1, beg2, seq1, seq2):
    70     '''Get the gapless blocks of an alignment, from MAF format.'''
    71     size = 0
    72     for x, y in itertools.izip(seq1, seq2):
    73         if x == "-":
    74             if size:
    75                 yield beg1, beg2, size
    76                 beg1 += size
    77                 beg2 += size
    78                 size = 0
    79             beg2 += 1
    80         elif y == "-":
    81             if size:
    82                 yield beg1, beg2, size
    83                 beg1 += size
    84                 beg2 += size
    85                 size = 0
    86             beg1 += 1
    87         else:
    88             size += 1
    89     if size: yield beg1, beg2, size
    90 
    91 def alignmentInput(lines):
    92     '''Get alignments and sequence lengths, from MAF or tabular format.'''
    93     mafCount = 0
    94     for line in lines:
    95         w = line.split()
    96         if line[0].isdigit():  # tabular format
    97             chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
    98             if w[4] == "-": beg1 -= seqlen1
    99             chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
   100             if w[9] == "-": beg2 -= seqlen2
   101             blocks = tabBlocks(beg1, beg2, w[11])
   102             yield chr1, seqlen1, chr2, seqlen2, blocks
   103         elif line[0] == "s":  # MAF format
   104             if mafCount == 0:
   105                 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
   106                 if w[4] == "-": beg1 -= seqlen1
   107                 mafCount = 1
   108             else:
   109                 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
   110                 if w[4] == "-": beg2 -= seqlen2
   111                 blocks = mafBlocks(beg1, beg2, seq1, seq2)
   112                 yield chr1, seqlen1, chr2, seqlen2, blocks
   113                 mafCount = 0
   114 
   115 def seqRequestFromText(text):
   116     if ":" in text:
   117         pattern, interval = text.rsplit(":", 1)
   118         if "-" in interval:
   119             beg, end = interval.rsplit("-", 1)
   120             return pattern, int(beg), int(end)  # beg may be negative
   121     return text, 0, sys.maxsize
   122 
   123 def rangeFromSeqName(seqRequests, name, seqLen):
   124     if seqRequests:
   125         base = name.split(".")[-1]  # allow for names like hg19.chr7
   126         for pat, beg, end in seqRequests:
   127             if fnmatchcase(name, pat) or fnmatchcase(base, pat):
   128                 return max(beg, 0), min(end, seqLen)
   129         return None
   130     else:
   131         return 0, seqLen
   132 
   133 def updateSeqs(isTrim, seqNames, seqLimits, seqName, seqRange, blocks, index):
   134     if seqName not in seqLimits:
   135         seqNames.append(seqName)
   136     if isTrim:
   137         beg = blocks[0][index]
   138         end = blocks[-1][index] + blocks[-1][2]
   139         if beg < 0: beg, end = -end, -beg
   140         if seqName in seqLimits:
   141             b, e = seqLimits[seqName]
   142             seqLimits[seqName] = min(b, beg), max(e, end)
   143         else:
   144             seqLimits[seqName] = beg, end
   145     else:
   146         seqLimits[seqName] = seqRange
   147 
   148 def readAlignments(fileName, opts):
   149     '''Get alignments and sequence limits, from MAF or tabular format.'''
   150     seqRequests1 = map(seqRequestFromText, opts.seq1)
   151     seqRequests2 = map(seqRequestFromText, opts.seq2)
   152 
   153     alignments = []
   154     seqNames1 = []
   155     seqNames2 = []
   156     seqLimits1 = {}
   157     seqLimits2 = {}
   158     lines = myOpen(fileName)
   159     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   160         range1 = rangeFromSeqName(seqRequests1, seqName1, seqLen1)
   161         if not range1: continue
   162         range2 = rangeFromSeqName(seqRequests2, seqName2, seqLen2)
   163         if not range2: continue
   164         b = list(croppedBlocks(list(blocks), [range1], [range2]))
   165         if not b: continue
   166         aln = seqName1, seqName2, b
   167         alignments.append(aln)
   168         updateSeqs(opts.trim1, seqNames1, seqLimits1, seqName1, range1, b, 0)
   169         updateSeqs(opts.trim2, seqNames2, seqLimits2, seqName2, range2, b, 1)
   170     seqRanges1 = [(i, seqLimits1[i][0], seqLimits1[i][1]) for i in seqNames1]
   171     seqRanges2 = [(i, seqLimits2[i][0], seqLimits2[i][1]) for i in seqNames2]
   172     return alignments, seqRanges1, seqRanges2
   173 
   174 def natural_sort_key(my_string):
   175     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
   176     parts = re.split(r'(\d+)', my_string)
   177     parts[1::2] = map(int, parts[1::2])
   178     return parts
   179 
   180 def nameKey(oneSeqRanges):
   181     return natural_sort_key(oneSeqRanges[0][0])
   182 
   183 def sizeKey(oneSeqRanges):
   184     return sum(b - e for n, b, e in oneSeqRanges), nameKey(oneSeqRanges)
   185 
   186 def getSortedRanges(seqRanges, sortOpt):
   187     g = [list(v) for k, v in itertools.groupby(seqRanges, itemgetter(0))]
   188     if sortOpt == "1":
   189         g.sort(key=nameKey)
   190     if sortOpt == "2":
   191         g.sort(key=sizeKey)
   192     return [j for i in g for j in i]
   193 
   194 def prettyNum(n):
   195     t = str(n)
   196     groups = []
   197     while t:
   198         groups.append(t[-3:])
   199         t = t[:-3]
   200     return ",".join(reversed(groups))
   201 
   202 def sizeText(size):
   203     suffixes = "bp", "kb", "Mb", "Gb"
   204     for i, x in enumerate(suffixes):
   205         j = 10 ** (i * 3)
   206         if size < j * 10:
   207             return "%.2g" % (1.0 * size / j) + x
   208         if size < j * 1000 or i == len(suffixes) - 1:
   209             return "%.0f" % (1.0 * size / j) + x
   210 
   211 def labelText(seqRange, labelOpt):
   212     seqName, beg, end = seqRange
   213     if labelOpt == 1:
   214         return seqName + ": " + sizeText(end - beg)
   215     if labelOpt == 2:
   216         return seqName + ":" + prettyNum(beg) + ": " + sizeText(end - beg)
   217     if labelOpt == 3:
   218         return seqName + ":" + prettyNum(beg) + "-" + prettyNum(end)
   219     return seqName
   220 
   221 def rangeLabels(seqRanges, labelOpt, font, fontsize, image_mode, textRot):
   222     if fontsize:
   223         image_size = 1, 1
   224         im = Image.new(image_mode, image_size)
   225         draw = ImageDraw.Draw(im)
   226     x = y = 0
   227     for r in seqRanges:
   228         text = labelText(r, labelOpt)
   229         if fontsize:
   230             x, y = draw.textsize(text, font=font)
   231             if textRot:
   232                 x, y = y, x
   233         yield text, x, y
   234 
   235 def dataFromRanges(seqRanges, sortOpt, font,
   236                    fontsize, image_mode, labelOpt, textRot):
   237     s = getSortedRanges(seqRanges, sortOpt)
   238     for i in s:
   239         warn("\t".join(map(str, i)))
   240     warn("")
   241     rangeSizes = [e - b for n, b, e in s]
   242     labs = list(rangeLabels(s, labelOpt, font, fontsize, image_mode, textRot))
   243     margin = max(i[2] for i in labs)
   244     # xxx the margin may be too big, because some labels may get omitted
   245     return s, rangeSizes, labs, margin
   246 
   247 def div_ceil(x, y):
   248     '''Return x / y rounded up.'''
   249     q, r = divmod(x, y)
   250     return q + (r != 0)
   251 
   252 def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
   253     '''Get the minimum bp-per-pixel that fits in the size limit.'''
   254     warn("choosing bp per pixel...")
   255     numOfRanges = len(rangeSizes)
   256     maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
   257     if maxPixelsInRanges < numOfRanges:
   258         raise Exception("can't fit the image: too many sequences?")
   259     negLimit = -maxPixelsInRanges
   260     negBpPerPix = sum(rangeSizes) // negLimit
   261     while True:
   262         if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
   263             return -negBpPerPix
   264         negBpPerPix -= 1
   265 
   266 def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
   267     '''Get the start pixel for each range.'''
   268     rangePixBegs = []
   269     pix_tot = margin - pixTweenRanges
   270     for i in rangePixLens:
   271         pix_tot += pixTweenRanges
   272         rangePixBegs.append(pix_tot)
   273         pix_tot += i
   274     return rangePixBegs
   275 
   276 def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
   277     '''Return pixel information about the ranges.'''
   278     rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
   279     rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
   280     tot_pix = rangePixBegs[-1] + rangePixLens[-1]
   281     return rangePixBegs, rangePixLens, tot_pix
   282 
   283 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
   284     while True:
   285         q1, r1 = divmod(beg1, bp_per_pix)
   286         q2, r2 = divmod(beg2, bp_per_pix)
   287         hits[q2 * width + q1] |= 1
   288         next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
   289         if next_pix >= size: break
   290         beg1 += next_pix
   291         beg2 += next_pix
   292         size -= next_pix
   293 
   294 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
   295     beg2 = -1 - beg2
   296     while True:
   297         q1, r1 = divmod(beg1, bp_per_pix)
   298         q2, r2 = divmod(beg2, bp_per_pix)
   299         hits[q2 * width + q1] |= 2
   300         next_pix = min(bp_per_pix - r1, r2 + 1)
   301         if next_pix >= size: break
   302         beg1 += next_pix
   303         beg2 -= next_pix
   304         size -= next_pix
   305 
   306 def findOrigin(ranges, beg, size):
   307     if beg < 0:
   308         beg = -(beg + size)
   309     for rangeBeg, rangeEnd, origin in ranges:
   310         if rangeEnd > beg:
   311             return origin
   312 
   313 def alignmentPixels(width, height, alignments, bp_per_pix,
   314                     rangeDict1, rangeDict2):
   315     hits = [0] * (width * height)  # the image data
   316     for seq1, seq2, blocks in alignments:
   317         beg1, beg2, size = blocks[0]
   318         ori1 = findOrigin(rangeDict1[seq1], beg1, size)
   319         ori2 = findOrigin(rangeDict2[seq2], beg2, size)
   320         for beg1, beg2, size in blocks:
   321             if beg1 < 0:
   322                 beg1 = -(beg1 + size)
   323                 beg2 = -(beg2 + size)
   324             if beg2 >= 0:
   325                 drawLineForward(hits, width, bp_per_pix,
   326                                 beg1 + ori1, beg2 + ori2, size)
   327             else:
   328                 drawLineReverse(hits, width, bp_per_pix,
   329                                 beg1 + ori1, beg2 - ori2, size)
   330     return hits
   331 
   332 def expandedSeqDict(seqDict):
   333     '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   334     newDict = seqDict.copy()
   335     for name, x in seqDict.items():
   336         if "." in name:
   337             base = name.split(".")[-1]
   338             if base in newDict:  # an ambiguous case was found:
   339                 return seqDict   # so give up completely
   340             newDict[base] = x
   341     return newDict
   342 
   343 def readBed(fileName, rangeDict):
   344     for line in myOpen(fileName):
   345         w = line.split()
   346         if not w: continue
   347         seqName = w[0]
   348         if seqName not in rangeDict: continue
   349         beg = int(w[1])
   350         end = int(w[2])
   351         layer = 900
   352         color = "#fbf"
   353         if len(w) > 4:
   354             if w[4] != ".":
   355                 layer = float(w[4])
   356             if len(w) > 5:
   357                 if len(w) > 8 and w[8].count(",") == 2:
   358                     color = "rgb(" + w[8] + ")"
   359                 elif w[5] == "+":
   360                     color = "#ffe8e8"
   361                 elif w[5] == "-":
   362                     color = "#e8e8ff"
   363         yield layer, color, seqName, beg, end
   364 
   365 def commaSeparatedInts(text):
   366     return map(int, text.rstrip(",").split(","))
   367 
   368 def readGenePred(opts, fileName, rangeDict):
   369     for line in myOpen(fileName):
   370         fields = line.split()
   371         if not fields: continue
   372         if fields[2] not in "+-": fields = fields[1:]
   373         seqName = fields[1]
   374         if seqName not in rangeDict: continue
   375         #strand = fields[2]
   376         cdsBeg = int(fields[5])
   377         cdsEnd = int(fields[6])
   378         exonBegs = commaSeparatedInts(fields[8])
   379         exonEnds = commaSeparatedInts(fields[9])
   380         for beg, end in zip(exonBegs, exonEnds):
   381             yield 300, opts.exon_color, seqName, beg, end
   382             b = max(beg, cdsBeg)
   383             e = min(end, cdsEnd)
   384             if b < e: yield 400, opts.cds_color, seqName, b, e
   385 
   386 def readRmsk(fileName, rangeDict):
   387     for line in myOpen(fileName):
   388         fields = line.split()
   389         if len(fields) == 17:  # rmsk.txt
   390             seqName = fields[5]
   391             if seqName not in rangeDict: continue  # do this ASAP for speed
   392             beg = int(fields[6])
   393             end = int(fields[7])
   394             strand = fields[9]
   395             repeatClass = fields[11]
   396         elif len(fields) == 15:  # .out
   397             seqName = fields[4]
   398             if seqName not in rangeDict: continue
   399             beg = int(fields[5]) - 1
   400             end = int(fields[6])
   401             strand = fields[8]
   402             repeatClass = fields[10]
   403         else:
   404             continue
   405         if repeatClass in ("Low_complexity", "Simple_repeat"):
   406             yield 200, "#fbf", seqName, beg, end
   407         elif strand == "+":
   408             yield 100, "#ffe8e8", seqName, beg, end
   409         else:
   410             yield 100, "#e8e8ff", seqName, beg, end
   411 
   412 def isExtraFirstGapField(fields):
   413     return fields[4].isdigit()
   414 
   415 def readGaps(opts, fileName, rangeDict):
   416     '''Read locations of unsequenced gaps, from an agp or gap file.'''
   417     for line in myOpen(fileName):
   418         w = line.split()
   419         if not w or w[0][0] == "#": continue
   420         if isExtraFirstGapField(w): w = w[1:]
   421         if w[4] not in "NU": continue
   422         seqName = w[0]
   423         if seqName not in rangeDict: continue
   424         end = int(w[2])
   425         beg = end - int(w[5])  # zero-based coordinate
   426         if w[7] == "yes":
   427             yield 3000, opts.bridged_color, seqName, beg, end
   428         else:
   429             yield 2000, opts.unbridged_color, seqName, beg, end
   430 
   431 def bedBoxes(beds, rangeDict, margin, edge, isTop, bpPerPix):
   432     for layer, color, seqName, bedBeg, bedEnd in beds:
   433         for rangeBeg, rangeEnd, origin in rangeDict[seqName]:
   434             beg = max(bedBeg, rangeBeg)
   435             end = min(bedEnd, rangeEnd)
   436             if beg >= end: continue
   437             if layer <= 1000:
   438                 # include partly-covered pixels
   439                 b = (origin + beg) // bpPerPix
   440                 e = div_ceil(origin + end, bpPerPix)
   441             else:
   442                 # exclude partly-covered pixels
   443                 b = div_ceil(origin + beg, bpPerPix)
   444                 e = (origin + end) // bpPerPix
   445                 if e <= b: continue
   446             if isTop:
   447                 box = b, margin, e, edge
   448             else:
   449                 box = margin, b, edge, e
   450             yield layer, color, box
   451 
   452 def drawAnnotations(im, boxes):
   453     # xxx use partial transparency for different-color overlaps?
   454     for layer, color, box in boxes:
   455         im.paste(color, box)
   456 
   457 def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
   458     '''Return axis labels with endpoint & sort-order information.'''
   459     maxWidth = end - beg
   460     for i, j, k in zip(labels, rangePixBegs, rangePixLens):
   461         text, textWidth, textHeight = i
   462         if textWidth > maxWidth:
   463             continue
   464         labelBeg = j + (k - textWidth) // 2
   465         labelEnd = labelBeg + textWidth
   466         sortKey = textWidth - k
   467         if labelBeg < beg:
   468             sortKey += maxWidth * (beg - labelBeg)
   469             labelBeg = beg
   470             labelEnd = beg + textWidth
   471         if labelEnd > end:
   472             sortKey += maxWidth * (labelEnd - end)
   473             labelEnd = end
   474             labelBeg = end - textWidth
   475         yield sortKey, labelBeg, labelEnd, text, textHeight
   476 
   477 def nonoverlappingLabels(labels, minPixTweenLabels):
   478     '''Get a subset of non-overlapping axis labels, greedily.'''
   479     out = []
   480     for i in labels:
   481         beg = i[1] - minPixTweenLabels
   482         end = i[2] + minPixTweenLabels
   483         if all(j[2] <= beg or j[1] >= end for j in out):
   484             out.append(i)
   485     return out
   486 
   487 def axisImage(labels, rangePixBegs, rangePixLens, textRot,
   488               textAln, font, image_mode, opts):
   489     '''Make an image of axis labels.'''
   490     beg = rangePixBegs[0]
   491     end = rangePixBegs[-1] + rangePixLens[-1]
   492     margin = max(i[2] for i in labels)
   493     labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
   494     minPixTweenLabels = 0 if textRot else opts.label_space
   495     labels = nonoverlappingLabels(labels, minPixTweenLabels)
   496     image_size = (margin, end) if textRot else (end, margin)
   497     im = Image.new(image_mode, image_size, opts.margin_color)
   498     draw = ImageDraw.Draw(im)
   499     for i in labels:
   500         base = margin - i[4] if textAln else 0
   501         position = (base, i[1]) if textRot else (i[1], base)
   502         draw.text(position, i[3], font=font, fill=opts.text_color)
   503     return im
   504 
   505 def rangesWithOrigins(sortedRanges, rangePixBegs, bpPerPix):
   506     for i, j in zip(sortedRanges, rangePixBegs):
   507         seqName, rangeBeg, rangeEnd = i
   508         origin = bpPerPix * j - rangeBeg
   509         yield seqName, (rangeBeg, rangeEnd, origin)
   510 
   511 def rangesPerSeq(sortedRanges, rangePixBegs, bpPerPix):
   512     a = rangesWithOrigins(sortedRanges, rangePixBegs, bpPerPix)
   513     for k, v in itertools.groupby(a, itemgetter(0)):
   514         yield k, [i[1] for i in v]
   515 
   516 def getFont(opts):
   517     if opts.fontfile:
   518         return ImageFont.truetype(opts.fontfile, opts.fontsize)
   519     fileNames = []
   520     try:
   521         x = ["fc-match", "-f%{file}", "arial"]
   522         p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
   523         out, err = p.communicate()
   524         fileNames.append(out)
   525     except OSError as e:
   526         warn("fc-match error: " + str(e))
   527     fileNames.append("/Library/Fonts/Arial.ttf")  # for Mac
   528     for i in fileNames:
   529         try:
   530             font = ImageFont.truetype(i, opts.fontsize)
   531             warn("font: " + i)
   532             return font
   533         except IOError as e:
   534             warn("font load error: " + str(e))
   535     return ImageFont.load_default()
   536 
   537 def lastDotplot(opts, args):
   538     font = getFont(opts)
   539     image_mode = 'RGB'
   540     forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
   541     reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
   542     zipped_colors = zip(forward_color, reverse_color)
   543     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
   544 
   545     warn("reading alignments...")
   546     alnData = readAlignments(args[0], opts)
   547     alignments, seqRanges1, seqRanges2 = alnData
   548     warn("done")
   549     if not alignments: raise Exception("there are no alignments")
   550 
   551     textRot1 = "vertical".startswith(opts.rot1)
   552     i1 = dataFromRanges(seqRanges1, opts.sort1, font,
   553                         opts.fontsize, image_mode, opts.labels1, textRot1)
   554     sortedRanges1, rangeSizes1, labelData1, tMargin = i1
   555 
   556     textRot2 = "horizontal".startswith(opts.rot2)
   557     i2 = dataFromRanges(seqRanges2, opts.sort2, font,
   558                         opts.fontsize, image_mode, opts.labels2, textRot2)
   559     sortedRanges2, rangeSizes2, labelData2, lMargin = i2
   560 
   561     maxPixels1 = opts.width  - lMargin
   562     maxPixels2 = opts.height - tMargin
   563     bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
   564     bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
   565     bpPerPix = max(bpPerPix1, bpPerPix2)
   566     warn("bp per pixel = " + str(bpPerPix))
   567 
   568     p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
   569     rangePixBegs1, rangePixLens1, width = p1
   570     rangeDict1 = dict(rangesPerSeq(sortedRanges1, rangePixBegs1, bpPerPix))
   571 
   572     p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
   573     rangePixBegs2, rangePixLens2, height = p2
   574     rangeDict2 = dict(rangesPerSeq(sortedRanges2, rangePixBegs2, bpPerPix))
   575 
   576     warn("width:  " + str(width))
   577     warn("height: " + str(height))
   578 
   579     warn("processing alignments...")
   580     hits = alignmentPixels(width, height, alignments, bpPerPix,
   581                            rangeDict1, rangeDict2)
   582 
   583     warn("reading annotations...")
   584 
   585     rangeDict1 = expandedSeqDict(rangeDict1)
   586     beds1 = itertools.chain(readBed(opts.bed1, rangeDict1),
   587                             readRmsk(opts.rmsk1, rangeDict1),
   588                             readGenePred(opts, opts.genePred1, rangeDict1),
   589                             readGaps(opts, opts.gap1, rangeDict1))
   590     b1 = bedBoxes(beds1, rangeDict1, tMargin, height, True, bpPerPix)
   591 
   592     rangeDict2 = expandedSeqDict(rangeDict2)
   593     beds2 = itertools.chain(readBed(opts.bed2, rangeDict2),
   594                             readRmsk(opts.rmsk2, rangeDict2),
   595                             readGenePred(opts, opts.genePred2, rangeDict2),
   596                             readGaps(opts, opts.gap2, rangeDict2))
   597     b2 = bedBoxes(beds2, rangeDict2, lMargin, width, False, bpPerPix)
   598 
   599     boxes = sorted(itertools.chain(b1, b2))
   600 
   601     warn("drawing...")
   602 
   603     image_size = width, height
   604     im = Image.new(image_mode, image_size, opts.background_color)
   605 
   606     drawAnnotations(im, boxes)
   607 
   608     for i in range(height):
   609         for j in range(width):
   610             store_value = hits[i * width + j]
   611             xy = j, i
   612             if   store_value == 1: im.putpixel(xy, forward_color)
   613             elif store_value == 2: im.putpixel(xy, reverse_color)
   614             elif store_value == 3: im.putpixel(xy, overlap_color)
   615 
   616     if opts.fontsize != 0:
   617         axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
   618                           textRot1, False, font, image_mode, opts)
   619         if textRot1:
   620             axis1 = axis1.transpose(Image.ROTATE_90)
   621         axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
   622                           textRot2, textRot2, font, image_mode, opts)
   623         if not textRot2:
   624             axis2 = axis2.transpose(Image.ROTATE_270)
   625         im.paste(axis1, (0, 0))
   626         im.paste(axis2, (0, 0))
   627 
   628     for i in rangePixBegs1[1:]:
   629         box = i - opts.border_pixels, tMargin, i, height
   630         im.paste(opts.border_color, box)
   631 
   632     for i in rangePixBegs2[1:]:
   633         box = lMargin, i - opts.border_pixels, width, i
   634         im.paste(opts.border_color, box)
   635 
   636     im.save(args[1])
   637 
   638 if __name__ == "__main__":
   639     usage = """%prog --help
   640    or: %prog [options] maf-or-tab-alignments dotplot.png
   641    or: %prog [options] maf-or-tab-alignments dotplot.gif
   642    or: ..."""
   643     description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
   644     op = optparse.OptionParser(usage=usage, description=description)
   645     op.add_option("-v", "--verbose", action="count",
   646                   help="show progress messages & data about the plot")
   647     op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
   648                   default=[],
   649                   help="which sequences to show from the 1st genome")
   650     op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
   651                   default=[],
   652                   help="which sequences to show from the 2nd genome")
   653     # Replace "width" & "height" with a single "length" option?
   654     op.add_option("-x", "--width", type="int", default=1000,
   655                   help="maximum width in pixels (default: %default)")
   656     op.add_option("-y", "--height", type="int", default=1000,
   657                   help="maximum height in pixels (default: %default)")
   658     op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
   659                   help="color for forward alignments (default: %default)")
   660     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
   661                   help="color for reverse alignments (default: %default)")
   662     op.add_option("--sort1", default="1", metavar="N",
   663                   help="genome1 sequence order: 0=input order, 1=name order, "
   664                   "2=length order (default=%default)")
   665     op.add_option("--sort2", default="1", metavar="N",
   666                   help="genome2 sequence order: 0=input order, 1=name order, "
   667                   "2=length order (default=%default)")
   668     op.add_option("--trim1", action="store_true",
   669                   help="trim unaligned sequence flanks from the 1st genome")
   670     op.add_option("--trim2", action="store_true",
   671                   help="trim unaligned sequence flanks from the 2nd genome")
   672     op.add_option("--border-pixels", metavar="INT", type="int", default=1,
   673                   help="number of pixels between sequences (default=%default)")
   674     op.add_option("--border-color", metavar="COLOR", default="black",
   675                   help="color for pixels between sequences (default=%default)")
   676     op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
   677                   help="margin color")
   678 
   679     og = optparse.OptionGroup(op, "Text options")
   680     og.add_option("-f", "--fontfile", metavar="FILE",
   681                   help="TrueType or OpenType font file")
   682     og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
   683                   help="TrueType or OpenType font size (default: %default)")
   684     og.add_option("--labels1", type="int", default=0, metavar="N", help=
   685                   "genome1 labels: 0=name, 1=name:length, "
   686                   "2=name:start:length, 3=name:start-end (default=%default)")
   687     og.add_option("--labels2", type="int", default=0, metavar="N", help=
   688                   "genome2 labels: 0=name, 1=name:length, "
   689                   "2=name:start:length, 3=name:start-end (default=%default)")
   690     og.add_option("--rot1", metavar="ROT", default="h",
   691                   help="text rotation for the 1st genome (default=%default)")
   692     og.add_option("--rot2", metavar="ROT", default="v",
   693                   help="text rotation for the 2nd genome (default=%default)")
   694     op.add_option_group(og)
   695 
   696     og = optparse.OptionGroup(op, "Annotation options")
   697     og.add_option("--bed1", metavar="FILE",
   698                   help="read genome1 annotations from BED file")
   699     og.add_option("--bed2", metavar="FILE",
   700                   help="read genome2 annotations from BED file")
   701     og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
   702                   "RepeatMasker .out or rmsk.txt file")
   703     og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
   704                   "RepeatMasker .out or rmsk.txt file")
   705     op.add_option_group(og)
   706 
   707     og = optparse.OptionGroup(op, "Gene options")
   708     og.add_option("--genePred1", metavar="FILE",
   709                   help="read genome1 genes from genePred file")
   710     og.add_option("--genePred2", metavar="FILE",
   711                   help="read genome2 genes from genePred file")
   712     og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
   713                   help="color for exons (default=%default)")
   714     og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
   715                   help="color for protein-coding regions (default=%default)")
   716     op.add_option_group(og)
   717 
   718     og = optparse.OptionGroup(op, "Unsequenced gap options")
   719     og.add_option("--gap1", metavar="FILE",
   720                   help="read genome1 unsequenced gaps from agp or gap file")
   721     og.add_option("--gap2", metavar="FILE",
   722                   help="read genome2 unsequenced gaps from agp or gap file")
   723     og.add_option("--bridged-color", metavar="COLOR", default="yellow",
   724                   help="color for bridged gaps (default: %default)")
   725     og.add_option("--unbridged-color", metavar="COLOR", default="orange",
   726                   help="color for unbridged gaps (default: %default)")
   727     op.add_option_group(og)
   728     (opts, args) = op.parse_args()
   729     if len(args) != 2: op.error("2 arguments needed")
   730 
   731     opts.text_color = "black"
   732     opts.background_color = "white"
   733     opts.label_space = 5     # minimum number of pixels between axis labels
   734 
   735     try: lastDotplot(opts, args)
   736     except KeyboardInterrupt: pass  # avoid silly error message
   737     except Exception as e:
   738         prog = os.path.basename(sys.argv[0])
   739         sys.exit(prog + ": error: " + str(e))