scripts/last-dotplot
author Martin C. Frith
Wed Nov 08 12:17:50 2017 +0900 (2017-11-08)
changeset 905 8d52cef22afa
parent 904 c2f50cd4e29d
child 906 21fbf377641f
permissions -rwxr-xr-x
Refactor last-dotplot
     1 #! /usr/bin/env python
     2 
     3 # Read pair-wise alignments in MAF or LAST tabular format: write an
     4 # "Oxford grid", a.k.a. dotplot.
     5 
     6 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
     7 # pixels with one or more aligned nt-pairs are black.  This can look
     8 # too crowded for large genome alignments.  I tried shading each pixel
     9 # according to the number of aligned nt-pairs within it, but the
    10 # result is too faint.  How can this be done better?
    11 
    12 import gzip
    13 from fnmatch import fnmatchcase
    14 import subprocess
    15 import itertools, optparse, os, re, sys
    16 
    17 # Try to make PIL/PILLOW work:
    18 try: from PIL import Image, ImageDraw, ImageFont, ImageColor
    19 except ImportError: import Image, ImageDraw, ImageFont, ImageColor
    20 
    21 def myOpen(fileName):  # faster than fileinput
    22     if fileName is None:
    23         return []
    24     if fileName == "-":
    25         return sys.stdin
    26     if fileName.endswith(".gz"):
    27         return gzip.open(fileName)
    28     return open(fileName)
    29 
    30 def warn(message):
    31     if opts.verbose:
    32         prog = os.path.basename(sys.argv[0])
    33         sys.stderr.write(prog + ": " + message + "\n")
    34 
    35 def croppedBlocks(blocks, range1, range2):
    36     cropBeg1, cropEnd1 = range1
    37     cropBeg2, cropEnd2 = range2
    38     if blocks[0][0] < 0: cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
    39     if blocks[0][1] < 0: cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
    40     for beg1, beg2, size in blocks:
    41         b1 = max(cropBeg1, beg1)
    42         e1 = min(cropEnd1, beg1 + size)
    43         if b1 >= e1: continue
    44         offset = beg2 - beg1
    45         b2 = max(cropBeg2, b1 + offset)
    46         e2 = min(cropEnd2, e1 + offset)
    47         if b2 >= e2: continue
    48         yield b2 - offset, b2, e2 - b2
    49 
    50 def tabBlocks(beg1, beg2, blocks):
    51     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
    52     for i in blocks.split(","):
    53         if ":" in i:
    54             x, y = i.split(":")
    55             beg1 += int(x)
    56             beg2 += int(y)
    57         else:
    58             size = int(i)
    59             yield beg1, beg2, size
    60             beg1 += size
    61             beg2 += size
    62 
    63 def mafBlocks(beg1, beg2, seq1, seq2):
    64     '''Get the gapless blocks of an alignment, from MAF format.'''
    65     size = 0
    66     for x, y in itertools.izip(seq1, seq2):
    67         if x == "-":
    68             if size:
    69                 yield beg1, beg2, size
    70                 beg1 += size
    71                 beg2 += size
    72                 size = 0
    73             beg2 += 1
    74         elif y == "-":
    75             if size:
    76                 yield beg1, beg2, size
    77                 beg1 += size
    78                 beg2 += size
    79                 size = 0
    80             beg1 += 1
    81         else:
    82             size += 1
    83     if size: yield beg1, beg2, size
    84 
    85 def alignmentInput(lines):
    86     '''Get alignments and sequence lengths, from MAF or tabular format.'''
    87     mafCount = 0
    88     for line in lines:
    89         w = line.split()
    90         if line[0].isdigit():  # tabular format
    91             chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
    92             if w[4] == "-": beg1 -= seqlen1
    93             chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
    94             if w[9] == "-": beg2 -= seqlen2
    95             blocks = tabBlocks(beg1, beg2, w[11])
    96             yield chr1, seqlen1, chr2, seqlen2, blocks
    97         elif line[0] == "s":  # MAF format
    98             if mafCount == 0:
    99                 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
   100                 if w[4] == "-": beg1 -= seqlen1
   101                 mafCount = 1
   102             else:
   103                 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
   104                 if w[4] == "-": beg2 -= seqlen2
   105                 blocks = mafBlocks(beg1, beg2, seq1, seq2)
   106                 yield chr1, seqlen1, chr2, seqlen2, blocks
   107                 mafCount = 0
   108 
   109 def seqRequestFromText(text):
   110     if ":" in text:
   111         pattern, interval = text.rsplit(":", 1)
   112         if "-" in interval:
   113             beg, end = interval.rsplit("-", 1)
   114             return pattern, int(beg), int(end)  # beg may be negative
   115     return text, 0, sys.maxsize
   116 
   117 def rangeFromSeqName(seqRequests, name, seqLen):
   118     if not seqRequests: return 0, seqLen
   119     base = name.split(".")[-1]  # allow for names like hg19.chr7
   120     for pat, beg, end in seqRequests:
   121         if fnmatchcase(name, pat) or fnmatchcase(base, pat):
   122             return max(beg, 0), min(end, seqLen)
   123     return None
   124 
   125 def updateSeqs(isTrim, seqNames, seqLimits, seqName, seqRange, blocks, index):
   126     if seqName not in seqLimits:
   127         seqNames.append(seqName)
   128     if isTrim:
   129         beg = blocks[0][index]
   130         end = blocks[-1][index] + blocks[-1][2]
   131         if beg < 0: beg, end = -end, -beg
   132         if seqName in seqLimits:
   133             b, e = seqLimits[seqName]
   134             seqLimits[seqName] = min(b, beg), max(e, end)
   135         else:
   136             seqLimits[seqName] = beg, end
   137     else:
   138         seqLimits[seqName] = seqRange
   139 
   140 def readAlignments(fileName, opts):
   141     '''Get alignments and sequence limits, from MAF or tabular format.'''
   142     seqRequests1 = map(seqRequestFromText, opts.seq1)
   143     seqRequests2 = map(seqRequestFromText, opts.seq2)
   144 
   145     alignments = []
   146     seqNames1 = []
   147     seqNames2 = []
   148     seqLimits1 = {}
   149     seqLimits2 = {}
   150     lines = myOpen(fileName)
   151     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   152         range1 = rangeFromSeqName(seqRequests1, seqName1, seqLen1)
   153         if not range1: continue
   154         range2 = rangeFromSeqName(seqRequests2, seqName2, seqLen2)
   155         if not range2: continue
   156         b = list(croppedBlocks(list(blocks), range1, range2))
   157         if not b: continue
   158         aln = seqName1, seqName2, b
   159         alignments.append(aln)
   160         updateSeqs(opts.trim1, seqNames1, seqLimits1, seqName1, range1, b, 0)
   161         updateSeqs(opts.trim2, seqNames2, seqLimits2, seqName2, range2, b, 1)
   162     return alignments, seqNames1, seqNames2, seqLimits1, seqLimits2
   163 
   164 def natural_sort_key(my_string):
   165     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
   166     parts = re.split(r'(\d+)', my_string)
   167     parts[1::2] = map(int, parts[1::2])
   168     return parts
   169 
   170 def prettyNum(n):
   171     t = str(n)
   172     groups = []
   173     while t:
   174         groups.append(t[-3:])
   175         t = t[:-3]
   176     return ",".join(reversed(groups))
   177 
   178 def sizeText(size):
   179     suffixes = "bp", "kb", "Mb", "Gb"
   180     for i, x in enumerate(suffixes):
   181         j = 10 ** (i * 3)
   182         if size < j * 10:
   183             return "%.2g" % (1.0 * size / j) + x
   184         if size < j * 1000 or i == len(suffixes) - 1:
   185             return "%.0f" % (1.0 * size / j) + x
   186 
   187 def labelText(seqRange, labelOpt):
   188     seqName, beg, end = seqRange
   189     if labelOpt == 1:
   190         return seqName + ": " + sizeText(end - beg)
   191     if labelOpt == 2:
   192         return seqName + ":" + prettyNum(beg) + ": " + sizeText(end - beg)
   193     if labelOpt == 3:
   194         return seqName + ":" + prettyNum(beg) + "-" + prettyNum(end)
   195     return seqName
   196 
   197 def rangeLabels(seqRanges, labelOpt, font, fontsize, image_mode, textRot):
   198     if fontsize:
   199         image_size = 1, 1
   200         im = Image.new(image_mode, image_size)
   201         draw = ImageDraw.Draw(im)
   202     x = y = 0
   203     for r in seqRanges:
   204         text = labelText(r, labelOpt)
   205         if fontsize:
   206             x, y = draw.textsize(text, font=font)
   207             if textRot:
   208                 x, y = y, x
   209         yield text, x, y
   210 
   211 def getSeqInfo(sortOpt, seqNames, seqLimits,
   212                font, fontsize, image_mode, labelOpt, textRot):
   213     '''Return miscellaneous information about the sequences.'''
   214     if sortOpt == 1:
   215         seqNames.sort(key=natural_sort_key)
   216     seqSizes = [seqLimits[i][1] - seqLimits[i][0] for i in seqNames]
   217     for i in seqNames:
   218         r = seqLimits[i]
   219         out = i, str(r[0]), str(r[1])
   220         warn("\t".join(out))
   221     warn("")
   222     if sortOpt == 2:
   223         seqRecords = sorted(zip(seqSizes, seqNames), reverse=True)
   224         seqSizes = [i[0] for i in seqRecords]
   225         seqNames = [i[1] for i in seqRecords]
   226     seqRanges = [(i, seqLimits[i][0], seqLimits[i][1]) for i in seqNames]
   227     labelData = list(rangeLabels(seqRanges, labelOpt,
   228                                  font, fontsize, image_mode, textRot))
   229     margin = max(i[2] for i in labelData)
   230     # xxx the margin may be too big, because some labels may get omitted
   231     return seqNames, seqSizes, labelData, margin
   232 
   233 def div_ceil(x, y):
   234     '''Return x / y rounded up.'''
   235     q, r = divmod(x, y)
   236     return q + (r != 0)
   237 
   238 def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
   239     '''Get the minimum bp-per-pixel that fits in the size limit.'''
   240     warn("choosing bp per pixel...")
   241     numOfRanges = len(rangeSizes)
   242     maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
   243     if maxPixelsInRanges < numOfRanges:
   244         raise Exception("can't fit the image: too many sequences?")
   245     negLimit = -maxPixelsInRanges
   246     negBpPerPix = sum(rangeSizes) // negLimit
   247     while True:
   248         if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
   249             return -negBpPerPix
   250         negBpPerPix -= 1
   251 
   252 def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
   253     '''Get the start pixel for each range.'''
   254     rangePixBegs = []
   255     pix_tot = margin - pixTweenRanges
   256     for i in rangePixLens:
   257         pix_tot += pixTweenRanges
   258         rangePixBegs.append(pix_tot)
   259         pix_tot += i
   260     return rangePixBegs
   261 
   262 def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
   263     '''Return pixel information about the ranges.'''
   264     rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
   265     rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
   266     tot_pix = rangePixBegs[-1] + rangePixLens[-1]
   267     return rangePixBegs, rangePixLens, tot_pix
   268 
   269 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
   270     while True:
   271         q1, r1 = divmod(beg1, bp_per_pix)
   272         q2, r2 = divmod(beg2, bp_per_pix)
   273         hits[q2 * width + q1] |= 1
   274         next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
   275         if next_pix >= size: break
   276         beg1 += next_pix
   277         beg2 += next_pix
   278         size -= next_pix
   279 
   280 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
   281     beg2 = -1 - beg2
   282     while True:
   283         q1, r1 = divmod(beg1, bp_per_pix)
   284         q2, r2 = divmod(beg2, bp_per_pix)
   285         hits[q2 * width + q1] |= 2
   286         next_pix = min(bp_per_pix - r1, r2 + 1)
   287         if next_pix >= size: break
   288         beg1 += next_pix
   289         beg2 -= next_pix
   290         size -= next_pix
   291 
   292 def findOrigin(ranges, beg, size):
   293     if beg < 0:
   294         beg = -(beg + size)
   295     for rangeBeg, rangeEnd, origin in ranges:
   296         if rangeEnd > beg:
   297             return origin
   298 
   299 def alignmentPixels(width, height, alignments, bp_per_pix,
   300                     rangeDict1, rangeDict2):
   301     hits = [0] * (width * height)  # the image data
   302     for seq1, seq2, blocks in alignments:
   303         beg1, beg2, size = blocks[0]
   304         ori1 = findOrigin(rangeDict1[seq1], beg1, size)
   305         ori2 = findOrigin(rangeDict2[seq2], beg2, size)
   306         for beg1, beg2, size in blocks:
   307             if beg1 < 0:
   308                 beg1 = -(beg1 + size)
   309                 beg2 = -(beg2 + size)
   310             if beg2 >= 0:
   311                 drawLineForward(hits, width, bp_per_pix,
   312                                 beg1 + ori1, beg2 + ori2, size)
   313             else:
   314                 drawLineReverse(hits, width, bp_per_pix,
   315                                 beg1 + ori1, beg2 - ori2, size)
   316     return hits
   317 
   318 def expandedSeqDict(seqDict):
   319     '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   320     newDict = seqDict.copy()
   321     for name, x in seqDict.items():
   322         if "." in name:
   323             base = name.split(".")[-1]
   324             if base in newDict:  # an ambiguous case was found:
   325                 return seqDict   # so give up completely
   326             newDict[base] = x
   327     return newDict
   328 
   329 def readBed(fileName, rangeDict):
   330     for line in myOpen(fileName):
   331         w = line.split()
   332         if not w: continue
   333         seqName = w[0]
   334         if seqName not in rangeDict: continue
   335         beg = int(w[1])
   336         end = int(w[2])
   337         layer = 900
   338         color = "#fbf"
   339         if len(w) > 4:
   340             if w[4] != ".":
   341                 layer = float(w[4])
   342             if len(w) > 5:
   343                 if len(w) > 8 and w[8].count(",") == 2:
   344                     color = "rgb(" + w[8] + ")"
   345                 elif w[5] == "+":
   346                     color = "#ffe8e8"
   347                 elif w[5] == "-":
   348                     color = "#e8e8ff"
   349         yield layer, color, seqName, beg, end
   350 
   351 def commaSeparatedInts(text):
   352     return map(int, text.rstrip(",").split(","))
   353 
   354 def readGenePred(opts, fileName, rangeDict):
   355     for line in myOpen(fileName):
   356         fields = line.split()
   357         if not fields: continue
   358         if fields[2] not in "+-": fields = fields[1:]
   359         seqName = fields[1]
   360         if seqName not in rangeDict: continue
   361         #strand = fields[2]
   362         cdsBeg = int(fields[5])
   363         cdsEnd = int(fields[6])
   364         exonBegs = commaSeparatedInts(fields[8])
   365         exonEnds = commaSeparatedInts(fields[9])
   366         for beg, end in zip(exonBegs, exonEnds):
   367             yield 300, opts.exon_color, seqName, beg, end
   368             b = max(beg, cdsBeg)
   369             e = min(end, cdsEnd)
   370             if b < e: yield 400, opts.cds_color, seqName, b, e
   371 
   372 def readRmsk(fileName, rangeDict):
   373     for line in myOpen(fileName):
   374         fields = line.split()
   375         if len(fields) == 17:  # rmsk.txt
   376             seqName = fields[5]
   377             if seqName not in rangeDict: continue  # do this ASAP for speed
   378             beg = int(fields[6])
   379             end = int(fields[7])
   380             strand = fields[9]
   381             repeatClass = fields[11]
   382         elif len(fields) == 15:  # .out
   383             seqName = fields[4]
   384             if seqName not in rangeDict: continue
   385             beg = int(fields[5]) - 1
   386             end = int(fields[6])
   387             strand = fields[8]
   388             repeatClass = fields[10]
   389         else:
   390             continue
   391         if repeatClass in ("Low_complexity", "Simple_repeat"):
   392             yield 200, "#fbf", seqName, beg, end
   393         elif strand == "+":
   394             yield 100, "#ffe8e8", seqName, beg, end
   395         else:
   396             yield 100, "#e8e8ff", seqName, beg, end
   397 
   398 def isExtraFirstGapField(fields):
   399     return fields[4].isdigit()
   400 
   401 def readGaps(opts, fileName, rangeDict):
   402     '''Read locations of unsequenced gaps, from an agp or gap file.'''
   403     for line in myOpen(fileName):
   404         w = line.split()
   405         if not w or w[0][0] == "#": continue
   406         if isExtraFirstGapField(w): w = w[1:]
   407         if w[4] not in "NU": continue
   408         seqName = w[0]
   409         if seqName not in rangeDict: continue
   410         end = int(w[2])
   411         beg = end - int(w[5])  # zero-based coordinate
   412         if w[7] == "yes":
   413             yield 3000, opts.bridged_color, seqName, beg, end
   414         else:
   415             yield 2000, opts.unbridged_color, seqName, beg, end
   416 
   417 def bedBoxes(beds, rangeDict, margin, edge, isTop, bpPerPix):
   418     for layer, color, seqName, bedBeg, bedEnd in beds:
   419         for rangeBeg, rangeEnd, origin in rangeDict[seqName]:
   420             beg = max(bedBeg, rangeBeg)
   421             end = min(bedEnd, rangeEnd)
   422             if beg >= end: continue
   423             if layer <= 1000:
   424                 # include partly-covered pixels
   425                 b = (origin + beg) // bpPerPix
   426                 e = div_ceil(origin + end, bpPerPix)
   427             else:
   428                 # exclude partly-covered pixels
   429                 b = div_ceil(origin + beg, bpPerPix)
   430                 e = (origin + end) // bpPerPix
   431                 if e <= b: continue
   432             if isTop:
   433                 box = b, margin, e, edge
   434             else:
   435                 box = margin, b, edge, e
   436             yield layer, color, box
   437 
   438 def drawAnnotations(im, boxes):
   439     # xxx use partial transparency for different-color overlaps?
   440     for layer, color, box in boxes:
   441         im.paste(color, box)
   442 
   443 def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
   444     '''Return axis labels with endpoint & sort-order information.'''
   445     maxWidth = end - beg
   446     for i, j, k in zip(labels, rangePixBegs, rangePixLens):
   447         text, textWidth, textHeight = i
   448         if textWidth > maxWidth:
   449             continue
   450         labelBeg = j + (k - textWidth) // 2
   451         labelEnd = labelBeg + textWidth
   452         sortKey = textWidth - k
   453         if labelBeg < beg:
   454             sortKey += maxWidth * (beg - labelBeg)
   455             labelBeg = beg
   456             labelEnd = beg + textWidth
   457         if labelEnd > end:
   458             sortKey += maxWidth * (labelEnd - end)
   459             labelEnd = end
   460             labelBeg = end - textWidth
   461         yield sortKey, labelBeg, labelEnd, text, textHeight
   462 
   463 def nonoverlappingLabels(labels, minPixTweenLabels):
   464     '''Get a subset of non-overlapping axis labels, greedily.'''
   465     out = []
   466     for i in labels:
   467         beg = i[1] - minPixTweenLabels
   468         end = i[2] + minPixTweenLabels
   469         if all(j[2] <= beg or j[1] >= end for j in out):
   470             out.append(i)
   471     return out
   472 
   473 def axisImage(labels, rangePixBegs, rangePixLens, textRot,
   474               textAln, font, image_mode, opts):
   475     '''Make an image of axis labels.'''
   476     beg = rangePixBegs[0]
   477     end = rangePixBegs[-1] + rangePixLens[-1]
   478     margin = max(i[2] for i in labels)
   479     labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
   480     minPixTweenLabels = 0 if textRot else opts.label_space
   481     labels = nonoverlappingLabels(labels, minPixTweenLabels)
   482     image_size = (margin, end) if textRot else (end, margin)
   483     im = Image.new(image_mode, image_size, opts.margin_color)
   484     draw = ImageDraw.Draw(im)
   485     for i in labels:
   486         base = margin - i[4] if textAln else 0
   487         position = (base, i[1]) if textRot else (i[1], base)
   488         draw.text(position, i[3], font=font, fill=opts.text_color)
   489     return im
   490 
   491 def rangesPerSeq(seqNames, seqLimits, rangePixBegs, bpPerPix):
   492     for i, j in zip(seqNames, rangePixBegs):
   493         beg, end = seqLimits[i]
   494         origin = bpPerPix * j - beg
   495         r = beg, end, origin
   496         yield i, [r]
   497 
   498 def getFont(opts):
   499     if opts.fontfile:
   500         return ImageFont.truetype(opts.fontfile, opts.fontsize)
   501     fileNames = []
   502     try:
   503         x = ["fc-match", "-f%{file}", "arial"]
   504         p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
   505         out, err = p.communicate()
   506         fileNames.append(out)
   507     except OSError as e:
   508         warn("fc-match error: " + str(e))
   509     fileNames.append("/Library/Fonts/Arial.ttf")  # for Mac
   510     for i in fileNames:
   511         try:
   512             font = ImageFont.truetype(i, opts.fontsize)
   513             warn("font: " + i)
   514             return font
   515         except IOError as e:
   516             warn("font load error: " + str(e))
   517     return ImageFont.load_default()
   518 
   519 def lastDotplot(opts, args):
   520     font = getFont(opts)
   521     image_mode = 'RGB'
   522     forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
   523     reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
   524     zipped_colors = zip(forward_color, reverse_color)
   525     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
   526 
   527     warn("reading alignments...")
   528     alnData = readAlignments(args[0], opts)
   529     alignments, seqNames1, seqNames2, seqLimits1, seqLimits2 = alnData
   530     warn("done")
   531     if not alignments: raise Exception("there are no alignments")
   532 
   533     textRot1 = "vertical".startswith(opts.rot1)
   534     i1 = getSeqInfo(opts.sort1, seqNames1, seqLimits1,
   535                     font, opts.fontsize, image_mode, opts.labels1, textRot1)
   536     seqNames1, rangeSizes1, labelData1, tMargin = i1
   537 
   538     textRot2 = "horizontal".startswith(opts.rot2)
   539     i2 = getSeqInfo(opts.sort2, seqNames2, seqLimits2,
   540                     font, opts.fontsize, image_mode, opts.labels2, textRot2)
   541     seqNames2, rangeSizes2, labelData2, lMargin = i2
   542 
   543     maxPixels1 = opts.width  - lMargin
   544     maxPixels2 = opts.height - tMargin
   545     bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
   546     bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
   547     bpPerPix = max(bpPerPix1, bpPerPix2)
   548     warn("bp per pixel = " + str(bpPerPix))
   549 
   550     p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
   551     rangePixBegs1, rangePixLens1, width = p1
   552     rangeDict1 = dict(rangesPerSeq(seqNames1, seqLimits1, rangePixBegs1,
   553                                    bpPerPix))
   554 
   555     p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
   556     rangePixBegs2, rangePixLens2, height = p2
   557     rangeDict2 = dict(rangesPerSeq(seqNames2, seqLimits2, rangePixBegs2,
   558                                    bpPerPix))
   559 
   560     warn("width:  " + str(width))
   561     warn("height: " + str(height))
   562 
   563     warn("processing alignments...")
   564     hits = alignmentPixels(width, height, alignments, bpPerPix,
   565                            rangeDict1, rangeDict2)
   566 
   567     warn("reading annotations...")
   568 
   569     rangeDict1 = expandedSeqDict(rangeDict1)
   570     beds1 = itertools.chain(readBed(opts.bed1, rangeDict1),
   571                             readRmsk(opts.rmsk1, rangeDict1),
   572                             readGenePred(opts, opts.genePred1, rangeDict1),
   573                             readGaps(opts, opts.gap1, rangeDict1))
   574     b1 = bedBoxes(beds1, rangeDict1, tMargin, height, True, bpPerPix)
   575 
   576     rangeDict2 = expandedSeqDict(rangeDict2)
   577     beds2 = itertools.chain(readBed(opts.bed2, rangeDict2),
   578                             readRmsk(opts.rmsk2, rangeDict2),
   579                             readGenePred(opts, opts.genePred2, rangeDict2),
   580                             readGaps(opts, opts.gap2, rangeDict2))
   581     b2 = bedBoxes(beds2, rangeDict2, lMargin, width, False, bpPerPix)
   582 
   583     boxes = sorted(itertools.chain(b1, b2))
   584 
   585     warn("drawing...")
   586 
   587     image_size = width, height
   588     im = Image.new(image_mode, image_size, opts.background_color)
   589 
   590     drawAnnotations(im, boxes)
   591 
   592     for i in range(height):
   593         for j in range(width):
   594             store_value = hits[i * width + j]
   595             xy = j, i
   596             if   store_value == 1: im.putpixel(xy, forward_color)
   597             elif store_value == 2: im.putpixel(xy, reverse_color)
   598             elif store_value == 3: im.putpixel(xy, overlap_color)
   599 
   600     if opts.fontsize != 0:
   601         axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
   602                           textRot1, False, font, image_mode, opts)
   603         if textRot1:
   604             axis1 = axis1.transpose(Image.ROTATE_90)
   605         axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
   606                           textRot2, textRot2, font, image_mode, opts)
   607         if not textRot2:
   608             axis2 = axis2.transpose(Image.ROTATE_270)
   609         im.paste(axis1, (0, 0))
   610         im.paste(axis2, (0, 0))
   611 
   612     for i in rangePixBegs1[1:]:
   613         box = i - opts.border_pixels, tMargin, i, height
   614         im.paste(opts.border_color, box)
   615 
   616     for i in rangePixBegs2[1:]:
   617         box = lMargin, i - opts.border_pixels, width, i
   618         im.paste(opts.border_color, box)
   619 
   620     im.save(args[1])
   621 
   622 if __name__ == "__main__":
   623     usage = """%prog --help
   624    or: %prog [options] maf-or-tab-alignments dotplot.png
   625    or: %prog [options] maf-or-tab-alignments dotplot.gif
   626    or: ..."""
   627     description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
   628     op = optparse.OptionParser(usage=usage, description=description)
   629     op.add_option("-v", "--verbose", action="count",
   630                   help="show progress messages & data about the plot")
   631     op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
   632                   default=[],
   633                   help="which sequences to show from the 1st genome")
   634     op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
   635                   default=[],
   636                   help="which sequences to show from the 2nd genome")
   637     # Replace "width" & "height" with a single "length" option?
   638     op.add_option("-x", "--width", type="int", default=1000,
   639                   help="maximum width in pixels (default: %default)")
   640     op.add_option("-y", "--height", type="int", default=1000,
   641                   help="maximum height in pixels (default: %default)")
   642     op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
   643                   help="color for forward alignments (default: %default)")
   644     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
   645                   help="color for reverse alignments (default: %default)")
   646     op.add_option("--sort1", type="int", default=1, metavar="N",
   647                   help="genome1 sequence order: 0=input order, 1=name order, "
   648                   "2=length order (default=%default)")
   649     op.add_option("--sort2", type="int", default=1, metavar="N",
   650                   help="genome2 sequence order: 0=input order, 1=name order, "
   651                   "2=length order (default=%default)")
   652     op.add_option("--trim1", action="store_true",
   653                   help="trim unaligned sequence flanks from the 1st genome")
   654     op.add_option("--trim2", action="store_true",
   655                   help="trim unaligned sequence flanks from the 2nd genome")
   656     op.add_option("--border-pixels", metavar="INT", type="int", default=1,
   657                   help="number of pixels between sequences (default=%default)")
   658     op.add_option("--border-color", metavar="COLOR", default="black",
   659                   help="color for pixels between sequences (default=%default)")
   660     op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
   661                   help="margin color")
   662 
   663     og = optparse.OptionGroup(op, "Text options")
   664     og.add_option("-f", "--fontfile", metavar="FILE",
   665                   help="TrueType or OpenType font file")
   666     og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
   667                   help="TrueType or OpenType font size (default: %default)")
   668     og.add_option("--labels1", type="int", default=0, metavar="N", help=
   669                   "genome1 labels: 0=name, 1=name:length, "
   670                   "2=name:start:length, 3=name:start-end (default=%default)")
   671     og.add_option("--labels2", type="int", default=0, metavar="N", help=
   672                   "genome2 labels: 0=name, 1=name:length, "
   673                   "2=name:start:length, 3=name:start-end (default=%default)")
   674     og.add_option("--rot1", metavar="ROT", default="h",
   675                   help="text rotation for the 1st genome (default=%default)")
   676     og.add_option("--rot2", metavar="ROT", default="v",
   677                   help="text rotation for the 2nd genome (default=%default)")
   678     op.add_option_group(og)
   679 
   680     og = optparse.OptionGroup(op, "Annotation options")
   681     og.add_option("--bed1", metavar="FILE",
   682                   help="read genome1 annotations from BED file")
   683     og.add_option("--bed2", metavar="FILE",
   684                   help="read genome2 annotations from BED file")
   685     og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
   686                   "RepeatMasker .out or rmsk.txt file")
   687     og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
   688                   "RepeatMasker .out or rmsk.txt file")
   689     op.add_option_group(og)
   690 
   691     og = optparse.OptionGroup(op, "Gene options")
   692     og.add_option("--genePred1", metavar="FILE",
   693                   help="read genome1 genes from genePred file")
   694     og.add_option("--genePred2", metavar="FILE",
   695                   help="read genome2 genes from genePred file")
   696     og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
   697                   help="color for exons (default=%default)")
   698     og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
   699                   help="color for protein-coding regions (default=%default)")
   700     op.add_option_group(og)
   701 
   702     og = optparse.OptionGroup(op, "Unsequenced gap options")
   703     og.add_option("--gap1", metavar="FILE",
   704                   help="read genome1 unsequenced gaps from agp or gap file")
   705     og.add_option("--gap2", metavar="FILE",
   706                   help="read genome2 unsequenced gaps from agp or gap file")
   707     og.add_option("--bridged-color", metavar="COLOR", default="yellow",
   708                   help="color for bridged gaps (default: %default)")
   709     og.add_option("--unbridged-color", metavar="COLOR", default="orange",
   710                   help="color for unbridged gaps (default: %default)")
   711     op.add_option_group(og)
   712     (opts, args) = op.parse_args()
   713     if len(args) != 2: op.error("2 arguments needed")
   714 
   715     opts.text_color = "black"
   716     opts.background_color = "white"
   717     opts.label_space = 5     # minimum number of pixels between axis labels
   718 
   719     try: lastDotplot(opts, args)
   720     except KeyboardInterrupt: pass  # avoid silly error message
   721     except Exception as e:
   722         prog = os.path.basename(sys.argv[0])
   723         sys.exit(prog + ": error: " + str(e))