scripts/last-dotplot
author Martin C. Frith
Wed Nov 08 11:36:01 2017 +0900 (2017-11-08)
changeset 904 c2f50cd4e29d
parent 903 3af6b02e3f9c
child 905 8d52cef22afa
permissions -rwxr-xr-x
Tweak dotplot labels
     1 #! /usr/bin/env python
     2 
     3 # Read pair-wise alignments in MAF or LAST tabular format: write an
     4 # "Oxford grid", a.k.a. dotplot.
     5 
     6 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
     7 # pixels with one or more aligned nt-pairs are black.  This can look
     8 # too crowded for large genome alignments.  I tried shading each pixel
     9 # according to the number of aligned nt-pairs within it, but the
    10 # result is too faint.  How can this be done better?
    11 
    12 import gzip
    13 from fnmatch import fnmatchcase
    14 import subprocess
    15 import itertools, optparse, os, re, sys
    16 
    17 # Try to make PIL/PILLOW work:
    18 try: from PIL import Image, ImageDraw, ImageFont, ImageColor
    19 except ImportError: import Image, ImageDraw, ImageFont, ImageColor
    20 
    21 def myOpen(fileName):  # faster than fileinput
    22     if fileName is None:
    23         return []
    24     if fileName == "-":
    25         return sys.stdin
    26     if fileName.endswith(".gz"):
    27         return gzip.open(fileName)
    28     return open(fileName)
    29 
    30 def warn(message):
    31     if opts.verbose:
    32         prog = os.path.basename(sys.argv[0])
    33         sys.stderr.write(prog + ": " + message + "\n")
    34 
    35 def croppedBlocks(blocks, range1, range2):
    36     cropBeg1, cropEnd1 = range1
    37     cropBeg2, cropEnd2 = range2
    38     if blocks[0][0] < 0: cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
    39     if blocks[0][1] < 0: cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
    40     for beg1, beg2, size in blocks:
    41         b1 = max(cropBeg1, beg1)
    42         e1 = min(cropEnd1, beg1 + size)
    43         if b1 >= e1: continue
    44         offset = beg2 - beg1
    45         b2 = max(cropBeg2, b1 + offset)
    46         e2 = min(cropEnd2, e1 + offset)
    47         if b2 >= e2: continue
    48         yield b2 - offset, b2, e2 - b2
    49 
    50 def tabBlocks(beg1, beg2, blocks):
    51     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
    52     for i in blocks.split(","):
    53         if ":" in i:
    54             x, y = i.split(":")
    55             beg1 += int(x)
    56             beg2 += int(y)
    57         else:
    58             size = int(i)
    59             yield beg1, beg2, size
    60             beg1 += size
    61             beg2 += size
    62 
    63 def mafBlocks(beg1, beg2, seq1, seq2):
    64     '''Get the gapless blocks of an alignment, from MAF format.'''
    65     size = 0
    66     for x, y in itertools.izip(seq1, seq2):
    67         if x == "-":
    68             if size:
    69                 yield beg1, beg2, size
    70                 beg1 += size
    71                 beg2 += size
    72                 size = 0
    73             beg2 += 1
    74         elif y == "-":
    75             if size:
    76                 yield beg1, beg2, size
    77                 beg1 += size
    78                 beg2 += size
    79                 size = 0
    80             beg1 += 1
    81         else:
    82             size += 1
    83     if size: yield beg1, beg2, size
    84 
    85 def alignmentInput(lines):
    86     '''Get alignments and sequence lengths, from MAF or tabular format.'''
    87     mafCount = 0
    88     for line in lines:
    89         w = line.split()
    90         if line[0].isdigit():  # tabular format
    91             chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
    92             if w[4] == "-": beg1 -= seqlen1
    93             chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
    94             if w[9] == "-": beg2 -= seqlen2
    95             blocks = tabBlocks(beg1, beg2, w[11])
    96             yield chr1, seqlen1, chr2, seqlen2, blocks
    97         elif line[0] == "s":  # MAF format
    98             if mafCount == 0:
    99                 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
   100                 if w[4] == "-": beg1 -= seqlen1
   101                 mafCount = 1
   102             else:
   103                 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
   104                 if w[4] == "-": beg2 -= seqlen2
   105                 blocks = mafBlocks(beg1, beg2, seq1, seq2)
   106                 yield chr1, seqlen1, chr2, seqlen2, blocks
   107                 mafCount = 0
   108 
   109 def seqRequestFromText(text):
   110     if ":" in text:
   111         pattern, interval = text.rsplit(":", 1)
   112         if "-" in interval:
   113             beg, end = interval.rsplit("-", 1)
   114             return pattern, int(beg), int(end)  # beg may be negative
   115     return text, 0, sys.maxsize
   116 
   117 def rangeFromSeqName(seqRequests, name, seqLen):
   118     if not seqRequests: return 0, seqLen
   119     base = name.split(".")[-1]  # allow for names like hg19.chr7
   120     for pat, beg, end in seqRequests:
   121         if fnmatchcase(name, pat) or fnmatchcase(base, pat):
   122             return max(beg, 0), min(end, seqLen)
   123     return None
   124 
   125 def updateSeqs(isTrim, seqNames, seqLimits, seqName, seqRange, blocks, index):
   126     if seqName not in seqLimits:
   127         seqNames.append(seqName)
   128     if isTrim:
   129         beg = blocks[0][index]
   130         end = blocks[-1][index] + blocks[-1][2]
   131         if beg < 0: beg, end = -end, -beg
   132         if seqName in seqLimits:
   133             b, e = seqLimits[seqName]
   134             seqLimits[seqName] = min(b, beg), max(e, end)
   135         else:
   136             seqLimits[seqName] = beg, end
   137     else:
   138         seqLimits[seqName] = seqRange
   139 
   140 def readAlignments(fileName, opts):
   141     '''Get alignments and sequence limits, from MAF or tabular format.'''
   142     seqRequests1 = map(seqRequestFromText, opts.seq1)
   143     seqRequests2 = map(seqRequestFromText, opts.seq2)
   144 
   145     alignments = []
   146     seqNames1 = []
   147     seqNames2 = []
   148     seqLimits1 = {}
   149     seqLimits2 = {}
   150     lines = myOpen(fileName)
   151     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   152         range1 = rangeFromSeqName(seqRequests1, seqName1, seqLen1)
   153         if not range1: continue
   154         range2 = rangeFromSeqName(seqRequests2, seqName2, seqLen2)
   155         if not range2: continue
   156         b = list(croppedBlocks(list(blocks), range1, range2))
   157         if not b: continue
   158         aln = seqName1, seqName2, b
   159         alignments.append(aln)
   160         updateSeqs(opts.trim1, seqNames1, seqLimits1, seqName1, range1, b, 0)
   161         updateSeqs(opts.trim2, seqNames2, seqLimits2, seqName2, range2, b, 1)
   162     return alignments, seqNames1, seqNames2, seqLimits1, seqLimits2
   163 
   164 def natural_sort_key(my_string):
   165     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
   166     parts = re.split(r'(\d+)', my_string)
   167     parts[1::2] = map(int, parts[1::2])
   168     return parts
   169 
   170 def prettyNum(n):
   171     t = str(n)
   172     groups = []
   173     while t:
   174         groups.append(t[-3:])
   175         t = t[:-3]
   176     return ",".join(reversed(groups))
   177 
   178 def sizeText(size):
   179     suffixes = "bp", "kb", "Mb", "Gb"
   180     for i, x in enumerate(suffixes):
   181         j = 10 ** (i * 3)
   182         if size < j * 10:
   183             return "%.2g" % (1.0 * size / j) + x
   184         if size < j * 1000 or i == len(suffixes) - 1:
   185             return "%.0f" % (1.0 * size / j) + x
   186 
   187 def labelText(seqRange, labelOpt):
   188     seqName, beg, end = seqRange
   189     if labelOpt == 1:
   190         return seqName + ": " + sizeText(end - beg)
   191     if labelOpt == 2:
   192         return seqName + ":" + prettyNum(beg) + ": " + sizeText(end - beg)
   193     if labelOpt == 3:
   194         return seqName + ":" + prettyNum(beg) + "-" + prettyNum(end)
   195     return seqName
   196 
   197 def rangeLabels(seqRanges, labelOpt, font, fontsize, image_mode, textRot):
   198     if fontsize:
   199         image_size = 1, 1
   200         im = Image.new(image_mode, image_size)
   201         draw = ImageDraw.Draw(im)
   202     x = y = 0
   203     for r in seqRanges:
   204         text = labelText(r, labelOpt)
   205         if fontsize:
   206             x, y = draw.textsize(text, font=font)
   207             if textRot:
   208                 x, y = y, x
   209         yield text, x, y
   210 
   211 def getSeqInfo(sortOpt, seqNames, seqLimits,
   212                font, fontsize, image_mode, labelOpt, textRot):
   213     '''Return miscellaneous information about the sequences.'''
   214     if sortOpt == 1:
   215         seqNames.sort(key=natural_sort_key)
   216     seqSizes = [seqLimits[i][1] - seqLimits[i][0] for i in seqNames]
   217     for i in seqNames:
   218         r = seqLimits[i]
   219         out = i, str(r[0]), str(r[1])
   220         warn("\t".join(out))
   221     warn("")
   222     if sortOpt == 2:
   223         seqRecords = sorted(zip(seqSizes, seqNames), reverse=True)
   224         seqSizes = [i[0] for i in seqRecords]
   225         seqNames = [i[1] for i in seqRecords]
   226     seqRanges = [(i, seqLimits[i][0], seqLimits[i][1]) for i in seqNames]
   227     labelData = list(rangeLabels(seqRanges, labelOpt,
   228                                  font, fontsize, image_mode, textRot))
   229     margin = max(i[2] for i in labelData)
   230     # xxx the margin may be too big, because some labels may get omitted
   231     return seqNames, seqSizes, labelData, margin
   232 
   233 def div_ceil(x, y):
   234     '''Return x / y rounded up.'''
   235     q, r = divmod(x, y)
   236     return q + (r != 0)
   237 
   238 def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
   239     '''Get the minimum bp-per-pixel that fits in the size limit.'''
   240     warn("choosing bp per pixel...")
   241     numOfRanges = len(rangeSizes)
   242     maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
   243     if maxPixelsInRanges < numOfRanges:
   244         raise Exception("can't fit the image: too many sequences?")
   245     negLimit = -maxPixelsInRanges
   246     negBpPerPix = sum(rangeSizes) // negLimit
   247     while True:
   248         if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
   249             return -negBpPerPix
   250         negBpPerPix -= 1
   251 
   252 def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
   253     '''Get the start pixel for each range.'''
   254     rangePixBegs = []
   255     pix_tot = margin - pixTweenRanges
   256     for i in rangePixLens:
   257         pix_tot += pixTweenRanges
   258         rangePixBegs.append(pix_tot)
   259         pix_tot += i
   260     return rangePixBegs
   261 
   262 def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
   263     '''Return pixel information about the ranges.'''
   264     rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
   265     rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
   266     tot_pix = rangePixBegs[-1] + rangePixLens[-1]
   267     return rangePixBegs, rangePixLens, tot_pix
   268 
   269 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
   270     while True:
   271         q1, r1 = divmod(beg1, bp_per_pix)
   272         q2, r2 = divmod(beg2, bp_per_pix)
   273         hits[q2 * width + q1] |= 1
   274         next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
   275         if next_pix >= size: break
   276         beg1 += next_pix
   277         beg2 += next_pix
   278         size -= next_pix
   279 
   280 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
   281     beg2 = -1 - beg2
   282     while True:
   283         q1, r1 = divmod(beg1, bp_per_pix)
   284         q2, r2 = divmod(beg2, bp_per_pix)
   285         hits[q2 * width + q1] |= 2
   286         next_pix = min(bp_per_pix - r1, r2 + 1)
   287         if next_pix >= size: break
   288         beg1 += next_pix
   289         beg2 -= next_pix
   290         size -= next_pix
   291 
   292 def alignmentPixels(width, height, alignments, bp_per_pix, origins1, origins2):
   293     hits = [0] * (width * height)  # the image data
   294     for seq1, seq2, blocks in alignments:
   295         ori1 = origins1[seq1]
   296         ori2 = origins2[seq2]
   297         for beg1, beg2, size in blocks:
   298             if beg1 < 0:
   299                 beg1 = -(beg1 + size)
   300                 beg2 = -(beg2 + size)
   301             if beg2 >= 0:
   302                 drawLineForward(hits, width, bp_per_pix,
   303                                 beg1 + ori1, beg2 + ori2, size)
   304             else:
   305                 drawLineReverse(hits, width, bp_per_pix,
   306                                 beg1 + ori1, beg2 - ori2, size)
   307     return hits
   308 
   309 def expandedSeqDict(seqDict):
   310     '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   311     newDict = seqDict.copy()
   312     for name, x in seqDict.items():
   313         if "." in name:
   314             base = name.split(".")[-1]
   315             if base in newDict:  # an ambiguous case was found:
   316                 return seqDict   # so give up completely
   317             newDict[base] = x
   318     return newDict
   319 
   320 def readBed(fileName, seqLimits):
   321     for line in myOpen(fileName):
   322         w = line.split()
   323         if not w: continue
   324         seqName = w[0]
   325         if seqName not in seqLimits: continue
   326         beg = int(w[1])
   327         end = int(w[2])
   328         layer = 900
   329         color = "#fbf"
   330         if len(w) > 4:
   331             if w[4] != ".":
   332                 layer = float(w[4])
   333             if len(w) > 5:
   334                 if len(w) > 8 and w[8].count(",") == 2:
   335                     color = "rgb(" + w[8] + ")"
   336                 elif w[5] == "+":
   337                     color = "#ffe8e8"
   338                 elif w[5] == "-":
   339                     color = "#e8e8ff"
   340         yield layer, color, seqName, beg, end
   341 
   342 def commaSeparatedInts(text):
   343     return map(int, text.rstrip(",").split(","))
   344 
   345 def readGenePred(opts, fileName, seqLimits):
   346     for line in myOpen(fileName):
   347         fields = line.split()
   348         if not fields: continue
   349         if fields[2] not in "+-": fields = fields[1:]
   350         seqName = fields[1]
   351         if seqName not in seqLimits: continue
   352         #strand = fields[2]
   353         cdsBeg = int(fields[5])
   354         cdsEnd = int(fields[6])
   355         exonBegs = commaSeparatedInts(fields[8])
   356         exonEnds = commaSeparatedInts(fields[9])
   357         for beg, end in zip(exonBegs, exonEnds):
   358             yield 300, opts.exon_color, seqName, beg, end
   359             b = max(beg, cdsBeg)
   360             e = min(end, cdsEnd)
   361             if b < e: yield 400, opts.cds_color, seqName, b, e
   362 
   363 def readRmsk(fileName, seqLimits):
   364     for line in myOpen(fileName):
   365         fields = line.split()
   366         if len(fields) == 17:  # rmsk.txt
   367             seqName = fields[5]
   368             if seqName not in seqLimits: continue  # do this ASAP for speed
   369             beg = int(fields[6])
   370             end = int(fields[7])
   371             strand = fields[9]
   372             repeatClass = fields[11]
   373         elif len(fields) == 15:  # .out
   374             seqName = fields[4]
   375             if seqName not in seqLimits: continue
   376             beg = int(fields[5]) - 1
   377             end = int(fields[6])
   378             strand = fields[8]
   379             repeatClass = fields[10]
   380         else:
   381             continue
   382         if repeatClass in ("Low_complexity", "Simple_repeat"):
   383             yield 200, "#fbf", seqName, beg, end
   384         elif strand == "+":
   385             yield 100, "#ffe8e8", seqName, beg, end
   386         else:
   387             yield 100, "#e8e8ff", seqName, beg, end
   388 
   389 def isExtraFirstGapField(fields):
   390     return fields[4].isdigit()
   391 
   392 def readGaps(opts, fileName, seqLimits):
   393     '''Read locations of unsequenced gaps, from an agp or gap file.'''
   394     for line in myOpen(fileName):
   395         w = line.split()
   396         if not w or w[0][0] == "#": continue
   397         if isExtraFirstGapField(w): w = w[1:]
   398         if w[4] not in "NU": continue
   399         seqName = w[0]
   400         if seqName not in seqLimits: continue
   401         end = int(w[2])
   402         beg = end - int(w[5])  # zero-based coordinate
   403         if w[7] == "yes":
   404             yield 3000, opts.bridged_color, seqName, beg, end
   405         else:
   406             yield 2000, opts.unbridged_color, seqName, beg, end
   407 
   408 def bedBoxes(beds, seqLimits, origins, margin, edge, isTop, bpPerPix):
   409     for layer, color, seqName, bedBeg, bedEnd in beds:
   410         cropBeg, cropEnd = seqLimits[seqName]
   411         beg = max(bedBeg, cropBeg)
   412         end = min(bedEnd, cropEnd)
   413         if beg >= end: continue
   414         ori = origins[seqName]
   415         if layer <= 1000:
   416             # include partly-covered pixels
   417             b = (ori + beg) // bpPerPix
   418             e = div_ceil(ori + end, bpPerPix)
   419         else:
   420             # exclude partly-covered pixels
   421             b = div_ceil(ori + beg, bpPerPix)
   422             e = (ori + end) // bpPerPix
   423             if e <= b: continue
   424         if isTop:
   425             box = b, margin, e, edge
   426         else:
   427             box = margin, b, edge, e
   428         yield layer, color, box
   429 
   430 def drawAnnotations(im, boxes):
   431     # xxx use partial transparency for different-color overlaps?
   432     for layer, color, box in boxes:
   433         im.paste(color, box)
   434 
   435 def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
   436     '''Return axis labels with endpoint & sort-order information.'''
   437     maxWidth = end - beg
   438     for i, j, k in zip(labels, rangePixBegs, rangePixLens):
   439         text, textWidth, textHeight = i
   440         if textWidth > maxWidth:
   441             continue
   442         labelBeg = j + (k - textWidth) // 2
   443         labelEnd = labelBeg + textWidth
   444         sortKey = textWidth - k
   445         if labelBeg < beg:
   446             sortKey += maxWidth * (beg - labelBeg)
   447             labelBeg = beg
   448             labelEnd = beg + textWidth
   449         if labelEnd > end:
   450             sortKey += maxWidth * (labelEnd - end)
   451             labelEnd = end
   452             labelBeg = end - textWidth
   453         yield sortKey, labelBeg, labelEnd, text, textHeight
   454 
   455 def nonoverlappingLabels(labels, minPixTweenLabels):
   456     '''Get a subset of non-overlapping axis labels, greedily.'''
   457     out = []
   458     for i in labels:
   459         beg = i[1] - minPixTweenLabels
   460         end = i[2] + minPixTweenLabels
   461         if all(j[2] <= beg or j[1] >= end for j in out):
   462             out.append(i)
   463     return out
   464 
   465 def axisImage(labels, rangePixBegs, rangePixLens, textRot,
   466               textAln, font, image_mode, opts):
   467     '''Make an image of axis labels.'''
   468     beg = rangePixBegs[0]
   469     end = rangePixBegs[-1] + rangePixLens[-1]
   470     margin = max(i[2] for i in labels)
   471     labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
   472     minPixTweenLabels = 0 if textRot else opts.label_space
   473     labels = nonoverlappingLabels(labels, minPixTweenLabels)
   474     image_size = (margin, end) if textRot else (end, margin)
   475     im = Image.new(image_mode, image_size, opts.margin_color)
   476     draw = ImageDraw.Draw(im)
   477     for i in labels:
   478         base = margin - i[4] if textAln else 0
   479         position = (base, i[1]) if textRot else (i[1], base)
   480         draw.text(position, i[3], font=font, fill=opts.text_color)
   481     return im
   482 
   483 def seqOrigins(seqNames, rangePixBegs, seqLimits, bp_per_pix):
   484     for i, j in zip(seqNames, rangePixBegs):
   485         yield i, bp_per_pix * j - seqLimits[i][0]
   486 
   487 def getFont(opts):
   488     if opts.fontfile:
   489         return ImageFont.truetype(opts.fontfile, opts.fontsize)
   490     fileNames = []
   491     try:
   492         x = ["fc-match", "-f%{file}", "arial"]
   493         p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
   494         out, err = p.communicate()
   495         fileNames.append(out)
   496     except OSError as e:
   497         warn("fc-match error: " + str(e))
   498     fileNames.append("/Library/Fonts/Arial.ttf")  # for Mac
   499     for i in fileNames:
   500         try:
   501             font = ImageFont.truetype(i, opts.fontsize)
   502             warn("font: " + i)
   503             return font
   504         except IOError as e:
   505             warn("font load error: " + str(e))
   506     return ImageFont.load_default()
   507 
   508 def lastDotplot(opts, args):
   509     font = getFont(opts)
   510     image_mode = 'RGB'
   511     forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
   512     reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
   513     zipped_colors = zip(forward_color, reverse_color)
   514     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
   515 
   516     warn("reading alignments...")
   517     alnData = readAlignments(args[0], opts)
   518     alignments, seqNames1, seqNames2, seqLimits1, seqLimits2 = alnData
   519     warn("done")
   520     if not alignments: raise Exception("there are no alignments")
   521 
   522     textRot1 = "vertical".startswith(opts.rot1)
   523     i1 = getSeqInfo(opts.sort1, seqNames1, seqLimits1,
   524                     font, opts.fontsize, image_mode, opts.labels1, textRot1)
   525     seqNames1, rangeSizes1, labelData1, tMargin = i1
   526 
   527     textRot2 = "horizontal".startswith(opts.rot2)
   528     i2 = getSeqInfo(opts.sort2, seqNames2, seqLimits2,
   529                     font, opts.fontsize, image_mode, opts.labels2, textRot2)
   530     seqNames2, rangeSizes2, labelData2, lMargin = i2
   531 
   532     maxPixels1 = opts.width  - lMargin
   533     maxPixels2 = opts.height - tMargin
   534     bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
   535     bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
   536     bpPerPix = max(bpPerPix1, bpPerPix2)
   537     warn("bp per pixel = " + str(bpPerPix))
   538 
   539     p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
   540     rangePixBegs1, rangePixLens1, width = p1
   541 
   542     p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
   543     rangePixBegs2, rangePixLens2, height = p2
   544 
   545     warn("width:  " + str(width))
   546     warn("height: " + str(height))
   547 
   548     origins1 = dict(seqOrigins(seqNames1, rangePixBegs1, seqLimits1, bpPerPix))
   549     origins2 = dict(seqOrigins(seqNames2, rangePixBegs2, seqLimits2, bpPerPix))
   550 
   551     warn("processing alignments...")
   552     hits = alignmentPixels(width, height, alignments, bpPerPix,
   553                            origins1, origins2)
   554 
   555     warn("reading annotations...")
   556 
   557     seqLimits1 = expandedSeqDict(seqLimits1)
   558     seqLimits2 = expandedSeqDict(seqLimits2)
   559     origins1 = expandedSeqDict(origins1)
   560     origins2 = expandedSeqDict(origins2)
   561 
   562     beds1 = itertools.chain(readBed(opts.bed1, seqLimits1),
   563                             readRmsk(opts.rmsk1, seqLimits1),
   564                             readGenePred(opts, opts.genePred1, seqLimits1),
   565                             readGaps(opts, opts.gap1, seqLimits1))
   566     b1 = bedBoxes(beds1, seqLimits1, origins1, tMargin, height, True, bpPerPix)
   567 
   568     beds2 = itertools.chain(readBed(opts.bed2, seqLimits2),
   569                             readRmsk(opts.rmsk2, seqLimits2),
   570                             readGenePred(opts, opts.genePred2, seqLimits2),
   571                             readGaps(opts, opts.gap2, seqLimits2))
   572     b2 = bedBoxes(beds2, seqLimits2, origins2, lMargin, width, False, bpPerPix)
   573 
   574     boxes = sorted(itertools.chain(b1, b2))
   575 
   576     warn("drawing...")
   577 
   578     image_size = width, height
   579     im = Image.new(image_mode, image_size, opts.background_color)
   580 
   581     drawAnnotations(im, boxes)
   582 
   583     for i in range(height):
   584         for j in range(width):
   585             store_value = hits[i * width + j]
   586             xy = j, i
   587             if   store_value == 1: im.putpixel(xy, forward_color)
   588             elif store_value == 2: im.putpixel(xy, reverse_color)
   589             elif store_value == 3: im.putpixel(xy, overlap_color)
   590 
   591     if opts.fontsize != 0:
   592         axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
   593                           textRot1, False, font, image_mode, opts)
   594         if textRot1:
   595             axis1 = axis1.transpose(Image.ROTATE_90)
   596         axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
   597                           textRot2, textRot2, font, image_mode, opts)
   598         if not textRot2:
   599             axis2 = axis2.transpose(Image.ROTATE_270)
   600         im.paste(axis1, (0, 0))
   601         im.paste(axis2, (0, 0))
   602 
   603     for i in rangePixBegs1[1:]:
   604         box = i - opts.border_pixels, tMargin, i, height
   605         im.paste(opts.border_color, box)
   606 
   607     for i in rangePixBegs2[1:]:
   608         box = lMargin, i - opts.border_pixels, width, i
   609         im.paste(opts.border_color, box)
   610 
   611     im.save(args[1])
   612 
   613 if __name__ == "__main__":
   614     usage = """%prog --help
   615    or: %prog [options] maf-or-tab-alignments dotplot.png
   616    or: %prog [options] maf-or-tab-alignments dotplot.gif
   617    or: ..."""
   618     description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
   619     op = optparse.OptionParser(usage=usage, description=description)
   620     op.add_option("-v", "--verbose", action="count",
   621                   help="show progress messages & data about the plot")
   622     op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
   623                   default=[],
   624                   help="which sequences to show from the 1st genome")
   625     op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
   626                   default=[],
   627                   help="which sequences to show from the 2nd genome")
   628     # Replace "width" & "height" with a single "length" option?
   629     op.add_option("-x", "--width", type="int", default=1000,
   630                   help="maximum width in pixels (default: %default)")
   631     op.add_option("-y", "--height", type="int", default=1000,
   632                   help="maximum height in pixels (default: %default)")
   633     op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
   634                   help="color for forward alignments (default: %default)")
   635     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
   636                   help="color for reverse alignments (default: %default)")
   637     op.add_option("--sort1", type="int", default=1, metavar="N",
   638                   help="genome1 sequence order: 0=input order, 1=name order, "
   639                   "2=length order (default=%default)")
   640     op.add_option("--sort2", type="int", default=1, metavar="N",
   641                   help="genome2 sequence order: 0=input order, 1=name order, "
   642                   "2=length order (default=%default)")
   643     op.add_option("--trim1", action="store_true",
   644                   help="trim unaligned sequence flanks from the 1st genome")
   645     op.add_option("--trim2", action="store_true",
   646                   help="trim unaligned sequence flanks from the 2nd genome")
   647     op.add_option("--border-pixels", metavar="INT", type="int", default=1,
   648                   help="number of pixels between sequences (default=%default)")
   649     op.add_option("--border-color", metavar="COLOR", default="black",
   650                   help="color for pixels between sequences (default=%default)")
   651     op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
   652                   help="margin color")
   653 
   654     og = optparse.OptionGroup(op, "Text options")
   655     og.add_option("-f", "--fontfile", metavar="FILE",
   656                   help="TrueType or OpenType font file")
   657     og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
   658                   help="TrueType or OpenType font size (default: %default)")
   659     og.add_option("--labels1", type="int", default=0, metavar="N", help=
   660                   "genome1 labels: 0=name, 1=name:length, "
   661                   "2=name:start:length, 3=name:start-end (default=%default)")
   662     og.add_option("--labels2", type="int", default=0, metavar="N", help=
   663                   "genome2 labels: 0=name, 1=name:length, "
   664                   "2=name:start:length, 3=name:start-end (default=%default)")
   665     og.add_option("--rot1", metavar="ROT", default="h",
   666                   help="text rotation for the 1st genome (default=%default)")
   667     og.add_option("--rot2", metavar="ROT", default="v",
   668                   help="text rotation for the 2nd genome (default=%default)")
   669     op.add_option_group(og)
   670 
   671     og = optparse.OptionGroup(op, "Annotation options")
   672     og.add_option("--bed1", metavar="FILE",
   673                   help="read genome1 annotations from BED file")
   674     og.add_option("--bed2", metavar="FILE",
   675                   help="read genome2 annotations from BED file")
   676     og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
   677                   "RepeatMasker .out or rmsk.txt file")
   678     og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
   679                   "RepeatMasker .out or rmsk.txt file")
   680     op.add_option_group(og)
   681 
   682     og = optparse.OptionGroup(op, "Gene options")
   683     og.add_option("--genePred1", metavar="FILE",
   684                   help="read genome1 genes from genePred file")
   685     og.add_option("--genePred2", metavar="FILE",
   686                   help="read genome2 genes from genePred file")
   687     og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
   688                   help="color for exons (default=%default)")
   689     og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
   690                   help="color for protein-coding regions (default=%default)")
   691     op.add_option_group(og)
   692 
   693     og = optparse.OptionGroup(op, "Unsequenced gap options")
   694     og.add_option("--gap1", metavar="FILE",
   695                   help="read genome1 unsequenced gaps from agp or gap file")
   696     og.add_option("--gap2", metavar="FILE",
   697                   help="read genome2 unsequenced gaps from agp or gap file")
   698     og.add_option("--bridged-color", metavar="COLOR", default="yellow",
   699                   help="color for bridged gaps (default: %default)")
   700     og.add_option("--unbridged-color", metavar="COLOR", default="orange",
   701                   help="color for unbridged gaps (default: %default)")
   702     op.add_option_group(og)
   703     (opts, args) = op.parse_args()
   704     if len(args) != 2: op.error("2 arguments needed")
   705 
   706     opts.text_color = "black"
   707     opts.background_color = "white"
   708     opts.label_space = 5     # minimum number of pixels between axis labels
   709 
   710     try: lastDotplot(opts, args)
   711     except KeyboardInterrupt: pass  # avoid silly error message
   712     except Exception as e:
   713         prog = os.path.basename(sys.argv[0])
   714         sys.exit(prog + ": error: " + str(e))