scripts/last-dotplot
author Martin C. Frith
Thu Nov 02 22:18:05 2017 +0900 (2017-11-02)
changeset 901 2003b209d750
parent 900 5ce3e786e05a
child 903 3af6b02e3f9c
permissions -rwxr-xr-x
Tweak last-dotplot's label placement
     1 #! /usr/bin/env python
     2 
     3 # Read pair-wise alignments in MAF or LAST tabular format: write an
     4 # "Oxford grid", a.k.a. dotplot.
     5 
     6 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
     7 # pixels with one or more aligned nt-pairs are black.  This can look
     8 # too crowded for large genome alignments.  I tried shading each pixel
     9 # according to the number of aligned nt-pairs within it, but the
    10 # result is too faint.  How can this be done better?
    11 
    12 import gzip
    13 from fnmatch import fnmatchcase
    14 import itertools, optparse, os, re, sys
    15 
    16 # Try to make PIL/PILLOW work:
    17 try: from PIL import Image, ImageDraw, ImageFont, ImageColor
    18 except ImportError: import Image, ImageDraw, ImageFont, ImageColor
    19 
    20 def myOpen(fileName):  # faster than fileinput
    21     if fileName == "-":
    22         return sys.stdin
    23     if fileName.endswith(".gz"):
    24         return gzip.open(fileName)
    25     return open(fileName)
    26 
    27 def warn(message):
    28     if opts.verbose:
    29         prog = os.path.basename(sys.argv[0])
    30         sys.stderr.write(prog + ": " + message + "\n")
    31 
    32 def croppedBlocks(blocks, range1, range2):
    33     cropBeg1, cropEnd1 = range1
    34     cropBeg2, cropEnd2 = range2
    35     if blocks[0][0] < 0: cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
    36     if blocks[0][1] < 0: cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
    37     for beg1, beg2, size in blocks:
    38         b1 = max(cropBeg1, beg1)
    39         e1 = min(cropEnd1, beg1 + size)
    40         if b1 >= e1: continue
    41         offset = beg2 - beg1
    42         b2 = max(cropBeg2, b1 + offset)
    43         e2 = min(cropEnd2, e1 + offset)
    44         if b2 >= e2: continue
    45         yield b2 - offset, b2, e2 - b2
    46 
    47 def tabBlocks(beg1, beg2, blocks):
    48     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
    49     for i in blocks.split(","):
    50         if ":" in i:
    51             x, y = i.split(":")
    52             beg1 += int(x)
    53             beg2 += int(y)
    54         else:
    55             size = int(i)
    56             yield beg1, beg2, size
    57             beg1 += size
    58             beg2 += size
    59 
    60 def mafBlocks(beg1, beg2, seq1, seq2):
    61     '''Get the gapless blocks of an alignment, from MAF format.'''
    62     size = 0
    63     for x, y in itertools.izip(seq1, seq2):
    64         if x == "-":
    65             if size:
    66                 yield beg1, beg2, size
    67                 beg1 += size
    68                 beg2 += size
    69                 size = 0
    70             beg2 += 1
    71         elif y == "-":
    72             if size:
    73                 yield beg1, beg2, size
    74                 beg1 += size
    75                 beg2 += size
    76                 size = 0
    77             beg1 += 1
    78         else:
    79             size += 1
    80     if size: yield beg1, beg2, size
    81 
    82 def alignmentInput(lines):
    83     '''Get alignments and sequence lengths, from MAF or tabular format.'''
    84     mafCount = 0
    85     for line in lines:
    86         w = line.split()
    87         if line[0].isdigit():  # tabular format
    88             chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
    89             if w[4] == "-": beg1 -= seqlen1
    90             chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
    91             if w[9] == "-": beg2 -= seqlen2
    92             blocks = tabBlocks(beg1, beg2, w[11])
    93             yield chr1, seqlen1, chr2, seqlen2, blocks
    94         elif line[0] == "s":  # MAF format
    95             if mafCount == 0:
    96                 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
    97                 if w[4] == "-": beg1 -= seqlen1
    98                 mafCount = 1
    99             else:
   100                 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
   101                 if w[4] == "-": beg2 -= seqlen2
   102                 blocks = mafBlocks(beg1, beg2, seq1, seq2)
   103                 yield chr1, seqlen1, chr2, seqlen2, blocks
   104                 mafCount = 0
   105 
   106 def seqRequestFromText(text):
   107     if ":" in text:
   108         pattern, interval = text.rsplit(":", 1)
   109         if "-" in interval:
   110             beg, end = interval.rsplit("-", 1)
   111             return pattern, int(beg), int(end)  # beg may be negative
   112     return text, 0, sys.maxsize
   113 
   114 def rangeFromSeqName(seqRequests, name, seqLen):
   115     if not seqRequests: return 0, seqLen
   116     base = name.split(".")[-1]  # allow for names like hg19.chr7
   117     for pat, beg, end in seqRequests:
   118         if fnmatchcase(name, pat) or fnmatchcase(base, pat):
   119             return max(beg, 0), min(end, seqLen)
   120     return None
   121 
   122 def updateSeqs(isTrim, seqNames, seqLimits, seqName, seqRange, blocks, index):
   123     if seqName not in seqLimits:
   124         seqNames.append(seqName)
   125     if isTrim:
   126         beg = blocks[0][index]
   127         end = blocks[-1][index] + blocks[-1][2]
   128         if beg < 0: beg, end = -end, -beg
   129         if seqName in seqLimits:
   130             b, e = seqLimits[seqName]
   131             seqLimits[seqName] = min(b, beg), max(e, end)
   132         else:
   133             seqLimits[seqName] = beg, end
   134     else:
   135         seqLimits[seqName] = seqRange
   136 
   137 def readAlignments(fileName, opts):
   138     '''Get alignments and sequence limits, from MAF or tabular format.'''
   139     seqRequests1 = map(seqRequestFromText, opts.seq1)
   140     seqRequests2 = map(seqRequestFromText, opts.seq2)
   141 
   142     alignments = []
   143     seqNames1 = []
   144     seqNames2 = []
   145     seqLimits1 = {}
   146     seqLimits2 = {}
   147     lines = myOpen(fileName)
   148     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   149         range1 = rangeFromSeqName(seqRequests1, seqName1, seqLen1)
   150         if not range1: continue
   151         range2 = rangeFromSeqName(seqRequests2, seqName2, seqLen2)
   152         if not range2: continue
   153         b = list(croppedBlocks(list(blocks), range1, range2))
   154         if not b: continue
   155         aln = seqName1, seqName2, b
   156         alignments.append(aln)
   157         updateSeqs(opts.trim1, seqNames1, seqLimits1, seqName1, range1, b, 0)
   158         updateSeqs(opts.trim2, seqNames2, seqLimits2, seqName2, range2, b, 1)
   159     return alignments, seqNames1, seqNames2, seqLimits1, seqLimits2
   160 
   161 def natural_sort_key(my_string):
   162     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
   163     parts = re.split(r'(\d+)', my_string)
   164     parts[1::2] = map(int, parts[1::2])
   165     return parts
   166 
   167 def prettyNum(n):
   168     t = str(n)
   169     groups = []
   170     while t:
   171         groups.append(t[-3:])
   172         t = t[:-3]
   173     return " ".join(reversed(groups))
   174 
   175 def sizeText(size):
   176     suffixes = "bp", "kb", "Mb", "Gb"
   177     for i, x in enumerate(suffixes):
   178         j = 10 ** (i * 3)
   179         if size < j * 10:
   180             return "%.2g" % (1.0 * size / j) + x
   181         if size < j * 1000 or i == len(suffixes) - 1:
   182             return "%.0f" % (1.0 * size / j) + x
   183 
   184 def labelText(seqRange, labelOpt):
   185     seqName, beg, end = seqRange
   186     if labelOpt == 1:
   187         return seqName + ": " + sizeText(end - beg)
   188     if labelOpt == 2:
   189         return seqName + ": " + prettyNum(beg) + ": " + sizeText(end - beg)
   190     if labelOpt == 3:
   191         return seqName + ": " + prettyNum(beg) + " - " + prettyNum(end)
   192     return seqName
   193 
   194 def rangeLabels(seqRanges, labelOpt, font, fontsize, image_mode, textRot):
   195     if fontsize:
   196         image_size = 1, 1
   197         im = Image.new(image_mode, image_size)
   198         draw = ImageDraw.Draw(im)
   199     x = y = 0
   200     for r in seqRanges:
   201         text = labelText(r, labelOpt)
   202         if fontsize:
   203             x, y = draw.textsize(text, font=font)
   204             if textRot:
   205                 x, y = y, x
   206         yield text, x, y
   207 
   208 def getSeqInfo(sortOpt, seqNames, seqLimits,
   209                font, fontsize, image_mode, labelOpt, textRot):
   210     '''Return miscellaneous information about the sequences.'''
   211     if sortOpt == 1:
   212         seqNames.sort(key=natural_sort_key)
   213     seqSizes = [seqLimits[i][1] - seqLimits[i][0] for i in seqNames]
   214     for i in seqNames:
   215         r = seqLimits[i]
   216         out = i, str(r[0]), str(r[1])
   217         warn("\t".join(out))
   218     warn("")
   219     if sortOpt == 2:
   220         seqRecords = sorted(zip(seqSizes, seqNames), reverse=True)
   221         seqSizes = [i[0] for i in seqRecords]
   222         seqNames = [i[1] for i in seqRecords]
   223     seqRanges = [(i, seqLimits[i][0], seqLimits[i][1]) for i in seqNames]
   224     labelData = list(rangeLabels(seqRanges, labelOpt,
   225                                  font, fontsize, image_mode, textRot))
   226     margin = max(i[2] for i in labelData)
   227     # xxx the margin may be too big, because some labels may get omitted
   228     return seqNames, seqSizes, labelData, margin
   229 
   230 def div_ceil(x, y):
   231     '''Return x / y rounded up.'''
   232     q, r = divmod(x, y)
   233     return q + (r != 0)
   234 
   235 def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
   236     '''Get the minimum bp-per-pixel that fits in the size limit.'''
   237     numOfRanges = len(rangeSizes)
   238     maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
   239     if maxPixelsInRanges < numOfRanges:
   240         raise Exception("can't fit the image: too many sequences?")
   241     negLimit = -maxPixelsInRanges
   242     negBpPerPix = sum(rangeSizes) // negLimit
   243     while True:
   244         if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
   245             return -negBpPerPix
   246         negBpPerPix -= 1
   247 
   248 def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
   249     '''Get the start pixel for each range.'''
   250     rangePixBegs = []
   251     pix_tot = margin - pixTweenRanges
   252     for i in rangePixLens:
   253         pix_tot += pixTweenRanges
   254         rangePixBegs.append(pix_tot)
   255         pix_tot += i
   256     return rangePixBegs
   257 
   258 def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
   259     '''Return pixel information about the ranges.'''
   260     rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
   261     rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
   262     tot_pix = rangePixBegs[-1] + rangePixLens[-1]
   263     return rangePixBegs, rangePixLens, tot_pix
   264 
   265 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
   266     while True:
   267         q1, r1 = divmod(beg1, bp_per_pix)
   268         q2, r2 = divmod(beg2, bp_per_pix)
   269         hits[q2 * width + q1] |= 1
   270         next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
   271         if next_pix >= size: break
   272         beg1 += next_pix
   273         beg2 += next_pix
   274         size -= next_pix
   275 
   276 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
   277     beg2 = -1 - beg2
   278     while True:
   279         q1, r1 = divmod(beg1, bp_per_pix)
   280         q2, r2 = divmod(beg2, bp_per_pix)
   281         hits[q2 * width + q1] |= 2
   282         next_pix = min(bp_per_pix - r1, r2 + 1)
   283         if next_pix >= size: break
   284         beg1 += next_pix
   285         beg2 -= next_pix
   286         size -= next_pix
   287 
   288 def alignmentPixels(width, height, alignments, bp_per_pix, origins1, origins2):
   289     hits = [0] * (width * height)  # the image data
   290     for seq1, seq2, blocks in alignments:
   291         ori1 = origins1[seq1]
   292         ori2 = origins2[seq2]
   293         for beg1, beg2, size in blocks:
   294             if beg1 < 0:
   295                 beg1 = -(beg1 + size)
   296                 beg2 = -(beg2 + size)
   297             if beg2 >= 0:
   298                 drawLineForward(hits, width, bp_per_pix,
   299                                 beg1 + ori1, beg2 + ori2, size)
   300             else:
   301                 drawLineReverse(hits, width, bp_per_pix,
   302                                 beg1 + ori1, beg2 - ori2, size)
   303     return hits
   304 
   305 def expandedSeqDict(seqDict):
   306     '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   307     newDict = seqDict.copy()
   308     for name, x in seqDict.items():
   309         if "." in name:
   310             base = name.split(".")[-1]
   311             if base in newDict:  # an ambiguous case was found:
   312                 return seqDict   # so give up completely
   313             newDict[base] = x
   314     return newDict
   315 
   316 def readBed(fileName, seqLimits):
   317     if not fileName: return
   318     for line in myOpen(fileName):
   319         w = line.split()
   320         if not w: continue
   321         seqName = w[0]
   322         if seqName not in seqLimits: continue
   323         beg = int(w[1])
   324         end = int(w[2])
   325         layer = 900
   326         color = "#fbf"
   327         if len(w) > 4:
   328             if w[4] != ".":
   329                 layer = float(w[4])
   330             if len(w) > 5:
   331                 if len(w) > 8 and w[8].count(",") == 2:
   332                     color = "rgb(" + w[8] + ")"
   333                 elif w[5] == "+":
   334                     color = "#ffe8e8"
   335                 elif w[5] == "-":
   336                     color = "#e8e8ff"
   337         yield layer, color, seqName, beg, end
   338 
   339 def commaSeparatedInts(text):
   340     return map(int, text.rstrip(",").split(","))
   341 
   342 def readGenePred(opts, fileName, seqLimits):
   343     if not fileName: return
   344     for line in myOpen(fileName):
   345         fields = line.split()
   346         if not fields: continue
   347         if fields[2] not in "+-": fields = fields[1:]
   348         seqName = fields[1]
   349         if seqName not in seqLimits: continue
   350         #strand = fields[2]
   351         cdsBeg = int(fields[5])
   352         cdsEnd = int(fields[6])
   353         exonBegs = commaSeparatedInts(fields[8])
   354         exonEnds = commaSeparatedInts(fields[9])
   355         for beg, end in zip(exonBegs, exonEnds):
   356             yield 300, opts.exon_color, seqName, beg, end
   357             b = max(beg, cdsBeg)
   358             e = min(end, cdsEnd)
   359             if b < e: yield 400, opts.cds_color, seqName, b, e
   360 
   361 def readRmsk(fileName, seqLimits):
   362     if not fileName: return
   363     for line in myOpen(fileName):
   364         fields = line.split()
   365         if len(fields) == 17:  # rmsk.txt
   366             seqName = fields[5]
   367             if seqName not in seqLimits: continue  # do this ASAP for speed
   368             beg = int(fields[6])
   369             end = int(fields[7])
   370             strand = fields[9]
   371             repeatClass = fields[11]
   372         elif len(fields) == 15:  # .out
   373             seqName = fields[4]
   374             if seqName not in seqLimits: continue
   375             beg = int(fields[5]) - 1
   376             end = int(fields[6])
   377             strand = fields[8]
   378             repeatClass = fields[10]
   379         else:
   380             continue
   381         if repeatClass in ("Low_complexity", "Simple_repeat"):
   382             yield 200, "#fbf", seqName, beg, end
   383         elif strand == "+":
   384             yield 100, "#ffe8e8", seqName, beg, end
   385         else:
   386             yield 100, "#e8e8ff", seqName, beg, end
   387 
   388 def isExtraFirstGapField(fields):
   389     return fields[4].isdigit()
   390 
   391 def readGaps(opts, fileName, seqLimits):
   392     '''Read locations of unsequenced gaps, from an agp or gap file.'''
   393     if not fileName: return
   394     for line in myOpen(fileName):
   395         w = line.split()
   396         if not w or w[0][0] == "#": continue
   397         if isExtraFirstGapField(w): w = w[1:]
   398         if w[4] not in "NU": continue
   399         seqName = w[0]
   400         if seqName not in seqLimits: continue
   401         end = int(w[2])
   402         beg = end - int(w[5])  # zero-based coordinate
   403         if w[7] == "yes":
   404             yield 3000, opts.bridged_color, seqName, beg, end
   405         else:
   406             yield 2000, opts.unbridged_color, seqName, beg, end
   407 
   408 def bedBoxes(beds, seqLimits, origins, margin, edge, isTop, bpPerPix):
   409     for layer, color, seqName, bedBeg, bedEnd in beds:
   410         cropBeg, cropEnd = seqLimits[seqName]
   411         beg = max(bedBeg, cropBeg)
   412         end = min(bedEnd, cropEnd)
   413         if beg >= end: continue
   414         ori = origins[seqName]
   415         if layer <= 1000:
   416             # include partly-covered pixels
   417             b = (ori + beg) // bpPerPix
   418             e = div_ceil(ori + end, bpPerPix)
   419         else:
   420             # exclude partly-covered pixels
   421             b = div_ceil(ori + beg, bpPerPix)
   422             e = (ori + end) // bpPerPix
   423             if e <= b: continue
   424         if isTop:
   425             box = b, margin, e, edge
   426         else:
   427             box = margin, b, edge, e
   428         yield layer, color, box
   429 
   430 def drawAnnotations(im, boxes):
   431     # xxx use partial transparency for different-color overlaps?
   432     for layer, color, box in boxes:
   433         im.paste(color, box)
   434 
   435 def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
   436     '''Return axis labels with endpoint & sort-order information.'''
   437     maxWidth = end - beg
   438     for i, j, k in zip(labels, rangePixBegs, rangePixLens):
   439         text, textWidth, textHeight = i
   440         if textWidth > maxWidth:
   441             continue
   442         labelBeg = j + (k - textWidth) // 2
   443         labelEnd = labelBeg + textWidth
   444         sortKey = textWidth - k
   445         if labelBeg < beg:
   446             labelBeg = beg
   447             labelEnd = beg + textWidth
   448             sortKey += maxWidth
   449         if labelEnd > end:
   450             labelEnd = end
   451             labelBeg = end - textWidth
   452             sortKey += maxWidth
   453         yield sortKey, labelBeg, labelEnd, text, textHeight
   454 
   455 def nonoverlappingLabels(labels, minPixTweenLabels):
   456     '''Get a subset of non-overlapping axis labels, greedily.'''
   457     out = []
   458     for i in labels:
   459         beg = i[1] - minPixTweenLabels
   460         end = i[2] + minPixTweenLabels
   461         if all(j[2] <= beg or j[1] >= end for j in out):
   462             out.append(i)
   463     return out
   464 
   465 def axisImage(labels, rangePixBegs, rangePixLens, textRot,
   466               textAln, font, image_mode, opts):
   467     '''Make an image of axis labels.'''
   468     beg = rangePixBegs[0]
   469     end = rangePixBegs[-1] + rangePixLens[-1]
   470     margin = max(i[2] for i in labels)
   471     labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
   472     minPixTweenLabels = 0 if textRot else opts.label_space
   473     labels = nonoverlappingLabels(labels, minPixTweenLabels)
   474     image_size = (margin, end) if textRot else (end, margin)
   475     im = Image.new(image_mode, image_size, opts.margin_color)
   476     draw = ImageDraw.Draw(im)
   477     for i in labels:
   478         base = margin - i[4] if textAln else 0
   479         position = (base, i[1]) if textRot else (i[1], base)
   480         draw.text(position, i[3], font=font, fill=opts.text_color)
   481     return im
   482 
   483 def seqOrigins(seqNames, rangePixBegs, seqLimits, bp_per_pix):
   484     for i, j in zip(seqNames, rangePixBegs):
   485         yield i, bp_per_pix * j - seqLimits[i][0]
   486 
   487 def lastDotplot(opts, args):
   488     if opts.fontfile:  font = ImageFont.truetype(opts.fontfile, opts.fontsize)
   489     else:              font = ImageFont.load_default()
   490 
   491     image_mode = 'RGB'
   492     forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
   493     reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
   494     zipped_colors = zip(forward_color, reverse_color)
   495     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
   496 
   497     warn("reading alignments...")
   498     alignmentInfo = readAlignments(args[0], opts)
   499     alignments, seqNames1, seqNames2, seqLimits1, seqLimits2 = alignmentInfo
   500     warn("done")
   501     if not alignments: raise Exception("there are no alignments")
   502 
   503     textRot1 = "vertical".startswith(opts.rot1)
   504     i1 = getSeqInfo(opts.sort1, seqNames1, seqLimits1,
   505                     font, opts.fontsize, image_mode, opts.labels1, textRot1)
   506     seqNames1, rangeSizes1, labelData1, tMargin = i1
   507 
   508     textRot2 = "horizontal".startswith(opts.rot2)
   509     i2 = getSeqInfo(opts.sort2, seqNames2, seqLimits2,
   510                     font, opts.fontsize, image_mode, opts.labels2, textRot2)
   511     seqNames2, rangeSizes2, labelData2, lMargin = i2
   512 
   513     warn("choosing bp per pixel...")
   514     maxPixels1 = opts.width  - lMargin
   515     maxPixels2 = opts.height - tMargin
   516     bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
   517     bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
   518     bpPerPix = max(bpPerPix1, bpPerPix2)
   519     warn("bp per pixel = " + str(bpPerPix))
   520 
   521     p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
   522     rangePixBegs1, rangePixLens1, width = p1
   523 
   524     p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
   525     rangePixBegs2, rangePixLens2, height = p2
   526 
   527     warn("width:  " + str(width))
   528     warn("height: " + str(height))
   529 
   530     origins1 = dict(seqOrigins(seqNames1, rangePixBegs1, seqLimits1, bpPerPix))
   531     origins2 = dict(seqOrigins(seqNames2, rangePixBegs2, seqLimits2, bpPerPix))
   532 
   533     warn("processing alignments...")
   534     hits = alignmentPixels(width, height, alignments, bpPerPix,
   535                            origins1, origins2)
   536     warn("done")
   537 
   538     image_size = width, height
   539     im = Image.new(image_mode, image_size, opts.background_color)
   540 
   541     seqLimits1 = expandedSeqDict(seqLimits1)
   542     seqLimits2 = expandedSeqDict(seqLimits2)
   543     origins1 = expandedSeqDict(origins1)
   544     origins2 = expandedSeqDict(origins2)
   545 
   546     beds1 = itertools.chain(readBed(opts.bed1, seqLimits1),
   547                             readRmsk(opts.rmsk1, seqLimits1),
   548                             readGenePred(opts, opts.genePred1, seqLimits1),
   549                             readGaps(opts, opts.gap1, seqLimits1))
   550     b1 = bedBoxes(beds1, seqLimits1, origins1, tMargin, height, True, bpPerPix)
   551 
   552     beds2 = itertools.chain(readBed(opts.bed2, seqLimits2),
   553                             readRmsk(opts.rmsk2, seqLimits2),
   554                             readGenePred(opts, opts.genePred2, seqLimits2),
   555                             readGaps(opts, opts.gap2, seqLimits2))
   556     b2 = bedBoxes(beds2, seqLimits2, origins2, lMargin, width, False, bpPerPix)
   557 
   558     boxes = sorted(itertools.chain(b1, b2))
   559     drawAnnotations(im, boxes)
   560 
   561     for i in range(height):
   562         for j in range(width):
   563             store_value = hits[i * width + j]
   564             xy = j, i
   565             if   store_value == 1: im.putpixel(xy, forward_color)
   566             elif store_value == 2: im.putpixel(xy, reverse_color)
   567             elif store_value == 3: im.putpixel(xy, overlap_color)
   568 
   569     if opts.fontsize != 0:
   570         axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
   571                           textRot1, False, font, image_mode, opts)
   572         if textRot1:
   573             axis1 = axis1.transpose(Image.ROTATE_90)
   574         axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
   575                           textRot2, textRot2, font, image_mode, opts)
   576         if not textRot2:
   577             axis2 = axis2.transpose(Image.ROTATE_270)
   578         im.paste(axis1, (0, 0))
   579         im.paste(axis2, (0, 0))
   580 
   581     for i in rangePixBegs1[1:]:
   582         box = i - opts.border_pixels, tMargin, i, height
   583         im.paste(opts.border_color, box)
   584 
   585     for i in rangePixBegs2[1:]:
   586         box = lMargin, i - opts.border_pixels, width, i
   587         im.paste(opts.border_color, box)
   588 
   589     im.save(args[1])
   590 
   591 if __name__ == "__main__":
   592     usage = """%prog --help
   593    or: %prog [options] maf-or-tab-alignments dotplot.png
   594    or: %prog [options] maf-or-tab-alignments dotplot.gif
   595    or: ..."""
   596     description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
   597     op = optparse.OptionParser(usage=usage, description=description)
   598     op.add_option("-v", "--verbose", action="count",
   599                   help="show progress messages & data about the plot")
   600     op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
   601                   default=[],
   602                   help="which sequences to show from the 1st genome")
   603     op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
   604                   default=[],
   605                   help="which sequences to show from the 2nd genome")
   606     # Replace "width" & "height" with a single "length" option?
   607     op.add_option("-x", "--width", type="int", default=1000,
   608                   help="maximum width in pixels (default: %default)")
   609     op.add_option("-y", "--height", type="int", default=1000,
   610                   help="maximum height in pixels (default: %default)")
   611     op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
   612                   help="color for forward alignments (default: %default)")
   613     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
   614                   help="color for reverse alignments (default: %default)")
   615     op.add_option("--sort1", type="int", default=1, metavar="N",
   616                   help="genome1 sequence order: 0=input order, 1=name order, "
   617                   "2=length order (default=%default)")
   618     op.add_option("--sort2", type="int", default=1, metavar="N",
   619                   help="genome2 sequence order: 0=input order, 1=name order, "
   620                   "2=length order (default=%default)")
   621     op.add_option("--trim1", action="store_true",
   622                   help="trim unaligned sequence flanks from the 1st genome")
   623     op.add_option("--trim2", action="store_true",
   624                   help="trim unaligned sequence flanks from the 2nd genome")
   625     op.add_option("--border-pixels", metavar="INT", type="int", default=1,
   626                   help="number of pixels between sequences (default=%default)")
   627     op.add_option("--border-color", metavar="COLOR", default="black",
   628                   help="color for pixels between sequences (default=%default)")
   629     op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
   630                   help="margin color")
   631 
   632     og = optparse.OptionGroup(op, "Text options")
   633     og.add_option("-f", "--fontfile", metavar="FILE",
   634                   help="TrueType or OpenType font file")
   635     og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=11,
   636                   help="TrueType or OpenType font size (default: %default)")
   637     og.add_option("--labels1", type="int", default=0, metavar="N", help=
   638                   "genome1 labels: 0=name, 1=name:length, "
   639                   "2=name:start:length, 3=name:start-end (default=%default)")
   640     og.add_option("--labels2", type="int", default=0, metavar="N", help=
   641                   "genome2 labels: 0=name, 1=name:length, "
   642                   "2=name:start:length, 3=name:start-end (default=%default)")
   643     og.add_option("--rot1", metavar="ROT", default="h",
   644                   help="text rotation for the 1st genome (default=%default)")
   645     og.add_option("--rot2", metavar="ROT", default="v",
   646                   help="text rotation for the 2nd genome (default=%default)")
   647     op.add_option_group(og)
   648 
   649     og = optparse.OptionGroup(op, "Annotation options")
   650     og.add_option("--bed1", metavar="FILE",
   651                   help="read genome1 annotations from BED file")
   652     og.add_option("--bed2", metavar="FILE",
   653                   help="read genome2 annotations from BED file")
   654     og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
   655                   "RepeatMasker .out or rmsk.txt file")
   656     og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
   657                   "RepeatMasker .out or rmsk.txt file")
   658     op.add_option_group(og)
   659 
   660     og = optparse.OptionGroup(op, "Gene options")
   661     og.add_option("--genePred1", metavar="FILE",
   662                   help="read genome1 genes from genePred file")
   663     og.add_option("--genePred2", metavar="FILE",
   664                   help="read genome2 genes from genePred file")
   665     og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
   666                   help="color for exons (default=%default)")
   667     og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
   668                   help="color for protein-coding regions (default=%default)")
   669     op.add_option_group(og)
   670 
   671     og = optparse.OptionGroup(op, "Unsequenced gap options")
   672     og.add_option("--gap1", metavar="FILE",
   673                   help="read genome1 unsequenced gaps from agp or gap file")
   674     og.add_option("--gap2", metavar="FILE",
   675                   help="read genome2 unsequenced gaps from agp or gap file")
   676     og.add_option("--bridged-color", metavar="COLOR", default="yellow",
   677                   help="color for bridged gaps (default: %default)")
   678     og.add_option("--unbridged-color", metavar="COLOR", default="orange",
   679                   help="color for unbridged gaps (default: %default)")
   680     op.add_option_group(og)
   681     (opts, args) = op.parse_args()
   682     if len(args) != 2: op.error("2 arguments needed")
   683 
   684     opts.text_color = "black"
   685     opts.background_color = "white"
   686     opts.label_space = 5     # minimum number of pixels between axis labels
   687 
   688     try: lastDotplot(opts, args)
   689     except KeyboardInterrupt: pass  # avoid silly error message
   690     except Exception, e:
   691         prog = os.path.basename(sys.argv[0])
   692         sys.exit(prog + ": error: " + str(e))