scripts/last-dotplot
author Martin C. Frith
Wed Nov 08 11:07:22 2017 +0900 (2017-11-08)
changeset 903 3af6b02e3f9c
parent 901 2003b209d750
child 904 c2f50cd4e29d
permissions -rwxr-xr-x
Make last-dotplot try to find a nice font
     1 #! /usr/bin/env python
     2 
     3 # Read pair-wise alignments in MAF or LAST tabular format: write an
     4 # "Oxford grid", a.k.a. dotplot.
     5 
     6 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
     7 # pixels with one or more aligned nt-pairs are black.  This can look
     8 # too crowded for large genome alignments.  I tried shading each pixel
     9 # according to the number of aligned nt-pairs within it, but the
    10 # result is too faint.  How can this be done better?
    11 
    12 import gzip
    13 from fnmatch import fnmatchcase
    14 import subprocess
    15 import itertools, optparse, os, re, sys
    16 
    17 # Try to make PIL/PILLOW work:
    18 try: from PIL import Image, ImageDraw, ImageFont, ImageColor
    19 except ImportError: import Image, ImageDraw, ImageFont, ImageColor
    20 
    21 def myOpen(fileName):  # faster than fileinput
    22     if fileName == "-":
    23         return sys.stdin
    24     if fileName.endswith(".gz"):
    25         return gzip.open(fileName)
    26     return open(fileName)
    27 
    28 def warn(message):
    29     if opts.verbose:
    30         prog = os.path.basename(sys.argv[0])
    31         sys.stderr.write(prog + ": " + message + "\n")
    32 
    33 def croppedBlocks(blocks, range1, range2):
    34     cropBeg1, cropEnd1 = range1
    35     cropBeg2, cropEnd2 = range2
    36     if blocks[0][0] < 0: cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
    37     if blocks[0][1] < 0: cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
    38     for beg1, beg2, size in blocks:
    39         b1 = max(cropBeg1, beg1)
    40         e1 = min(cropEnd1, beg1 + size)
    41         if b1 >= e1: continue
    42         offset = beg2 - beg1
    43         b2 = max(cropBeg2, b1 + offset)
    44         e2 = min(cropEnd2, e1 + offset)
    45         if b2 >= e2: continue
    46         yield b2 - offset, b2, e2 - b2
    47 
    48 def tabBlocks(beg1, beg2, blocks):
    49     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
    50     for i in blocks.split(","):
    51         if ":" in i:
    52             x, y = i.split(":")
    53             beg1 += int(x)
    54             beg2 += int(y)
    55         else:
    56             size = int(i)
    57             yield beg1, beg2, size
    58             beg1 += size
    59             beg2 += size
    60 
    61 def mafBlocks(beg1, beg2, seq1, seq2):
    62     '''Get the gapless blocks of an alignment, from MAF format.'''
    63     size = 0
    64     for x, y in itertools.izip(seq1, seq2):
    65         if x == "-":
    66             if size:
    67                 yield beg1, beg2, size
    68                 beg1 += size
    69                 beg2 += size
    70                 size = 0
    71             beg2 += 1
    72         elif y == "-":
    73             if size:
    74                 yield beg1, beg2, size
    75                 beg1 += size
    76                 beg2 += size
    77                 size = 0
    78             beg1 += 1
    79         else:
    80             size += 1
    81     if size: yield beg1, beg2, size
    82 
    83 def alignmentInput(lines):
    84     '''Get alignments and sequence lengths, from MAF or tabular format.'''
    85     mafCount = 0
    86     for line in lines:
    87         w = line.split()
    88         if line[0].isdigit():  # tabular format
    89             chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
    90             if w[4] == "-": beg1 -= seqlen1
    91             chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
    92             if w[9] == "-": beg2 -= seqlen2
    93             blocks = tabBlocks(beg1, beg2, w[11])
    94             yield chr1, seqlen1, chr2, seqlen2, blocks
    95         elif line[0] == "s":  # MAF format
    96             if mafCount == 0:
    97                 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
    98                 if w[4] == "-": beg1 -= seqlen1
    99                 mafCount = 1
   100             else:
   101                 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
   102                 if w[4] == "-": beg2 -= seqlen2
   103                 blocks = mafBlocks(beg1, beg2, seq1, seq2)
   104                 yield chr1, seqlen1, chr2, seqlen2, blocks
   105                 mafCount = 0
   106 
   107 def seqRequestFromText(text):
   108     if ":" in text:
   109         pattern, interval = text.rsplit(":", 1)
   110         if "-" in interval:
   111             beg, end = interval.rsplit("-", 1)
   112             return pattern, int(beg), int(end)  # beg may be negative
   113     return text, 0, sys.maxsize
   114 
   115 def rangeFromSeqName(seqRequests, name, seqLen):
   116     if not seqRequests: return 0, seqLen
   117     base = name.split(".")[-1]  # allow for names like hg19.chr7
   118     for pat, beg, end in seqRequests:
   119         if fnmatchcase(name, pat) or fnmatchcase(base, pat):
   120             return max(beg, 0), min(end, seqLen)
   121     return None
   122 
   123 def updateSeqs(isTrim, seqNames, seqLimits, seqName, seqRange, blocks, index):
   124     if seqName not in seqLimits:
   125         seqNames.append(seqName)
   126     if isTrim:
   127         beg = blocks[0][index]
   128         end = blocks[-1][index] + blocks[-1][2]
   129         if beg < 0: beg, end = -end, -beg
   130         if seqName in seqLimits:
   131             b, e = seqLimits[seqName]
   132             seqLimits[seqName] = min(b, beg), max(e, end)
   133         else:
   134             seqLimits[seqName] = beg, end
   135     else:
   136         seqLimits[seqName] = seqRange
   137 
   138 def readAlignments(fileName, opts):
   139     '''Get alignments and sequence limits, from MAF or tabular format.'''
   140     seqRequests1 = map(seqRequestFromText, opts.seq1)
   141     seqRequests2 = map(seqRequestFromText, opts.seq2)
   142 
   143     alignments = []
   144     seqNames1 = []
   145     seqNames2 = []
   146     seqLimits1 = {}
   147     seqLimits2 = {}
   148     lines = myOpen(fileName)
   149     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   150         range1 = rangeFromSeqName(seqRequests1, seqName1, seqLen1)
   151         if not range1: continue
   152         range2 = rangeFromSeqName(seqRequests2, seqName2, seqLen2)
   153         if not range2: continue
   154         b = list(croppedBlocks(list(blocks), range1, range2))
   155         if not b: continue
   156         aln = seqName1, seqName2, b
   157         alignments.append(aln)
   158         updateSeqs(opts.trim1, seqNames1, seqLimits1, seqName1, range1, b, 0)
   159         updateSeqs(opts.trim2, seqNames2, seqLimits2, seqName2, range2, b, 1)
   160     return alignments, seqNames1, seqNames2, seqLimits1, seqLimits2
   161 
   162 def natural_sort_key(my_string):
   163     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
   164     parts = re.split(r'(\d+)', my_string)
   165     parts[1::2] = map(int, parts[1::2])
   166     return parts
   167 
   168 def prettyNum(n):
   169     t = str(n)
   170     groups = []
   171     while t:
   172         groups.append(t[-3:])
   173         t = t[:-3]
   174     return " ".join(reversed(groups))
   175 
   176 def sizeText(size):
   177     suffixes = "bp", "kb", "Mb", "Gb"
   178     for i, x in enumerate(suffixes):
   179         j = 10 ** (i * 3)
   180         if size < j * 10:
   181             return "%.2g" % (1.0 * size / j) + x
   182         if size < j * 1000 or i == len(suffixes) - 1:
   183             return "%.0f" % (1.0 * size / j) + x
   184 
   185 def labelText(seqRange, labelOpt):
   186     seqName, beg, end = seqRange
   187     if labelOpt == 1:
   188         return seqName + ": " + sizeText(end - beg)
   189     if labelOpt == 2:
   190         return seqName + ": " + prettyNum(beg) + ": " + sizeText(end - beg)
   191     if labelOpt == 3:
   192         return seqName + ": " + prettyNum(beg) + " - " + prettyNum(end)
   193     return seqName
   194 
   195 def rangeLabels(seqRanges, labelOpt, font, fontsize, image_mode, textRot):
   196     if fontsize:
   197         image_size = 1, 1
   198         im = Image.new(image_mode, image_size)
   199         draw = ImageDraw.Draw(im)
   200     x = y = 0
   201     for r in seqRanges:
   202         text = labelText(r, labelOpt)
   203         if fontsize:
   204             x, y = draw.textsize(text, font=font)
   205             if textRot:
   206                 x, y = y, x
   207         yield text, x, y
   208 
   209 def getSeqInfo(sortOpt, seqNames, seqLimits,
   210                font, fontsize, image_mode, labelOpt, textRot):
   211     '''Return miscellaneous information about the sequences.'''
   212     if sortOpt == 1:
   213         seqNames.sort(key=natural_sort_key)
   214     seqSizes = [seqLimits[i][1] - seqLimits[i][0] for i in seqNames]
   215     for i in seqNames:
   216         r = seqLimits[i]
   217         out = i, str(r[0]), str(r[1])
   218         warn("\t".join(out))
   219     warn("")
   220     if sortOpt == 2:
   221         seqRecords = sorted(zip(seqSizes, seqNames), reverse=True)
   222         seqSizes = [i[0] for i in seqRecords]
   223         seqNames = [i[1] for i in seqRecords]
   224     seqRanges = [(i, seqLimits[i][0], seqLimits[i][1]) for i in seqNames]
   225     labelData = list(rangeLabels(seqRanges, labelOpt,
   226                                  font, fontsize, image_mode, textRot))
   227     margin = max(i[2] for i in labelData)
   228     # xxx the margin may be too big, because some labels may get omitted
   229     return seqNames, seqSizes, labelData, margin
   230 
   231 def div_ceil(x, y):
   232     '''Return x / y rounded up.'''
   233     q, r = divmod(x, y)
   234     return q + (r != 0)
   235 
   236 def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
   237     '''Get the minimum bp-per-pixel that fits in the size limit.'''
   238     numOfRanges = len(rangeSizes)
   239     maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
   240     if maxPixelsInRanges < numOfRanges:
   241         raise Exception("can't fit the image: too many sequences?")
   242     negLimit = -maxPixelsInRanges
   243     negBpPerPix = sum(rangeSizes) // negLimit
   244     while True:
   245         if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
   246             return -negBpPerPix
   247         negBpPerPix -= 1
   248 
   249 def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
   250     '''Get the start pixel for each range.'''
   251     rangePixBegs = []
   252     pix_tot = margin - pixTweenRanges
   253     for i in rangePixLens:
   254         pix_tot += pixTweenRanges
   255         rangePixBegs.append(pix_tot)
   256         pix_tot += i
   257     return rangePixBegs
   258 
   259 def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
   260     '''Return pixel information about the ranges.'''
   261     rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
   262     rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
   263     tot_pix = rangePixBegs[-1] + rangePixLens[-1]
   264     return rangePixBegs, rangePixLens, tot_pix
   265 
   266 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
   267     while True:
   268         q1, r1 = divmod(beg1, bp_per_pix)
   269         q2, r2 = divmod(beg2, bp_per_pix)
   270         hits[q2 * width + q1] |= 1
   271         next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
   272         if next_pix >= size: break
   273         beg1 += next_pix
   274         beg2 += next_pix
   275         size -= next_pix
   276 
   277 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
   278     beg2 = -1 - beg2
   279     while True:
   280         q1, r1 = divmod(beg1, bp_per_pix)
   281         q2, r2 = divmod(beg2, bp_per_pix)
   282         hits[q2 * width + q1] |= 2
   283         next_pix = min(bp_per_pix - r1, r2 + 1)
   284         if next_pix >= size: break
   285         beg1 += next_pix
   286         beg2 -= next_pix
   287         size -= next_pix
   288 
   289 def alignmentPixels(width, height, alignments, bp_per_pix, origins1, origins2):
   290     hits = [0] * (width * height)  # the image data
   291     for seq1, seq2, blocks in alignments:
   292         ori1 = origins1[seq1]
   293         ori2 = origins2[seq2]
   294         for beg1, beg2, size in blocks:
   295             if beg1 < 0:
   296                 beg1 = -(beg1 + size)
   297                 beg2 = -(beg2 + size)
   298             if beg2 >= 0:
   299                 drawLineForward(hits, width, bp_per_pix,
   300                                 beg1 + ori1, beg2 + ori2, size)
   301             else:
   302                 drawLineReverse(hits, width, bp_per_pix,
   303                                 beg1 + ori1, beg2 - ori2, size)
   304     return hits
   305 
   306 def expandedSeqDict(seqDict):
   307     '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   308     newDict = seqDict.copy()
   309     for name, x in seqDict.items():
   310         if "." in name:
   311             base = name.split(".")[-1]
   312             if base in newDict:  # an ambiguous case was found:
   313                 return seqDict   # so give up completely
   314             newDict[base] = x
   315     return newDict
   316 
   317 def readBed(fileName, seqLimits):
   318     if not fileName: return
   319     for line in myOpen(fileName):
   320         w = line.split()
   321         if not w: continue
   322         seqName = w[0]
   323         if seqName not in seqLimits: continue
   324         beg = int(w[1])
   325         end = int(w[2])
   326         layer = 900
   327         color = "#fbf"
   328         if len(w) > 4:
   329             if w[4] != ".":
   330                 layer = float(w[4])
   331             if len(w) > 5:
   332                 if len(w) > 8 and w[8].count(",") == 2:
   333                     color = "rgb(" + w[8] + ")"
   334                 elif w[5] == "+":
   335                     color = "#ffe8e8"
   336                 elif w[5] == "-":
   337                     color = "#e8e8ff"
   338         yield layer, color, seqName, beg, end
   339 
   340 def commaSeparatedInts(text):
   341     return map(int, text.rstrip(",").split(","))
   342 
   343 def readGenePred(opts, fileName, seqLimits):
   344     if not fileName: return
   345     for line in myOpen(fileName):
   346         fields = line.split()
   347         if not fields: continue
   348         if fields[2] not in "+-": fields = fields[1:]
   349         seqName = fields[1]
   350         if seqName not in seqLimits: continue
   351         #strand = fields[2]
   352         cdsBeg = int(fields[5])
   353         cdsEnd = int(fields[6])
   354         exonBegs = commaSeparatedInts(fields[8])
   355         exonEnds = commaSeparatedInts(fields[9])
   356         for beg, end in zip(exonBegs, exonEnds):
   357             yield 300, opts.exon_color, seqName, beg, end
   358             b = max(beg, cdsBeg)
   359             e = min(end, cdsEnd)
   360             if b < e: yield 400, opts.cds_color, seqName, b, e
   361 
   362 def readRmsk(fileName, seqLimits):
   363     if not fileName: return
   364     for line in myOpen(fileName):
   365         fields = line.split()
   366         if len(fields) == 17:  # rmsk.txt
   367             seqName = fields[5]
   368             if seqName not in seqLimits: continue  # do this ASAP for speed
   369             beg = int(fields[6])
   370             end = int(fields[7])
   371             strand = fields[9]
   372             repeatClass = fields[11]
   373         elif len(fields) == 15:  # .out
   374             seqName = fields[4]
   375             if seqName not in seqLimits: continue
   376             beg = int(fields[5]) - 1
   377             end = int(fields[6])
   378             strand = fields[8]
   379             repeatClass = fields[10]
   380         else:
   381             continue
   382         if repeatClass in ("Low_complexity", "Simple_repeat"):
   383             yield 200, "#fbf", seqName, beg, end
   384         elif strand == "+":
   385             yield 100, "#ffe8e8", seqName, beg, end
   386         else:
   387             yield 100, "#e8e8ff", seqName, beg, end
   388 
   389 def isExtraFirstGapField(fields):
   390     return fields[4].isdigit()
   391 
   392 def readGaps(opts, fileName, seqLimits):
   393     '''Read locations of unsequenced gaps, from an agp or gap file.'''
   394     if not fileName: return
   395     for line in myOpen(fileName):
   396         w = line.split()
   397         if not w or w[0][0] == "#": continue
   398         if isExtraFirstGapField(w): w = w[1:]
   399         if w[4] not in "NU": continue
   400         seqName = w[0]
   401         if seqName not in seqLimits: continue
   402         end = int(w[2])
   403         beg = end - int(w[5])  # zero-based coordinate
   404         if w[7] == "yes":
   405             yield 3000, opts.bridged_color, seqName, beg, end
   406         else:
   407             yield 2000, opts.unbridged_color, seqName, beg, end
   408 
   409 def bedBoxes(beds, seqLimits, origins, margin, edge, isTop, bpPerPix):
   410     for layer, color, seqName, bedBeg, bedEnd in beds:
   411         cropBeg, cropEnd = seqLimits[seqName]
   412         beg = max(bedBeg, cropBeg)
   413         end = min(bedEnd, cropEnd)
   414         if beg >= end: continue
   415         ori = origins[seqName]
   416         if layer <= 1000:
   417             # include partly-covered pixels
   418             b = (ori + beg) // bpPerPix
   419             e = div_ceil(ori + end, bpPerPix)
   420         else:
   421             # exclude partly-covered pixels
   422             b = div_ceil(ori + beg, bpPerPix)
   423             e = (ori + end) // bpPerPix
   424             if e <= b: continue
   425         if isTop:
   426             box = b, margin, e, edge
   427         else:
   428             box = margin, b, edge, e
   429         yield layer, color, box
   430 
   431 def drawAnnotations(im, boxes):
   432     # xxx use partial transparency for different-color overlaps?
   433     for layer, color, box in boxes:
   434         im.paste(color, box)
   435 
   436 def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
   437     '''Return axis labels with endpoint & sort-order information.'''
   438     maxWidth = end - beg
   439     for i, j, k in zip(labels, rangePixBegs, rangePixLens):
   440         text, textWidth, textHeight = i
   441         if textWidth > maxWidth:
   442             continue
   443         labelBeg = j + (k - textWidth) // 2
   444         labelEnd = labelBeg + textWidth
   445         sortKey = textWidth - k
   446         if labelBeg < beg:
   447             labelBeg = beg
   448             labelEnd = beg + textWidth
   449             sortKey += maxWidth
   450         if labelEnd > end:
   451             labelEnd = end
   452             labelBeg = end - textWidth
   453             sortKey += maxWidth
   454         yield sortKey, labelBeg, labelEnd, text, textHeight
   455 
   456 def nonoverlappingLabels(labels, minPixTweenLabels):
   457     '''Get a subset of non-overlapping axis labels, greedily.'''
   458     out = []
   459     for i in labels:
   460         beg = i[1] - minPixTweenLabels
   461         end = i[2] + minPixTweenLabels
   462         if all(j[2] <= beg or j[1] >= end for j in out):
   463             out.append(i)
   464     return out
   465 
   466 def axisImage(labels, rangePixBegs, rangePixLens, textRot,
   467               textAln, font, image_mode, opts):
   468     '''Make an image of axis labels.'''
   469     beg = rangePixBegs[0]
   470     end = rangePixBegs[-1] + rangePixLens[-1]
   471     margin = max(i[2] for i in labels)
   472     labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
   473     minPixTweenLabels = 0 if textRot else opts.label_space
   474     labels = nonoverlappingLabels(labels, minPixTweenLabels)
   475     image_size = (margin, end) if textRot else (end, margin)
   476     im = Image.new(image_mode, image_size, opts.margin_color)
   477     draw = ImageDraw.Draw(im)
   478     for i in labels:
   479         base = margin - i[4] if textAln else 0
   480         position = (base, i[1]) if textRot else (i[1], base)
   481         draw.text(position, i[3], font=font, fill=opts.text_color)
   482     return im
   483 
   484 def seqOrigins(seqNames, rangePixBegs, seqLimits, bp_per_pix):
   485     for i, j in zip(seqNames, rangePixBegs):
   486         yield i, bp_per_pix * j - seqLimits[i][0]
   487 
   488 def getFont(opts):
   489     if opts.fontfile:
   490         return ImageFont.truetype(opts.fontfile, opts.fontsize)
   491     fileNames = []
   492     try:
   493         x = ["fc-match", "-f%{file}", "arial"]
   494         p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
   495         out, err = p.communicate()
   496         fileNames.append(out)
   497     except OSError as e:
   498         warn("fc-match error: " + str(e))
   499     fileNames.append("/Library/Fonts/Arial.ttf")  # for Mac
   500     for i in fileNames:
   501         try:
   502             font = ImageFont.truetype(i, opts.fontsize)
   503             warn("font: " + i)
   504             return font
   505         except IOError as e:
   506             warn("font load error: " + str(e))
   507     return ImageFont.load_default()
   508 
   509 def lastDotplot(opts, args):
   510     font = getFont(opts)
   511     image_mode = 'RGB'
   512     forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
   513     reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
   514     zipped_colors = zip(forward_color, reverse_color)
   515     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
   516 
   517     warn("reading alignments...")
   518     alignmentInfo = readAlignments(args[0], opts)
   519     alignments, seqNames1, seqNames2, seqLimits1, seqLimits2 = alignmentInfo
   520     warn("done")
   521     if not alignments: raise Exception("there are no alignments")
   522 
   523     textRot1 = "vertical".startswith(opts.rot1)
   524     i1 = getSeqInfo(opts.sort1, seqNames1, seqLimits1,
   525                     font, opts.fontsize, image_mode, opts.labels1, textRot1)
   526     seqNames1, rangeSizes1, labelData1, tMargin = i1
   527 
   528     textRot2 = "horizontal".startswith(opts.rot2)
   529     i2 = getSeqInfo(opts.sort2, seqNames2, seqLimits2,
   530                     font, opts.fontsize, image_mode, opts.labels2, textRot2)
   531     seqNames2, rangeSizes2, labelData2, lMargin = i2
   532 
   533     warn("choosing bp per pixel...")
   534     maxPixels1 = opts.width  - lMargin
   535     maxPixels2 = opts.height - tMargin
   536     bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
   537     bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
   538     bpPerPix = max(bpPerPix1, bpPerPix2)
   539     warn("bp per pixel = " + str(bpPerPix))
   540 
   541     p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
   542     rangePixBegs1, rangePixLens1, width = p1
   543 
   544     p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
   545     rangePixBegs2, rangePixLens2, height = p2
   546 
   547     warn("width:  " + str(width))
   548     warn("height: " + str(height))
   549 
   550     origins1 = dict(seqOrigins(seqNames1, rangePixBegs1, seqLimits1, bpPerPix))
   551     origins2 = dict(seqOrigins(seqNames2, rangePixBegs2, seqLimits2, bpPerPix))
   552 
   553     warn("processing alignments...")
   554     hits = alignmentPixels(width, height, alignments, bpPerPix,
   555                            origins1, origins2)
   556     warn("done")
   557 
   558     image_size = width, height
   559     im = Image.new(image_mode, image_size, opts.background_color)
   560 
   561     seqLimits1 = expandedSeqDict(seqLimits1)
   562     seqLimits2 = expandedSeqDict(seqLimits2)
   563     origins1 = expandedSeqDict(origins1)
   564     origins2 = expandedSeqDict(origins2)
   565 
   566     beds1 = itertools.chain(readBed(opts.bed1, seqLimits1),
   567                             readRmsk(opts.rmsk1, seqLimits1),
   568                             readGenePred(opts, opts.genePred1, seqLimits1),
   569                             readGaps(opts, opts.gap1, seqLimits1))
   570     b1 = bedBoxes(beds1, seqLimits1, origins1, tMargin, height, True, bpPerPix)
   571 
   572     beds2 = itertools.chain(readBed(opts.bed2, seqLimits2),
   573                             readRmsk(opts.rmsk2, seqLimits2),
   574                             readGenePred(opts, opts.genePred2, seqLimits2),
   575                             readGaps(opts, opts.gap2, seqLimits2))
   576     b2 = bedBoxes(beds2, seqLimits2, origins2, lMargin, width, False, bpPerPix)
   577 
   578     boxes = sorted(itertools.chain(b1, b2))
   579     drawAnnotations(im, boxes)
   580 
   581     for i in range(height):
   582         for j in range(width):
   583             store_value = hits[i * width + j]
   584             xy = j, i
   585             if   store_value == 1: im.putpixel(xy, forward_color)
   586             elif store_value == 2: im.putpixel(xy, reverse_color)
   587             elif store_value == 3: im.putpixel(xy, overlap_color)
   588 
   589     if opts.fontsize != 0:
   590         axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
   591                           textRot1, False, font, image_mode, opts)
   592         if textRot1:
   593             axis1 = axis1.transpose(Image.ROTATE_90)
   594         axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
   595                           textRot2, textRot2, font, image_mode, opts)
   596         if not textRot2:
   597             axis2 = axis2.transpose(Image.ROTATE_270)
   598         im.paste(axis1, (0, 0))
   599         im.paste(axis2, (0, 0))
   600 
   601     for i in rangePixBegs1[1:]:
   602         box = i - opts.border_pixels, tMargin, i, height
   603         im.paste(opts.border_color, box)
   604 
   605     for i in rangePixBegs2[1:]:
   606         box = lMargin, i - opts.border_pixels, width, i
   607         im.paste(opts.border_color, box)
   608 
   609     im.save(args[1])
   610 
   611 if __name__ == "__main__":
   612     usage = """%prog --help
   613    or: %prog [options] maf-or-tab-alignments dotplot.png
   614    or: %prog [options] maf-or-tab-alignments dotplot.gif
   615    or: ..."""
   616     description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
   617     op = optparse.OptionParser(usage=usage, description=description)
   618     op.add_option("-v", "--verbose", action="count",
   619                   help="show progress messages & data about the plot")
   620     op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
   621                   default=[],
   622                   help="which sequences to show from the 1st genome")
   623     op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
   624                   default=[],
   625                   help="which sequences to show from the 2nd genome")
   626     # Replace "width" & "height" with a single "length" option?
   627     op.add_option("-x", "--width", type="int", default=1000,
   628                   help="maximum width in pixels (default: %default)")
   629     op.add_option("-y", "--height", type="int", default=1000,
   630                   help="maximum height in pixels (default: %default)")
   631     op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
   632                   help="color for forward alignments (default: %default)")
   633     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
   634                   help="color for reverse alignments (default: %default)")
   635     op.add_option("--sort1", type="int", default=1, metavar="N",
   636                   help="genome1 sequence order: 0=input order, 1=name order, "
   637                   "2=length order (default=%default)")
   638     op.add_option("--sort2", type="int", default=1, metavar="N",
   639                   help="genome2 sequence order: 0=input order, 1=name order, "
   640                   "2=length order (default=%default)")
   641     op.add_option("--trim1", action="store_true",
   642                   help="trim unaligned sequence flanks from the 1st genome")
   643     op.add_option("--trim2", action="store_true",
   644                   help="trim unaligned sequence flanks from the 2nd genome")
   645     op.add_option("--border-pixels", metavar="INT", type="int", default=1,
   646                   help="number of pixels between sequences (default=%default)")
   647     op.add_option("--border-color", metavar="COLOR", default="black",
   648                   help="color for pixels between sequences (default=%default)")
   649     op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
   650                   help="margin color")
   651 
   652     og = optparse.OptionGroup(op, "Text options")
   653     og.add_option("-f", "--fontfile", metavar="FILE",
   654                   help="TrueType or OpenType font file")
   655     og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
   656                   help="TrueType or OpenType font size (default: %default)")
   657     og.add_option("--labels1", type="int", default=0, metavar="N", help=
   658                   "genome1 labels: 0=name, 1=name:length, "
   659                   "2=name:start:length, 3=name:start-end (default=%default)")
   660     og.add_option("--labels2", type="int", default=0, metavar="N", help=
   661                   "genome2 labels: 0=name, 1=name:length, "
   662                   "2=name:start:length, 3=name:start-end (default=%default)")
   663     og.add_option("--rot1", metavar="ROT", default="h",
   664                   help="text rotation for the 1st genome (default=%default)")
   665     og.add_option("--rot2", metavar="ROT", default="v",
   666                   help="text rotation for the 2nd genome (default=%default)")
   667     op.add_option_group(og)
   668 
   669     og = optparse.OptionGroup(op, "Annotation options")
   670     og.add_option("--bed1", metavar="FILE",
   671                   help="read genome1 annotations from BED file")
   672     og.add_option("--bed2", metavar="FILE",
   673                   help="read genome2 annotations from BED file")
   674     og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
   675                   "RepeatMasker .out or rmsk.txt file")
   676     og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
   677                   "RepeatMasker .out or rmsk.txt file")
   678     op.add_option_group(og)
   679 
   680     og = optparse.OptionGroup(op, "Gene options")
   681     og.add_option("--genePred1", metavar="FILE",
   682                   help="read genome1 genes from genePred file")
   683     og.add_option("--genePred2", metavar="FILE",
   684                   help="read genome2 genes from genePred file")
   685     og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
   686                   help="color for exons (default=%default)")
   687     og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
   688                   help="color for protein-coding regions (default=%default)")
   689     op.add_option_group(og)
   690 
   691     og = optparse.OptionGroup(op, "Unsequenced gap options")
   692     og.add_option("--gap1", metavar="FILE",
   693                   help="read genome1 unsequenced gaps from agp or gap file")
   694     og.add_option("--gap2", metavar="FILE",
   695                   help="read genome2 unsequenced gaps from agp or gap file")
   696     og.add_option("--bridged-color", metavar="COLOR", default="yellow",
   697                   help="color for bridged gaps (default: %default)")
   698     og.add_option("--unbridged-color", metavar="COLOR", default="orange",
   699                   help="color for unbridged gaps (default: %default)")
   700     op.add_option_group(og)
   701     (opts, args) = op.parse_args()
   702     if len(args) != 2: op.error("2 arguments needed")
   703 
   704     opts.text_color = "black"
   705     opts.background_color = "white"
   706     opts.label_space = 5     # minimum number of pixels between axis labels
   707 
   708     try: lastDotplot(opts, args)
   709     except KeyboardInterrupt: pass  # avoid silly error message
   710     except Exception as e:
   711         prog = os.path.basename(sys.argv[0])
   712         sys.exit(prog + ": error: " + str(e))