scripts/last-dotplot
author Martin C. Frith
Tue Oct 03 18:25:07 2017 +0900 (2017-10-03)
changeset 878 20f5c97a3cfd
parent 877 f5f93e51ef53
child 895 8eb06508b3ed
permissions -rwxr-xr-x
Add text-rotation options to last-dotplot
     1 #! /usr/bin/env python
     2 
     3 # Read pair-wise alignments in MAF or LAST tabular format: write an
     4 # "Oxford grid", a.k.a. dotplot.
     5 
     6 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
     7 # pixels with one or more aligned nt-pairs are black.  This can look
     8 # too crowded for large genome alignments.  I tried shading each pixel
     9 # according to the number of aligned nt-pairs within it, but the
    10 # result is too faint.  How can this be done better?
    11 
    12 import gzip
    13 import fnmatch, itertools, optparse, os, re, sys
    14 
    15 # Try to make PIL/PILLOW work:
    16 try: from PIL import Image, ImageDraw, ImageFont, ImageColor
    17 except ImportError: import Image, ImageDraw, ImageFont, ImageColor
    18 
    19 def myOpen(fileName):  # faster than fileinput
    20     if fileName == "-":
    21         return sys.stdin
    22     if fileName.endswith(".gz"):
    23         return gzip.open(fileName)
    24     return open(fileName)
    25 
    26 def warn(message):
    27     if opts.verbose:
    28         prog = os.path.basename(sys.argv[0])
    29         sys.stderr.write(prog + ": " + message + "\n")
    30 
    31 def croppedBlocks(blocks, range1, range2):
    32     cropBeg1, cropEnd1 = range1
    33     cropBeg2, cropEnd2 = range2
    34     if blocks[0][0] < 0: cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
    35     if blocks[0][1] < 0: cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
    36     for beg1, beg2, size in blocks:
    37         b1 = max(cropBeg1, beg1)
    38         e1 = min(cropEnd1, beg1 + size)
    39         if b1 >= e1: continue
    40         offset = beg2 - beg1
    41         b2 = max(cropBeg2, b1 + offset)
    42         e2 = min(cropEnd2, e1 + offset)
    43         if b2 >= e2: continue
    44         yield b2 - offset, b2, e2 - b2
    45 
    46 def tabBlocks(beg1, beg2, blocks):
    47     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
    48     for i in blocks.split(","):
    49         if ":" in i:
    50             x, y = i.split(":")
    51             beg1 += int(x)
    52             beg2 += int(y)
    53         else:
    54             size = int(i)
    55             yield beg1, beg2, size
    56             beg1 += size
    57             beg2 += size
    58 
    59 def mafBlocks(beg1, beg2, seq1, seq2):
    60     '''Get the gapless blocks of an alignment, from MAF format.'''
    61     size = 0
    62     for x, y in itertools.izip(seq1, seq2):
    63         if x == "-":
    64             if size:
    65                 yield beg1, beg2, size
    66                 beg1 += size
    67                 beg2 += size
    68                 size = 0
    69             beg2 += 1
    70         elif y == "-":
    71             if size:
    72                 yield beg1, beg2, size
    73                 beg1 += size
    74                 beg2 += size
    75                 size = 0
    76             beg1 += 1
    77         else:
    78             size += 1
    79     if size: yield beg1, beg2, size
    80 
    81 def alignmentInput(lines):
    82     '''Get alignments and sequence lengths, from MAF or tabular format.'''
    83     mafCount = 0
    84     for line in lines:
    85         w = line.split()
    86         if line[0].isdigit():  # tabular format
    87             chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
    88             if w[4] == "-": beg1 -= seqlen1
    89             chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
    90             if w[9] == "-": beg2 -= seqlen2
    91             blocks = tabBlocks(beg1, beg2, w[11])
    92             yield chr1, seqlen1, chr2, seqlen2, blocks
    93         elif line[0] == "s":  # MAF format
    94             if mafCount == 0:
    95                 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
    96                 if w[4] == "-": beg1 -= seqlen1
    97                 mafCount = 1
    98             else:
    99                 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
   100                 if w[4] == "-": beg2 -= seqlen2
   101                 blocks = mafBlocks(beg1, beg2, seq1, seq2)
   102                 yield chr1, seqlen1, chr2, seqlen2, blocks
   103                 mafCount = 0
   104 
   105 def seqRangeFromText(text):
   106     if ":" in text:
   107         pattern, interval = text.rsplit(":", 1)
   108         if "-" in interval:
   109             beg, end = interval.rsplit("-", 1)
   110             return pattern, int(beg), int(end)  # beg may be negative
   111     return text, 0, sys.maxsize
   112 
   113 def rangeFromSeqName(seqRanges, name, seqLen):
   114     if not seqRanges: return 0, seqLen
   115     base = name.split(".")[-1]  # allow for names like hg19.chr7
   116     for pat, beg, end in seqRanges:
   117         if fnmatch.fnmatchcase(name, pat) or fnmatch.fnmatchcase(base, pat):
   118             return max(beg, 0), min(end, seqLen)
   119     return None
   120 
   121 def updateSeqs(isTrim, seqNames, seqLimits, seqName, seqRange, blocks, index):
   122     if seqName not in seqLimits:
   123         seqNames.append(seqName)
   124     if isTrim:
   125         beg = blocks[0][index]
   126         end = blocks[-1][index] + blocks[-1][2]
   127         if beg < 0: beg, end = -end, -beg
   128         if seqName in seqLimits:
   129             b, e = seqLimits[seqName]
   130             seqLimits[seqName] = min(b, beg), max(e, end)
   131         else:
   132             seqLimits[seqName] = beg, end
   133     else:
   134         seqLimits[seqName] = seqRange
   135 
   136 def readAlignments(fileName, opts):
   137     '''Get alignments and sequence limits, from MAF or tabular format.'''
   138     seqRanges1 = map(seqRangeFromText, opts.seq1)
   139     seqRanges2 = map(seqRangeFromText, opts.seq2)
   140 
   141     alignments = []
   142     seqNames1 = []
   143     seqNames2 = []
   144     seqLimits1 = {}
   145     seqLimits2 = {}
   146     lines = myOpen(fileName)
   147     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
   148         range1 = rangeFromSeqName(seqRanges1, seqName1, seqLen1)
   149         if not range1: continue
   150         range2 = rangeFromSeqName(seqRanges2, seqName2, seqLen2)
   151         if not range2: continue
   152         b = list(croppedBlocks(list(blocks), range1, range2))
   153         if not b: continue
   154         aln = seqName1, seqName2, b
   155         alignments.append(aln)
   156         updateSeqs(opts.trim1, seqNames1, seqLimits1, seqName1, range1, b, 0)
   157         updateSeqs(opts.trim2, seqNames2, seqLimits2, seqName2, range2, b, 1)
   158     return alignments, seqNames1, seqNames2, seqLimits1, seqLimits2
   159 
   160 def natural_sort_key(my_string):
   161     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
   162     parts = re.split(r'(\d+)', my_string)
   163     parts[1::2] = map(int, parts[1::2])
   164     return parts
   165 
   166 def textDimensions(imageDraw, font, textRot, text):
   167     x, y = imageDraw.textsize(text, font=font)
   168     return (y, x) if textRot else (x, y)
   169 
   170 def get_text_sizes(my_strings, font, fontsize, image_mode, textRot):
   171     '''Get widths & heights, in pixels, of some strings.'''
   172     if fontsize == 0: return [(0, 0) for i in my_strings]
   173     image_size = 1, 1
   174     im = Image.new(image_mode, image_size)
   175     draw = ImageDraw.Draw(im)
   176     return [textDimensions(draw, font, textRot, i) for i in my_strings]
   177 
   178 def sizeText(size):
   179     suffixes = "bp", "kb", "Mb", "Gb"
   180     for i, x in enumerate(suffixes):
   181         j = 10 ** (i * 3)
   182         if size < j * 10:
   183             return "%.2g" % (1.0 * size / j) + x
   184         if size < j * 1000 or i == len(suffixes) - 1:
   185             return "%.0f" % (1.0 * size / j) + x
   186 
   187 def seqNameAndSizeText(seqName, seqSize):
   188     return seqName + ": " + sizeText(seqSize)
   189 
   190 def getSeqInfo(sortOpt, seqNames, seqLimits,
   191                font, fontsize, image_mode, isShowSize, textRot):
   192     '''Return miscellaneous information about the sequences.'''
   193     if sortOpt == 1:
   194         seqNames.sort(key=natural_sort_key)
   195     seqSizes = [seqLimits[i][1] - seqLimits[i][0] for i in seqNames]
   196     for i in seqNames:
   197         r = seqLimits[i]
   198         out = i, str(r[0]), str(r[1])
   199         warn("\t".join(out))
   200     warn("")
   201     if sortOpt == 2:
   202         seqRecords = sorted(zip(seqSizes, seqNames), reverse=True)
   203         seqSizes = [i[0] for i in seqRecords]
   204         seqNames = [i[1] for i in seqRecords]
   205     if isShowSize:
   206         seqLabels = map(seqNameAndSizeText, seqNames, seqSizes)
   207     else:
   208         seqLabels = seqNames
   209     labelSizes = get_text_sizes(seqLabels, font, fontsize, image_mode, textRot)
   210     margin = max(i[1] for i in labelSizes)
   211     # xxx the margin may be too big, because some labels may get omitted
   212     return seqNames, seqSizes, seqLabels, labelSizes, margin
   213 
   214 def div_ceil(x, y):
   215     '''Return x / y rounded up.'''
   216     q, r = divmod(x, y)
   217     return q + (r != 0)
   218 
   219 def get_bp_per_pix(seq_sizes, pix_tween_seqs, pix_limit):
   220     '''Get the minimum bp-per-pixel that fits in the size limit.'''
   221     seq_num = len(seq_sizes)
   222     seq_pix_limit = pix_limit - pix_tween_seqs * (seq_num - 1)
   223     if seq_pix_limit < seq_num:
   224         raise Exception("can't fit the image: too many sequences?")
   225     negLimit = -seq_pix_limit
   226     negBpPerPix = sum(seq_sizes) // negLimit
   227     while True:
   228         if sum(i // negBpPerPix for i in seq_sizes) >= negLimit:
   229             return -negBpPerPix
   230         negBpPerPix -= 1
   231 
   232 def get_seq_starts(seq_pix, pix_tween_seqs, margin):
   233     '''Get the start pixel for each sequence.'''
   234     seq_starts = []
   235     pix_tot = margin - pix_tween_seqs
   236     for i in seq_pix:
   237         pix_tot += pix_tween_seqs
   238         seq_starts.append(pix_tot)
   239         pix_tot += i
   240     return seq_starts
   241 
   242 def get_pix_info(seq_sizes, bp_per_pix, pix_tween_seqs, margin):
   243     '''Return pixel information about the sequences.'''
   244     seq_pix = [div_ceil(i, bp_per_pix) for i in seq_sizes]
   245     seq_starts = get_seq_starts(seq_pix, pix_tween_seqs, margin)
   246     tot_pix = seq_starts[-1] + seq_pix[-1]
   247     return seq_pix, seq_starts, tot_pix
   248 
   249 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
   250     while True:
   251         q1, r1 = divmod(beg1, bp_per_pix)
   252         q2, r2 = divmod(beg2, bp_per_pix)
   253         hits[q2 * width + q1] |= 1
   254         next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
   255         if next_pix >= size: break
   256         beg1 += next_pix
   257         beg2 += next_pix
   258         size -= next_pix
   259 
   260 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
   261     beg2 = -1 - beg2
   262     while True:
   263         q1, r1 = divmod(beg1, bp_per_pix)
   264         q2, r2 = divmod(beg2, bp_per_pix)
   265         hits[q2 * width + q1] |= 2
   266         next_pix = min(bp_per_pix - r1, r2 + 1)
   267         if next_pix >= size: break
   268         beg1 += next_pix
   269         beg2 -= next_pix
   270         size -= next_pix
   271 
   272 def alignmentPixels(width, height, alignments, bp_per_pix, origins1, origins2):
   273     hits = [0] * (width * height)  # the image data
   274     for seq1, seq2, blocks in alignments:
   275         ori1 = origins1[seq1]
   276         ori2 = origins2[seq2]
   277         for beg1, beg2, size in blocks:
   278             if beg1 < 0:
   279                 beg1 = -(beg1 + size)
   280                 beg2 = -(beg2 + size)
   281             if beg2 >= 0:
   282                 drawLineForward(hits, width, bp_per_pix,
   283                                 beg1 + ori1, beg2 + ori2, size)
   284             else:
   285                 drawLineReverse(hits, width, bp_per_pix,
   286                                 beg1 + ori1, beg2 - ori2, size)
   287     return hits
   288 
   289 def expandedSeqDict(seqDict):
   290     '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   291     newDict = seqDict.copy()
   292     for name, x in seqDict.items():
   293         if "." in name:
   294             base = name.split(".")[-1]
   295             if base in newDict:  # an ambiguous case was found:
   296                 return seqDict   # so give up completely
   297             newDict[base] = x
   298     return newDict
   299 
   300 def readBed(fileName, seqLimits):
   301     if not fileName: return
   302     for line in myOpen(fileName):
   303         w = line.split()
   304         if not w: continue
   305         seqName = w[0]
   306         if seqName not in seqLimits: continue
   307         beg = int(w[1])
   308         end = int(w[2])
   309         layer = 900
   310         color = "#ffe4ff"
   311         if len(w) > 4:
   312             if w[4] != ".":
   313                 layer = float(w[4])
   314             if len(w) > 5:
   315                 if len(w) > 8 and w[8].count(",") == 2:
   316                     color = "rgb(" + w[8] + ")"
   317                 elif w[5] == "+":
   318                     color = "#fff4f4"
   319                 elif w[5] == "-":
   320                     color = "#f4f4ff"
   321         yield layer, color, seqName, beg, end
   322 
   323 def commaSeparatedInts(text):
   324     return map(int, text.rstrip(",").split(","))
   325 
   326 def readGenePred(opts, fileName, seqLimits):
   327     if not fileName: return
   328     for line in myOpen(fileName):
   329         fields = line.split()
   330         if not fields: continue
   331         if fields[2] not in "+-": fields = fields[1:]
   332         seqName = fields[1]
   333         if seqName not in seqLimits: continue
   334         #strand = fields[2]
   335         cdsBeg = int(fields[5])
   336         cdsEnd = int(fields[6])
   337         exonBegs = commaSeparatedInts(fields[8])
   338         exonEnds = commaSeparatedInts(fields[9])
   339         for beg, end in zip(exonBegs, exonEnds):
   340             yield 300, opts.exon_color, seqName, beg, end
   341             b = max(beg, cdsBeg)
   342             e = min(end, cdsEnd)
   343             if b < e: yield 400, opts.cds_color, seqName, b, e
   344 
   345 def readRmsk(fileName, seqLimits):
   346     if not fileName: return
   347     for line in myOpen(fileName):
   348         fields = line.split()
   349         if len(fields) == 17:  # rmsk.txt
   350             seqName = fields[5]
   351             if seqName not in seqLimits: continue  # do this ASAP for speed
   352             beg = int(fields[6])
   353             end = int(fields[7])
   354             strand = fields[9]
   355             repeatClass = fields[11]
   356         elif len(fields) == 15:  # .out
   357             seqName = fields[4]
   358             if seqName not in seqLimits: continue
   359             beg = int(fields[5]) - 1
   360             end = int(fields[6])
   361             strand = fields[8]
   362             repeatClass = fields[10]
   363         else:
   364             continue
   365         if repeatClass in ("Low_complexity", "Simple_repeat"):
   366             yield 200, "#ffe4ff", seqName, beg, end
   367         elif strand == "+":
   368             yield 100, "#fff4f4", seqName, beg, end
   369         else:
   370             yield 100, "#f4f4ff", seqName, beg, end
   371 
   372 def isExtraFirstGapField(fields):
   373     return fields[4].isdigit()
   374 
   375 def readGaps(opts, fileName, seqLimits):
   376     '''Read locations of unsequenced gaps, from an agp or gap file.'''
   377     if not fileName: return
   378     for line in myOpen(fileName):
   379         w = line.split()
   380         if not w or w[0][0] == "#": continue
   381         if isExtraFirstGapField(w): w = w[1:]
   382         if w[4] not in "NU": continue
   383         seqName = w[0]
   384         if seqName not in seqLimits: continue
   385         end = int(w[2])
   386         beg = end - int(w[5])  # zero-based coordinate
   387         if w[7] == "yes":
   388             yield 3000, opts.bridged_color, seqName, beg, end
   389         else:
   390             yield 2000, opts.unbridged_color, seqName, beg, end
   391 
   392 def bedBoxes(beds, seqLimits, origins, margin, edge, isTop, bpPerPix):
   393     for layer, color, seqName, beg, end in beds:
   394         cropBeg, cropEnd = seqLimits[seqName]
   395         beg = max(beg, cropBeg)
   396         end = min(end, cropEnd)
   397         if beg >= end: continue
   398         ori = origins[seqName]
   399         if layer <= 1000:
   400             # include partly-covered pixels
   401             b = (ori + beg) // bpPerPix
   402             e = div_ceil(ori + end, bpPerPix)
   403         else:
   404             # exclude partly-covered pixels
   405             b = div_ceil(ori + beg, bpPerPix)
   406             e = (ori + end) // bpPerPix
   407             if e <= b: continue
   408         if isTop:
   409             box = b, margin, e, edge
   410         else:
   411             box = margin, b, edge, e
   412         yield layer, color, box
   413 
   414 def drawAnnotations(im, boxes):
   415     # xxx use partial transparency for different-color overlaps?
   416     for layer, color, box in boxes:
   417         im.paste(color, box)
   418 
   419 def make_label(text, text_size, range_start, range_size):
   420     '''Return an axis label with endpoint & sort-order information.'''
   421     text_width, text_height = text_size
   422     label_start = range_start + (range_size - text_width) // 2
   423     label_end   = label_start + text_width
   424     sort_key    = text_width - range_size
   425     return sort_key, label_start, label_end, text, text_height
   426 
   427 def get_nonoverlapping_labels(labels, label_space):
   428     '''Get a subset of non-overlapping axis labels, greedily.'''
   429     nonoverlapping_labels = []
   430     for i in labels:
   431         if True not in [i[1] < j[2] + label_space and j[1] < i[2] + label_space
   432                         for j in nonoverlapping_labels]:
   433             nonoverlapping_labels.append(i)
   434     return nonoverlapping_labels
   435 
   436 def get_axis_image(seqNames, name_sizes, seq_starts, seq_pix, textRot,
   437                    textAln, font, image_mode, opts):
   438     '''Make an image of axis labels.'''
   439     min_pos = seq_starts[0]
   440     max_pos = seq_starts[-1] + seq_pix[-1]
   441     margin = max(i[1] for i in name_sizes)
   442     labels = map(make_label, seqNames, name_sizes, seq_starts, seq_pix)
   443     labels = [i for i in labels if i[1] >= min_pos and i[2] <= max_pos]
   444     labels.sort()
   445     minPixTweenLabels = 0 if textRot else opts.label_space
   446     labels = get_nonoverlapping_labels(labels, minPixTweenLabels)
   447     image_size = (margin, max_pos) if textRot else (max_pos, margin)
   448     im = Image.new(image_mode, image_size, opts.border_color)
   449     draw = ImageDraw.Draw(im)
   450     for i in labels:
   451         base = margin - i[4] if textAln else 0
   452         position = (base, i[1]) if textRot else (i[1], base)
   453         draw.text(position, i[3], font=font, fill=opts.text_color)
   454     return im
   455 
   456 def seqOrigins(seqNames, seq_starts, seqLimits, bp_per_pix):
   457     for i, j in zip(seqNames, seq_starts):
   458         yield i, bp_per_pix * j - seqLimits[i][0]
   459 
   460 def lastDotplot(opts, args):
   461     if opts.fontfile:  font = ImageFont.truetype(opts.fontfile, opts.fontsize)
   462     else:              font = ImageFont.load_default()
   463 
   464     image_mode = 'RGB'
   465     forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
   466     reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
   467     zipped_colors = zip(forward_color, reverse_color)
   468     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
   469 
   470     warn("reading alignments...")
   471     alignmentInfo = readAlignments(args[0], opts)
   472     alignments, seqNames1, seqNames2, seqLimits1, seqLimits2 = alignmentInfo
   473     warn("done")
   474     if not alignments: raise Exception("there are no alignments")
   475 
   476     textRot1 = "vertical".startswith(opts.rot1)
   477     i1 = getSeqInfo(opts.sort1, seqNames1, seqLimits1,
   478                     font, opts.fontsize, image_mode, opts.lengths1, textRot1)
   479     seqNames1, seqSizes1, seqLabels1, labelSizes1, tMargin = i1
   480 
   481     textRot2 = "horizontal".startswith(opts.rot2)
   482     i2 = getSeqInfo(opts.sort2, seqNames2, seqLimits2,
   483                     font, opts.fontsize, image_mode, opts.lengths2, textRot2)
   484     seqNames2, seqSizes2, seqLabels2, labelSizes2, lMargin = i2
   485 
   486     warn("choosing bp per pixel...")
   487     pix_limit1 = opts.width  - lMargin
   488     pix_limit2 = opts.height - tMargin
   489     bpPerPix1 = get_bp_per_pix(seqSizes1, opts.border_pixels, pix_limit1)
   490     bpPerPix2 = get_bp_per_pix(seqSizes2, opts.border_pixels, pix_limit2)
   491     bpPerPix = max(bpPerPix1, bpPerPix2)
   492     warn("bp per pixel = " + str(bpPerPix))
   493 
   494     seq_pix1, seq_starts1, width  = get_pix_info(seqSizes1, bpPerPix,
   495                                                  opts.border_pixels, lMargin)
   496     seq_pix2, seq_starts2, height = get_pix_info(seqSizes2, bpPerPix,
   497                                                  opts.border_pixels, tMargin)
   498     warn("width:  " + str(width))
   499     warn("height: " + str(height))
   500 
   501     origins1 = dict(seqOrigins(seqNames1, seq_starts1, seqLimits1, bpPerPix))
   502     origins2 = dict(seqOrigins(seqNames2, seq_starts2, seqLimits2, bpPerPix))
   503 
   504     warn("processing alignments...")
   505     hits = alignmentPixels(width, height, alignments, bpPerPix,
   506                            origins1, origins2)
   507     warn("done")
   508 
   509     image_size = width, height
   510     im = Image.new(image_mode, image_size, opts.background_color)
   511 
   512     seqLimits1 = expandedSeqDict(seqLimits1)
   513     seqLimits2 = expandedSeqDict(seqLimits2)
   514     origins1 = expandedSeqDict(origins1)
   515     origins2 = expandedSeqDict(origins2)
   516 
   517     beds1 = itertools.chain(readBed(opts.bed1, seqLimits1),
   518                             readRmsk(opts.rmsk1, seqLimits1),
   519                             readGenePred(opts, opts.genePred1, seqLimits1),
   520                             readGaps(opts, opts.gap1, seqLimits1))
   521     b1 = bedBoxes(beds1, seqLimits1, origins1, tMargin, height, True, bpPerPix)
   522 
   523     beds2 = itertools.chain(readBed(opts.bed2, seqLimits2),
   524                             readRmsk(opts.rmsk2, seqLimits2),
   525                             readGenePred(opts, opts.genePred2, seqLimits2),
   526                             readGaps(opts, opts.gap2, seqLimits2))
   527     b2 = bedBoxes(beds2, seqLimits2, origins2, lMargin, width, False, bpPerPix)
   528 
   529     boxes = sorted(itertools.chain(b1, b2))
   530     drawAnnotations(im, boxes)
   531 
   532     for i in range(height):
   533         for j in range(width):
   534             store_value = hits[i * width + j]
   535             xy = j, i
   536             if   store_value == 1: im.putpixel(xy, forward_color)
   537             elif store_value == 2: im.putpixel(xy, reverse_color)
   538             elif store_value == 3: im.putpixel(xy, overlap_color)
   539 
   540     if opts.fontsize != 0:
   541         axis1 = get_axis_image(seqLabels1, labelSizes1, seq_starts1, seq_pix1,
   542                                textRot1, False, font, image_mode, opts)
   543         if textRot1:
   544             axis1 = axis1.transpose(Image.ROTATE_90)
   545         axis2 = get_axis_image(seqLabels2, labelSizes2, seq_starts2, seq_pix2,
   546                                textRot2, textRot2, font, image_mode, opts)
   547         if not textRot2:
   548             axis2 = axis2.transpose(Image.ROTATE_270)
   549         im.paste(axis1, (0, 0))
   550         im.paste(axis2, (0, 0))
   551 
   552     for i in seq_starts1[1:]:
   553         box = i - opts.border_pixels, tMargin, i, height
   554         im.paste(opts.border_color, box)
   555 
   556     for i in seq_starts2[1:]:
   557         box = lMargin, i - opts.border_pixels, width, i
   558         im.paste(opts.border_color, box)
   559 
   560     im.save(args[1])
   561 
   562 if __name__ == "__main__":
   563     usage = """%prog --help
   564    or: %prog [options] maf-or-tab-alignments dotplot.png
   565    or: %prog [options] maf-or-tab-alignments dotplot.gif
   566    or: ..."""
   567     description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
   568     op = optparse.OptionParser(usage=usage, description=description)
   569     op.add_option("-v", "--verbose", action="count",
   570                   help="show progress messages & data about the plot")
   571     op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
   572                   default=[],
   573                   help="which sequences to show from the 1st genome")
   574     op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
   575                   default=[],
   576                   help="which sequences to show from the 2nd genome")
   577     # Replace "width" & "height" with a single "length" option?
   578     op.add_option("-x", "--width", type="int", default=1000,
   579                   help="maximum width in pixels (default: %default)")
   580     op.add_option("-y", "--height", type="int", default=1000,
   581                   help="maximum height in pixels (default: %default)")
   582     op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
   583                   help="color for forward alignments (default: %default)")
   584     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
   585                   help="color for reverse alignments (default: %default)")
   586     op.add_option("--sort1", type="int", default=1, metavar="N",
   587                   help="genome1 sequence order: 0=input order, 1=name order, "
   588                   "2=length order (default=%default)")
   589     op.add_option("--sort2", type="int", default=1, metavar="N",
   590                   help="genome2 sequence order: 0=input order, 1=name order, "
   591                   "2=length order (default=%default)")
   592     op.add_option("--trim1", action="store_true",
   593                   help="trim unaligned sequence flanks from the 1st genome")
   594     op.add_option("--trim2", action="store_true",
   595                   help="trim unaligned sequence flanks from the 2nd genome")
   596     op.add_option("--border-pixels", metavar="INT", type="int", default=1,
   597                   help="number of pixels between sequences (default=%default)")
   598     op.add_option("--border-color", metavar="COLOR", default="#dcdcdc",
   599                   help="color for pixels between sequences (default=%default)")
   600     # xxx --margin-color?
   601 
   602     og = optparse.OptionGroup(op, "Text options")
   603     og.add_option("-f", "--fontfile", metavar="FILE",
   604                   help="TrueType or OpenType font file")
   605     og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=11,
   606                   help="TrueType or OpenType font size (default: %default)")
   607     og.add_option("--rot1", metavar="ROT", default="h",
   608                   help="text rotation for the 1st genome (default=%default)")
   609     og.add_option("--rot2", metavar="ROT", default="v",
   610                   help="text rotation for the 2nd genome (default=%default)")
   611     og.add_option("--lengths1", action="store_true",
   612                   help="show sequence lengths for the 1st (horizontal) genome")
   613     og.add_option("--lengths2", action="store_true",
   614                   help="show sequence lengths for the 2nd (vertical) genome")
   615     op.add_option_group(og)
   616 
   617     og = optparse.OptionGroup(op, "Annotation options")
   618     og.add_option("--bed1", metavar="FILE",
   619                   help="read genome1 annotations from BED file")
   620     og.add_option("--bed2", metavar="FILE",
   621                   help="read genome2 annotations from BED file")
   622     og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
   623                   "RepeatMasker .out or rmsk.txt file")
   624     og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
   625                   "RepeatMasker .out or rmsk.txt file")
   626     op.add_option_group(og)
   627 
   628     og = optparse.OptionGroup(op, "Gene options")
   629     og.add_option("--genePred1", metavar="FILE",
   630                   help="read genome1 genes from genePred file")
   631     og.add_option("--genePred2", metavar="FILE",
   632                   help="read genome2 genes from genePred file")
   633     og.add_option("--exon-color", metavar="COLOR", default="#dfd",
   634                   help="color for exons (default=%default)")
   635     og.add_option("--cds-color", metavar="COLOR", default="#bdb",
   636                   help="color for protein-coding regions (default=%default)")
   637     op.add_option_group(og)
   638 
   639     og = optparse.OptionGroup(op, "Unsequenced gap options")
   640     og.add_option("--gap1", metavar="FILE",
   641                   help="read genome1 unsequenced gaps from agp or gap file")
   642     og.add_option("--gap2", metavar="FILE",
   643                   help="read genome2 unsequenced gaps from agp or gap file")
   644     og.add_option("--bridged-color", metavar="COLOR", default="yellow",
   645                   help="color for bridged gaps (default: %default)")
   646     og.add_option("--unbridged-color", metavar="COLOR", default="pink",
   647                   help="color for unbridged gaps (default: %default)")
   648     op.add_option_group(og)
   649     (opts, args) = op.parse_args()
   650     if len(args) != 2: op.error("2 arguments needed")
   651 
   652     opts.text_color = "black"
   653     opts.background_color = "white"
   654     opts.label_space = 5     # minimum number of pixels between axis labels
   655 
   656     try: lastDotplot(opts, args)
   657     except KeyboardInterrupt: pass  # avoid silly error message
   658     except Exception, e:
   659         prog = os.path.basename(sys.argv[0])
   660         sys.exit(prog + ": error: " + str(e))