3 # Read pair-wise alignments in MAF or LAST tabular format: write an
4 # "Oxford grid", a.k.a. dotplot.
6 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
7 # pixels with one or more aligned nt-pairs are black. This can look
8 # too crowded for large genome alignments. I tried shading each pixel
9 # according to the number of aligned nt-pairs within it, but the
10 # result is too faint. How can this be done better?
13 from fnmatch import fnmatchcase
14 from operator import itemgetter
16 import itertools, optparse, os, re, sys
18 # Try to make PIL/PILLOW work:
19 try: from PIL import Image, ImageDraw, ImageFont, ImageColor
20 except ImportError: import Image, ImageDraw, ImageFont, ImageColor
22 def myOpen(fileName): # faster than fileinput
27 if fileName.endswith(".gz"):
28 return gzip.open(fileName)
33 prog = os.path.basename(sys.argv[0])
34 sys.stderr.write(prog + ": " + message + "\n")
36 def groupByFirstItem(things):
37 for k, v in itertools.groupby(things, itemgetter(0)):
38 yield k, [i[1:] for i in v]
40 def croppedBlocks(blocks, ranges1, ranges2):
41 headBeg1, headBeg2, headSize = blocks[0]
44 cropBeg1, cropEnd1 = r1
46 cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
47 cropBeg2, cropEnd2 = r2
49 cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
50 for beg1, beg2, size in blocks:
51 b1 = max(cropBeg1, beg1)
52 e1 = min(cropEnd1, beg1 + size)
55 b2 = max(cropBeg2, b1 + offset)
56 e2 = min(cropEnd2, e1 + offset)
58 yield b2 - offset, b2, e2 - b2
60 def tabBlocks(beg1, beg2, blocks):
61 '''Get the gapless blocks of an alignment, from LAST tabular format.'''
62 for i in blocks.split(","):
69 yield beg1, beg2, size
73 def mafBlocks(beg1, beg2, seq1, seq2):
74 '''Get the gapless blocks of an alignment, from MAF format.'''
76 for x, y in itertools.izip(seq1, seq2):
79 yield beg1, beg2, size
86 yield beg1, beg2, size
93 if size: yield beg1, beg2, size
95 def alignmentInput(lines):
96 '''Get alignments and sequence lengths, from MAF or tabular format.'''
100 if line[0].isdigit(): # tabular format
101 chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
102 if w[4] == "-": beg1 -= seqlen1
103 chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
104 if w[9] == "-": beg2 -= seqlen2
105 blocks = tabBlocks(beg1, beg2, w[11])
106 yield chr1, seqlen1, chr2, seqlen2, blocks
107 elif line[0] == "s": # MAF format
109 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
110 if w[4] == "-": beg1 -= seqlen1
113 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
114 if w[4] == "-": beg2 -= seqlen2
115 blocks = mafBlocks(beg1, beg2, seq1, seq2)
116 yield chr1, seqlen1, chr2, seqlen2, blocks
119 def seqRequestFromText(text):
121 pattern, interval = text.rsplit(":", 1)
123 beg, end = interval.rsplit("-", 1)
124 return pattern, int(beg), int(end) # beg may be negative
125 return text, 0, sys.maxsize
127 def rangesFromSeqName(seqRequests, name, seqLen):
129 base = name.split(".")[-1] # allow for names like hg19.chr7
130 for pat, beg, end in seqRequests:
131 if fnmatchcase(name, pat) or fnmatchcase(base, pat):
132 yield max(beg, 0), min(end, seqLen)
136 def updateSeqs(coverDict, seqRanges, seqName, ranges, coveredRange):
137 beg, end = coveredRange
139 coveredRange = -end, -beg
140 if seqName in coverDict:
141 coverDict[seqName].append(coveredRange)
143 coverDict[seqName] = [coveredRange]
144 for beg, end in ranges:
145 r = seqName, beg, end
148 def readAlignments(fileName, opts):
149 '''Get alignments and sequence limits, from MAF or tabular format.'''
150 seqRequests1 = map(seqRequestFromText, opts.seq1)
151 seqRequests2 = map(seqRequestFromText, opts.seq2)
158 lines = myOpen(fileName)
159 for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
160 ranges1 = sorted(rangesFromSeqName(seqRequests1, seqName1, seqLen1))
161 if not ranges1: continue
162 ranges2 = sorted(rangesFromSeqName(seqRequests2, seqName2, seqLen2))
163 if not ranges2: continue
164 b = list(croppedBlocks(list(blocks), ranges1, ranges2))
166 aln = seqName1, seqName2, b
167 alignments.append(aln)
168 coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
169 updateSeqs(coverDict1, seqRanges1, seqName1, ranges1, coveredRange1)
170 coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
171 updateSeqs(coverDict2, seqRanges2, seqName2, ranges2, coveredRange2)
172 return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
174 def nameAndRangesFromDict(cropDict, seqName):
175 if seqName in cropDict:
176 return seqName, cropDict[seqName]
177 n = seqName.split(".")[-1]
179 return n, cropDict[n]
182 def rangesForSecondaryAlignments(primaryRanges, seqLen):
187 def readSecondaryAlignments(opts, cropRanges1, cropRanges2):
188 cropDict1 = dict(groupByFirstItem(cropRanges1))
189 cropDict2 = dict(groupByFirstItem(cropRanges2))
196 lines = myOpen(opts.alignments)
197 for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
198 seqName1, ranges1 = nameAndRangesFromDict(cropDict1, seqName1)
199 seqName2, ranges2 = nameAndRangesFromDict(cropDict2, seqName2)
200 if not ranges1 and not ranges2:
202 r1 = rangesForSecondaryAlignments(ranges1, seqLen1)
203 r2 = rangesForSecondaryAlignments(ranges2, seqLen2)
204 b = list(croppedBlocks(list(blocks), r1, r2))
206 aln = seqName1, seqName2, b
207 alignments.append(aln)
209 coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
210 updateSeqs(coverDict1, seqRanges1, seqName1, r1, coveredRange1)
212 coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
213 updateSeqs(coverDict2, seqRanges2, seqName2, r2, coveredRange2)
214 return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
216 def twoValuesFromOption(text, separator):
217 if separator in text:
218 return text.split(separator)
221 def mergedRanges(ranges):
222 oldBeg, maxEnd = ranges[0]
223 for beg, end in ranges:
232 def mergedRangesPerSeq(coverDict):
233 for k, v in coverDict.iteritems():
235 yield k, list(mergedRanges(v))
237 def coveredLength(mergedCoverDict):
238 return sum(sum(e - b for b, e in v) for v in mergedCoverDict.itervalues())
240 def trimmed(seqRanges, coverDict, minAlignedBases, maxGapFrac, endPad, midPad):
241 maxEndGapFrac, maxMidGapFrac = twoValuesFromOption(maxGapFrac, ",")
242 maxEndGap = max(float(maxEndGapFrac) * minAlignedBases, endPad * 1.0)
243 maxMidGap = max(float(maxMidGapFrac) * minAlignedBases, midPad * 2.0)
245 for seqName, rangeBeg, rangeEnd in seqRanges:
246 seqBlocks = coverDict[seqName]
247 blocks = [i for i in seqBlocks if i[0] < rangeEnd and i[1] > rangeBeg]
248 if blocks[0][0] - rangeBeg > maxEndGap:
249 rangeBeg = blocks[0][0] - endPad
250 for j, y in enumerate(blocks):
253 if y[0] - x[1] > maxMidGap:
254 yield seqName, rangeBeg, x[1] + midPad
255 rangeBeg = y[0] - midPad
256 if rangeEnd - blocks[-1][1] > maxEndGap:
257 rangeEnd = blocks[-1][1] + endPad
258 yield seqName, rangeBeg, rangeEnd
260 def natural_sort_key(my_string):
261 '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
262 parts = re.split(r'(\d+)', my_string)
263 parts[1::2] = map(int, parts[1::2])
266 def nameKey(oneSeqRanges):
267 return natural_sort_key(oneSeqRanges[0][0])
269 def sizeKey(oneSeqRanges):
270 return sum(b - e for n, b, e in oneSeqRanges), nameKey(oneSeqRanges)
272 def getSortedRanges(seqRanges, sortOpt):
273 g = [list(v) for k, v in itertools.groupby(seqRanges, itemgetter(0))]
278 return [j for i in g for j in i]
280 def allSortedRanges(seqRanges, seqRangesB, sortOpt):
281 x, y = twoValuesFromOption(sortOpt, ":")
282 return getSortedRanges(seqRanges, x) + getSortedRanges(seqRangesB, y)
288 groups.append(t[-3:])
290 return ",".join(reversed(groups))
293 suffixes = "bp", "kb", "Mb", "Gb"
294 for i, x in enumerate(suffixes):
297 return "%.2g" % (1.0 * size / j) + x
298 if size < j * 1000 or i == len(suffixes) - 1:
299 return "%.0f" % (1.0 * size / j) + x
301 def labelText(seqRange, labelOpt):
302 seqName, beg, end = seqRange
304 return seqName + ": " + sizeText(end - beg)
306 return seqName + ":" + prettyNum(beg) + ": " + sizeText(end - beg)
308 return seqName + ":" + prettyNum(beg) + "-" + prettyNum(end)
311 def rangeLabels(seqRanges, labelOpt, font, fontsize, image_mode, textRot):
314 im = Image.new(image_mode, image_size)
315 draw = ImageDraw.Draw(im)
318 text = labelText(r, labelOpt)
320 x, y = draw.textsize(text, font=font)
325 def dataFromRanges(cutRanges, cutRangesB, sortOpt, font,
326 fontsize, image_mode, labelOpt, textRot):
327 s = allSortedRanges(cutRanges, cutRangesB, sortOpt)
329 warn("\t".join(map(str, i)))
331 rangeSizes = [e - b for n, b, e in s]
332 labs = list(rangeLabels(s, labelOpt, font, fontsize, image_mode, textRot))
333 margin = max(i[2] for i in labs)
334 # xxx the margin may be too big, because some labels may get omitted
335 return s, rangeSizes, labs, margin
338 '''Return x / y rounded up.'''
342 def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
343 '''Get the minimum bp-per-pixel that fits in the size limit.'''
344 warn("choosing bp per pixel...")
345 numOfRanges = len(rangeSizes)
346 maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
347 if maxPixelsInRanges < numOfRanges:
348 raise Exception("can't fit the image: too many sequences?")
349 negLimit = -maxPixelsInRanges
350 negBpPerPix = sum(rangeSizes) // negLimit
352 if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
356 def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
357 '''Get the start pixel for each range.'''
359 pix_tot = margin - pixTweenRanges
360 for i in rangePixLens:
361 pix_tot += pixTweenRanges
362 rangePixBegs.append(pix_tot)
366 def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
367 '''Return pixel information about the ranges.'''
368 rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
369 rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
370 tot_pix = rangePixBegs[-1] + rangePixLens[-1]
371 return rangePixBegs, rangePixLens, tot_pix
373 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
375 q1, r1 = divmod(beg1, bp_per_pix)
376 q2, r2 = divmod(beg2, bp_per_pix)
377 hits[q2 * width + q1] |= 1
378 next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
379 if next_pix >= size: break
384 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
387 q1, r1 = divmod(beg1, bp_per_pix)
388 q2, r2 = divmod(beg2, bp_per_pix)
389 hits[q2 * width + q1] |= 2
390 next_pix = min(bp_per_pix - r1, r2 + 1)
391 if next_pix >= size: break
396 def findOrigin(ranges, beg, size):
399 for rangeBeg, rangeEnd, origin in ranges:
403 def alignmentPixels(width, height, alignments, bp_per_pix,
404 rangeDict1, rangeDict2):
405 hits = [0] * (width * height) # the image data
406 for seq1, seq2, blocks in alignments:
407 beg1, beg2, size = blocks[0]
408 ori1 = findOrigin(rangeDict1[seq1], beg1, size)
409 ori2 = findOrigin(rangeDict2[seq2], beg2, size)
410 for beg1, beg2, size in blocks:
412 beg1 = -(beg1 + size)
413 beg2 = -(beg2 + size)
415 drawLineForward(hits, width, bp_per_pix,
416 beg1 + ori1, beg2 + ori2, size)
418 drawLineReverse(hits, width, bp_per_pix,
419 beg1 + ori1, beg2 - ori2, size)
422 def expandedSeqDict(seqDict):
423 '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
424 newDict = seqDict.copy()
425 for name, x in seqDict.items():
427 base = name.split(".")[-1]
428 if base in newDict: # an ambiguous case was found:
429 return seqDict # so give up completely
433 def readBed(fileName, rangeDict):
434 for line in myOpen(fileName):
438 if seqName not in rangeDict: continue
447 if len(w) > 8 and w[8].count(",") == 2:
448 color = "rgb(" + w[8] + ")"
453 yield layer, color, seqName, beg, end
455 def commaSeparatedInts(text):
456 return map(int, text.rstrip(",").split(","))
458 def readGenePred(opts, fileName, rangeDict):
459 for line in myOpen(fileName):
460 fields = line.split()
461 if not fields: continue
462 if fields[2] not in "+-": fields = fields[1:]
464 if seqName not in rangeDict: continue
466 cdsBeg = int(fields[5])
467 cdsEnd = int(fields[6])
468 exonBegs = commaSeparatedInts(fields[8])
469 exonEnds = commaSeparatedInts(fields[9])
470 for beg, end in zip(exonBegs, exonEnds):
471 yield 300, opts.exon_color, seqName, beg, end
474 if b < e: yield 400, opts.cds_color, seqName, b, e
476 def readRmsk(fileName, rangeDict):
477 for line in myOpen(fileName):
478 fields = line.split()
479 if len(fields) == 17: # rmsk.txt
481 if seqName not in rangeDict: continue # do this ASAP for speed
485 repeatClass = fields[11]
486 elif len(fields) == 15: # .out
488 if seqName not in rangeDict: continue
489 beg = int(fields[5]) - 1
492 repeatClass = fields[10]
495 if repeatClass in ("Low_complexity", "Simple_repeat"):
496 yield 200, "#fbf", seqName, beg, end
498 yield 100, "#ffe8e8", seqName, beg, end
500 yield 100, "#e8e8ff", seqName, beg, end
502 def isExtraFirstGapField(fields):
503 return fields[4].isdigit()
505 def readGaps(opts, fileName, rangeDict):
506 '''Read locations of unsequenced gaps, from an agp or gap file.'''
507 for line in myOpen(fileName):
509 if not w or w[0][0] == "#": continue
510 if isExtraFirstGapField(w): w = w[1:]
511 if w[4] not in "NU": continue
513 if seqName not in rangeDict: continue
515 beg = end - int(w[5]) # zero-based coordinate
517 yield 3000, opts.bridged_color, seqName, beg, end
519 yield 2000, opts.unbridged_color, seqName, beg, end
521 def bedBoxes(beds, rangeDict, margin, edge, isTop, bpPerPix):
522 for layer, color, seqName, bedBeg, bedEnd in beds:
523 for rangeBeg, rangeEnd, origin in rangeDict[seqName]:
524 beg = max(bedBeg, rangeBeg)
525 end = min(bedEnd, rangeEnd)
526 if beg >= end: continue
528 # include partly-covered pixels
529 b = (origin + beg) // bpPerPix
530 e = div_ceil(origin + end, bpPerPix)
532 # exclude partly-covered pixels
533 b = div_ceil(origin + beg, bpPerPix)
534 e = (origin + end) // bpPerPix
537 box = b, margin, e, edge
539 box = margin, b, edge, e
540 yield layer, color, box
542 def drawAnnotations(im, boxes):
543 # xxx use partial transparency for different-color overlaps?
544 for layer, color, box in boxes:
547 def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
548 '''Return axis labels with endpoint & sort-order information.'''
550 for i, j, k in zip(labels, rangePixBegs, rangePixLens):
551 text, textWidth, textHeight = i
552 if textWidth > maxWidth:
554 labelBeg = j + (k - textWidth) // 2
555 labelEnd = labelBeg + textWidth
556 sortKey = textWidth - k
558 sortKey += maxWidth * (beg - labelBeg)
560 labelEnd = beg + textWidth
562 sortKey += maxWidth * (labelEnd - end)
564 labelBeg = end - textWidth
565 yield sortKey, labelBeg, labelEnd, text, textHeight
567 def nonoverlappingLabels(labels, minPixTweenLabels):
568 '''Get a subset of non-overlapping axis labels, greedily.'''
571 beg = i[1] - minPixTweenLabels
572 end = i[2] + minPixTweenLabels
573 if all(j[2] <= beg or j[1] >= end for j in out):
577 def axisImage(labels, rangePixBegs, rangePixLens, textRot,
578 textAln, font, image_mode, opts):
579 '''Make an image of axis labels.'''
580 beg = rangePixBegs[0]
581 end = rangePixBegs[-1] + rangePixLens[-1]
582 margin = max(i[2] for i in labels)
583 labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
584 minPixTweenLabels = 0 if textRot else opts.label_space
585 labels = nonoverlappingLabels(labels, minPixTweenLabels)
586 image_size = (margin, end) if textRot else (end, margin)
587 im = Image.new(image_mode, image_size, opts.margin_color)
588 draw = ImageDraw.Draw(im)
590 base = margin - i[4] if textAln else 0
591 position = (base, i[1]) if textRot else (i[1], base)
592 draw.text(position, i[3], font=font, fill=opts.text_color)
595 def rangesWithOrigins(sortedRanges, rangePixBegs, bpPerPix):
596 for i, j in zip(sortedRanges, rangePixBegs):
597 seqName, rangeBeg, rangeEnd = i
598 origin = bpPerPix * j - rangeBeg
599 yield seqName, (rangeBeg, rangeEnd, origin)
601 def rangesPerSeq(sortedRanges, rangePixBegs, bpPerPix):
602 a = rangesWithOrigins(sortedRanges, rangePixBegs, bpPerPix)
603 for k, v in itertools.groupby(a, itemgetter(0)):
604 yield k, [i[1] for i in v]
608 return ImageFont.truetype(opts.fontfile, opts.fontsize)
611 x = ["fc-match", "-f%{file}", "arial"]
612 p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
613 out, err = p.communicate()
614 fileNames.append(out)
616 warn("fc-match error: " + str(e))
617 fileNames.append("/Library/Fonts/Arial.ttf") # for Mac
620 font = ImageFont.truetype(i, opts.fontsize)
624 warn("font load error: " + str(e))
625 return ImageFont.load_default()
627 def lastDotplot(opts, args):
630 forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
631 reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
632 zipped_colors = zip(forward_color, reverse_color)
633 overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
635 maxGap1, maxGapB1 = twoValuesFromOption(opts.max_gap1, ":")
636 maxGap2, maxGapB2 = twoValuesFromOption(opts.max_gap2, ":")
638 warn("reading alignments...")
639 alnData = readAlignments(args[0], opts)
640 alignments, seqRanges1, coverDict1, seqRanges2, coverDict2 = alnData
641 if not alignments: raise Exception("there are no alignments")
643 coverDict1 = dict(mergedRangesPerSeq(coverDict1))
644 coverDict2 = dict(mergedRangesPerSeq(coverDict2))
645 minAlignedBases = min(coveredLength(coverDict1), coveredLength(coverDict2))
646 pad = int(opts.pad * minAlignedBases)
647 cutRanges1 = list(trimmed(seqRanges1, coverDict1, minAlignedBases,
649 cutRanges2 = list(trimmed(seqRanges2, coverDict2, minAlignedBases,
652 warn("reading secondary alignments...")
653 alnDataB = readSecondaryAlignments(opts, cutRanges1, cutRanges2)
654 alignmentsB, seqRangesB1, coverDictB1, seqRangesB2, coverDictB2 = alnDataB
656 coverDictB1 = dict(mergedRangesPerSeq(coverDictB1))
657 coverDictB2 = dict(mergedRangesPerSeq(coverDictB2))
658 cutRangesB1 = trimmed(seqRangesB1, coverDictB1, minAlignedBases,
660 cutRangesB2 = trimmed(seqRangesB2, coverDictB2, minAlignedBases,
663 textRot1 = "vertical".startswith(opts.rot1)
664 i1 = dataFromRanges(cutRanges1, cutRangesB1, opts.sort1, font,
665 opts.fontsize, image_mode, opts.labels1, textRot1)
666 sortedRanges1, rangeSizes1, labelData1, tMargin = i1
668 textRot2 = "horizontal".startswith(opts.rot2)
669 i2 = dataFromRanges(cutRanges2, cutRangesB2, opts.sort2, font,
670 opts.fontsize, image_mode, opts.labels2, textRot2)
671 sortedRanges2, rangeSizes2, labelData2, lMargin = i2
673 maxPixels1 = opts.width - lMargin
674 maxPixels2 = opts.height - tMargin
675 bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
676 bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
677 bpPerPix = max(bpPerPix1, bpPerPix2)
678 warn("bp per pixel = " + str(bpPerPix))
680 p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
681 rangePixBegs1, rangePixLens1, width = p1
682 rangeDict1 = dict(rangesPerSeq(sortedRanges1, rangePixBegs1, bpPerPix))
684 p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
685 rangePixBegs2, rangePixLens2, height = p2
686 rangeDict2 = dict(rangesPerSeq(sortedRanges2, rangePixBegs2, bpPerPix))
688 warn("width: " + str(width))
689 warn("height: " + str(height))
691 warn("processing alignments...")
692 hits = alignmentPixels(width, height, alignments + alignmentsB, bpPerPix,
693 rangeDict1, rangeDict2)
695 warn("reading annotations...")
697 rangeDict1 = expandedSeqDict(rangeDict1)
698 beds1 = itertools.chain(readBed(opts.bed1, rangeDict1),
699 readRmsk(opts.rmsk1, rangeDict1),
700 readGenePred(opts, opts.genePred1, rangeDict1),
701 readGaps(opts, opts.gap1, rangeDict1))
702 b1 = bedBoxes(beds1, rangeDict1, tMargin, height, True, bpPerPix)
704 rangeDict2 = expandedSeqDict(rangeDict2)
705 beds2 = itertools.chain(readBed(opts.bed2, rangeDict2),
706 readRmsk(opts.rmsk2, rangeDict2),
707 readGenePred(opts, opts.genePred2, rangeDict2),
708 readGaps(opts, opts.gap2, rangeDict2))
709 b2 = bedBoxes(beds2, rangeDict2, lMargin, width, False, bpPerPix)
711 boxes = sorted(itertools.chain(b1, b2))
715 image_size = width, height
716 im = Image.new(image_mode, image_size, opts.background_color)
718 drawAnnotations(im, boxes)
720 for i in range(height):
721 for j in range(width):
722 store_value = hits[i * width + j]
724 if store_value == 1: im.putpixel(xy, forward_color)
725 elif store_value == 2: im.putpixel(xy, reverse_color)
726 elif store_value == 3: im.putpixel(xy, overlap_color)
728 if opts.fontsize != 0:
729 axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
730 textRot1, False, font, image_mode, opts)
732 axis1 = axis1.transpose(Image.ROTATE_90)
733 axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
734 textRot2, textRot2, font, image_mode, opts)
736 axis2 = axis2.transpose(Image.ROTATE_270)
737 im.paste(axis1, (0, 0))
738 im.paste(axis2, (0, 0))
740 for i in rangePixBegs1[1:]:
741 box = i - opts.border_pixels, tMargin, i, height
742 im.paste(opts.border_color, box)
744 for i in rangePixBegs2[1:]:
745 box = lMargin, i - opts.border_pixels, width, i
746 im.paste(opts.border_color, box)
750 if __name__ == "__main__":
751 usage = """%prog --help
752 or: %prog [options] maf-or-tab-alignments dotplot.png
753 or: %prog [options] maf-or-tab-alignments dotplot.gif
755 description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
756 op = optparse.OptionParser(usage=usage, description=description)
757 op.add_option("-v", "--verbose", action="count",
758 help="show progress messages & data about the plot")
759 op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
761 help="which sequences to show from the 1st genome")
762 op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
764 help="which sequences to show from the 2nd genome")
765 # Replace "width" & "height" with a single "length" option?
766 op.add_option("-x", "--width", type="int", default=1000,
767 help="maximum width in pixels (default: %default)")
768 op.add_option("-y", "--height", type="int", default=1000,
769 help="maximum height in pixels (default: %default)")
770 op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
771 help="color for forward alignments (default: %default)")
772 op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
773 help="color for reverse alignments (default: %default)")
774 op.add_option("--alignments", metavar="FILE", help="secondary alignments")
775 op.add_option("--sort1", default="1", metavar="N",
776 help="genome1 sequence order: 0=input order, 1=name order, "
777 "2=length order (default=%default)")
778 op.add_option("--sort2", default="1", metavar="N",
779 help="genome2 sequence order: 0=input order, 1=name order, "
780 "2=length order (default=%default)")
781 op.add_option("--max-gap1", metavar="FRAC", default="0.5,2", help=
782 "maximum unaligned (end,mid) gap in genome1: "
783 "fraction of aligned length (default=%default)")
784 op.add_option("--max-gap2", metavar="FRAC", default="0.5,2", help=
785 "maximum unaligned (end,mid) gap in genome2: "
786 "fraction of aligned length (default=%default)")
787 op.add_option("--pad", metavar="FRAC", type="float", default=0.04, help=
788 "pad length when cutting unaligned gaps: "
789 "fraction of aligned length (default=%default)")
790 op.add_option("--border-pixels", metavar="INT", type="int", default=1,
791 help="number of pixels between sequences (default=%default)")
792 op.add_option("--border-color", metavar="COLOR", default="black",
793 help="color for pixels between sequences (default=%default)")
794 # --break-color and/or --break-pixels for intra-sequence breaks?
795 op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
798 og = optparse.OptionGroup(op, "Text options")
799 og.add_option("-f", "--fontfile", metavar="FILE",
800 help="TrueType or OpenType font file")
801 og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
802 help="TrueType or OpenType font size (default: %default)")
803 og.add_option("--labels1", type="int", default=0, metavar="N", help=
804 "genome1 labels: 0=name, 1=name:length, "
805 "2=name:start:length, 3=name:start-end (default=%default)")
806 og.add_option("--labels2", type="int", default=0, metavar="N", help=
807 "genome2 labels: 0=name, 1=name:length, "
808 "2=name:start:length, 3=name:start-end (default=%default)")
809 og.add_option("--rot1", metavar="ROT", default="h",
810 help="text rotation for the 1st genome (default=%default)")
811 og.add_option("--rot2", metavar="ROT", default="v",
812 help="text rotation for the 2nd genome (default=%default)")
813 op.add_option_group(og)
815 og = optparse.OptionGroup(op, "Annotation options")
816 og.add_option("--bed1", metavar="FILE",
817 help="read genome1 annotations from BED file")
818 og.add_option("--bed2", metavar="FILE",
819 help="read genome2 annotations from BED file")
820 og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
821 "RepeatMasker .out or rmsk.txt file")
822 og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
823 "RepeatMasker .out or rmsk.txt file")
824 op.add_option_group(og)
826 og = optparse.OptionGroup(op, "Gene options")
827 og.add_option("--genePred1", metavar="FILE",
828 help="read genome1 genes from genePred file")
829 og.add_option("--genePred2", metavar="FILE",
830 help="read genome2 genes from genePred file")
831 og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
832 help="color for exons (default=%default)")
833 og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
834 help="color for protein-coding regions (default=%default)")
835 op.add_option_group(og)
837 og = optparse.OptionGroup(op, "Unsequenced gap options")
838 og.add_option("--gap1", metavar="FILE",
839 help="read genome1 unsequenced gaps from agp or gap file")
840 og.add_option("--gap2", metavar="FILE",
841 help="read genome2 unsequenced gaps from agp or gap file")
842 og.add_option("--bridged-color", metavar="COLOR", default="yellow",
843 help="color for bridged gaps (default: %default)")
844 og.add_option("--unbridged-color", metavar="COLOR", default="orange",
845 help="color for unbridged gaps (default: %default)")
846 op.add_option_group(og)
847 (opts, args) = op.parse_args()
848 if len(args) != 2: op.error("2 arguments needed")
850 opts.text_color = "black"
851 opts.background_color = "white"
852 opts.label_space = 5 # minimum number of pixels between axis labels
854 try: lastDotplot(opts, args)
855 except KeyboardInterrupt: pass # avoid silly error message
856 except Exception as e:
857 prog = os.path.basename(sys.argv[0])
858 sys.exit(prog + ": error: " + str(e))