3 # Read pair-wise alignments in MAF or LAST tabular format: write an
4 # "Oxford grid", a.k.a. dotplot.
6 # TODO: Currently, pixels with zero aligned nt-pairs are white, and
7 # pixels with one or more aligned nt-pairs are black. This can look
8 # too crowded for large genome alignments. I tried shading each pixel
9 # according to the number of aligned nt-pairs within it, but the
10 # result is too faint. How can this be done better?
13 from fnmatch import fnmatchcase
14 from operator import itemgetter
16 import itertools, optparse, os, re, sys
18 # Try to make PIL/PILLOW work:
19 try: from PIL import Image, ImageDraw, ImageFont, ImageColor
20 except ImportError: import Image, ImageDraw, ImageFont, ImageColor
22 def myOpen(fileName): # faster than fileinput
27 if fileName.endswith(".gz"):
28 return gzip.open(fileName)
33 prog = os.path.basename(sys.argv[0])
34 sys.stderr.write(prog + ": " + message + "\n")
36 def groupByFirstItem(things):
37 for k, v in itertools.groupby(things, itemgetter(0)):
38 yield k, [i[1:] for i in v]
40 def croppedBlocks(blocks, ranges1, ranges2):
41 headBeg1, headBeg2, headSize = blocks[0]
44 cropBeg1, cropEnd1 = r1
46 cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
47 cropBeg2, cropEnd2 = r2
49 cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
50 for beg1, beg2, size in blocks:
51 b1 = max(cropBeg1, beg1)
52 e1 = min(cropEnd1, beg1 + size)
55 b2 = max(cropBeg2, b1 + offset)
56 e2 = min(cropEnd2, e1 + offset)
58 yield b2 - offset, b2, e2 - b2
60 def tabBlocks(beg1, beg2, blocks):
61 '''Get the gapless blocks of an alignment, from LAST tabular format.'''
62 for i in blocks.split(","):
69 yield beg1, beg2, size
73 def mafBlocks(beg1, beg2, seq1, seq2):
74 '''Get the gapless blocks of an alignment, from MAF format.'''
76 for x, y in itertools.izip(seq1, seq2):
79 yield beg1, beg2, size
86 yield beg1, beg2, size
93 if size: yield beg1, beg2, size
95 def alignmentInput(lines):
96 '''Get alignments and sequence lengths, from MAF or tabular format.'''
100 if line[0].isdigit(): # tabular format
101 chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
102 if w[4] == "-": beg1 -= seqlen1
103 chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
104 if w[9] == "-": beg2 -= seqlen2
105 blocks = tabBlocks(beg1, beg2, w[11])
106 yield chr1, seqlen1, chr2, seqlen2, blocks
107 elif line[0] == "s": # MAF format
109 chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
110 if w[4] == "-": beg1 -= seqlen1
113 chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
114 if w[4] == "-": beg2 -= seqlen2
115 blocks = mafBlocks(beg1, beg2, seq1, seq2)
116 yield chr1, seqlen1, chr2, seqlen2, blocks
119 def seqRequestFromText(text):
121 pattern, interval = text.rsplit(":", 1)
123 beg, end = interval.rsplit("-", 1)
124 return pattern, int(beg), int(end) # beg may be negative
125 return text, 0, sys.maxsize
127 def rangesFromSeqName(seqRequests, name, seqLen):
129 base = name.split(".")[-1] # allow for names like hg19.chr7
130 for pat, beg, end in seqRequests:
131 if fnmatchcase(name, pat) or fnmatchcase(base, pat):
132 yield max(beg, 0), min(end, seqLen)
136 def updateSeqs(coverDict, seqRanges, seqName, ranges, coveredRange):
137 beg, end = coveredRange
139 coveredRange = -end, -beg
140 if seqName in coverDict:
141 coverDict[seqName].append(coveredRange)
143 coverDict[seqName] = [coveredRange]
144 for beg, end in ranges:
145 r = seqName, beg, end
148 def readAlignments(fileName, opts):
149 '''Get alignments and sequence limits, from MAF or tabular format.'''
150 seqRequests1 = map(seqRequestFromText, opts.seq1)
151 seqRequests2 = map(seqRequestFromText, opts.seq2)
158 lines = myOpen(fileName)
159 for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
160 ranges1 = sorted(rangesFromSeqName(seqRequests1, seqName1, seqLen1))
161 if not ranges1: continue
162 ranges2 = sorted(rangesFromSeqName(seqRequests2, seqName2, seqLen2))
163 if not ranges2: continue
164 b = list(croppedBlocks(list(blocks), ranges1, ranges2))
166 aln = seqName1, seqName2, b
167 alignments.append(aln)
168 coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
169 updateSeqs(coverDict1, seqRanges1, seqName1, ranges1, coveredRange1)
170 coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
171 updateSeqs(coverDict2, seqRanges2, seqName2, ranges2, coveredRange2)
172 return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
174 def nameAndRangesFromDict(cropDict, seqName):
175 if seqName in cropDict:
176 return seqName, cropDict[seqName]
177 n = seqName.split(".")[-1]
179 return n, cropDict[n]
182 def rangesForSecondaryAlignments(primaryRanges, seqLen):
187 def readSecondaryAlignments(opts, cropRanges1, cropRanges2):
188 cropDict1 = dict(groupByFirstItem(cropRanges1))
189 cropDict2 = dict(groupByFirstItem(cropRanges2))
196 lines = myOpen(opts.alignments)
197 for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
198 seqName1, ranges1 = nameAndRangesFromDict(cropDict1, seqName1)
199 seqName2, ranges2 = nameAndRangesFromDict(cropDict2, seqName2)
200 if not ranges1 and not ranges2:
202 r1 = rangesForSecondaryAlignments(ranges1, seqLen1)
203 r2 = rangesForSecondaryAlignments(ranges2, seqLen2)
204 b = list(croppedBlocks(list(blocks), r1, r2))
206 aln = seqName1, seqName2, b
207 alignments.append(aln)
209 coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
210 updateSeqs(coverDict1, seqRanges1, seqName1, r1, coveredRange1)
212 coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
213 updateSeqs(coverDict2, seqRanges2, seqName2, r2, coveredRange2)
214 return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
216 def twoValuesFromOption(text, separator):
217 if separator in text:
218 return text.split(separator)
221 def mergedRanges(ranges):
222 oldBeg, maxEnd = ranges[0]
223 for beg, end in ranges:
232 def mergedRangesPerSeq(coverDict):
233 for k, v in coverDict.iteritems():
235 yield k, list(mergedRanges(v))
237 def coveredLength(mergedCoverDict):
238 return sum(sum(e - b for b, e in v) for v in mergedCoverDict.itervalues())
240 def trimmed(seqRanges, coverDict, minAlignedBases, maxGapFrac, endPad, midPad):
241 maxEndGapFrac, maxMidGapFrac = twoValuesFromOption(maxGapFrac, ",")
242 maxEndGap = max(float(maxEndGapFrac) * minAlignedBases, endPad * 1.0)
243 maxMidGap = max(float(maxMidGapFrac) * minAlignedBases, midPad * 2.0)
245 for seqName, rangeBeg, rangeEnd in seqRanges:
246 seqBlocks = coverDict[seqName]
247 blocks = [i for i in seqBlocks if i[0] < rangeEnd and i[1] > rangeBeg]
248 if blocks[0][0] - rangeBeg > maxEndGap:
249 rangeBeg = blocks[0][0] - endPad
250 for j, y in enumerate(blocks):
253 if y[0] - x[1] > maxMidGap:
254 yield seqName, rangeBeg, x[1] + midPad
255 rangeBeg = y[0] - midPad
256 if rangeEnd - blocks[-1][1] > maxEndGap:
257 rangeEnd = blocks[-1][1] + endPad
258 yield seqName, rangeBeg, rangeEnd
260 def natural_sort_key(my_string):
261 '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
262 parts = re.split(r'(\d+)', my_string)
263 parts[1::2] = map(int, parts[1::2])
266 def nameKey(oneSeqRanges):
267 return natural_sort_key(oneSeqRanges[0][0])
269 def sizeKey(oneSeqRanges):
270 return sum(b - e for n, b, e in oneSeqRanges), nameKey(oneSeqRanges)
272 def getSortedRanges(seqRanges, sortOpt):
273 g = [list(v) for k, v in itertools.groupby(seqRanges, itemgetter(0))]
278 return [j for i in g for j in i]
280 def allSortedRanges(seqRanges, seqRangesB, sortOpt):
281 x, y = twoValuesFromOption(sortOpt, ":")
282 return getSortedRanges(seqRanges, x) + getSortedRanges(seqRangesB, y)
288 groups.append(t[-3:])
290 return ",".join(reversed(groups))
293 suffixes = "bp", "kb", "Mb", "Gb"
294 for i, x in enumerate(suffixes):
297 return "%.2g" % (1.0 * size / j) + x
298 if size < j * 1000 or i == len(suffixes) - 1:
299 return "%.0f" % (1.0 * size / j) + x
301 def labelText(seqRange, labelOpt):
302 seqName, beg, end = seqRange
304 return seqName + ": " + sizeText(end - beg)
306 return seqName + ":" + prettyNum(beg) + ": " + sizeText(end - beg)
308 return seqName + ":" + prettyNum(beg) + "-" + prettyNum(end)
311 def rangeLabels(seqRanges, labelOpt, font, fontsize, image_mode, textRot):
314 im = Image.new(image_mode, image_size)
315 draw = ImageDraw.Draw(im)
318 text = labelText(r, labelOpt)
320 x, y = draw.textsize(text, font=font)
325 def dataFromRanges(cutRanges, cutRangesB, sortOpt, font,
326 fontsize, image_mode, labelOpt, textRot):
327 s = allSortedRanges(cutRanges, cutRangesB, sortOpt)
329 warn("\t".join(map(str, i)))
331 rangeSizes = [e - b for n, b, e in s]
332 labs = list(rangeLabels(s, labelOpt, font, fontsize, image_mode, textRot))
333 margin = max(i[2] for i in labs)
334 # xxx the margin may be too big, because some labels may get omitted
335 return s, rangeSizes, labs, margin
338 '''Return x / y rounded up.'''
342 def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
343 '''Get the minimum bp-per-pixel that fits in the size limit.'''
344 warn("choosing bp per pixel...")
345 numOfRanges = len(rangeSizes)
346 maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
347 if maxPixelsInRanges < numOfRanges:
348 raise Exception("can't fit the image: too many sequences?")
349 negLimit = -maxPixelsInRanges
350 negBpPerPix = sum(rangeSizes) // negLimit
352 if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
356 def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
357 '''Get the start pixel for each range.'''
359 pix_tot = margin - pixTweenRanges
360 for i in rangePixLens:
361 pix_tot += pixTweenRanges
362 rangePixBegs.append(pix_tot)
366 def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
367 '''Return pixel information about the ranges.'''
368 rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
369 rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
370 tot_pix = rangePixBegs[-1] + rangePixLens[-1]
371 return rangePixBegs, rangePixLens, tot_pix
373 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
375 q1, r1 = divmod(beg1, bp_per_pix)
376 q2, r2 = divmod(beg2, bp_per_pix)
377 hits[q2 * width + q1] |= 1
378 next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
379 if next_pix >= size: break
384 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
387 q1, r1 = divmod(beg1, bp_per_pix)
388 q2, r2 = divmod(beg2, bp_per_pix)
389 hits[q2 * width + q1] |= 2
390 next_pix = min(bp_per_pix - r1, r2 + 1)
391 if next_pix >= size: break
396 def findOrigin(ranges, beg, size):
399 for rangeBeg, rangeEnd, origin in ranges:
403 def alignmentPixels(width, height, alignments, bp_per_pix,
404 rangeDict1, rangeDict2):
405 hits = [0] * (width * height) # the image data
406 for seq1, seq2, blocks in alignments:
407 beg1, beg2, size = blocks[0]
408 ori1 = findOrigin(rangeDict1[seq1], beg1, size)
409 ori2 = findOrigin(rangeDict2[seq2], beg2, size)
410 for beg1, beg2, size in blocks:
412 beg1 = -(beg1 + size)
413 beg2 = -(beg2 + size)
415 drawLineForward(hits, width, bp_per_pix,
416 beg1 + ori1, beg2 + ori2, size)
418 drawLineReverse(hits, width, bp_per_pix,
419 beg1 + ori1, beg2 - ori2, size)
422 def expandedSeqDict(seqDict):
423 '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
424 newDict = seqDict.copy()
425 for name, x in seqDict.items():
427 base = name.split(".")[-1]
428 if base in newDict: # an ambiguous case was found:
429 return seqDict # so give up completely
433 def readBed(fileName, rangeDict):
434 for line in myOpen(fileName):
438 if seqName not in rangeDict: continue
447 if len(w) > 8 and w[8].count(",") == 2:
448 color = "rgb(" + w[8] + ")"
453 yield layer, color, seqName, beg, end
455 def commaSeparatedInts(text):
456 return map(int, text.rstrip(",").split(","))
458 def readGenePred(opts, fileName, rangeDict):
459 for line in myOpen(fileName):
460 fields = line.split()
461 if not fields: continue
462 if fields[2] not in "+-": fields = fields[1:]
464 if seqName not in rangeDict: continue
466 cdsBeg = int(fields[5])
467 cdsEnd = int(fields[6])
468 exonBegs = commaSeparatedInts(fields[8])
469 exonEnds = commaSeparatedInts(fields[9])
470 for beg, end in zip(exonBegs, exonEnds):
471 yield 300, opts.exon_color, seqName, beg, end
474 if b < e: yield 400, opts.cds_color, seqName, b, e
476 def readRmsk(fileName, rangeDict):
477 for line in myOpen(fileName):
478 fields = line.split()
479 if len(fields) == 17: # rmsk.txt
481 if seqName not in rangeDict: continue # do this ASAP for speed
485 repeatClass = fields[11]
486 elif len(fields) == 15: # .out
488 if seqName not in rangeDict: continue
489 beg = int(fields[5]) - 1
492 repeatClass = fields[10]
495 if repeatClass in ("Low_complexity", "Simple_repeat"):
496 yield 200, "#fbf", seqName, beg, end
498 yield 100, "#ffe8e8", seqName, beg, end
500 yield 100, "#e8e8ff", seqName, beg, end
502 def isExtraFirstGapField(fields):
503 return fields[4].isdigit()
505 def readGaps(opts, fileName, rangeDict):
506 '''Read locations of unsequenced gaps, from an agp or gap file.'''
507 for line in myOpen(fileName):
509 if not w or w[0][0] == "#": continue
510 if isExtraFirstGapField(w): w = w[1:]
511 if w[4] not in "NU": continue
513 if seqName not in rangeDict: continue
515 beg = end - int(w[5]) # zero-based coordinate
517 yield 3000, opts.bridged_color, seqName, beg, end
519 yield 2000, opts.unbridged_color, seqName, beg, end
521 def bedBoxes(beds, rangeDict, margin, edge, isTop, bpPerPix):
522 for layer, color, seqName, bedBeg, bedEnd in beds:
523 for rangeBeg, rangeEnd, origin in rangeDict[seqName]:
524 beg = max(bedBeg, rangeBeg)
525 end = min(bedEnd, rangeEnd)
526 if beg >= end: continue
528 # include partly-covered pixels
529 b = (origin + beg) // bpPerPix
530 e = div_ceil(origin + end, bpPerPix)
532 # exclude partly-covered pixels
533 b = div_ceil(origin + beg, bpPerPix)
534 e = (origin + end) // bpPerPix
536 if end == rangeEnd: # include partly-covered end pixels
537 e = div_ceil(origin + end, bpPerPix)
539 box = b, margin, e, edge
541 box = margin, b, edge, e
542 yield layer, color, box
544 def drawAnnotations(im, boxes):
545 # xxx use partial transparency for different-color overlaps?
546 for layer, color, box in boxes:
549 def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
550 '''Return axis labels with endpoint & sort-order information.'''
552 for i, j, k in zip(labels, rangePixBegs, rangePixLens):
553 text, textWidth, textHeight = i
554 if textWidth > maxWidth:
556 labelBeg = j + (k - textWidth) // 2
557 labelEnd = labelBeg + textWidth
558 sortKey = textWidth - k
560 sortKey += maxWidth * (beg - labelBeg)
562 labelEnd = beg + textWidth
564 sortKey += maxWidth * (labelEnd - end)
566 labelBeg = end - textWidth
567 yield sortKey, labelBeg, labelEnd, text, textHeight
569 def nonoverlappingLabels(labels, minPixTweenLabels):
570 '''Get a subset of non-overlapping axis labels, greedily.'''
573 beg = i[1] - minPixTweenLabels
574 end = i[2] + minPixTweenLabels
575 if all(j[2] <= beg or j[1] >= end for j in out):
579 def axisImage(labels, rangePixBegs, rangePixLens, textRot,
580 textAln, font, image_mode, opts):
581 '''Make an image of axis labels.'''
582 beg = rangePixBegs[0]
583 end = rangePixBegs[-1] + rangePixLens[-1]
584 margin = max(i[2] for i in labels)
585 labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
586 minPixTweenLabels = 0 if textRot else opts.label_space
587 labels = nonoverlappingLabels(labels, minPixTweenLabels)
588 image_size = (margin, end) if textRot else (end, margin)
589 im = Image.new(image_mode, image_size, opts.margin_color)
590 draw = ImageDraw.Draw(im)
592 base = margin - i[4] if textAln else 0
593 position = (base, i[1]) if textRot else (i[1], base)
594 draw.text(position, i[3], font=font, fill=opts.text_color)
597 def rangesWithOrigins(sortedRanges, rangePixBegs, bpPerPix):
598 for i, j in zip(sortedRanges, rangePixBegs):
599 seqName, rangeBeg, rangeEnd = i
600 origin = bpPerPix * j - rangeBeg
601 yield seqName, (rangeBeg, rangeEnd, origin)
603 def rangesPerSeq(sortedRanges, rangePixBegs, bpPerPix):
604 a = rangesWithOrigins(sortedRanges, rangePixBegs, bpPerPix)
605 for k, v in itertools.groupby(a, itemgetter(0)):
606 yield k, [i[1] for i in v]
610 return ImageFont.truetype(opts.fontfile, opts.fontsize)
613 x = ["fc-match", "-f%{file}", "arial"]
614 p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
615 out, err = p.communicate()
616 fileNames.append(out)
618 warn("fc-match error: " + str(e))
619 fileNames.append("/Library/Fonts/Arial.ttf") # for Mac
622 font = ImageFont.truetype(i, opts.fontsize)
626 warn("font load error: " + str(e))
627 return ImageFont.load_default()
629 def lastDotplot(opts, args):
632 forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
633 reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
634 zipped_colors = zip(forward_color, reverse_color)
635 overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
637 maxGap1, maxGapB1 = twoValuesFromOption(opts.max_gap1, ":")
638 maxGap2, maxGapB2 = twoValuesFromOption(opts.max_gap2, ":")
640 warn("reading alignments...")
641 alnData = readAlignments(args[0], opts)
642 alignments, seqRanges1, coverDict1, seqRanges2, coverDict2 = alnData
643 if not alignments: raise Exception("there are no alignments")
645 coverDict1 = dict(mergedRangesPerSeq(coverDict1))
646 coverDict2 = dict(mergedRangesPerSeq(coverDict2))
647 minAlignedBases = min(coveredLength(coverDict1), coveredLength(coverDict2))
648 pad = int(opts.pad * minAlignedBases)
649 cutRanges1 = list(trimmed(seqRanges1, coverDict1, minAlignedBases,
651 cutRanges2 = list(trimmed(seqRanges2, coverDict2, minAlignedBases,
654 warn("reading secondary alignments...")
655 alnDataB = readSecondaryAlignments(opts, cutRanges1, cutRanges2)
656 alignmentsB, seqRangesB1, coverDictB1, seqRangesB2, coverDictB2 = alnDataB
658 coverDictB1 = dict(mergedRangesPerSeq(coverDictB1))
659 coverDictB2 = dict(mergedRangesPerSeq(coverDictB2))
660 cutRangesB1 = trimmed(seqRangesB1, coverDictB1, minAlignedBases,
662 cutRangesB2 = trimmed(seqRangesB2, coverDictB2, minAlignedBases,
665 textRot1 = "vertical".startswith(opts.rot1)
666 i1 = dataFromRanges(cutRanges1, cutRangesB1, opts.sort1, font,
667 opts.fontsize, image_mode, opts.labels1, textRot1)
668 sortedRanges1, rangeSizes1, labelData1, tMargin = i1
670 textRot2 = "horizontal".startswith(opts.rot2)
671 i2 = dataFromRanges(cutRanges2, cutRangesB2, opts.sort2, font,
672 opts.fontsize, image_mode, opts.labels2, textRot2)
673 sortedRanges2, rangeSizes2, labelData2, lMargin = i2
675 maxPixels1 = opts.width - lMargin
676 maxPixels2 = opts.height - tMargin
677 bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
678 bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
679 bpPerPix = max(bpPerPix1, bpPerPix2)
680 warn("bp per pixel = " + str(bpPerPix))
682 p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
683 rangePixBegs1, rangePixLens1, width = p1
684 rangeDict1 = dict(rangesPerSeq(sortedRanges1, rangePixBegs1, bpPerPix))
686 p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
687 rangePixBegs2, rangePixLens2, height = p2
688 rangeDict2 = dict(rangesPerSeq(sortedRanges2, rangePixBegs2, bpPerPix))
690 warn("width: " + str(width))
691 warn("height: " + str(height))
693 warn("processing alignments...")
694 hits = alignmentPixels(width, height, alignments + alignmentsB, bpPerPix,
695 rangeDict1, rangeDict2)
697 warn("reading annotations...")
699 rangeDict1 = expandedSeqDict(rangeDict1)
700 beds1 = itertools.chain(readBed(opts.bed1, rangeDict1),
701 readRmsk(opts.rmsk1, rangeDict1),
702 readGenePred(opts, opts.genePred1, rangeDict1),
703 readGaps(opts, opts.gap1, rangeDict1))
704 b1 = bedBoxes(beds1, rangeDict1, tMargin, height, True, bpPerPix)
706 rangeDict2 = expandedSeqDict(rangeDict2)
707 beds2 = itertools.chain(readBed(opts.bed2, rangeDict2),
708 readRmsk(opts.rmsk2, rangeDict2),
709 readGenePred(opts, opts.genePred2, rangeDict2),
710 readGaps(opts, opts.gap2, rangeDict2))
711 b2 = bedBoxes(beds2, rangeDict2, lMargin, width, False, bpPerPix)
713 boxes = sorted(itertools.chain(b1, b2))
717 image_size = width, height
718 im = Image.new(image_mode, image_size, opts.background_color)
720 drawAnnotations(im, boxes)
722 for i in range(height):
723 for j in range(width):
724 store_value = hits[i * width + j]
726 if store_value == 1: im.putpixel(xy, forward_color)
727 elif store_value == 2: im.putpixel(xy, reverse_color)
728 elif store_value == 3: im.putpixel(xy, overlap_color)
730 if opts.fontsize != 0:
731 axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
732 textRot1, False, font, image_mode, opts)
734 axis1 = axis1.transpose(Image.ROTATE_90)
735 axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
736 textRot2, textRot2, font, image_mode, opts)
738 axis2 = axis2.transpose(Image.ROTATE_270)
739 im.paste(axis1, (0, 0))
740 im.paste(axis2, (0, 0))
742 for i in rangePixBegs1[1:]:
743 box = i - opts.border_pixels, tMargin, i, height
744 im.paste(opts.border_color, box)
746 for i in rangePixBegs2[1:]:
747 box = lMargin, i - opts.border_pixels, width, i
748 im.paste(opts.border_color, box)
752 if __name__ == "__main__":
753 usage = """%prog --help
754 or: %prog [options] maf-or-tab-alignments dotplot.png
755 or: %prog [options] maf-or-tab-alignments dotplot.gif
757 description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
758 op = optparse.OptionParser(usage=usage, description=description)
759 op.add_option("-v", "--verbose", action="count",
760 help="show progress messages & data about the plot")
761 op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
763 help="which sequences to show from the 1st genome")
764 op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
766 help="which sequences to show from the 2nd genome")
767 # Replace "width" & "height" with a single "length" option?
768 op.add_option("-x", "--width", type="int", default=1000,
769 help="maximum width in pixels (default: %default)")
770 op.add_option("-y", "--height", type="int", default=1000,
771 help="maximum height in pixels (default: %default)")
772 op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
773 help="color for forward alignments (default: %default)")
774 op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
775 help="color for reverse alignments (default: %default)")
776 op.add_option("--alignments", metavar="FILE", help="secondary alignments")
777 op.add_option("--sort1", default="1", metavar="N",
778 help="genome1 sequence order: 0=input order, 1=name order, "
779 "2=length order (default=%default)")
780 op.add_option("--sort2", default="1", metavar="N",
781 help="genome2 sequence order: 0=input order, 1=name order, "
782 "2=length order (default=%default)")
783 op.add_option("--max-gap1", metavar="FRAC", default="0.5,2", help=
784 "maximum unaligned (end,mid) gap in genome1: "
785 "fraction of aligned length (default=%default)")
786 op.add_option("--max-gap2", metavar="FRAC", default="0.5,2", help=
787 "maximum unaligned (end,mid) gap in genome2: "
788 "fraction of aligned length (default=%default)")
789 op.add_option("--pad", metavar="FRAC", type="float", default=0.04, help=
790 "pad length when cutting unaligned gaps: "
791 "fraction of aligned length (default=%default)")
792 op.add_option("--border-pixels", metavar="INT", type="int", default=1,
793 help="number of pixels between sequences (default=%default)")
794 op.add_option("--border-color", metavar="COLOR", default="black",
795 help="color for pixels between sequences (default=%default)")
796 # --break-color and/or --break-pixels for intra-sequence breaks?
797 op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
800 og = optparse.OptionGroup(op, "Text options")
801 og.add_option("-f", "--fontfile", metavar="FILE",
802 help="TrueType or OpenType font file")
803 og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
804 help="TrueType or OpenType font size (default: %default)")
805 og.add_option("--labels1", type="int", default=0, metavar="N", help=
806 "genome1 labels: 0=name, 1=name:length, "
807 "2=name:start:length, 3=name:start-end (default=%default)")
808 og.add_option("--labels2", type="int", default=0, metavar="N", help=
809 "genome2 labels: 0=name, 1=name:length, "
810 "2=name:start:length, 3=name:start-end (default=%default)")
811 og.add_option("--rot1", metavar="ROT", default="h",
812 help="text rotation for the 1st genome (default=%default)")
813 og.add_option("--rot2", metavar="ROT", default="v",
814 help="text rotation for the 2nd genome (default=%default)")
815 op.add_option_group(og)
817 og = optparse.OptionGroup(op, "Annotation options")
818 og.add_option("--bed1", metavar="FILE",
819 help="read genome1 annotations from BED file")
820 og.add_option("--bed2", metavar="FILE",
821 help="read genome2 annotations from BED file")
822 og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
823 "RepeatMasker .out or rmsk.txt file")
824 og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
825 "RepeatMasker .out or rmsk.txt file")
826 op.add_option_group(og)
828 og = optparse.OptionGroup(op, "Gene options")
829 og.add_option("--genePred1", metavar="FILE",
830 help="read genome1 genes from genePred file")
831 og.add_option("--genePred2", metavar="FILE",
832 help="read genome2 genes from genePred file")
833 og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
834 help="color for exons (default=%default)")
835 og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
836 help="color for protein-coding regions (default=%default)")
837 op.add_option_group(og)
839 og = optparse.OptionGroup(op, "Unsequenced gap options")
840 og.add_option("--gap1", metavar="FILE",
841 help="read genome1 unsequenced gaps from agp or gap file")
842 og.add_option("--gap2", metavar="FILE",
843 help="read genome2 unsequenced gaps from agp or gap file")
844 og.add_option("--bridged-color", metavar="COLOR", default="yellow",
845 help="color for bridged gaps (default: %default)")
846 og.add_option("--unbridged-color", metavar="COLOR", default="orange",
847 help="color for unbridged gaps (default: %default)")
848 op.add_option_group(og)
849 (opts, args) = op.parse_args()
850 if len(args) != 2: op.error("2 arguments needed")
852 opts.text_color = "black"
853 opts.background_color = "white"
854 opts.label_space = 5 # minimum number of pixels between axis labels
856 try: lastDotplot(opts, args)
857 except KeyboardInterrupt: pass # avoid silly error message
858 except Exception as e:
859 prog = os.path.basename(sys.argv[0])
860 sys.exit(prog + ": error: " + str(e))