Martin@1
|
1 |
#! /usr/bin/env python
|
Martin@1062
|
2 |
# Author: Martin C. Frith 2008
|
Martin@1062
|
3 |
# SPDX-License-Identifier: GPL-3.0-or-later
|
Martin@1
|
4 |
|
Martin@272
|
5 |
# Read pair-wise alignments in MAF or LAST tabular format: write an
|
Martin@272
|
6 |
# "Oxford grid", a.k.a. dotplot.
|
Martin@1
|
7 |
|
Martin@1
|
8 |
# TODO: Currently, pixels with zero aligned nt-pairs are white, and
|
Martin@1
|
9 |
# pixels with one or more aligned nt-pairs are black. This can look
|
Martin@1
|
10 |
# too crowded for large genome alignments. I tried shading each pixel
|
Martin@1
|
11 |
# according to the number of aligned nt-pairs within it, but the
|
Martin@1
|
12 |
# result is too faint. How can this be done better?
|
Martin@1
|
13 |
|
Martin@916
|
14 |
import collections
|
Martin@914
|
15 |
import functools
|
Martin@875
|
16 |
import gzip
|
Martin@896
|
17 |
from fnmatch import fnmatchcase
|
Martin@961
|
18 |
import logging
|
Martin@906
|
19 |
from operator import itemgetter
|
Martin@903
|
20 |
import subprocess
|
Martin@896
|
21 |
import itertools, optparse, os, re, sys
|
Martin@475
|
22 |
|
Martin@475
|
23 |
# Try to make PIL/PILLOW work:
|
Martin@938
|
24 |
try:
|
Martin@938
|
25 |
from PIL import Image, ImageDraw, ImageFont, ImageColor
|
Martin@938
|
26 |
except ImportError:
|
Martin@938
|
27 |
import Image, ImageDraw, ImageFont, ImageColor
|
Martin@938
|
28 |
|
Martin@938
|
29 |
try:
|
Martin@938
|
30 |
from future_builtins import zip
|
Martin@938
|
31 |
except ImportError:
|
Martin@938
|
32 |
pass
|
Martin@1
|
33 |
|
Martin@844
|
34 |
def myOpen(fileName): # faster than fileinput
|
Martin@904
|
35 |
if fileName is None:
|
Martin@904
|
36 |
return []
|
Martin@844
|
37 |
if fileName == "-":
|
Martin@844
|
38 |
return sys.stdin
|
Martin@875
|
39 |
if fileName.endswith(".gz"):
|
Martin@957
|
40 |
return gzip.open(fileName, "rt") # xxx dubious for Python2
|
Martin@844
|
41 |
return open(fileName)
|
Martin@844
|
42 |
|
Martin@911
|
43 |
def groupByFirstItem(things):
|
Martin@911
|
44 |
for k, v in itertools.groupby(things, itemgetter(0)):
|
Martin@911
|
45 |
yield k, [i[1:] for i in v]
|
Martin@911
|
46 |
|
Martin@908
|
47 |
def croppedBlocks(blocks, ranges1, ranges2):
|
Martin@908
|
48 |
headBeg1, headBeg2, headSize = blocks[0]
|
Martin@908
|
49 |
for r1 in ranges1:
|
Martin@908
|
50 |
for r2 in ranges2:
|
Martin@908
|
51 |
cropBeg1, cropEnd1 = r1
|
Martin@908
|
52 |
if headBeg1 < 0:
|
Martin@908
|
53 |
cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
|
Martin@908
|
54 |
cropBeg2, cropEnd2 = r2
|
Martin@908
|
55 |
if headBeg2 < 0:
|
Martin@908
|
56 |
cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
|
Martin@908
|
57 |
for beg1, beg2, size in blocks:
|
Martin@908
|
58 |
b1 = max(cropBeg1, beg1)
|
Martin@908
|
59 |
e1 = min(cropEnd1, beg1 + size)
|
Martin@908
|
60 |
if b1 >= e1: continue
|
Martin@908
|
61 |
offset = beg2 - beg1
|
Martin@908
|
62 |
b2 = max(cropBeg2, b1 + offset)
|
Martin@908
|
63 |
e2 = min(cropEnd2, e1 + offset)
|
Martin@908
|
64 |
if b2 >= e2: continue
|
Martin@908
|
65 |
yield b2 - offset, b2, e2 - b2
|
Martin@840
|
66 |
|
Martin@482
|
67 |
def tabBlocks(beg1, beg2, blocks):
|
Martin@482
|
68 |
'''Get the gapless blocks of an alignment, from LAST tabular format.'''
|
Martin@482
|
69 |
for i in blocks.split(","):
|
Martin@482
|
70 |
if ":" in i:
|
Martin@482
|
71 |
x, y = i.split(":")
|
Martin@482
|
72 |
beg1 += int(x)
|
Martin@482
|
73 |
beg2 += int(y)
|
Martin@482
|
74 |
else:
|
Martin@482
|
75 |
size = int(i)
|
Martin@482
|
76 |
yield beg1, beg2, size
|
Martin@482
|
77 |
beg1 += size
|
Martin@482
|
78 |
beg2 += size
|
Martin@272
|
79 |
|
Martin@482
|
80 |
def mafBlocks(beg1, beg2, seq1, seq2):
|
Martin@482
|
81 |
'''Get the gapless blocks of an alignment, from MAF format.'''
|
Martin@482
|
82 |
size = 0
|
Martin@938
|
83 |
for x, y in zip(seq1, seq2):
|
Martin@482
|
84 |
if x == "-":
|
Martin@482
|
85 |
if size:
|
Martin@482
|
86 |
yield beg1, beg2, size
|
Martin@482
|
87 |
beg1 += size
|
Martin@482
|
88 |
beg2 += size
|
Martin@482
|
89 |
size = 0
|
Martin@482
|
90 |
beg2 += 1
|
Martin@482
|
91 |
elif y == "-":
|
Martin@482
|
92 |
if size:
|
Martin@482
|
93 |
yield beg1, beg2, size
|
Martin@482
|
94 |
beg1 += size
|
Martin@482
|
95 |
beg2 += size
|
Martin@482
|
96 |
size = 0
|
Martin@482
|
97 |
beg1 += 1
|
Martin@272
|
98 |
else:
|
Martin@482
|
99 |
size += 1
|
Martin@482
|
100 |
if size: yield beg1, beg2, size
|
Martin@272
|
101 |
|
Martin@980
|
102 |
def alignmentFromSegment(qrySeqName, qrySeqLen, segment):
|
Martin@980
|
103 |
refSeqLen = sys.maxsize # XXX
|
Martin@980
|
104 |
refSeqName, refSeqBeg, qrySeqBeg, size = segment
|
Martin@980
|
105 |
block = refSeqBeg, qrySeqBeg, size
|
Martin@980
|
106 |
return refSeqName, refSeqLen, qrySeqName, qrySeqLen, [block]
|
Martin@980
|
107 |
|
Martin@482
|
108 |
def alignmentInput(lines):
|
Martin@482
|
109 |
'''Get alignments and sequence lengths, from MAF or tabular format.'''
|
Martin@482
|
110 |
mafCount = 0
|
Martin@980
|
111 |
qrySeqName = ""
|
Martin@980
|
112 |
segments = []
|
Martin@272
|
113 |
for line in lines:
|
Martin@272
|
114 |
w = line.split()
|
Martin@980
|
115 |
if line[0] == "#":
|
Martin@980
|
116 |
pass
|
Martin@980
|
117 |
elif len(w) == 1:
|
Martin@980
|
118 |
for i in segments:
|
Martin@980
|
119 |
yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
|
Martin@980
|
120 |
qrySeqName = w[0]
|
Martin@980
|
121 |
qrySeqLen = 0
|
Martin@980
|
122 |
segments = []
|
Martin@980
|
123 |
elif len(w) == 2 and qrySeqName and w[1].isdigit():
|
Martin@980
|
124 |
qrySeqLen += int(w[1])
|
Martin@981
|
125 |
elif len(w) == 4 and qrySeqName and w[1].isdigit() and w[3].isdigit():
|
Martin@981
|
126 |
refSeqName, refSeqBeg, refSeqEnd = w[0], int(w[1]), int(w[3])
|
Martin@980
|
127 |
size = abs(refSeqEnd - refSeqBeg)
|
Martin@980
|
128 |
if refSeqBeg > refSeqEnd:
|
Martin@980
|
129 |
refSeqBeg = -refSeqBeg
|
Martin@980
|
130 |
segments.append((refSeqName, refSeqBeg, qrySeqLen, size))
|
Martin@980
|
131 |
qrySeqLen += size
|
Martin@980
|
132 |
elif line[0].isdigit(): # tabular format
|
Martin@482
|
133 |
chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
|
Martin@482
|
134 |
if w[4] == "-": beg1 -= seqlen1
|
Martin@482
|
135 |
chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
|
Martin@482
|
136 |
if w[9] == "-": beg2 -= seqlen2
|
Martin@847
|
137 |
blocks = tabBlocks(beg1, beg2, w[11])
|
Martin@482
|
138 |
yield chr1, seqlen1, chr2, seqlen2, blocks
|
Martin@272
|
139 |
elif line[0] == "s": # MAF format
|
Martin@482
|
140 |
if mafCount == 0:
|
Martin@482
|
141 |
chr1, beg1, seqlen1, seq1 = w[1], int(w[2]), int(w[5]), w[6]
|
Martin@482
|
142 |
if w[4] == "-": beg1 -= seqlen1
|
Martin@482
|
143 |
mafCount = 1
|
Martin@482
|
144 |
else:
|
Martin@482
|
145 |
chr2, beg2, seqlen2, seq2 = w[1], int(w[2]), int(w[5]), w[6]
|
Martin@482
|
146 |
if w[4] == "-": beg2 -= seqlen2
|
Martin@847
|
147 |
blocks = mafBlocks(beg1, beg2, seq1, seq2)
|
Martin@482
|
148 |
yield chr1, seqlen1, chr2, seqlen2, blocks
|
Martin@482
|
149 |
mafCount = 0
|
Martin@980
|
150 |
for i in segments:
|
Martin@980
|
151 |
yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
|
Martin@272
|
152 |
|
Martin@897
|
153 |
def seqRequestFromText(text):
|
Martin@840
|
154 |
if ":" in text:
|
Martin@840
|
155 |
pattern, interval = text.rsplit(":", 1)
|
Martin@840
|
156 |
if "-" in interval:
|
Martin@840
|
157 |
beg, end = interval.rsplit("-", 1)
|
Martin@840
|
158 |
return pattern, int(beg), int(end) # beg may be negative
|
Martin@840
|
159 |
return text, 0, sys.maxsize
|
Martin@840
|
160 |
|
Martin@909
|
161 |
def rangesFromSeqName(seqRequests, name, seqLen):
|
Martin@908
|
162 |
if seqRequests:
|
Martin@908
|
163 |
base = name.split(".")[-1] # allow for names like hg19.chr7
|
Martin@908
|
164 |
for pat, beg, end in seqRequests:
|
Martin@908
|
165 |
if fnmatchcase(name, pat) or fnmatchcase(base, pat):
|
Martin@909
|
166 |
yield max(beg, 0), min(end, seqLen)
|
Martin@908
|
167 |
else:
|
Martin@909
|
168 |
yield 0, seqLen
|
Martin@651
|
169 |
|
Martin@909
|
170 |
def updateSeqs(coverDict, seqRanges, seqName, ranges, coveredRange):
|
Martin@909
|
171 |
beg, end = coveredRange
|
Martin@909
|
172 |
if beg < 0:
|
Martin@909
|
173 |
coveredRange = -end, -beg
|
Martin@909
|
174 |
if seqName in coverDict:
|
Martin@909
|
175 |
coverDict[seqName].append(coveredRange)
|
Martin@839
|
176 |
else:
|
Martin@909
|
177 |
coverDict[seqName] = [coveredRange]
|
Martin@909
|
178 |
for beg, end in ranges:
|
Martin@909
|
179 |
r = seqName, beg, end
|
Martin@909
|
180 |
seqRanges.append(r)
|
Martin@839
|
181 |
|
Martin@651
|
182 |
def readAlignments(fileName, opts):
|
Martin@839
|
183 |
'''Get alignments and sequence limits, from MAF or tabular format.'''
|
Martin@937
|
184 |
seqRequests1 = [seqRequestFromText(i) for i in opts.seq1]
|
Martin@937
|
185 |
seqRequests2 = [seqRequestFromText(i) for i in opts.seq2]
|
Martin@840
|
186 |
|
Martin@482
|
187 |
alignments = []
|
Martin@909
|
188 |
seqRanges1 = []
|
Martin@909
|
189 |
seqRanges2 = []
|
Martin@909
|
190 |
coverDict1 = {}
|
Martin@909
|
191 |
coverDict2 = {}
|
Martin@844
|
192 |
lines = myOpen(fileName)
|
Martin@838
|
193 |
for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
|
Martin@909
|
194 |
ranges1 = sorted(rangesFromSeqName(seqRequests1, seqName1, seqLen1))
|
Martin@909
|
195 |
if not ranges1: continue
|
Martin@909
|
196 |
ranges2 = sorted(rangesFromSeqName(seqRequests2, seqName2, seqLen2))
|
Martin@909
|
197 |
if not ranges2: continue
|
Martin@909
|
198 |
b = list(croppedBlocks(list(blocks), ranges1, ranges2))
|
Martin@840
|
199 |
if not b: continue
|
Martin@840
|
200 |
aln = seqName1, seqName2, b
|
Martin@482
|
201 |
alignments.append(aln)
|
Martin@909
|
202 |
coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
|
Martin@909
|
203 |
updateSeqs(coverDict1, seqRanges1, seqName1, ranges1, coveredRange1)
|
Martin@909
|
204 |
coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
|
Martin@909
|
205 |
updateSeqs(coverDict2, seqRanges2, seqName2, ranges2, coveredRange2)
|
Martin@909
|
206 |
return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
|
Martin@1
|
207 |
|
Martin@911
|
208 |
def nameAndRangesFromDict(cropDict, seqName):
|
Martin@911
|
209 |
if seqName in cropDict:
|
Martin@911
|
210 |
return seqName, cropDict[seqName]
|
Martin@911
|
211 |
n = seqName.split(".")[-1]
|
Martin@911
|
212 |
if n in cropDict:
|
Martin@911
|
213 |
return n, cropDict[n]
|
Martin@911
|
214 |
return seqName, []
|
Martin@911
|
215 |
|
Martin@911
|
216 |
def rangesForSecondaryAlignments(primaryRanges, seqLen):
|
Martin@911
|
217 |
if primaryRanges:
|
Martin@911
|
218 |
return primaryRanges
|
Martin@911
|
219 |
return [(0, seqLen)]
|
Martin@911
|
220 |
|
Martin@911
|
221 |
def readSecondaryAlignments(opts, cropRanges1, cropRanges2):
|
Martin@911
|
222 |
cropDict1 = dict(groupByFirstItem(cropRanges1))
|
Martin@911
|
223 |
cropDict2 = dict(groupByFirstItem(cropRanges2))
|
Martin@911
|
224 |
|
Martin@911
|
225 |
alignments = []
|
Martin@911
|
226 |
seqRanges1 = []
|
Martin@911
|
227 |
seqRanges2 = []
|
Martin@911
|
228 |
coverDict1 = {}
|
Martin@911
|
229 |
coverDict2 = {}
|
Martin@911
|
230 |
lines = myOpen(opts.alignments)
|
Martin@911
|
231 |
for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
|
Martin@911
|
232 |
seqName1, ranges1 = nameAndRangesFromDict(cropDict1, seqName1)
|
Martin@911
|
233 |
seqName2, ranges2 = nameAndRangesFromDict(cropDict2, seqName2)
|
Martin@911
|
234 |
if not ranges1 and not ranges2:
|
Martin@911
|
235 |
continue
|
Martin@911
|
236 |
r1 = rangesForSecondaryAlignments(ranges1, seqLen1)
|
Martin@911
|
237 |
r2 = rangesForSecondaryAlignments(ranges2, seqLen2)
|
Martin@911
|
238 |
b = list(croppedBlocks(list(blocks), r1, r2))
|
Martin@911
|
239 |
if not b: continue
|
Martin@911
|
240 |
aln = seqName1, seqName2, b
|
Martin@911
|
241 |
alignments.append(aln)
|
Martin@911
|
242 |
if not ranges1:
|
Martin@911
|
243 |
coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
|
Martin@911
|
244 |
updateSeqs(coverDict1, seqRanges1, seqName1, r1, coveredRange1)
|
Martin@911
|
245 |
if not ranges2:
|
Martin@911
|
246 |
coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
|
Martin@911
|
247 |
updateSeqs(coverDict2, seqRanges2, seqName2, r2, coveredRange2)
|
Martin@911
|
248 |
return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
|
Martin@911
|
249 |
|
Martin@910
|
250 |
def twoValuesFromOption(text, separator):
|
Martin@910
|
251 |
if separator in text:
|
Martin@910
|
252 |
return text.split(separator)
|
Martin@910
|
253 |
return text, text
|
Martin@910
|
254 |
|
Martin@910
|
255 |
def mergedRanges(ranges):
|
Martin@910
|
256 |
oldBeg, maxEnd = ranges[0]
|
Martin@910
|
257 |
for beg, end in ranges:
|
Martin@910
|
258 |
if beg > maxEnd:
|
Martin@910
|
259 |
yield oldBeg, maxEnd
|
Martin@910
|
260 |
oldBeg = beg
|
Martin@910
|
261 |
maxEnd = end
|
Martin@910
|
262 |
elif end > maxEnd:
|
Martin@910
|
263 |
maxEnd = end
|
Martin@910
|
264 |
yield oldBeg, maxEnd
|
Martin@910
|
265 |
|
Martin@910
|
266 |
def mergedRangesPerSeq(coverDict):
|
Martin@941
|
267 |
for k, v in coverDict.items():
|
Martin@910
|
268 |
v.sort()
|
Martin@910
|
269 |
yield k, list(mergedRanges(v))
|
Martin@910
|
270 |
|
Martin@910
|
271 |
def coveredLength(mergedCoverDict):
|
Martin@941
|
272 |
return sum(sum(e - b for b, e in v) for v in mergedCoverDict.values())
|
Martin@910
|
273 |
|
Martin@910
|
274 |
def trimmed(seqRanges, coverDict, minAlignedBases, maxGapFrac, endPad, midPad):
|
Martin@910
|
275 |
maxEndGapFrac, maxMidGapFrac = twoValuesFromOption(maxGapFrac, ",")
|
Martin@910
|
276 |
maxEndGap = max(float(maxEndGapFrac) * minAlignedBases, endPad * 1.0)
|
Martin@910
|
277 |
maxMidGap = max(float(maxMidGapFrac) * minAlignedBases, midPad * 2.0)
|
Martin@910
|
278 |
|
Martin@910
|
279 |
for seqName, rangeBeg, rangeEnd in seqRanges:
|
Martin@910
|
280 |
seqBlocks = coverDict[seqName]
|
Martin@910
|
281 |
blocks = [i for i in seqBlocks if i[0] < rangeEnd and i[1] > rangeBeg]
|
Martin@910
|
282 |
if blocks[0][0] - rangeBeg > maxEndGap:
|
Martin@910
|
283 |
rangeBeg = blocks[0][0] - endPad
|
Martin@910
|
284 |
for j, y in enumerate(blocks):
|
Martin@910
|
285 |
if j:
|
Martin@910
|
286 |
x = blocks[j - 1]
|
Martin@910
|
287 |
if y[0] - x[1] > maxMidGap:
|
Martin@910
|
288 |
yield seqName, rangeBeg, x[1] + midPad
|
Martin@910
|
289 |
rangeBeg = y[0] - midPad
|
Martin@910
|
290 |
if rangeEnd - blocks[-1][1] > maxEndGap:
|
Martin@910
|
291 |
rangeEnd = blocks[-1][1] + endPad
|
Martin@910
|
292 |
yield seqName, rangeBeg, rangeEnd
|
Martin@910
|
293 |
|
Martin@916
|
294 |
def rangesWithStrandInfo(seqRanges, strandOpt, alignments, seqIndex):
|
Martin@916
|
295 |
if strandOpt == "1":
|
Martin@916
|
296 |
forwardMinusReverse = collections.defaultdict(int)
|
Martin@916
|
297 |
for i in alignments:
|
Martin@916
|
298 |
blocks = i[2]
|
Martin@916
|
299 |
beg1, beg2, size = blocks[0]
|
Martin@916
|
300 |
numOfAlignedLetterPairs = sum(i[2] for i in blocks)
|
Martin@916
|
301 |
if (beg1 < 0) != (beg2 < 0): # opposite-strand alignment
|
Martin@916
|
302 |
numOfAlignedLetterPairs *= -1
|
Martin@916
|
303 |
forwardMinusReverse[i[seqIndex]] += numOfAlignedLetterPairs
|
Martin@916
|
304 |
strandNum = 0
|
Martin@916
|
305 |
for seqName, beg, end in seqRanges:
|
Martin@916
|
306 |
if strandOpt == "1":
|
Martin@916
|
307 |
strandNum = 1 if forwardMinusReverse[seqName] >= 0 else 2
|
Martin@916
|
308 |
yield seqName, beg, end, strandNum
|
Martin@916
|
309 |
|
Martin@1
|
310 |
def natural_sort_key(my_string):
|
Martin@1
|
311 |
'''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
|
Martin@1
|
312 |
parts = re.split(r'(\d+)', my_string)
|
Martin@1
|
313 |
parts[1::2] = map(int, parts[1::2])
|
Martin@1
|
314 |
return parts
|
Martin@1
|
315 |
|
Martin@907
|
316 |
def nameKey(oneSeqRanges):
|
Martin@907
|
317 |
return natural_sort_key(oneSeqRanges[0][0])
|
Martin@907
|
318 |
|
Martin@907
|
319 |
def sizeKey(oneSeqRanges):
|
Martin@925
|
320 |
return sum(b - e for n, b, e, s in oneSeqRanges), nameKey(oneSeqRanges)
|
Martin@907
|
321 |
|
Martin@914
|
322 |
def alignmentKey(seqNamesToLists, oneSeqRanges):
|
Martin@914
|
323 |
seqName = oneSeqRanges[0][0]
|
Martin@914
|
324 |
alignmentsOfThisSequence = seqNamesToLists[seqName]
|
Martin@914
|
325 |
numOfAlignedLetterPairs = sum(i[3] for i in alignmentsOfThisSequence)
|
Martin@914
|
326 |
toMiddle = numOfAlignedLetterPairs // 2
|
Martin@914
|
327 |
for i in alignmentsOfThisSequence:
|
Martin@914
|
328 |
toMiddle -= i[3]
|
Martin@914
|
329 |
if toMiddle < 0:
|
Martin@914
|
330 |
return i[1:3] # sequence-rank and "position" of this alignment
|
Martin@914
|
331 |
|
Martin@916
|
332 |
def rankAndFlipPerSeq(seqRanges):
|
Martin@916
|
333 |
rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
|
Martin@916
|
334 |
for rank, group in enumerate(rangesGroupedBySeqName):
|
Martin@916
|
335 |
seqName, ranges = group
|
Martin@916
|
336 |
strandNum = next(ranges)[3]
|
Martin@916
|
337 |
flip = 1 if strandNum < 2 else -1
|
Martin@916
|
338 |
yield seqName, (rank, flip)
|
Martin@916
|
339 |
|
Martin@916
|
340 |
def alignmentSortData(alignments, seqIndex, otherNamesToRanksAndFlips):
|
Martin@915
|
341 |
otherIndex = 1 - seqIndex
|
Martin@915
|
342 |
for i in alignments:
|
Martin@915
|
343 |
blocks = i[2]
|
Martin@916
|
344 |
otherRank, otherFlip = otherNamesToRanksAndFlips[i[otherIndex]]
|
Martin@916
|
345 |
otherPos = otherFlip * abs(blocks[0][otherIndex] +
|
Martin@916
|
346 |
blocks[-1][otherIndex] + blocks[-1][2])
|
Martin@914
|
347 |
numOfAlignedLetterPairs = sum(i[2] for i in blocks)
|
Martin@915
|
348 |
yield i[seqIndex], otherRank, otherPos, numOfAlignedLetterPairs
|
Martin@914
|
349 |
|
Martin@915
|
350 |
def mySortedRanges(seqRanges, sortOpt, seqIndex, alignments, otherRanges):
|
Martin@913
|
351 |
rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
|
Martin@913
|
352 |
g = [list(ranges) for seqName, ranges in rangesGroupedBySeqName]
|
Martin@916
|
353 |
for i in g:
|
Martin@916
|
354 |
if i[0][3] > 1:
|
Martin@916
|
355 |
i.reverse()
|
Martin@907
|
356 |
if sortOpt == "1":
|
Martin@907
|
357 |
g.sort(key=nameKey)
|
Martin@907
|
358 |
if sortOpt == "2":
|
Martin@907
|
359 |
g.sort(key=sizeKey)
|
Martin@914
|
360 |
if sortOpt == "3":
|
Martin@916
|
361 |
otherNamesToRanksAndFlips = dict(rankAndFlipPerSeq(otherRanges))
|
Martin@915
|
362 |
alns = sorted(alignmentSortData(alignments, seqIndex,
|
Martin@916
|
363 |
otherNamesToRanksAndFlips))
|
Martin@914
|
364 |
alnsGroupedBySeqName = itertools.groupby(alns, itemgetter(0))
|
Martin@914
|
365 |
seqNamesToLists = dict((k, list(v)) for k, v in alnsGroupedBySeqName)
|
Martin@914
|
366 |
g.sort(key=functools.partial(alignmentKey, seqNamesToLists))
|
Martin@907
|
367 |
return [j for i in g for j in i]
|
Martin@907
|
368 |
|
Martin@914
|
369 |
def allSortedRanges(opts, alignments, alignmentsB,
|
Martin@914
|
370 |
seqRanges1, seqRangesB1, seqRanges2, seqRangesB2):
|
Martin@916
|
371 |
o1, oB1 = twoValuesFromOption(opts.strands1, ":")
|
Martin@916
|
372 |
o2, oB2 = twoValuesFromOption(opts.strands2, ":")
|
Martin@916
|
373 |
if o1 == "1" and o2 == "1":
|
Martin@916
|
374 |
raise Exception("the strand options have circular dependency")
|
Martin@916
|
375 |
seqRanges1 = list(rangesWithStrandInfo(seqRanges1, o1, alignments, 0))
|
Martin@916
|
376 |
seqRanges2 = list(rangesWithStrandInfo(seqRanges2, o2, alignments, 1))
|
Martin@916
|
377 |
seqRangesB1 = list(rangesWithStrandInfo(seqRangesB1, oB1, alignmentsB, 0))
|
Martin@916
|
378 |
seqRangesB2 = list(rangesWithStrandInfo(seqRangesB2, oB2, alignmentsB, 1))
|
Martin@916
|
379 |
|
Martin@914
|
380 |
o1, oB1 = twoValuesFromOption(opts.sort1, ":")
|
Martin@914
|
381 |
o2, oB2 = twoValuesFromOption(opts.sort2, ":")
|
Martin@914
|
382 |
if o1 == "3" and o2 == "3":
|
Martin@914
|
383 |
raise Exception("the sort options have circular dependency")
|
Martin@914
|
384 |
if o1 != "3":
|
Martin@914
|
385 |
s1 = mySortedRanges(seqRanges1, o1, None, None, None)
|
Martin@914
|
386 |
if o2 != "3":
|
Martin@914
|
387 |
s2 = mySortedRanges(seqRanges2, o2, None, None, None)
|
Martin@914
|
388 |
if o1 == "3":
|
Martin@915
|
389 |
s1 = mySortedRanges(seqRanges1, o1, 0, alignments, s2)
|
Martin@914
|
390 |
if o2 == "3":
|
Martin@915
|
391 |
s2 = mySortedRanges(seqRanges2, o2, 1, alignments, s1)
|
Martin@915
|
392 |
t1 = mySortedRanges(seqRangesB1, oB1, 0, alignmentsB, s2)
|
Martin@915
|
393 |
t2 = mySortedRanges(seqRangesB2, oB2, 1, alignmentsB, s1)
|
Martin@914
|
394 |
return s1 + t1, s2 + t2
|
Martin@911
|
395 |
|
Martin@898
|
396 |
def prettyNum(n):
|
Martin@898
|
397 |
t = str(n)
|
Martin@898
|
398 |
groups = []
|
Martin@898
|
399 |
while t:
|
Martin@898
|
400 |
groups.append(t[-3:])
|
Martin@898
|
401 |
t = t[:-3]
|
Martin@904
|
402 |
return ",".join(reversed(groups))
|
Martin@898
|
403 |
|
Martin@846
|
404 |
def sizeText(size):
|
Martin@846
|
405 |
suffixes = "bp", "kb", "Mb", "Gb"
|
Martin@846
|
406 |
for i, x in enumerate(suffixes):
|
Martin@846
|
407 |
j = 10 ** (i * 3)
|
Martin@846
|
408 |
if size < j * 10:
|
Martin@846
|
409 |
return "%.2g" % (1.0 * size / j) + x
|
Martin@846
|
410 |
if size < j * 1000 or i == len(suffixes) - 1:
|
Martin@846
|
411 |
return "%.0f" % (1.0 * size / j) + x
|
Martin@846
|
412 |
|
Martin@898
|
413 |
def labelText(seqRange, labelOpt):
|
Martin@916
|
414 |
seqName, beg, end, strandNum = seqRange
|
Martin@898
|
415 |
if labelOpt == 1:
|
Martin@898
|
416 |
return seqName + ": " + sizeText(end - beg)
|
Martin@898
|
417 |
if labelOpt == 2:
|
Martin@904
|
418 |
return seqName + ":" + prettyNum(beg) + ": " + sizeText(end - beg)
|
Martin@898
|
419 |
if labelOpt == 3:
|
Martin@904
|
420 |
return seqName + ":" + prettyNum(beg) + "-" + prettyNum(end)
|
Martin@898
|
421 |
return seqName
|
Martin@846
|
422 |
|
Martin@899
|
423 |
def rangeLabels(seqRanges, labelOpt, font, fontsize, image_mode, textRot):
|
Martin@899
|
424 |
if fontsize:
|
Martin@899
|
425 |
image_size = 1, 1
|
Martin@899
|
426 |
im = Image.new(image_mode, image_size)
|
Martin@899
|
427 |
draw = ImageDraw.Draw(im)
|
Martin@899
|
428 |
x = y = 0
|
Martin@899
|
429 |
for r in seqRanges:
|
Martin@899
|
430 |
text = labelText(r, labelOpt)
|
Martin@899
|
431 |
if fontsize:
|
Martin@899
|
432 |
x, y = draw.textsize(text, font=font)
|
Martin@899
|
433 |
if textRot:
|
Martin@899
|
434 |
x, y = y, x
|
Martin@916
|
435 |
yield text, x, y, r[3]
|
Martin@899
|
436 |
|
Martin@914
|
437 |
def dataFromRanges(sortedRanges, font, fontSize, imageMode, labelOpt, textRot):
|
Martin@916
|
438 |
for seqName, rangeBeg, rangeEnd, strandNum in sortedRanges:
|
Martin@916
|
439 |
out = [seqName, str(rangeBeg), str(rangeEnd)]
|
Martin@916
|
440 |
if strandNum > 0:
|
Martin@916
|
441 |
out.append(".+-"[strandNum])
|
Martin@961
|
442 |
logging.info("\t".join(out))
|
Martin@961
|
443 |
logging.info("")
|
Martin@916
|
444 |
rangeSizes = [e - b for n, b, e, s in sortedRanges]
|
Martin@913
|
445 |
labs = list(rangeLabels(sortedRanges, labelOpt, font, fontSize,
|
Martin@913
|
446 |
imageMode, textRot))
|
Martin@907
|
447 |
margin = max(i[2] for i in labs)
|
Martin@878
|
448 |
# xxx the margin may be too big, because some labels may get omitted
|
Martin@914
|
449 |
return rangeSizes, labs, margin
|
Martin@1
|
450 |
|
Martin@1
|
451 |
def div_ceil(x, y):
|
Martin@1
|
452 |
'''Return x / y rounded up.'''
|
Martin@1
|
453 |
q, r = divmod(x, y)
|
Martin@1
|
454 |
return q + (r != 0)
|
Martin@1
|
455 |
|
Martin@896
|
456 |
def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
|
Martin@1
|
457 |
'''Get the minimum bp-per-pixel that fits in the size limit.'''
|
Martin@961
|
458 |
logging.info("choosing bp per pixel...")
|
Martin@896
|
459 |
numOfRanges = len(rangeSizes)
|
Martin@896
|
460 |
maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
|
Martin@896
|
461 |
if maxPixelsInRanges < numOfRanges:
|
Martin@649
|
462 |
raise Exception("can't fit the image: too many sequences?")
|
Martin@896
|
463 |
negLimit = -maxPixelsInRanges
|
Martin@896
|
464 |
negBpPerPix = sum(rangeSizes) // negLimit
|
Martin@863
|
465 |
while True:
|
Martin@896
|
466 |
if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
|
Martin@863
|
467 |
return -negBpPerPix
|
Martin@863
|
468 |
negBpPerPix -= 1
|
Martin@1
|
469 |
|
Martin@900
|
470 |
def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
|
Martin@900
|
471 |
'''Get the start pixel for each range.'''
|
Martin@900
|
472 |
rangePixBegs = []
|
Martin@896
|
473 |
pix_tot = margin - pixTweenRanges
|
Martin@900
|
474 |
for i in rangePixLens:
|
Martin@896
|
475 |
pix_tot += pixTweenRanges
|
Martin@900
|
476 |
rangePixBegs.append(pix_tot)
|
Martin@1
|
477 |
pix_tot += i
|
Martin@900
|
478 |
return rangePixBegs
|
Martin@1
|
479 |
|
Martin@896
|
480 |
def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
|
Martin@900
|
481 |
'''Return pixel information about the ranges.'''
|
Martin@900
|
482 |
rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
|
Martin@900
|
483 |
rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
|
Martin@900
|
484 |
tot_pix = rangePixBegs[-1] + rangePixLens[-1]
|
Martin@900
|
485 |
return rangePixBegs, rangePixLens, tot_pix
|
Martin@1
|
486 |
|
Martin@835
|
487 |
def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
|
Martin@639
|
488 |
while True:
|
Martin@639
|
489 |
q1, r1 = divmod(beg1, bp_per_pix)
|
Martin@639
|
490 |
q2, r2 = divmod(beg2, bp_per_pix)
|
Martin@835
|
491 |
hits[q2 * width + q1] |= 1
|
Martin@639
|
492 |
next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
|
Martin@639
|
493 |
if next_pix >= size: break
|
Martin@639
|
494 |
beg1 += next_pix
|
Martin@639
|
495 |
beg2 += next_pix
|
Martin@639
|
496 |
size -= next_pix
|
Martin@639
|
497 |
|
Martin@835
|
498 |
def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
|
Martin@639
|
499 |
while True:
|
Martin@639
|
500 |
q1, r1 = divmod(beg1, bp_per_pix)
|
Martin@639
|
501 |
q2, r2 = divmod(beg2, bp_per_pix)
|
Martin@835
|
502 |
hits[q2 * width + q1] |= 2
|
Martin@639
|
503 |
next_pix = min(bp_per_pix - r1, r2 + 1)
|
Martin@639
|
504 |
if next_pix >= size: break
|
Martin@639
|
505 |
beg1 += next_pix
|
Martin@639
|
506 |
beg2 -= next_pix
|
Martin@639
|
507 |
size -= next_pix
|
Martin@639
|
508 |
|
Martin@916
|
509 |
def strandAndOrigin(ranges, beg, size):
|
Martin@916
|
510 |
isReverseStrand = (beg < 0)
|
Martin@916
|
511 |
if isReverseStrand:
|
Martin@905
|
512 |
beg = -(beg + size)
|
Martin@916
|
513 |
for rangeBeg, rangeEnd, isReverseRange, origin in ranges:
|
Martin@926
|
514 |
if rangeEnd > beg: # assumes the ranges are sorted
|
Martin@916
|
515 |
return (isReverseStrand != isReverseRange), origin
|
Martin@905
|
516 |
|
Martin@905
|
517 |
def alignmentPixels(width, height, alignments, bp_per_pix,
|
Martin@905
|
518 |
rangeDict1, rangeDict2):
|
Martin@640
|
519 |
hits = [0] * (width * height) # the image data
|
Martin@640
|
520 |
for seq1, seq2, blocks in alignments:
|
Martin@905
|
521 |
beg1, beg2, size = blocks[0]
|
Martin@916
|
522 |
isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
|
Martin@916
|
523 |
isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
|
Martin@640
|
524 |
for beg1, beg2, size in blocks:
|
Martin@916
|
525 |
if isReverse1:
|
Martin@640
|
526 |
beg1 = -(beg1 + size)
|
Martin@640
|
527 |
beg2 = -(beg2 + size)
|
Martin@916
|
528 |
if isReverse1 == isReverse2:
|
Martin@835
|
529 |
drawLineForward(hits, width, bp_per_pix,
|
Martin@915
|
530 |
ori1 + beg1, ori2 + beg2, size)
|
Martin@640
|
531 |
else:
|
Martin@835
|
532 |
drawLineReverse(hits, width, bp_per_pix,
|
Martin@915
|
533 |
ori1 + beg1, ori2 - beg2 - 1, size)
|
Martin@640
|
534 |
return hits
|
Martin@1
|
535 |
|
Martin@980
|
536 |
def orientedBlocks(alignments, seqIndex):
|
Martin@980
|
537 |
otherIndex = 1 - seqIndex
|
Martin@980
|
538 |
for a in alignments:
|
Martin@980
|
539 |
seq1, seq2, blocks = a
|
Martin@980
|
540 |
for b in blocks:
|
Martin@980
|
541 |
beg1, beg2, size = b
|
Martin@980
|
542 |
if b[seqIndex] < 0:
|
Martin@980
|
543 |
b = -(beg1 + size), -(beg2 + size), size
|
Martin@980
|
544 |
yield a[seqIndex], b[seqIndex], a[otherIndex], b[otherIndex], size
|
Martin@980
|
545 |
|
Martin@980
|
546 |
def drawJoins(im, alignments, bpPerPix, seqIndex, rangeDict1, rangeDict2):
|
Martin@980
|
547 |
blocks = orientedBlocks(alignments, seqIndex)
|
Martin@980
|
548 |
oldSeq1 = ""
|
Martin@980
|
549 |
for seq1, beg1, seq2, beg2, size in sorted(blocks):
|
Martin@980
|
550 |
isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
|
Martin@980
|
551 |
isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
|
Martin@980
|
552 |
end1 = beg1 + size - 1
|
Martin@980
|
553 |
end2 = beg2 + size - 1
|
Martin@980
|
554 |
if isReverse1:
|
Martin@980
|
555 |
beg1 = -(beg1 + 1)
|
Martin@980
|
556 |
end1 = -(end1 + 1)
|
Martin@980
|
557 |
if isReverse2:
|
Martin@980
|
558 |
beg2 = -(beg2 + 1)
|
Martin@980
|
559 |
end2 = -(end2 + 1)
|
Martin@980
|
560 |
newPix1 = (ori1 + beg1) // bpPerPix
|
Martin@980
|
561 |
newPix2 = (ori2 + beg2) // bpPerPix
|
Martin@980
|
562 |
if seq1 == oldSeq1:
|
Martin@980
|
563 |
lowerPix2 = min(oldPix2, newPix2)
|
Martin@980
|
564 |
upperPix2 = max(oldPix2, newPix2)
|
Martin@980
|
565 |
midPix1 = (oldPix1 + newPix1) // 2
|
Martin@980
|
566 |
if isReverse1:
|
Martin@980
|
567 |
midPix1 = (oldPix1 + newPix1 + 1) // 2
|
Martin@980
|
568 |
oldPix1, newPix1 = newPix1, oldPix1
|
Martin@980
|
569 |
if upperPix2 - lowerPix2 > 1 and oldPix1 <= newPix1 <= oldPix1 + 1:
|
Martin@980
|
570 |
if seqIndex == 0:
|
Martin@980
|
571 |
box = midPix1, lowerPix2, midPix1 + 1, upperPix2 + 1
|
Martin@980
|
572 |
else:
|
Martin@980
|
573 |
box = lowerPix2, midPix1, upperPix2 + 1, midPix1 + 1
|
Martin@980
|
574 |
im.paste("lightgray", box)
|
Martin@980
|
575 |
oldPix1 = (ori1 + end1) // bpPerPix
|
Martin@980
|
576 |
oldPix2 = (ori2 + end2) // bpPerPix
|
Martin@980
|
577 |
oldSeq1 = seq1
|
Martin@980
|
578 |
|
Martin@650
|
579 |
def expandedSeqDict(seqDict):
|
Martin@650
|
580 |
'''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
|
Martin@861
|
581 |
newDict = seqDict.copy()
|
Martin@650
|
582 |
for name, x in seqDict.items():
|
Martin@861
|
583 |
if "." in name:
|
Martin@861
|
584 |
base = name.split(".")[-1]
|
Martin@861
|
585 |
if base in newDict: # an ambiguous case was found:
|
Martin@861
|
586 |
return seqDict # so give up completely
|
Martin@861
|
587 |
newDict[base] = x
|
Martin@650
|
588 |
return newDict
|
Martin@650
|
589 |
|
Martin@905
|
590 |
def readBed(fileName, rangeDict):
|
Martin@845
|
591 |
for line in myOpen(fileName):
|
Martin@845
|
592 |
w = line.split()
|
Martin@847
|
593 |
if not w: continue
|
Martin@845
|
594 |
seqName = w[0]
|
Martin@905
|
595 |
if seqName not in rangeDict: continue
|
Martin@845
|
596 |
beg = int(w[1])
|
Martin@845
|
597 |
end = int(w[2])
|
Martin@857
|
598 |
layer = 900
|
Martin@895
|
599 |
color = "#fbf"
|
Martin@858
|
600 |
if len(w) > 4:
|
Martin@858
|
601 |
if w[4] != ".":
|
Martin@858
|
602 |
layer = float(w[4])
|
Martin@858
|
603 |
if len(w) > 5:
|
Martin@858
|
604 |
if len(w) > 8 and w[8].count(",") == 2:
|
Martin@858
|
605 |
color = "rgb(" + w[8] + ")"
|
Martin@916
|
606 |
else:
|
Martin@916
|
607 |
strand = w[5]
|
Martin@916
|
608 |
isRev = rangeDict[seqName][0][2]
|
Martin@916
|
609 |
if strand == "+" and not isRev or strand == "-" and isRev:
|
Martin@916
|
610 |
color = "#ffe8e8"
|
Martin@916
|
611 |
if strand == "-" and not isRev or strand == "+" and isRev:
|
Martin@916
|
612 |
color = "#e8e8ff"
|
Martin@859
|
613 |
yield layer, color, seqName, beg, end
|
Martin@845
|
614 |
|
Martin@860
|
615 |
def commaSeparatedInts(text):
|
Martin@860
|
616 |
return map(int, text.rstrip(",").split(","))
|
Martin@860
|
617 |
|
Martin@905
|
618 |
def readGenePred(opts, fileName, rangeDict):
|
Martin@860
|
619 |
for line in myOpen(fileName):
|
Martin@860
|
620 |
fields = line.split()
|
Martin@860
|
621 |
if not fields: continue
|
Martin@860
|
622 |
if fields[2] not in "+-": fields = fields[1:]
|
Martin@860
|
623 |
seqName = fields[1]
|
Martin@905
|
624 |
if seqName not in rangeDict: continue
|
Martin@860
|
625 |
#strand = fields[2]
|
Martin@860
|
626 |
cdsBeg = int(fields[5])
|
Martin@860
|
627 |
cdsEnd = int(fields[6])
|
Martin@860
|
628 |
exonBegs = commaSeparatedInts(fields[8])
|
Martin@860
|
629 |
exonEnds = commaSeparatedInts(fields[9])
|
Martin@860
|
630 |
for beg, end in zip(exonBegs, exonEnds):
|
Martin@860
|
631 |
yield 300, opts.exon_color, seqName, beg, end
|
Martin@860
|
632 |
b = max(beg, cdsBeg)
|
Martin@860
|
633 |
e = min(end, cdsEnd)
|
Martin@860
|
634 |
if b < e: yield 400, opts.cds_color, seqName, b, e
|
Martin@860
|
635 |
|
Martin@905
|
636 |
def readRmsk(fileName, rangeDict):
|
Martin@860
|
637 |
for line in myOpen(fileName):
|
Martin@860
|
638 |
fields = line.split()
|
Martin@860
|
639 |
if len(fields) == 17: # rmsk.txt
|
Martin@860
|
640 |
seqName = fields[5]
|
Martin@905
|
641 |
if seqName not in rangeDict: continue # do this ASAP for speed
|
Martin@860
|
642 |
beg = int(fields[6])
|
Martin@860
|
643 |
end = int(fields[7])
|
Martin@860
|
644 |
strand = fields[9]
|
Martin@860
|
645 |
repeatClass = fields[11]
|
Martin@860
|
646 |
elif len(fields) == 15: # .out
|
Martin@860
|
647 |
seqName = fields[4]
|
Martin@905
|
648 |
if seqName not in rangeDict: continue
|
Martin@860
|
649 |
beg = int(fields[5]) - 1
|
Martin@860
|
650 |
end = int(fields[6])
|
Martin@860
|
651 |
strand = fields[8]
|
Martin@984
|
652 |
repeatClass = fields[10].split("/")[0]
|
Martin@860
|
653 |
else:
|
Martin@860
|
654 |
continue
|
Martin@984
|
655 |
if repeatClass in ("Low_complexity", "Simple_repeat", "Satellite"):
|
Martin@895
|
656 |
yield 200, "#fbf", seqName, beg, end
|
Martin@916
|
657 |
elif (strand == "+") != rangeDict[seqName][0][2]:
|
Martin@895
|
658 |
yield 100, "#ffe8e8", seqName, beg, end
|
Martin@860
|
659 |
else:
|
Martin@895
|
660 |
yield 100, "#e8e8ff", seqName, beg, end
|
Martin@860
|
661 |
|
Martin@650
|
662 |
def isExtraFirstGapField(fields):
|
Martin@650
|
663 |
return fields[4].isdigit()
|
Martin@650
|
664 |
|
Martin@905
|
665 |
def readGaps(opts, fileName, rangeDict):
|
Martin@650
|
666 |
'''Read locations of unsequenced gaps, from an agp or gap file.'''
|
Martin@844
|
667 |
for line in myOpen(fileName):
|
Martin@650
|
668 |
w = line.split()
|
Martin@650
|
669 |
if not w or w[0][0] == "#": continue
|
Martin@650
|
670 |
if isExtraFirstGapField(w): w = w[1:]
|
Martin@650
|
671 |
if w[4] not in "NU": continue
|
Martin@650
|
672 |
seqName = w[0]
|
Martin@905
|
673 |
if seqName not in rangeDict: continue
|
Martin@650
|
674 |
end = int(w[2])
|
Martin@650
|
675 |
beg = end - int(w[5]) # zero-based coordinate
|
Martin@857
|
676 |
if w[7] == "yes":
|
Martin@859
|
677 |
yield 3000, opts.bridged_color, seqName, beg, end
|
Martin@857
|
678 |
else:
|
Martin@859
|
679 |
yield 2000, opts.unbridged_color, seqName, beg, end
|
Martin@650
|
680 |
|
Martin@905
|
681 |
def bedBoxes(beds, rangeDict, margin, edge, isTop, bpPerPix):
|
Martin@897
|
682 |
for layer, color, seqName, bedBeg, bedEnd in beds:
|
Martin@916
|
683 |
for rangeBeg, rangeEnd, isReverseRange, origin in rangeDict[seqName]:
|
Martin@905
|
684 |
beg = max(bedBeg, rangeBeg)
|
Martin@905
|
685 |
end = min(bedEnd, rangeEnd)
|
Martin@905
|
686 |
if beg >= end: continue
|
Martin@916
|
687 |
if isReverseRange:
|
Martin@916
|
688 |
beg, end = -end, -beg
|
Martin@905
|
689 |
if layer <= 1000:
|
Martin@905
|
690 |
# include partly-covered pixels
|
Martin@905
|
691 |
b = (origin + beg) // bpPerPix
|
Martin@905
|
692 |
e = div_ceil(origin + end, bpPerPix)
|
Martin@905
|
693 |
else:
|
Martin@905
|
694 |
# exclude partly-covered pixels
|
Martin@905
|
695 |
b = div_ceil(origin + beg, bpPerPix)
|
Martin@905
|
696 |
e = (origin + end) // bpPerPix
|
Martin@905
|
697 |
if e <= b: continue
|
Martin@916
|
698 |
if bedEnd >= rangeEnd: # include partly-covered end pixels
|
Martin@916
|
699 |
if isReverseRange:
|
Martin@916
|
700 |
b = (origin + beg) // bpPerPix
|
Martin@916
|
701 |
else:
|
Martin@916
|
702 |
e = div_ceil(origin + end, bpPerPix)
|
Martin@905
|
703 |
if isTop:
|
Martin@905
|
704 |
box = b, margin, e, edge
|
Martin@905
|
705 |
else:
|
Martin@905
|
706 |
box = margin, b, edge, e
|
Martin@905
|
707 |
yield layer, color, box
|
Martin@845
|
708 |
|
Martin@857
|
709 |
def drawAnnotations(im, boxes):
|
Martin@857
|
710 |
# xxx use partial transparency for different-color overlaps?
|
Martin@857
|
711 |
for layer, color, box in boxes:
|
Martin@650
|
712 |
im.paste(color, box)
|
Martin@650
|
713 |
|
Martin@901
|
714 |
def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
|
Martin@901
|
715 |
'''Return axis labels with endpoint & sort-order information.'''
|
Martin@901
|
716 |
maxWidth = end - beg
|
Martin@901
|
717 |
for i, j, k in zip(labels, rangePixBegs, rangePixLens):
|
Martin@916
|
718 |
text, textWidth, textHeight, strandNum = i
|
Martin@901
|
719 |
if textWidth > maxWidth:
|
Martin@901
|
720 |
continue
|
Martin@901
|
721 |
labelBeg = j + (k - textWidth) // 2
|
Martin@901
|
722 |
labelEnd = labelBeg + textWidth
|
Martin@901
|
723 |
sortKey = textWidth - k
|
Martin@901
|
724 |
if labelBeg < beg:
|
Martin@904
|
725 |
sortKey += maxWidth * (beg - labelBeg)
|
Martin@901
|
726 |
labelBeg = beg
|
Martin@901
|
727 |
labelEnd = beg + textWidth
|
Martin@901
|
728 |
if labelEnd > end:
|
Martin@904
|
729 |
sortKey += maxWidth * (labelEnd - end)
|
Martin@901
|
730 |
labelEnd = end
|
Martin@901
|
731 |
labelBeg = end - textWidth
|
Martin@916
|
732 |
yield sortKey, labelBeg, labelEnd, text, textHeight, strandNum
|
Martin@1
|
733 |
|
Martin@897
|
734 |
def nonoverlappingLabels(labels, minPixTweenLabels):
|
Martin@1
|
735 |
'''Get a subset of non-overlapping axis labels, greedily.'''
|
Martin@897
|
736 |
out = []
|
Martin@1
|
737 |
for i in labels:
|
Martin@897
|
738 |
beg = i[1] - minPixTweenLabels
|
Martin@897
|
739 |
end = i[2] + minPixTweenLabels
|
Martin@897
|
740 |
if all(j[2] <= beg or j[1] >= end for j in out):
|
Martin@897
|
741 |
out.append(i)
|
Martin@897
|
742 |
return out
|
Martin@1
|
743 |
|
Martin@901
|
744 |
def axisImage(labels, rangePixBegs, rangePixLens, textRot,
|
Martin@895
|
745 |
textAln, font, image_mode, opts):
|
Martin@1
|
746 |
'''Make an image of axis labels.'''
|
Martin@900
|
747 |
beg = rangePixBegs[0]
|
Martin@900
|
748 |
end = rangePixBegs[-1] + rangePixLens[-1]
|
Martin@901
|
749 |
margin = max(i[2] for i in labels)
|
Martin@901
|
750 |
labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
|
Martin@878
|
751 |
minPixTweenLabels = 0 if textRot else opts.label_space
|
Martin@897
|
752 |
labels = nonoverlappingLabels(labels, minPixTweenLabels)
|
Martin@897
|
753 |
image_size = (margin, end) if textRot else (end, margin)
|
Martin@895
|
754 |
im = Image.new(image_mode, image_size, opts.margin_color)
|
Martin@1
|
755 |
draw = ImageDraw.Draw(im)
|
Martin@916
|
756 |
for sortKey, labelBeg, labelEnd, text, textHeight, strandNum in labels:
|
Martin@915
|
757 |
base = margin - textHeight if textAln else 0
|
Martin@915
|
758 |
position = (base, labelBeg) if textRot else (labelBeg, base)
|
Martin@916
|
759 |
fill = ("black", opts.forwardcolor, opts.reversecolor)[strandNum]
|
Martin@916
|
760 |
draw.text(position, text, font=font, fill=fill)
|
Martin@1
|
761 |
return im
|
Martin@1
|
762 |
|
Martin@916
|
763 |
def rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
|
Martin@916
|
764 |
for i, j, k in zip(sortedRanges, rangePixBegs, rangePixLens):
|
Martin@916
|
765 |
seqName, rangeBeg, rangeEnd, strandNum = i
|
Martin@916
|
766 |
isReverseRange = (strandNum > 1)
|
Martin@916
|
767 |
if isReverseRange:
|
Martin@916
|
768 |
origin = bpPerPix * (j + k) + rangeBeg
|
Martin@916
|
769 |
else:
|
Martin@916
|
770 |
origin = bpPerPix * j - rangeBeg
|
Martin@916
|
771 |
yield seqName, (rangeBeg, rangeEnd, isReverseRange, origin)
|
Martin@906
|
772 |
|
Martin@916
|
773 |
def rangesPerSeq(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
|
Martin@916
|
774 |
a = rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix)
|
Martin@906
|
775 |
for k, v in itertools.groupby(a, itemgetter(0)):
|
Martin@926
|
776 |
yield k, sorted(i[1] for i in v)
|
Martin@836
|
777 |
|
Martin@903
|
778 |
def getFont(opts):
|
Martin@903
|
779 |
if opts.fontfile:
|
Martin@903
|
780 |
return ImageFont.truetype(opts.fontfile, opts.fontsize)
|
Martin@903
|
781 |
fileNames = []
|
Martin@903
|
782 |
try:
|
Martin@903
|
783 |
x = ["fc-match", "-f%{file}", "arial"]
|
Martin@942
|
784 |
p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
Martin@942
|
785 |
universal_newlines=True)
|
Martin@903
|
786 |
out, err = p.communicate()
|
Martin@903
|
787 |
fileNames.append(out)
|
Martin@903
|
788 |
except OSError as e:
|
Martin@961
|
789 |
logging.info("fc-match error: " + str(e))
|
Martin@903
|
790 |
fileNames.append("/Library/Fonts/Arial.ttf") # for Mac
|
Martin@903
|
791 |
for i in fileNames:
|
Martin@903
|
792 |
try:
|
Martin@903
|
793 |
font = ImageFont.truetype(i, opts.fontsize)
|
Martin@961
|
794 |
logging.info("font: " + i)
|
Martin@903
|
795 |
return font
|
Martin@903
|
796 |
except IOError as e:
|
Martin@961
|
797 |
logging.info("font load error: " + str(e))
|
Martin@903
|
798 |
return ImageFont.load_default()
|
Martin@903
|
799 |
|
Martin@961
|
800 |
def sequenceSizesAndNames(seqRanges):
|
Martin@961
|
801 |
for seqName, ranges in itertools.groupby(seqRanges, itemgetter(0)):
|
Martin@961
|
802 |
size = sum(e - b for n, b, e in ranges)
|
Martin@961
|
803 |
yield size, seqName
|
Martin@961
|
804 |
|
Martin@961
|
805 |
def biggestSequences(seqRanges, maxNumOfSequences):
|
Martin@961
|
806 |
s = sorted(sequenceSizesAndNames(seqRanges), reverse=True)
|
Martin@961
|
807 |
if len(s) > maxNumOfSequences:
|
Martin@961
|
808 |
logging.warning("too many sequences - discarding the smallest ones")
|
Martin@961
|
809 |
s = s[:maxNumOfSequences]
|
Martin@961
|
810 |
return set(i[1] for i in s)
|
Martin@961
|
811 |
|
Martin@961
|
812 |
def remainingSequenceRanges(seqRanges, alignments, seqIndex):
|
Martin@961
|
813 |
remainingSequences = set(i[seqIndex] for i in alignments)
|
Martin@961
|
814 |
return [i for i in seqRanges if i[0] in remainingSequences]
|
Martin@961
|
815 |
|
Martin@648
|
816 |
def lastDotplot(opts, args):
|
Martin@961
|
817 |
logLevel = logging.INFO if opts.verbose else logging.WARNING
|
Martin@961
|
818 |
logging.basicConfig(format="%(filename)s: %(message)s", level=logLevel)
|
Martin@961
|
819 |
|
Martin@903
|
820 |
font = getFont(opts)
|
Martin@643
|
821 |
image_mode = 'RGB'
|
Martin@643
|
822 |
forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
|
Martin@643
|
823 |
reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
|
Martin@643
|
824 |
zipped_colors = zip(forward_color, reverse_color)
|
Martin@643
|
825 |
overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
|
Martin@641
|
826 |
|
Martin@911
|
827 |
maxGap1, maxGapB1 = twoValuesFromOption(opts.max_gap1, ":")
|
Martin@911
|
828 |
maxGap2, maxGapB2 = twoValuesFromOption(opts.max_gap2, ":")
|
Martin@911
|
829 |
|
Martin@961
|
830 |
logging.info("reading alignments...")
|
Martin@904
|
831 |
alnData = readAlignments(args[0], opts)
|
Martin@909
|
832 |
alignments, seqRanges1, coverDict1, seqRanges2, coverDict2 = alnData
|
Martin@649
|
833 |
if not alignments: raise Exception("there are no alignments")
|
Martin@961
|
834 |
logging.info("cutting...")
|
Martin@910
|
835 |
coverDict1 = dict(mergedRangesPerSeq(coverDict1))
|
Martin@910
|
836 |
coverDict2 = dict(mergedRangesPerSeq(coverDict2))
|
Martin@910
|
837 |
minAlignedBases = min(coveredLength(coverDict1), coveredLength(coverDict2))
|
Martin@910
|
838 |
pad = int(opts.pad * minAlignedBases)
|
Martin@910
|
839 |
cutRanges1 = list(trimmed(seqRanges1, coverDict1, minAlignedBases,
|
Martin@911
|
840 |
maxGap1, pad, pad))
|
Martin@910
|
841 |
cutRanges2 = list(trimmed(seqRanges2, coverDict2, minAlignedBases,
|
Martin@911
|
842 |
maxGap2, pad, pad))
|
Martin@911
|
843 |
|
Martin@961
|
844 |
biggestSeqs1 = biggestSequences(cutRanges1, opts.maxseqs)
|
Martin@961
|
845 |
cutRanges1 = [i for i in cutRanges1 if i[0] in biggestSeqs1]
|
Martin@961
|
846 |
alignments = [i for i in alignments if i[0] in biggestSeqs1]
|
Martin@961
|
847 |
cutRanges2 = remainingSequenceRanges(cutRanges2, alignments, 1)
|
Martin@961
|
848 |
|
Martin@961
|
849 |
biggestSeqs2 = biggestSequences(cutRanges2, opts.maxseqs)
|
Martin@961
|
850 |
cutRanges2 = [i for i in cutRanges2 if i[0] in biggestSeqs2]
|
Martin@961
|
851 |
alignments = [i for i in alignments if i[1] in biggestSeqs2]
|
Martin@961
|
852 |
cutRanges1 = remainingSequenceRanges(cutRanges1, alignments, 0)
|
Martin@961
|
853 |
|
Martin@961
|
854 |
logging.info("reading secondary alignments...")
|
Martin@911
|
855 |
alnDataB = readSecondaryAlignments(opts, cutRanges1, cutRanges2)
|
Martin@911
|
856 |
alignmentsB, seqRangesB1, coverDictB1, seqRangesB2, coverDictB2 = alnDataB
|
Martin@961
|
857 |
logging.info("cutting...")
|
Martin@911
|
858 |
coverDictB1 = dict(mergedRangesPerSeq(coverDictB1))
|
Martin@911
|
859 |
coverDictB2 = dict(mergedRangesPerSeq(coverDictB2))
|
Martin@911
|
860 |
cutRangesB1 = trimmed(seqRangesB1, coverDictB1, minAlignedBases,
|
Martin@911
|
861 |
maxGapB1, 0, 0)
|
Martin@911
|
862 |
cutRangesB2 = trimmed(seqRangesB2, coverDictB2, minAlignedBases,
|
Martin@911
|
863 |
maxGapB2, 0, 0)
|
Martin@641
|
864 |
|
Martin@961
|
865 |
logging.info("sorting...")
|
Martin@914
|
866 |
sortOut = allSortedRanges(opts, alignments, alignmentsB,
|
Martin@914
|
867 |
cutRanges1, cutRangesB1, cutRanges2, cutRangesB2)
|
Martin@914
|
868 |
sortedRanges1, sortedRanges2 = sortOut
|
Martin@914
|
869 |
|
Martin@878
|
870 |
textRot1 = "vertical".startswith(opts.rot1)
|
Martin@914
|
871 |
i1 = dataFromRanges(sortedRanges1, font,
|
Martin@908
|
872 |
opts.fontsize, image_mode, opts.labels1, textRot1)
|
Martin@914
|
873 |
rangeSizes1, labelData1, tMargin = i1
|
Martin@846
|
874 |
|
Martin@878
|
875 |
textRot2 = "horizontal".startswith(opts.rot2)
|
Martin@914
|
876 |
i2 = dataFromRanges(sortedRanges2, font,
|
Martin@908
|
877 |
opts.fontsize, image_mode, opts.labels2, textRot2)
|
Martin@914
|
878 |
rangeSizes2, labelData2, lMargin = i2
|
Martin@641
|
879 |
|
Martin@896
|
880 |
maxPixels1 = opts.width - lMargin
|
Martin@896
|
881 |
maxPixels2 = opts.height - tMargin
|
Martin@897
|
882 |
bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
|
Martin@897
|
883 |
bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
|
Martin@855
|
884 |
bpPerPix = max(bpPerPix1, bpPerPix2)
|
Martin@961
|
885 |
logging.info("bp per pixel = " + str(bpPerPix))
|
Martin@641
|
886 |
|
Martin@900
|
887 |
p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
|
Martin@900
|
888 |
rangePixBegs1, rangePixLens1, width = p1
|
Martin@916
|
889 |
rangeDict1 = dict(rangesPerSeq(sortedRanges1, rangePixBegs1,
|
Martin@916
|
890 |
rangePixLens1, bpPerPix))
|
Martin@900
|
891 |
|
Martin@900
|
892 |
p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
|
Martin@900
|
893 |
rangePixBegs2, rangePixLens2, height = p2
|
Martin@916
|
894 |
rangeDict2 = dict(rangesPerSeq(sortedRanges2, rangePixBegs2,
|
Martin@916
|
895 |
rangePixLens2, bpPerPix))
|
Martin@900
|
896 |
|
Martin@961
|
897 |
logging.info("width: " + str(width))
|
Martin@961
|
898 |
logging.info("height: " + str(height))
|
Martin@839
|
899 |
|
Martin@961
|
900 |
logging.info("processing alignments...")
|
Martin@980
|
901 |
allAlignments = alignments + alignmentsB
|
Martin@980
|
902 |
hits = alignmentPixels(width, height, allAlignments, bpPerPix,
|
Martin@905
|
903 |
rangeDict1, rangeDict2)
|
Martin@641
|
904 |
|
Martin@961
|
905 |
logging.info("reading annotations...")
|
Martin@134
|
906 |
|
Martin@905
|
907 |
rangeDict1 = expandedSeqDict(rangeDict1)
|
Martin@905
|
908 |
beds1 = itertools.chain(readBed(opts.bed1, rangeDict1),
|
Martin@905
|
909 |
readRmsk(opts.rmsk1, rangeDict1),
|
Martin@905
|
910 |
readGenePred(opts, opts.genePred1, rangeDict1),
|
Martin@905
|
911 |
readGaps(opts, opts.gap1, rangeDict1))
|
Martin@905
|
912 |
b1 = bedBoxes(beds1, rangeDict1, tMargin, height, True, bpPerPix)
|
Martin@845
|
913 |
|
Martin@905
|
914 |
rangeDict2 = expandedSeqDict(rangeDict2)
|
Martin@905
|
915 |
beds2 = itertools.chain(readBed(opts.bed2, rangeDict2),
|
Martin@905
|
916 |
readRmsk(opts.rmsk2, rangeDict2),
|
Martin@905
|
917 |
readGenePred(opts, opts.genePred2, rangeDict2),
|
Martin@905
|
918 |
readGaps(opts, opts.gap2, rangeDict2))
|
Martin@905
|
919 |
b2 = bedBoxes(beds2, rangeDict2, lMargin, width, False, bpPerPix)
|
Martin@857
|
920 |
|
Martin@857
|
921 |
boxes = sorted(itertools.chain(b1, b2))
|
Martin@904
|
922 |
|
Martin@961
|
923 |
logging.info("drawing...")
|
Martin@904
|
924 |
|
Martin@904
|
925 |
image_size = width, height
|
Martin@904
|
926 |
im = Image.new(image_mode, image_size, opts.background_color)
|
Martin@904
|
927 |
|
Martin@857
|
928 |
drawAnnotations(im, boxes)
|
Martin@650
|
929 |
|
Martin@980
|
930 |
joinA, joinB = twoValuesFromOption(opts.join, ":")
|
Martin@980
|
931 |
if joinA in "13":
|
Martin@980
|
932 |
drawJoins(im, alignments, bpPerPix, 0, rangeDict1, rangeDict2)
|
Martin@980
|
933 |
if joinB in "13":
|
Martin@980
|
934 |
drawJoins(im, alignmentsB, bpPerPix, 0, rangeDict1, rangeDict2)
|
Martin@980
|
935 |
if joinA in "23":
|
Martin@980
|
936 |
drawJoins(im, alignments, bpPerPix, 1, rangeDict2, rangeDict1)
|
Martin@980
|
937 |
if joinB in "23":
|
Martin@980
|
938 |
drawJoins(im, alignmentsB, bpPerPix, 1, rangeDict2, rangeDict1)
|
Martin@980
|
939 |
|
Martin@643
|
940 |
for i in range(height):
|
Martin@643
|
941 |
for j in range(width):
|
Martin@643
|
942 |
store_value = hits[i * width + j]
|
Martin@643
|
943 |
xy = j, i
|
Martin@643
|
944 |
if store_value == 1: im.putpixel(xy, forward_color)
|
Martin@643
|
945 |
elif store_value == 2: im.putpixel(xy, reverse_color)
|
Martin@643
|
946 |
elif store_value == 3: im.putpixel(xy, overlap_color)
|
Martin@95
|
947 |
|
Martin@643
|
948 |
if opts.fontsize != 0:
|
Martin@900
|
949 |
axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
|
Martin@895
|
950 |
textRot1, False, font, image_mode, opts)
|
Martin@878
|
951 |
if textRot1:
|
Martin@878
|
952 |
axis1 = axis1.transpose(Image.ROTATE_90)
|
Martin@900
|
953 |
axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
|
Martin@895
|
954 |
textRot2, textRot2, font, image_mode, opts)
|
Martin@878
|
955 |
if not textRot2:
|
Martin@878
|
956 |
axis2 = axis2.transpose(Image.ROTATE_270)
|
Martin@643
|
957 |
im.paste(axis1, (0, 0))
|
Martin@643
|
958 |
im.paste(axis2, (0, 0))
|
Martin@1
|
959 |
|
Martin@900
|
960 |
for i in rangePixBegs1[1:]:
|
Martin@877
|
961 |
box = i - opts.border_pixels, tMargin, i, height
|
Martin@852
|
962 |
im.paste(opts.border_color, box)
|
Martin@1
|
963 |
|
Martin@900
|
964 |
for i in rangePixBegs2[1:]:
|
Martin@877
|
965 |
box = lMargin, i - opts.border_pixels, width, i
|
Martin@852
|
966 |
im.paste(opts.border_color, box)
|
Martin@1
|
967 |
|
Martin@643
|
968 |
im.save(args[1])
|
Martin@648
|
969 |
|
Martin@648
|
970 |
if __name__ == "__main__":
|
Martin@649
|
971 |
usage = """%prog --help
|
Martin@649
|
972 |
or: %prog [options] maf-or-tab-alignments dotplot.png
|
Martin@649
|
973 |
or: %prog [options] maf-or-tab-alignments dotplot.gif
|
Martin@649
|
974 |
or: ..."""
|
Martin@649
|
975 |
description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
|
Martin@649
|
976 |
op = optparse.OptionParser(usage=usage, description=description)
|
Martin@866
|
977 |
op.add_option("-v", "--verbose", action="count",
|
Martin@866
|
978 |
help="show progress messages & data about the plot")
|
Martin@961
|
979 |
# Replace "width" & "height" with a single "length" option?
|
Martin@961
|
980 |
op.add_option("-x", "--width", metavar="INT", type="int", default=1000,
|
Martin@961
|
981 |
help="maximum width in pixels (default: %default)")
|
Martin@961
|
982 |
op.add_option("-y", "--height", metavar="INT", type="int", default=1000,
|
Martin@961
|
983 |
help="maximum height in pixels (default: %default)")
|
Martin@961
|
984 |
op.add_option("-m", "--maxseqs", type="int", default=100, metavar="M",
|
Martin@961
|
985 |
help="maximum number of horizontal or vertical sequences "
|
Martin@961
|
986 |
"(default=%default)")
|
Martin@651
|
987 |
op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
|
Martin@840
|
988 |
default=[],
|
Martin@651
|
989 |
help="which sequences to show from the 1st genome")
|
Martin@651
|
990 |
op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
|
Martin@840
|
991 |
default=[],
|
Martin@651
|
992 |
help="which sequences to show from the 2nd genome")
|
Martin@649
|
993 |
op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
|
Martin@649
|
994 |
help="color for forward alignments (default: %default)")
|
Martin@649
|
995 |
op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
|
Martin@649
|
996 |
help="color for reverse alignments (default: %default)")
|
Martin@911
|
997 |
op.add_option("--alignments", metavar="FILE", help="secondary alignments")
|
Martin@907
|
998 |
op.add_option("--sort1", default="1", metavar="N",
|
Martin@851
|
999 |
help="genome1 sequence order: 0=input order, 1=name order, "
|
Martin@914
|
1000 |
"2=length order, 3=alignment order (default=%default)")
|
Martin@907
|
1001 |
op.add_option("--sort2", default="1", metavar="N",
|
Martin@851
|
1002 |
help="genome2 sequence order: 0=input order, 1=name order, "
|
Martin@914
|
1003 |
"2=length order, 3=alignment order (default=%default)")
|
Martin@916
|
1004 |
op.add_option("--strands1", default="0", metavar="N", help=
|
Martin@916
|
1005 |
"genome1 sequence orientation: 0=forward orientation, "
|
Martin@916
|
1006 |
"1=alignment orientation (default=%default)")
|
Martin@916
|
1007 |
op.add_option("--strands2", default="0", metavar="N", help=
|
Martin@916
|
1008 |
"genome2 sequence orientation: 0=forward orientation, "
|
Martin@916
|
1009 |
"1=alignment orientation (default=%default)")
|
Martin@910
|
1010 |
op.add_option("--max-gap1", metavar="FRAC", default="0.5,2", help=
|
Martin@910
|
1011 |
"maximum unaligned (end,mid) gap in genome1: "
|
Martin@910
|
1012 |
"fraction of aligned length (default=%default)")
|
Martin@910
|
1013 |
op.add_option("--max-gap2", metavar="FRAC", default="0.5,2", help=
|
Martin@910
|
1014 |
"maximum unaligned (end,mid) gap in genome2: "
|
Martin@910
|
1015 |
"fraction of aligned length (default=%default)")
|
Martin@910
|
1016 |
op.add_option("--pad", metavar="FRAC", type="float", default=0.04, help=
|
Martin@910
|
1017 |
"pad length when cutting unaligned gaps: "
|
Martin@910
|
1018 |
"fraction of aligned length (default=%default)")
|
Martin@980
|
1019 |
op.add_option("-j", "--join", default="0", metavar="N", help=
|
Martin@980
|
1020 |
"join: 0=nothing, 1=alignments adjacent in genome1, "
|
Martin@980
|
1021 |
"2=alignments adjacent in genome2 (default=%default)")
|
Martin@852
|
1022 |
op.add_option("--border-pixels", metavar="INT", type="int", default=1,
|
Martin@852
|
1023 |
help="number of pixels between sequences (default=%default)")
|
Martin@895
|
1024 |
op.add_option("--border-color", metavar="COLOR", default="black",
|
Martin@852
|
1025 |
help="color for pixels between sequences (default=%default)")
|
Martin@911
|
1026 |
# --break-color and/or --break-pixels for intra-sequence breaks?
|
Martin@895
|
1027 |
op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
|
Martin@895
|
1028 |
help="margin color")
|
Martin@846
|
1029 |
|
Martin@850
|
1030 |
og = optparse.OptionGroup(op, "Text options")
|
Martin@850
|
1031 |
og.add_option("-f", "--fontfile", metavar="FILE",
|
Martin@850
|
1032 |
help="TrueType or OpenType font file")
|
Martin@903
|
1033 |
og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
|
Martin@850
|
1034 |
help="TrueType or OpenType font size (default: %default)")
|
Martin@898
|
1035 |
og.add_option("--labels1", type="int", default=0, metavar="N", help=
|
Martin@898
|
1036 |
"genome1 labels: 0=name, 1=name:length, "
|
Martin@898
|
1037 |
"2=name:start:length, 3=name:start-end (default=%default)")
|
Martin@898
|
1038 |
og.add_option("--labels2", type="int", default=0, metavar="N", help=
|
Martin@898
|
1039 |
"genome2 labels: 0=name, 1=name:length, "
|
Martin@898
|
1040 |
"2=name:start:length, 3=name:start-end (default=%default)")
|
Martin@878
|
1041 |
og.add_option("--rot1", metavar="ROT", default="h",
|
Martin@878
|
1042 |
help="text rotation for the 1st genome (default=%default)")
|
Martin@878
|
1043 |
og.add_option("--rot2", metavar="ROT", default="v",
|
Martin@878
|
1044 |
help="text rotation for the 2nd genome (default=%default)")
|
Martin@850
|
1045 |
op.add_option_group(og)
|
Martin@850
|
1046 |
|
Martin@860
|
1047 |
og = optparse.OptionGroup(op, "Annotation options")
|
Martin@860
|
1048 |
og.add_option("--bed1", metavar="FILE",
|
Martin@860
|
1049 |
help="read genome1 annotations from BED file")
|
Martin@860
|
1050 |
og.add_option("--bed2", metavar="FILE",
|
Martin@860
|
1051 |
help="read genome2 annotations from BED file")
|
Martin@860
|
1052 |
og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
|
Martin@860
|
1053 |
"RepeatMasker .out or rmsk.txt file")
|
Martin@860
|
1054 |
og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
|
Martin@860
|
1055 |
"RepeatMasker .out or rmsk.txt file")
|
Martin@860
|
1056 |
op.add_option_group(og)
|
Martin@860
|
1057 |
|
Martin@860
|
1058 |
og = optparse.OptionGroup(op, "Gene options")
|
Martin@860
|
1059 |
og.add_option("--genePred1", metavar="FILE",
|
Martin@860
|
1060 |
help="read genome1 genes from genePred file")
|
Martin@860
|
1061 |
og.add_option("--genePred2", metavar="FILE",
|
Martin@860
|
1062 |
help="read genome2 genes from genePred file")
|
Martin@895
|
1063 |
og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
|
Martin@860
|
1064 |
help="color for exons (default=%default)")
|
Martin@895
|
1065 |
og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
|
Martin@860
|
1066 |
help="color for protein-coding regions (default=%default)")
|
Martin@860
|
1067 |
op.add_option_group(og)
|
Martin@860
|
1068 |
|
Martin@650
|
1069 |
og = optparse.OptionGroup(op, "Unsequenced gap options")
|
Martin@650
|
1070 |
og.add_option("--gap1", metavar="FILE",
|
Martin@650
|
1071 |
help="read genome1 unsequenced gaps from agp or gap file")
|
Martin@650
|
1072 |
og.add_option("--gap2", metavar="FILE",
|
Martin@650
|
1073 |
help="read genome2 unsequenced gaps from agp or gap file")
|
Martin@650
|
1074 |
og.add_option("--bridged-color", metavar="COLOR", default="yellow",
|
Martin@650
|
1075 |
help="color for bridged gaps (default: %default)")
|
Martin@895
|
1076 |
og.add_option("--unbridged-color", metavar="COLOR", default="orange",
|
Martin@650
|
1077 |
help="color for unbridged gaps (default: %default)")
|
Martin@650
|
1078 |
op.add_option_group(og)
|
Martin@648
|
1079 |
(opts, args) = op.parse_args()
|
Martin@648
|
1080 |
if len(args) != 2: op.error("2 arguments needed")
|
Martin@648
|
1081 |
|
Martin@648
|
1082 |
opts.background_color = "white"
|
Martin@648
|
1083 |
opts.label_space = 5 # minimum number of pixels between axis labels
|
Martin@648
|
1084 |
|
Martin@649
|
1085 |
try: lastDotplot(opts, args)
|
Martin@649
|
1086 |
except KeyboardInterrupt: pass # avoid silly error message
|
Martin@903
|
1087 |
except Exception as e:
|
Martin@649
|
1088 |
prog = os.path.basename(sys.argv[0])
|
Martin@649
|
1089 |
sys.exit(prog + ": error: " + str(e))
|