1.1 --- a/scripts/last-dotplot Thu Dec 21 16:22:52 2017 +0900
1.2 +++ b/scripts/last-dotplot Thu Dec 21 18:02:21 2017 +0900
1.3 @@ -9,6 +9,7 @@
1.4 # according to the number of aligned nt-pairs within it, but the
1.5 # result is too faint. How can this be done better?
1.6
1.7 +import collections
1.8 import functools
1.9 import gzip
1.10 from fnmatch import fnmatchcase
1.11 @@ -258,6 +259,22 @@
1.12 rangeEnd = blocks[-1][1] + endPad
1.13 yield seqName, rangeBeg, rangeEnd
1.14
1.15 +def rangesWithStrandInfo(seqRanges, strandOpt, alignments, seqIndex):
1.16 + if strandOpt == "1":
1.17 + forwardMinusReverse = collections.defaultdict(int)
1.18 + for i in alignments:
1.19 + blocks = i[2]
1.20 + beg1, beg2, size = blocks[0]
1.21 + numOfAlignedLetterPairs = sum(i[2] for i in blocks)
1.22 + if (beg1 < 0) != (beg2 < 0): # opposite-strand alignment
1.23 + numOfAlignedLetterPairs *= -1
1.24 + forwardMinusReverse[i[seqIndex]] += numOfAlignedLetterPairs
1.25 + strandNum = 0
1.26 + for seqName, beg, end in seqRanges:
1.27 + if strandOpt == "1":
1.28 + strandNum = 1 if forwardMinusReverse[seqName] >= 0 else 2
1.29 + yield seqName, beg, end, strandNum
1.30 +
1.31 def natural_sort_key(my_string):
1.32 '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
1.33 parts = re.split(r'(\d+)', my_string)
1.34 @@ -280,29 +297,38 @@
1.35 if toMiddle < 0:
1.36 return i[1:3] # sequence-rank and "position" of this alignment
1.37
1.38 -def alignmentSortData(alignments, seqIndex, otherNamesToRanks):
1.39 +def rankAndFlipPerSeq(seqRanges):
1.40 + rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
1.41 + for rank, group in enumerate(rangesGroupedBySeqName):
1.42 + seqName, ranges = group
1.43 + strandNum = next(ranges)[3]
1.44 + flip = 1 if strandNum < 2 else -1
1.45 + yield seqName, (rank, flip)
1.46 +
1.47 +def alignmentSortData(alignments, seqIndex, otherNamesToRanksAndFlips):
1.48 otherIndex = 1 - seqIndex
1.49 for i in alignments:
1.50 blocks = i[2]
1.51 - otherRank = otherNamesToRanks[i[otherIndex]]
1.52 - otherPos = abs(blocks[0][otherIndex] +
1.53 - blocks[-1][otherIndex] + blocks[-1][2])
1.54 + otherRank, otherFlip = otherNamesToRanksAndFlips[i[otherIndex]]
1.55 + otherPos = otherFlip * abs(blocks[0][otherIndex] +
1.56 + blocks[-1][otherIndex] + blocks[-1][2])
1.57 numOfAlignedLetterPairs = sum(i[2] for i in blocks)
1.58 yield i[seqIndex], otherRank, otherPos, numOfAlignedLetterPairs
1.59
1.60 def mySortedRanges(seqRanges, sortOpt, seqIndex, alignments, otherRanges):
1.61 rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
1.62 g = [list(ranges) for seqName, ranges in rangesGroupedBySeqName]
1.63 + for i in g:
1.64 + if i[0][3] > 1:
1.65 + i.reverse()
1.66 if sortOpt == "1":
1.67 g.sort(key=nameKey)
1.68 if sortOpt == "2":
1.69 g.sort(key=sizeKey)
1.70 if sortOpt == "3":
1.71 - otherNameGroups = itertools.groupby(i[0] for i in otherRanges)
1.72 - ranksAndNames = enumerate(i[0] for i in otherNameGroups)
1.73 - otherNamesToRanks = dict((n, r) for r, n in ranksAndNames)
1.74 + otherNamesToRanksAndFlips = dict(rankAndFlipPerSeq(otherRanges))
1.75 alns = sorted(alignmentSortData(alignments, seqIndex,
1.76 - otherNamesToRanks))
1.77 + otherNamesToRanksAndFlips))
1.78 alnsGroupedBySeqName = itertools.groupby(alns, itemgetter(0))
1.79 seqNamesToLists = dict((k, list(v)) for k, v in alnsGroupedBySeqName)
1.80 g.sort(key=functools.partial(alignmentKey, seqNamesToLists))
1.81 @@ -310,6 +336,15 @@
1.82
1.83 def allSortedRanges(opts, alignments, alignmentsB,
1.84 seqRanges1, seqRangesB1, seqRanges2, seqRangesB2):
1.85 + o1, oB1 = twoValuesFromOption(opts.strands1, ":")
1.86 + o2, oB2 = twoValuesFromOption(opts.strands2, ":")
1.87 + if o1 == "1" and o2 == "1":
1.88 + raise Exception("the strand options have circular dependency")
1.89 + seqRanges1 = list(rangesWithStrandInfo(seqRanges1, o1, alignments, 0))
1.90 + seqRanges2 = list(rangesWithStrandInfo(seqRanges2, o2, alignments, 1))
1.91 + seqRangesB1 = list(rangesWithStrandInfo(seqRangesB1, oB1, alignmentsB, 0))
1.92 + seqRangesB2 = list(rangesWithStrandInfo(seqRangesB2, oB2, alignmentsB, 1))
1.93 +
1.94 o1, oB1 = twoValuesFromOption(opts.sort1, ":")
1.95 o2, oB2 = twoValuesFromOption(opts.sort2, ":")
1.96 if o1 == "3" and o2 == "3":
1.97 @@ -344,7 +379,7 @@
1.98 return "%.0f" % (1.0 * size / j) + x
1.99
1.100 def labelText(seqRange, labelOpt):
1.101 - seqName, beg, end = seqRange
1.102 + seqName, beg, end, strandNum = seqRange
1.103 if labelOpt == 1:
1.104 return seqName + ": " + sizeText(end - beg)
1.105 if labelOpt == 2:
1.106 @@ -365,13 +400,16 @@
1.107 x, y = draw.textsize(text, font=font)
1.108 if textRot:
1.109 x, y = y, x
1.110 - yield text, x, y
1.111 + yield text, x, y, r[3]
1.112
1.113 def dataFromRanges(sortedRanges, font, fontSize, imageMode, labelOpt, textRot):
1.114 - for i in sortedRanges:
1.115 - warn("\t".join(map(str, i)))
1.116 + for seqName, rangeBeg, rangeEnd, strandNum in sortedRanges:
1.117 + out = [seqName, str(rangeBeg), str(rangeEnd)]
1.118 + if strandNum > 0:
1.119 + out.append(".+-"[strandNum])
1.120 + warn("\t".join(out))
1.121 warn("")
1.122 - rangeSizes = [e - b for n, b, e in sortedRanges]
1.123 + rangeSizes = [e - b for n, b, e, s in sortedRanges]
1.124 labs = list(rangeLabels(sortedRanges, labelOpt, font, fontSize,
1.125 imageMode, textRot))
1.126 margin = max(i[2] for i in labs)
1.127 @@ -436,25 +474,26 @@
1.128 beg2 -= next_pix
1.129 size -= next_pix
1.130
1.131 -def findOrigin(ranges, beg, size):
1.132 - if beg < 0:
1.133 +def strandAndOrigin(ranges, beg, size):
1.134 + isReverseStrand = (beg < 0)
1.135 + if isReverseStrand:
1.136 beg = -(beg + size)
1.137 - for rangeBeg, rangeEnd, origin in ranges:
1.138 + for rangeBeg, rangeEnd, isReverseRange, origin in ranges:
1.139 if rangeEnd > beg:
1.140 - return origin
1.141 + return (isReverseStrand != isReverseRange), origin
1.142
1.143 def alignmentPixels(width, height, alignments, bp_per_pix,
1.144 rangeDict1, rangeDict2):
1.145 hits = [0] * (width * height) # the image data
1.146 for seq1, seq2, blocks in alignments:
1.147 beg1, beg2, size = blocks[0]
1.148 - ori1 = findOrigin(rangeDict1[seq1], beg1, size)
1.149 - ori2 = findOrigin(rangeDict2[seq2], beg2, size)
1.150 + isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
1.151 + isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
1.152 for beg1, beg2, size in blocks:
1.153 - if beg1 < 0:
1.154 + if isReverse1:
1.155 beg1 = -(beg1 + size)
1.156 beg2 = -(beg2 + size)
1.157 - if beg2 >= 0:
1.158 + if isReverse1 == isReverse2:
1.159 drawLineForward(hits, width, bp_per_pix,
1.160 ori1 + beg1, ori2 + beg2, size)
1.161 else:
1.162 @@ -489,10 +528,13 @@
1.163 if len(w) > 5:
1.164 if len(w) > 8 and w[8].count(",") == 2:
1.165 color = "rgb(" + w[8] + ")"
1.166 - elif w[5] == "+":
1.167 - color = "#ffe8e8"
1.168 - elif w[5] == "-":
1.169 - color = "#e8e8ff"
1.170 + else:
1.171 + strand = w[5]
1.172 + isRev = rangeDict[seqName][0][2]
1.173 + if strand == "+" and not isRev or strand == "-" and isRev:
1.174 + color = "#ffe8e8"
1.175 + if strand == "-" and not isRev or strand == "+" and isRev:
1.176 + color = "#e8e8ff"
1.177 yield layer, color, seqName, beg, end
1.178
1.179 def commaSeparatedInts(text):
1.180 @@ -537,7 +579,7 @@
1.181 continue
1.182 if repeatClass in ("Low_complexity", "Simple_repeat"):
1.183 yield 200, "#fbf", seqName, beg, end
1.184 - elif strand == "+":
1.185 + elif (strand == "+") != rangeDict[seqName][0][2]:
1.186 yield 100, "#ffe8e8", seqName, beg, end
1.187 else:
1.188 yield 100, "#e8e8ff", seqName, beg, end
1.189 @@ -563,10 +605,12 @@
1.190
1.191 def bedBoxes(beds, rangeDict, margin, edge, isTop, bpPerPix):
1.192 for layer, color, seqName, bedBeg, bedEnd in beds:
1.193 - for rangeBeg, rangeEnd, origin in rangeDict[seqName]:
1.194 + for rangeBeg, rangeEnd, isReverseRange, origin in rangeDict[seqName]:
1.195 beg = max(bedBeg, rangeBeg)
1.196 end = min(bedEnd, rangeEnd)
1.197 if beg >= end: continue
1.198 + if isReverseRange:
1.199 + beg, end = -end, -beg
1.200 if layer <= 1000:
1.201 # include partly-covered pixels
1.202 b = (origin + beg) // bpPerPix
1.203 @@ -576,8 +620,11 @@
1.204 b = div_ceil(origin + beg, bpPerPix)
1.205 e = (origin + end) // bpPerPix
1.206 if e <= b: continue
1.207 - if end == rangeEnd: # include partly-covered end pixels
1.208 - e = div_ceil(origin + end, bpPerPix)
1.209 + if bedEnd >= rangeEnd: # include partly-covered end pixels
1.210 + if isReverseRange:
1.211 + b = (origin + beg) // bpPerPix
1.212 + else:
1.213 + e = div_ceil(origin + end, bpPerPix)
1.214 if isTop:
1.215 box = b, margin, e, edge
1.216 else:
1.217 @@ -593,7 +640,7 @@
1.218 '''Return axis labels with endpoint & sort-order information.'''
1.219 maxWidth = end - beg
1.220 for i, j, k in zip(labels, rangePixBegs, rangePixLens):
1.221 - text, textWidth, textHeight = i
1.222 + text, textWidth, textHeight, strandNum = i
1.223 if textWidth > maxWidth:
1.224 continue
1.225 labelBeg = j + (k - textWidth) // 2
1.226 @@ -607,7 +654,7 @@
1.227 sortKey += maxWidth * (labelEnd - end)
1.228 labelEnd = end
1.229 labelBeg = end - textWidth
1.230 - yield sortKey, labelBeg, labelEnd, text, textHeight
1.231 + yield sortKey, labelBeg, labelEnd, text, textHeight, strandNum
1.232
1.233 def nonoverlappingLabels(labels, minPixTweenLabels):
1.234 '''Get a subset of non-overlapping axis labels, greedily.'''
1.235 @@ -631,20 +678,25 @@
1.236 image_size = (margin, end) if textRot else (end, margin)
1.237 im = Image.new(image_mode, image_size, opts.margin_color)
1.238 draw = ImageDraw.Draw(im)
1.239 - for sortKey, labelBeg, labelEnd, text, textHeight in labels:
1.240 + for sortKey, labelBeg, labelEnd, text, textHeight, strandNum in labels:
1.241 base = margin - textHeight if textAln else 0
1.242 position = (base, labelBeg) if textRot else (labelBeg, base)
1.243 - draw.text(position, text, font=font, fill=opts.text_color)
1.244 + fill = ("black", opts.forwardcolor, opts.reversecolor)[strandNum]
1.245 + draw.text(position, text, font=font, fill=fill)
1.246 return im
1.247
1.248 -def rangesWithOrigins(sortedRanges, rangePixBegs, bpPerPix):
1.249 - for i, j in zip(sortedRanges, rangePixBegs):
1.250 - seqName, rangeBeg, rangeEnd = i
1.251 - origin = bpPerPix * j - rangeBeg
1.252 - yield seqName, (rangeBeg, rangeEnd, origin)
1.253 +def rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
1.254 + for i, j, k in zip(sortedRanges, rangePixBegs, rangePixLens):
1.255 + seqName, rangeBeg, rangeEnd, strandNum = i
1.256 + isReverseRange = (strandNum > 1)
1.257 + if isReverseRange:
1.258 + origin = bpPerPix * (j + k) + rangeBeg
1.259 + else:
1.260 + origin = bpPerPix * j - rangeBeg
1.261 + yield seqName, (rangeBeg, rangeEnd, isReverseRange, origin)
1.262
1.263 -def rangesPerSeq(sortedRanges, rangePixBegs, bpPerPix):
1.264 - a = rangesWithOrigins(sortedRanges, rangePixBegs, bpPerPix)
1.265 +def rangesPerSeq(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
1.266 + a = rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix)
1.267 for k, v in itertools.groupby(a, itemgetter(0)):
1.268 yield k, [i[1] for i in v]
1.269
1.270 @@ -729,11 +781,13 @@
1.271
1.272 p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
1.273 rangePixBegs1, rangePixLens1, width = p1
1.274 - rangeDict1 = dict(rangesPerSeq(sortedRanges1, rangePixBegs1, bpPerPix))
1.275 + rangeDict1 = dict(rangesPerSeq(sortedRanges1, rangePixBegs1,
1.276 + rangePixLens1, bpPerPix))
1.277
1.278 p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
1.279 rangePixBegs2, rangePixLens2, height = p2
1.280 - rangeDict2 = dict(rangesPerSeq(sortedRanges2, rangePixBegs2, bpPerPix))
1.281 + rangeDict2 = dict(rangesPerSeq(sortedRanges2, rangePixBegs2,
1.282 + rangePixLens2, bpPerPix))
1.283
1.284 warn("width: " + str(width))
1.285 warn("height: " + str(height))
1.286 @@ -828,6 +882,12 @@
1.287 op.add_option("--sort2", default="1", metavar="N",
1.288 help="genome2 sequence order: 0=input order, 1=name order, "
1.289 "2=length order, 3=alignment order (default=%default)")
1.290 + op.add_option("--strands1", default="0", metavar="N", help=
1.291 + "genome1 sequence orientation: 0=forward orientation, "
1.292 + "1=alignment orientation (default=%default)")
1.293 + op.add_option("--strands2", default="0", metavar="N", help=
1.294 + "genome2 sequence orientation: 0=forward orientation, "
1.295 + "1=alignment orientation (default=%default)")
1.296 op.add_option("--max-gap1", metavar="FRAC", default="0.5,2", help=
1.297 "maximum unaligned (end,mid) gap in genome1: "
1.298 "fraction of aligned length (default=%default)")
1.299 @@ -897,7 +957,6 @@
1.300 (opts, args) = op.parse_args()
1.301 if len(args) != 2: op.error("2 arguments needed")
1.302
1.303 - opts.text_color = "black"
1.304 opts.background_color = "white"
1.305 opts.label_space = 5 # minimum number of pixels between axis labels
1.306