1.1 --- a/scripts/last-dotplot Tue Dec 05 13:41:22 2017 +0900
1.2 +++ b/scripts/last-dotplot Tue Dec 05 17:57:24 2017 +0900
1.3 @@ -9,6 +9,7 @@
1.4 # according to the number of aligned nt-pairs within it, but the
1.5 # result is too faint. How can this be done better?
1.6
1.7 +import functools
1.8 import gzip
1.9 from fnmatch import fnmatchcase
1.10 from operator import itemgetter
1.11 @@ -269,18 +270,65 @@
1.12 def sizeKey(oneSeqRanges):
1.13 return sum(b - e for n, b, e in oneSeqRanges), nameKey(oneSeqRanges)
1.14
1.15 -def mySortedRanges(seqRanges, sortOpt):
1.16 +def alignmentKey(seqNamesToLists, oneSeqRanges):
1.17 + seqName = oneSeqRanges[0][0]
1.18 + alignmentsOfThisSequence = seqNamesToLists[seqName]
1.19 + numOfAlignedLetterPairs = sum(i[3] for i in alignmentsOfThisSequence)
1.20 + toMiddle = numOfAlignedLetterPairs // 2
1.21 + for i in alignmentsOfThisSequence:
1.22 + toMiddle -= i[3]
1.23 + if toMiddle < 0:
1.24 + return i[1:3] # sequence-rank and "position" of this alignment
1.25 +
1.26 +def alignmentSortData1(alignments, seqNamesToRanks2):
1.27 + for seqName1, seqName2, blocks in alignments:
1.28 + seqRank2 = seqNamesToRanks2[seqName2]
1.29 + pos2 = abs(blocks[0][1] + blocks[-1][1] + blocks[-1][2])
1.30 + numOfAlignedLetterPairs = sum(i[2] for i in blocks)
1.31 + yield seqName1, seqRank2, pos2, numOfAlignedLetterPairs
1.32 +
1.33 +def alignmentSortData2(alignments, seqNamesToRanks1):
1.34 + for seqName1, seqName2, blocks in alignments:
1.35 + seqRank1 = seqNamesToRanks1[seqName1]
1.36 + pos1 = abs(blocks[0][0] + blocks[-1][0] + blocks[-1][2])
1.37 + numOfAlignedLetterPairs = sum(i[2] for i in blocks)
1.38 + yield seqName2, seqRank1, pos1, numOfAlignedLetterPairs
1.39 +
1.40 +def mySortedRanges(seqRanges, sortOpt,
1.41 + alignmentSortDataFunc, alignments, otherRanges):
1.42 rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
1.43 g = [list(ranges) for seqName, ranges in rangesGroupedBySeqName]
1.44 if sortOpt == "1":
1.45 g.sort(key=nameKey)
1.46 if sortOpt == "2":
1.47 g.sort(key=sizeKey)
1.48 + if sortOpt == "3":
1.49 + otherNameGroups = itertools.groupby(i[0] for i in otherRanges)
1.50 + ranksAndNames = enumerate(i[0] for i in otherNameGroups)
1.51 + otherNamesToRanks = dict((n, r) for r, n in ranksAndNames)
1.52 + alns = sorted(alignmentSortDataFunc(alignments, otherNamesToRanks))
1.53 + alnsGroupedBySeqName = itertools.groupby(alns, itemgetter(0))
1.54 + seqNamesToLists = dict((k, list(v)) for k, v in alnsGroupedBySeqName)
1.55 + g.sort(key=functools.partial(alignmentKey, seqNamesToLists))
1.56 return [j for i in g for j in i]
1.57
1.58 -def allSortedRanges(seqRanges, seqRangesB, sortOpt):
1.59 - x, y = twoValuesFromOption(sortOpt, ":")
1.60 - return mySortedRanges(seqRanges, x) + mySortedRanges(seqRangesB, y)
1.61 +def allSortedRanges(opts, alignments, alignmentsB,
1.62 + seqRanges1, seqRangesB1, seqRanges2, seqRangesB2):
1.63 + o1, oB1 = twoValuesFromOption(opts.sort1, ":")
1.64 + o2, oB2 = twoValuesFromOption(opts.sort2, ":")
1.65 + if o1 == "3" and o2 == "3":
1.66 + raise Exception("the sort options have circular dependency")
1.67 + if o1 != "3":
1.68 + s1 = mySortedRanges(seqRanges1, o1, None, None, None)
1.69 + if o2 != "3":
1.70 + s2 = mySortedRanges(seqRanges2, o2, None, None, None)
1.71 + if o1 == "3":
1.72 + s1 = mySortedRanges(seqRanges1, o1, alignmentSortData1, alignments, s2)
1.73 + if o2 == "3":
1.74 + s2 = mySortedRanges(seqRanges2, o2, alignmentSortData2, alignments, s1)
1.75 + t1 = mySortedRanges(seqRangesB1, oB1, alignmentSortData1, alignmentsB, s2)
1.76 + t2 = mySortedRanges(seqRangesB2, oB2, alignmentSortData2, alignmentsB, s1)
1.77 + return s1 + t1, s2 + t2
1.78
1.79 def prettyNum(n):
1.80 t = str(n)
1.81 @@ -323,9 +371,7 @@
1.82 x, y = y, x
1.83 yield text, x, y
1.84
1.85 -def dataFromRanges(cutRanges, cutRangesB, sortOpt, font,
1.86 - fontSize, imageMode, labelOpt, textRot):
1.87 - sortedRanges = allSortedRanges(cutRanges, cutRangesB, sortOpt)
1.88 +def dataFromRanges(sortedRanges, font, fontSize, imageMode, labelOpt, textRot):
1.89 for i in sortedRanges:
1.90 warn("\t".join(map(str, i)))
1.91 warn("")
1.92 @@ -334,7 +380,7 @@
1.93 imageMode, textRot))
1.94 margin = max(i[2] for i in labs)
1.95 # xxx the margin may be too big, because some labels may get omitted
1.96 - return sortedRanges, rangeSizes, labs, margin
1.97 + return rangeSizes, labs, margin
1.98
1.99 def div_ceil(x, y):
1.100 '''Return x / y rounded up.'''
1.101 @@ -664,15 +710,20 @@
1.102 cutRangesB2 = trimmed(seqRangesB2, coverDictB2, minAlignedBases,
1.103 maxGapB2, 0, 0)
1.104
1.105 + warn("sorting...")
1.106 + sortOut = allSortedRanges(opts, alignments, alignmentsB,
1.107 + cutRanges1, cutRangesB1, cutRanges2, cutRangesB2)
1.108 + sortedRanges1, sortedRanges2 = sortOut
1.109 +
1.110 textRot1 = "vertical".startswith(opts.rot1)
1.111 - i1 = dataFromRanges(cutRanges1, cutRangesB1, opts.sort1, font,
1.112 + i1 = dataFromRanges(sortedRanges1, font,
1.113 opts.fontsize, image_mode, opts.labels1, textRot1)
1.114 - sortedRanges1, rangeSizes1, labelData1, tMargin = i1
1.115 + rangeSizes1, labelData1, tMargin = i1
1.116
1.117 textRot2 = "horizontal".startswith(opts.rot2)
1.118 - i2 = dataFromRanges(cutRanges2, cutRangesB2, opts.sort2, font,
1.119 + i2 = dataFromRanges(sortedRanges2, font,
1.120 opts.fontsize, image_mode, opts.labels2, textRot2)
1.121 - sortedRanges2, rangeSizes2, labelData2, lMargin = i2
1.122 + rangeSizes2, labelData2, lMargin = i2
1.123
1.124 maxPixels1 = opts.width - lMargin
1.125 maxPixels2 = opts.height - tMargin
1.126 @@ -778,10 +829,10 @@
1.127 op.add_option("--alignments", metavar="FILE", help="secondary alignments")
1.128 op.add_option("--sort1", default="1", metavar="N",
1.129 help="genome1 sequence order: 0=input order, 1=name order, "
1.130 - "2=length order (default=%default)")
1.131 + "2=length order, 3=alignment order (default=%default)")
1.132 op.add_option("--sort2", default="1", metavar="N",
1.133 help="genome2 sequence order: 0=input order, 1=name order, "
1.134 - "2=length order (default=%default)")
1.135 + "2=length order, 3=alignment order (default=%default)")
1.136 op.add_option("--max-gap1", metavar="FRAC", default="0.5,2", help=
1.137 "maximum unaligned (end,mid) gap in genome1: "
1.138 "fraction of aligned length (default=%default)")