1.1 --- a/doc/last-dotplot.txt Tue Dec 05 13:41:22 2017 +0900
1.2 +++ b/doc/last-dotplot.txt Tue Dec 05 17:57:24 2017 +0900
1.3 @@ -58,9 +58,8 @@
1.4 Fonts
1.5 -----
1.6
1.7 -last-dotplot tries to find a nice font on your computer, but may fail,
1.8 -in which case it will use an ugly font. You can specify a font like
1.9 -this::
1.10 +last-dotplot tries to find a nice font on your computer, but may fail
1.11 +and use an ugly font. You can specify a font like this::
1.12
1.13 last-dotplot -f /usr/share/fonts/liberation/LiberationSans-Regular.ttf alns alns.png
1.14
1.15 @@ -91,12 +90,15 @@
1.16 secondary alignments that are near the primary alignments.
1.17 --sort1=N
1.18 Put the 1st genome's sequences left-to-right in order of: their
1.19 - appearance in the input (0), their names (1), their lengths (2).
1.20 - You can use two colon-separated values, e.g. "2:1" to specify 2
1.21 - for primary and 1 for secondary alignments.
1.22 + appearance in the input (0), their names (1), their lengths (2),
1.23 + the top-to-bottom order of (the midpoints of) their alignments
1.24 + (3). You can use two colon-separated values, e.g. "2:1" to
1.25 + specify 2 for primary and 1 for secondary alignments.
1.26 --sort2=N
1.27 Put the 2nd genome's sequences top-to-bottom in order of: their
1.28 - appearance in the input (0), their names (1), their lengths (2).
1.29 + appearance in the input (0), their names (1), their lengths (2),
1.30 + the left-to-right order of (the midpoints of) their alignments
1.31 + (3).
1.32 --max-gap1=FRAC
1.33 Maximum unaligned gap in the 1st genome. For example, if two
1.34 parts of one DNA read align to widely-separated parts of one
2.1 --- a/scripts/last-dotplot Tue Dec 05 13:41:22 2017 +0900
2.2 +++ b/scripts/last-dotplot Tue Dec 05 17:57:24 2017 +0900
2.3 @@ -9,6 +9,7 @@
2.4 # according to the number of aligned nt-pairs within it, but the
2.5 # result is too faint. How can this be done better?
2.6
2.7 +import functools
2.8 import gzip
2.9 from fnmatch import fnmatchcase
2.10 from operator import itemgetter
2.11 @@ -269,18 +270,65 @@
2.12 def sizeKey(oneSeqRanges):
2.13 return sum(b - e for n, b, e in oneSeqRanges), nameKey(oneSeqRanges)
2.14
2.15 -def mySortedRanges(seqRanges, sortOpt):
2.16 +def alignmentKey(seqNamesToLists, oneSeqRanges):
2.17 + seqName = oneSeqRanges[0][0]
2.18 + alignmentsOfThisSequence = seqNamesToLists[seqName]
2.19 + numOfAlignedLetterPairs = sum(i[3] for i in alignmentsOfThisSequence)
2.20 + toMiddle = numOfAlignedLetterPairs // 2
2.21 + for i in alignmentsOfThisSequence:
2.22 + toMiddle -= i[3]
2.23 + if toMiddle < 0:
2.24 + return i[1:3] # sequence-rank and "position" of this alignment
2.25 +
2.26 +def alignmentSortData1(alignments, seqNamesToRanks2):
2.27 + for seqName1, seqName2, blocks in alignments:
2.28 + seqRank2 = seqNamesToRanks2[seqName2]
2.29 + pos2 = abs(blocks[0][1] + blocks[-1][1] + blocks[-1][2])
2.30 + numOfAlignedLetterPairs = sum(i[2] for i in blocks)
2.31 + yield seqName1, seqRank2, pos2, numOfAlignedLetterPairs
2.32 +
2.33 +def alignmentSortData2(alignments, seqNamesToRanks1):
2.34 + for seqName1, seqName2, blocks in alignments:
2.35 + seqRank1 = seqNamesToRanks1[seqName1]
2.36 + pos1 = abs(blocks[0][0] + blocks[-1][0] + blocks[-1][2])
2.37 + numOfAlignedLetterPairs = sum(i[2] for i in blocks)
2.38 + yield seqName2, seqRank1, pos1, numOfAlignedLetterPairs
2.39 +
2.40 +def mySortedRanges(seqRanges, sortOpt,
2.41 + alignmentSortDataFunc, alignments, otherRanges):
2.42 rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
2.43 g = [list(ranges) for seqName, ranges in rangesGroupedBySeqName]
2.44 if sortOpt == "1":
2.45 g.sort(key=nameKey)
2.46 if sortOpt == "2":
2.47 g.sort(key=sizeKey)
2.48 + if sortOpt == "3":
2.49 + otherNameGroups = itertools.groupby(i[0] for i in otherRanges)
2.50 + ranksAndNames = enumerate(i[0] for i in otherNameGroups)
2.51 + otherNamesToRanks = dict((n, r) for r, n in ranksAndNames)
2.52 + alns = sorted(alignmentSortDataFunc(alignments, otherNamesToRanks))
2.53 + alnsGroupedBySeqName = itertools.groupby(alns, itemgetter(0))
2.54 + seqNamesToLists = dict((k, list(v)) for k, v in alnsGroupedBySeqName)
2.55 + g.sort(key=functools.partial(alignmentKey, seqNamesToLists))
2.56 return [j for i in g for j in i]
2.57
2.58 -def allSortedRanges(seqRanges, seqRangesB, sortOpt):
2.59 - x, y = twoValuesFromOption(sortOpt, ":")
2.60 - return mySortedRanges(seqRanges, x) + mySortedRanges(seqRangesB, y)
2.61 +def allSortedRanges(opts, alignments, alignmentsB,
2.62 + seqRanges1, seqRangesB1, seqRanges2, seqRangesB2):
2.63 + o1, oB1 = twoValuesFromOption(opts.sort1, ":")
2.64 + o2, oB2 = twoValuesFromOption(opts.sort2, ":")
2.65 + if o1 == "3" and o2 == "3":
2.66 + raise Exception("the sort options have circular dependency")
2.67 + if o1 != "3":
2.68 + s1 = mySortedRanges(seqRanges1, o1, None, None, None)
2.69 + if o2 != "3":
2.70 + s2 = mySortedRanges(seqRanges2, o2, None, None, None)
2.71 + if o1 == "3":
2.72 + s1 = mySortedRanges(seqRanges1, o1, alignmentSortData1, alignments, s2)
2.73 + if o2 == "3":
2.74 + s2 = mySortedRanges(seqRanges2, o2, alignmentSortData2, alignments, s1)
2.75 + t1 = mySortedRanges(seqRangesB1, oB1, alignmentSortData1, alignmentsB, s2)
2.76 + t2 = mySortedRanges(seqRangesB2, oB2, alignmentSortData2, alignmentsB, s1)
2.77 + return s1 + t1, s2 + t2
2.78
2.79 def prettyNum(n):
2.80 t = str(n)
2.81 @@ -323,9 +371,7 @@
2.82 x, y = y, x
2.83 yield text, x, y
2.84
2.85 -def dataFromRanges(cutRanges, cutRangesB, sortOpt, font,
2.86 - fontSize, imageMode, labelOpt, textRot):
2.87 - sortedRanges = allSortedRanges(cutRanges, cutRangesB, sortOpt)
2.88 +def dataFromRanges(sortedRanges, font, fontSize, imageMode, labelOpt, textRot):
2.89 for i in sortedRanges:
2.90 warn("\t".join(map(str, i)))
2.91 warn("")
2.92 @@ -334,7 +380,7 @@
2.93 imageMode, textRot))
2.94 margin = max(i[2] for i in labs)
2.95 # xxx the margin may be too big, because some labels may get omitted
2.96 - return sortedRanges, rangeSizes, labs, margin
2.97 + return rangeSizes, labs, margin
2.98
2.99 def div_ceil(x, y):
2.100 '''Return x / y rounded up.'''
2.101 @@ -664,15 +710,20 @@
2.102 cutRangesB2 = trimmed(seqRangesB2, coverDictB2, minAlignedBases,
2.103 maxGapB2, 0, 0)
2.104
2.105 + warn("sorting...")
2.106 + sortOut = allSortedRanges(opts, alignments, alignmentsB,
2.107 + cutRanges1, cutRangesB1, cutRanges2, cutRangesB2)
2.108 + sortedRanges1, sortedRanges2 = sortOut
2.109 +
2.110 textRot1 = "vertical".startswith(opts.rot1)
2.111 - i1 = dataFromRanges(cutRanges1, cutRangesB1, opts.sort1, font,
2.112 + i1 = dataFromRanges(sortedRanges1, font,
2.113 opts.fontsize, image_mode, opts.labels1, textRot1)
2.114 - sortedRanges1, rangeSizes1, labelData1, tMargin = i1
2.115 + rangeSizes1, labelData1, tMargin = i1
2.116
2.117 textRot2 = "horizontal".startswith(opts.rot2)
2.118 - i2 = dataFromRanges(cutRanges2, cutRangesB2, opts.sort2, font,
2.119 + i2 = dataFromRanges(sortedRanges2, font,
2.120 opts.fontsize, image_mode, opts.labels2, textRot2)
2.121 - sortedRanges2, rangeSizes2, labelData2, lMargin = i2
2.122 + rangeSizes2, labelData2, lMargin = i2
2.123
2.124 maxPixels1 = opts.width - lMargin
2.125 maxPixels2 = opts.height - tMargin
2.126 @@ -778,10 +829,10 @@
2.127 op.add_option("--alignments", metavar="FILE", help="secondary alignments")
2.128 op.add_option("--sort1", default="1", metavar="N",
2.129 help="genome1 sequence order: 0=input order, 1=name order, "
2.130 - "2=length order (default=%default)")
2.131 + "2=length order, 3=alignment order (default=%default)")
2.132 op.add_option("--sort2", default="1", metavar="N",
2.133 help="genome2 sequence order: 0=input order, 1=name order, "
2.134 - "2=length order (default=%default)")
2.135 + "2=length order, 3=alignment order (default=%default)")
2.136 op.add_option("--max-gap1", metavar="FRAC", default="0.5,2", help=
2.137 "maximum unaligned (end,mid) gap in genome1: "
2.138 "fraction of aligned length (default=%default)")