scripts/last-dotplot
changeset 914 f4ca19d126e3
parent 913 e87abc7ae9c6
child 915 6079fabb49a5
     1.1 --- a/scripts/last-dotplot	Tue Dec 05 13:41:22 2017 +0900
     1.2 +++ b/scripts/last-dotplot	Tue Dec 05 17:57:24 2017 +0900
     1.3 @@ -9,6 +9,7 @@
     1.4  # according to the number of aligned nt-pairs within it, but the
     1.5  # result is too faint.  How can this be done better?
     1.6  
     1.7 +import functools
     1.8  import gzip
     1.9  from fnmatch import fnmatchcase
    1.10  from operator import itemgetter
    1.11 @@ -269,18 +270,65 @@
    1.12  def sizeKey(oneSeqRanges):
    1.13      return sum(b - e for n, b, e in oneSeqRanges), nameKey(oneSeqRanges)
    1.14  
    1.15 -def mySortedRanges(seqRanges, sortOpt):
    1.16 +def alignmentKey(seqNamesToLists, oneSeqRanges):
    1.17 +    seqName = oneSeqRanges[0][0]
    1.18 +    alignmentsOfThisSequence = seqNamesToLists[seqName]
    1.19 +    numOfAlignedLetterPairs = sum(i[3] for i in alignmentsOfThisSequence)
    1.20 +    toMiddle = numOfAlignedLetterPairs // 2
    1.21 +    for i in alignmentsOfThisSequence:
    1.22 +        toMiddle -= i[3]
    1.23 +        if toMiddle < 0:
    1.24 +            return i[1:3]  # sequence-rank and "position" of this alignment
    1.25 +
    1.26 +def alignmentSortData1(alignments, seqNamesToRanks2):
    1.27 +    for seqName1, seqName2, blocks in alignments:
    1.28 +        seqRank2 = seqNamesToRanks2[seqName2]
    1.29 +        pos2 = abs(blocks[0][1] + blocks[-1][1] + blocks[-1][2])
    1.30 +        numOfAlignedLetterPairs = sum(i[2] for i in blocks)
    1.31 +        yield seqName1, seqRank2, pos2, numOfAlignedLetterPairs
    1.32 +
    1.33 +def alignmentSortData2(alignments, seqNamesToRanks1):
    1.34 +    for seqName1, seqName2, blocks in alignments:
    1.35 +        seqRank1 = seqNamesToRanks1[seqName1]
    1.36 +        pos1 = abs(blocks[0][0] + blocks[-1][0] + blocks[-1][2])
    1.37 +        numOfAlignedLetterPairs = sum(i[2] for i in blocks)
    1.38 +        yield seqName2, seqRank1, pos1, numOfAlignedLetterPairs
    1.39 +
    1.40 +def mySortedRanges(seqRanges, sortOpt,
    1.41 +                   alignmentSortDataFunc, alignments, otherRanges):
    1.42      rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
    1.43      g = [list(ranges) for seqName, ranges in rangesGroupedBySeqName]
    1.44      if sortOpt == "1":
    1.45          g.sort(key=nameKey)
    1.46      if sortOpt == "2":
    1.47          g.sort(key=sizeKey)
    1.48 +    if sortOpt == "3":
    1.49 +        otherNameGroups = itertools.groupby(i[0] for i in otherRanges)
    1.50 +        ranksAndNames = enumerate(i[0] for i in otherNameGroups)
    1.51 +        otherNamesToRanks = dict((n, r) for r, n in ranksAndNames)
    1.52 +        alns = sorted(alignmentSortDataFunc(alignments, otherNamesToRanks))
    1.53 +        alnsGroupedBySeqName = itertools.groupby(alns, itemgetter(0))
    1.54 +        seqNamesToLists = dict((k, list(v)) for k, v in alnsGroupedBySeqName)
    1.55 +        g.sort(key=functools.partial(alignmentKey, seqNamesToLists))
    1.56      return [j for i in g for j in i]
    1.57  
    1.58 -def allSortedRanges(seqRanges, seqRangesB, sortOpt):
    1.59 -    x, y = twoValuesFromOption(sortOpt, ":")
    1.60 -    return mySortedRanges(seqRanges, x) + mySortedRanges(seqRangesB, y)
    1.61 +def allSortedRanges(opts, alignments, alignmentsB,
    1.62 +                    seqRanges1, seqRangesB1, seqRanges2, seqRangesB2):
    1.63 +    o1, oB1 = twoValuesFromOption(opts.sort1, ":")
    1.64 +    o2, oB2 = twoValuesFromOption(opts.sort2, ":")
    1.65 +    if o1 == "3" and o2 == "3":
    1.66 +        raise Exception("the sort options have circular dependency")
    1.67 +    if o1 != "3":
    1.68 +        s1 = mySortedRanges(seqRanges1, o1, None, None, None)
    1.69 +    if o2 != "3":
    1.70 +        s2 = mySortedRanges(seqRanges2, o2, None, None, None)
    1.71 +    if o1 == "3":
    1.72 +        s1 = mySortedRanges(seqRanges1, o1, alignmentSortData1, alignments, s2)
    1.73 +    if o2 == "3":
    1.74 +        s2 = mySortedRanges(seqRanges2, o2, alignmentSortData2, alignments, s1)
    1.75 +    t1 = mySortedRanges(seqRangesB1, oB1, alignmentSortData1, alignmentsB, s2)
    1.76 +    t2 = mySortedRanges(seqRangesB2, oB2, alignmentSortData2, alignmentsB, s1)
    1.77 +    return s1 + t1, s2 + t2
    1.78  
    1.79  def prettyNum(n):
    1.80      t = str(n)
    1.81 @@ -323,9 +371,7 @@
    1.82                  x, y = y, x
    1.83          yield text, x, y
    1.84  
    1.85 -def dataFromRanges(cutRanges, cutRangesB, sortOpt, font,
    1.86 -                   fontSize, imageMode, labelOpt, textRot):
    1.87 -    sortedRanges = allSortedRanges(cutRanges, cutRangesB, sortOpt)
    1.88 +def dataFromRanges(sortedRanges, font, fontSize, imageMode, labelOpt, textRot):
    1.89      for i in sortedRanges:
    1.90          warn("\t".join(map(str, i)))
    1.91      warn("")
    1.92 @@ -334,7 +380,7 @@
    1.93                              imageMode, textRot))
    1.94      margin = max(i[2] for i in labs)
    1.95      # xxx the margin may be too big, because some labels may get omitted
    1.96 -    return sortedRanges, rangeSizes, labs, margin
    1.97 +    return rangeSizes, labs, margin
    1.98  
    1.99  def div_ceil(x, y):
   1.100      '''Return x / y rounded up.'''
   1.101 @@ -664,15 +710,20 @@
   1.102      cutRangesB2 = trimmed(seqRangesB2, coverDictB2, minAlignedBases,
   1.103                            maxGapB2, 0, 0)
   1.104  
   1.105 +    warn("sorting...")
   1.106 +    sortOut = allSortedRanges(opts, alignments, alignmentsB,
   1.107 +                              cutRanges1, cutRangesB1, cutRanges2, cutRangesB2)
   1.108 +    sortedRanges1, sortedRanges2 = sortOut
   1.109 +
   1.110      textRot1 = "vertical".startswith(opts.rot1)
   1.111 -    i1 = dataFromRanges(cutRanges1, cutRangesB1, opts.sort1, font,
   1.112 +    i1 = dataFromRanges(sortedRanges1, font,
   1.113                          opts.fontsize, image_mode, opts.labels1, textRot1)
   1.114 -    sortedRanges1, rangeSizes1, labelData1, tMargin = i1
   1.115 +    rangeSizes1, labelData1, tMargin = i1
   1.116  
   1.117      textRot2 = "horizontal".startswith(opts.rot2)
   1.118 -    i2 = dataFromRanges(cutRanges2, cutRangesB2, opts.sort2, font,
   1.119 +    i2 = dataFromRanges(sortedRanges2, font,
   1.120                          opts.fontsize, image_mode, opts.labels2, textRot2)
   1.121 -    sortedRanges2, rangeSizes2, labelData2, lMargin = i2
   1.122 +    rangeSizes2, labelData2, lMargin = i2
   1.123  
   1.124      maxPixels1 = opts.width  - lMargin
   1.125      maxPixels2 = opts.height - tMargin
   1.126 @@ -778,10 +829,10 @@
   1.127      op.add_option("--alignments", metavar="FILE", help="secondary alignments")
   1.128      op.add_option("--sort1", default="1", metavar="N",
   1.129                    help="genome1 sequence order: 0=input order, 1=name order, "
   1.130 -                  "2=length order (default=%default)")
   1.131 +                  "2=length order, 3=alignment order (default=%default)")
   1.132      op.add_option("--sort2", default="1", metavar="N",
   1.133                    help="genome2 sequence order: 0=input order, 1=name order, "
   1.134 -                  "2=length order (default=%default)")
   1.135 +                  "2=length order, 3=alignment order (default=%default)")
   1.136      op.add_option("--max-gap1", metavar="FRAC", default="0.5,2", help=
   1.137                    "maximum unaligned (end,mid) gap in genome1: "
   1.138                    "fraction of aligned length (default=%default)")