scripts/last-dotplot
changeset 910 42653029d900
parent 909 92dfad507286
child 911 4080f3f6f73d
     1.1 --- a/scripts/last-dotplot	Wed Nov 08 16:06:17 2017 +0900
     1.2 +++ b/scripts/last-dotplot	Wed Nov 08 17:02:56 2017 +0900
     1.3 @@ -167,6 +167,50 @@
     1.4          updateSeqs(coverDict2, seqRanges2, seqName2, ranges2, coveredRange2)
     1.5      return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
     1.6  
     1.7 +def twoValuesFromOption(text, separator):
     1.8 +    if separator in text:
     1.9 +        return text.split(separator)
    1.10 +    return text, text
    1.11 +
    1.12 +def mergedRanges(ranges):
    1.13 +    oldBeg, maxEnd = ranges[0]
    1.14 +    for beg, end in ranges:
    1.15 +        if beg > maxEnd:
    1.16 +            yield oldBeg, maxEnd
    1.17 +            oldBeg = beg
    1.18 +            maxEnd = end
    1.19 +        elif end > maxEnd:
    1.20 +            maxEnd = end
    1.21 +    yield oldBeg, maxEnd
    1.22 +
    1.23 +def mergedRangesPerSeq(coverDict):
    1.24 +    for k, v in coverDict.iteritems():
    1.25 +        v.sort()
    1.26 +        yield k, list(mergedRanges(v))
    1.27 +
    1.28 +def coveredLength(mergedCoverDict):
    1.29 +    return sum(sum(e - b for b, e in v) for v in mergedCoverDict.itervalues())
    1.30 +
    1.31 +def trimmed(seqRanges, coverDict, minAlignedBases, maxGapFrac, endPad, midPad):
    1.32 +    maxEndGapFrac, maxMidGapFrac = twoValuesFromOption(maxGapFrac, ",")
    1.33 +    maxEndGap = max(float(maxEndGapFrac) * minAlignedBases, endPad * 1.0)
    1.34 +    maxMidGap = max(float(maxMidGapFrac) * minAlignedBases, midPad * 2.0)
    1.35 +
    1.36 +    for seqName, rangeBeg, rangeEnd in seqRanges:
    1.37 +        seqBlocks = coverDict[seqName]
    1.38 +        blocks = [i for i in seqBlocks if i[0] < rangeEnd and i[1] > rangeBeg]
    1.39 +        if blocks[0][0] - rangeBeg > maxEndGap:
    1.40 +            rangeBeg = blocks[0][0] - endPad
    1.41 +        for j, y in enumerate(blocks):
    1.42 +            if j:
    1.43 +                x = blocks[j - 1]
    1.44 +                if y[0] - x[1] > maxMidGap:
    1.45 +                    yield seqName, rangeBeg, x[1] + midPad
    1.46 +                    rangeBeg = y[0] - midPad
    1.47 +        if rangeEnd - blocks[-1][1] > maxEndGap:
    1.48 +            rangeEnd = blocks[-1][1] + endPad
    1.49 +        yield seqName, rangeBeg, rangeEnd
    1.50 +
    1.51  def natural_sort_key(my_string):
    1.52      '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
    1.53      parts = re.split(r'(\d+)', my_string)
    1.54 @@ -541,16 +585,24 @@
    1.55      warn("reading alignments...")
    1.56      alnData = readAlignments(args[0], opts)
    1.57      alignments, seqRanges1, coverDict1, seqRanges2, coverDict2 = alnData
    1.58 -    warn("done")
    1.59      if not alignments: raise Exception("there are no alignments")
    1.60 +    warn("cutting...")
    1.61 +    coverDict1 = dict(mergedRangesPerSeq(coverDict1))
    1.62 +    coverDict2 = dict(mergedRangesPerSeq(coverDict2))
    1.63 +    minAlignedBases = min(coveredLength(coverDict1), coveredLength(coverDict2))
    1.64 +    pad = int(opts.pad * minAlignedBases)
    1.65 +    cutRanges1 = list(trimmed(seqRanges1, coverDict1, minAlignedBases,
    1.66 +                              opts.max_gap1, pad, pad))
    1.67 +    cutRanges2 = list(trimmed(seqRanges2, coverDict2, minAlignedBases,
    1.68 +                              opts.max_gap2, pad, pad))
    1.69  
    1.70      textRot1 = "vertical".startswith(opts.rot1)
    1.71 -    i1 = dataFromRanges(seqRanges1, opts.sort1, font,
    1.72 +    i1 = dataFromRanges(cutRanges1, opts.sort1, font,
    1.73                          opts.fontsize, image_mode, opts.labels1, textRot1)
    1.74      sortedRanges1, rangeSizes1, labelData1, tMargin = i1
    1.75  
    1.76      textRot2 = "horizontal".startswith(opts.rot2)
    1.77 -    i2 = dataFromRanges(seqRanges2, opts.sort2, font,
    1.78 +    i2 = dataFromRanges(cutRanges2, opts.sort2, font,
    1.79                          opts.fontsize, image_mode, opts.labels2, textRot2)
    1.80      sortedRanges2, rangeSizes2, labelData2, lMargin = i2
    1.81  
    1.82 @@ -661,6 +713,15 @@
    1.83      op.add_option("--sort2", default="1", metavar="N",
    1.84                    help="genome2 sequence order: 0=input order, 1=name order, "
    1.85                    "2=length order (default=%default)")
    1.86 +    op.add_option("--max-gap1", metavar="FRAC", default="0.5,2", help=
    1.87 +                  "maximum unaligned (end,mid) gap in genome1: "
    1.88 +                  "fraction of aligned length (default=%default)")
    1.89 +    op.add_option("--max-gap2", metavar="FRAC", default="0.5,2", help=
    1.90 +                  "maximum unaligned (end,mid) gap in genome2: "
    1.91 +                  "fraction of aligned length (default=%default)")
    1.92 +    op.add_option("--pad", metavar="FRAC", type="float", default=0.04, help=
    1.93 +                  "pad length when cutting unaligned gaps: "
    1.94 +                  "fraction of aligned length (default=%default)")
    1.95      op.add_option("--border-pixels", metavar="INT", type="int", default=1,
    1.96                    help="number of pixels between sequences (default=%default)")
    1.97      op.add_option("--border-color", metavar="COLOR", default="black",