Remove last-dotplot trim options
authorMartin C. Frith
Wed Nov 08 16:06:17 2017 +0900 (2017-11-08)
changeset 90992dfad507286
parent 908 6840398323e1
child 910 42653029d900
Remove last-dotplot trim options
doc/last-dotplot.txt
scripts/last-dotplot
     1.1 --- a/doc/last-dotplot.txt	Wed Nov 08 15:45:08 2017 +0900
     1.2 +++ b/doc/last-dotplot.txt	Wed Nov 08 16:06:17 2017 +0900
     1.3 @@ -89,10 +89,6 @@
     1.4    --sort2=N
     1.5        Put the 2nd genome's sequences top-to-bottom in order of: their
     1.6        appearance in the input (0), their names (1), their lengths (2).
     1.7 -  --trim1
     1.8 -      Trim unaligned sequence flanks from the 1st (horizontal) genome.
     1.9 -  --trim2
    1.10 -      Trim unaligned sequence flanks from the 2nd (vertical) genome.
    1.11    --border-pixels=INT
    1.12        Number of pixels between sequences.
    1.13    --border-color=COLOR
     2.1 --- a/scripts/last-dotplot	Wed Nov 08 15:45:08 2017 +0900
     2.2 +++ b/scripts/last-dotplot	Wed Nov 08 16:06:17 2017 +0900
     2.3 @@ -120,30 +120,26 @@
     2.4              return pattern, int(beg), int(end)  # beg may be negative
     2.5      return text, 0, sys.maxsize
     2.6  
     2.7 -def rangeFromSeqName(seqRequests, name, seqLen):
     2.8 +def rangesFromSeqName(seqRequests, name, seqLen):
     2.9      if seqRequests:
    2.10          base = name.split(".")[-1]  # allow for names like hg19.chr7
    2.11          for pat, beg, end in seqRequests:
    2.12              if fnmatchcase(name, pat) or fnmatchcase(base, pat):
    2.13 -                return max(beg, 0), min(end, seqLen)
    2.14 -        return None
    2.15 +                yield max(beg, 0), min(end, seqLen)
    2.16      else:
    2.17 -        return 0, seqLen
    2.18 +        yield 0, seqLen
    2.19  
    2.20 -def updateSeqs(isTrim, seqNames, seqLimits, seqName, seqRange, blocks, index):
    2.21 -    if seqName not in seqLimits:
    2.22 -        seqNames.append(seqName)
    2.23 -    if isTrim:
    2.24 -        beg = blocks[0][index]
    2.25 -        end = blocks[-1][index] + blocks[-1][2]
    2.26 -        if beg < 0: beg, end = -end, -beg
    2.27 -        if seqName in seqLimits:
    2.28 -            b, e = seqLimits[seqName]
    2.29 -            seqLimits[seqName] = min(b, beg), max(e, end)
    2.30 -        else:
    2.31 -            seqLimits[seqName] = beg, end
    2.32 +def updateSeqs(coverDict, seqRanges, seqName, ranges, coveredRange):
    2.33 +    beg, end = coveredRange
    2.34 +    if beg < 0:
    2.35 +        coveredRange = -end, -beg
    2.36 +    if seqName in coverDict:
    2.37 +        coverDict[seqName].append(coveredRange)
    2.38      else:
    2.39 -        seqLimits[seqName] = seqRange
    2.40 +        coverDict[seqName] = [coveredRange]
    2.41 +        for beg, end in ranges:
    2.42 +            r = seqName, beg, end
    2.43 +            seqRanges.append(r)
    2.44  
    2.45  def readAlignments(fileName, opts):
    2.46      '''Get alignments and sequence limits, from MAF or tabular format.'''
    2.47 @@ -151,25 +147,25 @@
    2.48      seqRequests2 = map(seqRequestFromText, opts.seq2)
    2.49  
    2.50      alignments = []
    2.51 -    seqNames1 = []
    2.52 -    seqNames2 = []
    2.53 -    seqLimits1 = {}
    2.54 -    seqLimits2 = {}
    2.55 +    seqRanges1 = []
    2.56 +    seqRanges2 = []
    2.57 +    coverDict1 = {}
    2.58 +    coverDict2 = {}
    2.59      lines = myOpen(fileName)
    2.60      for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
    2.61 -        range1 = rangeFromSeqName(seqRequests1, seqName1, seqLen1)
    2.62 -        if not range1: continue
    2.63 -        range2 = rangeFromSeqName(seqRequests2, seqName2, seqLen2)
    2.64 -        if not range2: continue
    2.65 -        b = list(croppedBlocks(list(blocks), [range1], [range2]))
    2.66 +        ranges1 = sorted(rangesFromSeqName(seqRequests1, seqName1, seqLen1))
    2.67 +        if not ranges1: continue
    2.68 +        ranges2 = sorted(rangesFromSeqName(seqRequests2, seqName2, seqLen2))
    2.69 +        if not ranges2: continue
    2.70 +        b = list(croppedBlocks(list(blocks), ranges1, ranges2))
    2.71          if not b: continue
    2.72          aln = seqName1, seqName2, b
    2.73          alignments.append(aln)
    2.74 -        updateSeqs(opts.trim1, seqNames1, seqLimits1, seqName1, range1, b, 0)
    2.75 -        updateSeqs(opts.trim2, seqNames2, seqLimits2, seqName2, range2, b, 1)
    2.76 -    seqRanges1 = [(i, seqLimits1[i][0], seqLimits1[i][1]) for i in seqNames1]
    2.77 -    seqRanges2 = [(i, seqLimits2[i][0], seqLimits2[i][1]) for i in seqNames2]
    2.78 -    return alignments, seqRanges1, seqRanges2
    2.79 +        coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
    2.80 +        updateSeqs(coverDict1, seqRanges1, seqName1, ranges1, coveredRange1)
    2.81 +        coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
    2.82 +        updateSeqs(coverDict2, seqRanges2, seqName2, ranges2, coveredRange2)
    2.83 +    return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
    2.84  
    2.85  def natural_sort_key(my_string):
    2.86      '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
    2.87 @@ -544,7 +540,7 @@
    2.88  
    2.89      warn("reading alignments...")
    2.90      alnData = readAlignments(args[0], opts)
    2.91 -    alignments, seqRanges1, seqRanges2 = alnData
    2.92 +    alignments, seqRanges1, coverDict1, seqRanges2, coverDict2 = alnData
    2.93      warn("done")
    2.94      if not alignments: raise Exception("there are no alignments")
    2.95  
    2.96 @@ -665,10 +661,6 @@
    2.97      op.add_option("--sort2", default="1", metavar="N",
    2.98                    help="genome2 sequence order: 0=input order, 1=name order, "
    2.99                    "2=length order (default=%default)")
   2.100 -    op.add_option("--trim1", action="store_true",
   2.101 -                  help="trim unaligned sequence flanks from the 1st genome")
   2.102 -    op.add_option("--trim2", action="store_true",
   2.103 -                  help="trim unaligned sequence flanks from the 2nd genome")
   2.104      op.add_option("--border-pixels", metavar="INT", type="int", default=1,
   2.105                    help="number of pixels between sequences (default=%default)")
   2.106      op.add_option("--border-color", metavar="COLOR", default="black",