scripts/last-dotplot
changeset 909 92dfad507286
parent 908 6840398323e1
child 910 42653029d900
     1.1 --- a/scripts/last-dotplot	Wed Nov 08 15:45:08 2017 +0900
     1.2 +++ b/scripts/last-dotplot	Wed Nov 08 16:06:17 2017 +0900
     1.3 @@ -120,30 +120,26 @@
     1.4              return pattern, int(beg), int(end)  # beg may be negative
     1.5      return text, 0, sys.maxsize
     1.6  
     1.7 -def rangeFromSeqName(seqRequests, name, seqLen):
     1.8 +def rangesFromSeqName(seqRequests, name, seqLen):
     1.9      if seqRequests:
    1.10          base = name.split(".")[-1]  # allow for names like hg19.chr7
    1.11          for pat, beg, end in seqRequests:
    1.12              if fnmatchcase(name, pat) or fnmatchcase(base, pat):
    1.13 -                return max(beg, 0), min(end, seqLen)
    1.14 -        return None
    1.15 +                yield max(beg, 0), min(end, seqLen)
    1.16      else:
    1.17 -        return 0, seqLen
    1.18 +        yield 0, seqLen
    1.19  
    1.20 -def updateSeqs(isTrim, seqNames, seqLimits, seqName, seqRange, blocks, index):
    1.21 -    if seqName not in seqLimits:
    1.22 -        seqNames.append(seqName)
    1.23 -    if isTrim:
    1.24 -        beg = blocks[0][index]
    1.25 -        end = blocks[-1][index] + blocks[-1][2]
    1.26 -        if beg < 0: beg, end = -end, -beg
    1.27 -        if seqName in seqLimits:
    1.28 -            b, e = seqLimits[seqName]
    1.29 -            seqLimits[seqName] = min(b, beg), max(e, end)
    1.30 -        else:
    1.31 -            seqLimits[seqName] = beg, end
    1.32 +def updateSeqs(coverDict, seqRanges, seqName, ranges, coveredRange):
    1.33 +    beg, end = coveredRange
    1.34 +    if beg < 0:
    1.35 +        coveredRange = -end, -beg
    1.36 +    if seqName in coverDict:
    1.37 +        coverDict[seqName].append(coveredRange)
    1.38      else:
    1.39 -        seqLimits[seqName] = seqRange
    1.40 +        coverDict[seqName] = [coveredRange]
    1.41 +        for beg, end in ranges:
    1.42 +            r = seqName, beg, end
    1.43 +            seqRanges.append(r)
    1.44  
    1.45  def readAlignments(fileName, opts):
    1.46      '''Get alignments and sequence limits, from MAF or tabular format.'''
    1.47 @@ -151,25 +147,25 @@
    1.48      seqRequests2 = map(seqRequestFromText, opts.seq2)
    1.49  
    1.50      alignments = []
    1.51 -    seqNames1 = []
    1.52 -    seqNames2 = []
    1.53 -    seqLimits1 = {}
    1.54 -    seqLimits2 = {}
    1.55 +    seqRanges1 = []
    1.56 +    seqRanges2 = []
    1.57 +    coverDict1 = {}
    1.58 +    coverDict2 = {}
    1.59      lines = myOpen(fileName)
    1.60      for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
    1.61 -        range1 = rangeFromSeqName(seqRequests1, seqName1, seqLen1)
    1.62 -        if not range1: continue
    1.63 -        range2 = rangeFromSeqName(seqRequests2, seqName2, seqLen2)
    1.64 -        if not range2: continue
    1.65 -        b = list(croppedBlocks(list(blocks), [range1], [range2]))
    1.66 +        ranges1 = sorted(rangesFromSeqName(seqRequests1, seqName1, seqLen1))
    1.67 +        if not ranges1: continue
    1.68 +        ranges2 = sorted(rangesFromSeqName(seqRequests2, seqName2, seqLen2))
    1.69 +        if not ranges2: continue
    1.70 +        b = list(croppedBlocks(list(blocks), ranges1, ranges2))
    1.71          if not b: continue
    1.72          aln = seqName1, seqName2, b
    1.73          alignments.append(aln)
    1.74 -        updateSeqs(opts.trim1, seqNames1, seqLimits1, seqName1, range1, b, 0)
    1.75 -        updateSeqs(opts.trim2, seqNames2, seqLimits2, seqName2, range2, b, 1)
    1.76 -    seqRanges1 = [(i, seqLimits1[i][0], seqLimits1[i][1]) for i in seqNames1]
    1.77 -    seqRanges2 = [(i, seqLimits2[i][0], seqLimits2[i][1]) for i in seqNames2]
    1.78 -    return alignments, seqRanges1, seqRanges2
    1.79 +        coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
    1.80 +        updateSeqs(coverDict1, seqRanges1, seqName1, ranges1, coveredRange1)
    1.81 +        coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
    1.82 +        updateSeqs(coverDict2, seqRanges2, seqName2, ranges2, coveredRange2)
    1.83 +    return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
    1.84  
    1.85  def natural_sort_key(my_string):
    1.86      '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
    1.87 @@ -544,7 +540,7 @@
    1.88  
    1.89      warn("reading alignments...")
    1.90      alnData = readAlignments(args[0], opts)
    1.91 -    alignments, seqRanges1, seqRanges2 = alnData
    1.92 +    alignments, seqRanges1, coverDict1, seqRanges2, coverDict2 = alnData
    1.93      warn("done")
    1.94      if not alignments: raise Exception("there are no alignments")
    1.95  
    1.96 @@ -665,10 +661,6 @@
    1.97      op.add_option("--sort2", default="1", metavar="N",
    1.98                    help="genome2 sequence order: 0=input order, 1=name order, "
    1.99                    "2=length order (default=%default)")
   1.100 -    op.add_option("--trim1", action="store_true",
   1.101 -                  help="trim unaligned sequence flanks from the 1st genome")
   1.102 -    op.add_option("--trim2", action="store_true",
   1.103 -                  help="trim unaligned sequence flanks from the 2nd genome")
   1.104      op.add_option("--border-pixels", metavar="INT", type="int", default=1,
   1.105                    help="number of pixels between sequences (default=%default)")
   1.106      op.add_option("--border-color", metavar="COLOR", default="black",