scripts/last-dotplot
changeset 840 85a72978fb7d
parent 839 bbc6f00e683b
child 844 23de4eb3be1d
     1.1 --- a/scripts/last-dotplot	Thu Mar 02 10:10:51 2017 +0900
     1.2 +++ b/scripts/last-dotplot	Thu Mar 02 11:31:57 2017 +0900
     1.3 @@ -19,6 +19,21 @@
     1.4      prog = os.path.basename(sys.argv[0])
     1.5      sys.stderr.write(prog + ": " + message + "\n")
     1.6  
     1.7 +def croppedBlocks(blocks, range1, range2):
     1.8 +    cropBeg1, cropEnd1 = range1
     1.9 +    cropBeg2, cropEnd2 = range2
    1.10 +    if blocks[0][0] < 0: cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
    1.11 +    if blocks[0][1] < 0: cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
    1.12 +    for beg1, beg2, size in blocks:
    1.13 +        b1 = max(cropBeg1, beg1)
    1.14 +        e1 = min(cropEnd1, beg1 + size)
    1.15 +        if b1 >= e1: continue
    1.16 +        offset = beg2 - beg1
    1.17 +        b2 = max(cropBeg2, b1 + offset)
    1.18 +        e2 = min(cropEnd2, e1 + offset)
    1.19 +        if b2 >= e2: continue
    1.20 +        yield b2 - offset, b2, e2 - b2
    1.21 +
    1.22  def tabBlocks(beg1, beg2, blocks):
    1.23      '''Get the gapless blocks of an alignment, from LAST tabular format.'''
    1.24      for i in blocks.split(","):
    1.25 @@ -78,15 +93,23 @@
    1.26                  yield chr1, seqlen1, chr2, seqlen2, blocks
    1.27                  mafCount = 0
    1.28  
    1.29 -def isWantedSequenceName(name, patterns):
    1.30 -    if not patterns: return True
    1.31 +def seqRangeFromText(text):
    1.32 +    if ":" in text:
    1.33 +        pattern, interval = text.rsplit(":", 1)
    1.34 +        if "-" in interval:
    1.35 +            beg, end = interval.rsplit("-", 1)
    1.36 +            return pattern, int(beg), int(end)  # beg may be negative
    1.37 +    return text, 0, sys.maxsize
    1.38 +
    1.39 +def rangeFromSeqName(seqRanges, name, seqLen):
    1.40 +    if not seqRanges: return 0, seqLen
    1.41      base = name.split(".")[-1]  # allow for names like hg19.chr7
    1.42 -    for i in patterns:
    1.43 -        if fnmatch.fnmatchcase(name, i): return True
    1.44 -        if fnmatch.fnmatchcase(base, i): return True
    1.45 -    return False
    1.46 +    for pat, beg, end in seqRanges:
    1.47 +        if fnmatch.fnmatchcase(name, pat) or fnmatch.fnmatchcase(base, pat):
    1.48 +            return max(beg, 0), min(end, seqLen)
    1.49 +    return 0, 0
    1.50  
    1.51 -def updateSeqLimits(isTrim, seqLimits, seqName, seqLen, blocks, index):
    1.52 +def updateSeqLimits(isTrim, seqLimits, seqName, seqRange, blocks, index):
    1.53      if isTrim:
    1.54          beg = blocks[0][index]
    1.55          end = blocks[-1][index] + blocks[-1][2]
    1.56 @@ -97,21 +120,26 @@
    1.57          else:
    1.58              seqLimits[seqName] = beg, end
    1.59      else:
    1.60 -        seqLimits[seqName] = 0, seqLen
    1.61 +        seqLimits[seqName] = seqRange
    1.62  
    1.63  def readAlignments(fileName, opts):
    1.64      '''Get alignments and sequence limits, from MAF or tabular format.'''
    1.65 +    seqRanges1 = map(seqRangeFromText, opts.seq1)
    1.66 +    seqRanges2 = map(seqRangeFromText, opts.seq2)
    1.67 +
    1.68      alignments = []
    1.69      seqLimits1 = {}
    1.70      seqLimits2 = {}
    1.71      lines = fileinput.input(fileName)
    1.72      for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
    1.73 -        if not isWantedSequenceName(seqName1, opts.seq1): continue
    1.74 -        if not isWantedSequenceName(seqName2, opts.seq2): continue
    1.75 -        aln = seqName1, seqName2, blocks
    1.76 +        range1 = rangeFromSeqName(seqRanges1, seqName1, seqLen1)
    1.77 +        range2 = rangeFromSeqName(seqRanges2, seqName2, seqLen2)
    1.78 +        b = list(croppedBlocks(blocks, range1, range2))
    1.79 +        if not b: continue
    1.80 +        aln = seqName1, seqName2, b
    1.81          alignments.append(aln)
    1.82 -        updateSeqLimits(opts.trim1, seqLimits1, seqName1, seqLen1, blocks, 0)
    1.83 -        updateSeqLimits(opts.trim2, seqLimits2, seqName2, seqLen2, blocks, 1)
    1.84 +        updateSeqLimits(opts.trim1, seqLimits1, seqName1, range1, b, 0)
    1.85 +        updateSeqLimits(opts.trim2, seqLimits2, seqName2, range2, b, 1)
    1.86      return alignments, seqLimits1, seqLimits2
    1.87  
    1.88  def natural_sort_key(my_string):
    1.89 @@ -393,8 +421,10 @@
    1.90      description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
    1.91      op = optparse.OptionParser(usage=usage, description=description)
    1.92      op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
    1.93 +                  default=[],
    1.94                    help="which sequences to show from the 1st genome")
    1.95      op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
    1.96 +                  default=[],
    1.97                    help="which sequences to show from the 2nd genome")
    1.98      # Replace "width" & "height" with a single "length" option?
    1.99      op.add_option("-x", "--width", type="int", default=1000,