scripts/last-dotplot
changeset 980 27c26371226b
parent 961 ed0fb9b1eb40
child 981 0e184d90d5da
     1.1 --- a/scripts/last-dotplot	Mon Dec 10 08:23:09 2018 +0900
     1.2 +++ b/scripts/last-dotplot	Mon Apr 22 14:36:02 2019 +0900
     1.3 @@ -97,12 +97,37 @@
     1.4              size += 1
     1.5      if size: yield beg1, beg2, size
     1.6  
     1.7 +def alignmentFromSegment(qrySeqName, qrySeqLen, segment):
     1.8 +    refSeqLen = sys.maxsize  # XXX
     1.9 +    refSeqName, refSeqBeg, qrySeqBeg, size = segment
    1.10 +    block = refSeqBeg, qrySeqBeg, size
    1.11 +    return refSeqName, refSeqLen, qrySeqName, qrySeqLen, [block]
    1.12 +
    1.13  def alignmentInput(lines):
    1.14      '''Get alignments and sequence lengths, from MAF or tabular format.'''
    1.15      mafCount = 0
    1.16 +    qrySeqName = ""
    1.17 +    segments = []
    1.18      for line in lines:
    1.19          w = line.split()
    1.20 -        if line[0].isdigit():  # tabular format
    1.21 +        if line[0] == "#":
    1.22 +            pass
    1.23 +        elif len(w) == 1:
    1.24 +            for i in segments:
    1.25 +                yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
    1.26 +            qrySeqName = w[0]
    1.27 +            qrySeqLen = 0
    1.28 +            segments = []
    1.29 +        elif len(w) == 2 and qrySeqName and w[1].isdigit():
    1.30 +            qrySeqLen += int(w[1])
    1.31 +        elif len(w) == 3 and qrySeqName and w[1].isdigit() and w[2].isdigit():
    1.32 +            refSeqName, refSeqBeg, refSeqEnd = w[0], int(w[1]), int(w[2])
    1.33 +            size = abs(refSeqEnd - refSeqBeg)
    1.34 +            if refSeqBeg > refSeqEnd:
    1.35 +                refSeqBeg = -refSeqBeg
    1.36 +            segments.append((refSeqName, refSeqBeg, qrySeqLen, size))
    1.37 +            qrySeqLen += size
    1.38 +        elif line[0].isdigit():  # tabular format
    1.39              chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
    1.40              if w[4] == "-": beg1 -= seqlen1
    1.41              chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
    1.42 @@ -120,6 +145,8 @@
    1.43                  blocks = mafBlocks(beg1, beg2, seq1, seq2)
    1.44                  yield chr1, seqlen1, chr2, seqlen2, blocks
    1.45                  mafCount = 0
    1.46 +    for i in segments:
    1.47 +        yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
    1.48  
    1.49  def seqRequestFromText(text):
    1.50      if ":" in text:
    1.51 @@ -504,6 +531,49 @@
    1.52                                  ori1 + beg1, ori2 - beg2 - 1, size)
    1.53      return hits
    1.54  
    1.55 +def orientedBlocks(alignments, seqIndex):
    1.56 +    otherIndex = 1 - seqIndex
    1.57 +    for a in alignments:
    1.58 +        seq1, seq2, blocks = a
    1.59 +        for b in blocks:
    1.60 +            beg1, beg2, size = b
    1.61 +            if b[seqIndex] < 0:
    1.62 +                b = -(beg1 + size), -(beg2 + size), size
    1.63 +            yield a[seqIndex], b[seqIndex], a[otherIndex], b[otherIndex], size
    1.64 +
    1.65 +def drawJoins(im, alignments, bpPerPix, seqIndex, rangeDict1, rangeDict2):
    1.66 +    blocks = orientedBlocks(alignments, seqIndex)
    1.67 +    oldSeq1 = ""
    1.68 +    for seq1, beg1, seq2, beg2, size in sorted(blocks):
    1.69 +        isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
    1.70 +        isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
    1.71 +        end1 = beg1 + size - 1
    1.72 +        end2 = beg2 + size - 1
    1.73 +        if isReverse1:
    1.74 +            beg1 = -(beg1 + 1)
    1.75 +            end1 = -(end1 + 1)
    1.76 +        if isReverse2:
    1.77 +            beg2 = -(beg2 + 1)
    1.78 +            end2 = -(end2 + 1)
    1.79 +        newPix1 = (ori1 + beg1) // bpPerPix
    1.80 +        newPix2 = (ori2 + beg2) // bpPerPix
    1.81 +        if seq1 == oldSeq1:
    1.82 +            lowerPix2 = min(oldPix2, newPix2)
    1.83 +            upperPix2 = max(oldPix2, newPix2)
    1.84 +            midPix1 = (oldPix1 + newPix1) // 2
    1.85 +            if isReverse1:
    1.86 +                midPix1 = (oldPix1 + newPix1 + 1) // 2
    1.87 +                oldPix1, newPix1 = newPix1, oldPix1
    1.88 +            if upperPix2 - lowerPix2 > 1 and oldPix1 <= newPix1 <= oldPix1 + 1:
    1.89 +                if seqIndex == 0:
    1.90 +                    box = midPix1, lowerPix2, midPix1 + 1, upperPix2 + 1
    1.91 +                else:
    1.92 +                    box = lowerPix2, midPix1, upperPix2 + 1, midPix1 + 1
    1.93 +                im.paste("lightgray", box)
    1.94 +        oldPix1 = (ori1 + end1) // bpPerPix
    1.95 +        oldPix2 = (ori2 + end2) // bpPerPix
    1.96 +        oldSeq1 = seq1
    1.97 +
    1.98  def expandedSeqDict(seqDict):
    1.99      '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   1.100      newDict = seqDict.copy()
   1.101 @@ -826,7 +896,8 @@
   1.102      logging.info("height: " + str(height))
   1.103  
   1.104      logging.info("processing alignments...")
   1.105 -    hits = alignmentPixels(width, height, alignments + alignmentsB, bpPerPix,
   1.106 +    allAlignments = alignments + alignmentsB
   1.107 +    hits = alignmentPixels(width, height, allAlignments, bpPerPix,
   1.108                             rangeDict1, rangeDict2)
   1.109  
   1.110      logging.info("reading annotations...")
   1.111 @@ -854,6 +925,16 @@
   1.112  
   1.113      drawAnnotations(im, boxes)
   1.114  
   1.115 +    joinA, joinB = twoValuesFromOption(opts.join, ":")
   1.116 +    if joinA in "13":
   1.117 +        drawJoins(im, alignments, bpPerPix, 0, rangeDict1, rangeDict2)
   1.118 +    if joinB in "13":
   1.119 +        drawJoins(im, alignmentsB, bpPerPix, 0, rangeDict1, rangeDict2)
   1.120 +    if joinA in "23":
   1.121 +        drawJoins(im, alignments, bpPerPix, 1, rangeDict2, rangeDict1)
   1.122 +    if joinB in "23":
   1.123 +        drawJoins(im, alignmentsB, bpPerPix, 1, rangeDict2, rangeDict1)
   1.124 +
   1.125      for i in range(height):
   1.126          for j in range(width):
   1.127              store_value = hits[i * width + j]
   1.128 @@ -933,6 +1014,9 @@
   1.129      op.add_option("--pad", metavar="FRAC", type="float", default=0.04, help=
   1.130                    "pad length when cutting unaligned gaps: "
   1.131                    "fraction of aligned length (default=%default)")
   1.132 +    op.add_option("-j", "--join", default="0", metavar="N", help=
   1.133 +                  "join: 0=nothing, 1=alignments adjacent in genome1, "
   1.134 +                  "2=alignments adjacent in genome2 (default=%default)")
   1.135      op.add_option("--border-pixels", metavar="INT", type="int", default=1,
   1.136                    help="number of pixels between sequences (default=%default)")
   1.137      op.add_option("--border-color", metavar="COLOR", default="black",