Add last-dotplot --join option
authorMartin C. Frith
Mon Apr 22 14:36:02 2019 +0900 (5 months ago)
changeset 98027c26371226b
parent 979 9b416e560dc3
child 981 0e184d90d5da
Add last-dotplot --join option
doc/last-dotplot.txt
scripts/last-dotplot
     1.1 --- a/doc/last-dotplot.txt	Mon Apr 01 10:03:47 2019 +0900
     1.2 +++ b/doc/last-dotplot.txt	Mon Apr 22 14:36:02 2019 +0900
     1.3 @@ -84,6 +84,12 @@
     1.4        Maximum unaligned gap in the 2nd genome.
     1.5    --pad=FRAC
     1.6        Length of pad to leave when cutting unaligned gaps.
     1.7 +  -j N, --join=N
     1.8 +      Draw horizontal or vertical lines joining adjacent alignments.
     1.9 +      0 means don't join, 1 means draw vertical lines joining
    1.10 +      alignments that are adjacent in the 1st (horizontal) genome, 2
    1.11 +      means draw horizontal lines joining alignments that are adjacent
    1.12 +      in the 2nd (vertical) genome.
    1.13    --border-pixels=INT
    1.14        Number of pixels between sequences.
    1.15    --border-color=COLOR
     2.1 --- a/scripts/last-dotplot	Mon Apr 01 10:03:47 2019 +0900
     2.2 +++ b/scripts/last-dotplot	Mon Apr 22 14:36:02 2019 +0900
     2.3 @@ -97,12 +97,37 @@
     2.4              size += 1
     2.5      if size: yield beg1, beg2, size
     2.6  
     2.7 +def alignmentFromSegment(qrySeqName, qrySeqLen, segment):
     2.8 +    refSeqLen = sys.maxsize  # XXX
     2.9 +    refSeqName, refSeqBeg, qrySeqBeg, size = segment
    2.10 +    block = refSeqBeg, qrySeqBeg, size
    2.11 +    return refSeqName, refSeqLen, qrySeqName, qrySeqLen, [block]
    2.12 +
    2.13  def alignmentInput(lines):
    2.14      '''Get alignments and sequence lengths, from MAF or tabular format.'''
    2.15      mafCount = 0
    2.16 +    qrySeqName = ""
    2.17 +    segments = []
    2.18      for line in lines:
    2.19          w = line.split()
    2.20 -        if line[0].isdigit():  # tabular format
    2.21 +        if line[0] == "#":
    2.22 +            pass
    2.23 +        elif len(w) == 1:
    2.24 +            for i in segments:
    2.25 +                yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
    2.26 +            qrySeqName = w[0]
    2.27 +            qrySeqLen = 0
    2.28 +            segments = []
    2.29 +        elif len(w) == 2 and qrySeqName and w[1].isdigit():
    2.30 +            qrySeqLen += int(w[1])
    2.31 +        elif len(w) == 3 and qrySeqName and w[1].isdigit() and w[2].isdigit():
    2.32 +            refSeqName, refSeqBeg, refSeqEnd = w[0], int(w[1]), int(w[2])
    2.33 +            size = abs(refSeqEnd - refSeqBeg)
    2.34 +            if refSeqBeg > refSeqEnd:
    2.35 +                refSeqBeg = -refSeqBeg
    2.36 +            segments.append((refSeqName, refSeqBeg, qrySeqLen, size))
    2.37 +            qrySeqLen += size
    2.38 +        elif line[0].isdigit():  # tabular format
    2.39              chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
    2.40              if w[4] == "-": beg1 -= seqlen1
    2.41              chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
    2.42 @@ -120,6 +145,8 @@
    2.43                  blocks = mafBlocks(beg1, beg2, seq1, seq2)
    2.44                  yield chr1, seqlen1, chr2, seqlen2, blocks
    2.45                  mafCount = 0
    2.46 +    for i in segments:
    2.47 +        yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
    2.48  
    2.49  def seqRequestFromText(text):
    2.50      if ":" in text:
    2.51 @@ -504,6 +531,49 @@
    2.52                                  ori1 + beg1, ori2 - beg2 - 1, size)
    2.53      return hits
    2.54  
    2.55 +def orientedBlocks(alignments, seqIndex):
    2.56 +    otherIndex = 1 - seqIndex
    2.57 +    for a in alignments:
    2.58 +        seq1, seq2, blocks = a
    2.59 +        for b in blocks:
    2.60 +            beg1, beg2, size = b
    2.61 +            if b[seqIndex] < 0:
    2.62 +                b = -(beg1 + size), -(beg2 + size), size
    2.63 +            yield a[seqIndex], b[seqIndex], a[otherIndex], b[otherIndex], size
    2.64 +
    2.65 +def drawJoins(im, alignments, bpPerPix, seqIndex, rangeDict1, rangeDict2):
    2.66 +    blocks = orientedBlocks(alignments, seqIndex)
    2.67 +    oldSeq1 = ""
    2.68 +    for seq1, beg1, seq2, beg2, size in sorted(blocks):
    2.69 +        isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
    2.70 +        isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
    2.71 +        end1 = beg1 + size - 1
    2.72 +        end2 = beg2 + size - 1
    2.73 +        if isReverse1:
    2.74 +            beg1 = -(beg1 + 1)
    2.75 +            end1 = -(end1 + 1)
    2.76 +        if isReverse2:
    2.77 +            beg2 = -(beg2 + 1)
    2.78 +            end2 = -(end2 + 1)
    2.79 +        newPix1 = (ori1 + beg1) // bpPerPix
    2.80 +        newPix2 = (ori2 + beg2) // bpPerPix
    2.81 +        if seq1 == oldSeq1:
    2.82 +            lowerPix2 = min(oldPix2, newPix2)
    2.83 +            upperPix2 = max(oldPix2, newPix2)
    2.84 +            midPix1 = (oldPix1 + newPix1) // 2
    2.85 +            if isReverse1:
    2.86 +                midPix1 = (oldPix1 + newPix1 + 1) // 2
    2.87 +                oldPix1, newPix1 = newPix1, oldPix1
    2.88 +            if upperPix2 - lowerPix2 > 1 and oldPix1 <= newPix1 <= oldPix1 + 1:
    2.89 +                if seqIndex == 0:
    2.90 +                    box = midPix1, lowerPix2, midPix1 + 1, upperPix2 + 1
    2.91 +                else:
    2.92 +                    box = lowerPix2, midPix1, upperPix2 + 1, midPix1 + 1
    2.93 +                im.paste("lightgray", box)
    2.94 +        oldPix1 = (ori1 + end1) // bpPerPix
    2.95 +        oldPix2 = (ori2 + end2) // bpPerPix
    2.96 +        oldSeq1 = seq1
    2.97 +
    2.98  def expandedSeqDict(seqDict):
    2.99      '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
   2.100      newDict = seqDict.copy()
   2.101 @@ -826,7 +896,8 @@
   2.102      logging.info("height: " + str(height))
   2.103  
   2.104      logging.info("processing alignments...")
   2.105 -    hits = alignmentPixels(width, height, alignments + alignmentsB, bpPerPix,
   2.106 +    allAlignments = alignments + alignmentsB
   2.107 +    hits = alignmentPixels(width, height, allAlignments, bpPerPix,
   2.108                             rangeDict1, rangeDict2)
   2.109  
   2.110      logging.info("reading annotations...")
   2.111 @@ -854,6 +925,16 @@
   2.112  
   2.113      drawAnnotations(im, boxes)
   2.114  
   2.115 +    joinA, joinB = twoValuesFromOption(opts.join, ":")
   2.116 +    if joinA in "13":
   2.117 +        drawJoins(im, alignments, bpPerPix, 0, rangeDict1, rangeDict2)
   2.118 +    if joinB in "13":
   2.119 +        drawJoins(im, alignmentsB, bpPerPix, 0, rangeDict1, rangeDict2)
   2.120 +    if joinA in "23":
   2.121 +        drawJoins(im, alignments, bpPerPix, 1, rangeDict2, rangeDict1)
   2.122 +    if joinB in "23":
   2.123 +        drawJoins(im, alignmentsB, bpPerPix, 1, rangeDict2, rangeDict1)
   2.124 +
   2.125      for i in range(height):
   2.126          for j in range(width):
   2.127              store_value = hits[i * width + j]
   2.128 @@ -933,6 +1014,9 @@
   2.129      op.add_option("--pad", metavar="FRAC", type="float", default=0.04, help=
   2.130                    "pad length when cutting unaligned gaps: "
   2.131                    "fraction of aligned length (default=%default)")
   2.132 +    op.add_option("-j", "--join", default="0", metavar="N", help=
   2.133 +                  "join: 0=nothing, 1=alignments adjacent in genome1, "
   2.134 +                  "2=alignments adjacent in genome2 (default=%default)")
   2.135      op.add_option("--border-pixels", metavar="INT", type="int", default=1,
   2.136                    help="number of pixels between sequences (default=%default)")
   2.137      op.add_option("--border-color", metavar="COLOR", default="black",