1.1 --- a/doc/last-dotplot.txt Mon Apr 01 10:03:47 2019 +0900
1.2 +++ b/doc/last-dotplot.txt Mon Apr 22 14:36:02 2019 +0900
1.3 @@ -84,6 +84,12 @@
1.4 Maximum unaligned gap in the 2nd genome.
1.5 --pad=FRAC
1.6 Length of pad to leave when cutting unaligned gaps.
1.7 + -j N, --join=N
1.8 + Draw horizontal or vertical lines joining adjacent alignments.
1.9 + 0 means don't join, 1 means draw vertical lines joining
1.10 + alignments that are adjacent in the 1st (horizontal) genome, 2
1.11 + means draw horizontal lines joining alignments that are adjacent
1.12 + in the 2nd (vertical) genome.
1.13 --border-pixels=INT
1.14 Number of pixels between sequences.
1.15 --border-color=COLOR
2.1 --- a/scripts/last-dotplot Mon Apr 01 10:03:47 2019 +0900
2.2 +++ b/scripts/last-dotplot Mon Apr 22 14:36:02 2019 +0900
2.3 @@ -97,12 +97,37 @@
2.4 size += 1
2.5 if size: yield beg1, beg2, size
2.6
2.7 +def alignmentFromSegment(qrySeqName, qrySeqLen, segment):
2.8 + refSeqLen = sys.maxsize # XXX
2.9 + refSeqName, refSeqBeg, qrySeqBeg, size = segment
2.10 + block = refSeqBeg, qrySeqBeg, size
2.11 + return refSeqName, refSeqLen, qrySeqName, qrySeqLen, [block]
2.12 +
2.13 def alignmentInput(lines):
2.14 '''Get alignments and sequence lengths, from MAF or tabular format.'''
2.15 mafCount = 0
2.16 + qrySeqName = ""
2.17 + segments = []
2.18 for line in lines:
2.19 w = line.split()
2.20 - if line[0].isdigit(): # tabular format
2.21 + if line[0] == "#":
2.22 + pass
2.23 + elif len(w) == 1:
2.24 + for i in segments:
2.25 + yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
2.26 + qrySeqName = w[0]
2.27 + qrySeqLen = 0
2.28 + segments = []
2.29 + elif len(w) == 2 and qrySeqName and w[1].isdigit():
2.30 + qrySeqLen += int(w[1])
2.31 + elif len(w) == 3 and qrySeqName and w[1].isdigit() and w[2].isdigit():
2.32 + refSeqName, refSeqBeg, refSeqEnd = w[0], int(w[1]), int(w[2])
2.33 + size = abs(refSeqEnd - refSeqBeg)
2.34 + if refSeqBeg > refSeqEnd:
2.35 + refSeqBeg = -refSeqBeg
2.36 + segments.append((refSeqName, refSeqBeg, qrySeqLen, size))
2.37 + qrySeqLen += size
2.38 + elif line[0].isdigit(): # tabular format
2.39 chr1, beg1, seqlen1 = w[1], int(w[2]), int(w[5])
2.40 if w[4] == "-": beg1 -= seqlen1
2.41 chr2, beg2, seqlen2 = w[6], int(w[7]), int(w[10])
2.42 @@ -120,6 +145,8 @@
2.43 blocks = mafBlocks(beg1, beg2, seq1, seq2)
2.44 yield chr1, seqlen1, chr2, seqlen2, blocks
2.45 mafCount = 0
2.46 + for i in segments:
2.47 + yield alignmentFromSegment(qrySeqName, qrySeqLen, i)
2.48
2.49 def seqRequestFromText(text):
2.50 if ":" in text:
2.51 @@ -504,6 +531,49 @@
2.52 ori1 + beg1, ori2 - beg2 - 1, size)
2.53 return hits
2.54
2.55 +def orientedBlocks(alignments, seqIndex):
2.56 + otherIndex = 1 - seqIndex
2.57 + for a in alignments:
2.58 + seq1, seq2, blocks = a
2.59 + for b in blocks:
2.60 + beg1, beg2, size = b
2.61 + if b[seqIndex] < 0:
2.62 + b = -(beg1 + size), -(beg2 + size), size
2.63 + yield a[seqIndex], b[seqIndex], a[otherIndex], b[otherIndex], size
2.64 +
2.65 +def drawJoins(im, alignments, bpPerPix, seqIndex, rangeDict1, rangeDict2):
2.66 + blocks = orientedBlocks(alignments, seqIndex)
2.67 + oldSeq1 = ""
2.68 + for seq1, beg1, seq2, beg2, size in sorted(blocks):
2.69 + isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
2.70 + isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
2.71 + end1 = beg1 + size - 1
2.72 + end2 = beg2 + size - 1
2.73 + if isReverse1:
2.74 + beg1 = -(beg1 + 1)
2.75 + end1 = -(end1 + 1)
2.76 + if isReverse2:
2.77 + beg2 = -(beg2 + 1)
2.78 + end2 = -(end2 + 1)
2.79 + newPix1 = (ori1 + beg1) // bpPerPix
2.80 + newPix2 = (ori2 + beg2) // bpPerPix
2.81 + if seq1 == oldSeq1:
2.82 + lowerPix2 = min(oldPix2, newPix2)
2.83 + upperPix2 = max(oldPix2, newPix2)
2.84 + midPix1 = (oldPix1 + newPix1) // 2
2.85 + if isReverse1:
2.86 + midPix1 = (oldPix1 + newPix1 + 1) // 2
2.87 + oldPix1, newPix1 = newPix1, oldPix1
2.88 + if upperPix2 - lowerPix2 > 1 and oldPix1 <= newPix1 <= oldPix1 + 1:
2.89 + if seqIndex == 0:
2.90 + box = midPix1, lowerPix2, midPix1 + 1, upperPix2 + 1
2.91 + else:
2.92 + box = lowerPix2, midPix1, upperPix2 + 1, midPix1 + 1
2.93 + im.paste("lightgray", box)
2.94 + oldPix1 = (ori1 + end1) // bpPerPix
2.95 + oldPix2 = (ori2 + end2) // bpPerPix
2.96 + oldSeq1 = seq1
2.97 +
2.98 def expandedSeqDict(seqDict):
2.99 '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
2.100 newDict = seqDict.copy()
2.101 @@ -826,7 +896,8 @@
2.102 logging.info("height: " + str(height))
2.103
2.104 logging.info("processing alignments...")
2.105 - hits = alignmentPixels(width, height, alignments + alignmentsB, bpPerPix,
2.106 + allAlignments = alignments + alignmentsB
2.107 + hits = alignmentPixels(width, height, allAlignments, bpPerPix,
2.108 rangeDict1, rangeDict2)
2.109
2.110 logging.info("reading annotations...")
2.111 @@ -854,6 +925,16 @@
2.112
2.113 drawAnnotations(im, boxes)
2.114
2.115 + joinA, joinB = twoValuesFromOption(opts.join, ":")
2.116 + if joinA in "13":
2.117 + drawJoins(im, alignments, bpPerPix, 0, rangeDict1, rangeDict2)
2.118 + if joinB in "13":
2.119 + drawJoins(im, alignmentsB, bpPerPix, 0, rangeDict1, rangeDict2)
2.120 + if joinA in "23":
2.121 + drawJoins(im, alignments, bpPerPix, 1, rangeDict2, rangeDict1)
2.122 + if joinB in "23":
2.123 + drawJoins(im, alignmentsB, bpPerPix, 1, rangeDict2, rangeDict1)
2.124 +
2.125 for i in range(height):
2.126 for j in range(width):
2.127 store_value = hits[i * width + j]
2.128 @@ -933,6 +1014,9 @@
2.129 op.add_option("--pad", metavar="FRAC", type="float", default=0.04, help=
2.130 "pad length when cutting unaligned gaps: "
2.131 "fraction of aligned length (default=%default)")
2.132 + op.add_option("-j", "--join", default="0", metavar="N", help=
2.133 + "join: 0=nothing, 1=alignments adjacent in genome1, "
2.134 + "2=alignments adjacent in genome2 (default=%default)")
2.135 op.add_option("--border-pixels", metavar="INT", type="int", default=1,
2.136 help="number of pixels between sequences (default=%default)")
2.137 op.add_option("--border-color", metavar="COLOR", default="black",