1.1 --- a/doc/last-dotplot.txt Wed Nov 08 17:02:56 2017 +0900
1.2 +++ b/doc/last-dotplot.txt Wed Nov 08 17:59:10 2017 +0900
1.3 @@ -83,12 +83,35 @@
1.4 Color for forward alignments.
1.5 -r COLOR, --reversecolor=COLOR
1.6 Color for reverse alignments.
1.7 + --alignments=FILE
1.8 + Read secondary alignments. For example: we could use primary
1.9 + alignment data with one human DNA read aligned to the human
1.10 + genome, and secondary alignment data with the whole chimpanzee
1.11 + versus human genomes. last-dotplot will show the parts of the
1.12 + secondary alignments that are near the primary alignments.
1.13 --sort1=N
1.14 Put the 1st genome's sequences left-to-right in order of: their
1.15 appearance in the input (0), their names (1), their lengths (2).
1.16 + You can use two colon-separated values, e.g. "2:1" to specify 2
1.17 + for primary and 1 for secondary alignments.
1.18 --sort2=N
1.19 Put the 2nd genome's sequences top-to-bottom in order of: their
1.20 appearance in the input (0), their names (1), their lengths (2).
1.21 + --max-gap1=FRAC
1.22 + Maximum unaligned gap in the 1st genome. For example, if two
1.23 + parts of one DNA read align to widely-separated parts of one
1.24 + chromosome, it's probably best to cut the intervening region
1.25 + from the dotplot. FRAC is a fraction of the length of the
1.26 + (primary) alignments. You can specify "inf" to keep all
1.27 + unaligned gaps. You can use two comma-separated values,
1.28 + e.g. "0.5,3" to specify 0.5 for end-gaps (unaligned sequence
1.29 + ends) and 3 for mid-gaps (between alignments). You can use two
1.30 + colon-separated values (each of which may be comma-separated)
1.31 + for primary and secondary alignments.
1.32 + --max-gap2=FRAC
1.33 + Maximum unaligned gap in the 2nd genome.
1.34 + --pad=FRAC
1.35 + Length of pad to leave when cutting unaligned gaps.
1.36 --border-pixels=INT
1.37 Number of pixels between sequences.
1.38 --border-color=COLOR
2.1 --- a/scripts/last-dotplot Wed Nov 08 17:02:56 2017 +0900
2.2 +++ b/scripts/last-dotplot Wed Nov 08 17:59:10 2017 +0900
2.3 @@ -33,6 +33,10 @@
2.4 prog = os.path.basename(sys.argv[0])
2.5 sys.stderr.write(prog + ": " + message + "\n")
2.6
2.7 +def groupByFirstItem(things):
2.8 + for k, v in itertools.groupby(things, itemgetter(0)):
2.9 + yield k, [i[1:] for i in v]
2.10 +
2.11 def croppedBlocks(blocks, ranges1, ranges2):
2.12 headBeg1, headBeg2, headSize = blocks[0]
2.13 for r1 in ranges1:
2.14 @@ -167,6 +171,48 @@
2.15 updateSeqs(coverDict2, seqRanges2, seqName2, ranges2, coveredRange2)
2.16 return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
2.17
2.18 +def nameAndRangesFromDict(cropDict, seqName):
2.19 + if seqName in cropDict:
2.20 + return seqName, cropDict[seqName]
2.21 + n = seqName.split(".")[-1]
2.22 + if n in cropDict:
2.23 + return n, cropDict[n]
2.24 + return seqName, []
2.25 +
2.26 +def rangesForSecondaryAlignments(primaryRanges, seqLen):
2.27 + if primaryRanges:
2.28 + return primaryRanges
2.29 + return [(0, seqLen)]
2.30 +
2.31 +def readSecondaryAlignments(opts, cropRanges1, cropRanges2):
2.32 + cropDict1 = dict(groupByFirstItem(cropRanges1))
2.33 + cropDict2 = dict(groupByFirstItem(cropRanges2))
2.34 +
2.35 + alignments = []
2.36 + seqRanges1 = []
2.37 + seqRanges2 = []
2.38 + coverDict1 = {}
2.39 + coverDict2 = {}
2.40 + lines = myOpen(opts.alignments)
2.41 + for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
2.42 + seqName1, ranges1 = nameAndRangesFromDict(cropDict1, seqName1)
2.43 + seqName2, ranges2 = nameAndRangesFromDict(cropDict2, seqName2)
2.44 + if not ranges1 and not ranges2:
2.45 + continue
2.46 + r1 = rangesForSecondaryAlignments(ranges1, seqLen1)
2.47 + r2 = rangesForSecondaryAlignments(ranges2, seqLen2)
2.48 + b = list(croppedBlocks(list(blocks), r1, r2))
2.49 + if not b: continue
2.50 + aln = seqName1, seqName2, b
2.51 + alignments.append(aln)
2.52 + if not ranges1:
2.53 + coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
2.54 + updateSeqs(coverDict1, seqRanges1, seqName1, r1, coveredRange1)
2.55 + if not ranges2:
2.56 + coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
2.57 + updateSeqs(coverDict2, seqRanges2, seqName2, r2, coveredRange2)
2.58 + return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
2.59 +
2.60 def twoValuesFromOption(text, separator):
2.61 if separator in text:
2.62 return text.split(separator)
2.63 @@ -231,6 +277,10 @@
2.64 g.sort(key=sizeKey)
2.65 return [j for i in g for j in i]
2.66
2.67 +def allSortedRanges(seqRanges, seqRangesB, sortOpt):
2.68 + x, y = twoValuesFromOption(sortOpt, ":")
2.69 + return getSortedRanges(seqRanges, x) + getSortedRanges(seqRangesB, y)
2.70 +
2.71 def prettyNum(n):
2.72 t = str(n)
2.73 groups = []
2.74 @@ -272,9 +322,9 @@
2.75 x, y = y, x
2.76 yield text, x, y
2.77
2.78 -def dataFromRanges(seqRanges, sortOpt, font,
2.79 +def dataFromRanges(cutRanges, cutRangesB, sortOpt, font,
2.80 fontsize, image_mode, labelOpt, textRot):
2.81 - s = getSortedRanges(seqRanges, sortOpt)
2.82 + s = allSortedRanges(cutRanges, cutRangesB, sortOpt)
2.83 for i in s:
2.84 warn("\t".join(map(str, i)))
2.85 warn("")
2.86 @@ -582,6 +632,9 @@
2.87 zipped_colors = zip(forward_color, reverse_color)
2.88 overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
2.89
2.90 + maxGap1, maxGapB1 = twoValuesFromOption(opts.max_gap1, ":")
2.91 + maxGap2, maxGapB2 = twoValuesFromOption(opts.max_gap2, ":")
2.92 +
2.93 warn("reading alignments...")
2.94 alnData = readAlignments(args[0], opts)
2.95 alignments, seqRanges1, coverDict1, seqRanges2, coverDict2 = alnData
2.96 @@ -592,17 +645,28 @@
2.97 minAlignedBases = min(coveredLength(coverDict1), coveredLength(coverDict2))
2.98 pad = int(opts.pad * minAlignedBases)
2.99 cutRanges1 = list(trimmed(seqRanges1, coverDict1, minAlignedBases,
2.100 - opts.max_gap1, pad, pad))
2.101 + maxGap1, pad, pad))
2.102 cutRanges2 = list(trimmed(seqRanges2, coverDict2, minAlignedBases,
2.103 - opts.max_gap2, pad, pad))
2.104 + maxGap2, pad, pad))
2.105 +
2.106 + warn("reading secondary alignments...")
2.107 + alnDataB = readSecondaryAlignments(opts, cutRanges1, cutRanges2)
2.108 + alignmentsB, seqRangesB1, coverDictB1, seqRangesB2, coverDictB2 = alnDataB
2.109 + warn("cutting...")
2.110 + coverDictB1 = dict(mergedRangesPerSeq(coverDictB1))
2.111 + coverDictB2 = dict(mergedRangesPerSeq(coverDictB2))
2.112 + cutRangesB1 = trimmed(seqRangesB1, coverDictB1, minAlignedBases,
2.113 + maxGapB1, 0, 0)
2.114 + cutRangesB2 = trimmed(seqRangesB2, coverDictB2, minAlignedBases,
2.115 + maxGapB2, 0, 0)
2.116
2.117 textRot1 = "vertical".startswith(opts.rot1)
2.118 - i1 = dataFromRanges(cutRanges1, opts.sort1, font,
2.119 + i1 = dataFromRanges(cutRanges1, cutRangesB1, opts.sort1, font,
2.120 opts.fontsize, image_mode, opts.labels1, textRot1)
2.121 sortedRanges1, rangeSizes1, labelData1, tMargin = i1
2.122
2.123 textRot2 = "horizontal".startswith(opts.rot2)
2.124 - i2 = dataFromRanges(cutRanges2, opts.sort2, font,
2.125 + i2 = dataFromRanges(cutRanges2, cutRangesB2, opts.sort2, font,
2.126 opts.fontsize, image_mode, opts.labels2, textRot2)
2.127 sortedRanges2, rangeSizes2, labelData2, lMargin = i2
2.128
2.129 @@ -625,7 +689,7 @@
2.130 warn("height: " + str(height))
2.131
2.132 warn("processing alignments...")
2.133 - hits = alignmentPixels(width, height, alignments, bpPerPix,
2.134 + hits = alignmentPixels(width, height, alignments + alignmentsB, bpPerPix,
2.135 rangeDict1, rangeDict2)
2.136
2.137 warn("reading annotations...")
2.138 @@ -707,6 +771,7 @@
2.139 help="color for forward alignments (default: %default)")
2.140 op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
2.141 help="color for reverse alignments (default: %default)")
2.142 + op.add_option("--alignments", metavar="FILE", help="secondary alignments")
2.143 op.add_option("--sort1", default="1", metavar="N",
2.144 help="genome1 sequence order: 0=input order, 1=name order, "
2.145 "2=length order (default=%default)")
2.146 @@ -726,6 +791,7 @@
2.147 help="number of pixels between sequences (default=%default)")
2.148 op.add_option("--border-color", metavar="COLOR", default="black",
2.149 help="color for pixels between sequences (default=%default)")
2.150 + # --break-color and/or --break-pixels for intra-sequence breaks?
2.151 op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
2.152 help="margin color")
2.153