scripts/last-dotplot
changeset 911 4080f3f6f73d
parent 910 42653029d900
child 912 8834139fa8a8
     1.1 --- a/scripts/last-dotplot	Wed Nov 08 17:02:56 2017 +0900
     1.2 +++ b/scripts/last-dotplot	Wed Nov 08 17:59:10 2017 +0900
     1.3 @@ -33,6 +33,10 @@
     1.4          prog = os.path.basename(sys.argv[0])
     1.5          sys.stderr.write(prog + ": " + message + "\n")
     1.6  
     1.7 +def groupByFirstItem(things):
     1.8 +    for k, v in itertools.groupby(things, itemgetter(0)):
     1.9 +        yield k, [i[1:] for i in v]
    1.10 +
    1.11  def croppedBlocks(blocks, ranges1, ranges2):
    1.12      headBeg1, headBeg2, headSize = blocks[0]
    1.13      for r1 in ranges1:
    1.14 @@ -167,6 +171,48 @@
    1.15          updateSeqs(coverDict2, seqRanges2, seqName2, ranges2, coveredRange2)
    1.16      return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
    1.17  
    1.18 +def nameAndRangesFromDict(cropDict, seqName):
    1.19 +    if seqName in cropDict:
    1.20 +        return seqName, cropDict[seqName]
    1.21 +    n = seqName.split(".")[-1]
    1.22 +    if n in cropDict:
    1.23 +        return n, cropDict[n]
    1.24 +    return seqName, []
    1.25 +
    1.26 +def rangesForSecondaryAlignments(primaryRanges, seqLen):
    1.27 +    if primaryRanges:
    1.28 +        return primaryRanges
    1.29 +    return [(0, seqLen)]
    1.30 +
    1.31 +def readSecondaryAlignments(opts, cropRanges1, cropRanges2):
    1.32 +    cropDict1 = dict(groupByFirstItem(cropRanges1))
    1.33 +    cropDict2 = dict(groupByFirstItem(cropRanges2))
    1.34 +
    1.35 +    alignments = []
    1.36 +    seqRanges1 = []
    1.37 +    seqRanges2 = []
    1.38 +    coverDict1 = {}
    1.39 +    coverDict2 = {}
    1.40 +    lines = myOpen(opts.alignments)
    1.41 +    for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
    1.42 +        seqName1, ranges1 = nameAndRangesFromDict(cropDict1, seqName1)
    1.43 +        seqName2, ranges2 = nameAndRangesFromDict(cropDict2, seqName2)
    1.44 +        if not ranges1 and not ranges2:
    1.45 +            continue
    1.46 +        r1 = rangesForSecondaryAlignments(ranges1, seqLen1)
    1.47 +        r2 = rangesForSecondaryAlignments(ranges2, seqLen2)
    1.48 +        b = list(croppedBlocks(list(blocks), r1, r2))
    1.49 +        if not b: continue
    1.50 +        aln = seqName1, seqName2, b
    1.51 +        alignments.append(aln)
    1.52 +        if not ranges1:
    1.53 +            coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
    1.54 +            updateSeqs(coverDict1, seqRanges1, seqName1, r1, coveredRange1)
    1.55 +        if not ranges2:
    1.56 +            coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
    1.57 +            updateSeqs(coverDict2, seqRanges2, seqName2, r2, coveredRange2)
    1.58 +    return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
    1.59 +
    1.60  def twoValuesFromOption(text, separator):
    1.61      if separator in text:
    1.62          return text.split(separator)
    1.63 @@ -231,6 +277,10 @@
    1.64          g.sort(key=sizeKey)
    1.65      return [j for i in g for j in i]
    1.66  
    1.67 +def allSortedRanges(seqRanges, seqRangesB, sortOpt):
    1.68 +    x, y = twoValuesFromOption(sortOpt, ":")
    1.69 +    return getSortedRanges(seqRanges, x) + getSortedRanges(seqRangesB, y)
    1.70 +
    1.71  def prettyNum(n):
    1.72      t = str(n)
    1.73      groups = []
    1.74 @@ -272,9 +322,9 @@
    1.75                  x, y = y, x
    1.76          yield text, x, y
    1.77  
    1.78 -def dataFromRanges(seqRanges, sortOpt, font,
    1.79 +def dataFromRanges(cutRanges, cutRangesB, sortOpt, font,
    1.80                     fontsize, image_mode, labelOpt, textRot):
    1.81 -    s = getSortedRanges(seqRanges, sortOpt)
    1.82 +    s = allSortedRanges(cutRanges, cutRangesB, sortOpt)
    1.83      for i in s:
    1.84          warn("\t".join(map(str, i)))
    1.85      warn("")
    1.86 @@ -582,6 +632,9 @@
    1.87      zipped_colors = zip(forward_color, reverse_color)
    1.88      overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
    1.89  
    1.90 +    maxGap1, maxGapB1 = twoValuesFromOption(opts.max_gap1, ":")
    1.91 +    maxGap2, maxGapB2 = twoValuesFromOption(opts.max_gap2, ":")
    1.92 +
    1.93      warn("reading alignments...")
    1.94      alnData = readAlignments(args[0], opts)
    1.95      alignments, seqRanges1, coverDict1, seqRanges2, coverDict2 = alnData
    1.96 @@ -592,17 +645,28 @@
    1.97      minAlignedBases = min(coveredLength(coverDict1), coveredLength(coverDict2))
    1.98      pad = int(opts.pad * minAlignedBases)
    1.99      cutRanges1 = list(trimmed(seqRanges1, coverDict1, minAlignedBases,
   1.100 -                              opts.max_gap1, pad, pad))
   1.101 +                              maxGap1, pad, pad))
   1.102      cutRanges2 = list(trimmed(seqRanges2, coverDict2, minAlignedBases,
   1.103 -                              opts.max_gap2, pad, pad))
   1.104 +                              maxGap2, pad, pad))
   1.105 +
   1.106 +    warn("reading secondary alignments...")
   1.107 +    alnDataB = readSecondaryAlignments(opts, cutRanges1, cutRanges2)
   1.108 +    alignmentsB, seqRangesB1, coverDictB1, seqRangesB2, coverDictB2 = alnDataB
   1.109 +    warn("cutting...")
   1.110 +    coverDictB1 = dict(mergedRangesPerSeq(coverDictB1))
   1.111 +    coverDictB2 = dict(mergedRangesPerSeq(coverDictB2))
   1.112 +    cutRangesB1 = trimmed(seqRangesB1, coverDictB1, minAlignedBases,
   1.113 +                          maxGapB1, 0, 0)
   1.114 +    cutRangesB2 = trimmed(seqRangesB2, coverDictB2, minAlignedBases,
   1.115 +                          maxGapB2, 0, 0)
   1.116  
   1.117      textRot1 = "vertical".startswith(opts.rot1)
   1.118 -    i1 = dataFromRanges(cutRanges1, opts.sort1, font,
   1.119 +    i1 = dataFromRanges(cutRanges1, cutRangesB1, opts.sort1, font,
   1.120                          opts.fontsize, image_mode, opts.labels1, textRot1)
   1.121      sortedRanges1, rangeSizes1, labelData1, tMargin = i1
   1.122  
   1.123      textRot2 = "horizontal".startswith(opts.rot2)
   1.124 -    i2 = dataFromRanges(cutRanges2, opts.sort2, font,
   1.125 +    i2 = dataFromRanges(cutRanges2, cutRangesB2, opts.sort2, font,
   1.126                          opts.fontsize, image_mode, opts.labels2, textRot2)
   1.127      sortedRanges2, rangeSizes2, labelData2, lMargin = i2
   1.128  
   1.129 @@ -625,7 +689,7 @@
   1.130      warn("height: " + str(height))
   1.131  
   1.132      warn("processing alignments...")
   1.133 -    hits = alignmentPixels(width, height, alignments, bpPerPix,
   1.134 +    hits = alignmentPixels(width, height, alignments + alignmentsB, bpPerPix,
   1.135                             rangeDict1, rangeDict2)
   1.136  
   1.137      warn("reading annotations...")
   1.138 @@ -707,6 +771,7 @@
   1.139                    help="color for forward alignments (default: %default)")
   1.140      op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
   1.141                    help="color for reverse alignments (default: %default)")
   1.142 +    op.add_option("--alignments", metavar="FILE", help="secondary alignments")
   1.143      op.add_option("--sort1", default="1", metavar="N",
   1.144                    help="genome1 sequence order: 0=input order, 1=name order, "
   1.145                    "2=length order (default=%default)")
   1.146 @@ -726,6 +791,7 @@
   1.147                    help="number of pixels between sequences (default=%default)")
   1.148      op.add_option("--border-color", metavar="COLOR", default="black",
   1.149                    help="color for pixels between sequences (default=%default)")
   1.150 +    # --break-color and/or --break-pixels for intra-sequence breaks?
   1.151      op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
   1.152                    help="margin color")
   1.153