Add sequence order options to last-dotplot
authorMartin C. Frith
Tue May 02 13:35:07 2017 +0900 (2017-05-02)
changeset 8515b2acb7fdb3e
parent 850 db0ddc92f2ec
child 852 f3b6c666afad
Add sequence order options to last-dotplot
doc/last-dotplot.txt
scripts/last-dotplot
     1.1 --- a/doc/last-dotplot.txt	Tue May 02 13:18:23 2017 +0900
     1.2 +++ b/doc/last-dotplot.txt	Tue May 02 13:35:07 2017 +0900
     1.3 @@ -71,6 +71,12 @@
     1.4        Color for forward alignments.
     1.5    -r COLOR, --reversecolor=COLOR
     1.6        Color for reverse alignments.
     1.7 +  --sort1=N
     1.8 +      Put the 1st genome's sequences left-to-right in order of: their
     1.9 +      appearance in the input (0), their names (1), their lengths (2).
    1.10 +  --sort2=N
    1.11 +      Put the 2nd genome's sequences top-to-bottom in order of: their
    1.12 +      appearance in the input (0), their names (1), their lengths (2).
    1.13    --trim1
    1.14        Trim unaligned sequence flanks from the 1st (horizontal) genome.
    1.15    --trim2
     2.1 --- a/scripts/last-dotplot	Tue May 02 13:18:23 2017 +0900
     2.2 +++ b/scripts/last-dotplot	Tue May 02 13:35:07 2017 +0900
     2.3 @@ -114,7 +114,9 @@
     2.4              return max(beg, 0), min(end, seqLen)
     2.5      return None
     2.6  
     2.7 -def updateSeqLimits(isTrim, seqLimits, seqName, seqRange, blocks, index):
     2.8 +def updateSeqs(isTrim, seqNames, seqLimits, seqName, seqRange, blocks, index):
     2.9 +    if seqName not in seqLimits:
    2.10 +        seqNames.append(seqName)
    2.11      if isTrim:
    2.12          beg = blocks[0][index]
    2.13          end = blocks[-1][index] + blocks[-1][2]
    2.14 @@ -133,6 +135,8 @@
    2.15      seqRanges2 = map(seqRangeFromText, opts.seq2)
    2.16  
    2.17      alignments = []
    2.18 +    seqNames1 = []
    2.19 +    seqNames2 = []
    2.20      seqLimits1 = {}
    2.21      seqLimits2 = {}
    2.22      lines = myOpen(fileName)
    2.23 @@ -145,9 +149,9 @@
    2.24          if not b: continue
    2.25          aln = seqName1, seqName2, b
    2.26          alignments.append(aln)
    2.27 -        updateSeqLimits(opts.trim1, seqLimits1, seqName1, range1, b, 0)
    2.28 -        updateSeqLimits(opts.trim2, seqLimits2, seqName2, range2, b, 1)
    2.29 -    return alignments, seqLimits1, seqLimits2
    2.30 +        updateSeqs(opts.trim1, seqNames1, seqLimits1, seqName1, range1, b, 0)
    2.31 +        updateSeqs(opts.trim2, seqNames2, seqLimits2, seqName2, range2, b, 1)
    2.32 +    return alignments, seqNames1, seqNames2, seqLimits1, seqLimits2
    2.33  
    2.34  def natural_sort_key(my_string):
    2.35      '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
    2.36 @@ -175,11 +179,16 @@
    2.37  def seqNameAndSizeText(seqName, seqSize):
    2.38      return seqName + ": " + sizeText(seqSize)
    2.39  
    2.40 -def getSeqInfo(seqLimits, font, fontsize, image_mode, isShowSize):
    2.41 +def getSeqInfo(sortOpt, seqNames, seqLimits,
    2.42 +               font, fontsize, image_mode, isShowSize):
    2.43      '''Return miscellaneous information about the sequences.'''
    2.44 -    seqNames = seqLimits.keys()
    2.45 -    seqNames.sort(key=natural_sort_key)
    2.46 +    if sortOpt == 1:
    2.47 +        seqNames.sort(key=natural_sort_key)
    2.48      seqSizes = [seqLimits[i][1] - seqLimits[i][0] for i in seqNames]
    2.49 +    if sortOpt == 2:
    2.50 +        seqRecords = sorted(zip(seqSizes, seqNames), reverse=True)
    2.51 +        seqSizes = [i[0] for i in seqRecords]
    2.52 +        seqNames = [i[1] for i in seqRecords]
    2.53      if isShowSize:
    2.54          seqLabels = map(seqNameAndSizeText, seqNames, seqSizes)
    2.55      else:
    2.56 @@ -392,15 +401,17 @@
    2.57      overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
    2.58  
    2.59      warn("reading alignments...")
    2.60 -    alignments, seqLimits1, seqLimits2 = readAlignments(args[0], opts)
    2.61 +    alignmentInfo = readAlignments(args[0], opts)
    2.62 +    alignments, seqNames1, seqNames2, seqLimits1, seqLimits2 = alignmentInfo
    2.63      warn("done")
    2.64 -
    2.65      if not alignments: raise Exception("there are no alignments")
    2.66  
    2.67 -    i1 = getSeqInfo(seqLimits1, font, opts.fontsize, image_mode, opts.lengths1)
    2.68 +    i1 = getSeqInfo(opts.sort1, seqNames1, seqLimits1,
    2.69 +                    font, opts.fontsize, image_mode, opts.lengths1)
    2.70      seqNames1, seqSizes1, seqLabels1, labelSizes1, margin1 = i1
    2.71  
    2.72 -    i2 = getSeqInfo(seqLimits2, font, opts.fontsize, image_mode, opts.lengths2)
    2.73 +    i2 = getSeqInfo(opts.sort2, seqNames2, seqLimits2,
    2.74 +                    font, opts.fontsize, image_mode, opts.lengths2)
    2.75      seqNames2, seqSizes2, seqLabels2, labelSizes2, margin2 = i2
    2.76  
    2.77      warn("choosing bp per pixel...")
    2.78 @@ -500,6 +511,12 @@
    2.79                    help="color for forward alignments (default: %default)")
    2.80      op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
    2.81                    help="color for reverse alignments (default: %default)")
    2.82 +    op.add_option("--sort1", type="int", default=1, metavar="N",
    2.83 +                  help="genome1 sequence order: 0=input order, 1=name order, "
    2.84 +                  "2=length order (default=%default)")
    2.85 +    op.add_option("--sort2", type="int", default=1, metavar="N",
    2.86 +                  help="genome2 sequence order: 0=input order, 1=name order, "
    2.87 +                  "2=length order (default=%default)")
    2.88      op.add_option("--trim1", action="store_true",
    2.89                    help="trim unaligned sequence flanks from the 1st genome")
    2.90      op.add_option("--trim2", action="store_true",