postmask: fix bug for unusual alignment headers
author Martin C. Frith
Mon May 14 14:15:33 2018 +0900 (2018-05-14)
changeset 938 65969d4d464d
parent 937 8a4fadbe6080
child 939 76cdd2203460
postmask: fix bug for unusual alignment headers
scripts/last-dotplot
scripts/last-postmask
test/102.maf
test/last-postmask-test.out
test/last-postmask-test.sh
test/last-split-test.out
test/last-split-test.sh
     1.1 --- a/scripts/last-dotplot	Mon May 14 11:42:55 2018 +0900
     1.2 +++ b/scripts/last-dotplot	Mon May 14 14:15:33 2018 +0900
     1.3 @@ -18,8 +18,15 @@
     1.4  import itertools, optparse, os, re, sys
     1.5  
     1.6  # Try to make PIL/PILLOW work:
     1.7 -try: from PIL import Image, ImageDraw, ImageFont, ImageColor
     1.8 -except ImportError: import Image, ImageDraw, ImageFont, ImageColor
     1.9 +try:
    1.10 +    from PIL import Image, ImageDraw, ImageFont, ImageColor
    1.11 +except ImportError:
    1.12 +    import Image, ImageDraw, ImageFont, ImageColor
    1.13 +
    1.14 +try:
    1.15 +    from future_builtins import zip
    1.16 +except ImportError:
    1.17 +    pass
    1.18  
    1.19  def myOpen(fileName):  # faster than fileinput
    1.20      if fileName is None:
    1.21 @@ -75,7 +82,7 @@
    1.22  def mafBlocks(beg1, beg2, seq1, seq2):
    1.23      '''Get the gapless blocks of an alignment, from MAF format.'''
    1.24      size = 0
    1.25 -    for x, y in itertools.izip(seq1, seq2):
    1.26 +    for x, y in zip(seq1, seq2):
    1.27          if x == "-":
    1.28              if size:
    1.29                  yield beg1, beg2, size
     2.1 --- a/scripts/last-postmask	Mon May 14 11:42:55 2018 +0900
     2.2 +++ b/scripts/last-postmask	Mon May 14 14:15:33 2018 +0900
     2.3 @@ -15,7 +15,12 @@
     2.4  from __future__ import print_function
     2.5  
     2.6  import gzip
     2.7 -import itertools, optparse, os, signal, sys
     2.8 +import optparse, os, signal, sys
     2.9 +
    2.10 +try:
    2.11 +    from future_builtins import zip
    2.12 +except ImportError:
    2.13 +    pass
    2.14  
    2.15  def myOpen(fileName):
    2.16      if fileName == "-":
    2.17 @@ -78,15 +83,18 @@
    2.18      return False
    2.19  
    2.20  def printIfGood(maf, seqs, scoreMatrix, delOpenCost, insOpenCost, minScore):
    2.21 -    cols = itertools.izip(*seqs)
    2.22 +    cols = zip(*seqs)
    2.23      if isGoodAlignment(cols, scoreMatrix, delOpenCost, insOpenCost, minScore):
    2.24          for line in maf:
    2.25              print(line, end="")
    2.26          print()
    2.27  
    2.28  def doOneFile(lines):
    2.29 +    aDel = bDel = aIns = bIns = minScore = matrices = None
    2.30      strandParam = 0
    2.31 -    scoreMatrices = None
    2.32 +    scoreMatrix = []
    2.33 +    rowHeads = []
    2.34 +    colHeads = []
    2.35      maf = []
    2.36      seqs = []
    2.37  
    2.38 @@ -101,29 +109,29 @@
    2.39                  if i.startswith("B="): bIns = int(i[2:])
    2.40                  if i.startswith("e="): minScore = int(i[2:])
    2.41                  if i.startswith("S="): strandParam = int(i[2:])
    2.42 -            if len(fields) > 1 and max(map(len, fields)) == 1:
    2.43 +            if not colHeads and len(fields) > 1 and max(map(len, fields)) == 1:
    2.44                  colHeads = fields[1:]
    2.45 -                rowHeads = []
    2.46 -                matrix = []
    2.47              elif len(fields) > 2 and len(fields[1]) == 1:
    2.48                  rowHeads.append(fields[1])
    2.49 -                matrix.append([int(i) for i in fields[2:]])
    2.50 +                scoreMatrix.append([int(i) for i in fields[2:]])
    2.51          elif line.isspace():
    2.52 -            if seqs: printIfGood(maf, seqs, scoreMatrix, aDel, aIns, minScore)
    2.53 +            if seqs: printIfGood(maf, seqs, matrix, aDel, aIns, minScore)
    2.54              maf = []
    2.55              seqs = []
    2.56          else:
    2.57 -            if not scoreMatrices:
    2.58 -                scoreMatrices = matrixPerStrand(rowHeads, colHeads, matrix,
    2.59 -                                                bDel, bIns)
    2.60              maf.append(line)
    2.61              if line[0] == "s":
    2.62 +                if not matrices:
    2.63 +                    if None in (aDel, bDel, aIns, bIns, minScore):
    2.64 +                        raise Exception("can't read alignment header")
    2.65 +                    matrices = matrixPerStrand(rowHeads, colHeads,
    2.66 +                                               scoreMatrix, bDel, bIns)
    2.67                  fields = line.split()
    2.68                  if len(seqs) == strandParam:
    2.69                      strand = fields[4]
    2.70 -                    scoreMatrix = scoreMatrices[strand == "-"]
    2.71 +                    matrix = matrices[strand == "-"]
    2.72                  seqs.append(fields[6])
    2.73 -    if seqs: printIfGood(maf, seqs, scoreMatrix, aDel, aIns, minScore)
    2.74 +    if seqs: printIfGood(maf, seqs, matrix, aDel, aIns, minScore)
    2.75  
    2.76  def lastPostmask(args):
    2.77      if not args:
     3.1 --- a/test/102.maf	Mon May 14 11:42:55 2018 +0900
     3.2 +++ b/test/102.maf	Mon May 14 14:15:33 2018 +0900
     3.3 @@ -11,6 +11,7 @@
     3.4  # C -20  6 -21 -12
     3.5  # G -10 -21  6 -21
     3.6  # T -21 -11 -20  6
     3.7 +# N  0  0  0  0
     3.8  #
     3.9  a score=138 EG2=3.3e+04 E=0.13
    3.10  s chr3 23607556 23 + 198295559 ACGTTCTGGTTTATGTTTCCTTG
     4.1 --- a/test/last-postmask-test.out	Mon May 14 11:42:55 2018 +0900
     4.2 +++ b/test/last-postmask-test.out	Mon May 14 14:15:33 2018 +0900
     4.3 @@ -12,6 +12,7 @@
     4.4  # C -20  6 -21 -12
     4.5  # G -10 -21  6 -21
     4.6  # T -21 -11 -20  6
     4.7 +# N  0  0  0  0
     4.8  #
     4.9  a score=138 EG2=3.3e+04 E=0.13
    4.10  s chr3 23607556 23 + 198295559 ACGTTCTGGTTTATGTTTCCTTG
    4.11 @@ -226,3 +227,6 @@
    4.12  s 102        166 24 -       699 CAGGAGCGGCCACCATGGCC-CAAG
    4.13  
    4.14  
    4.15 +TEST last-postmask 90089.maf
    4.16 +last-postmask: error: can't read alignment header
    4.17 +
     5.1 --- a/test/last-postmask-test.sh	Mon May 14 11:42:55 2018 +0900
     5.2 +++ b/test/last-postmask-test.sh	Mon May 14 14:15:33 2018 +0900
     5.3 @@ -12,5 +12,6 @@
     5.4  
     5.5  {
     5.6      try last-postmask 102.maf
     5.7 -} |
     5.8 +    try last-postmask 90089.maf
     5.9 +} 2>&1 |
    5.10  diff -u $(basename $0 .sh).out -
     6.1 --- a/test/last-split-test.out	Mon May 14 11:42:55 2018 +0900
     6.2 +++ b/test/last-split-test.out	Mon May 14 14:15:33 2018 +0900
     6.3 @@ -46814,6 +46814,7 @@
     6.4  # C -20  6 -21 -12
     6.5  # G -10 -21  6 -21
     6.6  # T -21 -11 -20  6
     6.7 +# N  0  0  0  0
     6.8  #
     6.9  # m=0.01 s=110
    6.10  #
     7.1 --- a/test/last-split-test.sh	Mon May 14 11:42:55 2018 +0900
     7.2 +++ b/test/last-split-test.sh	Mon May 14 14:15:33 2018 +0900
     7.3 @@ -31,4 +31,4 @@
     7.4  
     7.5      last-split 102.maf
     7.6  } |
     7.7 -diff last-split-test.out -
     7.8 +diff -u last-split-test.out -