Make last-train find LAST programs more robustly
author Martin C. Frith
Mon May 14 11:42:55 2018 +0900 (2018-05-14)
changeset 937 8a4fadbe6080
parent 936 b1c09fdd12fe
child 938 65969d4d464d
Make last-train find LAST programs more robustly
scripts/last-dotplot
scripts/last-map-probs
scripts/last-postmask
scripts/last-train
scripts/maf-convert
scripts/maf-swap
     1.1 --- a/scripts/last-dotplot	Mon May 07 10:51:50 2018 +0900
     1.2 +++ b/scripts/last-dotplot	Mon May 14 11:42:55 2018 +0900
     1.3 @@ -149,8 +149,8 @@
     1.4  
     1.5  def readAlignments(fileName, opts):
     1.6      '''Get alignments and sequence limits, from MAF or tabular format.'''
     1.7 -    seqRequests1 = map(seqRequestFromText, opts.seq1)
     1.8 -    seqRequests2 = map(seqRequestFromText, opts.seq2)
     1.9 +    seqRequests1 = [seqRequestFromText(i) for i in opts.seq1]
    1.10 +    seqRequests2 = [seqRequestFromText(i) for i in opts.seq2]
    1.11  
    1.12      alignments = []
    1.13      seqRanges1 = []
     2.1 --- a/scripts/last-map-probs	Mon May 07 10:51:50 2018 +0900
     2.2 +++ b/scripts/last-map-probs	Mon May 14 11:42:55 2018 +0900
     2.3 @@ -120,6 +120,6 @@
     2.4  
     2.5      try: lastMapProbs(opts, args)
     2.6      except KeyboardInterrupt: pass  # avoid silly error message
     2.7 -    except Exception, e:
     2.8 +    except Exception as e:
     2.9          prog = os.path.basename(sys.argv[0])
    2.10          sys.exit(prog + ": error: " + str(e))
     3.1 --- a/scripts/last-postmask	Mon May 07 10:51:50 2018 +0900
     3.2 +++ b/scripts/last-postmask	Mon May 14 11:42:55 2018 +0900
     3.3 @@ -30,9 +30,11 @@
     3.4      i = x.find(base)
     3.5      return y[i] if i >= 0 else base
     3.6  
     3.7 -def getScoreMatrix(rowHeads, colHeads, matrix, deleteCost, insertCost):
     3.8 +def fastScoreMatrix(rowHeads, colHeads, matrix, deleteCost, insertCost):
     3.9 +    matrixLen = 128
    3.10      defaultScore = min(map(min, matrix))
    3.11 -    scoreMatrix = [[defaultScore for i in range(128)] for j in range(128)]
    3.12 +    fastMatrix = [[defaultScore for i in range(matrixLen)]
    3.13 +                  for j in range(matrixLen)]
    3.14      for i, x in enumerate(rowHeads):
    3.15          for j, y in enumerate(colHeads):
    3.16              xu = ord(x.upper())
    3.17 @@ -41,46 +43,49 @@
    3.18              yl = ord(y.lower())
    3.19              score = matrix[i][j]
    3.20              maskScore = min(score, 0)
    3.21 -            scoreMatrix[xu][yu] = score
    3.22 -            scoreMatrix[xu][yl] = maskScore
    3.23 -            scoreMatrix[xl][yu] = maskScore
    3.24 -            scoreMatrix[xl][yl] = maskScore
    3.25 -    for i in range(128):
    3.26 -        scoreMatrix[i][ord("-")] = -deleteCost
    3.27 -        scoreMatrix[ord("-")][i] = -insertCost
    3.28 -    return scoreMatrix
    3.29 +            fastMatrix[xu][yu] = score
    3.30 +            fastMatrix[xu][yl] = maskScore
    3.31 +            fastMatrix[xl][yu] = maskScore
    3.32 +            fastMatrix[xl][yl] = maskScore
    3.33 +    for i in range(matrixLen):
    3.34 +        fastMatrix[i][ord("-")] = -deleteCost
    3.35 +        fastMatrix[ord("-")][i] = -insertCost
    3.36 +    return fastMatrix
    3.37  
    3.38 -def getScoreMatrices(rowHeads, colHeads, matrix, deleteCost, insertCost):
    3.39 +def matrixPerStrand(rowHeads, colHeads, matrix, deleteCost, insertCost):
    3.40      rowComps = [complement(i) for i in rowHeads]
    3.41      colComps = [complement(i) for i in colHeads]
    3.42 -    f = getScoreMatrix(rowHeads, colHeads, matrix, deleteCost, insertCost)
    3.43 -    r = getScoreMatrix(rowComps, colComps, matrix, deleteCost, insertCost)
    3.44 -    return f, r
    3.45 +    fwd = fastScoreMatrix(rowHeads, colHeads, matrix, deleteCost, insertCost)
    3.46 +    rev = fastScoreMatrix(rowComps, colComps, matrix, deleteCost, insertCost)
    3.47 +    return fwd, rev
    3.48  
    3.49 -def isGoodAlignment(seqs, scoreMatrix, delOpenCost, insOpenCost, minScore):
    3.50 +def isGoodAlignment(columns, scoreMatrix, delOpenCost, insOpenCost, minScore):
    3.51      """Does the alignment have a segment with score >= minScore?"""
    3.52 -    r, q = seqs
    3.53      score = 0
    3.54 -    xOld = " "
    3.55 -    yOld = " "
    3.56 -    for x, y in itertools.izip(r, q):
    3.57 +    xOld = yOld = " "
    3.58 +    for x, y in columns:
    3.59          score += scoreMatrix[ord(x)][ord(y)]
    3.60 -        if score >= minScore: return True
    3.61 -        if x == "-" and xOld != "-": score -= insOpenCost
    3.62 -        if y == "-" and yOld != "-": score -= delOpenCost
    3.63 -        if score < 0: score = 0
    3.64 +        if score >= minScore:
    3.65 +            return True
    3.66 +        if x == "-" and xOld != "-":
    3.67 +            score -= insOpenCost
    3.68 +        if y == "-" and yOld != "-":
    3.69 +            score -= delOpenCost
    3.70 +        if score < 0:
    3.71 +            score = 0
    3.72          xOld = x
    3.73          yOld = y
    3.74      return False
    3.75  
    3.76  def printIfGood(maf, seqs, scoreMatrix, delOpenCost, insOpenCost, minScore):
    3.77 -    if isGoodAlignment(seqs, scoreMatrix, delOpenCost, insOpenCost, minScore):
    3.78 +    cols = itertools.izip(*seqs)
    3.79 +    if isGoodAlignment(cols, scoreMatrix, delOpenCost, insOpenCost, minScore):
    3.80          for line in maf:
    3.81              print(line, end="")
    3.82          print()
    3.83  
    3.84  def doOneFile(lines):
    3.85 -    matrixStrand = 0
    3.86 +    strandParam = 0
    3.87      scoreMatrices = None
    3.88      maf = []
    3.89      seqs = []
    3.90 @@ -88,33 +93,33 @@
    3.91      for line in lines:
    3.92          if line[0] == "#":
    3.93              print(line, end="")
    3.94 -            w = line.split()
    3.95 -            for i in w:
    3.96 +            fields = line.split()
    3.97 +            for i in fields:
    3.98                  if i.startswith("a="): aDel = int(i[2:])
    3.99                  if i.startswith("b="): bDel = int(i[2:])
   3.100                  if i.startswith("A="): aIns = int(i[2:])
   3.101                  if i.startswith("B="): bIns = int(i[2:])
   3.102                  if i.startswith("e="): minScore = int(i[2:])
   3.103 -                if i.startswith("S="): matrixStrand = int(i[2:])
   3.104 -            if len(w) > 1 and max(map(len, w)) == 1:
   3.105 -                colHeads = w[1:]
   3.106 +                if i.startswith("S="): strandParam = int(i[2:])
   3.107 +            if len(fields) > 1 and max(map(len, fields)) == 1:
   3.108 +                colHeads = fields[1:]
   3.109                  rowHeads = []
   3.110                  matrix = []
   3.111 -            elif len(w) > 2 and len(w[1]) == 1:
   3.112 -                rowHeads.append(w[1])
   3.113 -                matrix.append(list(map(int, w[2:])))
   3.114 +            elif len(fields) > 2 and len(fields[1]) == 1:
   3.115 +                rowHeads.append(fields[1])
   3.116 +                matrix.append([int(i) for i in fields[2:]])
   3.117          elif line.isspace():
   3.118              if seqs: printIfGood(maf, seqs, scoreMatrix, aDel, aIns, minScore)
   3.119              maf = []
   3.120              seqs = []
   3.121          else:
   3.122              if not scoreMatrices:
   3.123 -                scoreMatrices = getScoreMatrices(rowHeads, colHeads, matrix,
   3.124 -                                                 bDel, bIns)
   3.125 +                scoreMatrices = matrixPerStrand(rowHeads, colHeads, matrix,
   3.126 +                                                bDel, bIns)
   3.127              maf.append(line)
   3.128              if line[0] == "s":
   3.129                  fields = line.split()
   3.130 -                if len(seqs) == matrixStrand:
   3.131 +                if len(seqs) == strandParam:
   3.132                      strand = fields[4]
   3.133                      scoreMatrix = scoreMatrices[strand == "-"]
   3.134                  seqs.append(fields[6])
   3.135 @@ -138,6 +143,6 @@
   3.136  
   3.137      try: lastPostmask(args)
   3.138      except KeyboardInterrupt: pass  # avoid silly error message
   3.139 -    except Exception, e:
   3.140 +    except Exception as e:
   3.141          prog = os.path.basename(sys.argv[0])
   3.142          sys.exit(prog + ": error: " + str(e))
     4.1 --- a/scripts/last-train	Mon May 07 10:51:50 2018 +0900
     4.2 +++ b/scripts/last-train	Mon May 14 11:42:55 2018 +0900
     4.3 @@ -305,8 +305,8 @@
     4.4      return scoreFromProb(scale, probRatio)
     4.5  
     4.6  def matScoresFromProbs(scale, probs):
     4.7 -    rowProbs = map(sum, probs)
     4.8 -    colProbs = map(sum, zip(*probs))
     4.9 +    rowProbs = [sum(i) for i in probs]
    4.10 +    colProbs = [sum(i) for i in zip(*probs)]
    4.11      return [[scoreFromLetterProbs(scale, j, x, y) for j, y in zip(i, colProbs)]
    4.12              for i, x in zip(probs, rowProbs)]
    4.13  
    4.14 @@ -339,11 +339,10 @@
    4.15      print()
    4.16  
    4.17  def tryToMakeChildProgramsFindable():
    4.18 -    myDir = os.path.dirname(__file__)
    4.19 -    x = os.path.join(myDir, os.pardir, "src")
    4.20 -    y = os.path.join(myDir, os.pardir, "scripts")
    4.21 +    d = os.path.dirname(__file__)
    4.22 +    e = os.path.join(d, os.pardir, "src")
    4.23      # put them first, to avoid getting older versions of LAST:
    4.24 -    os.environ["PATH"] = x + os.pathsep + y + os.pathsep + os.environ["PATH"]
    4.25 +    os.environ["PATH"] = d + os.pathsep + e + os.pathsep + os.environ["PATH"]
    4.26  
    4.27  def readLastalProgName(lastdbIndexName):
    4.28      bitsPerInt = "32"
    4.29 @@ -532,6 +531,6 @@
    4.30  
    4.31      try: lastTrain(opts, args)
    4.32      except KeyboardInterrupt: pass  # avoid silly error message
    4.33 -    except Exception, e:
    4.34 +    except Exception as e:
    4.35          prog = os.path.basename(sys.argv[0])
    4.36          sys.exit(prog + ": error: " + str(e))
     5.1 --- a/scripts/maf-convert	Mon May 07 10:51:50 2018 +0900
     5.2 +++ b/scripts/maf-convert	Mon May 14 11:42:55 2018 +0900
     5.3 @@ -884,6 +884,6 @@
     5.4          op.error("need file (not pipe) with option -d")
     5.5  
     5.6      try: mafConvert(opts, args)
     5.7 -    except Exception, e:
     5.8 +    except Exception as e:
     5.9          prog = os.path.basename(sys.argv[0])
    5.10          sys.exit(prog + ": error: " + str(e))
     6.1 --- a/scripts/maf-swap	Mon May 07 10:51:50 2018 +0900
     6.2 +++ b/scripts/maf-swap	Mon May 14 11:42:55 2018 +0900
     6.3 @@ -137,6 +137,6 @@
     6.4  
     6.5      try: mafSwap(opts, args)
     6.6      except KeyboardInterrupt: pass  # avoid silly error message
     6.7 -    except Exception, e:
     6.8 +    except Exception as e:
     6.9          prog = os.path.basename(sys.argv[0])
    6.10          sys.exit(prog + ": error: " + str(e))