Add piecewise linear gap costs (unfinished)
authorMartin C. Frith
Thu Mar 28 19:59:32 2019 +0900 (5 months ago)
changeset 9753701b1e2019b
parent 974 2b0e54aee679
child 976 f3f69ccff967
Add piecewise linear gap costs (unfinished)
src/Alignment.cc
src/Alignment.hh
src/Centroid.cc
src/Centroid.hh
src/GeneralizedAffineGapCosts.cc
src/GeneralizedAffineGapCosts.hh
src/LastalArguments.cc
src/LastalArguments.hh
src/lastal.cc
src/makefile
src/mcf_gap_costs.cc
src/mcf_gap_costs.hh
     1.1 --- a/src/Alignment.cc	Thu Mar 28 16:46:23 2019 +0900
     1.2 +++ b/src/Alignment.cc	Thu Mar 28 19:59:32 2019 +0900
     1.3 @@ -4,7 +4,6 @@
     1.4  #include "Alphabet.hh"
     1.5  #include "Centroid.hh"
     1.6  #include "GeneticCode.hh"
     1.7 -#include "GeneralizedAffineGapCosts.hh"
     1.8  #include "GreedyXdropAligner.hh"
     1.9  #include "TwoQualityScoreMatrix.hh"
    1.10  #include <cassert>
    1.11 @@ -73,7 +72,7 @@
    1.12  			   GreedyXdropAligner& greedyAligner, bool isGreedy,
    1.13  			   const uchar* seq1, const uchar* seq2, int globality,
    1.14  			   const ScoreMatrixRow* scoreMatrix, int smMax,
    1.15 -			   const GeneralizedAffineGapCosts& gap, int maxDrop,
    1.16 +			   const mcf::GapCosts& gap, int maxDrop,
    1.17  			   int frameshiftCost, size_t frameSize,
    1.18  			   const ScoreMatrixRow* pssm2,
    1.19                             const TwoQualityScoreMatrix& sm2qual,
    1.20 @@ -171,7 +170,7 @@
    1.21  
    1.22  // cost of the gap between x and y
    1.23  static int gapCost(const SegmentPair &x, const SegmentPair &y,
    1.24 -		   const GeneralizedAffineGapCosts &gapCosts,
    1.25 +		   const mcf::GapCosts &gapCosts,
    1.26  		   int frameshiftCost, size_t frameSize) {
    1.27    size_t gapSize1 = y.beg1() - x.end1();
    1.28    size_t gapSize2, frameshift2;
    1.29 @@ -183,7 +182,7 @@
    1.30  
    1.31  bool Alignment::isOptimal( const uchar* seq1, const uchar* seq2, int globality,
    1.32  			   const ScoreMatrixRow* scoreMatrix, int maxDrop,
    1.33 -			   const GeneralizedAffineGapCosts& gapCosts,
    1.34 +			   const mcf::GapCosts& gapCosts,
    1.35  			   int frameshiftCost, size_t frameSize,
    1.36  			   const ScoreMatrixRow* pssm2,
    1.37                             const TwoQualityScoreMatrix& sm2qual,
    1.38 @@ -226,7 +225,7 @@
    1.39  
    1.40  bool Alignment::hasGoodSegment(const uchar *seq1, const uchar *seq2,
    1.41  			       int minScore, const ScoreMatrixRow *scoreMatrix,
    1.42 -			       const GeneralizedAffineGapCosts &gapCosts,
    1.43 +			       const mcf::GapCosts &gapCosts,
    1.44  			       int frameshiftCost, size_t frameSize,
    1.45  			       const ScoreMatrixRow *pssm2,
    1.46  			       const TwoQualityScoreMatrix &sm2qual,
    1.47 @@ -292,13 +291,15 @@
    1.48  			size_t start1, size_t start2,
    1.49  			bool isForward, int globality,
    1.50  			const ScoreMatrixRow* sm, int smMax, int maxDrop,
    1.51 -			const GeneralizedAffineGapCosts& gap,
    1.52 +			const mcf::GapCosts& gap,
    1.53  			int frameshiftCost, size_t frameSize,
    1.54  			const ScoreMatrixRow* pssm2,
    1.55                          const TwoQualityScoreMatrix& sm2qual,
    1.56                          const uchar* qual1, const uchar* qual2,
    1.57  			const Alphabet& alph, AlignmentExtras& extras,
    1.58  			double gamma, int outputType ){
    1.59 +  const mcf::GapCosts::Piece &del = gap.delPieces[0];
    1.60 +  const mcf::GapCosts::Piece &ins = gap.insPieces[0];
    1.61    GappedXdropAligner& aligner = centroid.aligner();
    1.62  
    1.63    if( frameSize ){
    1.64 @@ -307,7 +308,6 @@
    1.65      assert( !globality );
    1.66      assert( !pssm2 );
    1.67      assert( !sm2qual );
    1.68 -    assert( gap.isSymmetric() );
    1.69  
    1.70      size_t dnaStart = aaToDna( start2, frameSize );
    1.71      size_t f = dnaStart + 1;
    1.72 @@ -317,13 +317,13 @@
    1.73  
    1.74      score += aligner.align3( seq1 + start1, seq2 + start2,
    1.75  			     seq2 + frame1, seq2 + frame2, isForward,
    1.76 -			     sm, gap.delExist, gap.delExtend, gap.pairExtend,
    1.77 +			     sm, del.openCost, del.growCost, gap.pairCost,
    1.78  			     frameshiftCost, maxDrop, smMax );
    1.79  
    1.80      size_t end1, end2, size;
    1.81      // This should be OK even if end2 < size * 3:
    1.82      while( aligner.getNextChunk3( end1, end2, size,
    1.83 -				  gap.delExist, gap.delExtend, gap.pairExtend,
    1.84 +				  del.openCost, del.growCost, gap.pairCost,
    1.85  				  frameshiftCost ) )
    1.86        chunks.push_back( SegmentPair( end1 - size, end2 - size * 3, size ) );
    1.87  
    1.88 @@ -336,19 +336,19 @@
    1.89      : sm2qual ? aligner.align2qual( seq1 + start1, qual1 + start1,
    1.90  				    seq2 + start2, qual2 + start2,
    1.91  				    isForward, globality, sm2qual,
    1.92 -				    gap.delExist, gap.delExtend,
    1.93 -				    gap.insExist, gap.insExtend,
    1.94 -				    gap.pairExtend, maxDrop, smMax )
    1.95 +				    del.openCost, del.growCost,
    1.96 +				    ins.openCost, ins.growCost,
    1.97 +				    gap.pairCost, maxDrop, smMax )
    1.98      : pssm2   ? aligner.alignPssm( seq1 + start1, pssm2 + start2,
    1.99  				   isForward, globality,
   1.100 -				   gap.delExist, gap.delExtend,
   1.101 -				   gap.insExist, gap.insExtend,
   1.102 -				   gap.pairExtend, maxDrop, smMax )
   1.103 +				   del.openCost, del.growCost,
   1.104 +				   ins.openCost, ins.growCost,
   1.105 +				   gap.pairCost, maxDrop, smMax )
   1.106      :           aligner.align( seq1 + start1, seq2 + start2,
   1.107  			       isForward, globality, sm,
   1.108 -			       gap.delExist, gap.delExtend,
   1.109 -			       gap.insExist, gap.insExtend,
   1.110 -			       gap.pairExtend, maxDrop, smMax );
   1.111 +			       del.openCost, del.growCost,
   1.112 +			       ins.openCost, ins.growCost,
   1.113 +			       gap.pairCost, maxDrop, smMax );
   1.114  
   1.115    if( extensionScore == -INF ){
   1.116      score = -INF;  // avoid score overflow
   1.117 @@ -364,9 +364,8 @@
   1.118  	chunks.push_back( SegmentPair( end1 - size, end2 - size, size ) );
   1.119      }else{
   1.120        while( aligner.getNextChunk( end1, end2, size,
   1.121 -				   gap.delExist, gap.delExtend,
   1.122 -				   gap.insExist, gap.insExtend,
   1.123 -				   gap.pairExtend ) )
   1.124 +				   del.openCost, del.growCost,
   1.125 +				   ins.openCost, ins.growCost, gap.pairCost ) )
   1.126  	chunks.push_back( SegmentPair( end1 - size, end2 - size, size ) );
   1.127      }
   1.128    }
     2.1 --- a/src/Alignment.hh	Thu Mar 28 16:46:23 2019 +0900
     2.2 +++ b/src/Alignment.hh	Thu Mar 28 19:59:32 2019 +0900
     2.3 @@ -6,6 +6,7 @@
     2.4  #define ALIGNMENT_HH
     2.5  #include "ScoreMatrixRow.hh"
     2.6  #include "SegmentPair.hh"
     2.7 +#include "mcf_gap_costs.hh"
     2.8  #include <stddef.h>  // size_t
     2.9  #include <string>
    2.10  #include <vector>
    2.11 @@ -15,7 +16,6 @@
    2.12  
    2.13  typedef unsigned char uchar;
    2.14  
    2.15 -class GeneralizedAffineGapCosts;
    2.16  class GreedyXdropAligner;
    2.17  class LastEvaluer;
    2.18  class MultiSequence;
    2.19 @@ -81,7 +81,7 @@
    2.20  		  GreedyXdropAligner& greedyAligner, bool isGreedy,
    2.21  		  const uchar* seq1, const uchar* seq2, int globality,
    2.22  		  const ScoreMatrixRow* scoreMatrix, int smMax,
    2.23 -		  const GeneralizedAffineGapCosts& gap, int maxDrop,
    2.24 +		  const mcf::GapCosts& gap, int maxDrop,
    2.25  		  int frameshiftCost, size_t frameSize,
    2.26  		  const ScoreMatrixRow* pssm2,
    2.27                    const TwoQualityScoreMatrix& sm2qual,
    2.28 @@ -95,7 +95,7 @@
    2.29    // If "globality" is non-zero, skip the prefix and suffix checks.
    2.30    bool isOptimal( const uchar* seq1, const uchar* seq2, int globality,
    2.31                    const ScoreMatrixRow* scoreMatrix, int maxDrop,
    2.32 -                  const GeneralizedAffineGapCosts& gapCosts,
    2.33 +                  const mcf::GapCosts& gapCosts,
    2.34  		  int frameshiftCost, size_t frameSize,
    2.35  		  const ScoreMatrixRow* pssm2,
    2.36                    const TwoQualityScoreMatrix& sm2qual,
    2.37 @@ -104,7 +104,7 @@
    2.38    // Does the Alignment have any segment with score >= minScore?
    2.39    bool hasGoodSegment(const uchar *seq1, const uchar *seq2,
    2.40  		      int minScore, const ScoreMatrixRow *scoreMatrix,
    2.41 -		      const GeneralizedAffineGapCosts &gapCosts,
    2.42 +		      const mcf::GapCosts &gapCosts,
    2.43  		      int frameshiftCost, size_t frameSize,
    2.44  		      const ScoreMatrixRow *pssm2,
    2.45  		      const TwoQualityScoreMatrix &sm2qual,
    2.46 @@ -134,7 +134,7 @@
    2.47  	       size_t start1, size_t start2,
    2.48  	       bool isForward, int globality,
    2.49  	       const ScoreMatrixRow* sm, int smMax, int maxDrop,
    2.50 -	       const GeneralizedAffineGapCosts& gap,
    2.51 +	       const mcf::GapCosts& gap,
    2.52  	       int frameshiftCost, size_t frameSize,
    2.53  	       const ScoreMatrixRow* pssm2,
    2.54                 const TwoQualityScoreMatrix& sm2qual,
     3.1 --- a/src/Centroid.cc	Thu Mar 28 16:46:23 2019 +0900
     3.2 +++ b/src/Centroid.cc	Thu Mar 28 19:59:32 2019 +0900
     3.3 @@ -18,6 +18,11 @@
     3.4    }
     3.5  }
     3.6  
     3.7 +static bool isAffineGapCosts(const mcf::GapCosts &g) {
     3.8 +  return g.pairCost >= g.delPieces[0].growCost + g.insPieces[0].growCost +
     3.9 +    std::max(g.delPieces[0].openCost, g.insPieces[0].openCost);
    3.10 +}
    3.11 +
    3.12  namespace cbrc{
    3.13  
    3.14    ExpectedCount::ExpectedCount ()
    3.15 @@ -147,17 +152,15 @@
    3.16  
    3.17    void Centroid::forward(const uchar* seq1, const uchar* seq2,
    3.18  			 const ExpMatrixRow* pssm, bool isExtendFwd,
    3.19 -			 const GeneralizedAffineGapCosts& gap,
    3.20 -			 int globality) {
    3.21 +			 const mcf::GapCosts& gap, int globality) {
    3.22      initForwardMatrix();
    3.23      const int seqIncrement = isExtendFwd ? 1 : -1;
    3.24 -    const bool isAffine = gap.isAffine();
    3.25 -    const double eE = EXP ( - gap.delExtend / T );
    3.26 -    const double eF = EXP ( - gap.delExist / T );
    3.27 -    const double eEI = EXP ( - gap.insExtend / T );
    3.28 -    const double eFI = EXP ( - gap.insExist / T );
    3.29 -    const double eP = EXP ( - gap.pairExtend / T );
    3.30 -    assert( gap.insExist == gap.delExist || eP <= 0.0 );
    3.31 +    const bool isAffine = isAffineGapCosts(gap);
    3.32 +    const double eE = EXP(-gap.delPieces[0].growCost / T);
    3.33 +    const double eF = EXP(-gap.delPieces[0].openCost / T);
    3.34 +    const double eEI = EXP(-gap.insPieces[0].growCost / T);
    3.35 +    const double eFI = EXP(-gap.insPieces[0].openCost / T);
    3.36 +    const double eP = EXP(-gap.pairCost / T);
    3.37      double Z = 0.0;  // partion function of forward values
    3.38  
    3.39      for( size_t k = 0; k < numAntidiagonals; ++k ){  // loop over antidiagonals
    3.40 @@ -303,17 +306,15 @@
    3.41    // compute posterior probabilities while executing backward algorithm
    3.42    void Centroid::backward(const uchar* seq1, const uchar* seq2,
    3.43  			  const ExpMatrixRow* pssm, bool isExtendFwd,
    3.44 -			  const GeneralizedAffineGapCosts& gap,
    3.45 -			  int globality) {
    3.46 +			  const mcf::GapCosts& gap, int globality) {
    3.47      initBackwardMatrix();
    3.48      const int seqIncrement = isExtendFwd ? 1 : -1;
    3.49 -    const bool isAffine = gap.isAffine();
    3.50 -    const double eE = EXP ( - gap.delExtend / T );
    3.51 -    const double eF = EXP ( - gap.delExist / T );
    3.52 -    const double eEI = EXP ( - gap.insExtend / T );
    3.53 -    const double eFI = EXP ( - gap.insExist / T );
    3.54 -    const double eP = EXP ( - gap.pairExtend / T );
    3.55 -    assert( gap.insExist == gap.delExist || eP <= 0.0 );
    3.56 +    const bool isAffine = isAffineGapCosts(gap);
    3.57 +    const double eE = EXP(-gap.delPieces[0].growCost / T);
    3.58 +    const double eF = EXP(-gap.delPieces[0].openCost / T);
    3.59 +    const double eEI = EXP(-gap.insPieces[0].growCost / T);
    3.60 +    const double eFI = EXP(-gap.insPieces[0].openCost / T);
    3.61 +    const double eP = EXP(-gap.pairCost / T);
    3.62      double scaledUnit = 1.0;
    3.63  
    3.64      for( size_t k = numAntidiagonals; k-- > 0; ){
    3.65 @@ -729,7 +730,7 @@
    3.66    void Centroid::computeExpectedCounts ( const uchar* seq1, const uchar* seq2,
    3.67  					 size_t start1, size_t start2,
    3.68  					 bool isExtendFwd,
    3.69 -					 const GeneralizedAffineGapCosts& gap,
    3.70 +					 const mcf::GapCosts& gap,
    3.71  					 unsigned alphabetSize,
    3.72  					 ExpectedCount& c ) const{
    3.73      seq1 += start1;
    3.74 @@ -744,13 +745,12 @@
    3.75      const int seqIncrement = isExtendFwd ? 1 : -1;
    3.76      int alphabetSizeIncrement = alphabetSize;
    3.77      if (!isExtendFwd) alphabetSizeIncrement *= -1;
    3.78 -    const bool isAffine = gap.isAffine();
    3.79 -    const double eE = EXP ( - gap.delExtend / T );
    3.80 -    const double eF = EXP ( - gap.delExist / T );
    3.81 -    const double eEI = EXP ( - gap.insExtend / T );
    3.82 -    const double eFI = EXP ( - gap.insExist / T );
    3.83 -    const double eP = EXP ( - gap.pairExtend / T );
    3.84 -    assert( gap.insExist == gap.delExist || eP <= 0.0 );
    3.85 +    const bool isAffine = isAffineGapCosts(gap);
    3.86 +    const double eE = EXP(-gap.delPieces[0].growCost / T);
    3.87 +    const double eF = EXP(-gap.delPieces[0].openCost / T);
    3.88 +    const double eEI = EXP(-gap.insPieces[0].growCost / T);
    3.89 +    const double eFI = EXP(-gap.insPieces[0].openCost / T);
    3.90 +    const double eP = EXP(-gap.pairCost / T);
    3.91  
    3.92      for( size_t k = 0; k < numAntidiagonals; ++k ){  // loop over antidiagonals
    3.93        const size_t seq1beg = seq1start( k );
     4.1 --- a/src/Centroid.hh	Thu Mar 28 16:46:23 2019 +0900
     4.2 +++ b/src/Centroid.hh	Thu Mar 28 19:59:32 2019 +0900
     4.3 @@ -4,7 +4,7 @@
     4.4  #ifndef CENTROID_HH
     4.5  #define CENTROID_HH
     4.6  #include "GappedXdropAligner.hh"
     4.7 -#include "GeneralizedAffineGapCosts.hh"
     4.8 +#include "mcf_gap_costs.hh"
     4.9  #include "SegmentPair.hh"
    4.10  #include "OneQualityScoreMatrix.hh"
    4.11  #include <stddef.h>  // size_t
    4.12 @@ -58,8 +58,7 @@
    4.13  
    4.14      void doForwardBackwardAlgorithm(const uchar* seq1, const uchar* seq2,
    4.15  				    size_t start1, size_t start2,
    4.16 -				    bool isExtendFwd,
    4.17 -				    const GeneralizedAffineGapCosts& gap,
    4.18 +				    bool isExtendFwd, const mcf::GapCosts& gap,
    4.19  				    int globality) {
    4.20        seq1 += start1;
    4.21        seq2 += start2;
    4.22 @@ -100,8 +99,7 @@
    4.23      // Added by MH (2008/10/10) : compute expected counts for transitions and emissions
    4.24      void computeExpectedCounts(const uchar* seq1, const uchar* seq2,
    4.25  			       size_t start1, size_t start2, bool isExtendFwd,
    4.26 -			       const GeneralizedAffineGapCosts& gap,
    4.27 -			       unsigned alphabetSize,
    4.28 +			       const mcf::GapCosts& gap, unsigned alphabetSize,
    4.29  			       ExpectedCount& count) const;
    4.30  
    4.31    private:
    4.32 @@ -144,11 +142,11 @@
    4.33  
    4.34      void forward(const uchar* seq1, const uchar* seq2,
    4.35  		 const ExpMatrixRow* pssm, bool isExtendFwd,
    4.36 -		 const GeneralizedAffineGapCosts& gap, int globality);
    4.37 +		 const mcf::GapCosts& gap, int globality);
    4.38  
    4.39      void backward(const uchar* seq1, const uchar* seq2,
    4.40  		  const ExpMatrixRow* pssm, bool isExtendFwd,
    4.41 -		  const GeneralizedAffineGapCosts& gap, int globality);
    4.42 +		  const mcf::GapCosts& gap, int globality);
    4.43  
    4.44      void initForwardMatrix();
    4.45      void initBackwardMatrix();
     5.1 --- a/src/GeneralizedAffineGapCosts.cc	Thu Mar 28 16:46:23 2019 +0900
     5.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.3 @@ -1,25 +0,0 @@
     5.4 -// Copyright 2008, 2009, 2012 Martin C. Frith
     5.5 -
     5.6 -#include "GeneralizedAffineGapCosts.hh"
     5.7 -
     5.8 -namespace cbrc{
     5.9 -
    5.10 -int GeneralizedAffineGapCosts::cost( int gapSize1, int gapSize2 ) const{
    5.11 -  int delPart = gapSize1 ? delExist + delExtend * gapSize1 : 0;
    5.12 -  int insPart = gapSize2 ? insExist + insExtend * gapSize2 : 0;
    5.13 -  int c = delPart + insPart;
    5.14 -
    5.15 -  if( gapSize1 >= gapSize2 && pairExtend < insExist + insExtend + delExtend ){
    5.16 -    int d = delPart + (pairExtend - delExtend) * gapSize2;
    5.17 -    c = std::min( c, d );
    5.18 -  }
    5.19 -
    5.20 -  if( gapSize2 >= gapSize1 && pairExtend < delExist + delExtend + insExtend ){
    5.21 -    int d = insPart + (pairExtend - insExtend) * gapSize1;
    5.22 -    c = std::min( c, d );
    5.23 -  }
    5.24 -
    5.25 -  return c;
    5.26 -}
    5.27 -
    5.28 -}
     6.1 --- a/src/GeneralizedAffineGapCosts.hh	Thu Mar 28 16:46:23 2019 +0900
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,55 +0,0 @@
     6.4 -// Copyright 2008, 2009, 2012 Martin C. Frith
     6.5 -
     6.6 -// This struct holds parameters for so-called generalized affine gap
     6.7 -// costs (for pair-wise sequence alignment).  In this scheme, a "gap"
     6.8 -// may consist of unaligned regions in both sequences.  If these
     6.9 -// unaligned regions have sizes j and k, where j <= k, the cost is:
    6.10 -
    6.11 -// a + b*(k-j) + c*j
    6.12 -
    6.13 -// If c >= a + 2b, it reduces to standard affine gaps.  For more
    6.14 -// information, see: SF Altschul 1998 Proteins 32(1):88-96.
    6.15 -
    6.16 -// In a further generalization, the costs for insertions and deletions
    6.17 -// may differ.  Typically, they would not differ:
    6.18 -// delExist = insExist = a
    6.19 -// delExtend = insExtend = b
    6.20 -// pairExtend = c
    6.21 -
    6.22 -#ifndef GENERALIZEDAFFINEGAPCOSTS_HH
    6.23 -#define GENERALIZEDAFFINEGAPCOSTS_HH
    6.24 -
    6.25 -#include <algorithm>
    6.26 -
    6.27 -namespace cbrc{
    6.28 -
    6.29 -struct GeneralizedAffineGapCosts{
    6.30 -  int delExist;
    6.31 -  int delExtend;
    6.32 -  int insExist;
    6.33 -  int insExtend;
    6.34 -  int pairExtend;
    6.35 -
    6.36 -  void assign( int a, int b, int A, int B, int c )
    6.37 -  { delExist = a; delExtend = b; insExist = A; insExtend = B;
    6.38 -    pairExtend = (c > 0) ? c : 1000000000; }
    6.39 -
    6.40 -  bool isSymmetric() const
    6.41 -  { return insExist == delExist && insExtend == delExtend; }
    6.42 -
    6.43 -  // Will standard affine gaps always suffice for maximal alignment scores?
    6.44 -  bool isAffine() const{
    6.45 -    return pairExtend >= std::max(delExist, insExist) + delExtend + insExtend;
    6.46 -  }
    6.47 -
    6.48 -  // Return the score of a gap with the given sizes in a pair of
    6.49 -  // sequences, considering that it might be either one "generalized"
    6.50 -  // gap or two neighbouring "affine" gaps.
    6.51 -  // Here, gapSize2=0 would be a deletion, and gapSize1=0 would be an
    6.52 -  // insertion.
    6.53 -  int cost( int gapSize1, int gapSize2 ) const;
    6.54 -};
    6.55 -
    6.56 -}
    6.57 -
    6.58 -#endif
     7.1 --- a/src/LastalArguments.cc	Thu Mar 28 16:46:23 2019 +0900
     7.2 +++ b/src/LastalArguments.cc	Thu Mar 28 19:59:32 2019 +0900
     7.3 @@ -6,10 +6,8 @@
     7.4  #include <algorithm>  // max
     7.5  #include <iostream>
     7.6  #include <sstream>
     7.7 -#include <vector>
     7.8  #include <stdexcept>
     7.9  #include <cctype>
    7.10 -#include <climits>
    7.11  #include <cmath>  // log
    7.12  #include <cstring>  // strtok
    7.13  #include <cstdlib>  // EXIT_SUCCESS
    7.14 @@ -20,6 +18,26 @@
    7.15    ERR( std::string("bad option value: -") + opt + ' ' + arg );
    7.16  }
    7.17  
    7.18 +static void parseIntList(const char *in, std::vector<int> &out) {
    7.19 +  std::istringstream s(in);
    7.20 +  out.clear();
    7.21 +  int x;
    7.22 +  while (s >> x) {
    7.23 +    out.push_back(x);
    7.24 +    s.ignore();
    7.25 +  }
    7.26 +  if (!s.eof()) {
    7.27 +    ERR(std::string("bad option value: ") + in);
    7.28 +  }
    7.29 +}
    7.30 +
    7.31 +static void writeIntList(std::ostream &s, const std::vector<int> &v) {
    7.32 +  for (size_t i = 0; i < v.size(); ++i) {
    7.33 +    if (i) s << ',';
    7.34 +    s << v[i];
    7.35 +  }
    7.36 +}
    7.37 +
    7.38  static int myGetopt( int argc, char** argv, const char* optstring ){
    7.39    if( optind < argc ){
    7.40      std::string nextarg = argv[optind];
    7.41 @@ -70,10 +88,6 @@
    7.42    minScoreGapless(-1),  // depends on minScoreGapped and the outputType
    7.43    matchScore(-1),  // depends on the alphabet
    7.44    mismatchCost(-1),  // depends on the alphabet
    7.45 -  gapExistCost(INT_MIN),  // depends on the alphabet
    7.46 -  gapExtendCost(-1),  // depends on the alphabet
    7.47 -  insExistCost(INT_MIN),  // depends on gapExistCost
    7.48 -  insExtendCost(-1),  // depends on gapExtendCost
    7.49    gapPairCost(-1),  // this means: OFF
    7.50    frameshiftCost(-1),  // this means: ordinary, non-translated alignment
    7.51    matrixFile(""),
    7.52 @@ -222,18 +236,16 @@
    7.53        if (ambiguousLetterOpt < 0 || ambiguousLetterOpt > 3) badopt(c, optarg);
    7.54        break;
    7.55      case 'a':
    7.56 -      unstringify( gapExistCost, optarg );
    7.57 +      parseIntList(optarg, delOpenCosts);
    7.58        break;
    7.59      case 'b':
    7.60 -      unstringify( gapExtendCost, optarg );
    7.61 -      if( gapExtendCost <= 0 ) badopt( c, optarg );
    7.62 +      parseIntList(optarg, delGrowCosts);
    7.63        break;
    7.64      case 'A':
    7.65 -      unstringify( insExistCost, optarg );
    7.66 +      parseIntList(optarg, insOpenCosts);
    7.67        break;
    7.68      case 'B':
    7.69 -      unstringify( insExtendCost, optarg );
    7.70 -      if( insExtendCost <= 0 ) badopt( c, optarg );
    7.71 +      parseIntList(optarg, insGrowCosts);
    7.72        break;
    7.73      case 'c':
    7.74        unstringify( gapPairCost, optarg );
    7.75 @@ -439,10 +451,11 @@
    7.76    if( isGreedy ){
    7.77      if( matchScore     < 0 ) matchScore     =   2;
    7.78      if( mismatchCost   < 0 ) mismatchCost   =   3;
    7.79 -    gapExistCost = 0;
    7.80 -    gapExtendCost = mismatchCost + matchScore / 2;
    7.81 -    insExistCost = gapExistCost;
    7.82 -    insExtendCost = gapExtendCost;
    7.83 +    int gapGrowCost = mismatchCost + matchScore / 2;
    7.84 +    delOpenCosts.assign(1, 0);
    7.85 +    delGrowCosts.assign(1, gapGrowCost);
    7.86 +    insOpenCosts.assign(1, 0);
    7.87 +    insGrowCosts.assign(1, gapGrowCost);
    7.88      if( frameshiftCost > 0 ) frameshiftCost =   0;
    7.89      if( matchScore % 2 )
    7.90        ERR( "with option -M, the match score (option -r) must be even" );
    7.91 @@ -451,20 +464,20 @@
    7.92      // default match & mismatch scores: Blosum62 matrix
    7.93      if( matchScore < 0 && mismatchCost >= 0 ) matchScore   = 1;  // idiot-proof
    7.94      if( mismatchCost < 0 && matchScore >= 0 ) mismatchCost = 1;  // idiot-proof
    7.95 -    if( gapExistCost   == INT_MIN ) gapExistCost   =  11;
    7.96 -    if( gapExtendCost  < 0 ) gapExtendCost  =   2;
    7.97 +    if (delOpenCosts.empty()) delOpenCosts.assign(1, 11);
    7.98 +    if (delGrowCosts.empty()) delGrowCosts.assign(1,  2);
    7.99    }
   7.100    else if( !isUseQuality( inputFormat ) ){
   7.101      if( matchScore     < 0 ) matchScore     =   1;
   7.102      if( mismatchCost   < 0 ) mismatchCost   =   1;
   7.103 -    if( gapExistCost   == INT_MIN ) gapExistCost   =   7;
   7.104 -    if( gapExtendCost  < 0 ) gapExtendCost  =   1;
   7.105 +    if (delOpenCosts.empty()) delOpenCosts.assign(1, 7);
   7.106 +    if (delGrowCosts.empty()) delGrowCosts.assign(1, 1);
   7.107    }
   7.108    else{  // sequence quality scores will be used:
   7.109      if( matchScore     < 0 ) matchScore     =   6;
   7.110      if( mismatchCost   < 0 ) mismatchCost   =  18;
   7.111 -    if( gapExistCost   == INT_MIN ) gapExistCost   =  21;
   7.112 -    if( gapExtendCost  < 0 ) gapExtendCost  =   9;
   7.113 +    if (delOpenCosts.empty()) delOpenCosts.assign(1, 21);
   7.114 +    if (delGrowCosts.empty()) delGrowCosts.assign(1,  9);
   7.115      // With this scoring scheme for DNA, gapless lambda ~= ln(10)/10,
   7.116      // so these scores should be comparable to PHRED scores.
   7.117      // Furthermore, since mismatchCost/matchScore = 3, the target
   7.118 @@ -478,8 +491,8 @@
   7.119      maxEvalue = 1e18 / (numOfStrands() * r * queryLettersPerRandomAlignment);
   7.120    }
   7.121  
   7.122 -  if( insExistCost == INT_MIN ) insExistCost = gapExistCost;
   7.123 -  if( insExtendCost < 0 ) insExtendCost = gapExtendCost;
   7.124 +  if (insOpenCosts.empty()) insOpenCosts = delOpenCosts;
   7.125 +  if (insGrowCosts.empty()) insGrowCosts = delGrowCosts;
   7.126  
   7.127    if( tantanSetting < 0 ){
   7.128      isKeepLowercase = isKeepRefLowercase;
   7.129 @@ -523,12 +536,23 @@
   7.130  
   7.131    if( minimizerWindow == 0 ) minimizerWindow = refMinimizerWindow;
   7.132  
   7.133 -  if( isFrameshift() && frameshiftCost < gapExtendCost )
   7.134 -    ERR( "the frameshift cost must not be less than the gap extension cost" );
   7.135 +  if (delOpenCosts.size() != delGrowCosts.size() ||
   7.136 +      insOpenCosts.size() != insGrowCosts.size()) {
   7.137 +    ERR("bad gap costs");
   7.138 +  }
   7.139  
   7.140 -  if( insExistCost != gapExistCost || insExtendCost != gapExtendCost ){
   7.141 -    if( isFrameshift() )
   7.142 -      ERR( "can't combine option -F > 0 with option -A or -B" );
   7.143 +  if (delOpenCosts.size() > 1 || insOpenCosts.size() > 1) {
   7.144 +    ERR("piecewise linear gap costs not implemented");
   7.145 +  }
   7.146 +
   7.147 +  if (isFrameshift()) {
   7.148 +    if (frameshiftCost < delGrowCosts[0])
   7.149 +      ERR("the frameshift cost must not be less than the gap extension cost");
   7.150 +
   7.151 +    if (insOpenCosts[0] != delOpenCosts[0] ||
   7.152 +	insGrowCosts[0] != delGrowCosts[0]) {
   7.153 +      ERR("can't combine option -F > 0 with option -A or -B");
   7.154 +    }
   7.155    }
   7.156  }
   7.157  
   7.158 @@ -573,10 +597,10 @@
   7.159  
   7.160  void LastalArguments::writeCommented( std::ostream& stream ) const{
   7.161    stream << '#';
   7.162 -  stream << " a=" << gapExistCost;
   7.163 -  stream << " b=" << gapExtendCost;
   7.164 -  stream << " A=" << insExistCost;
   7.165 -  stream << " B=" << insExtendCost;
   7.166 +  stream << " a="; writeIntList(stream, delOpenCosts);
   7.167 +  stream << " b="; writeIntList(stream, delGrowCosts);
   7.168 +  stream << " A="; writeIntList(stream, insOpenCosts);
   7.169 +  stream << " B="; writeIntList(stream, insGrowCosts);
   7.170    if( gapPairCost > 0 )
   7.171      stream << " c=" << gapPairCost;
   7.172    if( isTranslated() )
     8.1 --- a/src/LastalArguments.hh	Thu Mar 28 16:46:23 2019 +0900
     8.2 +++ b/src/LastalArguments.hh	Thu Mar 28 19:59:32 2019 +0900
     8.3 @@ -7,8 +7,10 @@
     8.4  
     8.5  #include "SequenceFormat.hh"
     8.6  
     8.7 +#include <climits>
     8.8  #include <string>
     8.9  #include <iosfwd>
    8.10 +#include <vector>
    8.11  #include <stddef.h>  // size_t
    8.12  
    8.13  namespace cbrc{
    8.14 @@ -53,8 +55,14 @@
    8.15    int numOfStrands() const{ return (strand == 2) ? 2 : 1; }
    8.16  
    8.17    int minGapCost(int gapLength) const {
    8.18 -    return std::min(gapExistCost + gapLength * gapExtendCost,
    8.19 -		    insExistCost + gapLength * insExtendCost);
    8.20 +    int m = INT_MAX;
    8.21 +    for (size_t i = 0; i < delOpenCosts.size(); ++i) {
    8.22 +      m = std::min(m, delOpenCosts[i] + gapLength * delGrowCosts[i]);
    8.23 +    }
    8.24 +    for (size_t i = 0; i < insOpenCosts.size(); ++i) {
    8.25 +      m = std::min(m, insOpenCosts[i] + gapLength * insGrowCosts[i]);
    8.26 +    }
    8.27 +    return m;
    8.28    }
    8.29  
    8.30    // options:
    8.31 @@ -73,10 +81,10 @@
    8.32    int minScoreGapless;
    8.33    int matchScore;
    8.34    int mismatchCost;
    8.35 -  int gapExistCost;
    8.36 -  int gapExtendCost;
    8.37 -  int insExistCost;
    8.38 -  int insExtendCost;
    8.39 +  std::vector<int> delOpenCosts;
    8.40 +  std::vector<int> delGrowCosts;
    8.41 +  std::vector<int> insOpenCosts;
    8.42 +  std::vector<int> insGrowCosts;
    8.43    int gapPairCost;
    8.44    int frameshiftCost;
    8.45    std::string matrixFile;
     9.1 --- a/src/lastal.cc	Thu Mar 28 16:46:23 2019 +0900
     9.2 +++ b/src/lastal.cc	Thu Mar 28 19:59:32 2019 +0900
     9.3 @@ -20,7 +20,6 @@
     9.4  #include "ScoreMatrix.hh"
     9.5  #include "TantanMasker.hh"
     9.6  #include "DiagonalTable.hh"
     9.7 -#include "GeneralizedAffineGapCosts.hh"
     9.8  #include "GreedyXdropAligner.hh"
     9.9  #include "gaplessXdrop.hh"
    9.10  #include "gaplessPssmXdrop.hh"
    9.11 @@ -77,7 +76,7 @@
    9.12    ScoreMatrix scoreMatrix;
    9.13    SubstitutionMatrices fwdMatrices;
    9.14    SubstitutionMatrices revMatrices;
    9.15 -  GeneralizedAffineGapCosts gapCosts;
    9.16 +  mcf::GapCosts gapCosts;
    9.17    std::vector<LastAligner> aligners;
    9.18    LastEvaluer evaluer;
    9.19    MultiSequence query;  // sequence that hasn't been indexed by lastdb
    9.20 @@ -261,11 +260,12 @@
    9.21    bool isStandardGeneticCode = args.geneticCodeFile.empty();
    9.22    LOG( "getting E-value parameters..." );
    9.23    try{
    9.24 +    const mcf::GapCosts::Piece &del = gapCosts.delPieces[0];
    9.25 +    const mcf::GapCosts::Piece &ins = gapCosts.insPieces[0];
    9.26      evaluer.init( canonicalMatrixName, args.matchScore, args.mismatchCost,
    9.27                    alph.letters.c_str(), fwdMatrices.scoresMasked,
    9.28  		  stats.letterProbs1(), stats.letterProbs2(), isGapped,
    9.29 -                  gapCosts.delExist, gapCosts.delExtend,
    9.30 -                  gapCosts.insExist, gapCosts.insExtend,
    9.31 +		  del.openCost, del.growCost, ins.openCost, ins.growCost,
    9.32                    args.frameshiftCost, geneticCode, isStandardGeneticCode,
    9.33  		  args.verbosity );
    9.34      evaluer.setSearchSpace( refLetters, args.numOfStrands() );
    9.35 @@ -1146,8 +1146,8 @@
    9.36      tantanMasker.init( isProtein, args.tantanSetting > 1,
    9.37  		       alph.letters, alph.encode );
    9.38    makeScoreMatrix( matrixName, matrixFile );
    9.39 -  gapCosts.assign( args.gapExistCost, args.gapExtendCost,
    9.40 -		   args.insExistCost, args.insExtendCost, args.gapPairCost );
    9.41 +  gapCosts.assign(args.delOpenCosts, args.delGrowCosts,
    9.42 +		  args.insOpenCosts, args.insGrowCosts, args.gapPairCost);
    9.43  
    9.44    if( args.isTranslated() ){
    9.45      if( isDna )  // allow user-defined alphabet
    10.1 --- a/src/makefile	Thu Mar 28 16:46:23 2019 +0900
    10.2 +++ b/src/makefile	Thu Mar 28 19:59:32 2019 +0900
    10.3 @@ -25,7 +25,7 @@
    10.4  ScoreMatrix.o SubsetMinimizerFinder.o TantanMasker.o fileMap.o		\
    10.5  tantan.o LastalArguments.o GappedXdropAligner.o				\
    10.6  GappedXdropAlignerPssm.o GappedXdropAligner2qual.o			\
    10.7 -GappedXdropAligner3frame.o GeneralizedAffineGapCosts.o GeneticCode.o	\
    10.8 +GappedXdropAligner3frame.o mcf_gap_costs.o GeneticCode.o	\
    10.9  GreedyXdropAligner.o LastEvaluer.o OneQualityScoreMatrix.o		\
   10.10  QualityPssmMaker.o TwoQualityScoreMatrix.o gaplessXdrop.o		\
   10.11  gaplessPssmXdrop.o gaplessTwoQualityXdrop.o cbrc_linalg.o		\
   10.12 @@ -134,21 +134,20 @@
   10.13  	$(CXX) -MM -I. split/*.cc | sed 's|.*:|split/&|' | $(fix8) >> m
   10.14  	mv m makefile
   10.15  Alignment.o Alignment.o8: Alignment.cc Alignment.hh ScoreMatrixRow.hh SegmentPair.hh \
   10.16 - Alphabet.hh Centroid.hh GappedXdropAligner.hh \
   10.17 - GeneralizedAffineGapCosts.hh OneQualityScoreMatrix.hh \
   10.18 - mcf_substitution_matrix_stats.hh GeneticCode.hh GreedyXdropAligner.hh \
   10.19 - TwoQualityScoreMatrix.hh
   10.20 + mcf_gap_costs.hh Alphabet.hh Centroid.hh GappedXdropAligner.hh \
   10.21 + OneQualityScoreMatrix.hh mcf_substitution_matrix_stats.hh GeneticCode.hh \
   10.22 + GreedyXdropAligner.hh TwoQualityScoreMatrix.hh
   10.23  AlignmentPot.o AlignmentPot.o8: AlignmentPot.cc AlignmentPot.hh Alignment.hh \
   10.24 - ScoreMatrixRow.hh SegmentPair.hh
   10.25 + ScoreMatrixRow.hh SegmentPair.hh mcf_gap_costs.hh
   10.26  AlignmentWrite.o AlignmentWrite.o8: AlignmentWrite.cc Alignment.hh ScoreMatrixRow.hh \
   10.27 - SegmentPair.hh GeneticCode.hh LastEvaluer.hh \
   10.28 + SegmentPair.hh mcf_gap_costs.hh GeneticCode.hh LastEvaluer.hh \
   10.29   alp/sls_alignment_evaluer.hpp alp/sls_pvalues.hpp alp/sls_basic.hpp \
   10.30   alp/sls_falp_alignment_evaluer.hpp alp/sls_fsa1_pvalues.hpp \
   10.31   MultiSequence.hh VectorOrMmap.hh Mmap.hh fileMap.hh stringify.hh \
   10.32   Alphabet.hh
   10.33  Alphabet.o Alphabet.o8: Alphabet.cc Alphabet.hh
   10.34  Centroid.o Centroid.o8: Centroid.cc Centroid.hh GappedXdropAligner.hh \
   10.35 - ScoreMatrixRow.hh GeneralizedAffineGapCosts.hh SegmentPair.hh \
   10.36 + ScoreMatrixRow.hh mcf_gap_costs.hh SegmentPair.hh \
   10.37   OneQualityScoreMatrix.hh mcf_substitution_matrix_stats.hh \
   10.38   GappedXdropAlignerInl.hh
   10.39  CyclicSubsetSeed.o CyclicSubsetSeed.o8: CyclicSubsetSeed.cc CyclicSubsetSeed.hh \
   10.40 @@ -165,8 +164,6 @@
   10.41   GappedXdropAligner.hh ScoreMatrixRow.hh GappedXdropAlignerInl.hh
   10.42  GappedXdropAlignerPssm.o GappedXdropAlignerPssm.o8: GappedXdropAlignerPssm.cc GappedXdropAligner.hh \
   10.43   ScoreMatrixRow.hh GappedXdropAlignerInl.hh
   10.44 -GeneralizedAffineGapCosts.o GeneralizedAffineGapCosts.o8: GeneralizedAffineGapCosts.cc \
   10.45 - GeneralizedAffineGapCosts.hh
   10.46  GeneticCode.o GeneticCode.o8: GeneticCode.cc GeneticCode.hh Alphabet.hh
   10.47  GreedyXdropAligner.o GreedyXdropAligner.o8: GreedyXdropAligner.cc GreedyXdropAligner.hh \
   10.48   ScoreMatrixRow.hh
   10.49 @@ -227,17 +224,17 @@
   10.50   alp/sls_alignment_evaluer.hpp alp/sls_pvalues.hpp alp/sls_basic.hpp \
   10.51   alp/sls_falp_alignment_evaluer.hpp alp/sls_fsa1_pvalues.hpp \
   10.52   GeneticCode.hh SubsetMinimizerFinder.hh SubsetSuffixArray.hh \
   10.53 - CyclicSubsetSeed.hh Centroid.hh GappedXdropAligner.hh \
   10.54 - GeneralizedAffineGapCosts.hh SegmentPair.hh AlignmentPot.hh Alignment.hh \
   10.55 - SegmentPairPot.hh ScoreMatrix.hh TantanMasker.hh tantan.hh \
   10.56 - DiagonalTable.hh GreedyXdropAligner.hh gaplessXdrop.hh \
   10.57 - gaplessPssmXdrop.hh gaplessTwoQualityXdrop.hh zio.hh mcf_zstream.hh \
   10.58 - threadUtil.hh version.hh
   10.59 + CyclicSubsetSeed.hh Centroid.hh GappedXdropAligner.hh mcf_gap_costs.hh \
   10.60 + SegmentPair.hh AlignmentPot.hh Alignment.hh SegmentPairPot.hh \
   10.61 + ScoreMatrix.hh TantanMasker.hh tantan.hh DiagonalTable.hh \
   10.62 + GreedyXdropAligner.hh gaplessXdrop.hh gaplessPssmXdrop.hh \
   10.63 + gaplessTwoQualityXdrop.hh zio.hh mcf_zstream.hh threadUtil.hh version.hh
   10.64  lastdb.o lastdb.o8: lastdb.cc last.hh Alphabet.hh MultiSequence.hh \
   10.65   ScoreMatrixRow.hh VectorOrMmap.hh Mmap.hh fileMap.hh stringify.hh \
   10.66   SequenceFormat.hh qualityScoreUtil.hh LastdbArguments.hh \
   10.67   SubsetSuffixArray.hh CyclicSubsetSeed.hh TantanMasker.hh tantan.hh \
   10.68   zio.hh mcf_zstream.hh threadUtil.hh version.hh
   10.69 +mcf_gap_costs.o mcf_gap_costs.o8: mcf_gap_costs.cc mcf_gap_costs.hh
   10.70  mcf_substitution_matrix_stats.o mcf_substitution_matrix_stats.o8: mcf_substitution_matrix_stats.cc \
   10.71   mcf_substitution_matrix_stats.hh LambdaCalculator.hh cbrc_linalg.hh
   10.72  tantan.o tantan.o8: tantan.cc tantan.hh
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/src/mcf_gap_costs.cc	Thu Mar 28 19:59:32 2019 +0900
    11.3 @@ -0,0 +1,76 @@
    11.4 +// Author: Martin C. Frith 2019
    11.5 +// SPDX-License-Identifier: GPL-3.0-or-later
    11.6 +
    11.7 +#include "mcf_gap_costs.hh"
    11.8 +#include <stdexcept>
    11.9 +#include <assert.h>
   11.10 +#include <limits.h>
   11.11 +#include <stddef.h>  // size_t
   11.12 +
   11.13 +static void err(const char *s) {
   11.14 +  throw std::runtime_error(s);
   11.15 +}
   11.16 +
   11.17 +namespace mcf {
   11.18 +
   11.19 +static void assignGapCostPieces(const std::vector<int> &openCosts,
   11.20 +				const std::vector<int> &growCosts,
   11.21 +				std::vector<GapCosts::Piece> &pieces) {
   11.22 +  assert(openCosts.size() == growCosts.size());
   11.23 +  pieces.clear();
   11.24 +  for (size_t i = 0; i < openCosts.size(); ++i) {
   11.25 +    GapCosts::Piece p = {openCosts[i], growCosts[i]};
   11.26 +    if (p.growCost <= 0) err("gap extension cost must be > 0");
   11.27 +    pieces.push_back(p);
   11.28 +  }
   11.29 +  assert(!pieces.empty());
   11.30 +}
   11.31 +
   11.32 +void GapCosts::assign(const std::vector<int> &delOpenCosts,
   11.33 +		      const std::vector<int> &delGrowCosts,
   11.34 +		      const std::vector<int> &insOpenCosts,
   11.35 +		      const std::vector<int> &insGrowCosts,
   11.36 +		      int unalignedPairCost) {
   11.37 +  assignGapCostPieces(delOpenCosts, delGrowCosts, delPieces);
   11.38 +  assignGapCostPieces(insOpenCosts, insGrowCosts, insPieces);
   11.39 +  if (unalignedPairCost > 0) {
   11.40 +    pairCost = unalignedPairCost;
   11.41 +  } else {
   11.42 +    pairCost = delPieces[0].openCost + delPieces[0].growCost +
   11.43 +      insPieces[0].openCost + insPieces[0].growCost;
   11.44 +  }
   11.45 +}
   11.46 +
   11.47 +int GapCosts::cost(int refInsertLen, int qryInsertLen) const {
   11.48 +  int genCost = INT_MAX;
   11.49 +
   11.50 +  int delCost = 0;
   11.51 +  if (refInsertLen > 0) {
   11.52 +    delCost = INT_MAX;
   11.53 +    for (size_t i = 0; i < delPieces.size(); ++i) {
   11.54 +      int s = delPieces[i].openCost + delPieces[i].growCost * refInsertLen;
   11.55 +      delCost = std::min(delCost, s);
   11.56 +      if (refInsertLen >= qryInsertLen) {
   11.57 +	int t = s + (pairCost - delPieces[i].growCost) * qryInsertLen;
   11.58 +	genCost = std::min(genCost, t);
   11.59 +      }
   11.60 +    }
   11.61 +  }
   11.62 +
   11.63 +  int insCost = 0;
   11.64 +  if (qryInsertLen > 0) {
   11.65 +    insCost = INT_MAX;
   11.66 +    for (size_t i = 0; i < insPieces.size(); ++i) {
   11.67 +      int s = insPieces[i].openCost + insPieces[i].growCost * qryInsertLen;
   11.68 +      insCost = std::min(insCost, s);
   11.69 +      if (qryInsertLen >= refInsertLen) {
   11.70 +	int t = s + (pairCost - insPieces[i].growCost) * refInsertLen;
   11.71 +	genCost = std::min(genCost, t);
   11.72 +      }
   11.73 +    }
   11.74 +  }
   11.75 +
   11.76 +  return std::min(delCost + insCost, genCost);
   11.77 +}
   11.78 +
   11.79 +}
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/src/mcf_gap_costs.hh	Thu Mar 28 19:59:32 2019 +0900
    12.3 @@ -0,0 +1,53 @@
    12.4 +// Author: Martin C. Frith 2019
    12.5 +// SPDX-License-Identifier: GPL-3.0-or-later
    12.6 +
    12.7 +// Gap costs for pair-wise sequence alignment.
    12.8 +// A gap of length k costs: openCost + k * growCost.
    12.9 +
   12.10 +// "Generalized affine gap cost" is supported [SF Altschul 1998
   12.11 +// Proteins 32(1):88-96].  In this scheme, a "gap" may consist of
   12.12 +// unaligned regions in both sequences.  If these unaligned regions
   12.13 +// have sizes j and k, where j <= k, the cost is:
   12.14 +// openCost + growCost*(k-j) + pairCost*j
   12.15 +
   12.16 +// Different costs for deletions and insertions are supported.
   12.17 +
   12.18 +// Piecewise linear gap costs are supported.
   12.19 +
   12.20 +#ifndef MCF_GAP_COSTS_HH
   12.21 +#define MCF_GAP_COSTS_HH
   12.22 +
   12.23 +#include <vector>
   12.24 +
   12.25 +namespace mcf {
   12.26 +
   12.27 +struct GapCosts {
   12.28 +  struct Piece {
   12.29 +    int openCost;
   12.30 +    int growCost;
   12.31 +  };
   12.32 +
   12.33 +  std::vector<Piece> delPieces;
   12.34 +  std::vector<Piece> insPieces;
   12.35 +  int pairCost;
   12.36 +
   12.37 +  // Assign piecewise linear open and grow costs, and one pairCost.
   12.38 +  // If unalignedPairCost <= 0, assign non-generalized costs.
   12.39 +  // Throw a runtime_error if any growCost is <= 0.
   12.40 +  // delOpenCosts.size() must equal delGrowCosts.size(), and
   12.41 +  // insOpenCosts.size() must equal insGrowCosts.size().
   12.42 +  // The vectors must not be empty.
   12.43 +  void assign(const std::vector<int> &delOpenCosts,
   12.44 +	      const std::vector<int> &delGrowCosts,
   12.45 +	      const std::vector<int> &insOpenCosts,
   12.46 +	      const std::vector<int> &insGrowCosts,
   12.47 +	      int unalignedPairCost);
   12.48 +
   12.49 +  // The cost of a "gap" consisting of unaligned letters in the query
   12.50 +  // and/or reference sequence
   12.51 +  int cost(int refInsertLen, int qryInsertLen) const;
   12.52 +};
   12.53 +
   12.54 +}
   12.55 +
   12.56 +#endif