Refactor
authorMartin C. Frith
Fri Mar 29 14:46:20 2019 +0900 (5 months ago)
changeset 976f3f69ccff967
parent 975 3701b1e2019b
child 977 9eeaeea88f2c
Refactor
src/Alignment.cc
src/Centroid.cc
src/Centroid.hh
     1.1 --- a/src/Alignment.cc	Thu Mar 28 19:59:32 2019 +0900
     1.2 +++ b/src/Alignment.cc	Fri Mar 29 14:46:20 2019 +0900
     1.3 @@ -48,8 +48,6 @@
     1.4    // MI = IM - DI - PI + IQ
     1.5    // PM = MP - PD - PI - PQ
     1.6    // DM + IM + PM = MD + MI + MP - DQ - IQ - PQ
     1.7 -  // SM, SD, SP, SI seem to be always zero.
     1.8 -  // MQ, SQ ?
     1.9  }
    1.10  
    1.11  static void countSeedMatches( double* expectedCounts,
     2.1 --- a/src/Centroid.cc	Thu Mar 28 19:59:32 2019 +0900
     2.2 +++ b/src/Centroid.cc	Fri Mar 29 14:46:20 2019 +0900
     2.3 @@ -28,45 +28,15 @@
     2.4    ExpectedCount::ExpectedCount ()
     2.5    {
     2.6      double d0 = 0;
     2.7 -    MM = d0; MD = d0; MP = d0; MI = d0; MQ = d0;
     2.8 +    MM = d0; MD = d0; MP = d0; MI = d0;
     2.9      DD = d0; DM = d0; DI = d0;
    2.10      PP = d0; PM = d0; PD = d0; PI = d0;
    2.11      II = d0; IM = d0;
    2.12 -    SM = d0; SD = d0; SP = d0; SI = d0; SQ = d0;
    2.13  
    2.14      for (int n=0; n<scoreMatrixRowSize; n++)
    2.15        for (int m=0; m<scoreMatrixRowSize; m++) emit[n][m] = d0;
    2.16    }
    2.17  
    2.18 -  std::ostream& ExpectedCount::write (std::ostream& os) const
    2.19 -  {
    2.20 -    for (int n=0; n<scoreMatrixRowSize; ++n) {
    2.21 -      for (int m=0; m<scoreMatrixRowSize; ++m) {
    2.22 -	double prob = emit[n][m];
    2.23 -	if (prob > 0)
    2.24 -	  os << "emit[" << n << "][" << m << "]=" << emit[n][m] << std::endl;
    2.25 -      }
    2.26 -    }
    2.27 -    os << "M->M=" << MM << std::endl;
    2.28 -    os << "M->D=" << MD << std::endl;
    2.29 -    os << "M->P=" << MP << std::endl;
    2.30 -    os << "M->I=" << MI << std::endl;
    2.31 -
    2.32 -    os << "D->D=" << DD << std::endl;
    2.33 -    os << "D->M=" << DM << std::endl;
    2.34 -    os << "D->I=" << DI << std::endl;
    2.35 -
    2.36 -    os << "P->P=" << PP << std::endl;
    2.37 -    os << "P->M=" << PM << std::endl;
    2.38 -    os << "P->D=" << PD << std::endl;
    2.39 -    os << "P->I=" << PI << std::endl;
    2.40 -
    2.41 -    os << "I->I=" << II << std::endl;
    2.42 -    os << "I->M=" << IM << std::endl;
    2.43 -
    2.44 -    return os;
    2.45 -  }
    2.46 -
    2.47    void Centroid::setScoreMatrix( const ScoreMatrixRow* sm, double T ) {
    2.48      this -> T = T;
    2.49      this -> isPssm = false;
    2.50 @@ -131,14 +101,12 @@
    2.51  
    2.52    void Centroid::initForwardMatrix(){
    2.53      size_t n = xa.scoreEndIndex( numAntidiagonals );
    2.54 -
    2.55      if ( fM.size() < n ) {
    2.56        fM.resize( n );
    2.57        fD.resize( n );
    2.58        fI.resize( n );
    2.59        fP.resize( n );
    2.60      }
    2.61 -
    2.62      fM[0] = 1;
    2.63    }
    2.64  
    2.65 @@ -153,14 +121,16 @@
    2.66    void Centroid::forward(const uchar* seq1, const uchar* seq2,
    2.67  			 const ExpMatrixRow* pssm, bool isExtendFwd,
    2.68  			 const mcf::GapCosts& gap, int globality) {
    2.69 -    initForwardMatrix();
    2.70      const int seqIncrement = isExtendFwd ? 1 : -1;
    2.71      const bool isAffine = isAffineGapCosts(gap);
    2.72 +    initForwardMatrix();
    2.73 +
    2.74      const double eE = EXP(-gap.delPieces[0].growCost / T);
    2.75      const double eF = EXP(-gap.delPieces[0].openCost / T);
    2.76      const double eEI = EXP(-gap.insPieces[0].growCost / T);
    2.77      const double eFI = EXP(-gap.insPieces[0].openCost / T);
    2.78      const double eP = EXP(-gap.pairCost / T);
    2.79 +
    2.80      double Z = 0.0;  // partion function of forward values
    2.81  
    2.82      for( size_t k = 0; k < numAntidiagonals; ++k ){  // loop over antidiagonals
    2.83 @@ -199,18 +169,20 @@
    2.84  
    2.85  	if (isAffine) {
    2.86  	  while (1) {
    2.87 +	    const unsigned letter1 = *s1;
    2.88 +	    const unsigned letter2 = *s2;
    2.89 +	    const double matchProb = match_score[letter1][letter2];
    2.90 +
    2.91  	    const double xM = *fM2 * scale12;
    2.92  	    const double xD = *fD1 * seE;
    2.93  	    const double xI = *fI1 * seEI;
    2.94 -	    const unsigned letter1 = *s1;
    2.95 -	    const unsigned letter2 = *s2;
    2.96 -	    const double matchProb = match_score[letter1][letter2];
    2.97 +	    const double xSum = xM + xD + xI;
    2.98 +
    2.99  	    *fD0 = xM * eF + xD;
   2.100  	    *fI0 = (xM + xD) * eFI + xI;
   2.101 -	    const double total = xM + xD + xI;
   2.102 -	    *fM0 = total * matchProb;
   2.103 +	    *fM0 = xSum * matchProb;
   2.104  	    sum_f += xM;
   2.105 -	    if (globality && matchProb <= 0) Z += total;  // xxx
   2.106 +	    if (globality && matchProb <= 0) Z += xSum;  // xxx
   2.107  
   2.108  	    if (fM0 == fM0last) break;
   2.109  	    fM0++; fD0++; fI0++;
   2.110 @@ -220,20 +192,22 @@
   2.111  	  }
   2.112  	} else {
   2.113  	  while (1) {
   2.114 +	    const unsigned letter1 = *s1;
   2.115 +	    const unsigned letter2 = *s2;
   2.116 +	    const double matchProb = match_score[letter1][letter2];
   2.117 +
   2.118  	    const double xM = *fM2 * scale12;
   2.119  	    const double xD = *fD1 * seE;
   2.120  	    const double xI = *fI1 * seEI;
   2.121  	    const double xP = *fP2 * seP;
   2.122 -	    const unsigned letter1 = *s1;
   2.123 -	    const unsigned letter2 = *s2;
   2.124 -	    const double matchProb = match_score[letter1][letter2];
   2.125 +	    const double xSum = (xM + xD) + (xI + xP);
   2.126 +
   2.127  	    *fP0 = xM * eF + xP;
   2.128  	    *fD0 = xM * eF + xP + xD;
   2.129  	    *fI0 = (xM + xD) * eFI + (xI + xP);
   2.130 -	    const double total = (xM + xD) + (xI + xP);
   2.131 -	    *fM0 = total * matchProb;
   2.132 +	    *fM0 = xSum * matchProb;
   2.133  	    sum_f += xM;
   2.134 -	    if (globality && matchProb <= 0) Z += total;  // xxx
   2.135 +	    if (globality && matchProb <= 0) Z += xSum;  // xxx
   2.136  
   2.137  	    if (fM0 == fM0last) break;
   2.138  	    fM0++; fD0++; fI0++; fP0++;
   2.139 @@ -247,18 +221,20 @@
   2.140  
   2.141  	if (isAffine) {
   2.142  	  while (1) {
   2.143 +	    const unsigned letter1 = *s1;
   2.144 +	    const double *matchProbs = *p2;
   2.145 +	    const double matchProb = matchProbs[letter1];
   2.146 +
   2.147  	    const double xM = *fM2 * scale12;
   2.148  	    const double xD = *fD1 * seE;
   2.149  	    const double xI = *fI1 * seEI;
   2.150 -	    const unsigned letter1 = *s1;
   2.151 -	    const double *matchProbs = *p2;
   2.152 -	    const double matchProb = matchProbs[letter1];
   2.153 +	    const double xSum = xM + xD + xI;
   2.154 +
   2.155  	    *fD0 = xM * eF + xD;
   2.156  	    *fI0 = (xM + xD) * eFI + xI;
   2.157 -	    const double total = xM + xD + xI;
   2.158 -	    *fM0 = total * matchProb;
   2.159 +	    *fM0 = xSum * matchProb;
   2.160  	    sum_f += xM;
   2.161 -	    if (globality && matchProb <= 0) Z += total;  // xxx
   2.162 +	    if (globality && matchProb <= 0) Z += xSum;  // xxx
   2.163  
   2.164  	    if (fM0 == fM0last) break;
   2.165  	    fM0++; fD0++; fI0++;
   2.166 @@ -268,20 +244,22 @@
   2.167  	  }
   2.168  	} else {
   2.169  	  while (1) {
   2.170 +	    const unsigned letter1 = *s1;
   2.171 +	    const double *matchProbs = *p2;
   2.172 +	    const double matchProb = matchProbs[letter1];
   2.173 +
   2.174  	    const double xM = *fM2 * scale12;
   2.175  	    const double xD = *fD1 * seE;
   2.176  	    const double xI = *fI1 * seEI;
   2.177  	    const double xP = *fP2 * seP;
   2.178 -	    const unsigned letter1 = *s1;
   2.179 -	    const double *matchProbs = *p2;
   2.180 -	    const double matchProb = matchProbs[letter1];
   2.181 +	    const double xSum = (xM + xD) + (xI + xP);
   2.182 +
   2.183  	    *fP0 = xM * eF + xP;
   2.184  	    *fD0 = xM * eF + xP + xD;
   2.185  	    *fI0 = (xM + xD) * eFI + (xI + xP);
   2.186 -	    const double total = (xM + xD) + (xI + xP);
   2.187 -	    *fM0 = total * matchProb;
   2.188 +	    *fM0 = xSum * matchProb;
   2.189  	    sum_f += xM;
   2.190 -	    if (globality && matchProb <= 0) Z += total;  // xxx
   2.191 +	    if (globality && matchProb <= 0) Z += xSum;  // xxx
   2.192  
   2.193  	    if (fM0 == fM0last) break;
   2.194  	    fM0++; fD0++; fI0++; fP0++;
   2.195 @@ -307,14 +285,16 @@
   2.196    void Centroid::backward(const uchar* seq1, const uchar* seq2,
   2.197  			  const ExpMatrixRow* pssm, bool isExtendFwd,
   2.198  			  const mcf::GapCosts& gap, int globality) {
   2.199 -    initBackwardMatrix();
   2.200      const int seqIncrement = isExtendFwd ? 1 : -1;
   2.201      const bool isAffine = isAffineGapCosts(gap);
   2.202 +    initBackwardMatrix();
   2.203 +
   2.204      const double eE = EXP(-gap.delPieces[0].growCost / T);
   2.205      const double eF = EXP(-gap.delPieces[0].openCost / T);
   2.206      const double eEI = EXP(-gap.insPieces[0].growCost / T);
   2.207      const double eFI = EXP(-gap.insPieces[0].openCost / T);
   2.208      const double eP = EXP(-gap.pairCost / T);
   2.209 +
   2.210      double scaledUnit = 1.0;
   2.211  
   2.212      for( size_t k = numAntidiagonals; k-- > 0; ){
   2.213 @@ -358,10 +338,14 @@
   2.214  
   2.215  	if (isAffine) {
   2.216  	  while (1) {
   2.217 -	    const double matchProb = match_score[*s1][*s2];
   2.218 +	    const unsigned letter1 = *s1;
   2.219 +	    const unsigned letter2 = *s2;
   2.220 +	    const double matchProb = match_score[letter1][letter2];
   2.221 +
   2.222  	    const double yM = (*bM0) * matchProb;
   2.223  	    const double yD = *bD0;
   2.224  	    const double yI = *bI0;
   2.225 +
   2.226  	    double zM = yM + yD * eF + yI * eFI;
   2.227  	    double zD = yM + yD + yI * eFI;
   2.228  	    double zI = yM + yI;
   2.229 @@ -391,11 +375,15 @@
   2.230  	  }
   2.231  	} else {
   2.232  	  while (1) {
   2.233 -	    const double matchProb = match_score[*s1][*s2];
   2.234 +	    const unsigned letter1 = *s1;
   2.235 +	    const unsigned letter2 = *s2;
   2.236 +	    const double matchProb = match_score[letter1][letter2];
   2.237 +
   2.238  	    const double yM = (*bM0) * matchProb;
   2.239  	    const double yD = *bD0;
   2.240  	    const double yI = *bI0;
   2.241  	    const double yP = *bP0;
   2.242 +
   2.243  	    double zM = yM + yD * eF + yI * eFI + yP * eF;
   2.244  	    double zD = yM + yD + yI * eFI;
   2.245  	    double zI = yM + yI;
   2.246 @@ -431,10 +419,13 @@
   2.247  
   2.248  	if (isAffine) {
   2.249  	  while (1) {
   2.250 -	    const double matchProb = (*p2)[*s1];
   2.251 +	    const unsigned letter1 = *s1;
   2.252 +	    const double matchProb = (*p2)[letter1];
   2.253 +
   2.254  	    const double yM = (*bM0) * matchProb;
   2.255  	    const double yD = *bD0;
   2.256  	    const double yI = *bI0;
   2.257 +
   2.258  	    double zM = yM + yD * eF + yI * eFI;
   2.259  	    double zD = yM + yD + yI * eFI;
   2.260  	    double zI = yM + yI;
   2.261 @@ -462,11 +453,14 @@
   2.262  	  }
   2.263  	} else {
   2.264  	  while (1) {
   2.265 -	    const double matchProb = (*p2)[*s1];
   2.266 +	    const unsigned letter1 = *s1;
   2.267 +	    const double matchProb = (*p2)[letter1];
   2.268 +
   2.269  	    const double yM = (*bM0) * matchProb;
   2.270  	    const double yD = *bD0;
   2.271  	    const double yI = *bI0;
   2.272  	    const double yP = *bP0;
   2.273 +
   2.274  	    double zM = yM + yD * eF + yI * eFI + yP * eF;
   2.275  	    double zD = yM + yD + yI * eFI;
   2.276  	    double zI = yM + yI;
   2.277 @@ -614,7 +608,7 @@
   2.278  	const double u = gamma * thisD - thisXD;
   2.279  	const double t = gamma * thisI - thisXI;
   2.280  	const double oldX1 = *X1++;  // Added by MCF
   2.281 -	const double score = std::max( std::max( oldX1 + u, *X1 + t), *X2++ + s );
   2.282 +	const double score = std::max(std::max(oldX1 + u, *X1 + t), *X2++ + s);
   2.283  	updateScore ( score, k, seq1pos );
   2.284  	*X0++ = score;
   2.285  	seq1pos++;
   2.286 @@ -746,6 +740,7 @@
   2.287      int alphabetSizeIncrement = alphabetSize;
   2.288      if (!isExtendFwd) alphabetSizeIncrement *= -1;
   2.289      const bool isAffine = isAffineGapCosts(gap);
   2.290 +
   2.291      const double eE = EXP(-gap.delPieces[0].growCost / T);
   2.292      const double eF = EXP(-gap.delPieces[0].openCost / T);
   2.293      const double eEI = EXP(-gap.insPieces[0].growCost / T);
   2.294 @@ -785,15 +780,21 @@
   2.295  
   2.296  	if (isAffine) {
   2.297  	  while (1) {
   2.298 +	    const unsigned letter1 = *s1;
   2.299 +	    const unsigned letter2 = *s2;
   2.300 +	    const double matchProb = match_score[letter1][letter2];
   2.301 +
   2.302 +	    const double yM = *bM0 * matchProb;
   2.303 +	    const double yD = *bD0;
   2.304 +	    const double yI = *bI0;
   2.305 +
   2.306  	    const double xM = *fM2 * scale12;
   2.307  	    const double xD = *fD1 * seE;
   2.308  	    const double xI = *fI1 * seEI;
   2.309 -	    const unsigned letter1 = *s1;
   2.310 -	    const unsigned letter2 = *s2;
   2.311 -	    const double yM = *bM0 * match_score[letter1][letter2];
   2.312 -	    const double yD = *bD0;
   2.313 -	    const double yI = *bI0;
   2.314 -	    c.emit[letter1][letter2] += (xM + xD + xI) * yM;
   2.315 +	    const double xSum = xM + xD + xI;
   2.316 +
   2.317 +	    const double alignProb = xSum * yM;
   2.318 +	    c.emit[letter1][letter2] += alignProb;
   2.319  	    c.MM += xM * yM;
   2.320  	    c.DM += xD * yM;
   2.321  	    c.IM += xI * yM;
   2.322 @@ -802,6 +803,7 @@
   2.323  	    c.MI += xM * yI * eFI;
   2.324  	    c.DI += xD * yI * eFI;
   2.325  	    c.II += xI * yI;
   2.326 +
   2.327  	    if (bM0 == bM0last) break;
   2.328  	    fM2++; fD1++; fI1++;
   2.329  	    bM0++; bD0++; bI0++;
   2.330 @@ -810,17 +812,23 @@
   2.331  	  }
   2.332  	} else {
   2.333  	  while (1) {
   2.334 +	    const unsigned letter1 = *s1;
   2.335 +	    const unsigned letter2 = *s2;
   2.336 +	    const double matchProb = match_score[letter1][letter2];
   2.337 +
   2.338 +	    const double yM = *bM0 * matchProb;
   2.339 +	    const double yD = *bD0;
   2.340 +	    const double yI = *bI0;
   2.341 +	    const double yP = *bP0;
   2.342 +
   2.343  	    const double xM = *fM2 * scale12;
   2.344  	    const double xD = *fD1 * seE;
   2.345  	    const double xI = *fI1 * seEI;
   2.346  	    const double xP = *fP2 * seP;
   2.347 -	    const unsigned letter1 = *s1;
   2.348 -	    const unsigned letter2 = *s2;
   2.349 -	    const double yM = *bM0 * match_score[letter1][letter2];
   2.350 -	    const double yD = *bD0;
   2.351 -	    const double yI = *bI0;
   2.352 -	    const double yP = *bP0;
   2.353 -	    c.emit[letter1][letter2] += (xM + xD + xI + xP) * yM;
   2.354 +	    const double xSum = xM + xD + xI + xP;
   2.355 +
   2.356 +	    const double alignProb = xSum * yM;
   2.357 +	    c.emit[letter1][letter2] += alignProb;
   2.358  	    c.MM += xM * yM;
   2.359  	    c.DM += xD * yM;
   2.360  	    c.IM += xI * yM;
   2.361 @@ -834,6 +842,7 @@
   2.362  	    c.PI += xP * yI;
   2.363  	    c.MP += xM * yP * eF;
   2.364  	    c.PP += xP * yP;
   2.365 +
   2.366  	    if (bM0 == bM0last) break;
   2.367  	    fM2++; fD1++; fI1++; fP2++;
   2.368  	    bM0++; bD0++; bI0++; bP0++;
   2.369 @@ -841,22 +850,26 @@
   2.370  	    s2 -= seqIncrement;
   2.371  	  }
   2.372  	}
   2.373 -      }
   2.374 -      else {
   2.375 +      } else {
   2.376  	const ExpMatrixRow* p2 = isExtendFwd ? pssm + seq2pos : pssm - seq2pos;
   2.377  	const size_t a2 = seq2pos * alphabetSize;
   2.378  	const double* lp2 = isExtendFwd ? letterProbs + a2 : letterProbs - a2;
   2.379  
   2.380  	if (isAffine) {
   2.381  	  while (1) { // inner most loop
   2.382 +	    const unsigned letter1 = *s1;
   2.383 +	    const double matchProb = (*p2)[letter1];
   2.384 +
   2.385 +	    const double yM = *bM0 * matchProb;
   2.386 +	    const double yD = *bD0;
   2.387 +	    const double yI = *bI0;
   2.388 +
   2.389  	    const double xM = *fM2 * scale12;
   2.390  	    const double xD = *fD1 * seE;
   2.391  	    const double xI = *fI1 * seEI;
   2.392 -	    const unsigned letter1 = *s1;
   2.393 -	    const double yM = *bM0 * (*p2)[letter1];
   2.394 -	    const double yD = *bD0;
   2.395 -	    const double yI = *bI0;
   2.396 -	    const double alignProb = (xM + xD + xI) * yM;
   2.397 +	    const double xSum = xM + xD + xI;
   2.398 +
   2.399 +	    const double alignProb = xSum * yM;
   2.400  	    countUncertainLetters(c.emit[letter1], alignProb,
   2.401  				  alphabetSize, match_score[letter1], lp2);
   2.402  	    c.MM += xM * yM;
   2.403 @@ -867,6 +880,7 @@
   2.404  	    c.MI += xM * yI * eFI;
   2.405  	    c.DI += xD * yI * eFI;
   2.406  	    c.II += xI * yI;
   2.407 +
   2.408  	    if (bM0 == bM0last) break;
   2.409  	    fM2++; fD1++; fI1++;
   2.410  	    bM0++; bD0++; bI0++;
   2.411 @@ -874,18 +888,23 @@
   2.412  	    p2 -= seqIncrement;
   2.413  	    lp2 -= alphabetSizeIncrement;
   2.414  	  }
   2.415 -	}else{
   2.416 +	} else {
   2.417  	  while (1) { // inner most loop
   2.418 +	    const unsigned letter1 = *s1;
   2.419 +	    const double matchProb = (*p2)[letter1];
   2.420 +
   2.421 +	    const double yM = *bM0 * matchProb;
   2.422 +	    const double yD = *bD0;
   2.423 +	    const double yI = *bI0;
   2.424 +	    const double yP = *bP0;
   2.425 +
   2.426  	    const double xM = *fM2 * scale12;
   2.427  	    const double xD = *fD1 * seE;
   2.428  	    const double xI = *fI1 * seEI;
   2.429  	    const double xP = *fP2 * seP;
   2.430 -	    const unsigned letter1 = *s1;
   2.431 -	    const double yM = *bM0 * (*p2)[letter1];
   2.432 -	    const double yD = *bD0;
   2.433 -	    const double yI = *bI0;
   2.434 -	    const double yP = *bP0;
   2.435 -	    const double alignProb = (xM + xD + xI + xP) * yM;
   2.436 +	    const double xSum = xM + xD + xI + xP;
   2.437 +
   2.438 +	    const double alignProb = xSum * yM;
   2.439  	    countUncertainLetters(c.emit[letter1], alignProb,
   2.440  				  alphabetSize, match_score[letter1], lp2);
   2.441  	    c.MM += xM * yM;
   2.442 @@ -901,6 +920,7 @@
   2.443  	    c.PI += xP * yI;
   2.444  	    c.MP += xM * yP * eF;
   2.445  	    c.PP += xP * yP;
   2.446 +
   2.447  	    if (bM0 == bM0last) break;
   2.448  	    fM2++; fD1++; fI1++; fP2++;
   2.449  	    bM0++; bD0++; bI0++; bP0++;
     3.1 --- a/src/Centroid.hh	Thu Mar 28 19:59:32 2019 +0900
     3.2 +++ b/src/Centroid.hh	Fri Mar 29 14:46:20 2019 +0900
     3.3 @@ -16,14 +16,12 @@
     3.4    struct ExpectedCount{
     3.5    public:
     3.6      double emit[scoreMatrixRowSize][scoreMatrixRowSize];
     3.7 -    double MM, MD, MP, MI, MQ;
     3.8 +    double MM, MD, MP, MI;
     3.9      double DD, DM, DI;
    3.10      double PP, PM, PD, PI;
    3.11      double II, IM;
    3.12 -    double SM, SD, SP, SI, SQ;
    3.13    public:
    3.14      ExpectedCount ();
    3.15 -    std::ostream& write (std::ostream& os) const;
    3.16    };
    3.17  
    3.18    /**