Refactor
authorMartin C. Frith
Wed Feb 27 09:56:54 2019 +0900 (6 months ago)
changeset 972ae880ec00591
parent 971 6189dce68b46
child 973 1ed9db7f46ca
Refactor
src/lastal.cc
     1.1 --- a/src/lastal.cc	Tue Feb 26 23:56:16 2019 +0900
     1.2 +++ b/src/lastal.cc	Wed Feb 27 09:56:54 2019 +0900
     1.3 @@ -171,9 +171,16 @@
     1.4    // This would work, except the maxDrops aren't finalized yet:
     1.5    // maxScore = std::max(args.maxDropGapped, args.maxDropFinal) + 1;
     1.6  
     1.7 +  for (unsigned i = 0; i < scoreMatrixRowSize; ++i) {  // xxx
     1.8 +    for (unsigned j = 0; j < scoreMatrixRowSize; ++j) {
     1.9 +      fwdMatrices.scores[i][j] = scoreMatrix.caseInsensitive[i][j];
    1.10 +      fwdMatrices.scoresMasked[i][j] = scoreMatrix.caseSensitive[i][j];
    1.11 +    }
    1.12 +  }
    1.13 +
    1.14    if( args.isQueryStrandMatrix && args.strand != 1 ){
    1.15 -    complementMatrix( scoreMatrix.caseInsensitive, revMatrices.scores );
    1.16 -    complementMatrix( scoreMatrix.caseSensitive, revMatrices.scoresMasked );
    1.17 +    complementMatrix(fwdMatrices.scores, revMatrices.scores);
    1.18 +    complementMatrix(fwdMatrices.scoresMasked, revMatrices.scoresMasked);
    1.19    }
    1.20  }
    1.21  
    1.22 @@ -185,7 +192,7 @@
    1.23        return warn( args.programName,
    1.24  		   "quality data not used for DNA-versus-protein alignment" );
    1.25  
    1.26 -  const ScoreMatrixRow* m = scoreMatrix.caseSensitive;  // case isn't relevant
    1.27 +  const ScoreMatrixRow* m = fwdMatrices.scoresMasked;  // case isn't relevant
    1.28    const ScoreMatrixRow* mRev = revMatrices.scores;
    1.29    bool isMatchMismatch = (args.matrixFile.empty() && args.matchScore > 0);
    1.30    bool isPhred1 = isPhred( referenceFormat );
    1.31 @@ -277,7 +284,7 @@
    1.32    LOG( "getting E-value parameters..." );
    1.33    try{
    1.34      evaluer.init( canonicalMatrixName, args.matchScore, args.mismatchCost,
    1.35 -                  alph.letters.c_str(), scoreMatrix.caseSensitive,
    1.36 +                  alph.letters.c_str(), fwdMatrices.scoresMasked,
    1.37  		  stats.letterProbs1(), stats.letterProbs2(), isGapped,
    1.38                    gapCosts.delExist, gapCosts.delExtend,
    1.39                    gapCosts.insExist, gapCosts.insExtend,
    1.40 @@ -391,41 +398,6 @@
    1.41    }
    1.42  }
    1.43  
    1.44 -const ScoreMatrixRow *getScoreMatrix(char strand, bool isMask) {
    1.45 -  if (strand == '+' || !args.isQueryStrandMatrix)
    1.46 -    return isMask ? scoreMatrix.caseSensitive : scoreMatrix.caseInsensitive;
    1.47 -  else
    1.48 -    return isMask ? revMatrices.scoresMasked : revMatrices.scores;
    1.49 -}
    1.50 -
    1.51 -const OneQualityScoreMatrix &getOneQualityMatrix(char strand, bool isMask) {
    1.52 -  if (strand == '+' || !args.isQueryStrandMatrix)
    1.53 -    return isMask ? fwdMatrices.oneQualMasked : fwdMatrices.oneQual;
    1.54 -  else
    1.55 -    return isMask ? revMatrices.oneQualMasked : revMatrices.oneQual;
    1.56 -}
    1.57 -
    1.58 -const TwoQualityScoreMatrix &getTwoQualityMatrix(char strand, bool isMask) {
    1.59 -  if (strand == '+' || !args.isQueryStrandMatrix)
    1.60 -    return isMask ? fwdMatrices.twoQualMasked : fwdMatrices.twoQual;
    1.61 -  else
    1.62 -    return isMask ? revMatrices.twoQualMasked : revMatrices.twoQual;
    1.63 -}
    1.64 -
    1.65 -const OneQualityExpMatrix &getOneQualityExpMatrix(char strand) {
    1.66 -  if (strand == '+' || !args.isQueryStrandMatrix)
    1.67 -    return fwdMatrices.oneQualExp;
    1.68 -  else
    1.69 -    return revMatrices.oneQualExp;
    1.70 -}
    1.71 -
    1.72 -const QualityPssmMaker &getQualityPssmMaker(char strand) {
    1.73 -  if (strand == '+' || !args.isQueryStrandMatrix)
    1.74 -    return fwdMatrices.maker;
    1.75 -  else
    1.76 -    return revMatrices.maker;
    1.77 -}
    1.78 -
    1.79  static const uchar *getQueryQual(size_t queryNum) {
    1.80    const uchar *q = query.qualityReader();
    1.81    if (q) q += query.padBeg(queryNum) * query.qualsPerLetter();
    1.82 @@ -460,15 +432,15 @@
    1.83    int d;  // the maximum score drop
    1.84    int z;
    1.85  
    1.86 -  Dispatcher( Phase::Enum e, const LastAligner& aligner,
    1.87 -	      size_t queryNum, char strand, const uchar* querySeq ) :
    1.88 +  Dispatcher( Phase::Enum e, const LastAligner& aligner, size_t queryNum,
    1.89 +	      const SubstitutionMatrices &matrices, const uchar* querySeq ) :
    1.90        a( text.seqReader() ),
    1.91        b( querySeq ),
    1.92        i( text.qualityReader() ),
    1.93        j( getQueryQual(queryNum) ),
    1.94        p( getQueryPssm(aligner, queryNum) ),
    1.95 -      m( getScoreMatrix( strand, isMaskLowercase(e) ) ),
    1.96 -      t( getTwoQualityMatrix( strand, isMaskLowercase(e) ) ),
    1.97 +      m( isMaskLowercase(e) ? matrices.scoresMasked : matrices.scores ),
    1.98 +      t( isMaskLowercase(e) ? matrices.twoQualMasked : matrices.twoQual ),
    1.99        d( (e == Phase::gapless) ? args.maxDropGapless :
   1.100           (e == Phase::gapped ) ? args.maxDropGapped : args.maxDropFinal ),
   1.101        z( t ? 2 : p ? 1 : 0 ){}
   1.102 @@ -547,9 +519,10 @@
   1.103  
   1.104  // Find query matches to the suffix array, and do gapless extensions
   1.105  void alignGapless( LastAligner& aligner, SegmentPairPot& gaplessAlns,
   1.106 -		   size_t queryNum, char strand, const uchar* querySeq ){
   1.107 +		   size_t queryNum, const SubstitutionMatrices &matrices,
   1.108 +		   const uchar* querySeq ){
   1.109    const bool isOverlap = (args.globality && args.outputType == 1);
   1.110 -  Dispatcher dis( Phase::gapless, aligner, queryNum, strand, querySeq );
   1.111 +  Dispatcher dis(Phase::gapless, aligner, queryNum, matrices, querySeq);
   1.112    DiagonalTable dt;  // record already-covered positions on each diagonal
   1.113    countT matchCount = 0, gaplessExtensionCount = 0, gaplessAlignmentCount = 0;
   1.114    size_t significantAlignmentCount = 0;
   1.115 @@ -652,9 +625,9 @@
   1.116  // Do gapped extensions of the gapless alignments
   1.117  void alignGapped( LastAligner& aligner,
   1.118  		  AlignmentPot& gappedAlns, SegmentPairPot& gaplessAlns,
   1.119 -                  size_t queryNum, char strand, const uchar* querySeq,
   1.120 -		  Phase::Enum phase ){
   1.121 -  Dispatcher dis( phase, aligner, queryNum, strand, querySeq );
   1.122 +                  size_t queryNum, const SubstitutionMatrices &matrices,
   1.123 +		  const uchar* querySeq, Phase::Enum phase ){
   1.124 +  Dispatcher dis(phase, aligner, queryNum, matrices, querySeq);
   1.125    size_t frameSize = args.isFrameshift() ? (query.padLen(queryNum) / 3) : 0;
   1.126    countT gappedExtensionCount = 0, gappedAlignmentCount = 0;
   1.127  
   1.128 @@ -728,9 +701,10 @@
   1.129  // Print the gapped alignments, after optionally calculating match
   1.130  // probabilities and re-aligning using the gamma-centroid algorithm
   1.131  void alignFinish( LastAligner& aligner, const AlignmentPot& gappedAlns,
   1.132 -		  size_t queryNum, char strand, const uchar* querySeq ){
   1.133 +		  size_t queryNum, const SubstitutionMatrices &matrices,
   1.134 +		  const uchar* querySeq ){
   1.135    Centroid& centroid = aligner.centroid;
   1.136 -  Dispatcher dis( Phase::final, aligner, queryNum, strand, querySeq );
   1.137 +  Dispatcher dis(Phase::final, aligner, queryNum, matrices, querySeq);
   1.138    size_t queryLen = query.padLen(queryNum);
   1.139    size_t frameSize = args.isFrameshift() ? (queryLen / 3) : 0;
   1.140  
   1.141 @@ -738,9 +712,6 @@
   1.142      if( dis.p ){
   1.143        if (args.outputType == 7) {
   1.144  	centroid.setScoreMatrix(dis.m, args.temperature);
   1.145 -	bool isFwd = (strand == '+' || !args.isQueryStrandMatrix);
   1.146 -	const SubstitutionMatrices &matrices =
   1.147 -	  isFwd ? fwdMatrices : revMatrices;
   1.148  	centroid.setLetterProbsPerPosition(alph.size, queryLen, dis.b, dis.j,
   1.149  					   isUseFastq(args.inputFormat),
   1.150  					   matrices.maker.qualToProbRight(),
   1.151 @@ -748,7 +719,7 @@
   1.152  					   alph.numbersToUppercase);
   1.153        }
   1.154        centroid.setPssm(dis.p, queryLen, args.temperature,
   1.155 -		       getOneQualityExpMatrix(strand), dis.b, dis.j);
   1.156 +		       matrices.oneQualExp, dis.b, dis.j);
   1.157      }
   1.158      else{
   1.159        centroid.setScoreMatrix( dis.m, args.temperature );
   1.160 @@ -778,10 +749,11 @@
   1.161  }
   1.162  
   1.163  static void eraseWeakAlignments(LastAligner &aligner, AlignmentPot &gappedAlns,
   1.164 -				size_t queryNum, char strand,
   1.165 +				size_t queryNum,
   1.166 +				const SubstitutionMatrices &matrices,
   1.167  				const uchar *querySeq) {
   1.168    size_t frameSize = args.isFrameshift() ? (query.padLen(queryNum) / 3) : 0;
   1.169 -  Dispatcher dis(Phase::gapless, aligner, queryNum, strand, querySeq);
   1.170 +  Dispatcher dis(Phase::gapless, aligner, queryNum, matrices, querySeq);
   1.171    for (size_t i = 0; i < gappedAlns.size(); ++i) {
   1.172      Alignment &a = gappedAlns.items[i];
   1.173      if (!a.hasGoodSegment(dis.a, dis.b, args.minScoreGapped, dis.m, gapCosts,
   1.174 @@ -842,8 +814,8 @@
   1.175  }
   1.176  
   1.177  void makeQualityPssm( LastAligner& aligner,
   1.178 -		      size_t queryNum, char strand, const uchar* querySeq,
   1.179 -		      bool isMask ){
   1.180 +		      size_t queryNum, const SubstitutionMatrices &matrices,
   1.181 +		      const uchar* querySeq, bool isMask ){
   1.182    if (!isUseQuality(args.inputFormat) || isUseQuality(referenceFormat)) return;
   1.183    if( args.isTranslated() || args.isGreedy ) return;
   1.184  
   1.185 @@ -857,53 +829,53 @@
   1.186    int *pssm = &qualityPssm[0];
   1.187  
   1.188    if( args.inputFormat == sequenceFormat::prb ){
   1.189 -    const QualityPssmMaker &m = getQualityPssmMaker( strand );
   1.190 -    m.make( seqBeg, seqEnd, q, pssm, isMask );
   1.191 +    matrices.maker.make( seqBeg, seqEnd, q, pssm, isMask );
   1.192    }
   1.193    else {
   1.194 -    const OneQualityScoreMatrix &m = getOneQualityMatrix( strand, isMask );
   1.195 +    const OneQualityScoreMatrix &m =
   1.196 +      isMask ? matrices.oneQualMasked : matrices.oneQual;
   1.197      makePositionSpecificScoreMatrix( m, seqBeg, seqEnd, q, pssm );
   1.198    }
   1.199  }
   1.200  
   1.201  // Scan one query sequence against one database volume
   1.202 -void scan( LastAligner& aligner,
   1.203 -	   size_t queryNum, char strand, const uchar* querySeq ){
   1.204 +void scan(LastAligner& aligner, size_t queryNum,
   1.205 +	  const SubstitutionMatrices &matrices, const uchar* querySeq) {
   1.206    if( args.outputType == 0 ){  // we just want match counts
   1.207      countMatches( queryNum, querySeq );
   1.208      return;
   1.209    }
   1.210  
   1.211    bool isMask = (args.maskLowercase > 0);
   1.212 -  makeQualityPssm( aligner, queryNum, strand, querySeq, isMask );
   1.213 +  makeQualityPssm(aligner, queryNum, matrices, querySeq, isMask);
   1.214  
   1.215    SegmentPairPot gaplessAlns;
   1.216 -  alignGapless( aligner, gaplessAlns, queryNum, strand, querySeq );
   1.217 +  alignGapless(aligner, gaplessAlns, queryNum, matrices, querySeq);
   1.218    if( args.outputType == 1 ) return;  // we just want gapless alignments
   1.219    if( gaplessAlns.size() == 0 ) return;
   1.220  
   1.221    if( args.maskLowercase == 1 || args.maskLowercase == 2 )
   1.222 -    makeQualityPssm( aligner, queryNum, strand, querySeq, false );
   1.223 +    makeQualityPssm(aligner, queryNum, matrices, querySeq, false);
   1.224  
   1.225    AlignmentPot gappedAlns;
   1.226  
   1.227    if( args.maxDropFinal != args.maxDropGapped ){
   1.228      alignGapped( aligner, gappedAlns, gaplessAlns,
   1.229 -		 queryNum, strand, querySeq, Phase::gapped );
   1.230 +		 queryNum, matrices, querySeq, Phase::gapped );
   1.231      erase_if( gaplessAlns.items, SegmentPairPot::isNotMarkedAsGood );
   1.232    }
   1.233  
   1.234    alignGapped( aligner, gappedAlns, gaplessAlns,
   1.235 -	       queryNum, strand, querySeq, Phase::final );
   1.236 +	       queryNum, matrices, querySeq, Phase::final );
   1.237    if( gappedAlns.size() == 0 ) return;
   1.238  
   1.239    if (args.maskLowercase == 2) {
   1.240 -    makeQualityPssm(aligner, queryNum, strand, querySeq, true);
   1.241 -    eraseWeakAlignments(aligner, gappedAlns, queryNum, strand, querySeq);
   1.242 +    makeQualityPssm(aligner, queryNum, matrices, querySeq, true);
   1.243 +    eraseWeakAlignments(aligner, gappedAlns, queryNum, matrices, querySeq);
   1.244      LOG2("lowercase-filtered alignments=" << gappedAlns.size());
   1.245      if (gappedAlns.size() == 0) return;
   1.246      if (args.outputType > 3)
   1.247 -      makeQualityPssm(aligner, queryNum, strand, querySeq, false);
   1.248 +      makeQualityPssm(aligner, queryNum, matrices, querySeq, false);
   1.249    }
   1.250  
   1.251    if( args.outputType > 2 ){  // we want non-redundant alignments
   1.252 @@ -912,7 +884,7 @@
   1.253    }
   1.254  
   1.255    if( !isCollatedAlignments() ) gappedAlns.sort();  // sort by score
   1.256 -  alignFinish( aligner, gappedAlns, queryNum, strand, querySeq );
   1.257 +  alignFinish(aligner, gappedAlns, queryNum, matrices, querySeq);
   1.258  }
   1.259  
   1.260  static void tantanMaskOneQuery(size_t queryNum, uchar *querySeq) {
   1.261 @@ -937,7 +909,8 @@
   1.262  
   1.263  // Scan one query sequence strand against one database volume,
   1.264  // after optionally translating and/or masking the query
   1.265 -void translateAndScan( LastAligner& aligner, size_t queryNum, char strand ){
   1.266 +void translateAndScan(LastAligner& aligner,
   1.267 +		      size_t queryNum, const SubstitutionMatrices &matrices) {
   1.268    const uchar* querySeq = query.seqReader() + query.padBeg(queryNum);
   1.269    std::vector<uchar> modifiedQuery;
   1.270    size_t size = query.padLen(queryNum);
   1.271 @@ -958,7 +931,7 @@
   1.272    }
   1.273  
   1.274    size_t oldNumOfAlns = aligner.textAlns.size();
   1.275 -  scan( aligner, queryNum, strand, querySeq );
   1.276 +  scan( aligner, queryNum, matrices, querySeq );
   1.277    cullFinalAlignments( aligner.textAlns, oldNumOfAlns );
   1.278  }
   1.279  
   1.280 @@ -968,13 +941,14 @@
   1.281      query.reverseComplementOneSequence(queryNum, queryAlph.complement);
   1.282  
   1.283    if (args.strand != 0)
   1.284 -    translateAndScan(aligner, queryNum, '+');
   1.285 +    translateAndScan(aligner, queryNum, fwdMatrices);
   1.286  
   1.287    if (args.strand == 2 || (args.strand == 0 && isFirstVolume))
   1.288      query.reverseComplementOneSequence(queryNum, queryAlph.complement);
   1.289  
   1.290    if (args.strand != 1)
   1.291 -    translateAndScan(aligner, queryNum, '-');
   1.292 +    translateAndScan(aligner, queryNum,
   1.293 +		     args.isQueryStrandMatrix ? revMatrices : fwdMatrices);
   1.294  }
   1.295  
   1.296  static void alignSomeQueries(size_t chunkNum,