Make last-split a bit faster
authorMartin C. Frith
Wed Mar 11 18:35:26 2020 +0900 (2 months ago)
changeset 1052ba6739cb6e7b
parent 1051 eb537293aa74
child 1053 13c4e8b892dc
Make last-split a bit faster
src/split/cbrc_split_aligner.cc
src/split/cbrc_split_aligner.hh
src/split/last-split.cc
     1.1 --- a/src/split/cbrc_split_aligner.cc	Wed Mar 11 16:58:27 2020 +0900
     1.2 +++ b/src/split/cbrc_split_aligner.cc	Wed Mar 11 18:35:26 2020 +0900
     1.3 @@ -482,7 +482,41 @@
     1.4    return sum;
     1.5  }
     1.6  
     1.7 -void SplitAligner::forward() {
     1.8 +void SplitAligner::forwardSplit() {
     1.9 +  resizeVector(rescales);
    1.10 +  resizeMatrix(Fmat);
    1.11 +
    1.12 +  unsigned sortedAlnPos = 0;
    1.13 +  unsigned oldNumInplay = 0;
    1.14 +  unsigned newNumInplay = 0;
    1.15 +
    1.16 +  stable_sort(sortedAlnIndices.begin(), sortedAlnIndices.end(),
    1.17 +	      QbegLess(&dpBegs[0], &rnameAndStrandIds[0], &rBegs[0]));
    1.18 +
    1.19 +  for (unsigned i = 0; i < numAlns; ++i) cell(Fmat, i, dpBeg(i)) = 0.0;
    1.20 +  double sumOfProbs = 1;
    1.21 +  double rescale = 1;
    1.22 +
    1.23 +  for (unsigned j = minBeg; j < maxEnd; j++) {
    1.24 +    updateInplayAlnIndicesF(sortedAlnPos, oldNumInplay, newNumInplay, j);
    1.25 +    cell(rescales, j) = rescale;
    1.26 +    double probFromJump = sumOfProbs * restartProb;
    1.27 +    double pSum = 0.0;
    1.28 +    for (unsigned x = 0; x < newNumInplay; ++x) {
    1.29 +      unsigned i = newInplayAlnIndices[x];
    1.30 +      size_t ij = matrixRowOrigins[i] + j;
    1.31 +      double p = (probFromJump + Fmat[ij] * Dexp[ij]) * Aexp[ij] * rescale;
    1.32 +      Fmat[ij + 1] = p;
    1.33 +      pSum += p;
    1.34 +    }
    1.35 +    sumOfProbs = pSum + sumOfProbs * rescale;
    1.36 +    rescale = 1 / (pSum + 1);
    1.37 +  }
    1.38 +
    1.39 +  cell(rescales, maxEnd) = 1 / sumOfProbs;  // makes scaled sumOfProbs equal 1
    1.40 +}
    1.41 +
    1.42 +void SplitAligner::forwardSplice() {
    1.43      resizeVector(rescales);
    1.44      resizeMatrix(Fmat);
    1.45  
    1.46 @@ -494,8 +528,7 @@
    1.47  		QbegLess(&dpBegs[0], &rnameAndStrandIds[0], &rBegs[0]));
    1.48  
    1.49      for (unsigned i = 0; i < numAlns; ++i) cell(Fmat, i, dpBeg(i)) = 0.0;
    1.50 -    double sumProb = 1;
    1.51 -    double probFromJump = restartProb;
    1.52 +    double probFromJump = 0;
    1.53      double begprob = 1.0;
    1.54      double zF = 0.0;  // sum of probabilities from the forward algorithm
    1.55      double rescale = 1;
    1.56 @@ -516,7 +549,7 @@
    1.57  	      p += probFromSpliceF(i, j, oldNumInplay, oldInplayPos);
    1.58  	    p *= spliceEndProb(ij);
    1.59  	    p += Fmat[ij] * Dexp[ij];
    1.60 -	    if (restartProb <= 0 && alns[i].qstart == j) p += begprob;
    1.61 +	    if (alns[i].qstart == j) p += begprob;
    1.62  	    p = p * Aexp[ij] * rescale;
    1.63  
    1.64  	    Fmat[ij + 1] = p;
    1.65 @@ -525,17 +558,42 @@
    1.66  	    rNew += p;
    1.67          }
    1.68          begprob *= rescale;
    1.69 -	sumProb = pSum + sumProb * rescale;
    1.70 -	probFromJump = pSum * jumpProb + sumProb * restartProb;
    1.71 +	probFromJump = pSum * jumpProb;
    1.72  	rescale = 1 / (rNew + 1);
    1.73      }
    1.74  
    1.75 -    if (restartProb > 0) zF = sumProb;
    1.76 -    //zF *= cell(rescales, maxEnd);
    1.77      cell(rescales, maxEnd) = 1 / zF;  // this causes scaled zF to equal 1
    1.78  }
    1.79  
    1.80 -void SplitAligner::backward() {
    1.81 +void SplitAligner::backwardSplit() {
    1.82 +  resizeMatrix(Bmat);
    1.83 +
    1.84 +  unsigned sortedAlnPos = 0;
    1.85 +  unsigned oldNumInplay = 0;
    1.86 +  unsigned newNumInplay = 0;
    1.87 +
    1.88 +  stable_sort(sortedAlnIndices.begin(), sortedAlnIndices.end(),
    1.89 +	      QendLess(&dpEnds[0], &rnameAndStrandIds[0], &rEnds[0]));
    1.90 +
    1.91 +  for (unsigned i = 0; i < numAlns; ++i) cell(Bmat, i, dpEnd(i)) = 0.0;
    1.92 +  double sumOfProbs = 1;
    1.93 +
    1.94 +  for (unsigned j = maxEnd; j > minBeg; j--) {
    1.95 +    updateInplayAlnIndicesB(sortedAlnPos, oldNumInplay, newNumInplay, j);
    1.96 +    double rescale = cell(rescales, j);
    1.97 +    double pSum = 0.0;
    1.98 +    for (unsigned x = 0; x < newNumInplay; ++x) {
    1.99 +      unsigned i = newInplayAlnIndices[x];
   1.100 +      size_t ij = matrixRowOrigins[i] + j;
   1.101 +      double p = (sumOfProbs + Bmat[ij] * Dexp[ij]) * Aexp[ij - 1] * rescale;
   1.102 +      Bmat[ij - 1] = p;
   1.103 +      pSum += p;
   1.104 +    }
   1.105 +    sumOfProbs = pSum * restartProb + sumOfProbs * rescale;
   1.106 +  }
   1.107 +}
   1.108 +
   1.109 +void SplitAligner::backwardSplice() {
   1.110      resizeMatrix(Bmat);
   1.111  
   1.112      unsigned sortedAlnPos = 0;
   1.113 @@ -546,8 +604,7 @@
   1.114  		QendLess(&dpEnds[0], &rnameAndStrandIds[0], &rEnds[0]));
   1.115  
   1.116      for (unsigned i = 0; i < numAlns; ++i) cell(Bmat, i, dpEnd(i)) = 0.0;
   1.117 -    double sumProb = (restartProb > 0) ? 1 : 0;
   1.118 -    double probFromJump = sumProb;
   1.119 +    double probFromJump = 0;
   1.120      double endprob = 1.0;
   1.121      //double zB = 0.0;  // sum of probabilities from the backward algorithm
   1.122  
   1.123 @@ -566,7 +623,7 @@
   1.124  	      p += probFromSpliceB(i, j, oldNumInplay, oldInplayPos);
   1.125  	    p *= spliceBegProb(ij);
   1.126  	    p += Bmat[ij] * Dexp[ij];
   1.127 -	    if (restartProb <= 0 && alns[i].qend == j) p += endprob;
   1.128 +	    if (alns[i].qend == j) p += endprob;
   1.129  	    p = p * Aexp[ij - 1] * rescale;
   1.130  
   1.131  	    Bmat[ij - 1] = p;
   1.132 @@ -574,12 +631,8 @@
   1.133  	    pSum += p * spliceEndProb(ij - 1);
   1.134          }
   1.135          endprob *= rescale;
   1.136 -	sumProb = pSum * restartProb + sumProb * rescale;
   1.137 -	probFromJump = pSum * jumpProb + sumProb;
   1.138 +	probFromJump = pSum * jumpProb;
   1.139      }
   1.140 -
   1.141 -    //if (restartProb > 0) zB = sumProb;
   1.142 -    //zB *= cell(rescales, minBeg);
   1.143  }
   1.144  
   1.145  std::vector<double>
     2.1 --- a/src/split/cbrc_split_aligner.hh	Wed Mar 11 16:58:27 2020 +0900
     2.2 +++ b/src/split/cbrc_split_aligner.hh	Wed Mar 11 18:35:26 2020 +0900
     2.3 @@ -91,9 +91,20 @@
     2.4      int segmentScore(unsigned alnNum,
     2.5  		     unsigned queryBeg, unsigned queryEnd) const;
     2.6  
     2.7 -    void forward();
     2.8 +    void forwardSplit();
     2.9 +    void backwardSplit();
    2.10 +    void forwardSplice();
    2.11 +    void backwardSplice();
    2.12  
    2.13 -    void backward();
    2.14 +    void forwardBackward() {
    2.15 +      if (restartProb > 0) {
    2.16 +	forwardSplit();
    2.17 +	backwardSplit();
    2.18 +      } else {
    2.19 +	forwardSplice();
    2.20 +	backwardSplice();
    2.21 +      }
    2.22 +    }
    2.23  
    2.24      // Returns one probability per column, for a segment of an alignment
    2.25      std::vector<double> marginalProbs(unsigned queryBeg, unsigned alnNum,
     3.1 --- a/src/split/last-split.cc	Wed Mar 11 16:58:27 2020 +0900
     3.2 +++ b/src/split/last-split.cc	Wed Mar 11 18:35:26 2020 +0900
     3.3 @@ -184,13 +184,11 @@
     3.4    sa.initMatricesForOneQuery();
     3.5  
     3.6    if (opts.direction != 0) {
     3.7 -    sa.forward();
     3.8 -    sa.backward();
     3.9 +    sa.forwardBackward();
    3.10    }
    3.11    if (opts.direction != 1) {
    3.12      sa.flipSpliceSignals();
    3.13 -    sa.forward();
    3.14 -    sa.backward();
    3.15 +    sa.forwardBackward();
    3.16      sa.flipSpliceSignals();
    3.17    }
    3.18