Make tantan repeat-finding faster default tip
authorMartin C. Frith
Thu May 07 08:14:11 2020 +0900 (3 weeks ago)
changeset 1061543d36d39ce3
parent 1060 ca3f417edc01
Make tantan repeat-finding faster
src/tantan.cc
     1.1 --- a/src/tantan.cc	Mon Mar 16 12:29:28 2020 +0900
     1.2 +++ b/src/tantan.cc	Thu May 07 08:14:11 2020 +0900
     1.3 @@ -274,6 +274,10 @@
     1.4      return seqPtr - seqBeg < maxRepeatOffset;
     1.5    }
     1.6  
     1.7 +  int maxOffsetInTheSequence() {
     1.8 +    return isNearSeqBeg() ? (seqPtr - seqBeg) : maxRepeatOffset;
     1.9 +  }
    1.10 +
    1.11    const uchar *seqFurthestBack() {
    1.12      return isNearSeqBeg() ? seqBeg : seqPtr - maxRepeatOffset;
    1.13    }
    1.14 @@ -296,6 +300,50 @@
    1.15      }
    1.16    }
    1.17  
    1.18 +  void calcForwardTransitionAndEmissionProbs() {
    1.19 +    if (endGapProb > 0) {
    1.20 +      calcForwardTransitionProbsWithGaps();
    1.21 +      calcEmissionProbs();
    1.22 +      return;
    1.23 +    }
    1.24 +
    1.25 +    double b = backgroundProb;
    1.26 +    double fromForeground = 0;
    1.27 +    double *foregroundBeg = BEG(foregroundProbs);
    1.28 +    const double *lrRow = likelihoodRatioMatrix[*seqPtr];
    1.29 +    int maxOffset = maxOffsetInTheSequence();
    1.30 +
    1.31 +    for (int i = 0; i < maxOffset; ++i) {
    1.32 +      double f = foregroundBeg[i];
    1.33 +      fromForeground += f;
    1.34 +      foregroundBeg[i] = (b * b2fProbs[i] + f * f2f0) * lrRow[seqPtr[-i-1]];
    1.35 +    }
    1.36 +
    1.37 +    backgroundProb = b * b2b + fromForeground * f2b;
    1.38 +  }
    1.39 +
    1.40 +  void calcEmissionAndBackwardTransitionProbs() {
    1.41 +    if (endGapProb > 0) {
    1.42 +      calcEmissionProbs();
    1.43 +      calcBackwardTransitionProbsWithGaps();
    1.44 +      return;
    1.45 +    }
    1.46 +
    1.47 +    double toBackground = f2b * backgroundProb;
    1.48 +    double toForeground = 0;
    1.49 +    double *foregroundBeg = BEG(foregroundProbs);
    1.50 +    const double *lrRow = likelihoodRatioMatrix[*seqPtr];
    1.51 +    int maxOffset = maxOffsetInTheSequence();
    1.52 +
    1.53 +    for (int i = 0; i < maxOffset; ++i) {
    1.54 +      double f = foregroundBeg[i] * lrRow[seqPtr[-i-1]];
    1.55 +      toForeground += b2fProbs[i] * f;
    1.56 +      foregroundBeg[i] = toBackground + f2f0 * f;
    1.57 +    }
    1.58 +
    1.59 +    backgroundProb = b2b * backgroundProb + toForeground;
    1.60 +  }
    1.61 +
    1.62    void rescale(double scale) {
    1.63      backgroundProb *= scale;
    1.64      multiplyAll(foregroundProbs, scale);
    1.65 @@ -322,8 +370,7 @@
    1.66      initializeForwardAlgorithm();
    1.67  
    1.68      while (seqPtr < seqEnd) {
    1.69 -      calcForwardTransitionProbs();
    1.70 -      calcEmissionProbs();
    1.71 +      calcForwardTransitionAndEmissionProbs();
    1.72        rescaleForward();
    1.73        *letterProbs = static_cast<float>(backgroundProb);
    1.74        ++letterProbs;
    1.75 @@ -343,8 +390,7 @@
    1.76        // a sequence:
    1.77        *letterProbs = 1 - static_cast<float>(nonRepeatProb);
    1.78        rescaleBackward();
    1.79 -      calcEmissionProbs();
    1.80 -      calcBackwardTransitionProbs();
    1.81 +      calcEmissionAndBackwardTransitionProbs();
    1.82      }
    1.83  
    1.84      double z2 = backwardTotal();