Make last-split a bit faster
authorMartin C. Frith
Wed Mar 11 21:32:19 2020 +0900 (2 months ago)
changeset 10548621c8e512ff
parent 1053 13c4e8b892dc
child 1055 3887935792ab
Make last-split a bit faster
src/split/cbrc_split_aligner.cc
     1.1 --- a/src/split/cbrc_split_aligner.cc	Wed Mar 11 19:19:09 2020 +0900
     1.2 +++ b/src/split/cbrc_split_aligner.cc	Wed Mar 11 21:32:19 2020 +0900
     1.3 @@ -24,6 +24,30 @@
     1.4  }
     1.5  
     1.6  // Orders candidate alignments by increasing DP start coordinate.
     1.7 +// Breaks ties by decreasing DP end coordinate.
     1.8 +struct DpBegLess {
     1.9 +  DpBegLess(const unsigned *b, const unsigned *e) : dpBegs(b), dpEnds(e) {}
    1.10 +  bool operator()(unsigned a, unsigned b) const {
    1.11 +    return
    1.12 +      dpBegs[a] != dpBegs[b] ? dpBegs[a] < dpBegs[b] : dpEnds[a] > dpEnds[b];
    1.13 +  }
    1.14 +  const unsigned *dpBegs;
    1.15 +  const unsigned *dpEnds;
    1.16 +};
    1.17 +
    1.18 +// Orders candidate alignments by decreasing DP end coordinate.
    1.19 +// Breaks ties by increasing DP start coordinate.
    1.20 +struct DpEndLess {
    1.21 +  DpEndLess(const unsigned *b, const unsigned *e) : dpBegs(b), dpEnds(e) {}
    1.22 +  bool operator()(unsigned a, unsigned b) const {
    1.23 +    return
    1.24 +      dpEnds[a] != dpEnds[b] ? dpEnds[a] > dpEnds[b] : dpBegs[a] < dpBegs[b];
    1.25 +  }
    1.26 +  const unsigned *dpBegs;
    1.27 +  const unsigned *dpEnds;
    1.28 +};
    1.29 +
    1.30 +// Orders candidate alignments by increasing DP start coordinate.
    1.31  // Breaks ties by chromosome & strand, then by increasing genomic
    1.32  // start coordinate.
    1.33  struct QbegLess {
    1.34 @@ -298,23 +322,33 @@
    1.35    resizeMatrix(Vmat);
    1.36    resizeVector(Vvec);
    1.37  
    1.38 -  unsigned sortedAlnPos = 0;
    1.39 -  unsigned oldNumInplay = 0;
    1.40 -  unsigned newNumInplay = 0;
    1.41 +  unsigned *inplayAlnBeg = &newInplayAlnIndices[0];
    1.42 +  unsigned *inplayAlnEnd = inplayAlnBeg;
    1.43 +  unsigned *sortedAlnPtr = &sortedAlnIndices[0];
    1.44 +  unsigned *sortedAlnEnd = sortedAlnPtr + numAlns;
    1.45  
    1.46 -  stable_sort(sortedAlnIndices.begin(), sortedAlnIndices.end(),
    1.47 -	      QbegLess(&dpBegs[0], &rnameAndStrandIds[0], &rBegs[0]));
    1.48 +  std::stable_sort(sortedAlnPtr, sortedAlnEnd,
    1.49 +		   DpBegLess(&dpBegs[0], &dpEnds[0]));
    1.50  
    1.51    for (unsigned i = 0; i < numAlns; ++i) cell(Vmat, i, dpBeg(i)) = INT_MIN/2;
    1.52    long maxScore = 0;
    1.53  
    1.54    for (unsigned j = minBeg; j < maxEnd; j++) {
    1.55 -    updateInplayAlnIndicesF(sortedAlnPos, oldNumInplay, newNumInplay, j);
    1.56 +    while (inplayAlnEnd > inplayAlnBeg && dpEnd(inplayAlnEnd[-1]) == j) {
    1.57 +      --inplayAlnEnd;  // it is no longer "in play"
    1.58 +    }
    1.59 +    const unsigned *sortedAlnBeg = sortedAlnPtr;
    1.60 +    while (sortedAlnPtr < sortedAlnEnd && dpBeg(*sortedAlnPtr) == j) {
    1.61 +      ++sortedAlnPtr;
    1.62 +    }
    1.63 +    mergeInto(inplayAlnBeg, inplayAlnEnd, sortedAlnBeg, sortedAlnPtr,
    1.64 +	      DpEndLess(&dpBegs[0], &dpEnds[0]));
    1.65 +    inplayAlnEnd += sortedAlnPtr - sortedAlnBeg;
    1.66 +
    1.67      cell(Vvec, j) = maxScore;
    1.68      long scoreFromJump = maxScore + restartScore;
    1.69 -    for (unsigned x = 0; x < newNumInplay; ++x) {
    1.70 -      unsigned i = newInplayAlnIndices[x];
    1.71 -      size_t ij = matrixRowOrigins[i] + j;
    1.72 +    for (const unsigned *x = inplayAlnBeg; x < inplayAlnEnd; ++x) {
    1.73 +      size_t ij = matrixRowOrigins[*x] + j;
    1.74        long s = std::max(scoreFromJump, Vmat[ij] + Dmat[ij]) + Amat[ij];
    1.75        Vmat[ij + 1] = s;
    1.76        maxScore = std::max(maxScore, s);
    1.77 @@ -517,25 +551,35 @@
    1.78    resizeVector(rescales);
    1.79    resizeMatrix(Fmat);
    1.80  
    1.81 -  unsigned sortedAlnPos = 0;
    1.82 -  unsigned oldNumInplay = 0;
    1.83 -  unsigned newNumInplay = 0;
    1.84 +  unsigned *inplayAlnBeg = &newInplayAlnIndices[0];
    1.85 +  unsigned *inplayAlnEnd = inplayAlnBeg;
    1.86 +  unsigned *sortedAlnPtr = &sortedAlnIndices[0];
    1.87 +  unsigned *sortedAlnEnd = sortedAlnPtr + numAlns;
    1.88  
    1.89 -  stable_sort(sortedAlnIndices.begin(), sortedAlnIndices.end(),
    1.90 -	      QbegLess(&dpBegs[0], &rnameAndStrandIds[0], &rBegs[0]));
    1.91 +  std::stable_sort(sortedAlnPtr, sortedAlnEnd,
    1.92 +		   DpBegLess(&dpBegs[0], &dpEnds[0]));
    1.93  
    1.94    for (unsigned i = 0; i < numAlns; ++i) cell(Fmat, i, dpBeg(i)) = 0.0;
    1.95    double sumOfProbs = 1;
    1.96    double rescale = 1;
    1.97  
    1.98    for (unsigned j = minBeg; j < maxEnd; j++) {
    1.99 -    updateInplayAlnIndicesF(sortedAlnPos, oldNumInplay, newNumInplay, j);
   1.100 +    while (inplayAlnEnd > inplayAlnBeg && dpEnd(inplayAlnEnd[-1]) == j) {
   1.101 +      --inplayAlnEnd;  // it is no longer "in play"
   1.102 +    }
   1.103 +    const unsigned *sortedAlnBeg = sortedAlnPtr;
   1.104 +    while (sortedAlnPtr < sortedAlnEnd && dpBeg(*sortedAlnPtr) == j) {
   1.105 +      ++sortedAlnPtr;
   1.106 +    }
   1.107 +    mergeInto(inplayAlnBeg, inplayAlnEnd, sortedAlnBeg, sortedAlnPtr,
   1.108 +	      DpEndLess(&dpBegs[0], &dpEnds[0]));
   1.109 +    inplayAlnEnd += sortedAlnPtr - sortedAlnBeg;
   1.110 +
   1.111      cell(rescales, j) = rescale;
   1.112      double probFromJump = sumOfProbs * restartProb;
   1.113      double pSum = 0.0;
   1.114 -    for (unsigned x = 0; x < newNumInplay; ++x) {
   1.115 -      unsigned i = newInplayAlnIndices[x];
   1.116 -      size_t ij = matrixRowOrigins[i] + j;
   1.117 +    for (unsigned *x = inplayAlnBeg; x < inplayAlnEnd; ++x) {
   1.118 +      size_t ij = matrixRowOrigins[*x] + j;
   1.119        double p = (probFromJump + Fmat[ij] * Dexp[ij]) * Aexp[ij] * rescale;
   1.120        Fmat[ij + 1] = p;
   1.121        pSum += p;
   1.122 @@ -599,23 +643,33 @@
   1.123  void SplitAligner::backwardSplit() {
   1.124    resizeMatrix(Bmat);
   1.125  
   1.126 -  unsigned sortedAlnPos = 0;
   1.127 -  unsigned oldNumInplay = 0;
   1.128 -  unsigned newNumInplay = 0;
   1.129 +  unsigned *inplayAlnBeg = &newInplayAlnIndices[0];
   1.130 +  unsigned *inplayAlnEnd = inplayAlnBeg;
   1.131 +  unsigned *sortedAlnPtr = &sortedAlnIndices[0];
   1.132 +  unsigned *sortedAlnEnd = sortedAlnPtr + numAlns;
   1.133  
   1.134 -  stable_sort(sortedAlnIndices.begin(), sortedAlnIndices.end(),
   1.135 -	      QendLess(&dpEnds[0], &rnameAndStrandIds[0], &rEnds[0]));
   1.136 +  std::stable_sort(sortedAlnPtr, sortedAlnEnd,
   1.137 +		   DpEndLess(&dpBegs[0], &dpEnds[0]));
   1.138  
   1.139    for (unsigned i = 0; i < numAlns; ++i) cell(Bmat, i, dpEnd(i)) = 0.0;
   1.140    double sumOfProbs = 1;
   1.141  
   1.142    for (unsigned j = maxEnd; j > minBeg; j--) {
   1.143 -    updateInplayAlnIndicesB(sortedAlnPos, oldNumInplay, newNumInplay, j);
   1.144 +    while (inplayAlnEnd > inplayAlnBeg && dpBeg(inplayAlnEnd[-1]) == j) {
   1.145 +      --inplayAlnEnd;  // it is no longer "in play"
   1.146 +    }
   1.147 +    const unsigned *sortedAlnBeg = sortedAlnPtr;
   1.148 +    while (sortedAlnPtr < sortedAlnEnd && dpEnd(*sortedAlnPtr) == j) {
   1.149 +      ++sortedAlnPtr;
   1.150 +    }
   1.151 +    mergeInto(inplayAlnBeg, inplayAlnEnd, sortedAlnBeg, sortedAlnPtr,
   1.152 +	      DpBegLess(&dpBegs[0], &dpEnds[0]));
   1.153 +    inplayAlnEnd += sortedAlnPtr - sortedAlnBeg;
   1.154 +
   1.155      double rescale = cell(rescales, j);
   1.156      double pSum = 0.0;
   1.157 -    for (unsigned x = 0; x < newNumInplay; ++x) {
   1.158 -      unsigned i = newInplayAlnIndices[x];
   1.159 -      size_t ij = matrixRowOrigins[i] + j;
   1.160 +    for (const unsigned *x = inplayAlnBeg; x < inplayAlnEnd; ++x) {
   1.161 +      size_t ij = matrixRowOrigins[*x] + j;
   1.162        double p = (sumOfProbs + Bmat[ij] * Dexp[ij]) * Aexp[ij - 1] * rescale;
   1.163        Bmat[ij - 1] = p;
   1.164        pSum += p;