Make lastal startup faster
authorMartin C. Frith
Mon Apr 01 08:29:58 2019 +0900 (2 months ago)
changeset 978d429589d040e
parent 977 9eeaeea88f2c
child 979 9b416e560dc3
Make lastal startup faster
src/LambdaCalculator.cc
src/LambdaCalculator.hh
src/makefile
     1.1 --- a/src/LambdaCalculator.cc	Mon Apr 01 07:48:24 2019 +0900
     1.2 +++ b/src/LambdaCalculator.cc	Mon Apr 01 08:29:58 2019 +0900
     1.3 @@ -1,12 +1,14 @@
     1.4  // Copyright 2015 Yutaro Konta
     1.5  
     1.6  #include "LambdaCalculator.hh"
     1.7 +#include "cbrc_linalg.hh"
     1.8  #include <vector>
     1.9  #include <cassert>
    1.10  #include <cstdio>  // sprintf
    1.11  #include <cstdlib>
    1.12  #include <cfloat>
    1.13  #include <cmath>
    1.14 +#include <numeric>
    1.15  //#include <iostream>
    1.16  using namespace std;
    1.17  
    1.18 @@ -20,167 +22,21 @@
    1.19    return atof(buffer);
    1.20  }
    1.21  
    1.22 -static double** makematrix(int m, int n, double val)
    1.23 +static bool calculate_inv_sum(double **matrix, int alpha_size, double tau, double* inv_sum, double **tmpMat, double *tmpVec)
    1.24  {
    1.25 -  double** mat = new double* [m];
    1.26 -  for (int i=0; i<m; i++)
    1.27 -    {
    1.28 -      mat[i] = new double [n];
    1.29 -      for (int j=0; j<n; j++)
    1.30 -        mat[i][j] = val;
    1.31 -    }
    1.32 -  return mat;
    1.33 -}
    1.34 -
    1.35 -static void deletematrix(double** a, int m)
    1.36 -{
    1.37 -  for (int i=0; i<m; i++)
    1.38 -    delete[] a[i];
    1.39 -  delete[] a;
    1.40 -}
    1.41 -
    1.42 -static double summatrix(double** a, int m, int n)
    1.43 -{
    1.44 -  double s = 0;
    1.45 -  for (int i=0; i<m; i++)
    1.46 -    for (int j=0; j<n; j++)
    1.47 -      s += a[i][j];
    1.48 -  return s;
    1.49 -}
    1.50 -
    1.51 -static int max_index(double** a, int n, int i)
    1.52 -{
    1.53 -  double max = -DBL_MAX;
    1.54 -  int maxindex = -1;
    1.55 -
    1.56 -  for (int j=i; j<n; j++)
    1.57 -    {
    1.58 -      if (fabs(a[j][i]) > max)
    1.59 -        {
    1.60 -          max = fabs(a[j][i]);
    1.61 -          maxindex = j;
    1.62 -        }
    1.63 -    }
    1.64 -  return maxindex;
    1.65 -}
    1.66 -
    1.67 -static void swap_matrix(double** a, int i, int j)
    1.68 -{
    1.69 -  double* v = a[i];
    1.70 -  a[i] = a[j];
    1.71 -  a[j] = v;
    1.72 -}
    1.73 -
    1.74 -static void swap_vector(int* a, int i, int j)
    1.75 -{
    1.76 -  int v = a[i];
    1.77 -  a[i] = a[j];
    1.78 -  a[j] = v;
    1.79 -}
    1.80 -
    1.81 -static bool lu_pivoting(double** a, int* idx, int n)
    1.82 -{
    1.83 -  int v;
    1.84 -
    1.85 -  for (int i=0; i<n; i++)
    1.86 -    idx[i] = i;
    1.87 -
    1.88 -  for (int i=0; i<n; i++)
    1.89 -    {
    1.90 -      v = max_index(a, n, i);
    1.91 -      assert(v >= 0);
    1.92 -      if (fabs(a[v][i]) < 1e-10)
    1.93 -        {
    1.94 -          return false; // singular matrix!
    1.95 -        }
    1.96 -
    1.97 -      swap_matrix(a, i, v);
    1.98 -      swap_vector(idx, i, v);
    1.99 -
   1.100 -      a[i][i] = 1.0/a[i][i];
   1.101 -      for (int j=i+1; j<n; j++)
   1.102 -        {
   1.103 -          a[j][i] = a[j][i] * a[i][i];
   1.104 -          for (int k=i+1; k<n; k++)
   1.105 -            a[j][k] = a[j][k] - a[j][i] * a[i][k];
   1.106 -        }
   1.107 -    }
   1.108 -  return true;
   1.109 -}
   1.110 -
   1.111 -static void solvep(double **a, double *x, double *b, int n)
   1.112 -{
   1.113 -  double *y = new double [n];
   1.114 -
   1.115 -  for (int i=0; i<n; i++)
   1.116 -    {
   1.117 -      y[i] = b[i];
   1.118 -      for (int j=0; j<i; j++)
   1.119 -        y[i] -= a[i][j] * y[j];
   1.120 -    }
   1.121 -
   1.122 -  for (int i=n-1; i>=0; i--)
   1.123 -    {
   1.124 -      x[i] = y[i];
   1.125 -      for (int j=i+1; j<n; j++)
   1.126 -        x[i] -= a[i][j] * x[j];
   1.127 -      x[i] *= a[i][i]; // needed because a[i][i] is inverted
   1.128 -    }
   1.129 -  delete[] y;
   1.130 -}
   1.131 -
   1.132 -static void transpose(double **a, int n)
   1.133 -{
   1.134 -  double v;
   1.135 -  for (int i=0; i<n; i++)
   1.136 -    {
   1.137 -      for (int j=0; j<i; j++)
   1.138 -        {
   1.139 -          v = a[i][j];
   1.140 -          a[i][j] = a[j][i];
   1.141 -          a[j][i] = v;
   1.142 -        }
   1.143 -    }
   1.144 -}
   1.145 -
   1.146 -static bool invert(double **a, double **inv, int n)
   1.147 -{
   1.148 -  int* idx = new int [n];
   1.149 -
   1.150 -  double **e = makematrix(n,n,0);
   1.151 -
   1.152 -  if(!lu_pivoting(a, idx, n))
   1.153 -    return false;
   1.154 -
   1.155 -  for (int i=0; i<n; i++)
   1.156 -    e[idx[i]][i] = 1; // transposed
   1.157 -
   1.158 -  delete[] idx;
   1.159 -
   1.160 -  for (int i=0; i<n; i++)
   1.161 -    solvep(a, inv[i], e[i], n);
   1.162 -
   1.163 -  deletematrix(e, n);
   1.164 -  transpose(inv, n); // transpose inv to make the inverted matrix of a
   1.165 -  return true;
   1.166 -}
   1.167 -
   1.168 -static bool calculate_inv_sum(double **matrix, int alpha_size, double tau, double* inv_sum)
   1.169 -{
   1.170 -  double **m = makematrix(alpha_size, alpha_size, 0);
   1.171 -  double **y = makematrix(alpha_size, alpha_size, 0);
   1.172 -
   1.173    for (int i=0; i<alpha_size; i++)
   1.174      for (int j=0; j<alpha_size; j++)
   1.175 -      m[i][j] = exp(tau * matrix[i][j]);
   1.176 +      tmpMat[i][j] = exp(tau * matrix[i][j]);
   1.177  
   1.178 -  if(!invert(m, y, alpha_size))
   1.179 +  std::fill_n(tmpVec, alpha_size, 1.0);
   1.180 +
   1.181 +  try {
   1.182 +    cbrc::linalgSolve(tmpMat, tmpVec, alpha_size);
   1.183 +  } catch(...) {
   1.184      return false;
   1.185 +  }
   1.186  
   1.187 -  *inv_sum = summatrix(y, alpha_size, alpha_size);
   1.188 -
   1.189 -  deletematrix(m, alpha_size);
   1.190 -  deletematrix(y, alpha_size);
   1.191 +  *inv_sum = std::accumulate(tmpVec, tmpVec + alpha_size, 0.0);
   1.192    return true;
   1.193  }
   1.194  
   1.195 @@ -265,12 +121,21 @@
   1.196    double r_sum=0;
   1.197    int iter=0;
   1.198  
   1.199 +  std::vector<double> tmpVals(alpha_size * alpha_size);
   1.200 +  std::vector<double *> tmpPtrs(alpha_size);
   1.201 +  for (int i = 0; i < alpha_size; ++i)
   1.202 +    tmpPtrs[i] = &tmpVals[i * alpha_size];
   1.203 +  double **tmpMat = &tmpPtrs[0];
   1.204 +
   1.205 +  double *tmpVec = &letprob2[0];
   1.206 +
   1.207    while (iter < maxiter && (l>=r || (l_sum < 1.0 && r_sum < 1.0) || (l_sum > 1.0 && r_sum > 1.0)))
   1.208      {
   1.209        l = lb + (ub - lb)*rand()/RAND_MAX;
   1.210        r = lb + (ub - lb)*rand()/RAND_MAX;
   1.211  
   1.212 -      if (!calculate_inv_sum(matrix, alpha_size, l, &l_sum) || !calculate_inv_sum(matrix, alpha_size, r, &r_sum))
   1.213 +      if (!calculate_inv_sum(matrix, alpha_size, l, &l_sum, tmpMat, tmpVec) ||
   1.214 +	  !calculate_inv_sum(matrix, alpha_size, r, &r_sum, tmpMat, tmpVec))
   1.215          {
   1.216            l = 0;
   1.217            r = 0;
   1.218 @@ -285,7 +150,7 @@
   1.219      {
   1.220        double mid = (l + r)/2.0;
   1.221        double mid_sum;
   1.222 -      if (!calculate_inv_sum(matrix, alpha_size, mid, &mid_sum))
   1.223 +      if (!calculate_inv_sum(matrix, alpha_size, mid, &mid_sum, tmpMat, tmpVec))
   1.224          return false;
   1.225  
   1.226        if (fabs(mid_sum) >= DBL_MAX)
   1.227 @@ -309,7 +174,7 @@
   1.228  
   1.229    if (fabs(l_sum - 1.0) < fabs(r_sum - 1.0))
   1.230      {
   1.231 -      if (check_lambda(matrix, l, alpha_size, letprob1, letprob2))
   1.232 +      if (check_lambda(matrix, l, alpha_size, letprob1, letprob2, tmpMat))
   1.233          {
   1.234            *lambda = l;
   1.235            return true;
   1.236 @@ -317,7 +182,7 @@
   1.237        return false;
   1.238      }
   1.239  
   1.240 -  if (check_lambda(matrix, r, alpha_size, letprob1, letprob2))
   1.241 +  if (check_lambda(matrix, r, alpha_size, letprob1, letprob2, tmpMat))
   1.242      {
   1.243        *lambda = r;
   1.244        return true;
   1.245 @@ -346,63 +211,62 @@
   1.246    return lambda;
   1.247  }
   1.248  
   1.249 -bool LambdaCalculator::check_lambda(double** matrix, double lambda, int alpha_size, vector<double>& letprob1, vector<double>& letprob2)
   1.250 +bool LambdaCalculator::check_lambda(double** matrix, double lambda, int alpha_size, vector<double>& letprob1, vector<double>& letprob2, double** tmpMat)
   1.251  {
   1.252 -  double **m = makematrix(alpha_size, alpha_size, 0);
   1.253 -  double **y = makematrix(alpha_size, alpha_size, 0);
   1.254 +  for (int i=0; i<alpha_size; i++)
   1.255 +    for (int j=0; j<alpha_size; j++)
   1.256 +      tmpMat[i][j] = exp(lambda * matrix[i][j]);
   1.257 +
   1.258 +  std::fill_n(&letprob2[0], alpha_size, 1.0);
   1.259 +  cbrc::linalgSolve(tmpMat, &letprob2[0], alpha_size);
   1.260 +
   1.261 +  for (int i=0; i<alpha_size; i++)
   1.262 +    {
   1.263 +      double p = letprob2[i];
   1.264 +      if (p < 0 || p > 1)
   1.265 +	return false;
   1.266 +      letprob2[i] = roundToFewDigits(p);
   1.267 +    }
   1.268  
   1.269    for (int i=0; i<alpha_size; i++)
   1.270      for (int j=0; j<alpha_size; j++)
   1.271 -      m[i][j] = exp(lambda * matrix[i][j]);
   1.272 +      tmpMat[i][j] = exp(lambda * matrix[j][i]);
   1.273  
   1.274 -  invert(m, y, alpha_size);
   1.275 -
   1.276 -  for (int i=0; i<alpha_size; i++)
   1.277 -    {
   1.278 -      double p = 0;
   1.279 -      for (int j=0;j<alpha_size; j++)
   1.280 -        p += y[i][j];
   1.281 -      if (p < 0 || p > 1)
   1.282 -        {
   1.283 -          letprob2.clear();
   1.284 -          return false;
   1.285 -        }
   1.286 -      letprob2.push_back(roundToFewDigits(p));
   1.287 -    }
   1.288 +  std::fill_n(&letprob1[0], alpha_size, 1.0);
   1.289 +  cbrc::linalgSolve(tmpMat, &letprob1[0], alpha_size);
   1.290  
   1.291    for (int j=0; j<alpha_size; j++)
   1.292      {
   1.293 -      double q = 0;
   1.294 -      for (int i=0; i<alpha_size; i++)
   1.295 -        q += y[i][j];
   1.296 +      double q = letprob1[j];
   1.297        if (q < 0 || q > 1)
   1.298 -        {
   1.299 -          letprob2.clear();
   1.300 -          letprob1.clear();
   1.301 -          return false;
   1.302 -        }
   1.303 -      letprob1.push_back(roundToFewDigits(q));
   1.304 +	return false;
   1.305 +      letprob1[j] = roundToFewDigits(q);
   1.306      }
   1.307  
   1.308 -  deletematrix(m, alpha_size);
   1.309 -  deletematrix(y, alpha_size);
   1.310 -
   1.311    return true;
   1.312  }
   1.313  
   1.314  void LambdaCalculator::calculate(const const_int_ptr *matrix, int alphSize) {
   1.315    assert(alphSize >= 0);
   1.316 -  setBad();
   1.317 +  lambda_ = -1;
   1.318 +  letterProbs1_.resize(alphSize);
   1.319 +  letterProbs2_.resize(alphSize);
   1.320  
   1.321    int maxiter = 1000;
   1.322    int max_boundary_search_iter = 100;
   1.323    double lb_ratio = 1e-6;
   1.324  
   1.325 -  double** mat = makematrix(alphSize, alphSize, 0);
   1.326 +  std::vector<double> matVals(alphSize * alphSize);
   1.327 +  std::vector<double *> matPtrs(alphSize);
   1.328 +  for (int i = 0; i < alphSize; ++i)
   1.329 +    matPtrs[i] = &matVals[i * alphSize];
   1.330 +  double** mat = &matPtrs[0];
   1.331 +
   1.332    for (int i=0; i<alphSize; i++)
   1.333      for (int j=0; j<alphSize; j++)
   1.334        mat[i][j] = matrix[i][j];
   1.335 +
   1.336 +  // xxx srand ?
   1.337    lambda_ = calculate_lambda(mat, alphSize, letterProbs1_, letterProbs2_, maxiter, max_boundary_search_iter, lb_ratio);
   1.338 -  deletematrix(mat, alphSize);
   1.339  }
   1.340  }
     2.1 --- a/src/LambdaCalculator.hh	Mon Apr 01 07:48:24 2019 +0900
     2.2 +++ b/src/LambdaCalculator.hh	Mon Apr 01 08:29:58 2019 +0900
     2.3 @@ -50,7 +50,7 @@
     2.4    bool find_ub(double **matrix, int alpha_size, double *ub);
     2.5    bool binary_search(double** matrix, int alpha_size, double lb, double ub, std::vector<double>& letprob1, std::vector<double>& letprob2, double* lambda, int maxiter);
     2.6    double calculate_lambda(double** matrix, int alpha_size, std::vector<double>& letprob1, std::vector<double>& letprob2, int maxiter, int max_boundary_search_iter, double lb_ratio);
     2.7 -  bool check_lambda(double** matrix, double lambda, int alpha_size, std::vector<double>& letprob1, std::vector<double>& letprob2);
     2.8 +  bool check_lambda(double** matrix, double lambda, int alpha_size, std::vector<double>& letprob1, std::vector<double>& letprob2, double** tmpMat);
     2.9  };
    2.10  
    2.11  }  // end namespace
     3.1 --- a/src/makefile	Mon Apr 01 07:48:24 2019 +0900
     3.2 +++ b/src/makefile	Mon Apr 01 08:29:58 2019 +0900
     3.3 @@ -16,7 +16,7 @@
     3.4  
     3.5  indexObj0 = Alphabet.o CyclicSubsetSeed.o LambdaCalculator.o	\
     3.6  ScoreMatrix.o SubsetMinimizerFinder.o TantanMasker.o fileMap.o	\
     3.7 -tantan.o LastdbArguments.o
     3.8 +cbrc_linalg.o tantan.o LastdbArguments.o
     3.9  
    3.10  indexObj4 = MultiSequence.o MultiSequenceQual.o SubsetSuffixArray.o	\
    3.11  SubsetSuffixArraySort.o lastdb.o
    3.12 @@ -167,7 +167,8 @@
    3.13  GeneticCode.o GeneticCode.o8: GeneticCode.cc GeneticCode.hh Alphabet.hh
    3.14  GreedyXdropAligner.o GreedyXdropAligner.o8: GreedyXdropAligner.cc GreedyXdropAligner.hh \
    3.15   ScoreMatrixRow.hh
    3.16 -LambdaCalculator.o LambdaCalculator.o8: LambdaCalculator.cc LambdaCalculator.hh
    3.17 +LambdaCalculator.o LambdaCalculator.o8: LambdaCalculator.cc LambdaCalculator.hh \
    3.18 + cbrc_linalg.hh
    3.19  LastEvaluer.o LastEvaluer.o8: LastEvaluer.cc LastEvaluer.hh ScoreMatrixRow.hh \
    3.20   alp/sls_alignment_evaluer.hpp alp/sls_pvalues.hpp alp/sls_basic.hpp \
    3.21   alp/sls_falp_alignment_evaluer.hpp alp/sls_fsa1_pvalues.hpp \