Refactor
author Martin C. Frith
Mon Oct 07 12:46:14 2019 +0900 (6 weeks ago)
changeset 988 ca6613dd1f06
parent 987 f757e6ca4edb
child 989 50bcc51ca59b
Refactor
src/CyclicSubsetSeed.hh
src/ScoreMatrix.cc
src/ScoreMatrix.hh
src/ScoreMatrixRow.hh
src/makefile
     1.1 --- a/src/CyclicSubsetSeed.hh	Mon Oct 07 12:02:11 2019 +0900
     1.2 +++ b/src/CyclicSubsetSeed.hh	Mon Oct 07 12:46:14 2019 +0900
     1.3 @@ -33,7 +33,7 @@
     1.4  
     1.5  class CyclicSubsetSeed{
     1.6  public:
     1.7 -  enum { MAX_LETTERS = 64 };
     1.8 +  enum { MAX_LETTERS = ALPHABET_CAPACITY };
     1.9    enum { DELIMITER = 255 };
    1.10  
    1.11    // Converts a name to a text string defining one or more seeds.
     2.1 --- a/src/ScoreMatrix.cc	Mon Oct 07 12:02:11 2019 +0900
     2.2 +++ b/src/ScoreMatrix.cc	Mon Oct 07 12:46:14 2019 +0900
     2.3 @@ -12,8 +12,6 @@
     2.4  #include <cctype>  // toupper, tolower
     2.5  #include <stddef.h>  // size_t
     2.6  
     2.7 -#define ERR(x) throw std::runtime_error(x)
     2.8 -
     2.9  #define COUNTOF(a) (sizeof (a) / sizeof *(a))
    2.10  
    2.11  static void makeUppercase(std::string& s) {
    2.12 @@ -25,6 +23,8 @@
    2.13  
    2.14  namespace cbrc{
    2.15  
    2.16 +typedef std::runtime_error Err;
    2.17 +
    2.18  const char *ScoreMatrix::canonicalName( const std::string& name ){
    2.19    for( size_t i = 0; i < COUNTOF(scoreMatrixNicknames); ++i )
    2.20      if( name == scoreMatrixNicknames[i].nickname )
    2.21 @@ -59,7 +59,7 @@
    2.22  void ScoreMatrix::fromString( const std::string& matString ){
    2.23    std::istringstream iss(matString);
    2.24    iss >> *this;
    2.25 -  if( !iss ) ERR( "can't read the score matrix" );
    2.26 +  if (!iss) throw Err("can't read the score matrix");
    2.27  }
    2.28  
    2.29  static unsigned s2i(const uchar symbolToIndex[], uchar c) {
    2.30 @@ -72,12 +72,14 @@
    2.31    upper = symbolToIndex[s];
    2.32    lower = symbolToIndex[std::tolower(s)];
    2.33    if (upper >= tooBig || lower >= tooBig) {
    2.34 -    ERR(std::string("bad letter in score matrix: ") + symbol);
    2.35 +    throw Err(std::string("bad letter in score matrix: ") + symbol);
    2.36    }
    2.37  }
    2.38  
    2.39  void ScoreMatrix::init(const uchar symbolToIndex[]) {
    2.40 -  assert( !rowSymbols.empty() && !colSymbols.empty() );
    2.41 +  unsigned fastMatrixSize = ALPHABET_CAPACITY;
    2.42 +  assert(!rowSymbols.empty());
    2.43 +  assert(!colSymbols.empty());
    2.44  
    2.45    makeUppercase(rowSymbols);
    2.46    makeUppercase(colSymbols);
    2.47 @@ -93,8 +95,8 @@
    2.48    }
    2.49  
    2.50    // set default score = minScore:
    2.51 -  for( unsigned i = 0; i < MAT; ++i ){
    2.52 -    for( unsigned j = 0; j < MAT; ++j ){
    2.53 +  for (unsigned i = 0; i < fastMatrixSize; ++i) {
    2.54 +    for (unsigned j = 0; j < fastMatrixSize; ++j) {
    2.55        caseSensitive[i][j] = minScore;
    2.56        caseInsensitive[i][j] = minScore;
    2.57      }
    2.58 @@ -103,8 +105,8 @@
    2.59    for( size_t i = 0; i < rowSymbols.size(); ++i ){
    2.60      for( size_t j = 0; j < colSymbols.size(); ++j ){
    2.61        unsigned iu, il, ju, jl;
    2.62 -      upperAndLowerIndex(MAT, symbolToIndex, rowSymbols[i], iu, il);
    2.63 -      upperAndLowerIndex(MAT, symbolToIndex, colSymbols[j], ju, jl);
    2.64 +      upperAndLowerIndex(fastMatrixSize, symbolToIndex, rowSymbols[i], iu, il);
    2.65 +      upperAndLowerIndex(fastMatrixSize, symbolToIndex, colSymbols[j], ju, jl);
    2.66        caseSensitive[iu][jl] = std::min( cells[i][j], 0 );
    2.67        caseSensitive[il][ju] = std::min( cells[i][j], 0 );
    2.68        caseSensitive[il][jl] = std::min( cells[i][j], 0 );
    2.69 @@ -119,8 +121,8 @@
    2.70    // set a hugely negative score for the delimiter symbol:
    2.71    uchar delimiter = ' ';
    2.72    uchar z = symbolToIndex[delimiter];
    2.73 -  assert( z < MAT );
    2.74 -  for( unsigned i = 0; i < MAT; ++i ){
    2.75 +  assert(z < fastMatrixSize);
    2.76 +  for (unsigned i = 0; i < fastMatrixSize; ++i) {
    2.77      caseSensitive[z][i] = -INF;
    2.78      caseSensitive[i][z] = -INF;
    2.79      caseInsensitive[z][i] = -INF;
    2.80 @@ -151,20 +153,17 @@
    2.81    std::string tmpColSymbols;
    2.82    std::vector< std::vector<int> > tmpCells;
    2.83    std::string line;
    2.84 -  int state = 0;
    2.85  
    2.86 -  while( std::getline( stream, line ) ){
    2.87 +  while (getline(stream, line)) {
    2.88      std::istringstream iss(line);
    2.89      char c;
    2.90 -    if( !(iss >> c) ) continue;  // skip blank lines
    2.91 -    if( state == 0 ){
    2.92 -      if( c == '#' ) continue;  // skip comment lines at the top
    2.93 -      do{
    2.94 +    if (!(iss >> c)) continue;  // skip blank lines
    2.95 +    if (tmpColSymbols.empty()) {
    2.96 +      if (c == '#') continue;  // skip comment lines at the top
    2.97 +      do {
    2.98  	tmpColSymbols.push_back(c);
    2.99 -      }while( iss >> c );
   2.100 -      state = 1;
   2.101 -    }
   2.102 -    else{
   2.103 +      } while (iss >> c);
   2.104 +    } else {
   2.105        tmpRowSymbols.push_back(c);
   2.106        tmpCells.resize( tmpCells.size() + 1 );
   2.107        int score;
   2.108 @@ -172,12 +171,12 @@
   2.109  	tmpCells.back().push_back(score);
   2.110        }
   2.111        if (tmpCells.back().size() != tmpColSymbols.size()) {
   2.112 -	ERR("bad score matrix");
   2.113 +	throw Err("bad score matrix");
   2.114        }
   2.115      }
   2.116    }
   2.117  
   2.118 -  if( stream.eof() && !stream.bad() && !tmpRowSymbols.empty() ){
   2.119 +  if (stream.eof() && !stream.bad() && !tmpCells.empty()) {
   2.120      stream.clear();
   2.121      m.rowSymbols.swap(tmpRowSymbols);
   2.122      m.colSymbols.swap(tmpColSymbols);
   2.123 @@ -261,8 +260,8 @@
   2.124  				     double scale,
   2.125  				     const double rowSymbolProbs[],
   2.126  				     const double colSymbolProbs[]) {
   2.127 -  int *fastMatrix[MAT];
   2.128 -  std::copy(caseInsensitive, caseInsensitive + MAT, fastMatrix);
   2.129 +  int *fastMatrix[ALPHABET_CAPACITY];
   2.130 +  std::copy(caseInsensitive, caseInsensitive + ALPHABET_CAPACITY, fastMatrix);
   2.131  
   2.132    char scratch[2] = {0};
   2.133  
     3.1 --- a/src/ScoreMatrix.hh	Mon Oct 07 12:02:11 2019 +0900
     3.2 +++ b/src/ScoreMatrix.hh	Mon Oct 07 12:46:14 2019 +0900
     3.3 @@ -8,6 +8,7 @@
     3.4  
     3.5  #ifndef SCOREMATRIX_HH
     3.6  #define SCOREMATRIX_HH
     3.7 +
     3.8  #include <string>
     3.9  #include <vector>
    3.10  #include <iosfwd>
    3.11 @@ -19,7 +20,6 @@
    3.12  
    3.13  struct ScoreMatrix{
    3.14    enum { INF = INT_MAX / 2 };  // big, but try to avoid overflow
    3.15 -  enum { MAT = 64 };           // matrix size = MAT x MAT
    3.16  
    3.17    static const char *canonicalName( const std::string& name );
    3.18    static std::string stringFromName( const std::string& name );
    3.19 @@ -47,8 +47,8 @@
    3.20    std::string rowSymbols;  // row headings (letters)
    3.21    std::string colSymbols;  // column headings (letters)
    3.22    std::vector< std::vector<int> > cells;  // scores
    3.23 -  int caseSensitive[MAT][MAT];
    3.24 -  int caseInsensitive[MAT][MAT];
    3.25 +  int caseSensitive[ALPHABET_CAPACITY][ALPHABET_CAPACITY];
    3.26 +  int caseInsensitive[ALPHABET_CAPACITY][ALPHABET_CAPACITY];
    3.27    int minScore;
    3.28    int maxScore;
    3.29  };
     4.1 --- a/src/ScoreMatrixRow.hh	Mon Oct 07 12:02:11 2019 +0900
     4.2 +++ b/src/ScoreMatrixRow.hh	Mon Oct 07 12:46:14 2019 +0900
     4.3 @@ -11,11 +11,7 @@
     4.4  
     4.5  namespace cbrc{
     4.6  
     4.7 -// The row size must be fixed to some value.  It is fixed to 64
     4.8 -// because: this is big enough for all amino acids, including
     4.9 -// ambiguous ones, in upper and lower case, and using a power-of-2
    4.10 -// might be fast.
    4.11 -enum { scoreMatrixRowSize = 64 };
    4.12 +enum { scoreMatrixRowSize = ALPHABET_CAPACITY };
    4.13  
    4.14  typedef int ScoreMatrixRow[scoreMatrixRowSize];
    4.15  
     5.1 --- a/src/makefile	Mon Oct 07 12:02:11 2019 +0900
     5.2 +++ b/src/makefile	Mon Oct 07 12:46:14 2019 +0900
     5.3 @@ -3,6 +3,11 @@
     5.4  # -Wconversion
     5.5  # -fomit-frame-pointer ?
     5.6  
     5.7 +# The number of symbol types that we can keep in sequences (e.g. 20
     5.8 +# amino acids, plus ambiguous ones, in upper & lower case, plus one
     5.9 +# delimiter):
    5.10 +CPPFLAGS = -DALPHABET_CAPACITY=64
    5.11 +
    5.12  CFLAGS = -Wall -O2
    5.13  
    5.14  alpObj = alp/sls_alignment_evaluer.o alp/sls_pvalues.o		\