Add lastal/last-train -X option for ambiguous N/X
authorMartin C. Frith
Thu Mar 28 16:46:23 2019 +0900 (2 months ago)
changeset 9742b0e54aee679
parent 973 1ed9db7f46ca
child 975 3701b1e2019b
Add lastal/last-train -X option for ambiguous N/X
doc/last-train.txt
doc/lastal.txt
scripts/last-train
src/LastalArguments.cc
src/LastalArguments.hh
src/ScoreMatrix.cc
src/ScoreMatrix.hh
src/lastal.cc
test/dfam3-LTR22B1.fa
test/dfam3-LTR22C.fa
test/last-test.out
test/last-test.sh
     1.1 --- a/doc/last-train.txt	Wed Feb 27 14:29:58 2019 +0900
     1.2 +++ b/doc/last-train.txt	Thu Mar 28 16:46:23 2019 +0900
     1.3 @@ -102,6 +102,16 @@
     1.4    -k STEP    Look for initial matches starting only at every STEP-th
     1.5               position in each query.
     1.6    -P COUNT   Number of parallel threads.
     1.7 +  -X NUMBER  How to score a match/mismatch involving N (for DNA) or X
     1.8 +             (otherwise).  By default, the lowest match/mismatch score
     1.9 +             is used. 0 means the default; 1 means treat reference
    1.10 +             Ns/Xs as fully-ambiguous letters; 2 means treat query
    1.11 +             Ns/Xs as ambiguous; 3 means treat reference and query
    1.12 +             Ns/Xs as ambiguous.
    1.13 +
    1.14 +             If specified, this parameter is written in last-train's
    1.15 +             output, so it will override lastal's default.
    1.16 +
    1.17    -Q NUMBER  How to read the query sequences.  By default, they must
    1.18               be in ``fasta`` format.  ``-Q0`` means ``fasta`` or
    1.19               ``fastq-ignore``.  ``-Q1`` means ``fastq-sanger``.
     2.1 --- a/doc/lastal.txt	Wed Feb 27 14:29:58 2019 +0900
     2.2 +++ b/doc/lastal.txt	Thu Mar 28 16:46:23 2019 +0900
     2.3 @@ -144,6 +144,14 @@
     2.4        Other options can be specified on lines starting with "#last",
     2.5        but command line options override them.
     2.6  
     2.7 +  -X NUMBER
     2.8 +      How to score a match/mismatch involving N (for DNA) or X
     2.9 +      (otherwise), if not specified by a score matrix.  By default,
    2.10 +      the lowest match/mismatch score is used.  0 means the default; 1
    2.11 +      means treat reference Ns/Xs as fully-ambiguous letters; 2 means
    2.12 +      treat query Ns/Xs as ambiguous; 3 means treat reference and
    2.13 +      query Ns/Xs as ambiguous.
    2.14 +
    2.15    -a COST
    2.16        Gap existence cost.
    2.17  
     3.1 --- a/scripts/last-train	Wed Feb 27 14:29:58 2019 +0900
     3.2 +++ b/scripts/last-train	Thu Mar 28 16:46:23 2019 +0900
     3.3 @@ -346,6 +346,7 @@
     3.4      if opts.m: x.append("-m" + opts.m)
     3.5      if opts.k: x.append("-k" + opts.k)
     3.6      if opts.P: x.append("-P" + opts.P)
     3.7 +    if opts.X: x.append("-X" + opts.X)
     3.8      if opts.Q: x.append("-Q" + opts.Q)
     3.9      if opts.verbose: x.append("-" + "v" * opts.verbose)
    3.10      return x
    3.11 @@ -415,6 +416,7 @@
    3.12              q = subprocess.Popen(z, stdin=q.stdout, stdout=subprocess.PIPE,
    3.13                                   universal_newlines=True)
    3.14  
    3.15 +    if opts.X: print("#last -X", opts.X)
    3.16      if opts.Q: print("#last -Q", opts.Q)
    3.17      gapCosts = gapCostsFromProbs(externalScale, gapProbs)
    3.18      writeGapCosts(gapCosts, sys.stdout)
    3.19 @@ -496,6 +498,9 @@
    3.20                    "every STEP-th position in each query (default: 1)")
    3.21      og.add_option("-P", metavar="THREADS",
    3.22                    help="number of parallel threads")
    3.23 +    og.add_option("-X", metavar="NUMBER", help="N/X is ambiguous in: "
    3.24 +                  "0=neither sequence, 1=reference, 2=query, 3=both "
    3.25 +                  "(default=0)")
    3.26      og.add_option("-Q", metavar="NUMBER", help=
    3.27                    "input format: 0=fasta or fastq-ignore, 1=fastq-sanger "
    3.28                    "(default=fasta)")
     4.1 --- a/src/LastalArguments.cc	Wed Feb 27 14:29:58 2019 +0900
     4.2 +++ b/src/LastalArguments.cc	Thu Mar 28 16:46:23 2019 +0900
     4.3 @@ -77,6 +77,7 @@
     4.4    gapPairCost(-1),  // this means: OFF
     4.5    frameshiftCost(-1),  // this means: ordinary, non-translated alignment
     4.6    matrixFile(""),
     4.7 +  ambiguousLetterOpt(0),
     4.8    maxDropGapped(100),  // depends on maxDropFinal
     4.9    maxDropGappedSuffix('%'),
    4.10    maxDropGapless(-1),  // depends on the score matrix
    4.11 @@ -123,6 +124,8 @@
    4.12  -r: match score   (2 if -M, else  6 if 0<Q<5, else 1 if DNA)\n\
    4.13  -q: mismatch cost (3 if -M, else 18 if 0<Q<5, else 1 if DNA)\n\
    4.14  -p: match/mismatch score matrix (protein-protein: BL62, DNA-protein: BL80)\n\
    4.15 +-X: N/X is ambiguous in: 0=neither sequence, 1=reference, 2=query, 3=both ("
    4.16 +    + stringify(ambiguousLetterOpt) + ")\n\
    4.17  -a: gap existence cost (DNA: 7, protein: 11, 0<Q<5: 21)\n\
    4.18  -b: gap extension cost (DNA: 1, protein:  2, 0<Q<5:  9)\n\
    4.19  -A: insertion existence cost (a)\n\
    4.20 @@ -180,7 +183,10 @@
    4.21  ";
    4.22  
    4.23    int c;
    4.24 -  const char optionString[] = "hVvf:" "r:q:p:a:b:A:B:c:F:x:y:z:d:e:" "D:E:"
    4.25 +  const char optionString[] =
    4.26 +    "hVvf:"
    4.27 +    "r:q:p:X:a:b:A:B:c:F:x:y:z:d:e:"
    4.28 +    "D:E:"
    4.29      "s:S:MT:m:l:L:n:N:C:K:k:W:i:P:R:u:w:t:g:G:j:Q:";
    4.30    while( (c = myGetopt(argc, argv, optionString)) != -1 ){
    4.31      switch(c){
    4.32 @@ -211,6 +217,10 @@
    4.33      case 'p':
    4.34        matrixFile = optarg;
    4.35        break;
    4.36 +    case 'X':
    4.37 +      unstringify(ambiguousLetterOpt, optarg);
    4.38 +      if (ambiguousLetterOpt < 0 || ambiguousLetterOpt > 3) badopt(c, optarg);
    4.39 +      break;
    4.40      case 'a':
    4.41        unstringify( gapExistCost, optarg );
    4.42        break;
     5.1 --- a/src/LastalArguments.hh	Wed Feb 27 14:29:58 2019 +0900
     5.2 +++ b/src/LastalArguments.hh	Thu Mar 28 16:46:23 2019 +0900
     5.3 @@ -80,6 +80,7 @@
     5.4    int gapPairCost;
     5.5    int frameshiftCost;
     5.6    std::string matrixFile;
     5.7 +  int ambiguousLetterOpt;
     5.8    int maxDropGapped;
     5.9    char maxDropGappedSuffix;
    5.10    int maxDropGapless;
     6.1 --- a/src/ScoreMatrix.cc	Wed Feb 27 14:29:58 2019 +0900
     6.2 +++ b/src/ScoreMatrix.cc	Thu Mar 28 16:46:23 2019 +0900
     6.3 @@ -11,7 +11,6 @@
     6.4  #include <cassert>
     6.5  #include <cctype>  // toupper, tolower
     6.6  #include <stddef.h>  // size_t
     6.7 -//#include <iostream>  // for debugging
     6.8  
     6.9  #define ERR(x) throw std::runtime_error(x)
    6.10  
    6.11 @@ -188,7 +187,7 @@
    6.12    return stream;
    6.13  }
    6.14  
    6.15 -const char *ambiguities[] = {
    6.16 +const char *ntAmbiguities[] = {
    6.17    "M" "AC",
    6.18    "S" "CG",
    6.19    "K" "GT",
    6.20 @@ -198,16 +197,21 @@
    6.21    "B" "CGT",
    6.22    "D" "AGT",
    6.23    "H" "ACT",
    6.24 -  "V" "ACG"
    6.25 +  "V" "ACG",
    6.26 +  "N" "ACGT"
    6.27  };
    6.28  
    6.29 -const size_t numOfAmbiguousSymbols = COUNTOF(ambiguities);
    6.30 +const char *aaAmbiguities[] = {
    6.31 +  "X" "ACDEFGHIKLMNPQRSTVWY"
    6.32 +};
    6.33  
    6.34  static bool isIn(const std::string& s, char x) {
    6.35    return find(s.begin(), s.end(), x) != s.end();
    6.36  }
    6.37  
    6.38 -static const char *ambiguityList(char symbol, char scratch[]) {
    6.39 +static const char *ambiguityList(const char *ambiguities[],
    6.40 +				 size_t numOfAmbiguousSymbols,
    6.41 +				 char symbol, char scratch[]) {
    6.42    for (size_t i = 0; i < numOfAmbiguousSymbols; ++i) {
    6.43      if (ambiguities[i][0] == symbol) return ambiguities[i] + 1;
    6.44    }
    6.45 @@ -251,7 +255,9 @@
    6.46    return scoreFromProb(scale, p / (rowProbSum * colProbSum));
    6.47  }
    6.48  
    6.49 -void ScoreMatrix::addAmbiguousScores(const uchar symbolToIndex[],
    6.50 +void ScoreMatrix::addAmbiguousScores(bool isDna, bool isFullyAmbiguousRow,
    6.51 +				     bool isFullyAmbiguousCol,
    6.52 +				     const uchar symbolToIndex[],
    6.53  				     double scale,
    6.54  				     const double rowSymbolProbs[],
    6.55  				     const double colSymbolProbs[]) {
    6.56 @@ -260,12 +266,18 @@
    6.57  
    6.58    char scratch[2] = {0};
    6.59  
    6.60 -  for (size_t k = 0; k < numOfAmbiguousSymbols; ++k) {
    6.61 +  const char **ambiguities = isDna ? ntAmbiguities : aaAmbiguities;
    6.62 +  size_t numOfAmbig = isDna ? COUNTOF(ntAmbiguities) : COUNTOF(aaAmbiguities);
    6.63 +  size_t numOfAmbiguousRows = numOfAmbig - 1 + isFullyAmbiguousRow;
    6.64 +  size_t numOfAmbiguousCols = numOfAmbig - 1 + isFullyAmbiguousCol;
    6.65 +
    6.66 +  for (size_t k = 0; k < numOfAmbiguousCols; ++k) {
    6.67      char ambiguousSymbol = ambiguities[k][0];
    6.68      if (isIn(colSymbols, ambiguousSymbol)) continue;
    6.69      colSymbols.push_back(ambiguousSymbol);
    6.70      for (size_t i = 0; i < rowSymbols.size(); ++i) {
    6.71 -      const char *rSymbols = ambiguityList(rowSymbols[i], scratch);
    6.72 +      const char *rSymbols = ambiguityList(ambiguities, numOfAmbiguousRows,
    6.73 +					   rowSymbols[i], scratch);
    6.74        const char *cSymbols = ambiguities[k] + 1;
    6.75        int s = jointScore(symbolToIndex, fastMatrix, scale,
    6.76  			 rowSymbolProbs, colSymbolProbs, rSymbols, cSymbols);
    6.77 @@ -273,14 +285,15 @@
    6.78      }
    6.79    }
    6.80  
    6.81 -  for (size_t k = 0; k < numOfAmbiguousSymbols; ++k) {
    6.82 +  for (size_t k = 0; k < numOfAmbiguousRows; ++k) {
    6.83      char ambiguousSymbol = ambiguities[k][0];
    6.84      if (isIn(rowSymbols, ambiguousSymbol)) continue;
    6.85      rowSymbols.push_back(ambiguousSymbol);
    6.86      cells.resize(cells.size() + 1);
    6.87      for (size_t j = 0; j < colSymbols.size(); ++j) {
    6.88        const char *rSymbols = ambiguities[k] + 1;
    6.89 -      const char *cSymbols = ambiguityList(colSymbols[j], scratch);
    6.90 +      const char *cSymbols = ambiguityList(ambiguities, numOfAmbiguousCols,
    6.91 +					   colSymbols[j], scratch);
    6.92        int s = jointScore(symbolToIndex, fastMatrix, scale,
    6.93  			 rowSymbolProbs, colSymbolProbs, rSymbols, cSymbols);
    6.94        cells.back().push_back(s);
     7.1 --- a/src/ScoreMatrix.hh	Wed Feb 27 14:29:58 2019 +0900
     7.2 +++ b/src/ScoreMatrix.hh	Thu Mar 28 16:46:23 2019 +0900
     7.3 @@ -32,8 +32,12 @@
     7.4  
     7.5    void init(const uchar symbolToIndex[]);  // unspecified letters get minScore
     7.6  
     7.7 -  // add scores for e.g. "W" meaning A or T
     7.8 -  void addAmbiguousScores(const uchar symbolToIndex[],
     7.9 +  // Add scores for e.g. "W" meaning A or T.  The 2nd and 3rd
    7.10 +  // arguments specify whether to add a row/column for the
    7.11 +  // fully-ambiguous letter (N if DNA, else X).
    7.12 +  void addAmbiguousScores(bool isDna,
    7.13 +			  bool isFullyAmbiguousRow, bool isFullyAmbiguousCol,
    7.14 +			  const uchar symbolToIndex[],
    7.15  			  double scale,  // "lambda" for getting probabilities
    7.16  			  const double rowSymbolProbs[],
    7.17  			  const double colSymbolProbs[]);
     8.1 --- a/src/lastal.cc	Wed Feb 27 14:29:58 2019 +0900
     8.2 +++ b/src/lastal.cc	Thu Mar 28 16:46:23 2019 +0900
     8.3 @@ -155,8 +155,11 @@
     8.4    if (args.outputType > 0) {
     8.5      calculateSubstitutionScoreMatrixStatistics();
     8.6      const mcf::SubstitutionMatrixStats &stats = fwdMatrices.stats;
     8.7 -    if (alph.letters == alph.dna && !stats.isBad()) {
     8.8 -      scoreMatrix.addAmbiguousScores(alph.encode, stats.lambda(),
     8.9 +    if (!stats.isBad()) {
    8.10 +      scoreMatrix.addAmbiguousScores(alph.letters == alph.dna,
    8.11 +				     args.ambiguousLetterOpt % 2,
    8.12 +				     args.ambiguousLetterOpt / 2,
    8.13 +				     alph.encode, stats.lambda(),
    8.14  				     stats.letterProbs1(),
    8.15  				     stats.letterProbs2());
    8.16        scoreMatrix.init(alph.encode);
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/test/dfam3-LTR22B1.fa	Thu Mar 28 16:46:23 2019 +0900
     9.3 @@ -0,0 +1,2 @@
     9.4 +>LTR22B1#LTR/ERVK
     9.5 +TGTGGGGGTTCAGTCAGGNTGGTGGGAAAAATTNTAAGACGNAGTTATAGGAAATAGACACAAACCTTCTTGGAAGGCCGGAAGGTTTTGCANAAGCCTCAGGATAGGGTTATGGCTGAAGGCAGCCTAATCCTTACCTTGAGTTAATAGCTTAGGGTAGNTAACANAGGAATGTAGAGGAGTTTATCTAAATAGCTTGTTTACTCATGTGGTCCTAAGACCAACCTTTGATCATCCGCGGGTGCATGATTGCTCTCTACTCGGGNGCGGGGGTGGGCAATCGGCAACCAGGTTAATTACCCTCTAGTGGTGTTTACTCGAGACCTTTGTCATTTNNCGNNNAATCTGTGCTGAATAAATGCNAGCNNNGCCGGCTAGTCGGGGCCGCGGCTGCNACTCTTTACAGCACCCTCCTTGGNGTCTGTGAGCGGCCCGGACCCTCAGCCGGACTGACAGGCANAATATCTGTGTCAGTGTACGTTATTCATCCGTCGTTGGGTCAGGGTCTGCGGGACGGACCCCCGCA
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/test/dfam3-LTR22C.fa	Thu Mar 28 16:46:23 2019 +0900
    10.3 @@ -0,0 +1,2 @@
    10.4 +>LTR22C#LTR/ERVK
    10.5 +TGTAGGGGNTCGGTCAGGGTGGTGGGAGAAATTGTAAAAATAAATTATAGGAAATAGACACAAACCTTCTTGGAAGGCCGGGAGGTTTTGCAAAAGCTTCAGNAAAGGGTTTGGCTGAAGGCAGCCGAATCCTCTTANCTTGAGTTANANAGCTNAGGGTAGATANCAAAGGAATGTAAAGGAGTTTATCTAAATGNGCTTGTTTACTCATGTGGTCCTAAGACCGACCTTTGATCATTTGTGCGCAGGACTGCTCTCTNCTCGGGGGGCGGCCATGTTAATTACCCACAAGTNGTGTTGACTCGAGGCCTTTGTCATTAAATCTGTACTGAATAAATGCCCGCAGCGCCAGCTTGTCAGGGCCGCGGCTGCTACAACTCTTTNCGNCGNNCGNCTNGGNNTCTGTGAGCGGCCCGGTCCCTTAGCCGCNCTGNCANNNCTGAATATCTGTGTCTGAGTGCACGTTNTTCATCCGTCGCGCGGCCAGGGTCTGCGGGTCAGACCCGGCA
    11.1 --- a/test/last-test.out	Wed Feb 27 14:29:58 2019 +0900
    11.2 +++ b/test/last-test.out	Thu Mar 28 16:46:23 2019 +0900
    11.3 @@ -3045,3 +3045,107 @@
    11.4  # batch 4
    11.5  # Query sequences=1000
    11.6  
    11.7 +#
    11.8 +# a=10 b=1 A=10 B=1 e=36 d=20 x=35 y=18 z=35 D=1e+06 E=9.86193e+08
    11.9 +# R=10 u=0 s=2 S=0 M=0 T=0 m=10 l=1 n=10 k=1 w=1000 t=1.82048 j=3 Q=0
   11.10 +# /tmp/last-test
   11.11 +# Reference sequences=1 normal letters=507
   11.12 +# lambda=0.545487 K=0.26232
   11.13 +#
   11.14 +#     A   C   G   T   M   S   K   W   R   Y   B   D   H   V
   11.15 +# A   2  -2  -2  -2   1  -2  -2   1   1  -2  -2   0   0   0
   11.16 +# C  -2   2  -2  -2   1   1  -2  -2  -2   1   0  -2   0   0
   11.17 +# G  -2  -2   2  -2  -2   1   1  -2   1  -2   0   0  -2   0
   11.18 +# T  -2  -2  -2   2  -2  -2   1   1  -2   1   0   0   0  -2
   11.19 +# M   1   1  -2  -2   1   0  -2   0   0   0   0   0   0   0
   11.20 +# S  -2   1   1  -2   0   1   0  -2   0   0   0   0   0   0
   11.21 +# K  -2  -2   1   1  -2   0   1   0   0   0   0   0   0   0
   11.22 +# W   1  -2  -2   1   0  -2   0   1   0   0   0   0   0   0
   11.23 +# R   1  -2   1  -2   0   0   0   0   1  -2   0   0   0   0
   11.24 +# Y  -2   1  -2   1   0   0   0   0  -2   1   0   0   0   0
   11.25 +# B  -2   0   0   0   0   0   0   0   0   0   0   0   0   0
   11.26 +# D   0  -2   0   0   0   0   0   0   0   0   0   0   0   0
   11.27 +# H   0   0  -2   0   0   0   0   0   0   0   0   0   0   0
   11.28 +# V   0   0   0  -2   0   0   0   0   0   0   0   0   0   0
   11.29 +# N   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   11.30 +#
   11.31 +# Coordinates are 0-based.  For - strand matches, coordinates
   11.32 +# in the reverse complement of the 2nd sequence are used.
   11.33 +#
   11.34 +# name start alnSize strand seqSize alignment
   11.35 +#
   11.36 +# batch 0
   11.37 +a score=558 EG2=1.7e-115 E=5e-132
   11.38 +s LTR22B1#LTR/ERVK 0 521 + 526 TGTGGGGGTTCAGTCAGGNTGGTGGGAAAAATTNTAAGACGNAGTTATAGGAAATAGACACAAACCTTCTTGGAAGGCCGGAAGGTTTTGCANAAGCCTCAGGATAGGGTTATGGCTGAAGGCAGCCTAATC--CTTACCTTGAGTTA-ATAGCTTAGGGTAGNTAACANAGGAATGTAGAGGAGTTTATCTAAAT-AGCTTGTTTACTCATGTGGTCCTAAGACCAACCTTTGATCATCCGCGGGTGCATGATTGCTCTCTACTCGGGNGCGGGGGTGGGCAATCGGCAACCAGGTTAATTACCCTCTAGTGGTGTTTACTCGAGACCTTTGTCATTTNNCGNNNAATCTGTGCTGAATAAATGCNAGCNNNGCCGGCTAGTCGGGGCCGCGGCTG---CNACTCTTTACAGCACCCTCCTTGGNGTCTGTGAGCGGCCCGGACCCTCAGCCGGACTGACA-GGCANAATATCTGTGTC--AGTGTACGTTATTCATCCGTCGTTGGGTCAGGGTCTGCGGGACGGACCC
   11.39 +s LTR22C#LTR/ERVK  0 505 + 509 TGTAGGGGNTCGGTCAGGGTGGTGGGAGAAATTGTAAAAATAAATTATAGGAAATAGACACAAACCTTCTTGGAAGGCCGGGAGGTTTTGCAAAAGCTTCAGNAAAGGGTT-TGGCTGAAGGCAGCCGAATCCTCTTANCTTGAGTTANANAGCTNAGGGTAGATANCAAAGGAATGTAAAGGAGTTTATCTAAATGNGCTTGTTTACTCATGTGGTCCTAAGACCGACCTTTGATCAT--TTGTGCGCAGGACTGCTCTCTNCTCGGGG----------------GGCGGCCATGTTAATTACCCACAAGTNGTGTTGACTCGAGGCCTTTGTCATT-------AAATCTGTACTGAATAAATGCCCGCAGCGCCAGCTTGTCAGGGCCGCGGCTGCTACAACTCTTTNCGNCGNNCGNCTNGGNNTCTGTGAGCGGCCCGGTCCCTTAGCCGCNCTGNCANNNCTGAATATCTGTGTCTGAGTGCACGTTNTTCATCCGTCGCGCGGCCAGGGTCTGCGGGTCAGACCC
   11.40 +
   11.41 +# Query sequences=1
   11.42 +#
   11.43 +# a=10 b=1 A=10 B=1 e=36 d=20 x=35 y=18 z=35 D=1e+06 E=9.86193e+08
   11.44 +# R=10 u=0 s=2 S=0 M=0 T=0 m=10 l=1 n=10 k=1 w=1000 t=1.82048 j=3 Q=0
   11.45 +# /tmp/last-test
   11.46 +# Reference sequences=1 normal letters=507
   11.47 +# lambda=0.545487 K=0.26232
   11.48 +#
   11.49 +#     A   C   G   T   M   S   K   W   R   Y   B   D   H   V   N
   11.50 +# A   2  -2  -2  -2   1  -2  -2   1   1  -2  -2   0   0   0   0
   11.51 +# C  -2   2  -2  -2   1   1  -2  -2  -2   1   0  -2   0   0   0
   11.52 +# G  -2  -2   2  -2  -2   1   1  -2   1  -2   0   0  -2   0   0
   11.53 +# T  -2  -2  -2   2  -2  -2   1   1  -2   1   0   0   0  -2   0
   11.54 +# M   1   1  -2  -2   1   0  -2   0   0   0   0   0   0   0   0
   11.55 +# S  -2   1   1  -2   0   1   0  -2   0   0   0   0   0   0   0
   11.56 +# K  -2  -2   1   1  -2   0   1   0   0   0   0   0   0   0   0
   11.57 +# W   1  -2  -2   1   0  -2   0   1   0   0   0   0   0   0   0
   11.58 +# R   1  -2   1  -2   0   0   0   0   1  -2   0   0   0   0   0
   11.59 +# Y  -2   1  -2   1   0   0   0   0  -2   1   0   0   0   0   0
   11.60 +# B  -2   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   11.61 +# D   0  -2   0   0   0   0   0   0   0   0   0   0   0   0   0
   11.62 +# H   0   0  -2   0   0   0   0   0   0   0   0   0   0   0   0
   11.63 +# V   0   0   0  -2   0   0   0   0   0   0   0   0   0   0   0
   11.64 +#
   11.65 +# Coordinates are 0-based.  For - strand matches, coordinates
   11.66 +# in the reverse complement of the 2nd sequence are used.
   11.67 +#
   11.68 +# name start alnSize strand seqSize alignment
   11.69 +#
   11.70 +# batch 0
   11.71 +a score=572 EG2=8.1e-119 E=7e-136
   11.72 +s LTR22B1#LTR/ERVK 0 521 + 526 TGTGGGGGTTCAGTCAGGNTGGTGGGAAAAATTNTAAGACGNAGTTATAGGAAATAGACACAAACCTTCTTGGAAGGCCGGAAGGTTTTGCANAAGCCTCAGGATAGGGTTATGGCTGAAGGCAGCCTAATC--CTTACCTTGAGTTA-ATAGCTTAGGGTAGNTAACANAGGAATGTAGAGGAGTTTATCTAAAT-AGCTTGTTTACTCATGTGGTCCTAAGACCAACCTTTGATCATCCGCGGGTGCATGATTGCTCTCTACTCGGGNGCGGGGGTGGGCAATCGGCAACCAGGTTAATTACCCTCTAGTGGTGTTTACTCGAGACCTTTGTCATTTNNCGNNNAATCTGTGCTGAATAAATGCNAGCNNNGCCGGCTAGTCGGGGCCGCGGCTG---CNACTCTTTACAGCACCCTCCTTGGNGTCTGTGAGCGGCCCGGACCCTCAGCCGGACTGACA-GGCANAATATCTGTGTC--AGTGTACGTTATTCATCCGTCGTTGGGTCAGGGTCTGCGGGACGGACCC
   11.73 +s LTR22C#LTR/ERVK  0 505 + 509 TGTAGGGGNTCGGTCAGGGTGGTGGGAGAAATTGTAAAAATAAATTATAGGAAATAGACACAAACCTTCTTGGAAGGCCGGGAGGTTTTGCAAAAGCTTCAGNAAAGGGTT-TGGCTGAAGGCAGCCGAATCCTCTTANCTTGAGTTANANAGCTNAGGGTAGATANCAAAGGAATGTAAAGGAGTTTATCTAAATGNGCTTGTTTACTCATGTGGTCCTAAGACCGACCTTTGATCAT--TTGTGCGCAGGACTGCTCTCTNCTCGGG----------------GGGCGGCCATGTTAATTACCCACAAGTNGTGTTGACTCGAGGCCTTTGTCATT-------AAATCTGTACTGAATAAATGCCCGCAGCGCCAGCTTGTCAGGGCCGCGGCTGCTACAACTCTTTNCGNCGNNCGNCTNGGNNTCTGTGAGCGGCCCGGTCCCTTAGCCGCNCTGNCANNNCTGAATATCTGTGTCTGAGTGCACGTTNTTCATCCGTCGCGCGGCCAGGGTCTGCGGGTCAGACCC
   11.74 +
   11.75 +# Query sequences=1
   11.76 +#
   11.77 +# a=10 b=1 A=10 B=1 e=36 d=20 x=35 y=18 z=35 D=1e+06 E=9.86193e+08
   11.78 +# R=10 u=0 s=2 S=0 M=0 T=0 m=10 l=1 n=10 k=1 w=1000 t=1.82048 j=3 Q=0
   11.79 +# /tmp/last-test
   11.80 +# Reference sequences=1 normal letters=507
   11.81 +# lambda=0.545487 K=0.26232
   11.82 +#
   11.83 +#     A   C   G   T   M   S   K   W   R   Y   B   D   H   V   N
   11.84 +# A   2  -2  -2  -2   1  -2  -2   1   1  -2  -2   0   0   0   0
   11.85 +# C  -2   2  -2  -2   1   1  -2  -2  -2   1   0  -2   0   0   0
   11.86 +# G  -2  -2   2  -2  -2   1   1  -2   1  -2   0   0  -2   0   0
   11.87 +# T  -2  -2  -2   2  -2  -2   1   1  -2   1   0   0   0  -2   0
   11.88 +# M   1   1  -2  -2   1   0  -2   0   0   0   0   0   0   0   0
   11.89 +# S  -2   1   1  -2   0   1   0  -2   0   0   0   0   0   0   0
   11.90 +# K  -2  -2   1   1  -2   0   1   0   0   0   0   0   0   0   0
   11.91 +# W   1  -2  -2   1   0  -2   0   1   0   0   0   0   0   0   0
   11.92 +# R   1  -2   1  -2   0   0   0   0   1  -2   0   0   0   0   0
   11.93 +# Y  -2   1  -2   1   0   0   0   0  -2   1   0   0   0   0   0
   11.94 +# B  -2   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   11.95 +# D   0  -2   0   0   0   0   0   0   0   0   0   0   0   0   0
   11.96 +# H   0   0  -2   0   0   0   0   0   0   0   0   0   0   0   0
   11.97 +# V   0   0   0  -2   0   0   0   0   0   0   0   0   0   0   0
   11.98 +# N   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   11.99 +#
  11.100 +# Coordinates are 0-based.  For - strand matches, coordinates
  11.101 +# in the reverse complement of the 2nd sequence are used.
  11.102 +#
  11.103 +# name start alnSize strand seqSize alignment
  11.104 +#
  11.105 +# batch 0
  11.106 +a score=602 EG2=6.4e-126 E=2.5e-144
  11.107 +s LTR22B1#LTR/ERVK 0 521 + 526 TGTGGGGGTTCAGTCAGGNTGGTGGGAAAAATTNTAAGACGNAGTTATAGGAAATAGACACAAACCTTCTTGGAAGGCCGGAAGGTTTTGCANAAGCCTCAGGATAGGGTTATGGCTGAAGGCAGCCTAATC--CTTACCTTGAGTTA-ATAGCTTAGGGTAGNTAACANAGGAATGTAGAGGAGTTTATCTAAAT-AGCTTGTTTACTCATGTGGTCCTAAGACCAACCTTTGATCATCCGCGGGTGCATGATTGCTCTCTACTCGGGNGCGGGGGTGGGCAATCGGCAACCAGGTTAATTACCCTCTAGTGGTGTTTACTCGAGACCTTTGTCATTTNNCGNNNAATCTGTGCTGAATAAATGCNAGCNNNGCCGGCTAGTCGGGGCCGCGGCTG---CNACTCTTTACAGCACCCTCCTTGGNGTCTGTGAGCGGCCCGGACCCTCAGCCGGACTGACA-GGCANAATATCTGTGTC--AGTGTACGTTATTCATCCGTCGTTGGGTCAGGGTCTGCGGGACGGACCC
  11.108 +s LTR22C#LTR/ERVK  0 505 + 509 TGTAGGGGNTCGGTCAGGGTGGTGGGAGAAATTGTAAAAATAAATTATAGGAAATAGACACAAACCTTCTTGGAAGGCCGGGAGGTTTTGCAAAAGCTTCAGNAAAGGGTT-TGGCTGAAGGCAGCCGAATCCTCTTANCTTGAGTTANANAGCTNAGGGTAGATANCAAAGGAATGTAAAGGAGTTTATCTAAATGNGCTTGTTTACTCATGTGGTCCTAAGACCGACCTTTGATCAT--TTGTGCGCAGGACTGCTCTCTNCTCGGGG----------------GGCGGCCATGTTAATTACCCACAAGTNGTGTTGACTCGAGGCCTTTGTCATT-------AAATCTGTACTGAATAAATGCCCGCAGCGCCAGCTTGTCAGGGCCGCGGCTGCTACAACTCTTTNCGNCGNNCGNCTNGGNNTCTGTGAGCGGCCCGGTCCCTTAGCCGCNCTGNCANNNCTGAATATCTGTGTCTGAGTGCACGTTNTTCATCCGTCGCGCGGCCAGGGTCTGCGGGTCAGACCC
  11.109 +
  11.110 +# Query sequences=1
    12.1 --- a/test/last-test.sh	Wed Feb 27 14:29:58 2019 +0900
    12.2 +++ b/test/last-test.sh	Thu Mar 28 16:46:23 2019 +0900
    12.3 @@ -197,6 +197,11 @@
    12.4      # fastq-versus-fastq gapless overlap alignment
    12.5      lastdb -Q1 -uNEAR -cR01 $db $fastq
    12.6      try lastal -Q1 -T1 -j1 -s0 $db $fastq
    12.7 +
    12.8 +    lastdb $db dfam3-LTR22B1.fa
    12.9 +    lastal -r2 -q2 -a10 -X1 $db dfam3-LTR22C.fa
   12.10 +    lastal -r2 -q2 -a10 -X2 $db dfam3-LTR22C.fa
   12.11 +    lastal -r2 -q2 -a10 -X3 $db dfam3-LTR22C.fa
   12.12  } 2>&1 |
   12.13  grep -v version |  # omit header lines with the LAST version number
   12.14  diff -u last-test.out -