Add NCBI genetic codes
author Martin C. Frith
Mon Oct 07 10:57:27 2019 +0900 (6 weeks ago)
changeset 986 88adecc284af
parent 985 49caf55d4d4d
child 987 f757e6ca4edb
Add NCBI genetic codes
build/gc-inc.sh
data/gc.prt
doc/lastal.txt
src/GeneticCode.cc
src/GeneticCode.hh
src/LastalArguments.cc
src/lastal.cc
src/makefile
test/last-test.out
test/last-test.sh
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/build/gc-inc.sh	Mon Oct 07 10:57:27 2019 +0900
     1.3 @@ -0,0 +1,20 @@
     1.4 +#! /bin/sh
     1.5 +
     1.6 +# This generates source code from genetic codes.
     1.7 +
     1.8 +cat <<EOF
     1.9 +const struct {
    1.10 +  const char *name;
    1.11 +  const char *text;
    1.12 +} geneticCodes[] = {
    1.13 +EOF
    1.14 +
    1.15 +cat "$@" | tr -d '",' |
    1.16 +awk '
    1.17 +$1 == "id" {print "{\"" $2 "\", \"\\"}
    1.18 +$1 == "ncbieaa" {print "  AAs = " $2 "\\n\\"}
    1.19 +/-- Base/ {print $2 " = " $3 "\\n\\"}
    1.20 +/-- Base3/ {print "\"},"}
    1.21 +'
    1.22 +
    1.23 +echo "};"
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/data/gc.prt	Mon Oct 07 10:57:27 2019 +0900
     2.3 @@ -0,0 +1,355 @@
     2.4 +--**************************************************************************
     2.5 +--  This is the NCBI genetic code table
     2.6 +--  Initial base data set from Andrzej Elzanowski while at PIR International
     2.7 +--  Addition of Eubacterial and Alternative Yeast by J.Ostell at NCBI
     2.8 +--  Base 1-3 of each codon have been added as comments to facilitate
     2.9 +--    readability at the suggestion of Peter Rice, EMBL
    2.10 +--  Later additions by Taxonomy Group staff at NCBI
    2.11 +--
    2.12 +--  Version 4.5
    2.13 +--     Added Cephalodiscidae mitochondrial genetic code 33
    2.14 +--
    2.15 +--  Version 4.4
    2.16 +--     Added GTG as start codon for genetic code 3
    2.17 +--     Added Balanophoraceae plastid genetic code 32
    2.18 +--
    2.19 +--  Version 4.3
    2.20 +--     Change to CTG -> Leu in genetic codes 27, 28, 29, 30
    2.21 +--
    2.22 +--  Version 4.2
    2.23 +--     Added Karyorelict nuclear genetic code 27
    2.24 +--     Added Condylostoma nuclear genetic code 28
    2.25 +--     Added Mesodinium nuclear genetic code 29
    2.26 +--     Added Peritrich nuclear genetic code 30
    2.27 +--     Added Blastocrithidia nuclear genetic code 31
    2.28 +--
    2.29 +--  Version 4.1
    2.30 +--     Added Pachysolen tannophilus nuclear genetic code 26
    2.31 +--
    2.32 +--  Version 4.0
    2.33 +--     Updated version to reflect numerous undocumented changes:
    2.34 +--     Corrected start codons for genetic code 25
    2.35 +--     Name of new genetic code is Candidate Division SR1 and Gracilibacteria
    2.36 +--     Added candidate division SR1 nuclear genetic code 25
    2.37 +--     Added GTG as start codon for genetic code 24
    2.38 +--     Corrected Pterobranchia Mitochondrial genetic code (24)
    2.39 +--     Added genetic code 24, Pterobranchia Mitochondrial
    2.40 +--     Genetic code 11 is now Bacterial, Archaeal and Plant Plastid
    2.41 +--     Fixed capitalization of mitochondrial in codes 22 and 23
    2.42 +--     Added GTG, ATA, and TTG as alternative start codons to code 13
    2.43 +--
    2.44 +--  Version 3.9
    2.45 +--     Code 14 differs from code 9 only by translating UAA to Tyr rather than
    2.46 +--     STOP.  A recent study (Telford et al, 2000) has found no evidence that
    2.47 +--     the codon UAA codes for Tyr in the flatworms, but other opinions exist.
    2.48 +--     There are very few GenBank records that are translated with code 14,
    2.49 +--     but a test translation shows that retranslating these records with code
    2.50 +--     9 can cause premature terminations.  Therefore, GenBank will maintain
    2.51 +--     code 14 until further information becomes available.
    2.52 +--
    2.53 +--  Version 3.8
    2.54 +--     Added GTG start to Echinoderm mitochondrial code, code 9
    2.55 +--
    2.56 +--  Version 3.7
    2.57 +--     Added code 23 Thraustochytrium mitochondrial code
    2.58 +--        formerly OGMP code 93
    2.59 +--        submitted by Gertraude Berger, Ph.D.
    2.60 +--
    2.61 +--  Version 3.6
    2.62 +--     Added code 22 TAG-Leu, TCA-stop
    2.63 +--        found in mitochondrial DNA of Scenedesmus obliquus
    2.64 +--        submitted by Gertraude Berger, Ph.D.
    2.65 +--        Organelle Genome Megasequencing Program, Univ Montreal
    2.66 +--
    2.67 +--  Version 3.5
    2.68 +--     Added code 21, Trematode Mitochondrial
    2.69 +--       (as deduced from: Garey & Wolstenholme,1989; Ohama et al, 1990)
    2.70 +--     Added code 16, Chlorophycean Mitochondrial
    2.71 +--       (TAG can be translated to Leucine instead of STOP in chlorophyceans
    2.72 +--        and fungi)
    2.73 +--
    2.74 +--  Version 3.4
    2.75 +--     Added CTG,TTG as allowed alternate start codons in Standard code.
    2.76 +--        Prats et al. 1989, Hann et al. 1992
    2.77 +--
    2.78 +--  Version 3.3 - 10/13/95
    2.79 +--     Added alternate initiation codon ATC to code 5
    2.80 +--        based on complete mitochondrial genome of honeybee
    2.81 +--        Crozier and Crozier (1993)
    2.82 +--
    2.83 +--  Version 3.2 - 6/24/95
    2.84 +--  Code       Comments
    2.85 +--   10        Alternative Ciliate Macronuclear renamed to Euplotid Macro...
    2.86 +--   15        Blepharisma Macro.. code added
    2.87 +--    5        Invertebrate Mito.. GTG allowed as alternate initiator
    2.88 +--   11        Eubacterial renamed to Bacterial as most alternate starts
    2.89 +--               have been found in Archaea
    2.90 +--
    2.91 +--
    2.92 +--  Version 3.1 - 1995
    2.93 +--  Updated as per Andrzej Elzanowski at NCBI
    2.94 +--     Complete documentation in NCBI toolkit documentation
    2.95 +--  Note: 2 genetic codes have been deleted
    2.96 +--
    2.97 +--   Old id   Use id     - Notes
    2.98 +--
    2.99 +--   id 7      id 4      - Kinetoplast code now merged in code id 4
   2.100 +--   id 8      id 1      - all plant chloroplast differences due to RNA edit
   2.101 +--
   2.102 +--
   2.103 +--*************************************************************************
   2.104 +
   2.105 +Genetic-code-table ::= {
   2.106 + {
   2.107 +  name "Standard" ,
   2.108 +  name "SGC0" ,
   2.109 +  id 1 ,
   2.110 +  ncbieaa  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.111 +  sncbieaa "---M------**--*----M---------------M----------------------------"
   2.112 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.113 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.114 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.115 + },
   2.116 + {
   2.117 +  name "Vertebrate Mitochondrial" ,
   2.118 +  name "SGC1" ,
   2.119 +  id 2 ,
   2.120 +  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
   2.121 +  sncbieaa "----------**--------------------MMMM----------**---M------------"
   2.122 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.123 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.124 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.125 + },
   2.126 + {
   2.127 +  name "Yeast Mitochondrial" ,
   2.128 +  name "SGC2" ,
   2.129 +  id 3 ,
   2.130 +  ncbieaa  "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.131 +  sncbieaa "----------**----------------------MM---------------M------------"
   2.132 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.133 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.134 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.135 + },
   2.136 + {
   2.137 +    name "Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate
   2.138 + Mitochondrial; Mycoplasma; Spiroplasma" ,
   2.139 +  name "SGC3" ,
   2.140 +  id 4 ,
   2.141 +  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.142 +  sncbieaa "--MM------**-------M------------MMMM---------------M------------"
   2.143 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.144 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.145 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.146 + },
   2.147 + {
   2.148 +  name "Invertebrate Mitochondrial" ,
   2.149 +  name "SGC4" ,
   2.150 +  id 5 ,
   2.151 +  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
   2.152 +  sncbieaa "---M------**--------------------MMMM---------------M------------"
   2.153 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.154 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.155 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.156 + },
   2.157 + {
   2.158 +  name "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear" ,
   2.159 +  name "SGC5" ,
   2.160 +  id 6 ,
   2.161 +  ncbieaa  "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.162 +  sncbieaa "--------------*--------------------M----------------------------"
   2.163 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.164 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.165 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.166 + },
   2.167 + {
   2.168 +  name "Echinoderm Mitochondrial; Flatworm Mitochondrial" ,
   2.169 +  name "SGC8" ,
   2.170 +  id 9 ,
   2.171 +  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
   2.172 +  sncbieaa "----------**-----------------------M---------------M------------"
   2.173 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.174 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.175 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.176 + },
   2.177 + {
   2.178 +  name "Euplotid Nuclear" ,
   2.179 +  name "SGC9" ,
   2.180 +  id 10 ,
   2.181 +  ncbieaa  "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.182 +  sncbieaa "----------**-----------------------M----------------------------"
   2.183 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.184 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.185 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.186 + },
   2.187 + {
   2.188 +  name "Bacterial, Archaeal and Plant Plastid" ,
   2.189 +  id 11 ,
   2.190 +  ncbieaa  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.191 +  sncbieaa "---M------**--*----M------------MMMM---------------M------------"
   2.192 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.193 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.194 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.195 + },
   2.196 + {
   2.197 +  name "Alternative Yeast Nuclear" ,
   2.198 +  id 12 ,
   2.199 +  ncbieaa  "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.200 +  sncbieaa "----------**--*----M---------------M----------------------------"
   2.201 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.202 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.203 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.204 + },
   2.205 + {
   2.206 +  name "Ascidian Mitochondrial" ,
   2.207 +  id 13 ,
   2.208 +  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
   2.209 +  sncbieaa "---M------**----------------------MM---------------M------------"
   2.210 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.211 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.212 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.213 + },
   2.214 + {
   2.215 +  name "Alternative Flatworm Mitochondrial" ,
   2.216 +  id 14 ,
   2.217 +  ncbieaa  "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
   2.218 +  sncbieaa "-----------*-----------------------M----------------------------"
   2.219 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.220 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.221 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.222 + } ,
   2.223 + {
   2.224 +  name "Blepharisma Macronuclear" ,
   2.225 +  id 15 ,
   2.226 +  ncbieaa  "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.227 +  sncbieaa "----------*---*--------------------M----------------------------"
   2.228 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.229 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.230 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.231 + } ,
   2.232 + {
   2.233 +  name "Chlorophycean Mitochondrial" ,
   2.234 +  id 16 ,
   2.235 +  ncbieaa  "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.236 +  sncbieaa "----------*---*--------------------M----------------------------"
   2.237 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.238 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.239 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.240 + } ,
   2.241 + {
   2.242 +  name "Trematode Mitochondrial" ,
   2.243 +  id 21 ,
   2.244 +  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
   2.245 +  sncbieaa "----------**-----------------------M---------------M------------"
   2.246 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.247 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.248 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.249 + } ,
   2.250 + {
   2.251 +  name "Scenedesmus obliquus Mitochondrial" ,
   2.252 +  id 22 ,
   2.253 +  ncbieaa  "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.254 +  sncbieaa "------*---*---*--------------------M----------------------------"
   2.255 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.256 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.257 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.258 + } ,
   2.259 + {
   2.260 +  name "Thraustochytrium Mitochondrial" ,
   2.261 +  id 23 ,
   2.262 +  ncbieaa  "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.263 +  sncbieaa "--*-------**--*-----------------M--M---------------M------------"
   2.264 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.265 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.266 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.267 + } ,
   2.268 + {
   2.269 +  name "Pterobranchia Mitochondrial" ,
   2.270 +  id 24 ,
   2.271 +  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
   2.272 +  sncbieaa "---M------**-------M---------------M---------------M------------"
   2.273 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.274 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.275 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.276 + } ,
   2.277 + {
   2.278 +  name "Candidate Division SR1 and Gracilibacteria" ,
   2.279 +  id 25 ,
   2.280 +  ncbieaa  "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.281 +  sncbieaa "---M------**-----------------------M---------------M------------"
   2.282 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.283 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.284 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.285 + } ,
   2.286 + {
   2.287 +  name "Pachysolen tannophilus Nuclear" ,
   2.288 +  id 26 ,
   2.289 +  ncbieaa  "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.290 +  sncbieaa "----------**--*----M---------------M----------------------------"
   2.291 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.292 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.293 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.294 + } ,
   2.295 + {
   2.296 +  name "Karyorelict Nuclear" ,
   2.297 +  id 27 ,
   2.298 +  ncbieaa  "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.299 +  sncbieaa "--------------*--------------------M----------------------------"
   2.300 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.301 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.302 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.303 + } ,
   2.304 + {
   2.305 +  name "Condylostoma Nuclear" ,
   2.306 +  id 28 ,
   2.307 +  ncbieaa  "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.308 +  sncbieaa "----------**--*--------------------M----------------------------"
   2.309 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.310 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.311 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.312 + } ,
   2.313 + {
   2.314 +  name "Mesodinium Nuclear" ,
   2.315 +  id 29 ,
   2.316 +  ncbieaa  "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.317 +  sncbieaa "--------------*--------------------M----------------------------"
   2.318 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.319 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.320 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.321 + } ,
   2.322 + {
   2.323 +  name "Peritrich Nuclear" ,
   2.324 +  id 30 ,
   2.325 +  ncbieaa  "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.326 +  sncbieaa "--------------*--------------------M----------------------------"
   2.327 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.328 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.329 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.330 + } ,
   2.331 + {
   2.332 +  name "Blastocrithidia Nuclear" ,
   2.333 +  id 31 ,
   2.334 +  ncbieaa  "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.335 +  sncbieaa "----------**-----------------------M----------------------------"
   2.336 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.337 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.338 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.339 + } ,
   2.340 + {
   2.341 +  name "Balanophoraceae Plastid" ,
   2.342 +  id 32 ,
   2.343 +  ncbieaa  "FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
   2.344 +  sncbieaa "---M------*---*----M------------MMMM---------------M------------"
   2.345 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.346 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.347 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.348 + } ,
   2.349 + {
   2.350 +  name "Cephalodiscidae Mitochondrial" ,
   2.351 +  id 33 ,
   2.352 +  ncbieaa  "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
   2.353 +  sncbieaa "---M-------*-------M---------------M---------------M------------"
   2.354 +  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
   2.355 +  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
   2.356 +  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   2.357 + }
   2.358 +}
     3.1 --- a/doc/lastal.txt	Mon Oct 07 10:02:38 2019 +0900
     3.2 +++ b/doc/lastal.txt	Mon Oct 07 10:57:27 2019 +0900
     3.3 @@ -400,12 +400,14 @@
     3.4  
     3.5        Use -w0 to turn this off.
     3.6  
     3.7 -  -G FILE
     3.8 -      Use an alternative genetic code in the specified file.  For an
     3.9 -      example of the format, see vertebrateMito.gc in the examples
    3.10 -      directory.  By default, the standard genetic code is used.  This
    3.11 +  -G GENETIC-CODE
    3.12 +      Specify the genetic code for translating DNA to protein.  This
    3.13        option has no effect unless DNA-versus-protein alignment is
    3.14 -      selected with option -F.
    3.15 +      selected with option -F.  Codes are specified by numbers
    3.16 +      (e.g. 1 = standard, 2 = vertebrate mitochondrial), listed here:
    3.17 +      https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi.  Any
    3.18 +      other GENETIC-CODE is assumed to be a file name: for an example
    3.19 +      of the format, see vertebrateMito.gc in the examples directory.
    3.20  
    3.21    -t TEMPERATURE
    3.22        Parameter for converting between scores and probability ratios.
     4.1 --- a/src/GeneticCode.cc	Mon Oct 07 10:02:38 2019 +0900
     4.2 +++ b/src/GeneticCode.cc	Mon Oct 07 10:57:27 2019 +0900
     4.3 @@ -1,38 +1,30 @@
     4.4  // Copyright 2009 Toshiyuki Sato
     4.5  
     4.6  #include "GeneticCode.hh"
     4.7 +#include "GeneticCodeData.hh"
     4.8  #include "Alphabet.hh"
     4.9 +#include "zio.hh"
    4.10 +
    4.11  #include <cctype>  // toupper, tolower, islower
    4.12  #include <fstream>
    4.13  #include <sstream>
    4.14  #include <stdexcept>
    4.15  //#include <iostream>  // for debugging
    4.16  
    4.17 +#define COUNTOF(a) (sizeof (a) / sizeof *(a))
    4.18 +
    4.19  namespace cbrc{
    4.20  
    4.21 -const char* GeneticCode::standard = "\
    4.22 -AAs  =   FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\n\
    4.23 -Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n\
    4.24 -Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n\
    4.25 -Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n\
    4.26 -";
    4.27 -
    4.28 -//
    4.29 -void GeneticCode::fromFile( const std::string& tableFile )
    4.30 -{
    4.31 -  std::ifstream file(tableFile.c_str(),std::ios::in);
    4.32 -  if( !file ) throw std::runtime_error("can't open file: " + tableFile);
    4.33 -  file >> *this;
    4.34 -
    4.35 -  return;
    4.36 +std::string GeneticCode::stringFromName(const std::string &name) {
    4.37 +  for (size_t i = 0; i < COUNTOF(geneticCodes); ++i)
    4.38 +    if (name == geneticCodes[i].name)
    4.39 +      return geneticCodes[i].text;
    4.40 +  return slurp(name.c_str());
    4.41  }
    4.42  
    4.43 -//
    4.44  void GeneticCode::fromString( const std::string& s ){
    4.45    std::istringstream iss(s);
    4.46    iss >> *this;
    4.47 -
    4.48 -  return;
    4.49  }
    4.50  
    4.51  //
     5.1 --- a/src/GeneticCode.hh	Mon Oct 07 10:02:38 2019 +0900
     5.2 +++ b/src/GeneticCode.hh	Mon Oct 07 10:57:27 2019 +0900
     5.3 @@ -31,15 +31,16 @@
     5.4    friend std::istream& operator>>( std::istream& stream, GeneticCode& codon );
     5.5  
     5.6   public:
     5.7 -  void fromFile( const std::string& codeTable );
     5.8 +  // Converts a name to a text string defining a genetic code.
     5.9 +  // If the name isn't known, it assumes it's a file and tries to read it.
    5.10 +  static std::string stringFromName(const std::string &name);
    5.11 +
    5.12    void fromString( const std::string& s );
    5.13    void codeTableSet( const Alphabet& aaAlph, const Alphabet& dnaAlph );
    5.14    void translate( const uchar* beg, const uchar* end, uchar* dest ) const;
    5.15  
    5.16    uchar translation( const uchar* codon ) const
    5.17    { return genome2residue[ codon2number( codon ) ]; }
    5.18 -
    5.19 -  static const char* standard;  // the standard genetic code
    5.20  };
    5.21  
    5.22  // Convert an amino-acid (translated) coordinate to a DNA coordinate
     6.1 --- a/src/LastalArguments.cc	Mon Oct 07 10:02:38 2019 +0900
     6.2 +++ b/src/LastalArguments.cc	Mon Oct 07 10:57:27 2019 +0900
     6.3 @@ -112,7 +112,7 @@
     6.4    maxRepeatDistance(1000),  // sufficiently conservative?
     6.5    temperature(-1),  // depends on the score matrix
     6.6    gamma(1),
     6.7 -  geneticCodeFile(""),
     6.8 +  geneticCodeFile("1"),
     6.9    verbosity(0){}
    6.10  
    6.11  void LastalArguments::fromArgs( int argc, char** argv, bool optionsOnly ){
    6.12 @@ -181,7 +181,7 @@
    6.13      2=gapless+postmask, 3=always (2 if lastdb -c and Q<5, else 0)\n\
    6.14  -w: suppress repeats inside exact matches, offset by <= this distance ("
    6.15      + stringify(maxRepeatDistance) + ")\n\
    6.16 --G: genetic code file\n\
    6.17 +-G: genetic code (" + geneticCodeFile + ")\n\
    6.18  -t: 'temperature' for calculating probabilities (1/lambda)\n\
    6.19  -g: 'gamma' parameter for gamma-centroid and LAMA ("
    6.20      + stringify(gamma) + ")\n\
     7.1 --- a/src/lastal.cc	Mon Oct 07 10:02:38 2019 +0900
     7.2 +++ b/src/lastal.cc	Mon Oct 07 10:57:27 2019 +0900
     7.3 @@ -259,7 +259,7 @@
     7.4    const char *canonicalMatrixName = ScoreMatrix::canonicalName( matrixName );
     7.5    if (args.temperature > 0 && !matrixName.empty()) canonicalMatrixName = " ";
     7.6    bool isGapped = (args.outputType > 1);
     7.7 -  bool isStandardGeneticCode = args.geneticCodeFile.empty();
     7.8 +  bool isStandardGeneticCode = (args.geneticCodeFile == "1");
     7.9    LOG( "getting E-value parameters..." );
    7.10    try{
    7.11      const mcf::GapCosts::Piece &del = gapCosts.delPieces[0];
    7.12 @@ -1160,10 +1160,7 @@
    7.13      if( isDna )  // allow user-defined alphabet
    7.14        ERR( "expected protein database, but got DNA" );
    7.15      queryAlph.fromString( queryAlph.dna );
    7.16 -    if( args.geneticCodeFile.empty() )
    7.17 -      geneticCode.fromString( geneticCode.standard );
    7.18 -    else
    7.19 -      geneticCode.fromFile( args.geneticCodeFile );
    7.20 +    geneticCode.fromString(GeneticCode::stringFromName(args.geneticCodeFile));
    7.21      geneticCode.codeTableSet( alph, queryAlph );
    7.22      query.initForAppending(3);
    7.23    }
     8.1 --- a/src/makefile	Mon Oct 07 10:02:38 2019 +0900
     8.2 +++ b/src/makefile	Mon Oct 07 10:57:27 2019 +0900
     8.3 @@ -108,6 +108,9 @@
     8.4  CyclicSubsetSeedData.hh: ../data/*.seed
     8.5  	../build/seed-inc.sh ../data/*.seed > $@
     8.6  
     8.7 +GeneticCodeData.hh: ../data/gc.prt
     8.8 +	../build/gc-inc.sh ../data/gc.prt > $@
     8.9 +
    8.10  ScoreMatrixData.hh: ../data/*.mat
    8.11  	../build/mat-inc.sh ../data/*.mat > $@
    8.12  
    8.13 @@ -165,7 +168,8 @@
    8.14   GappedXdropAligner.hh ScoreMatrixRow.hh GappedXdropAlignerInl.hh
    8.15  GappedXdropAlignerPssm.o GappedXdropAlignerPssm.o8: GappedXdropAlignerPssm.cc GappedXdropAligner.hh \
    8.16   ScoreMatrixRow.hh GappedXdropAlignerInl.hh
    8.17 -GeneticCode.o GeneticCode.o8: GeneticCode.cc GeneticCode.hh Alphabet.hh
    8.18 +GeneticCode.o GeneticCode.o8: GeneticCode.cc GeneticCode.hh GeneticCodeData.hh \
    8.19 + Alphabet.hh zio.hh mcf_zstream.hh
    8.20  GreedyXdropAligner.o GreedyXdropAligner.o8: GreedyXdropAligner.cc GreedyXdropAligner.hh \
    8.21   ScoreMatrixRow.hh
    8.22  LambdaCalculator.o LambdaCalculator.o8: LambdaCalculator.cc LambdaCalculator.hh \
     9.1 --- a/test/last-test.out	Mon Oct 07 10:02:38 2019 +0900
     9.2 +++ b/test/last-test.out	Mon Oct 07 10:57:27 2019 +0900
     9.3 @@ -372,6 +372,90 @@
     9.4  # batch 1
     9.5  # Query sequences=2
     9.6  
     9.7 +TEST lastal -F12 -pBL62 -e40 -G2 -j1 /tmp/last-test galGal3-M-32.fa
     9.8 +#
     9.9 +# a=11 b=2 A=11 B=2 F=12 e=40 d=40 x=39 y=31 z=39 D=1e+06 E=1.06052e+09
    9.10 +# R=10 u=0 s=2 S=0 M=0 T=0 m=10 l=1 n=10 k=1 w=1000 t=3.08611 j=1 Q=0
    9.11 +# /tmp/last-test
    9.12 +# Reference sequences=1 normal letters=491
    9.13 +# lambda=0.354842 K=0.156005
    9.14 +#
    9.15 +#    A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  J  Z  X  *
    9.16 +# A  4 -1 -2 -2  0 -1 -1  0 -2 -1 -1 -1 -1 -2 -1  1  0 -3 -2  0 -2 -1 -1 -1 -4
    9.17 +# R -1  5  0 -2 -3  1  0 -2  0 -3 -2  2 -1 -3 -2 -1 -1 -3 -2 -3 -1 -2  0 -1 -4
    9.18 +# N -2  0  6  1 -3  0  0  0  1 -3 -3  0 -2 -3 -2  1  0 -4 -2 -3  4 -3  0 -1 -4
    9.19 +# D -2 -2  1  6 -3  0  2 -1 -1 -3 -4 -1 -3 -3 -1  0 -1 -4 -3 -3  4 -3  1 -1 -4
    9.20 +# C  0 -3 -3 -3  9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -1 -3 -1 -4
    9.21 +# Q -1  1  0  0 -3  5  2 -2  0 -3 -2  1  0 -3 -1  0 -1 -2 -1 -2  0 -2  4 -1 -4
    9.22 +# E -1  0  0  2 -4  2  5 -2  0 -3 -3  1 -2 -3 -1  0 -1 -3 -2 -2  1 -3  4 -1 -4
    9.23 +# G  0 -2  0 -1 -3 -2 -2  6 -2 -4 -4 -2 -3 -3 -2  0 -2 -2 -3 -3 -1 -4 -2 -1 -4
    9.24 +# H -2  0  1 -1 -3  0  0 -2  8 -3 -3 -1 -2 -1 -2 -1 -2 -2  2 -3  0 -3  0 -1 -4
    9.25 +# I -1 -3 -3 -3 -1 -3 -3 -4 -3  4  2 -3  1  0 -3 -2 -1 -3 -1  3 -3  3 -3 -1 -4
    9.26 +# L -1 -2 -3 -4 -1 -2 -3 -4 -3  2  4 -2  2  0 -3 -2 -1 -2 -1  1 -4  3 -3 -1 -4
    9.27 +# K -1  2  0 -1 -3  1  1 -2 -1 -3 -2  5 -1 -3 -1  0 -1 -3 -2 -2  0 -3  1 -1 -4
    9.28 +# M -1 -1 -2 -3 -1  0 -2 -3 -2  1  2 -1  5  0 -2 -1 -1 -1 -1  1 -3  2 -1 -1 -4
    9.29 +# F -2 -3 -3 -3 -2 -3 -3 -3 -1  0  0 -3  0  6 -4 -2 -2  1  3 -1 -3  0 -3 -1 -4
    9.30 +# P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4  7 -1 -1 -4 -3 -2 -2 -3 -1 -1 -4
    9.31 +# S  1 -1  1  0 -1  0  0  0 -1 -2 -2  0 -1 -2 -1  4  1 -3 -2 -2  0 -2  0 -1 -4
    9.32 +# T  0 -1  0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1  1  5 -2 -2  0 -1 -1 -1 -1 -4
    9.33 +# W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1  1 -4 -3 -2 11  2 -3 -4 -2 -2 -1 -4
    9.34 +# Y -2 -2 -2 -3 -2 -1 -2 -3  2 -1 -1 -2 -1  3 -3 -2 -2  2  7 -1 -3 -1 -2 -1 -4
    9.35 +# V  0 -3 -3 -3 -1 -2 -2 -3 -3  3  1 -2  1 -1 -2 -2  0 -3 -1  4 -3  2 -2 -1 -4
    9.36 +# B -2 -1  4  4 -3  0  1 -1  0 -3 -4  0 -3 -3 -2  0 -1 -4 -3 -3  4 -3  0 -1 -4
    9.37 +# J -1 -2 -3 -3 -1 -2 -3 -4 -3  3  3 -3  2  0 -3 -2 -1 -2 -1  2 -3  3 -3 -1 -4
    9.38 +# Z -1  0  0  1 -3  4  4 -2  0 -3 -3  1 -1 -3 -1  0 -1 -2 -2 -2  0 -3  4 -1 -4
    9.39 +# X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -4
    9.40 +# * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4  1
    9.41 +#
    9.42 +# Coordinates are 0-based.  For - strand matches, coordinates
    9.43 +# in the reverse complement of the 2nd sequence are used.
    9.44 +#
    9.45 +# name start alnSize strand seqSize alignment
    9.46 +#
    9.47 +# batch 0
    9.48 +a score=48 EG2=6.3e+09 E=0.097
    9.49 +s Q2LCP8   394 27 +   491 FISKILILLPYMVLGRVYLTMLAFFLS
    9.50 +s chrM   11767 81 + 16775 FISTLIIIQPFIILAFSATELMLFYIS
    9.51 +
    9.52 +a score=43 EG2=3.7e+10 E=0.57
    9.53 +s Q2LCP8   239 24 +   491 LMTFYLTITQKLVTLMVLINLYQN
    9.54 +s chrM   11827 72 + 16775 LMLFYISFEATLIPTLILITRWGN
    9.55 +
    9.56 +a score=42 EG2=5.3e+10 E=0.82
    9.57 +s Q2LCP8   292 22 +   491 LRQQKLIRFIAYSAIVNSALLI
    9.58 +s chrM   12313 66 + 16775 LRQTDLKSLIAYSSVSHMGLVI
    9.59 +
    9.60 +a score=41 EG2=7.5e+10 E=1.2
    9.61 +s Q2LCP8   375 28 +   491 AIVYILVLMYLAGLPPMTNFISKILILL
    9.62 +s chrM   12550 84 + 16775 SVWWLLANLTNMALPPTTNLMAELTIMV
    9.63 +
    9.64 +a score=83 EG2=2.5e+04 E=3.7e-07
    9.65 +s Q2LCP8  110  83 +   491 LILIYSSIIGMLISMEAHNLITLFLSLEISSICFYILALNKNSRKGIEGGLKYYIIGGIATTILLLGIVSIYKSTGSLMYTDL
    9.66 +s chrM   5258 249 + 16775 LICTVSLIMGTSITISSNHWILAWTGLEINTLAIIPLISKSHHPRAIEATIKYFLTQSTASALILFSSMTNAWSTGQWDITQL
    9.67 +
    9.68 +a score=89 EG2=3e+03 E=4.4e-08
    9.69 +s Q2LCP8  211  53 +   491 LIVLGLILKLGIAPFHGWLIDVYEGAGMLMTFYLTITQKLVTLMVLINLYQNL
    9.70 +s chrM   5528 159 + 16775 MLTMAIAIKLGLVPFHFWFPEVLQGSSLITALLLSTLMKLPPITLLLLTSQSL
    9.71 +
    9.72 +a score=72 EG2=1.3e+06 E=1.9e-05
    9.73 +s Q2LCP8  379  52 +   491 ILVLMYLAGLPPMTNFISKILILLPYMVLGRVYLTMLAFFLSVGVMIYYMNL
    9.74 +s chrM   5972 156 + 16775 MLTLLSLAGLPPLTGFMPKWLIIQELTKQEMTPMATIITMLSLLSLFFYLRL
    9.75 +
    9.76 +a score=44 EG2=2.6e+10 E=0.4
    9.77 +s Q2LCP8  223  67 +   491 APFHGWLIDVYEGAGMLMTFYLTITQKLVTLMVLINLYQNLIIYTNAIMFTNGLIILILVTLVVGTI
    9.78 +s chrM   8369 201 + 16775 SPIMEELVEFHDHALMVALAICSLVLYLLTLMLMEKLSSNTVDAQEVELIWTILPAIVLVLLALPSL
    9.79 +
    9.80 +a score=40 EG2=1.1e+11 E=1.7
    9.81 +s Q2LCP8  138 23 +   491 ISSICFYILALNKNSRKGIEGGL
    9.82 +s chrM   7839 69 - 16775 VSSADFWALSVKHSWSSGDEGGL
    9.83 +
    9.84 +a score=41 EG2=7.5e+10 E=1.2
    9.85 +s Q2LCP8   0  34 +   491 MVCLFENLMNMIKYSIYILPLIILIVLSISIKKD
    9.86 +s chrM   196 102 - 16775 LVSLGVSFVSLALFLVYLGGMLVVFVYSVSLAAD
    9.87 +
    9.88 +# batch 1
    9.89 +# Query sequences=2
    9.90 +
    9.91  TEST lastal -s0 -f0 -e18 /tmp/last-test galGal3-M-32.fa
    9.92  #
    9.93  # a=7 b=1 A=7 B=1 e=18 d=13 x=17 y=9 z=17 D=1e+06 E=3.38586e+07
    10.1 --- a/test/last-test.sh	Mon Oct 07 10:02:38 2019 +0900
    10.2 +++ b/test/last-test.sh	Mon Oct 07 10:57:27 2019 +0900
    10.3 @@ -40,6 +40,7 @@
    10.4      # gapless translated alignment & genetic code file
    10.5      lastdb -p $db $protSeq
    10.6      try lastal -F12 -pBL62 -e40 -G $gc -j1 $db $dnaSeq
    10.7 +    try lastal -F12 -pBL62 -e40 -G2 -j1 $db $dnaSeq
    10.8  
    10.9      # subset seed file, soft-masking
   10.10      lastdb -c -u ../data/YASS.seed $db $dnaSeq