ppALIGN API documentation
src/alphabet.hpp
00001 /****************************************************************************** 00002 Copyright 2009 Stefan Wolfsheimer & Gregory Nuel. 00003 00004 This file is part of ppALIGN 00005 00006 ppALIGN is free software; you can redistribute it and/or modify 00007 it under the terms of the GNU General Public License as published by 00008 the Free Software Foundation; either version 2 of the License, or 00009 (at your option) any later version. 00010 00011 ppALIGN is distributed in the hope that it will be useful, 00012 but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 GNU General Public License for more details. 00015 00016 You should have received a copy of the GNU General Public License 00017 along with ppALIGN; if not, write to the Free Software 00018 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00019 *******************************************************************************/ 00020 00021 #ifndef _ALPHABET_HPP_ 00022 #define _ALPHABET_HPP_ 00023 00024 #include "exceptions.hpp" 00025 #include <stdlib.h> 00026 #include <vector> 00027 #include <map> 00028 #include <set> 00029 00031 // 00032 // SubAlphabetMask 00033 // 00035 00038 enum SubAlphabetMask { BaseAlphabet = 1, /* < all letters from the basic alphabet (AGCT for DNA) */ 00039 IuPacExtension = 2, /* < IuPac extension of the alphabet */ 00040 GapSymbol = 4 /* < the gap symbol */ 00041 }; 00042 00043 00045 // 00046 // Alphabet 00047 // 00049 00051 class Alphabet 00052 { 00053 private: 00054 00055 inline void ValidateInt(const int & ) const; 00056 00057 int gap_code; 00058 char gap_char; 00059 00060 protected: 00064 Alphabet(size_t base_size, const char * str); 00065 00069 size_t n_basic_letters; 00070 00074 std::map<char,int> letterToInt; 00075 00081 std::vector<char> intToLetter; 00082 00086 std::vector<std::set<int> > intToSet; 00087 00088 public: 00089 friend class Encoder; 00090 00091 virtual ~Alphabet() {} 00092 00096 class InvalidCharacter : public ExceptionBase 00097 { 00098 public: 00099 InvalidCharacter(const char c, const Alphabet & a); 00100 const char ch; 00101 const Alphabet & alphabet; 00102 }; 00103 00107 class InvalidCharacterCode : public ExceptionBase 00108 { 00109 public: 00110 InvalidCharacterCode(int code, const Alphabet & a); 00111 const int code; 00112 const Alphabet & alphabet; 00113 }; 00114 00117 class MatchError : public ExceptionBase 00118 { 00119 public: 00120 MatchError(const Alphabet & a1,const Alphabet &a2); 00121 const Alphabet & alphabet1; 00122 const Alphabet & alphabet2; 00123 }; 00124 00125 00129 inline size_t Size() const { return intToLetter.size(); } 00130 00131 00135 inline size_t BaseSize() const { return n_basic_letters; } 00136 00140 inline const int & Gap() const { return gap_code; } 00141 inline const char & GapChar() const { return gap_char; } 00142 00146 inline const int & Encode(const char & ch) const { return letterToInt.find(ch)->second; } 00147 00148 inline const int & ValidEncode(const char & ch) const 00149 { 00150 std::map<char,int>::const_iterator itr = letterToInt.find(ch); 00151 if(itr != letterToInt.end()) return itr->second; 00152 else throw Alphabet::InvalidCharacter(ch,*this); 00153 } 00154 00155 00158 inline const char & Decode(const int & i) const { ValidateInt(i); return intToLetter[i]; } 00159 inline const char & Decode(const unsigned int & i) const { ValidateInt(i); return intToLetter[i]; } 00160 inline const char & Decode(const long & i) const { ValidateInt(i); return intToLetter[i]; } 00161 inline const char & Decode(const unsigned long & i) const { ValidateInt(i); return intToLetter[i]; } 00162 00163 00164 00165 00174 const std::set<int> & IupacSet(int i) const { ValidateInt(i); return intToSet[i]; } 00175 00176 00177 virtual const char * Name() const = 0; 00178 00185 bool Probe(int letter,int mask) const; 00186 00187 00191 static Alphabet & GetAlphabet(const std::string & str) ; 00192 00193 static void Match(const Alphabet & a, const Alphabet & b) 00194 { if(&a != &b) throw Alphabet::MatchError(a,b); } 00195 }; 00196 00197 void Alphabet::ValidateInt(const int & c) const 00198 { 00199 if(c < 0 || c >= (int)Size()) 00200 throw Alphabet::InvalidCharacterCode(c,*this); 00201 } 00202 00203 00205 // 00206 // DnaAlphabet 00207 // 00209 00231 class DnaAlphabet : public Alphabet 00232 { 00233 public: 00234 DnaAlphabet(); 00235 virtual const char * Name() const; 00236 }; 00237 00240 extern DnaAlphabet dnaAlphabet; 00241 00242 00244 // 00245 // ProteinAlphabet 00246 // 00248 00274 class ProteinAlphabet : public Alphabet 00275 { 00276 public: 00277 ProteinAlphabet(); 00278 virtual const char * Name() const; 00279 }; 00280 00282 extern ProteinAlphabet proteinAlphabet; 00283 00284 #endif 00285 00286