ppALIGN API documentation
src/sequence.hpp
00001 /****************************************************************************** 00002 Copyright 2009 Stefan Wolfsheimer & Gregory Nuel. 00003 00004 This file is part of ppALIGN 00005 00006 ppALIGN is free software; you can redistribute it and/or modify 00007 it under the terms of the GNU General Public License as published by 00008 the Free Software Foundation; either version 2 of the License, or 00009 (at your option) any later version. 00010 00011 ppALIGN is distributed in the hope that it will be useful, 00012 but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 GNU General Public License for more details. 00015 00016 You should have received a copy of the GNU General Public License 00017 along with ppALIGN; if not, write to the Free Software 00018 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00019 *******************************************************************************/ 00020 00021 #ifndef _SEQUENCE_HPP_ 00022 #define _SEQUENCE_HPP_ 00023 00024 #include "alphabet.hpp" 00025 00027 // 00028 // class Sequence 00029 // 00031 00038 class Sequence : public std::vector<int> 00039 { 00040 private: 00041 const Alphabet * alphabet; 00042 00043 public: 00044 00045 typedef std::vector<int> parent_t; 00046 00049 class ParseError : public ExceptionBase 00050 { 00051 public: 00052 ParseError(char ch, size_t line, size_t pos,const Alphabet & a); 00053 const char ch; 00054 const size_t pos; 00055 const size_t line; 00056 const Alphabet & alphabet; 00057 }; 00058 00059 00060 00061 00063 00066 00068 Sequence(const Alphabet & a) : 00069 alphabet(&a) {} 00070 00071 Sequence(const Sequence & seq) : 00072 parent_t(seq),alphabet(seq.alphabet) { } 00073 00074 00075 Sequence(const Alphabet & a, 00076 const std::string & str) 00077 : alphabet(&a) 00078 { *this+= str; } 00079 00080 Sequence(const Alphabet & a, 00081 const_iterator begin, 00082 const_iterator end) : alphabet(&a) 00083 { resize(end-begin); 00084 for(const_iterator itr = begin; itr != end; ++itr) 00085 this->operator[](itr-begin) = *itr; 00086 } 00087 00088 00089 Sequence(const Alphabet & a, 00090 const char * str) 00091 : alphabet(&a) 00092 { *this+= str; } 00093 00094 Sequence(const Alphabet & a, 00095 char c) 00096 : alphabet(&a) 00097 { *this+= c; } 00098 00100 00101 inline const Alphabet & GetAlphabet() const 00102 { return *alphabet; } 00103 00104 00106 00109 Sequence & operator=(const char * str); 00110 inline Sequence & operator=(const std::string & str) 00111 { clear(); *this = str.c_str(); return *this; } 00112 00113 inline Sequence & operator=(const Sequence & seq) 00114 { Alphabet::Match(*alphabet,*seq.alphabet); 00115 clear(); 00116 insert(begin(),seq.begin(),seq.end()); 00117 return *this; } 00118 00119 00120 Sequence & operator+= (const char * str); 00121 inline Sequence & operator+= (const std::string & str) 00122 { *this+= str.c_str(); 00123 return *this; } 00124 00125 inline Sequence & operator+= (const Sequence & seq) 00126 { 00127 Alphabet::Match(*alphabet,*seq.alphabet); 00128 insert(end(),seq.begin(),seq.end()); 00129 return *this; } 00130 00131 00132 inline Sequence & operator+= (const int i) 00133 { push_back(i); return *this; } 00134 00135 inline Sequence & operator+= (const unsigned int i) 00136 { push_back(i); return *this; } 00137 00139 00142 inline Sequence & operator+= (const char ch) 00143 { push_back(alphabet->ValidEncode(ch)); 00144 return *this; } 00145 00150 void Append(const char * str, int len); 00151 00155 00156 friend inline Sequence operator + (const Sequence & a, const Sequence & b) 00157 { Sequence tmp(a); tmp+= b; return tmp; } 00158 00159 friend inline Sequence operator + (char a, const Sequence & b) 00160 { Sequence tmp(*b.alphabet,a); tmp+= b; return tmp; } 00161 00162 friend inline Sequence operator + (const std::string & a,const Sequence & b) 00163 { Sequence tmp(*b.alphabet,a); tmp+=b; return tmp; } 00164 00165 friend inline Sequence operator + (int a, const Sequence & b) 00166 { Sequence tmp(*b.alphabet,a); 00167 return tmp; } 00168 00169 friend inline Sequence operator + (unsigned int a, const Sequence & b) 00170 { Sequence tmp(*b.alphabet,a); tmp+=b; 00171 return tmp; } 00172 00173 friend inline Sequence operator + (const Sequence & a, char b) 00174 { Sequence tmp(a); 00175 tmp+=b; 00176 return tmp; } 00177 00178 friend inline Sequence operator + (const Sequence & a, const std::string & b) 00179 { Sequence tmp(a); 00180 tmp+=b; 00181 return tmp; } 00182 00183 friend inline Sequence operator + (const Sequence & a,int b) 00184 { Sequence tmp(a); 00185 tmp+= b; 00186 return tmp; } 00187 00188 friend inline Sequence operator + (const Sequence & a, unsigned int b) 00189 { Sequence tmp(a); 00190 tmp+= b; 00191 return tmp+=b; } 00193 00194 friend inline bool operator == (const Sequence & a,const Sequence & b) 00195 { return parent_t(a)==parent_t(b); } 00196 00197 friend inline bool operator == (char a, const Sequence & b) 00198 { return Sequence(*b.alphabet,a) == b; } 00199 00200 friend inline bool operator == (const std::string & a,const Sequence & b) 00201 { return Sequence(*b.alphabet,a) == b; } 00202 00203 friend inline bool operator == (int a, const Sequence & b) 00204 { return Sequence(*b.alphabet,a) == b; } 00205 00206 friend inline bool operator == (unsigned int a, const Sequence & b) 00207 { return Sequence(*b.alphabet,a) == b; } 00208 00209 friend inline bool operator == (const Sequence & a, char b) 00210 { return a == Sequence(*a.alphabet,b); } 00211 00212 friend inline bool operator == (const Sequence & a, const std::string & b) 00213 { return a == Sequence(*a.alphabet,b); } 00214 00215 friend inline bool operator == (const Sequence & a, int b) 00216 { return a == Sequence(*a.alphabet,b); } 00217 00218 friend inline bool operator == (const Sequence & a, unsigned int b) 00219 { return a == Sequence(*a.alphabet,b); } 00220 00221 friend inline bool operator != (const Sequence & a, const Sequence & b) 00222 { return parent_t(a)!=parent_t(b); } 00223 00224 friend inline bool operator != (char a, const Sequence & b) 00225 { return Sequence(*b.alphabet,a) != b; } 00226 00227 friend inline bool operator != (const std::string & a,const Sequence & b) 00228 { return Sequence(*b.alphabet,a) != b; } 00229 00230 friend inline bool operator != (int a, const Sequence & b) 00231 { return Sequence(*b.alphabet,a) != b; } 00232 00233 friend inline bool operator != (unsigned int a, const Sequence & b) 00234 { return Sequence(*b.alphabet,a) != b; } 00235 00236 friend inline bool operator != (const Sequence & a, char b) 00237 { return a != Sequence(*a.alphabet,b); } 00238 00239 friend inline bool operator != (const Sequence & a, const std::string & b) 00240 { return a != Sequence(*a.alphabet,b); } 00241 00242 friend inline bool operator != (const Sequence & a, int b) 00243 { return a != Sequence(*a.alphabet,b); } 00244 00245 friend inline bool operator != (const Sequence & a, unsigned int b) 00246 { return a != Sequence(*a.alphabet,b); } 00247 00248 00253 inline Sequence & operator/= (int mask) 00254 { 00255 Sequence v(*this); 00256 clear(); 00257 for(iterator itr = v.begin(); itr != v.end(); ++itr) 00258 { 00259 00260 if(!alphabet->Probe(*itr,mask)) 00261 { 00262 push_back(*itr); 00263 } 00264 } 00265 return *this; 00266 } 00267 00273 Sequence operator/ (int mask) const 00274 { 00275 Sequence s(*alphabet); 00276 for(const_iterator itr = begin(); 00277 itr != end(); 00278 ++itr) 00279 { 00280 if(!alphabet->Probe(*itr,mask)) 00281 { 00282 s.push_back(*itr); 00283 } 00284 } 00285 return s; 00286 } 00287 00294 bool CString(char * str, int max_length) const; 00295 00296 00297 00298 00299 friend std::ostream & operator<<(std::ostream & ost, const Sequence & s) ; 00300 00301 friend std::istream & operator>> (std::istream & ist, Sequence & s); 00302 00303 private: 00304 void Append(iterator & myitr,const char * tmp,size_t olds); 00305 00306 }; 00307 00308 00311 class DnaSequence : public Sequence 00312 { 00313 public: 00314 DnaSequence() : Sequence(dnaAlphabet) {} 00315 DnaSequence(const Sequence & seq) : Sequence(seq) {} 00316 template<typename ITER> 00317 DnaSequence(const ITER & begin, 00318 const ITER & end) : Sequence(dnaAlphabet,begin,end) {} 00319 00320 template<typename ITER> 00321 DnaSequence(const ITER & begin, 00322 const ITER & end, 00323 bool validate) : Sequence(dnaAlphabet,begin,end,validate) {} 00324 00325 DnaSequence(const std::string & str) : Sequence(dnaAlphabet,str) {} 00326 DnaSequence(const char * c) : Sequence(dnaAlphabet,c) {} 00327 00329 DnaSequence & operator=(const std::string & str) { Sequence::operator=(str); return *this; } 00330 DnaSequence & operator=(const char * str) { Sequence::operator=(str); return *this; } 00331 DnaSequence & operator=(const Sequence & seq) { Sequence::operator=(seq); return *this; } 00332 00333 }; 00334 00335 00338 class ProteinSequence : public Sequence 00339 { 00340 public: 00341 ProteinSequence() : Sequence(proteinAlphabet) {} 00342 ProteinSequence(const Sequence & seq) : Sequence(seq) {} 00343 template<typename ITER> 00344 ProteinSequence(const ITER & begin, 00345 const ITER & end) : Sequence(proteinAlphabet,begin,end) {} 00346 00347 ProteinSequence(const std::string & str) : Sequence(proteinAlphabet,str) {} 00348 ProteinSequence(const char * str) : Sequence(proteinAlphabet,str) {} 00349 ProteinSequence(const char & c) : Sequence(proteinAlphabet,c) {} 00350 00351 ProteinSequence & operator=(const std::string & str) { Sequence::operator=(str); return *this; } 00352 ProteinSequence & operator=(const char * str) { Sequence::operator=(str); return *this; } 00353 ProteinSequence & operator=(const Sequence & seq) { Sequence::operator=(seq); return *this; } 00354 00355 00356 }; 00357 00358 00359 #endif