ppALIGN API documentation
src/alignoverlap.hpp
/******************************************************************************
Copyright 2009 Stefan Wolfsheimer & Gregory Nuel.

This file is part of ppALIGN

ppALIGN is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

ppALIGN is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with ppALIGN; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*******************************************************************************/

#ifndef _ALIGN_OVERLAP_HPP_
#define _ALIGN_OVERLAP_HPP_
#include "sequence.hpp"
#include "align.hpp"
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>
#include <vector>


// 2010-02-17: sw: removed dep. from io part of the library.
00029 00034 class AlignHandler 00035 { 00036 public: 00037 virtual ~AlignHandler() {} 00038 00043 virtual void Add(const std::string & generator, 00044 const Align & a) = 0; 00045 }; 00046 00047 00052 class AlignStartOverlap : public AlignHandler 00053 { 00054 public: 00055 void Add(const std::string & generator, 00056 const Align & a); 00057 void StreamOut(std::ostream & ost, Pair offset); 00058 00059 std::map<std::string,std::map<Pair,int> > start_points; 00060 std::map<std::string,std::map<Pair,int> > end_points; 00061 private: 00062 void StreamOut(std::ostream & ost, 00063 std::map<std::string,std::map<Pair,int> >::const_iterator, 00064 std::map<std::string,std::map<Pair,int> >::const_iterator, 00065 const Pair & offset) const; 00066 00067 }; 00068 00069 00082 class AlignOverlap : public AlignHandler 00083 { 00084 private: 00085 00086 bool overlap_segments_built; 00089 struct AlignCol 00090 { 00091 AlignCol(); 00092 AlignCol(int i, int j, AlignState s, long col, double prob) ; 00094 Pair pair; 00095 00097 AlignState state; 00098 00101 bool overlap; 00102 00103 00104 long segment; 00105 long col; 00106 double prob; 00107 }; 00108 00109 00110 00111 00112 public: 00118 AlignOverlap(const Align & ref, 00119 Sequence & _seq1, 00120 Sequence & _seq2); 00121 00127 void Add(const std::string & generator, 00128 const Align & a); 00134 void StreamOutPartition(std::ostream & ost,Pair seq_offset=Pair(0,0)); 00135 00136 class AlignSegment; 00137 00141 class AlternativeSegment 00142 { 00143 public: 00144 friend class AlignOverlap; 00145 friend class AlignSegment; 00148 Align::const_iterator begin; 00149 Align::const_iterator end; 00150 Pair begin_pos; 00151 00152 void StreamOutQuery(std::ostream & ost) const; 00153 void StreamOutSubject(std::ostream & ost) const; 00154 00155 00156 private: 00163 AlternativeSegment(Align::const_iterator b, 00164 Align::const_iterator e, 00165 const Pair & bp, 00166 AlignSegment & seg) ; 00167 00168 void StreamOut(std::ostream & ost, 00169 
const std::string & generator, 00170 int n) const; 00171 00172 00176 inline friend bool operator < (const AlternativeSegment & a, 00177 const AlternativeSegment & b); 00178 inline friend bool operator == (const AlternativeSegment & a, 00179 const AlternativeSegment & b); 00181 AlignSegment * parent; 00182 00183 }; 00184 00185 00188 typedef std::map<AlternativeSegment,int>::iterator alternative_iterator; 00189 00192 typedef std::map<AlternativeSegment,int>::const_iterator alternative_const_iterator; 00193 00194 00195 00198 class AlignSegment 00199 { 00200 public: 00201 friend class AlignOverlap; 00202 friend class AlternativeSegment; 00204 Pair start_pos; 00205 00210 bool overlap; 00213 long n_matches,n_insertions,n_deletions; 00214 00223 void StreamOutQuery(std::ostream & ost); 00224 00230 void StreamOutSubject(std::ostream & ost); 00231 00235 void GetAlign(Align & a) const; 00236 00239 inline alternative_iterator BeginAlternatives(const std::string & generator); 00240 00243 inline alternative_iterator EndAlternatives(const std::string & generator); 00245 00246 00247 private: 00248 AlignSegment(const std::vector<AlignOverlap::AlignCol>::iterator & _begin, 00249 const std::vector<AlignOverlap::AlignCol>::iterator & _end, 00250 Pair _start_pos, 00251 bool _ol, 00252 long n_matches, 00253 long n_insertions, 00254 long n_deletions, 00255 AlignOverlap & overlap); 00256 00257 AlignOverlap * parent; 00258 00259 std::vector<AlignCol>::iterator begin; 00260 std::vector<AlignCol>::iterator end; 00261 00262 00263 00270 std::map<const std::string *, 00271 std::map<AlternativeSegment,int> > 00272 alternative_segments; 00273 00274 inline void InsertAlternativeSegment(const std::string * generator, 00275 Align::const_iterator begin, 00276 Align::const_iterator end, 00277 const Pair & begin_pos); 00278 }; 00279 00282 typedef std::vector<AlignSegment>::iterator segment_iterator; 00283 00286 typedef std::vector<AlignSegment>::const_iterator segment_const_iterator; 00287 00293 inline 
segment_iterator BeginSegments(); 00294 00297 inline segment_iterator EndSegments(); 00299 00300 void BuildOverlapSegments() ; 00301 00302 private: 00303 00304 void BuildLookup(); 00305 00306 00312 void HandleAlignment(const Align & a, 00313 const std::string * generator); 00314 00315 00316 00319 const Align & ref_align; 00320 std::vector<AlignSegment> align_segments; 00321 00322 long length1,length2; 00323 const Sequence & seq1; 00324 const Sequence & seq2; 00325 00328 Pair pos_begin,pos_end; 00329 00330 /* a map from sequence position (with respect to pos_begin) 00331 to positions in alignments 00332 pair of positions in the input sequence 00333 if state is a match it points to the last match 00334 seen so far. 00335 example a_4 - - 00336 b_3 00337 -> state=StateDelete 00338 pair= Pair(4,3) 00339 */ 00340 std::vector<std::vector<AlignCol>::iterator> lookup_first; 00341 std::vector<std::vector<AlignCol>::iterator> lookup_second; 00342 std::vector<AlignCol> lookup_align; 00343 00344 00348 std::map<std::string,std::vector<Align > > 00349 alternative_alignments; 00350 }; 00351 00352 00353 00354 // inline functions 00355 AlignOverlap::segment_iterator AlignOverlap::BeginSegments() 00356 { 00357 return align_segments.begin(); 00358 } 00359 00360 AlignOverlap::segment_iterator AlignOverlap::EndSegments() 00361 { 00362 return align_segments.end(); 00363 } 00364 00365 00366 AlignOverlap::alternative_iterator 00367 AlignOverlap::AlignSegment::BeginAlternatives(const std::string & generator) 00368 { 00369 std::map<std::string,std::vector<Align> >::const_iterator 00370 name_itr = parent->alternative_alignments.find(generator); 00371 if(name_itr == parent->alternative_alignments.end()) 00372 { 00373 std::cerr << "AlignOverlap::AlignSegment::BeginAlternatives: " 00374 << " generator " << generator << " not found " << std::endl; 00375 exit(8); 00376 } 00377 return alternative_segments[&name_itr->first].begin(); 00378 } 00379 00380 00381 AlignOverlap::alternative_iterator 00382 
AlignOverlap::AlignSegment::EndAlternatives(const std::string & generator) 00383 { 00384 std::map<std::string,std::vector<Align> >::const_iterator 00385 name_itr = parent->alternative_alignments.find(generator); 00386 if(name_itr == parent->alternative_alignments.end()) 00387 { 00388 std::cerr << "AlignOverlap::AlignSegment::BeginAlternatives: " 00389 << " generator " << generator << " not found " << std::endl; 00390 exit(8); 00391 } 00392 return alternative_segments[&name_itr->first].end(); 00393 } 00394 00395 00396 #endif