ppALIGN API documentation
src/partitionworkspace.hpp
00001 /****************************************************************************** 00002 Copyright 2009 Stefan Wolfsheimer & Gregory Nuel. 00003 00004 This file is part of ppALIGN 00005 00006 ppALIGN is free software; you can redistribute it and/or modify 00007 it under the terms of the GNU General Public License as published by 00008 the Free Software Foundation; either version 2 of the License, or 00009 (at your option) any later version. 00010 00011 ppALIGN is distributed in the hope that it will be useful, 00012 but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 GNU General Public License for more details. 00015 00016 You should have received a copy of the GNU General Public License 00017 along with ppALIGN; if not, write to the Free Software 00018 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00019 *******************************************************************************/ 00020 00073 #ifndef _PARTITION_WORKSPACE_HPP_ 00074 #define _PARTITION_WORKSPACE_HPP_ 00075 00076 #include <list> 00077 00078 #include "sequence.hpp" 00079 #include "align.hpp" 00080 #include "weights.hpp" 00081 #include "checkpoint.hpp" 00082 00083 00084 class PartitionDriverBase; 00085 00095 class PartitionWorkspace : public Workspace<DynCell<xdouble> >, 00096 private CheckpointDriver 00097 { 00098 public: 00099 00104 struct RowInfo 00105 { 00110 long begin_j; 00111 00115 long end_j; 00116 }; 00117 00118 00130 struct ForwardBackwardInfo 00131 { 00132 friend class PartitionWorkspace; 00133 00140 inline double PairProb(long i, long j); 00141 00142 00145 long i; 00148 PartitionWorkspace::RowInfo last_row; 00149 00152 PartitionWorkspace::RowInfo curr_row; 00153 00156 PartitionWorkspace::RowInfo next_row; 00157 00160 PartitionWorkspace::const_iterator last_for; 00161 00164 PartitionWorkspace::const_iterator curr_for; 00165 00168 PartitionWorkspace::const_iterator curr_back; 00169 00170 private: 00171 std::vector<double> pair_prob; 00172 xdouble invZ; 00173 }; 00174 00179 struct Transition 00180 { 00181 00184 double MM,MI,MD; 00186 00189 double IM,II,ID; 00191 00194 double DM,DI,DD; 00196 } ; 00197 00200 typedef Workspace<Transition>::iterator trans_iterator; 00201 00202 00203 00208 PartitionWorkspace(size_t s); 00209 00212 ~PartitionWorkspace(); 00213 00214 00222 void AddDriver(PartitionDriverBase & driver); 00223 00226 void RemoveDriver(PartitionDriverBase & driver); 00227 00232 void RemoveAllDrivers(); 00233 00234 00241 xdouble PartitionFunction(const Sequence & seq1, 00242 const Sequence & seq2, 00243 const Weight & weight); 00244 00260 xdouble PartitionFunction(const Sequence & seq1, 00261 const Sequence & seq2, 00262 const Align & a, 00263 const Weight & weight, 00264 long strip_size = 10, 00265 long max_strip_size = 50, 00266 long inc_strip_size = 10); 00267 00268 00275 Pair LocalStrip(Sequence & seq1, 00276 Sequence & seq2, 00277 Align & align, 00278 const Weight & weight, 00279 long stip_size = 10, 00280 long max_strip_size = 50, 00281 long inc_strip_size = 10); 00282 00290 inline long Length1() const; 00291 00296 inline long Length2() const; 00297 00303 inline Sequence::const_iterator Seq1Begin() const; 00304 00310 inline Sequence::const_iterator Seq2Begin() const; 00311 00312 00316 inline const Weight & GetWeight() const; 00318 00322 00330 inline RowInfo & GetRowInfo(long i) ; 00331 00333 inline const RowInfo & GetRowInfo(long i) const; 00334 00343 inline RowInfo & GetLastRowInfo(long i) ; 00344 00346 inline const RowInfo & GetLastRowInfo(long i) const; 00347 00351 inline const xdouble & GetZ() const; 00352 00356 inline const xdouble & GetInvZ() const; 00357 00365 inline double GetInsertionProb(long i) const; 00366 00374 inline double GetDeletionProb(long j) const; 00375 00383 inline double GetPairProb1(long i) const; 00384 00393 inline double GetPairProb2(long j) const; 00394 00395 00405 inline xdouble GetDanglingForwardInsert(long i) const; 00406 00410 inline xdouble GetDanglingBackwardInsert(long i) const; 00411 00412 00417 inline xdouble GetDanglingForwardDelete(long j) const; 00418 00422 inline xdouble GetDanglingBackwardDelete(long j) const; 00424 00429 inline xdouble GetNullProb(); 00430 00432 00433 00434 private: 00436 00438 long length1; 00439 00442 RowInfo length2; 00443 00446 Sequence::const_iterator begin_seq1; 00447 Sequence::const_iterator end_seq1; 00448 Sequence::const_iterator begin_seq2; 00449 Sequence::const_iterator end_seq2; 00450 00453 const Weight * ptr_weights; 00454 00458 bool full_computation; 00459 00460 00464 xdouble Z_backward; 00465 xdouble Z_forward; 00466 xdouble Z; 00467 xdouble inv_Z; 00468 bool backward_done; 00469 00472 std::vector<DynCell<xdouble> > forward_ws[2]; 00473 std::vector<DynCell<xdouble> >::iterator curr_for_itr; 00474 std::vector<DynCell<xdouble> >::iterator last_for_itr; 00475 00476 00477 std::vector<xdouble> row_z; 00478 00479 00482 Workspace<Transition> aux_markov_trans; 00484 struct XTransition 00485 { 00486 xdouble MM,MI,MD; 00487 xdouble IM,II,ID; 00488 xdouble DM,DI,DD; 00489 } aux_trans; 00490 00491 00492 00493 00494 00497 std::vector<double> ins_Z; 00498 std::vector<double> del_Z; 00499 00500 std::vector<xdouble> for_dang_ins; 00501 std::vector<xdouble> for_dang_del; 00502 std::vector<xdouble> back_dang_ins; 00503 std::vector<xdouble> back_dang_del; 00504 00508 std::vector<double> pair1_Z; 00509 std::vector<double> pair2_Z; 00510 00512 // @{ 00513 00514 typedef std::list<PartitionDriverBase *> 00515 drivers_list_t; 00516 00519 bool use_drivers; 00520 drivers_list_t drivers; 00521 00522 00523 //drivers_list_t on_first_backward_drivers; 00524 drivers_list_t on_backward_drivers; 00525 drivers_list_t on_forward_drivers; 00526 drivers_list_t on_forward_backward_drivers; 00527 drivers_list_t on_markov_drivers; 00528 00529 00530 bool need_forward; 00531 bool need_backward; 00532 bool need_forward_backward; 00533 bool need_checkpoint; 00534 bool need_markov_chain; 00535 00537 00545 std::vector<std::vector<RowInfo> > row_info_ws; 00546 std::vector<std::vector<RowInfo> >::iterator row_info; 00547 std::vector<std::vector<RowInfo> >::iterator last_row_info; 00548 00552 long n_cells; 00553 00556 long max_width; 00558 00565 void InitStrip(Align::const_iterator & a_begin, 00566 Align::const_reverse_iterator & a_rbegin, 00567 Align::const_iterator & a_end, 00568 Align::const_reverse_iterator & a_rend, 00569 long strip_size); 00570 00574 xdouble PartitionFunctionStrip(); 00575 00576 00577 void StartComputation(); 00578 00579 void InitLocalDangling(const Sequence & seq1, const Sequence seq2); 00580 00581 void InitForward(); 00582 00583 00584 00585 // aux function 00586 inline void TransMX(const Weight & w, 00587 DynCell<xdouble> & cell, 00588 const DynCell<xdouble> & diag, 00589 int ai, 00590 int bj); 00591 inline void TransIX(const Weight & w, 00592 DynCell<xdouble> & cell, 00593 const DynCell<xdouble> & up, 00594 int ai, 00595 int bj); 00596 00597 inline void TransMX0(DynCell<xdouble> & cell); 00598 inline void TransIX0(DynCell<xdouble> & cell); 00599 inline void TransDX0(DynCell<xdouble> & cell); 00600 00603 inline void HandleMarkov(Transition & t, DynCell<xdouble> & cell); 00604 00605 00606 00607 00609 void InitBackward(long i, 00610 PartitionWorkspace::RowInfo & curr_row, 00611 PartitionWorkspace::iterator curr_back); 00612 00616 xdouble FirstBackward(PartitionWorkspace::RowInfo & curr_row, 00617 const PartitionWorkspace::RowInfo & last_row, 00618 PartitionWorkspace::iterator curr, 00619 trans_iterator curr_trans, 00620 PartitionWorkspace::iterator last); 00621 00623 void LastBackward(PartitionWorkspace::RowInfo & curr_row, 00624 const PartitionWorkspace::RowInfo & last_row, 00625 PartitionWorkspace::iterator curr, 00626 trans_iterator curr_trans, 00627 PartitionWorkspace::iterator last); 00629 xdouble Backward(long i, 00630 PartitionWorkspace::RowInfo & curr_row, 00631 const PartitionWorkspace::RowInfo & last_row, 00632 PartitionWorkspace::iterator curr_back, 00633 trans_iterator curr_trans, 00634 PartitionWorkspace::iterator last_back); 00635 00637 void Forward(long i, 00638 const PartitionWorkspace::RowInfo & curr_row, 00639 const PartitionWorkspace::RowInfo & last_row, 00640 std::vector<DynCell<xdouble> >::iterator curr_for, 00641 std::vector<DynCell<xdouble> >::iterator last_for); 00642 00647 void ForwardBackward(long i, 00648 const PartitionWorkspace::RowInfo & last_row, 00649 const PartitionWorkspace::RowInfo & curr_row, 00650 const PartitionWorkspace::RowInfo & next_row, 00651 PartitionWorkspace::const_iterator last_for, 00652 PartitionWorkspace::const_iterator curr_for, 00653 PartitionWorkspace::const_iterator curr_back, 00654 Workspace<PartitionWorkspace::Transition>::const_iterator 00655 curr_markov); 00656 00657 00658 // virtual functions from checkpoint driver 00659 void FirstPassForward(long last_row, long current_row, long row_i); 00660 void Forward(long last_row, long current_row, long row_i); 00661 bool Backward(long current_row, long row_i); 00662 long SpaceAvail(); 00663 long SpaceRequired(); 00664 }; 00665 00667 00706 class PartitionDriverBase 00707 { 00708 public: 00709 friend class PartitionWorkspace; 00710 friend class Program; 00711 00713 PartitionDriverBase(); 00714 00716 virtual ~PartitionDriverBase() {} 00717 00721 const std::string & Name() const; 00722 00723 00731 //virtual int Type() { return 0; } 00732 00735 virtual bool NeedInitStrip() { return false; } 00736 00737 00743 virtual bool NeedFirstBackward() { return false; } 00744 00745 00754 virtual bool NeedBackward() { return false; } 00755 00756 00757 00764 virtual bool NeedForward() { return false; } 00765 00766 00774 virtual bool NeedForwardBackward() { return false; } 00775 00776 00785 virtual bool NeedMarkovChain() { return false; } 00786 00788 00792 virtual bool ProvideAlternativePosteriorProb() { return false; } 00793 00797 00800 virtual void BeforeComputation() { } 00801 00802 00803 00804 00825 virtual void OnForward(long i, 00826 const PartitionWorkspace::RowInfo & last_row, 00827 const PartitionWorkspace::RowInfo & curr_row, 00828 const PartitionWorkspace::const_iterator last, 00829 const PartitionWorkspace::const_iterator curr) {} 00830 00831 00849 virtual void OnBackward(long i, 00850 const PartitionWorkspace::RowInfo & last_row, 00851 const PartitionWorkspace::RowInfo & curr_row, 00852 const PartitionWorkspace::const_iterator last, 00853 const PartitionWorkspace::const_iterator curr) {} 00854 00855 00856 00857 00865 virtual void OnForwardBackward(PartitionWorkspace::ForwardBackwardInfo & fb_info) { } 00866 00867 00877 virtual void OnMarkovChain(PartitionWorkspace::ForwardBackwardInfo & fb_info, 00878 Workspace<PartitionWorkspace::Transition> 00879 ::const_iterator curr_markov, 00880 const xdouble & row_z) {} 00881 00882 00886 virtual void ForwardDone() { } 00887 00891 virtual void AfterComputation() {} 00893 00901 std::vector<Align> alternative_alignments; 00902 00903 00908 inline void LocalDangling(bool dang); 00909 00914 inline void ComputePosterior(bool doit); 00915 00916 protected: 00917 bool do_add_dangling; 00918 bool do_compute_posterior; 00919 00923 std::string driver_name; 00924 00928 PartitionWorkspace * ws; 00929 00932 virtual void SetWorkspace(PartitionWorkspace * ws); 00933 }; 00934 00935 00936 00937 00938 00939 00940 00942 PartitionWorkspace::RowInfo & PartitionWorkspace::GetRowInfo(long i) 00943 { 00944 if(full_computation) return length2; 00945 else return (*row_info)[i+1]; 00946 } 00947 00948 const PartitionWorkspace::RowInfo & PartitionWorkspace::GetRowInfo(long i) const 00949 { 00950 if(full_computation) return length2; 00951 else return (*row_info)[i+1]; 00952 } 00953 00954 00955 PartitionWorkspace::RowInfo & PartitionWorkspace::GetLastRowInfo(long i) 00956 { 00957 if(full_computation) return length2; 00958 else return (*last_row_info)[i+1]; 00959 } 00960 00961 const PartitionWorkspace::RowInfo & PartitionWorkspace::GetLastRowInfo(long i) const 00962 { 00963 if(full_computation) return length2; 00964 else return (*last_row_info)[i+1]; 00965 } 00966 00967 00968 const xdouble & PartitionWorkspace::GetZ() const 00969 { return Z; } 00970 00971 const xdouble & PartitionWorkspace::GetInvZ() const 00972 { return inv_Z; } 00973 00974 00975 inline long PartitionWorkspace::Length1() const 00976 { 00977 return length1; 00978 } 00979 00980 inline long PartitionWorkspace::Length2() const 00981 { 00982 return length2.end_j; 00983 } 00984 00985 00986 inline Sequence::const_iterator PartitionWorkspace::Seq1Begin() const 00987 { 00988 return begin_seq1; 00989 } 00990 00991 inline Sequence::const_iterator PartitionWorkspace::Seq2Begin() const 00992 { 00993 return begin_seq2; 00994 } 00995 00996 00997 inline const Weight & PartitionWorkspace::GetWeight() const 00998 { 00999 return *ptr_weights; 01000 } 01001 01002 inline double PartitionWorkspace::GetInsertionProb(long i) const 01003 { 01004 if(i < (long)ins_Z.size()) 01005 return ins_Z[i]; //(ins_Z[i] * inv_Z).to_double(); 01006 else 01007 { 01008 std::cerr << "PartitionWorkspace::GetInsertionProb(): i out of range" << std::endl 01009 << "forward-backward computation performed?" << std::endl; 01010 exit(8); 01011 return 0; 01012 } 01013 } 01014 01015 inline double PartitionWorkspace::GetDeletionProb(long j) const 01016 { 01017 if(j < (long)del_Z.size()) 01018 return del_Z[j]; //(del_Z[j]*inv_Z).to_double(); 01019 else 01020 { 01021 std::cerr << "PartitionWorkspace::GetDeletionProb(): j out of range" << std::endl 01022 << "forward-backward computation performed?" << std::endl; 01023 exit(8); 01024 return 0; 01025 } 01026 } 01027 01028 01029 inline double PartitionWorkspace::GetPairProb1(long i) const 01030 { 01031 if(i < (long)pair1_Z.size()) 01032 return pair1_Z[i]; 01033 else 01034 { 01035 std::cerr << "PartitionWorkspace::GetPairProb1(): i (" << i << ") out of range" << std::endl 01036 << "forward-backward computation performed?" << std::endl; 01037 exit(8); 01038 return 0; 01039 } 01040 01041 } 01042 01043 inline double PartitionWorkspace::GetPairProb2(long j) const 01044 { 01045 if(j < (long)pair2_Z.size()) 01046 return pair2_Z[j]; 01047 else 01048 { 01049 std::cerr << "PartitionWorkspace::GetPairProb2(): j out of range" << std::endl 01050 << "forward-backward computation performed?" << std::endl; 01051 exit(8); 01052 return 0; 01053 } 01054 01055 } 01056 01057 inline void PartitionWorkspace::TransMX(const Weight & w, 01058 DynCell<xdouble> & cell, 01059 const DynCell<xdouble> & diag, 01060 int ai, 01061 int bj) 01062 { 01063 aux_trans.MM = 01064 w.tMM * 01065 diag.M * 01066 w[ai][bj]; 01067 01068 aux_trans.MI = 01069 w.tMI * 01070 diag.I * 01071 w.freq1[ai]; 01072 01073 aux_trans.MD = 01074 w.tMD * 01075 diag.D * 01076 w.freq2[bj]; 01077 01078 cell.M = aux_trans.MM + aux_trans.MI + aux_trans.MD; 01079 01080 } 01081 01082 inline void PartitionWorkspace::TransIX(const Weight & w, 01083 DynCell<xdouble> & cell, 01084 const DynCell<xdouble> & up, 01085 int ai, 01086 int bj) 01087 { 01088 aux_trans.IM = w.tIM * up.M * w[ai][bj]; 01089 aux_trans.II = w.tII * up.I * w.freq1[ai]; 01090 if(w.allow_ID) 01091 aux_trans.ID = w.tID * up.D * w.freq2[bj]; 01092 else 01093 aux_trans.ID = 0; 01094 cell.I = aux_trans.IM + aux_trans.II + aux_trans.ID; 01095 } 01096 01097 01098 01099 01100 inline void PartitionWorkspace::TransMX0(DynCell<xdouble> & cell) 01101 { 01102 cell.M = 0; 01103 aux_trans.MM = 0; 01104 aux_trans.MI = 0; 01105 aux_trans.MD = 0; 01106 } 01107 01108 inline void PartitionWorkspace::TransIX0(DynCell<xdouble> & cell) 01109 { 01110 cell.I = 0; 01111 aux_trans.IM = 0; 01112 aux_trans.II = 0; 01113 aux_trans.ID = 0; 01114 } 01115 01116 inline void PartitionWorkspace::TransDX0(DynCell<xdouble> & cell) 01117 { 01118 cell.D = 0; 01119 aux_trans.DM = 0; 01120 aux_trans.DI = 0; 01121 aux_trans.DD = 0; 01122 } 01123 01124 inline void PartitionWorkspace::HandleMarkov(Transition & t, DynCell<xdouble> & cell) 01125 { 01126 static xdouble EPS(xexp(-20000)); 01127 static xdouble invz; 01128 //cerr << "xxx" << endl; 01129 if(cell.M > 0) 01130 { 01131 //cout << cell.M << " " << (log(cell.M)) << endl; 01132 invz = xdouble(1.0) / cell.M; 01133 t.MM = (aux_trans.MM * invz).to_double(); 01134 t.MI = (aux_trans.MI * invz).to_double(); 01135 t.MD = (aux_trans.MD * invz).to_double(); 01136 } 01137 else 01138 t.MM = t.MI = t.MD = 0; 01139 01140 if(cell.D > 0) 01141 { 01142 //cout << cell.D << " " << (log(cell.D)) << endl; 01143 invz = xdouble(1.0) / cell.D; 01144 t.DM = (aux_trans.DM * invz).to_double(); 01145 t.DI = (aux_trans.DI * invz).to_double(); 01146 t.DD = (aux_trans.DD * invz).to_double(); 01147 } 01148 else 01149 t.DM = t.DI = t.DD = 0; 01150 01151 if(cell.I > 0) 01152 { 01153 //cout << cell.I << " " << (log(cell.I)) << endl; 01154 invz = xdouble(1.0) / cell.I; 01155 t.IM = (aux_trans.IM * invz).to_double(); 01156 t.II = (aux_trans.II * invz).to_double(); 01157 t.ID = (aux_trans.ID * invz).to_double(); 01158 } 01159 else 01160 t.IM = t.II = t.ID = 0; 01161 //cerr << "yyy" << endl; 01162 } 01163 01164 01165 inline double PartitionWorkspace::ForwardBackwardInfo::PairProb(long i, long j) 01166 { 01167 if(i < 0) return 0; 01168 if(j < curr_row.begin_j) return 0; 01169 else if(j >= curr_row.end_j) return 0; 01170 else 01171 { 01172 long j0 = j-curr_row.begin_j; 01173 return pair_prob[j0]; 01174 } 01175 } 01176 01177 01178 inline xdouble PartitionWorkspace::GetDanglingForwardInsert(long i) const 01179 { 01180 01181 if(i >= 0) 01182 return for_dang_ins[i]; 01183 else 01184 return 1.0; 01185 } 01186 01187 inline xdouble PartitionWorkspace::GetDanglingForwardDelete(long j) const 01188 { 01189 if(j >= 0) 01190 return for_dang_del[j]; 01191 else 01192 return 1.0; 01193 } 01194 01195 inline xdouble PartitionWorkspace::GetDanglingBackwardInsert(long i) const 01196 { 01197 if(i < length1) 01198 return back_dang_ins[i]; 01199 else 01200 return 1.0; 01201 } 01202 01203 inline xdouble PartitionWorkspace::GetDanglingBackwardDelete(long j) const 01204 { 01205 if(j < length2.end_j) 01206 return back_dang_del[j]; 01207 else 01208 return 1.0; 01209 } 01210 01211 inline xdouble PartitionWorkspace::GetNullProb() 01212 { 01213 return back_dang_ins[0]*back_dang_del[0] * inv_Z; 01214 } 01215 01216 01217 inline void PartitionDriverBase::LocalDangling(bool dang) 01218 { do_add_dangling = dang; } 01219 01220 inline void PartitionDriverBase::ComputePosterior(bool doit) 01221 { do_compute_posterior = doit; } 01222 01223 01224 #endif