ppALIGN API documentation
alternative_alignments.cpp
This example shows illustrate the usage of the AlignOverlap class.
// This program illustrate how alternative alignments, // sampled from the posterior distribution and the // max. posterior alignment (max. "accuracy alignment") // can be obtained and compared with a given alignment. // The orginal alignment is divided in section which is common // to all alignments and in those where alternative alignments // differ from the given alignment. #include <partitionworkspace.hpp> #include <weights.hpp> #include <posterprob.hpp> #include <sampling.hpp> #include <posterdecode.hpp> #include <alignoverlap.hpp> using namespace std; int main() { // Initalization // output options bool print_xml = false; bool print_posterior = true; // the aligned sequences ProteinSequence seq1("ISLWMRFLPLLAVLALWGPDPASAFVNQHL----------------VEALYLICDDRGFFYTSKTRRELENLQAGQVELGGGPGAGSLQALALEGSLQKRPAVEQCCTSNCKLYQLESY"); ProteinSequence seq2("MALWRRLLPMLALLALWSPEKAAAFIHQHLCGSNLVQPNRPGPTASIQWLCLVGGGRGFFYTRKARREAEDLQVGQVELGGGPGASSLQALALESSMQEPGVTQLICTSICSLYKPQLY"); // alignment parameters double temperature = 1.0; int gap_open = 11; int gap_extension = 1; bool allow_DI=true; bool allow_ID=false; // construct of an alignment from the sequences with // regular letters and gap symbols Align align(seq1,seq2); // remove gap symbols from the sequences seq1 /= GapSymbol; seq2 /= GapSymbol; // Random number generator DRand48 rng(1); // Boltzmann Weight // you may replace it with the class HmmWeight BoltzmannWeight weight(proteinAlphabet, proteinAlphabet, "blosum62", temperature, gap_open, gap_extension, GlobalBounded, allow_DI, allow_ID); // The workspace where the main computation is performed PartitionWorkspace pws(100000); // Initalization of the Drivers and computation // different drivers we are using // we want to compute the posterior probabilities of // the given alignment align DriverPosterProb poster_prob; poster_prob.SetAlign(align); pws.AddDriver(poster_prob); // we want to sample 10 alternative alignments DriverSampling sampling(rng); sampling.SetNumber(10); pws.AddDriver(sampling); // we want to determine the max posterior posterior // alignment DriverPosterDecode poster_decode; pws.AddDriver(poster_decode); // perform the computation on a strip around the given // alignment pws.PartitionFunction(seq1, seq2, align, weight); // determine the max. posterior alignment and // store it in poster_decode.alternative_alignments[0] poster_decode.Decode(); // Output // stream out alignments cout << "given alignment" << endl; cout << align << endl; cout << "sampled alignments" << endl; for(vector<Align>::iterator itr = sampling.alternative_alignments.begin(); itr != sampling.alternative_alignments.end(); ++itr) { cout << *itr << endl; } cout << "max. accuracy alignment" << endl; cout << poster_decode.alternative_alignments[0] << endl; // next we compute the overlap between the original alignment // and the alternatives: AlignOverlap overlap(align,seq1,seq2); for(vector<Align>::iterator itr = sampling.alternative_alignments.begin(); itr != sampling.alternative_alignments.end(); ++itr) { overlap.Add(sampling.Name(),*itr); } overlap.Add(poster_decode.Name(), poster_decode.alternative_alignments[0]); overlap.BuildOverlapSegments() ; // structured XML output if(print_xml) { overlap.StreamOutPartition(cout); } Align align_seg; for(AlignOverlap::segment_iterator itr = overlap.BeginSegments(); itr != overlap.EndSegments(); ++itr) { itr->GetAlign(align_seg); for(Align::iterator aitr = align_seg.begin(); aitr != align_seg.end(); ++aitr) { cout << aitr->pair.i << " " << aitr->pair.j << " " << aitr->posterior << endl; //printf("(%d %d) %f\n", aitr->pair.i, aitr->pair.j, aitr->posterior); /* if(aitr->posterior > 0.0) { if(itr->overlap) { glob->num_overlap++; glob->pp_overlap += aitr->posterior; glob->overlapping[t] = 1; } else { glob->num_non_overlap++; glob->pp_non_overlap += aitr->posterior; glob->overlapping[t] = -1; } */ } } // manual output // prints the segments of the reference alignment and alternative alignments // iteration over segments in the original alignment //Align align_seg; for(AlignOverlap::segment_iterator itr = overlap.BeginSegments(); itr != overlap.EndSegments(); ++itr) { if(itr->overlap) cout << "Overlapping Segment " << endl; else cout << "none-Overlapping Segment " << endl; itr->StreamOutQuery(cout); cout << endl; itr->StreamOutSubject(cout); cout << endl; cout << endl; cout << "Start pos in the sequences : " << itr->start_pos << endl; cout << "matches/mismatches : " << itr->n_matches << endl; cout << "n_insertions : " << itr->n_insertions << endl; cout << "n_deletions : " << itr->n_deletions << endl; // print out posterior probs of the segment if(print_posterior) { itr->GetAlign(align_seg); for(Align::iterator aitr = align_seg.begin(); aitr != align_seg.end(); ++aitr) { cout << aitr->pair << " " << aitr->posterior << endl; } } cout << endl; // iteration over alternative alignment methods if(!itr->overlap) { for(int method = 0; method < 2; method++) { // set name of the decoding method string name = (method == 0) ? sampling.Name() : poster_decode.Name(); AlignOverlap::alternative_iterator alter_itr,alter_begin,alter_end; alter_begin = itr->BeginAlternatives(name); alter_end = itr->EndAlternatives(name); cout << " alternative, generator: " << name << endl; // iteration over alternative alignments for(alter_itr = alter_begin; alter_itr != alter_end; ++alter_itr) { // output of the segment alter_itr->first.StreamOutQuery(cout); // number of occerencies cout << " number: " << alter_itr->second << endl; alter_itr->first.StreamOutSubject(cout); cout << endl; // more details of the segment if(print_posterior) { // posterior probs for alternative segment for(Align::const_iterator aitr = alter_itr->first.begin; aitr != alter_itr->first.end; ++aitr) { cout << aitr->pair << " " << aitr->posterior << endl; } } } // for each alternative segment } // for each method cout << "##############" << endl << endl; } } return 0; }