ppALIGN API documentation

alternative_alignments.cpp

This example shows illustrate the usage of the AlignOverlap class.

//  This program illustrate how alternative alignments,
//  sampled from the posterior distribution and the 
//  max. posterior alignment (max. "accuracy alignment") 
//  can be obtained and compared with a given alignment.
  
//  The orginal alignment is divided in section which is common 
//  to all alignments and in those where alternative alignments 
//  differ from the given alignment.

#include <partitionworkspace.hpp>
#include <weights.hpp>
#include <posterprob.hpp>
#include <sampling.hpp>
#include <posterdecode.hpp>
#include <alignoverlap.hpp>

using namespace std;


int main()
{
  // Initalization

  // output options
  bool print_xml = false; 
  bool print_posterior = true;
  
  // the aligned sequences 

  ProteinSequence seq1("ISLWMRFLPLLAVLALWGPDPASAFVNQHL----------------VEALYLICDDRGFFYTSKTRRELENLQAGQVELGGGPGAGSLQALALEGSLQKRPAVEQCCTSNCKLYQLESY");
  ProteinSequence seq2("MALWRRLLPMLALLALWSPEKAAAFIHQHLCGSNLVQPNRPGPTASIQWLCLVGGGRGFFYTRKARREAEDLQVGQVELGGGPGASSLQALALESSMQEPGVTQLICTSICSLYKPQLY");

  // alignment parameters 
  double temperature = 1.0;
  int    gap_open = 11;
  int    gap_extension = 1;
  bool   allow_DI=true;
  bool   allow_ID=false;

  // construct of an alignment from the sequences with 
  // regular letters and gap symbols 
  Align  align(seq1,seq2); 
  // remove gap symbols from the sequences 
  seq1 /= GapSymbol;
  seq2 /= GapSymbol;

  // Random number generator 
  DRand48 rng(1);



  // Boltzmann Weight 
  // you may replace it with the class HmmWeight
  BoltzmannWeight weight(proteinAlphabet,
                         proteinAlphabet,
                         "blosum62",
                         temperature,
                         gap_open,
                         gap_extension,
                         GlobalBounded,
                         allow_DI,
                         allow_ID);

  // The workspace where the main computation is performed 
  PartitionWorkspace pws(100000);



  // Initalization of the Drivers and computation

  // different drivers we are using 

  // we want to compute the posterior probabilities of 
  // the given alignment  align
  DriverPosterProb   poster_prob;
  poster_prob.SetAlign(align);
  pws.AddDriver(poster_prob);

  // we want to sample 10 alternative alignments 
  DriverSampling     sampling(rng);
  sampling.SetNumber(10);
  pws.AddDriver(sampling);

  // we want to determine the max posterior posterior 
  // alignment
  DriverPosterDecode poster_decode;
  pws.AddDriver(poster_decode);  

  // perform the computation on a strip around the given 
  // alignment 
  pws.PartitionFunction(seq1,
                        seq2,
                        align,
                        weight);

  // determine the max. posterior alignment and 
  // store it in poster_decode.alternative_alignments[0] 
  poster_decode.Decode();
  
  // Output 

  // stream out  alignments 
  cout << "given alignment" << endl;
  cout << align << endl;

  cout << "sampled alignments" << endl;
  for(vector<Align>::iterator itr = sampling.alternative_alignments.begin();
      itr != sampling.alternative_alignments.end();
      ++itr) 
    {
      cout << *itr << endl;
    }
  cout << "max. accuracy alignment" << endl;
  cout << poster_decode.alternative_alignments[0] << endl;
  
  // next we compute the overlap between the original alignment 
  // and the alternatives: 
  AlignOverlap overlap(align,seq1,seq2);
  for(vector<Align>::iterator itr = sampling.alternative_alignments.begin();
      itr != sampling.alternative_alignments.end();
      ++itr) 
    {
      overlap.Add(sampling.Name(),*itr);
    }
  overlap.Add(poster_decode.Name(),
              poster_decode.alternative_alignments[0]);


  overlap.BuildOverlapSegments() ;
  // structured XML output 
  if(print_xml) 
    {
      overlap.StreamOutPartition(cout);
    }


  Align align_seg;
  for(AlignOverlap::segment_iterator itr = overlap.BeginSegments();
      itr != overlap.EndSegments();
      ++itr) 
    {
      itr->GetAlign(align_seg);
      for(Align::iterator aitr = align_seg.begin(); 
          aitr != align_seg.end();
          ++aitr) 
      {
        cout << aitr->pair.i << " " << aitr->pair.j << " " << aitr->posterior << endl;
        //printf("(%d %d) %f\n", aitr->pair.i, aitr->pair.j, aitr->posterior);
        /* if(aitr->posterior > 0.0) 
          {
          if(itr->overlap) 
          {
            glob->num_overlap++;
            glob->pp_overlap += aitr->posterior;
            glob->overlapping[t] = 1;
          }
          else
          {
            glob->num_non_overlap++;
            glob->pp_non_overlap += aitr->posterior;
            glob->overlapping[t] = -1;
          }
          */
      }
    }



  
  // manual output 
  // prints the segments of the reference alignment and alternative alignments 
  // iteration over segments in the original alignment 

  //Align align_seg;
  for(AlignOverlap::segment_iterator itr = overlap.BeginSegments();
      itr != overlap.EndSegments();
      ++itr) 
    {

      if(itr->overlap) 
        cout << "Overlapping Segment " << endl;
      else 
        cout << "none-Overlapping Segment " << endl;
      itr->StreamOutQuery(cout);
      cout << endl;
      itr->StreamOutSubject(cout);
      cout << endl;
      cout << endl;
      cout << "Start pos in the sequences : " << itr->start_pos << endl;
      cout << "matches/mismatches         : " << itr->n_matches << endl;
      cout << "n_insertions               : " << itr->n_insertions << endl;
      cout << "n_deletions                : " << itr->n_deletions << endl;

      // print out posterior probs of the segment
      if(print_posterior) 
        {
          itr->GetAlign(align_seg);
          for(Align::iterator aitr = align_seg.begin(); 
              aitr != align_seg.end();
              ++aitr) 
            {
              cout << aitr->pair << " " << aitr->posterior << endl;
            }
        }
      cout << endl;
      
      // iteration over alternative alignment methods 
      if(!itr->overlap) 
        {
          for(int method = 0; method < 2; method++) 
            {
              // set name of the decoding method 
              string name = (method == 0) ? 
                sampling.Name() : 
                poster_decode.Name();
              
              AlignOverlap::alternative_iterator alter_itr,alter_begin,alter_end;
              alter_begin = itr->BeginAlternatives(name);
              alter_end = itr->EndAlternatives(name);
              cout << "  alternative, generator: " << name << endl;

              // iteration over alternative alignments
              for(alter_itr = alter_begin; 
                  alter_itr != alter_end; 
                  ++alter_itr) 
                {
                  // output of the segment 
                  alter_itr->first.StreamOutQuery(cout);
                  // number of occerencies 
                  cout << " number: " << alter_itr->second << endl;
                  alter_itr->first.StreamOutSubject(cout);
                  cout << endl;
                  
                  // more details of the segment
                  if(print_posterior) 
                    {
                      // posterior probs for alternative segment 
                      for(Align::const_iterator aitr = alter_itr->first.begin;
                          aitr != alter_itr->first.end;
                          ++aitr) 
                        {
                          cout << aitr->pair << " " 
                               << aitr->posterior << endl;
                        }
                    }
                } // for each alternative segment 
              
            } // for each method 
          cout << "##############" << endl << endl;
        }
    }
  return 0;
}