Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

MalWrapper.hpp

Go to the documentation of this file.
00001 #ifndef MALWRAPPER_HPP
00002 #define MALWRAPPER_HPP
00003 
00004 #include <limits>
00005 #include <vector>
00006 //#include "dna_alphabet.hpp"
00007 // #include "TraceSymbols.hpp"
00008 #include "AlAlgs.hpp"
00009 #include "Div.hpp"  // defines print_elements_cerr
00010 #include "mal.h"
00011 
00012 /**
00013  * Defines an iterator to be used in class MAlWrapper since Realigner
00014  * uses iterators to iterate through reads.
00015  */
00016 template <typename Wrappee>
00017 class Iterator {
00018  public:
00019   typedef std::random_access_iterator_tag iterator_category;
00020   typedef char                            value_type;
00021   typedef int                             difference_type;
00022   typedef value_type *                    pointer;
00023   typedef value_type &                    reference;
00024   
00025   Iterator() {}
00026   
00027   Iterator(Wrappee * w, size_t r, size_t c) : w_(w), r_(r), c_(c) {}
00028   
00029   Iterator & operator++() {
00030     ++c_;
00031     return *this;
00032   }
00033 
00034   Iterator operator++(int) {
00035     Iterator tmp( *this );
00036     ++*this;
00037     return tmp;
00038   }
00039   
00040   value_type operator*() { 
00041     return w_->get_base_global(r_, c_); // kolla
00042   }
00043   size_t r() { return r_; }
00044   size_t c() { return c_; }
00045   
00046   friend Iterator<MAl> operator+(Iterator<MAl> it, int i);
00047   friend Iterator<MAl> operator-(Iterator<MAl> it, int i);
00048   friend difference_type operator-(Iterator<MAl> it1, Iterator<MAl> it2);
00049   friend bool operator==(Iterator<MAl> & it1, Iterator<MAl> & it2);
00050   friend bool operator!=(Iterator<MAl> & it1, Iterator<MAl> & it2);
00051  private:
00052   Wrappee * w_;
00053   size_t r_, c_;
00054 };
00055 
00056 Iterator<MAl> operator+(Iterator<MAl> it, int i) { 
00057   it.c_ += i;
00058   return it;
00059 }
00060 Iterator<MAl> operator-(Iterator<MAl> it, int i) { 
00061   it.c_ -= i;
00062   return it;
00063 }
00064 Iterator<MAl>::difference_type operator-(Iterator<MAl> it1, Iterator<MAl> it2) {
00065   return it1.c_ - it2.c_;
00066 }
00067 bool operator==(Iterator<MAl> & it1, Iterator<MAl> & it2) { // by value or by reference?
00068   return it1.r_ == it2.r_ && it1.c_ == it2.c_;
00069 }
00070 bool operator!=(Iterator<MAl> & it1, Iterator<MAl> & it2) {
00071   return !(it1 == it2);
00072 }
00073 
00074 
00075 /**
00076  * This class wraps the MAl class so that it can be used by Realigner.
00077  */
00078 template <typename alphabet>
00079 class MAlWrapper {
00080 public:
00081   typedef char               value_type;
00082   typedef value_type &       reference;
00083   typedef Iterator<MAl>      iterator;
00084   
00085   MAlWrapper( MAl* mal) : mal_(mal) {}
00086 
00087   size_t nrows() { return mal_->get_num_seq(); } 
00088 
00089   bool empty(int row) {
00090     assert(0 <= row && row < static_cast<int>(nrows()));
00091     return (last_pos(row) == first_pos(row) - 1);
00092   }
00093 
00094   void print_info(size_t r) {
00095     mal_->print_info(r);    
00096     cerr << "In the wrapper: r = " << r << endl;
00097     cerr << "end(r) - begin(r) = " << (end(r) - begin(r)) << endl;
00098 //     cerr << "Info for row " << r << endl;
00099 //     cerr << "first_pos(r) = " << first_pos(r) << endl;
00100 //     cerr << "last_pos(r) = " << last_pos(r) << endl;
00101   }
00102 
00103   iterator begin(size_t r) {
00104 //     cerr << "begin(): r = " << r << ", first_pos(r) = " << first_pos(r) << endl;
00105     return Iterator<MAl>(mal_, r, first_pos(r));
00106   }
00107 
00108   iterator end(size_t r) {
00109 //     cerr << "end(): r = " << r << ", last_pos(r) = " << last_pos(r) << endl;
00110     return Iterator<MAl>(mal_, r, last_pos(r) + 1);
00111   }
00112 
00113   void decrement_first_pos(size_t r) {
00114     assert(!realign_in_band_);
00115     mal_->set_seq_begin_global(r, mal_->get_seq_begin_global(r) - 1);    
00116   }
00117 
00118   void decrement_last_pos(size_t r) {
00119     assert(!realign_in_band_);
00120     mal_->set_seq_end_global(r, mal_->get_seq_end_global(r) - 1);    
00121   }
00122 
00123   // Insert v (a gap!) at position pos in read r. 
00124   void insert(size_t r, iterator pos, const value_type & v) {
00125     assert(v == '-');
00126 
00127     // NB: last_pos is already updated!!! 
00128     mal_->insert_gap_global(r, pos - begin(r) + first_pos(r));
00129 
00130     // Correct last_pos:
00131     mal_->set_seq_end_global(r, mal_->get_seq_end_global(r) - 1);
00132   }
00133 
00134   // Erase 
00135   void erase(size_t r, iterator pos) {
00136     assert(operator()(r, pos - begin(r) == '-'));
00137     mal_->remove_gap_global(r, pos - begin(r) + first_pos(r));
00138 
00139     // Correct last_pos:
00140     mal_->set_seq_end_global(r, mal_->get_seq_end_global(r) + 1);
00141   }
00142 
00143   // Move the whole read to the left.
00144   void move_read_left(size_t r, int steps) {
00145     assert(steps > 0);
00146 
00147     mal_->set_seq_begin_global(r, mal_->get_seq_begin_global(r) - steps);
00148     mal_->set_seq_end_global(r, mal_->get_seq_end_global(r) - steps);
00149   }
00150 
00151   // Move the whole read to the left.
00152   void move_read_right(size_t r, int steps) {
00153     assert(steps > 0);
00154 
00155     mal_->set_seq_begin_global(r, mal_->get_seq_begin_global(r) + steps);
00156     mal_->set_seq_end_global(r, mal_->get_seq_end_global(r) + steps);
00157   }
00158 
00159   value_type operator()(size_t r, size_t pos_in_read) {
00160     // Remark: pos_in_read is local!
00161     return mal_->get_base(r, pos_in_read);
00162   }
00163 
00164   /**
00165    * Swap the supplied new_read with the (whole) read r.
00166    */
00167   void swap(size_t r, int first_pos, int last_pos, const std::string & new_read) {
00168     // check consistency of indata
00169     assert(last_pos + 1 - first_pos == static_cast<int>(new_read.size()));
00170 
00171     // check that new_read equals read r (except for gaps)
00172     assert(( al::equal_without_gaps<alphabet, alphabet>(begin(r), end(r),
00173                                                                         new_read.begin(), new_read.end())));
00174     // check that first or last character of new_read is not gap (just to be sure)
00175     assert(alphabet::isNotAbsGap(new_read[0]) && 
00176            alphabet::isNotAbsGap(new_read[new_read.size() - 1]));
00177 
00178     // get instructions
00179     vector<al::instr_value_type> instr;
00180     al::get_instructions<alphabet, alphabet>(begin(r), end(r), 
00181                                                              new_read.begin(), new_read.end(), 
00182                                                              back_inserter(instr));
00183     // update read r according to the instructions
00184     int pos = 0;
00185     for (unsigned i = 0; i < instr.size(); ++i) {
00186       pos += instr[i].first;
00187       switch(instr[i].second) {
00188       case al::DELETE_GAP:
00189         assert(alphabet::isAbsGap((*this)(r, pos)));
00190         mal_->remove_gap(r, pos);
00191         break;
00192       case al::INSERT_GAP:
00193         mal_->insert_gap(r, pos);
00194         ++pos;
00195         break;
00196       }
00197     }
00198 
00199     // set global begin and end
00200     mal_->set_seq_begin_global(r, first_pos);
00201     mal_->set_seq_end_global(r, first_pos + new_read.size());
00202 
00203     // check that the modified read r is equals new_read 
00204     if ( new_read.end() - new_read.begin() != end(r) - begin(r) ) {
00205       std::cerr << "new_read.end() - new_read.begin() = " << new_read.end() - new_read.begin() << endl;
00206       std::cerr << "begin(r) - end(r) = " << begin(r) - end(r) << endl;
00207     }
00208     assert( new_read.end() - new_read.begin() == end(r) - begin(r) );
00209     if ( !equal(new_read.begin(), new_read.end(), begin(r)) ) {
00210       al::print_elements_cerr(new_read, "", "new_read\n");
00211       al::print_elements_cerr(begin(r), end(r), "", "modified row r\n");
00212     }
00213     assert( std::equal(new_read.begin(), new_read.end(), begin(r)) );
00214                   
00215 //     mal_->set_seq_end(r, new_read.size() - 1);
00216 
00217 //     // write new sequence
00218 //     for (size_t i = 0; i < new_read.size(); ++i) {
00219 //       mal_->set_base(r, i, new_read[i]);
00220 //     }
00221   }
00222  
00223   int first_pos(size_t r) { return mal_->get_seq_begin_global(r); }
00224   int last_pos(size_t r) { return mal_->get_seq_end_global(r) - 1; }
00225 
00226   // should local seq_begin / seq_end be set?
00227 //   void set_first_pos(size_t r, int pos) { mal_->set_seq_begin_global(r, pos); }
00228 //   void set_last_pos(size_t r, int pos) { mal_->set_seq_end_global(r, pos + 1); }
00229   void adjust_last_pos(size_t r, int steps, int) {
00230     mal_->set_seq_end_global(r, mal_->get_seq_end_global(r) + steps);
00231   }
00232   
00233 
00234   // methods added Febr 20 to make it compile -----------------------------------------------------------
00235   int getMinStartCol() {
00236     int m = std::numeric_limits<int>::max();
00237     assert(nrows() > 0);
00238     for (size_t r = 0; r < nrows(); ++r) {
00239       m = std::min(m, first_pos(r));
00240     }
00241     return m;
00242   }
00243   int getMaxStopCol() {
00244     int m = std::numeric_limits<int>::min();
00245     assert(nrows() > 0);
00246     for (size_t r = 0; r < nrows(); ++r) {
00247         m = std::max(m, last_pos(r));
00248     }
00249     return m;
00250   }
00251   //! Beräkna minsta startCol (beakta inte rad realRow).
00252   int getMinStartCol(int realRow) {
00253     int m = std::numeric_limits<int>::max();
00254     assert(nrows() > 0);
00255 
00256     for (size_t r = 0; r < nrows(); ++r) {
00257       if (static_cast<int>(r) != realRow)
00258         m = std::min(m, first_pos(r));
00259     }
00260     return m;
00261   }
00262 
00263   //! Beräkna största stopCol (beakta inte rad realRow).
00264   int getMaxStopCol(int realRow) {
00265     int m = std::numeric_limits<int>::min();
00266     assert(nrows() > 0);
00267 
00268     for (size_t r = 0; r < nrows(); ++r) {
00269       if (static_cast<int>(r) != realRow)
00270         m = std::max(m, last_pos(r));
00271     }
00272     return m;
00273   }
00274 
00275   void set_realign_in_band(bool b) { 
00276 
00277     realign_in_band_ = b;
00278     if (b) { std::cerr << "realign in band not implemented yet" << endl; exit(1); }
00279     
00280   }
00281   bool left_fixed(size_t r) { 
00282     return false;
00283     //    return (realign_in_band_ ? left_fixed_[r] : false); 
00284   }
00285   bool right_fixed(size_t r) {     
00286     return false;
00287     //    return (realign_in_band_ ? right_fixed_[r] : false); 
00288   } 
00289   bool others_left_fixed(size_t r) { 
00290     return false;
00291 //   assert(realign_in_band_);
00292 //     return others_left_fixed_[r]; }
00293   }
00294   bool others_right_fixed(size_t r) {       
00295     return false;
00296 //     assert(realign_in_band_);
00297 //     return others_right_fixed_[r]; }
00298   }
00299   size_t nrows_global() { return nrows(); }
00300   void adjust_pos_right_of_band_border(int d, int band_border_right) {}
00301   //  void print() { mal_->print(); }
00302   iterator begin_global(size_t r) { return begin(r); }
00303   iterator end_global(size_t r) { return end(r); }
00304   int first_pos_global(size_t r) { return first_pos(r); }
00305   int last_pos_global(size_t r) { return last_pos(r); }
00306   int swap_substr(size_t r, std::string & other_read) { exit(0); }
00307   void swap_suffix(size_t r, int first_pos, int last_pos, std::string & other_read) { exit(0); }
00308   void swap_prefix(size_t r, int first_pos, int last_pos, std::string & other_read) { exit(0); }
00309   bool realign_in_band() { return false; }
00310 
00311  private:
00312   MAl* mal_;
00313   bool realign_in_band_;
00314 };
00315 
00316 
00317 #endif

Generated on Fri Mar 17 17:44:24 2006 for trapper by  doxygen 1.4.4