Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

mal_readonly.h

Go to the documentation of this file.
00001 
00002 #ifndef MAL_READONLY_H_EA
00003 #define MAL_READONLY_H_EA
00004 
00005 /** @author Erik Arner, Karolinska Institute, (c) Erik Arner 2003.
00006     @version MAl version 0.1
00007 */
00008 
00009 /** \brief MAl stands for Multiple Alignment, it is a container class
00010     for sequences in such an alignment.
00011     
00012     The idea is that this class should have "two" interfaces, one
00013     alignment/matrix interface with global indexes, columns etc and
00014     one sequence collection interface similar to the old cosmid
00015     interface.
00016 
00017     First version is a front end to the Berkeley DB. This could be
00018     made more flexible in future versions, allowing any storage of
00019     data using pImpl idiom.
00020 
00021     Problem: BDB has 1-based indexing (recno), while every algo
00022     written in the TRAP system uses 0-based indexing. I'll stick to
00023     0-based indexing in this system, to avoid future hassle.
00024 
00025     Future plans: make this system pluggable along the same lines as
00026     the data classes in trapper, as the first implementation of the
00027     MAl class will be based on these classes. Also remove the coupling
00028     between this class and TrapperDoc...
00029 */
00030 
00031 #include <iostream>
00032 #include <set>
00033 #include <string>
00034 #include <vector>
00035 #include "trapperdoc.h"//maybe fwd-decl instead??
00036 #include "db_cxx.h"
00037 //Public, global typedefs (declared at file scope, so visible to every includer of this header)
00038 typedef char base_t;//Element type of MAl_Readonly::seqs; returned by get_base() / get_base_global()
00039 // typedef short int qual_t;
00040 typedef Q_UINT32 qual_t;//Element type of MAl_Readonly::quals; Q_UINT32 is not declared here - presumably Qt's 32-bit unsigned typedef arriving via trapperdoc.h, confirm
00041 
00042 class MAl_Readonly 
00043 {
00044   //Only print_info() is defined in this header; every other method is just declared here.
00045 public:
00046   //NOTE(review): if recnolist is what the reference member selectedReads binds to, it must outlive this object - confirm.
00047   //'tors...
00048   MAl_Readonly(size_t bufsize, std::set<db_recno_t>& recnolist, TrapperDoc* pdoc);
00049   virtual ~MAl_Readonly();  
00050 
00051   //Debug aid: writes ID, global begin/end and length of sequence ID to std::cerr (std:: spelled out, no using-directive needed).
00052   void print_info(size_t ID) {
00053     std::cerr << "mal_readonly: print_info: ID = " << ID << std::endl;
00054     std::cerr << "get_seq_begin_global( ID ) = " << get_seq_begin_global( ID ) << std::endl;
00055     std::cerr << "get_seq_end_global( ID ) = " << get_seq_end_global( ID ) << std::endl;
00056     std::cerr << "get_len( ID ) = " << get_len( ID ) << std::endl;
00057   }
00058 
00059   //Common methods
00060   //Every accessor identifies a sequence by a size_t ID; none of them is declared const.
00061 
00062   size_t get_num_seq();
00063   std::string get_name( size_t ID );
00064   std::string get_header( size_t ID );  
00065   std::string get_seq( size_t ID );
00066   std::string get_strand( size_t ID );
00067   size_t get_len( size_t ID);
00068   void select_read( size_t ID, bool status );
00069 
00070   //Separate interfaces
00071   //Most come in pairs, X and X_global; presumably sequence-local vs. alignment-global coordinates - confirm.
00072   size_t get_seq_row( size_t ID );
00073 
00074   size_t get_seq_begin( size_t ID );
00075   size_t get_seq_begin_global( size_t ID );
00076 
00077 //   int get_seq_begin( size_t ID );//FIX THIS
00078 //   int get_seq_begin_global( size_t ID );//FIX THIS
00079 
00080   size_t get_seq_end( size_t ID );
00081   size_t get_seq_end_global( size_t ID );
00082 
00083   size_t get_beginGood( size_t ID );
00084   size_t get_beginGood_global( size_t ID );
00085 
00086   size_t get_endGood( size_t ID );
00087   size_t get_endGood_global( size_t ID );
00088 
00089   base_t get_base( size_t ID, size_t index );
00090   base_t get_base_global( size_t ID, size_t index );
00091 
00092   qual_t get_qual( size_t ID, size_t index );
00093   qual_t get_qual_global( size_t ID, size_t index );
00094 
00095   bool is_DNP(size_t ID, size_t index);
00096   bool is_DNP_global(size_t ID, size_t index);
00097 
00098   int get_DNP_ID(size_t ID, size_t index);
00099   int get_DNP_ID_global(size_t ID, size_t index);
00100 
00101   int get_DNP_type(size_t ID, size_t index);
00102   int get_DNP_type_global(size_t ID, size_t index);
00103   
00104 protected:
00105   //Protected methods
00106   size_t get_buffID(size_t ID);
00107   size_t next_buffID();
00108   //flush_buffer() is the only virtual method besides the destructor.
00109   virtual void flush_buffer(size_t buffID );
00110   void read_from_db(size_t buffID, size_t ID);
00111   void read_seq_from_db( db_recno_t recno, size_t buffID );
00112   void read_feat_from_db( db_recno_t recno, size_t buffID, const std::string& data_type_name);
00113   
00114 
00115   //Protected structs
00116 
00117   struct dnp_struct
00118   {
00119     dnp_struct(bool is = false, db_recno_t rec = 0, int id = -1, int t = -1 ) :
00120       isDNP(is), recno(rec), ID(id), type(t) {}
00121     //A default-constructed dnp_struct holds isDNP=false, recno=0, ID=-1, type=-1.
00122     bool isDNP;
00123     db_recno_t recno;
00124     int ID;
00125     int type;
00126   };
00127 
00128   //Members
00129   size_t buff_size;
00130   size_t num_seq;
00131   TrapperDoc* doc;
00132   std::set<db_recno_t>& selectedReads;//Reference member: the set itself lives outside this object
00133   
00134   //Maybe use deques instead??
00135   //These guys are of the buffer size
00136   std::vector<std::vector<base_t> > seqs;
00137   std::vector<std::vector<qual_t> > quals;
00138   std::vector<std::vector<dnp_struct> > DNPs;
00139   std::vector<std::string> names;
00140   std::vector<std::string> headers;
00141   std::vector<std::string> mates;
00142   std::vector<std::string> strands;
00143   
00144   std::vector<size_t> seq_rows;
00145   std::vector<size_t> seq_begin_global;
00146   std::vector<size_t> seq_end_global;//Unnecessary?? We have the sizes of seqs...
00147   std::vector<size_t> seq_beginGood;//NB, not global!
00148   std::vector<size_t> seq_endGood;//NB, not global!
00149   std::vector<size_t> mate_lengths;
00150 
00151   //Buffer stuff
00152   std::vector<db_recno_t> ID_to_dbID;//Should be of actual data set size
00153   std::vector<size_t> ID_to_buffID;//Ditto
00154   std::vector<db_recno_t> buffID_to_dbID;//buffer size
00155   std::vector<db_recno_t> buffID_to_ID;//buffer size; NOTE(review): IDs are size_t elsewhere in this class, yet the element type is db_recno_t - confirm intended
00156   std::vector<bool> put_in_db;//Watch out for vector<bool>...
00157   
00158 
00159 };
00160 
00161 
00162 #endif //MAL_READONLY_H_EA
00163  
00164 
00165 
00166 //Should this stuff be private??
00167 /*
00168   void change_base( size_t ID, size_t baseIndex, char newBase );
00169   void set_seq_begin( size_t index, size_t pos );
00170   void set_beginGood( const size_t index, const size_t pos );
00171   void set_endGood( const size_t index, const size_t pos );
00172   void insert_base(size_t ID, size_t before_index, char base);  
00173   void remove_base(size_t ID, size_t index);  
00174   void put_qual( size_t ID, size_t qualValIndex, const size_t qualityValue );
00175   void delete_seq(size_t ID);
00176   bool is_deleted( size_t ID );
00177 */
00178 
00179 
00180 //OBSOLETE???
00181 /*
00182   std::string get_headerQ( size_t ID );
00183   size_t isPossibleRepeat(size_t ID);//????????????????????
00184   void mark_possibleRepeat(size_t ID);//??????????????????
00185   size_t append_seq( char seqName[], char seqHeader[] );
00186   size_t get_first_revComp_index();
00187   void set_max_coverage( size_t ID, size_t index, size_t cov );
00188   size_t get_max_coverage( size_t ID, size_t index );
00189   void set_pos_non_chimeric( size_t ID, size_t index);
00190   size_t is_chimeric( size_t ID, size_t index );
00191   void set_beginAnalyzable( const size_t index, const size_t pos );
00192   size_t get_beginAnalyzable( const size_t index );
00193   void set_endAnalyzable( const size_t index, const size_t pos );
00194   size_t get_endAnalyzable( const size_t index );  
00195   size_t get_number_seqs_in_file(const std::string fileName);
00196   size_t get_number_seqs_in_DATA_file(const std::string fileName);
00197   void set_qualBegin( const size_t index, const size_t pos );
00198   size_t get_qualBegin( const size_t index );
00199   void set_qualEnd( const size_t index, const size_t pos );
00200   size_t get_qualEnd( const size_t index );
00201   size_t is_quality( size_t index );
00202   void mark_is_quality(size_t id);
00203   size_t seq_size( size_t ID );
00204   size_t different_strands(size_t ID1, size_t ID2);
00205   size_t qual_size( size_t ID );
00206   char get_comp_base(char base);
00207   size_t get_ID_in_revComp_counterpart(size_t ID);
00208   size_t get_index_in_revComp_counterpart(size_t ID, size_t index);
00209   char get_DNP(size_t ID, size_t index);
00210   void set_DNP(size_t ID, size_t index, char base, size_t unique);
00211   void set_templ_DNP(size_t ID, size_t index);
00212   void set_templ_DNP_pos(size_t ID, size_t index, size_t pos);
00213   size_t is_templ_DNP(size_t ID, size_t index);
00214   size_t get_templ_DNP_pos(size_t ID, size_t index);
00215   void set_DNP_ncorr(size_t ID, size_t index, size_t ncorr);
00216   size_t get_DNP_ncorr(size_t ID, size_t index);
00217   void set_DNP_p(size_t ID, size_t index, double p);
00218   double get_DNP_p(size_t ID, size_t index);
00219   void set_DNP_against_insert(size_t ID, size_t index, char base, size_t unique);
00220   void set_DNP_against_deletion(size_t ID, size_t index, size_t unique, size_t unset_DNP);
00221   char get_DNP_against_insert(size_t ID, size_t index);
00222   char get_DNP_against_deletion(size_t ID, size_t index);
00223   size_t get_DNP_type(size_t ID, size_t index);
00224   size_t get_DNP_type_against_insert(size_t ID, size_t index);
00225   size_t get_DNP_type_against_deletion(size_t ID, size_t index);
00226   size_t DNP_mismatch(size_t ID, size_t index, char base);
00227   size_t DNP_mismatch_against_insert(size_t ID, size_t index, char base);
00228   size_t DNP_mismatch_against_deletion(size_t ID, size_t index);
00229 */

Generated on Fri Mar 17 17:44:24 2006 for trapper by  doxygen 1.4.4