Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

mal_readonly.cc

Go to the documentation of this file.
00001 #include "mal_readonly.h"
00002 #include "generaldata.h"
00003 #include "readdata.h"
00004 #include "featuredata.h"
00005 
00006 using namespace std;
00007 
00008 
00009 MAl_Readonly::MAl_Readonly( size_t bufsize, set<db_recno_t>& recnolist, TrapperDoc* pdoc ) :
00010   buff_size(bufsize + 1),// +1 because buffID == 0 is unused
00011   num_seq( recnolist.size() ),
00012   doc(pdoc),
00013   selectedReads(recnolist)
00014 {
00015   //Init the bookkeeping arrays, the rest will be automatically filled as the reads are used
00016 
00017   //These guys are of the buffer size
00018   seqs.reserve( buff_size );
00019   quals.reserve( buff_size );
00020   DNPs.reserve( buff_size );//NOTA BENE, vector<bool> is special case...
00021   names.reserve( buff_size );
00022   headers.reserve( buff_size );
00023   mates.reserve( buff_size );
00024   strands.reserve( buff_size );
00025   
00026 
00027   //New deal - NOT always kept in memory...
00028   seq_rows.reserve( buff_size );
00029   seq_begin_global.reserve( buff_size );
00030   seq_end_global.reserve( buff_size );//Unnecessary?? We have the sizes of seqs...
00031   seq_beginGood.reserve( buff_size );//NB, not global!
00032   seq_endGood.reserve( buff_size );//NB, not global!
00033   mate_lengths.reserve( buff_size );
00034 
00035   //Buffer stuff
00036   ID_to_dbID.reserve( num_seq );//Should be of actual data set size
00037   ID_to_buffID.reserve( num_seq );//Ditto
00038   buffID_to_dbID.reserve( buff_size );//buffer size
00039   buffID_to_ID.reserve( buff_size );//buffer size
00040   put_in_db.reserve( buff_size );//buffer size
00041   
00042   for( set<db_recno_t>::iterator it = recnolist.begin(); it != recnolist.end(); ++it ) {
00043     ID_to_dbID.push_back( *it );
00044     ID_to_buffID.push_back( 0 );
00045   }
00046 
00047   //Init all containers
00048   for( size_t i = 0; i < buff_size; i++ ) {
00049 
00050     buffID_to_dbID.push_back( 0 );
00051     buffID_to_ID.push_back( 0 );
00052     put_in_db.push_back(false);
00053 
00054     //NOTA BENE, ONLY START, END AND ROW so far...
00055     
00056     seq_rows.push_back( 0 );//Need better initval...
00057     seq_begin_global.push_back( 0 );//Ditto
00058     seq_end_global.push_back( 0 );//Ditto
00059 
00060     seqs.push_back( vector<base_t>() );
00061     quals.push_back( vector<qual_t>() );
00062     DNPs.push_back( vector<dnp_struct>() );
00063     names.push_back( string() );
00064     mates.push_back( string() );
00065     strands.push_back( string() );
00066     
00067 
00068     seq_beginGood.push_back( 0 );
00069     seq_endGood.push_back( 0 );
00070     mate_lengths.push_back( 0 );
00071     
00072   }
00073   cerr<<"buff_size: "<<buff_size<<endl;
00074 }
00075 
00076 MAl_Readonly::~MAl_Readonly()
00077 {
00078   //Nothing to be done here...
00079 }
00080 
00081 
00082 //Public methods - common
00083 
00084 size_t MAl_Readonly::get_num_seq()
00085 {
00086   return num_seq;
00087 }
00088 
00089 std::string MAl_Readonly::get_name( size_t ID )
00090 {
00091   ID = get_buffID( ID );
00092   
00093   assert( ID < names.size() );
00094   
00095   return names[ ID ];
00096   
00097 }
00098 
00099 std::string MAl_Readonly::get_header( size_t ID )
00100 {
00101   ID = get_buffID( ID );
00102   
00103   assert( ID < headers.size() );
00104   
00105   return headers[ ID ];
00106   
00107 }
00108 
00109 std::string MAl_Readonly::get_seq( size_t ID )
00110 {
00111   ID = get_buffID( ID );
00112   
00113   assert( ID < seqs.size() );
00114   
00115 //   return seqs[ ID ];
00116   return string(seqs[ ID ].begin(), seqs[ ID ].end());
00117   
00118 }
00119 
00120 string MAl_Readonly::get_strand( size_t ID )
00121 {
00122   ID = get_buffID( ID );
00123   
00124   assert( ID < strands.size() );
00125   
00126   return strands[ ID ];
00127   
00128 }
00129 
00130 size_t MAl_Readonly::get_len( size_t ID)
00131 {
00132   ID = get_buffID( ID );
00133   
00134   assert( ID < seqs.size() );
00135   
00136   return seqs[ ID ].size();
00137   
00138 }
00139 
00140 void MAl_Readonly::select_read( size_t ID, bool status )
00141 {
00142   
00143   assert( ID < ID_to_dbID.size() );
00144   
00145   db_recno_t recno = ID_to_dbID[ ID ];
00146   if ( recno == 0 ) return;
00147 
00148   set<db_recno_t>::iterator pos;
00149   if ( status == true )
00150     selectedReads.insert( recno );
00151   else if ( ( pos = selectedReads.find( recno )) != selectedReads.end() ) {
00152     selectedReads.erase( pos );
00153   }
00154   
00155   
00156 }
00157 
00158 
00159 //Public methods - separate interfaces
00160 
00161 size_t MAl_Readonly::get_seq_row( size_t ID )
00162 {
00163   ID = get_buffID( ID );
00164 
00165   assert( ID < seq_rows.size() );
00166   
00167   return seq_rows[ ID ];
00168   
00169 }
00170 
00171 
00172 size_t MAl_Readonly::get_seq_begin_global( size_t ID )
00173 // int MAl_Readonly::get_seq_begin_global( size_t ID )
00174 {
00175   ID = get_buffID( ID );
00176 
00177   assert( ID < seq_begin_global.size() );
00178   
00179   return seq_begin_global[ ID ];
00180                            
00181 }
00182 
00183 
00184 size_t MAl_Readonly::get_seq_end( size_t ID )
00185 {
00186   
00187   return get_seq_end_global( ID ) - get_seq_begin_global( ID );
00188 }
00189 
00190 size_t MAl_Readonly::get_seq_end_global( size_t ID )
00191 {
00192   ID = get_buffID( ID );
00193 
00194   assert( ID < seq_end_global.size() );
00195   
00196   return seq_end_global[ ID ];
00197   
00198 }
00199 
00200 
00201 size_t MAl_Readonly::get_beginGood( size_t ID )
00202 {
00203   ID = get_buffID( ID );
00204 
00205   assert( ID < seq_beginGood.size() );
00206   
00207   return seq_beginGood[ ID ];
00208 }
00209 
00210 size_t MAl_Readonly::get_beginGood_global( size_t ID )
00211 {
00212   return get_beginGood( ID ) + get_seq_begin_global( ID );
00213   
00214 }
00215 
00216 
00217 size_t MAl_Readonly::get_endGood( size_t ID )
00218 {
00219   ID = get_buffID( ID );
00220 
00221   assert( ID < seq_endGood.size() );
00222   
00223   return seq_endGood[ ID ];
00224   
00225 }
00226 
00227 size_t MAl_Readonly::get_endGood_global( size_t ID )
00228 {
00229   return get_endGood( ID ) + get_seq_begin_global( ID );
00230 }
00231 
00232 base_t MAl_Readonly::get_base( size_t ID, size_t index )
00233 {
00234   
00235   ID = get_buffID( ID );
00236   
00237   assert( ID < seqs.size() );
00238 
00239   if (index >= seqs[ ID ].size() ) {
00240     cout << flush;
00241     cerr << "index (local) = " << index << ", seqs[ ID ].size() = " << seqs[ ID ].size() << endl;
00242 
00243   }
00244   assert( index < seqs[ ID ].size() );
00245   
00246   base_t tmp = seqs[ ID ][ index ];
00247   if (tmp == '*')
00248     tmp = '-';
00249 
00250   return tmp;
00251 //  return seqs[ ID ][ index ];
00252 }
00253 
00254 base_t MAl_Readonly::get_base_global( size_t ID, size_t index )
00255 {
00256   return get_base( ID, index - get_seq_begin_global( ID ) );
00257   
00258 }
00259 
00260 qual_t MAl_Readonly::get_qual( size_t ID, size_t index )
00261 {
00262   ID = get_buffID( ID );
00263   
00264   assert( ID < quals.size() );
00265   assert( index < quals[ ID ].size() );
00266   
00267   return quals[ ID ][ index ];
00268   
00269 }
00270 
00271 qual_t MAl_Readonly::get_qual_global( size_t ID, size_t index )
00272 {
00273   
00274   return get_qual( ID, index - get_seq_begin_global( ID ) );
00275   
00276 }
00277 
00278 
00279 bool MAl_Readonly::is_DNP(size_t ID, size_t index)
00280 {
00281   ID = get_buffID( ID );
00282 
00283   assert( ID < DNPs.size() );
00284   assert( index < DNPs[ ID ].size() );
00285   
00286   return DNPs[ ID ][ index ].isDNP;
00287 }
00288 
00289 bool MAl_Readonly::is_DNP_global(size_t ID, size_t index)
00290 {
00291 
00292   return is_DNP( ID, index - get_seq_begin_global( ID ) );
00293 }
00294 
00295 int MAl_Readonly::get_DNP_ID(size_t ID, size_t index)
00296 {
00297   ID = get_buffID( ID );
00298 
00299   assert( ID < DNPs.size() );
00300   assert( index < DNPs[ ID ].size() );
00301   
00302   return DNPs[ ID ][ index ].ID;  
00303 }
00304 
00305 int MAl_Readonly::get_DNP_ID_global(size_t ID, size_t index)
00306 {
00307   return get_DNP_ID( ID, index - get_seq_begin_global( ID ) ); 
00308 }
00309 
00310 int MAl_Readonly::get_DNP_type(size_t ID, size_t index)
00311 {
00312   ID = get_buffID( ID );
00313 
00314   assert( ID < DNPs.size() );
00315   assert( index < DNPs[ ID ].size() );
00316   
00317   return DNPs[ ID ][ index ].type;
00318 }
00319 
00320 int MAl_Readonly::get_DNP_type_global(size_t ID, size_t index)
00321 {
00322   return get_DNP_type( ID, index - get_seq_begin_global( ID ) );  
00323 }
00324 
00325 
00326 //Private methods
00327 
00328 size_t MAl_Readonly::get_buffID(size_t ID)
00329 {
00330   
00331 
00332   assert( ID < ID_to_buffID.size() );
00333   size_t buffID = ID_to_buffID[ ID ];
00334 
00335   if ( buffID == 0 ) {
00336     //Seq is not present in buffer and thus has to be read into buffer,
00337     //OR a new seq is being created
00338     buffID = next_buffID();
00339 
00340     if ( buffID >= put_in_db.size() ) {
00341       cerr<<"buffID: "<<buffID<<endl;
00342       cerr<<"put_in_db.size(): "<<put_in_db.size()<<endl;
00343     }
00344     
00345     assert( buffID < put_in_db.size() );
00346     
00347     if ( put_in_db[ buffID ] ) {
00348       
00349       flush_buffer( buffID );
00350     }
00351     
00352     //If dbID is 0, a new seq is being created and nothing has to be read from DB
00353     //This is taken care of in read_from_db()
00354 
00355     read_from_db( buffID, ID );
00356 
00357     
00358   }
00359   assert( buffID != 0 );
00360   return buffID;
00361 
00362 }
00363 
00364 size_t MAl_Readonly::next_buffID()
00365 {
00366 //   static size_t curr_buffID(static_cast<size_t>(-1));//Have to check this, but should work although size_t is unsigned
00367   static size_t curr_buffID(0);//ID == 0 is unused...
00368   ++curr_buffID;
00369   
00370   if ( curr_buffID == buff_size ) curr_buffID = 1;
00371   
00372   return curr_buffID;
00373   
00374 }
00375 
00376 void MAl_Readonly::flush_buffer( size_t buffID )
00377 {
00378   //Not implemented in read only MAl
00379   
00380 }
00381 
00382 void MAl_Readonly::read_from_db(size_t buffID, size_t ID)
00383 {
00384 
00385   db_recno_t recno = ID_to_dbID[ ID ];
00386 
00387   if ( recno != 0 ) {
00388 
00389     //NB: EXTREMELY important that DNP features and other features
00390     //that appear more than once on reads are read AFTER DnaStrData!!!
00391 
00392     read_seq_from_db( recno, buffID );
00393     read_feat_from_db( recno, buffID, "DnaStrData");//The seq
00394     read_feat_from_db( recno, buffID, "QualityData");//The qual
00395     read_feat_from_db( recno, buffID, "DnpData");//The dnps
00396     
00397     
00398     /*
00399       read_feat_from_db( recno, buffID, "beginGood");//Maybe this should be added as data in the seq class...
00400       read_feat_from_db( recno, buffID, "endGood");//Ditto
00401       read_feat_from_db( recno, buffID, "name");//Ditto
00402       read_feat_from_db( recno, buffID, "header");//Ditto
00403       read_feat_from_db( recno, buffID, "strand");//Ditto
00404       
00405     */
00406     
00407   }
00408   
00409 
00410   ID_to_buffID[ ID ] = buffID;
00411   buffID_to_dbID[ buffID ] = recno;
00412   buffID_to_ID[ buffID ] = ID;
00413   put_in_db[ buffID ] = true;
00414 }
00415 
00416 void MAl_Readonly::read_seq_from_db( db_recno_t recno, size_t buffID )
00417 {
00418 
00419   Database::PrimaryIterator<ReadData> read_it(doc, "ReadData");
00420 
00421   int ret_read = read_it.setFromRecno(recno);
00422   ReadData* r_test = (ret_read != DB_NOTFOUND) ? read_it.answer() : 0;
00423   assert( r_test );
00424 
00425   seq_begin_global[ buffID ] = r_test->startPos();
00426   seq_end_global[ buffID ] = r_test->endPos() + 1;//Index annoyance...
00427   seq_rows[ buffID ] = r_test->row();
00428   names[ buffID ] = r_test->name();
00429   mates[ buffID ] = r_test->mate();
00430   strands[ buffID ] = r_test->strand();
00431   seq_beginGood[ buffID ] = r_test->beginGood();
00432   seq_endGood[ buffID ] = r_test->endGood() + 1;
00433   mate_lengths[ buffID ] = r_test->mateLength();
00434   
00435 //   cerr<<"Reading seq: "<<endl;
00436 //   cerr<<"recno: "<<recno<<endl;
00437 //   cerr<<"r_test->startPos(): "<<r_test->startPos()<<endl;
00438 //   cerr<<"r_test->endPos(): "<<r_test->endPos()<<endl;
00439 
00440 }
00441 
00442 void MAl_Readonly::read_feat_from_db( db_recno_t recno, size_t buffID, const string& data_type_name)
00443 {
00444   Database::SecondaryIterator<FeatureData> feat_it("readRecno", doc, data_type_name);
00445   
00446   feat_it.key()->setReadRecno( recno );
00447 
00448   if ( feat_it.set() != 0 ) {
00449 //     cerr<<"MAl_Readonly::read_feat_from_db(): No "<<data_type_name<<" for read with recno "<<recno<<endl;
00450     return;
00451   }
00452 
00453 //   cerr<<"MAl_Readonly::read_feat_from_db(): Reading feature "<<data_type_name<<endl;
00454   
00455   if ( DnaStrData* data = dynamic_cast<DnaStrData*>(feat_it.answer()) ) {
00456     seqs[buffID] = data->dnaVector.stlVector();
00457     //Have to init DNPs here to assert that DNP vec is of same size...
00458     DNPs[buffID] = vector<dnp_struct>(seqs[buffID].size(), dnp_struct());
00459   }
00460   else if ( QualityData* data = dynamic_cast<QualityData*>(feat_it.answer()) ) {
00461     quals[buffID] = data->qualityVector.stlVector();
00462   }
00463   else if ( DnpData* data = dynamic_cast<DnpData*>(feat_it.answer()) ) {
00464     
00465     do {
00466 //       cerr<<"Found DNP at position "<<data->startPos()<<endl;
00467       DNPs[buffID][ data->startPos() ].isDNP = true;
00468       DNPs[buffID][ data->startPos() ].recno = data->getRecno();
00469       DNPs[buffID][ data->startPos() ].ID = data->get_dnpID();      
00470       DNPs[buffID][ data->startPos() ].type = data->get_dnp_type();      
00471       
00472     } while ( (feat_it.nextdup() == 0) && (data = dynamic_cast<DnpData*>(feat_it.answer())) );
00473   }
00474   
00475   
00476 }
00477 
00478 
00479 
00480 

Generated on Fri Mar 17 17:44:24 2006 for trapper by  doxygen 1.4.4