00001 #include "mal_readonly.h"
00002 #include "generaldata.h"
00003 #include "readdata.h"
00004 #include "featuredata.h"
00005
00006 using namespace std;
00007
00008
00009 MAl_Readonly::MAl_Readonly( size_t bufsize, set<db_recno_t>& recnolist, TrapperDoc* pdoc ) :
00010 buff_size(bufsize + 1),
00011 num_seq( recnolist.size() ),
00012 doc(pdoc),
00013 selectedReads(recnolist)
00014 {
00015
00016
00017
00018 seqs.reserve( buff_size );
00019 quals.reserve( buff_size );
00020 DNPs.reserve( buff_size );
00021 names.reserve( buff_size );
00022 headers.reserve( buff_size );
00023 mates.reserve( buff_size );
00024 strands.reserve( buff_size );
00025
00026
00027
00028 seq_rows.reserve( buff_size );
00029 seq_begin_global.reserve( buff_size );
00030 seq_end_global.reserve( buff_size );
00031 seq_beginGood.reserve( buff_size );
00032 seq_endGood.reserve( buff_size );
00033 mate_lengths.reserve( buff_size );
00034
00035
00036 ID_to_dbID.reserve( num_seq );
00037 ID_to_buffID.reserve( num_seq );
00038 buffID_to_dbID.reserve( buff_size );
00039 buffID_to_ID.reserve( buff_size );
00040 put_in_db.reserve( buff_size );
00041
00042 for( set<db_recno_t>::iterator it = recnolist.begin(); it != recnolist.end(); ++it ) {
00043 ID_to_dbID.push_back( *it );
00044 ID_to_buffID.push_back( 0 );
00045 }
00046
00047
00048 for( size_t i = 0; i < buff_size; i++ ) {
00049
00050 buffID_to_dbID.push_back( 0 );
00051 buffID_to_ID.push_back( 0 );
00052 put_in_db.push_back(false);
00053
00054
00055
00056 seq_rows.push_back( 0 );
00057 seq_begin_global.push_back( 0 );
00058 seq_end_global.push_back( 0 );
00059
00060 seqs.push_back( vector<base_t>() );
00061 quals.push_back( vector<qual_t>() );
00062 DNPs.push_back( vector<dnp_struct>() );
00063 names.push_back( string() );
00064 mates.push_back( string() );
00065 strands.push_back( string() );
00066
00067
00068 seq_beginGood.push_back( 0 );
00069 seq_endGood.push_back( 0 );
00070 mate_lengths.push_back( 0 );
00071
00072 }
00073 cerr<<"buff_size: "<<buff_size<<endl;
00074 }
00075
00076 MAl_Readonly::~MAl_Readonly()
00077 {
00078
00079 }
00080
00081
00082
00083
00084 size_t MAl_Readonly::get_num_seq()
00085 {
00086 return num_seq;
00087 }
00088
00089 std::string MAl_Readonly::get_name( size_t ID )
00090 {
00091 ID = get_buffID( ID );
00092
00093 assert( ID < names.size() );
00094
00095 return names[ ID ];
00096
00097 }
00098
00099 std::string MAl_Readonly::get_header( size_t ID )
00100 {
00101 ID = get_buffID( ID );
00102
00103 assert( ID < headers.size() );
00104
00105 return headers[ ID ];
00106
00107 }
00108
00109 std::string MAl_Readonly::get_seq( size_t ID )
00110 {
00111 ID = get_buffID( ID );
00112
00113 assert( ID < seqs.size() );
00114
00115
00116 return string(seqs[ ID ].begin(), seqs[ ID ].end());
00117
00118 }
00119
00120 string MAl_Readonly::get_strand( size_t ID )
00121 {
00122 ID = get_buffID( ID );
00123
00124 assert( ID < strands.size() );
00125
00126 return strands[ ID ];
00127
00128 }
00129
00130 size_t MAl_Readonly::get_len( size_t ID)
00131 {
00132 ID = get_buffID( ID );
00133
00134 assert( ID < seqs.size() );
00135
00136 return seqs[ ID ].size();
00137
00138 }
00139
00140 void MAl_Readonly::select_read( size_t ID, bool status )
00141 {
00142
00143 assert( ID < ID_to_dbID.size() );
00144
00145 db_recno_t recno = ID_to_dbID[ ID ];
00146 if ( recno == 0 ) return;
00147
00148 set<db_recno_t>::iterator pos;
00149 if ( status == true )
00150 selectedReads.insert( recno );
00151 else if ( ( pos = selectedReads.find( recno )) != selectedReads.end() ) {
00152 selectedReads.erase( pos );
00153 }
00154
00155
00156 }
00157
00158
00159
00160
00161 size_t MAl_Readonly::get_seq_row( size_t ID )
00162 {
00163 ID = get_buffID( ID );
00164
00165 assert( ID < seq_rows.size() );
00166
00167 return seq_rows[ ID ];
00168
00169 }
00170
00171
00172 size_t MAl_Readonly::get_seq_begin_global( size_t ID )
00173
00174 {
00175 ID = get_buffID( ID );
00176
00177 assert( ID < seq_begin_global.size() );
00178
00179 return seq_begin_global[ ID ];
00180
00181 }
00182
00183
00184 size_t MAl_Readonly::get_seq_end( size_t ID )
00185 {
00186
00187 return get_seq_end_global( ID ) - get_seq_begin_global( ID );
00188 }
00189
00190 size_t MAl_Readonly::get_seq_end_global( size_t ID )
00191 {
00192 ID = get_buffID( ID );
00193
00194 assert( ID < seq_end_global.size() );
00195
00196 return seq_end_global[ ID ];
00197
00198 }
00199
00200
00201 size_t MAl_Readonly::get_beginGood( size_t ID )
00202 {
00203 ID = get_buffID( ID );
00204
00205 assert( ID < seq_beginGood.size() );
00206
00207 return seq_beginGood[ ID ];
00208 }
00209
00210 size_t MAl_Readonly::get_beginGood_global( size_t ID )
00211 {
00212 return get_beginGood( ID ) + get_seq_begin_global( ID );
00213
00214 }
00215
00216
00217 size_t MAl_Readonly::get_endGood( size_t ID )
00218 {
00219 ID = get_buffID( ID );
00220
00221 assert( ID < seq_endGood.size() );
00222
00223 return seq_endGood[ ID ];
00224
00225 }
00226
00227 size_t MAl_Readonly::get_endGood_global( size_t ID )
00228 {
00229 return get_endGood( ID ) + get_seq_begin_global( ID );
00230 }
00231
00232 base_t MAl_Readonly::get_base( size_t ID, size_t index )
00233 {
00234
00235 ID = get_buffID( ID );
00236
00237 assert( ID < seqs.size() );
00238
00239 if (index >= seqs[ ID ].size() ) {
00240 cout << flush;
00241 cerr << "index (local) = " << index << ", seqs[ ID ].size() = " << seqs[ ID ].size() << endl;
00242
00243 }
00244 assert( index < seqs[ ID ].size() );
00245
00246 base_t tmp = seqs[ ID ][ index ];
00247 if (tmp == '*')
00248 tmp = '-';
00249
00250 return tmp;
00251
00252 }
00253
00254 base_t MAl_Readonly::get_base_global( size_t ID, size_t index )
00255 {
00256 return get_base( ID, index - get_seq_begin_global( ID ) );
00257
00258 }
00259
00260 qual_t MAl_Readonly::get_qual( size_t ID, size_t index )
00261 {
00262 ID = get_buffID( ID );
00263
00264 assert( ID < quals.size() );
00265 assert( index < quals[ ID ].size() );
00266
00267 return quals[ ID ][ index ];
00268
00269 }
00270
00271 qual_t MAl_Readonly::get_qual_global( size_t ID, size_t index )
00272 {
00273
00274 return get_qual( ID, index - get_seq_begin_global( ID ) );
00275
00276 }
00277
00278
00279 bool MAl_Readonly::is_DNP(size_t ID, size_t index)
00280 {
00281 ID = get_buffID( ID );
00282
00283 assert( ID < DNPs.size() );
00284 assert( index < DNPs[ ID ].size() );
00285
00286 return DNPs[ ID ][ index ].isDNP;
00287 }
00288
00289 bool MAl_Readonly::is_DNP_global(size_t ID, size_t index)
00290 {
00291
00292 return is_DNP( ID, index - get_seq_begin_global( ID ) );
00293 }
00294
00295 int MAl_Readonly::get_DNP_ID(size_t ID, size_t index)
00296 {
00297 ID = get_buffID( ID );
00298
00299 assert( ID < DNPs.size() );
00300 assert( index < DNPs[ ID ].size() );
00301
00302 return DNPs[ ID ][ index ].ID;
00303 }
00304
00305 int MAl_Readonly::get_DNP_ID_global(size_t ID, size_t index)
00306 {
00307 return get_DNP_ID( ID, index - get_seq_begin_global( ID ) );
00308 }
00309
00310 int MAl_Readonly::get_DNP_type(size_t ID, size_t index)
00311 {
00312 ID = get_buffID( ID );
00313
00314 assert( ID < DNPs.size() );
00315 assert( index < DNPs[ ID ].size() );
00316
00317 return DNPs[ ID ][ index ].type;
00318 }
00319
00320 int MAl_Readonly::get_DNP_type_global(size_t ID, size_t index)
00321 {
00322 return get_DNP_type( ID, index - get_seq_begin_global( ID ) );
00323 }
00324
00325
00326
00327
00328 size_t MAl_Readonly::get_buffID(size_t ID)
00329 {
00330
00331
00332 assert( ID < ID_to_buffID.size() );
00333 size_t buffID = ID_to_buffID[ ID ];
00334
00335 if ( buffID == 0 ) {
00336
00337
00338 buffID = next_buffID();
00339
00340 if ( buffID >= put_in_db.size() ) {
00341 cerr<<"buffID: "<<buffID<<endl;
00342 cerr<<"put_in_db.size(): "<<put_in_db.size()<<endl;
00343 }
00344
00345 assert( buffID < put_in_db.size() );
00346
00347 if ( put_in_db[ buffID ] ) {
00348
00349 flush_buffer( buffID );
00350 }
00351
00352
00353
00354
00355 read_from_db( buffID, ID );
00356
00357
00358 }
00359 assert( buffID != 0 );
00360 return buffID;
00361
00362 }
00363
00364 size_t MAl_Readonly::next_buffID()
00365 {
00366
00367 static size_t curr_buffID(0);
00368 ++curr_buffID;
00369
00370 if ( curr_buffID == buff_size ) curr_buffID = 1;
00371
00372 return curr_buffID;
00373
00374 }
00375
00376 void MAl_Readonly::flush_buffer( size_t buffID )
00377 {
00378
00379
00380 }
00381
00382 void MAl_Readonly::read_from_db(size_t buffID, size_t ID)
00383 {
00384
00385 db_recno_t recno = ID_to_dbID[ ID ];
00386
00387 if ( recno != 0 ) {
00388
00389
00390
00391
00392 read_seq_from_db( recno, buffID );
00393 read_feat_from_db( recno, buffID, "DnaStrData");
00394 read_feat_from_db( recno, buffID, "QualityData");
00395 read_feat_from_db( recno, buffID, "DnpData");
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407 }
00408
00409
00410 ID_to_buffID[ ID ] = buffID;
00411 buffID_to_dbID[ buffID ] = recno;
00412 buffID_to_ID[ buffID ] = ID;
00413 put_in_db[ buffID ] = true;
00414 }
00415
00416 void MAl_Readonly::read_seq_from_db( db_recno_t recno, size_t buffID )
00417 {
00418
00419 Database::PrimaryIterator<ReadData> read_it(doc, "ReadData");
00420
00421 int ret_read = read_it.setFromRecno(recno);
00422 ReadData* r_test = (ret_read != DB_NOTFOUND) ? read_it.answer() : 0;
00423 assert( r_test );
00424
00425 seq_begin_global[ buffID ] = r_test->startPos();
00426 seq_end_global[ buffID ] = r_test->endPos() + 1;
00427 seq_rows[ buffID ] = r_test->row();
00428 names[ buffID ] = r_test->name();
00429 mates[ buffID ] = r_test->mate();
00430 strands[ buffID ] = r_test->strand();
00431 seq_beginGood[ buffID ] = r_test->beginGood();
00432 seq_endGood[ buffID ] = r_test->endGood() + 1;
00433 mate_lengths[ buffID ] = r_test->mateLength();
00434
00435
00436
00437
00438
00439
00440 }
00441
00442 void MAl_Readonly::read_feat_from_db( db_recno_t recno, size_t buffID, const string& data_type_name)
00443 {
00444 Database::SecondaryIterator<FeatureData> feat_it("readRecno", doc, data_type_name);
00445
00446 feat_it.key()->setReadRecno( recno );
00447
00448 if ( feat_it.set() != 0 ) {
00449
00450 return;
00451 }
00452
00453
00454
00455 if ( DnaStrData* data = dynamic_cast<DnaStrData*>(feat_it.answer()) ) {
00456 seqs[buffID] = data->dnaVector.stlVector();
00457
00458 DNPs[buffID] = vector<dnp_struct>(seqs[buffID].size(), dnp_struct());
00459 }
00460 else if ( QualityData* data = dynamic_cast<QualityData*>(feat_it.answer()) ) {
00461 quals[buffID] = data->qualityVector.stlVector();
00462 }
00463 else if ( DnpData* data = dynamic_cast<DnpData*>(feat_it.answer()) ) {
00464
00465 do {
00466
00467 DNPs[buffID][ data->startPos() ].isDNP = true;
00468 DNPs[buffID][ data->startPos() ].recno = data->getRecno();
00469 DNPs[buffID][ data->startPos() ].ID = data->get_dnpID();
00470 DNPs[buffID][ data->startPos() ].type = data->get_dnp_type();
00471
00472 } while ( (feat_it.nextdup() == 0) && (data = dynamic_cast<DnpData*>(feat_it.answer())) );
00473 }
00474
00475
00476 }
00477
00478
00479
00480