00001 #include "mal.h"
00002 #include "generaldata.h"
00003 #include "readdata.h"
00004 #include "featuredata.h"
00005
00006 using namespace std;
00007
00008
00009 MAl::MAl( size_t bufsize, set<db_recno_t>& recnolist, TrapperDoc* pdoc ) :
00010 MAl_Readonly( bufsize, recnolist, pdoc )
00011 {
00012
00013
00014 }
00015
00016 MAl::~MAl()
00017 {
00018 cerr<<"~MAl(): flushing buffer... ";
00019
00020 for( size_t i = 1; i < put_in_db.size(); i++ ) {
00021
00022 if ( put_in_db[ i ] ) {
00023 flush_buffer( i );
00024 }
00025 }
00026 cerr<<"Done "<<endl;
00027 }
00028
00029
00030
00031
00032 void MAl::set_seq_row(size_t ID, size_t row)
00033 {
00034 ID = get_buffID( ID );
00035
00036 assert( ID < seq_rows.size() );
00037 seq_rows[ ID ] = row;
00038 }
00039
00040 size_t MAl::create_seq(size_t length)
00041 {
00042 num_seq++;
00043 ID_to_dbID.push_back(0);
00044 ID_to_buffID.push_back(0);
00045
00046 size_t newID( num_seq - 1 );
00047
00048 size_t buffID = get_buffID( newID );
00049
00050 seqs[ buffID ] = vector<base_t>( length );
00051 quals[ buffID ] = vector<qual_t>( length );
00052 DNPs[ buffID ] = vector<dnp_struct>( length );
00053
00054 return newID;
00055 }
00056
00057
00058
00059
00060
00061 void MAl::set_DNP(size_t ID, size_t index, bool status)
00062 {
00063 ID = get_buffID( ID );
00064
00065
00066 assert( ID < DNPs.size() );
00067
00068 if ( index >= DNPs[ ID ].size() ) {
00069 cerr<<"index: "<<index<<endl;
00070 cerr<<"DNPs[ ID ].size(): "<<DNPs[ ID ].size()<<endl;
00071 }
00072
00073 assert( index < DNPs[ ID ].size() );
00074
00075 DNPs[ ID ][ index ].isDNP = status;
00076 }
00077
00078 void MAl::set_DNP_global(size_t ID, size_t index, bool status)
00079 {
00080
00081 set_DNP( ID, index - get_seq_begin_global( ID ), status );
00082 }
00083
00084 void MAl::set_DNP_ID(size_t ID, size_t index, int dnpID)
00085 {
00086 ID = get_buffID( ID );
00087
00088 assert( ID < DNPs.size() );
00089 assert( index < DNPs[ ID ].size() );
00090
00091 DNPs[ ID ][ index ].ID = dnpID;
00092 }
00093
00094 void MAl::set_DNP_ID_global(size_t ID, size_t index, int dnpID)
00095 {
00096 set_DNP_ID( ID, index - get_seq_begin_global( ID ), dnpID );
00097 }
00098
00099 void MAl::set_DNP_type(size_t ID, size_t index, int type)
00100 {
00101 ID = get_buffID( ID );
00102
00103 assert( ID < DNPs.size() );
00104 assert( index < DNPs[ ID ].size() );
00105
00106 DNPs[ ID ][ index ].type = type;
00107 }
00108
00109 void MAl::set_DNP_type_global(size_t ID, size_t index, int type)
00110 {
00111 set_DNP_type( ID, index - get_seq_begin_global( ID ), type );
00112 }
00113
00114 void MAl::set_base(size_t ID, size_t index, base_t base)
00115 {
00116 if (base == '-')
00117 base = '*';
00118
00119 ID = get_buffID( ID );
00120
00121 assert( ID < seqs.size());
00122 assert( index < seqs[ ID ].size() );
00123
00124 seqs[ ID ][ index ] = base;
00125 }
00126
00127 void MAl::set_base_global(size_t ID, size_t index, base_t base)
00128 {
00129
00130 set_base( ID, index - get_seq_begin_global( ID ), base );
00131
00132 }
00133
00134 void MAl::set_qual(size_t ID, size_t index, qual_t qual)
00135 {
00136 ID = get_buffID( ID );
00137
00138 assert( ID < quals.size());
00139 assert( index < quals[ ID ].size() );
00140
00141 quals[ ID ][ index ] = qual;
00142
00143 }
00144
00145 void MAl::set_qual_global(size_t ID, size_t index, qual_t qual)
00146 {
00147
00148 set_qual( ID, index - get_seq_begin_global( ID ), qual );
00149
00150 }
00151
00152 void MAl::insert_gap(size_t ID, size_t index)
00153 {
00154 int rowID = ID;
00155
00156
00157
00158
00159 ID = get_buffID( ID );
00160
00161 assert( ID < seqs.size());
00162 assert( index < seqs[ ID ].size() );
00163
00164 seqs[ ID ].insert( seqs[ ID ].begin() + index, '*' );
00165
00166 qual_t dummy_qual_val = 0;
00167 assert( index < quals[ ID ].size() );
00168 assert( index > 0 );
00169 dummy_qual_val = (quals[ ID ][index -1] + quals[ ID ][index])/2;
00170
00171 quals[ ID ].insert( quals[ ID ].begin() + index, dummy_qual_val );
00172
00173 DNPs[ ID ].insert( DNPs[ ID ].begin() + index, dnp_struct() );
00174
00175 set_seq_end( rowID, get_seq_end( rowID ) + 1 );
00176
00177 if ( index <= get_beginGood( rowID ) ) {
00178 set_beginGood( rowID, get_beginGood( rowID ) + 1 );
00179 }
00180
00181 if ( index <= get_endGood( rowID ) ) {
00182 set_endGood( rowID , get_endGood( rowID ) + 1);
00183 }
00184
00185 }
00186
00187 void MAl::insert_gap_global(size_t ID, size_t index)
00188 {
00189 insert_gap( ID, index - get_seq_begin_global( ID ) );
00190
00191 }
00192
00193 void MAl::remove_gap(size_t ID, size_t index)
00194 {
00195 int rowID = ID;
00196
00197 ID = get_buffID( ID );
00198
00199 assert( ID < seqs.size());
00200 assert( index < seqs[ ID ].size() );
00201 if ( seqs[ ID ][ index ] != '*' ) {
00202 cerr<<"Error, calling remove gap with base "<<seqs[ ID ][ index ]<<endl;
00203 }
00204 assert( seqs[ ID ][ index ] == '*' );
00205
00206
00207 seqs[ ID ].erase( seqs[ ID ].begin() + index );
00208 quals[ ID ].erase( quals[ ID ].begin() + index );
00209 DNPs[ ID ].erase( DNPs[ ID ].begin() + index );
00210
00211 set_seq_end( rowID, get_seq_end( rowID ) - 1 );
00212
00213 if ( index < get_beginGood( rowID ) ) {
00214 set_beginGood( rowID , get_beginGood( rowID ) - 1 );
00215 }
00216
00217 if ( index < get_endGood( rowID ) ) {
00218 set_endGood( rowID , get_endGood( rowID ) - 1 );
00219 }
00220
00221 }
00222
00223 void MAl::remove_gap_global(size_t ID, size_t index)
00224 {
00225 remove_gap( ID, index - get_seq_begin_global( ID ) );
00226
00227 }
00228
00229 void MAl::set_seq_begin_global(size_t ID, size_t index)
00230
00231 {
00232
00233 ID = get_buffID( ID );
00234
00235 assert( ID < seq_begin_global.size() );
00236
00237 seq_begin_global[ ID ] = index;
00238 }
00239
00240
00241 void MAl::set_seq_end(size_t ID, size_t index)
00242 {
00243 set_seq_end_global( ID, index + get_seq_begin_global( ID ) );
00244
00245 }
00246
00247 void MAl::set_seq_end_global(size_t ID, size_t index)
00248 {
00249
00250 ID = get_buffID( ID );
00251
00252 assert( ID < seq_end_global.size() );
00253
00254 seq_end_global[ ID ] = index;
00255
00256 }
00257
00258 void MAl::set_beginGood(size_t ID, size_t index)
00259 {
00260 ID = get_buffID( ID );
00261
00262 assert( ID < seq_beginGood.size() );
00263
00264 seq_beginGood[ ID ] = index;
00265
00266 }
00267
00268 void MAl::set_beginGood_global(size_t ID, size_t index)
00269 {
00270 set_beginGood( ID, index - get_seq_begin_global( ID ) );
00271
00272 }
00273
00274 void MAl::set_endGood(size_t ID, size_t index)
00275 {
00276 ID = get_buffID( ID );
00277
00278 assert( ID < seq_endGood.size() );
00279
00280 seq_endGood[ ID ] = index;
00281
00282 }
00283
00284 void MAl::set_endGood_global(size_t ID, size_t index)
00285 {
00286 set_endGood( ID, index - get_seq_begin_global( ID ) );
00287
00288 }
00289
00290
00291
00292
00293 void MAl::flush_buffer( size_t buffID )
00294 {
00295
00296
00297
00298
00299
00300
00301
00302
00303 commit_seq_to_db( buffID );
00304
00305
00306
00307
00308 commit_feat_to_db( buffID, "DnaStrData" );
00309 commit_feat_to_db( buffID, "QualityData" );
00310 commit_feat_to_db( buffID, "ChromatData" );
00311 commit_feat_to_db( buffID, "DnpData" );
00312
00313
00314
00315 ID_to_buffID[ buffID_to_ID[ buffID ] ] = 0;
00316 buffID_to_dbID[ buffID ] = 0;
00317 put_in_db[ buffID ] = false;
00318
00319 }
00320
00321
00322
00323 void MAl::commit_seq_to_db( size_t buffID )
00324 {
00325 db_recno_t recno = buffID_to_dbID[ buffID ];
00326
00327 bool create_flag = ( recno != 0 );
00328
00329 Database::Creator<ReadData> a_creator(doc, "ReadData");
00330
00331
00332
00333
00334
00335
00336 a_creator.data()->setRecno( recno );
00337 a_creator.data()->setRow( seq_rows[buffID] );
00338 a_creator.data()->setStartPos( seq_begin_global[ buffID ] );
00339 a_creator.data()->setEndPos( seq_end_global[ buffID ] - 1 );
00340 a_creator.data()->setName( names[ buffID ] );
00341 a_creator.data()->setMate( mates[ buffID ] );
00342 a_creator.data()->setMateLength( mate_lengths[ buffID ] );
00343 a_creator.data()->setStrand( strands[ buffID ] );
00344 a_creator.data()->setBeginGood( seq_beginGood[ buffID ] );
00345 a_creator.data()->setEndGood( seq_endGood[ buffID ] - 1);
00346 recno = a_creator.create( create_flag );
00347
00348 }
00349
00350
00351
00352 void MAl::commit_feat_to_db(size_t buffID, const string& feat_data_name)
00353 {
00354 db_recno_t readrec = buffID_to_dbID[ buffID ];
00355
00356 assert( readrec != 0 );
00357 Database::Creator<FeatureData> a_creator(doc, feat_data_name);
00358
00359 a_creator.data()->setReadRecno( readrec );
00360
00361 if ( DnaStrData* data = dynamic_cast<DnaStrData*>(a_creator.data()) ) {
00362
00363 data->setRecno( readrec );
00364 data->setStartPos( 0 );
00365 data->setEndPos( seq_end_global[buffID] - seq_begin_global[buffID] - 1 );
00366
00367
00368 data->dnaVector.stlVector().clear();
00369 last_gap_vec.clear();
00370 size_t numgap(0);
00371 for( size_t i = 0; i < seqs[ buffID ].size(); i++ ) {
00372 data->dnaVector.stlVector().push_back( seqs[ buffID ][i] );
00373 if ( seqs[ buffID ][i] == '*' ) {
00374 numgap++;
00375 }
00376 last_gap_vec.push_back(numgap);
00377 }
00378
00379 a_creator.create(true);
00380 }
00381 else if ( QualityData* data = dynamic_cast<QualityData*>(a_creator.data()) ) {
00382
00383 data->setRecno( readrec );
00384 data->setStartPos( 0 );
00385 data->setEndPos( seq_end_global[buffID] - seq_begin_global[buffID] - 1);
00386 data->qualityVector.stlVector() = quals[ buffID ];
00387
00388 a_creator.create(true);
00389
00390 }
00391 else if ( ChromatData* data = dynamic_cast<ChromatData*>(a_creator.data()) ) {
00392
00393 Database::PrimaryIterator<ChromatData>* chr_it =
00394 new Database::PrimaryIterator<ChromatData>(doc, "ChromatData");
00395
00396 int ret_chr = chr_it->setFromRecno(readrec);
00397 ChromatData* chr_test = (ret_chr != DB_NOTFOUND) ? chr_it->answer() : 0;
00398 assert( chr_test );
00399
00400 data->copy( chr_test );
00401 delete chr_it;
00402
00403 data->setStartPos( 0 );
00404 data->setEndPos( seq_end_global[buffID] - seq_begin_global[buffID] - 1);
00405 data->gap_vec.stlVector() = last_gap_vec;
00406
00407 a_creator.create(true);
00408
00409 }
00410 else if ( DnpData* data = dynamic_cast<DnpData*>(a_creator.data()) ) {
00411
00412 for( size_t i = 0; i < DNPs[buffID].size(); i++ ) {
00413
00414 if ( DNPs[buffID][i].isDNP || DNPs[buffID][i].recno != 0 ) {
00415 bool recno_flag( DNPs[buffID][i].recno != 0 );
00416 data->setRecno( DNPs[buffID][i].recno );
00417 data->set_dnpID( DNPs[buffID][i].ID );
00418 data->set_dnp_type( DNPs[buffID][i].type );
00419 if ( DNPs[buffID][i].isDNP ) {
00420
00421 data->setStartPos( i );
00422 data->setEndPos( i );
00423
00424 a_creator.create( recno_flag );
00425
00426 }
00427 else {
00428
00429 assert( DNPs[buffID][i].recno != 0 );
00430 a_creator.destroy();
00431
00432 }
00433
00434
00435 }
00436
00437 }
00438 }
00439
00440
00441 }