Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

TrashAlgo Class Reference

#include <trash.h>

Inheritance diagram for TrashAlgo:

Inheritance graph
[legend]
Collaboration diagram for TrashAlgo:

Collaboration graph
[legend]
List of all members.

Public Member Functions

 TrashAlgo (TrapperDoc *pDoc_, std::set< db_recno_t > &recnoList, AlgoParam *param)
void start ()
void trash (size_t seqNumber)
int check_qual_in_win (int start, int stop, int ID, int &curr_win_good)
void kill_read (int ID)

Private Attributes

vector< double > logQual_to_quality
vector< double > decode_qualval_square
int TRASH_WINDOW_LEN
int TRASH_WINDOW_STEP
int MIN_NUM_ADJ_WIN_GOOD
int MIN_LEN_GOOD
double GOOD_PERCENTAGE
double MAX_VARIANCE_IN_QUALWIN

Detailed Description

Definition at line 8 of file trash.h.


Constructor & Destructor Documentation

TrashAlgo::TrashAlgo TrapperDoc pDoc_,
std::set< db_recno_t > &  recnoList,
AlgoParam param
[inline]
 

Definition at line 11 of file trash.h.

00011                                                                                    : 
00012     RWAlgo(pDoc_, recnoList, param) {}


Member Function Documentation

int TrashAlgo::check_qual_in_win int  start,
int  stop,
int  ID,
int &  curr_win_good
 

Definition at line 224 of file trash.cc.

References decode_qualval_square, MAl_Readonly::get_qual(), RWAlgo::getMAl(), GOOD_PERCENTAGE, logQual_to_quality, and TRASH_WINDOW_LEN.

Referenced by trash().

00225      {{{ 
00226 
00227 {
00228 
00229 //   cerr<<"stop: "<<stop<<endl;
00230 //   cerr<<"start: "<<start<<endl;
00231   assert( stop <= getMAl()->get_seq_end( ID) );
00232 
00233   double sum_qual(0.0), sum_qual_square(0.0), average_qual(0.0);
00234   int minLogQual(0);
00235   int num_gap(0);
00236 
00237   //Check quality in window
00238   for( int k = start; k < stop; k++ ) {
00239 //     cerr<<getMAl()->get_base( ID, k );
00240 //     if ( stop >= getMAl()->get_seq_end( ID) ) {
00241 //       assert( stop == getMAl()->get_seq_end( ID) );
00242 //       //Should not happen...
00243 // //       assert(false);
00244 //       break;
00245 //     }
00246     
00247     if ( getMAl()->get_base( ID, k ) == '-' ) {
00248 
00249       num_gap++;
00250       continue;
00251     }
00252     
00253     
00254     sum_qual += logQual_to_quality[ getMAl()->get_qual(ID, k) ];
00255     sum_qual_square += decode_qualval_square[ getMAl()->get_qual(ID, k) ];
00256     int tempQual = getMAl()->get_qual(ID, k);
00257     if(tempQual >= minLogQual)
00258       minLogQual = tempQual;
00259   }
00260 //   cerr<<endl;
00261   average_qual = sum_qual / (double)TRASH_WINDOW_LEN;
00262 
00263   if ( average_qual >= GOOD_PERCENTAGE ) 
00264     {
00265       
00266       //Window may qualify as good and high quality
00267       double Q = sum_qual_square - sum_qual*sum_qual/(double)TRASH_WINDOW_LEN;
00268       assert(TRASH_WINDOW_LEN > 1);
00269       double variance = Q / (double)(TRASH_WINDOW_LEN - 1);
00270       if ( variance <= MAX_VARIANCE_IN_QUALWIN || minLogQual >= 27) {
00271         //Window qualifies as good
00272         curr_win_good = 1;
00273       }
00274       else {
00275         //Variance too high for good and hq
00276         curr_win_good = 0;
00277       }
00278       
00279     }
00280   else 
00281     {
00282       //Window does not qualify as good or high quality
00283       curr_win_good = 0;
00284     }
00285 
00286   if ( stop - start != TRASH_WINDOW_LEN + num_gap ) {
00287     cerr<<"stop - start: "<<stop - start<<endl;
00288     cerr<<"TRASH_WINDOW_LEN + num_gap: "<<TRASH_WINDOW_LEN + num_gap<<endl;
00289     cerr<<"stop: "<<stop<<endl;
00290     cerr<<"start: "<<start<<endl;
00291     cerr<<"TRASH_WINDOW_LEN: "<<TRASH_WINDOW_LEN<<endl;
00292     cerr<<"num_gap: "<<num_gap<<endl;
00293   }
00294 //   cerr<<"num_gap: "<<num_gap<<endl;
00295   
00296   assert( stop - start == TRASH_WINDOW_LEN + num_gap );
00297   return num_gap;
00298 }

void TrashAlgo::kill_read int  ID  ) 
 

Definition at line 301 of file trash.cc.

References RWAlgo::getMAl(), MAl::set_beginGood(), and MAl::set_endGood().

Referenced by trash().

00302      {{{ 
00303 
00304 {
00305 
00306   getMAl()->set_beginGood( ID, getMAl()->get_len( ID )/2 );
00307   getMAl()->set_endGood( ID, getMAl()->get_len( ID )/2 );
00308 }

void TrashAlgo::start  )  [virtual]
 

Implements RWAlgo.

Definition at line 23 of file trash.cc.

References decode_qualval_square, MAl_Readonly::get_num_seq(), RWAlgo::getMAl(), GOOD_PERCENTAGE, logQual_to_quality, MAX_VARIANCE_IN_QUALWIN, MIN_LEN_GOOD, MIN_NUM_ADJ_WIN_GOOD, trash(), TRASH_WINDOW_LEN, and TRASH_WINDOW_STEP.

Referenced by trash().

00024      {{{ 
00025 {
00026 
00027 
00028 
00029   TRASH_WINDOW_LEN = 35;
00030   TRASH_WINDOW_STEP = 3;
00031   MIN_NUM_ADJ_WIN_GOOD = 4;
00032   MIN_LEN_GOOD = 50;
00033   GOOD_PERCENTAGE = 0.89;
00034   MAX_VARIANCE_IN_QUALWIN = 0.0035;
00035   
00036 
00037   //Note: this assumes that quality values are 99 at the most!
00038   for( int i = 0; i < 100; i++ ) {
00039     logQual_to_quality.push_back( 1 - pow(10.0,double(-i)/10.0) );
00040     decode_qualval_square.push_back( logQual_to_quality[i] * logQual_to_quality[i] );
00041   }
00042 
00043 
00044    
00045    
00046   for( size_t i = 0; i < getMAl()->get_num_seq(); i++ ) {
00047     
00048     trash(i);
00049     
00050     //Temp test
00051 //     for( size_t j = 0; j < getMAl()->get_seq_end( i ); j++ ) {
00052 //       if ( j < getMAl()->get_beginGood( i ) ||  j >= getMAl()->get_endGood( i ) )
00053 //         getMAl()->set_base( i, j, 'x' );
00054 //     }
00055     
00056   }
00057 
00058     
00059 }

void TrashAlgo::trash size_t  seqNumber  ) 
 

Definition at line 62 of file trash.cc.

References check_qual_in_win(), MAl_Readonly::get_seq_end(), RWAlgo::getMAl(), kill_read(), MIN_LEN_GOOD, MIN_NUM_ADJ_WIN_GOOD, MAl::set_beginGood(), MAl::set_endGood(), start(), TRASH_WINDOW_LEN, and TRASH_WINDOW_STEP.

Referenced by start().

00063      {{{ 
00064 
00065 {
00066   /* This function computes the quality regions of the
00067      sequences, and marks those sequences with too poor
00068      quality to be deleted from further investigation.
00069 
00070   */
00071 
00072 
00073   int num_adj_win_good(0);
00074   int begin_good(-1), end_good(-1);
00075   int bg_found(0), eg_found(0);
00076   int  reset_good(1), curr_win_good(0);
00077     
00078   //Left end
00079   bool break_for(false);
00080   int start = 0;
00081   int stop = getMAl()->get_seq_end(seqNumber) - TRASH_WINDOW_LEN + 1;
00082  
00083   for( int j = start; j < stop; j += TRASH_WINDOW_STEP ) {
00084       
00085     if ( reset_good ) {
00086       begin_good = j;
00087     }
00088     reset_good = 0;
00089     
00090     //Adjust window size for gaps
00091     int num_bases_in_win(0);
00092     int num_gaps_in_win(0);
00093     while ( num_bases_in_win < TRASH_WINDOW_LEN ) {
00094       if ( j + TRASH_WINDOW_LEN + num_gaps_in_win > getMAl()->get_seq_end(seqNumber) ) {
00095         
00096         break_for = true;
00097         break;
00098       }
00099 //       cerr<<getMAl()->get_base( seqNumber, j + num_bases_in_win + num_gaps_in_win );
00100       if ( getMAl()->get_base( seqNumber, j + num_bases_in_win + num_gaps_in_win ) == '-' ) {
00101 
00102         num_gaps_in_win++;
00103       }
00104       else {
00105         num_bases_in_win++;
00106       }
00107     }
00108 //     cerr<<endl;
00109     if ( break_for ) break;
00110 
00111     j += check_qual_in_win(j, j + TRASH_WINDOW_LEN + num_gaps_in_win, seqNumber, curr_win_good);
00112 
00113     /* Window status is now computed and it may be
00114        possible to set begin_good */
00115 
00116     if ( curr_win_good ) {//good or hq
00117       num_adj_win_good++;
00118       
00119       if ( num_adj_win_good >= MIN_NUM_ADJ_WIN_GOOD ) {
00120         //Found begin_good
00121         bg_found = 1;
00122       }
00123     }
00124     else {
00125       if ( !bg_found ) {
00126         reset_good = 1;
00127         num_adj_win_good = 0;
00128       }
00129     }
00130       
00131     if ( bg_found ) {
00132       break;
00133     }
00134   }//End left
00135 
00136   /* Right end -- same thing, but backwards.
00137      Unnecessary if begin good not found. */
00138 
00139   if ( bg_found ) {
00140     reset_good = 1;
00141     curr_win_good = 0;
00142     num_adj_win_good = 0;
00143 
00144     break_for = false;
00145     
00146     start = getMAl()->get_seq_end(seqNumber) - 1;
00147     stop = TRASH_WINDOW_LEN - 2;
00148       
00149     for( int j = start; j > stop; j -= TRASH_WINDOW_STEP ) {
00150         
00151       if ( reset_good ) {
00152         end_good = j + 1;
00153       }
00154 
00155       reset_good = 0;
00156 
00157       //Adjust window size for gaps
00158       int num_bases_in_win(0);
00159       int num_gaps_in_win(0);
00160       while ( num_bases_in_win < TRASH_WINDOW_LEN ) {
00161 //         cerr<<getMAl()->get_base( seqNumber, j - num_gaps_in_win - num_bases_in_win );
00162         if ( j - TRASH_WINDOW_LEN + 1 - num_gaps_in_win < 0 ) {
00163           
00164           break_for = true;
00165           break;
00166         }
00167         if ( getMAl()->get_base( seqNumber, j - num_gaps_in_win - num_bases_in_win ) == '-' ) {
00168           num_gaps_in_win++;
00169         }
00170         else {
00171           num_bases_in_win++;
00172         }
00173       }
00174 //       cerr<<endl;
00175       if ( break_for ) break;
00176       
00177       
00178       j -= check_qual_in_win(j - TRASH_WINDOW_LEN + 1 - num_gaps_in_win, j + 1, seqNumber, curr_win_good);
00179         
00180       /* Window status is now computed and it may be
00181          possible to set end_good */
00182         
00183       if ( curr_win_good ) {
00184         num_adj_win_good++;
00185           
00186         if ( num_adj_win_good >= MIN_NUM_ADJ_WIN_GOOD ) {
00187           //Found begin_good
00188           eg_found = 1;
00189         }
00190       }
00191       else {
00192         if ( !eg_found ) {
00193           reset_good = 1;
00194           num_adj_win_good = 0;
00195         }
00196       }
00197         
00198       if ( eg_found ) {
00199         break;
00200       }
00201     }//End right forj
00202   }//End right
00203     
00204   //Set the parameters for the read
00205     
00206   int len = end_good - begin_good;
00207   if ( ( bg_found && eg_found ) && ( len >= MIN_LEN_GOOD ) ) {
00208 
00209     getMAl()->set_beginGood(seqNumber, begin_good);
00210     getMAl()->set_endGood(seqNumber, end_good);
00211 
00212   }
00213   else {
00214 
00215     kill_read(seqNumber);
00216   }
00217 
00218   //}//End fori loop over all seqs
00219 
00220 
00221 }


Member Data Documentation

vector<double> TrashAlgo::decode_qualval_square [private]
 

Definition at line 23 of file trash.h.

Referenced by check_qual_in_win(), and start().

double TrashAlgo::GOOD_PERCENTAGE [private]
 

Definition at line 28 of file trash.h.

Referenced by check_qual_in_win(), and start().

vector<double> TrashAlgo::logQual_to_quality [private]
 

Definition at line 22 of file trash.h.

Referenced by check_qual_in_win(), and start().

double TrashAlgo::MAX_VARIANCE_IN_QUALWIN [private]
 

Definition at line 29 of file trash.h.

Referenced by start().

int TrashAlgo::MIN_LEN_GOOD [private]
 

Definition at line 27 of file trash.h.

Referenced by start(), and trash().

int TrashAlgo::MIN_NUM_ADJ_WIN_GOOD [private]
 

Definition at line 26 of file trash.h.

Referenced by start(), and trash().

int TrashAlgo::TRASH_WINDOW_LEN [private]
 

Definition at line 24 of file trash.h.

Referenced by check_qual_in_win(), start(), and trash().

int TrashAlgo::TRASH_WINDOW_STEP [private]
 

Definition at line 25 of file trash.h.

Referenced by start(), and trash().


The documentation for this class was generated from the following files:
Generated on Fri Mar 17 17:45:02 2006 for trapper by  doxygen 1.4.4