#include <trash.h>
Inheritance diagram for TrashAlgo:
Public Member Functions | |
TrashAlgo (TrapperDoc *pDoc_, std::set< db_recno_t > &recnoList, AlgoParam *param) | |
void | start () |
void | trash (size_t seqNumber) |
int | check_qual_in_win (int start, int stop, int ID, int &curr_win_good) |
void | kill_read (int ID) |
Private Attributes | |
vector< double > | logQual_to_quality |
vector< double > | decode_qualval_square |
int | TRASH_WINDOW_LEN |
int | TRASH_WINDOW_STEP |
int | MIN_NUM_ADJ_WIN_GOOD |
int | MIN_LEN_GOOD |
double | GOOD_PERCENTAGE |
double | MAX_VARIANCE_IN_QUALWIN |
Definition at line 8 of file trash.h.
|
Definition at line 11 of file trash.h. 00011 : 00012 RWAlgo(pDoc_, recnoList, param) {}
|
|
Definition at line 224 of file trash.cc. References decode_qualval_square, MAl_Readonly::get_qual(), RWAlgo::getMAl(), GOOD_PERCENTAGE, logQual_to_quality, and TRASH_WINDOW_LEN. Referenced by trash(). 00225 {{{ 00226 00227 { 00228 00229 // cerr<<"stop: "<<stop<<endl; 00230 // cerr<<"start: "<<start<<endl; 00231 assert( stop <= getMAl()->get_seq_end( ID) ); 00232 00233 double sum_qual(0.0), sum_qual_square(0.0), average_qual(0.0); 00234 int minLogQual(0); 00235 int num_gap(0); 00236 00237 //Check quality in window 00238 for( int k = start; k < stop; k++ ) { 00239 // cerr<<getMAl()->get_base( ID, k ); 00240 // if ( stop >= getMAl()->get_seq_end( ID) ) { 00241 // assert( stop == getMAl()->get_seq_end( ID) ); 00242 // //Should not happen... 00243 // // assert(false); 00244 // break; 00245 // } 00246 00247 if ( getMAl()->get_base( ID, k ) == '-' ) { 00248 00249 num_gap++; 00250 continue; 00251 } 00252 00253 00254 sum_qual += logQual_to_quality[ getMAl()->get_qual(ID, k) ]; 00255 sum_qual_square += decode_qualval_square[ getMAl()->get_qual(ID, k) ]; 00256 int tempQual = getMAl()->get_qual(ID, k); 00257 if(tempQual >= minLogQual) 00258 minLogQual = tempQual; 00259 } 00260 // cerr<<endl; 00261 average_qual = sum_qual / (double)TRASH_WINDOW_LEN; 00262 00263 if ( average_qual >= GOOD_PERCENTAGE ) 00264 { 00265 00266 //Window may qualify as good and high quality 00267 double Q = sum_qual_square - sum_qual*sum_qual/(double)TRASH_WINDOW_LEN; 00268 assert(TRASH_WINDOW_LEN > 1); 00269 double variance = Q / (double)(TRASH_WINDOW_LEN - 1); 00270 if ( variance <= MAX_VARIANCE_IN_QUALWIN || minLogQual >= 27) { 00271 //Window qualifies as good 00272 curr_win_good = 1; 00273 } 00274 else { 00275 //Variance too high for good and hq 00276 curr_win_good = 0; 00277 } 00278 00279 } 00280 else 00281 { 00282 //Window does not qualify as good or high quality 00283 curr_win_good = 0; 00284 } 00285 00286 if ( stop - start != TRASH_WINDOW_LEN + num_gap ) { 00287 cerr<<"stop - start: "<<stop - start<<endl; 00288 cerr<<"TRASH_WINDOW_LEN + num_gap: "<<TRASH_WINDOW_LEN + num_gap<<endl; 00289 cerr<<"stop: "<<stop<<endl; 00290 cerr<<"start: "<<start<<endl; 00291 cerr<<"TRASH_WINDOW_LEN: "<<TRASH_WINDOW_LEN<<endl; 00292 cerr<<"num_gap: "<<num_gap<<endl; 00293 } 00294 // cerr<<"num_gap: "<<num_gap<<endl; 00295 00296 assert( stop - start == TRASH_WINDOW_LEN + num_gap ); 00297 return num_gap; 00298 }
|
|
Definition at line 301 of file trash.cc. References RWAlgo::getMAl(), MAl::set_beginGood(), and MAl::set_endGood(). Referenced by trash(). 00302 {{{ 00303 00304 { 00305 00306 getMAl()->set_beginGood( ID, getMAl()->get_len( ID )/2 ); 00307 getMAl()->set_endGood( ID, getMAl()->get_len( ID )/2 ); 00308 }
|
|
Implements RWAlgo. Definition at line 23 of file trash.cc. References decode_qualval_square, MAl_Readonly::get_num_seq(), RWAlgo::getMAl(), GOOD_PERCENTAGE, logQual_to_quality, MAX_VARIANCE_IN_QUALWIN, MIN_LEN_GOOD, MIN_NUM_ADJ_WIN_GOOD, trash(), TRASH_WINDOW_LEN, and TRASH_WINDOW_STEP. Referenced by trash(). 00024 {{{ 00025 { 00026 00027 00028 00029 TRASH_WINDOW_LEN = 35; 00030 TRASH_WINDOW_STEP = 3; 00031 MIN_NUM_ADJ_WIN_GOOD = 4; 00032 MIN_LEN_GOOD = 50; 00033 GOOD_PERCENTAGE = 0.89; 00034 MAX_VARIANCE_IN_QUALWIN = 0.0035; 00035 00036 00037 //Note: this assumes that quality values are 99 at the most! 00038 for( int i = 0; i < 100; i++ ) { 00039 logQual_to_quality.push_back( 1 - pow(10.0,double(-i)/10.0) ); 00040 decode_qualval_square.push_back( logQual_to_quality[i] * logQual_to_quality[i] ); 00041 } 00042 00043 00044 00045 00046 for( size_t i = 0; i < getMAl()->get_num_seq(); i++ ) { 00047 00048 trash(i); 00049 00050 //Temp test 00051 // for( size_t j = 0; j < getMAl()->get_seq_end( i ); j++ ) { 00052 // if ( j < getMAl()->get_beginGood( i ) || j >= getMAl()->get_endGood( i ) ) 00053 // getMAl()->set_base( i, j, 'x' ); 00054 // } 00055 00056 } 00057 00058 00059 }
|
|
Definition at line 62 of file trash.cc. References check_qual_in_win(), MAl_Readonly::get_seq_end(), RWAlgo::getMAl(), kill_read(), MIN_LEN_GOOD, MIN_NUM_ADJ_WIN_GOOD, MAl::set_beginGood(), MAl::set_endGood(), start(), TRASH_WINDOW_LEN, and TRASH_WINDOW_STEP. Referenced by start(). 00063 {{{ 00064 00065 { 00066 /* This function computes the quality regions of the 00067 sequences, and marks those sequences with too poor 00068 quality to be deleted from further investigation. 00069 00070 */ 00071 00072 00073 int num_adj_win_good(0); 00074 int begin_good(-1), end_good(-1); 00075 int bg_found(0), eg_found(0); 00076 int reset_good(1), curr_win_good(0); 00077 00078 //Left end 00079 bool break_for(false); 00080 int start = 0; 00081 int stop = getMAl()->get_seq_end(seqNumber) - TRASH_WINDOW_LEN + 1; 00082 00083 for( int j = start; j < stop; j += TRASH_WINDOW_STEP ) { 00084 00085 if ( reset_good ) { 00086 begin_good = j; 00087 } 00088 reset_good = 0; 00089 00090 //Adjust window size for gaps 00091 int num_bases_in_win(0); 00092 int num_gaps_in_win(0); 00093 while ( num_bases_in_win < TRASH_WINDOW_LEN ) { 00094 if ( j + TRASH_WINDOW_LEN + num_gaps_in_win > getMAl()->get_seq_end(seqNumber) ) { 00095 00096 break_for = true; 00097 break; 00098 } 00099 // cerr<<getMAl()->get_base( seqNumber, j + num_bases_in_win + num_gaps_in_win ); 00100 if ( getMAl()->get_base( seqNumber, j + num_bases_in_win + num_gaps_in_win ) == '-' ) { 00101 00102 num_gaps_in_win++; 00103 } 00104 else { 00105 num_bases_in_win++; 00106 } 00107 } 00108 // cerr<<endl; 00109 if ( break_for ) break; 00110 00111 j += check_qual_in_win(j, j + TRASH_WINDOW_LEN + num_gaps_in_win, seqNumber, curr_win_good); 00112 00113 /* Window status is now computed and it may be 00114 possible to set begin_good */ 00115 00116 if ( curr_win_good ) {//good or hq 00117 num_adj_win_good++; 00118 00119 if ( num_adj_win_good >= MIN_NUM_ADJ_WIN_GOOD ) { 00120 //Found begin_good 00121 bg_found = 1; 00122 } 00123 } 00124 else { 00125 if ( !bg_found ) { 00126 reset_good = 1; 00127 num_adj_win_good = 0; 00128 } 00129 } 00130 00131 if ( bg_found ) { 00132 break; 00133 } 00134 }//End left 00135 00136 /* Right end -- same thing, but backwards. 00137 Unnecessary if begin good not found. */ 00138 00139 if ( bg_found ) { 00140 reset_good = 1; 00141 curr_win_good = 0; 00142 num_adj_win_good = 0; 00143 00144 break_for = false; 00145 00146 start = getMAl()->get_seq_end(seqNumber) - 1; 00147 stop = TRASH_WINDOW_LEN - 2; 00148 00149 for( int j = start; j > stop; j -= TRASH_WINDOW_STEP ) { 00150 00151 if ( reset_good ) { 00152 end_good = j + 1; 00153 } 00154 00155 reset_good = 0; 00156 00157 //Adjust window size for gaps 00158 int num_bases_in_win(0); 00159 int num_gaps_in_win(0); 00160 while ( num_bases_in_win < TRASH_WINDOW_LEN ) { 00161 // cerr<<getMAl()->get_base( seqNumber, j - num_gaps_in_win - num_bases_in_win ); 00162 if ( j - TRASH_WINDOW_LEN + 1 - num_gaps_in_win < 0 ) { 00163 00164 break_for = true; 00165 break; 00166 } 00167 if ( getMAl()->get_base( seqNumber, j - num_gaps_in_win - num_bases_in_win ) == '-' ) { 00168 num_gaps_in_win++; 00169 } 00170 else { 00171 num_bases_in_win++; 00172 } 00173 } 00174 // cerr<<endl; 00175 if ( break_for ) break; 00176 00177 00178 j -= check_qual_in_win(j - TRASH_WINDOW_LEN + 1 - num_gaps_in_win, j + 1, seqNumber, curr_win_good); 00179 00180 /* Window status is now computed and it may be 00181 possible to set end_good */ 00182 00183 if ( curr_win_good ) { 00184 num_adj_win_good++; 00185 00186 if ( num_adj_win_good >= MIN_NUM_ADJ_WIN_GOOD ) { 00187 //Found begin_good 00188 eg_found = 1; 00189 } 00190 } 00191 else { 00192 if ( !eg_found ) { 00193 reset_good = 1; 00194 num_adj_win_good = 0; 00195 } 00196 } 00197 00198 if ( eg_found ) { 00199 break; 00200 } 00201 }//End right forj 00202 }//End right 00203 00204 //Set the parameters for the read 00205 00206 int len = end_good - begin_good; 00207 if ( ( bg_found && eg_found ) && ( len >= MIN_LEN_GOOD ) ) { 00208 00209 getMAl()->set_beginGood(seqNumber, begin_good); 00210 getMAl()->set_endGood(seqNumber, end_good); 00211 00212 } 00213 else { 00214 00215 kill_read(seqNumber); 00216 } 00217 00218 //}//End fori loop over all seqs 00219 00220 00221 }
|
|
Definition at line 23 of file trash.h. Referenced by check_qual_in_win(), and start(). |
|
Definition at line 28 of file trash.h. Referenced by check_qual_in_win(), and start(). |
|
Definition at line 22 of file trash.h. Referenced by check_qual_in_win(), and start(). |
|
Definition at line 29 of file trash.h. Referenced by start(). |
|
|
|
|
|
Definition at line 24 of file trash.h. Referenced by check_qual_in_win(), start(), and trash(). |
|
|