#include <trapperDNP.h>
Collaboration diagram for DNPclass:
Public Member Functions | |
DNPclass (MAl_addon *mal, int minqual, int d_min=3, double p_max=0.001) | |
~DNPclass () | |
Private Member Functions | |
void | look_for_corr (vector< char > &first_col, vector< char > &second_col, vector< int > &first_qual_col, vector< int > &second_qual_col, char cand_base_1, char cand_base_2, int &num_err_first, int &num_err_second, double &exp_err_first, double &exp_err_second, int &num_corr, double &expectation_val) |
void | find_DNP_candidate_positions () |
int | column_is_candidate (int col) |
void | evaluate_DNP_candidate_positions () |
void | set_DNPs (int col1, int col2, char DNPbase1, char DNPbase2, vector< int > &rownum_vec, int ID1, int ID2, char cons1, char cons2) |
double | poisson_probability (double lambda, int obs) |
int | find_DNP_base_candidates (int &a, int &t, int &c, int &g, int &gap, char seqbase, int &tot_seqbase, int &tot_others) |
void | pick_base_row_and_qual_columns (int firstDNPindex, int secondDNPindex, vector< char > &firstDNPvec, vector< char > &secondDNPvec, vector< int > &first_qual_vec, vector< int > &second_qual_vec, vector< int > &rownum_vec) |
Private Attributes | |
MAl_addon * | m |
int | num_found |
int | num_pos |
vector< int > | confirmed_DNP_pos |
vector< double > | quality2error |
map< char, char > | base_trans |
int | num_candidates |
int | qualmin |
int | Dmin |
double | p_tot_max |
vector< int > | cand_vec |
vector< char > | cand_bases |
vector< char > | cons_bases |
Definition at line 8 of file trapperDNP.h.
|
Definition at line 8 of file trapperDNP.cc. References base_trans, evaluate_DNP_candidate_positions(), find_DNP_candidate_positions(), num_found, num_pos, and quality2error. 00008 : 00009 m(mal), num_candidates(0), qualmin(minqual), Dmin(d_min), p_tot_max(p_max) 00010 // {{{ 00011 00012 { 00013 00014 // Fill in the 'quality2error vector' 00015 00016 for( int q = 0; q < 100; q++ ) { 00017 00018 quality2error.push_back( std::pow(10.0, -(double)q/10.0) ); 00019 00020 } 00021 base_trans['a'] = 'a'; 00022 base_trans['A'] = 'a'; 00023 00024 base_trans['t'] = 't'; 00025 base_trans['T'] = 't'; 00026 00027 base_trans['g'] = 'g'; 00028 base_trans['G'] = 'g'; 00029 00030 base_trans['c'] = 'c'; 00031 base_trans['C'] = 'c'; 00032 00033 base_trans['n'] = 'n'; 00034 base_trans['N'] = 'n'; 00035 00036 base_trans['x'] = 'x'; 00037 base_trans['X'] = 'x'; 00038 00039 base_trans['-'] = '-'; 00040 00041 base_trans['*'] = '*'; 00042 00043 cerr<<"Finding candidate positions... "; 00044 find_DNP_candidate_positions(); 00045 cerr<<"Done"<<endl; 00046 cerr<<"Evaluating candidates... "; 00047 evaluate_DNP_candidate_positions(); 00048 cerr<<"Done"<<endl; 00049 cerr<<num_found<<" DNPs found at "<<num_pos<<" positions"<<endl; 00050 }
|
|
Definition at line 53 of file trapperDNP.cc.
|
|
Definition at line 86 of file trapperDNP.cc. References base_trans, cand_bases, cons_bases, Dmin, MAl_addon::get_most_freq_base_on_col(), MAl_addon::get_num_bases_on_col(), and m. Referenced by find_DNP_candidate_positions(). 00087 {{{ 00088 00089 { 00090 /* 00091 Definition of candidate base type: 00092 00093 ¤ Must be present in at least Dmin reads 00094 ¤ Can be present in at most half the number of reads on column 00095 00096 ¤ Extra: at present there can be only one candidate base type 00097 on a column. Without this constraint, a more involved approach 00098 has to be taken in order to identify candidate base types. 00099 00100 */ 00101 00102 char cons = base_trans [ m->get_most_freq_base_on_col( col ) ]; 00103 if ( cons != 'a' && cons != 'c' && 00104 cons != 'g' && cons != 't' ) return 0; 00105 00106 00107 00108 char base_arr[] = {'a', 'c', 'g', 't'/*, '-'*/}; 00109 int cand_num(0); 00110 int cand_base = 'u'; 00111 int cand_found(0); 00112 00113 for( int i = 0; i < 4; i++ ) { 00114 char base = base_trans [ base_arr[i] ]; 00115 if ( base == cons /*|| base != m->get_entry( 0, col )*/ /*NOTA BENE*/) continue; 00116 00117 if ( m->get_num_bases_on_col(col, base) >= Dmin ) { 00118 if ( m->get_num_bases_on_col(col, base) > cand_num ) { 00119 cand_num = m->get_num_bases_on_col(col, base); 00120 cand_base = base; 00121 cand_found = 1; 00122 } 00123 } 00124 } 00125 00126 00127 if ( cand_found ) { 00128 cand_bases.push_back( cand_base ); 00129 cons_bases.push_back( cons ); 00130 return 1; 00131 } 00132 return 0; 00133 00134 }
|
|
Definition at line 138 of file trapperDNP.cc. References cand_bases, cand_vec, confirmed_DNP_pos, cons_bases, Dmin, MAl_addon::get_max_col(), look_for_corr(), m, num_found, num_pos, pick_base_row_and_qual_columns(), poisson_probability(), and set_DNPs(). Referenced by DNPclass(). 00140 {{{ 00141 00142 { 00143 /* 00144 Tentative try: compare all candidate columns to each other 00145 00146 */ 00147 num_found = 0; 00148 num_pos = 0; 00149 00150 assert( (int)confirmed_DNP_pos.size() == 0 ); 00151 for( int i = 0; i < m->get_max_col(); i++ ) { 00152 confirmed_DNP_pos.push_back(0); 00153 } 00154 00155 vector<char> first_base_col, second_base_col; 00156 vector<int> first_qual_col, second_qual_col; 00157 vector<int> rownum_vec; 00158 00159 00160 for( int i = 0; i < (int)cand_vec.size() - 1; i++ ) { 00161 int proc = 100 * i / cand_vec.size(); 00162 // if ( proc % 5 == 0 ) 00163 // cerr<<proc<<" % checked"<<endl; 00164 for( int j = i+1; j < (int)cand_vec.size(); j++ ) { 00165 00166 pick_base_row_and_qual_columns(cand_vec[i], cand_vec[j], first_base_col, 00167 second_base_col, first_qual_col, second_qual_col, 00168 rownum_vec); 00169 00170 if ( (int)first_base_col.size() < Dmin*2 ) {//Need sufficient number of bases on columns 00171 //cerr<<"Size too small! i: "<<i<<", j: "<<j<<endl; 00172 //cerr<<"cand_vec[i]: "<<cand_vec[i]<<", cand_vec[j]: "<<cand_vec[j]<<endl; 00173 break; 00174 } 00175 char cand_base_1 = cand_bases[i]; 00176 char cand_base_2 = cand_bases[j]; 00177 00178 int num_cand_1(0), num_cand_2(0); 00179 double exp_err_first(0.0), exp_err_second(0.0); 00180 int num_corr(0); 00181 double expectation_val(0.0); 00182 look_for_corr(first_base_col, second_base_col, first_qual_col, 00183 second_qual_col, cand_base_1, cand_base_2, 00184 num_cand_1, num_cand_2, exp_err_first, exp_err_second, 00185 num_corr, expectation_val); 00186 00187 // double kvot; 00188 // if ( num_cand_1 > num_cand_2 ) { 00189 // kvot = double(num_cand_1)/double(num_cand_2); 00190 // } 00191 // else { 00192 // kvot = double(num_cand_2)/double(num_cand_1); 00193 // } 00194 00195 if ( num_cand_1 < Dmin ) { 00196 //cerr<<"Too few candidates! i: "<<i<<", j: "<<j<<endl; 00197 break; 00198 } 00199 int DNPfound(0); 00200 double tot_prob=0; 00201 double col_prob=0; 00202 double p1(-1), p2(-1), corr_prob(-1); 00203 00204 if ( num_corr >= Dmin ) { //NOTA BENE! Assumes Dmin = 3... 00205 //Computation of "the probability of observing any of the two columns by chance". 00206 00207 p1 = poisson_probability(exp_err_first, num_cand_1); 00208 p2 = poisson_probability(exp_err_second, num_cand_1); 00209 col_prob = 1.0 - (1.0 - p1) * (1.0 - p2); 00210 00211 assert( col_prob >= 0.0 && col_prob <= 1.0);//A bit shaky... 00212 00213 corr_prob = poisson_probability(expectation_val, num_corr); 00214 tot_prob = col_prob * corr_prob; 00215 00216 00217 if (tot_prob <= p_tot_max || num_corr > Dmin )DNPfound = 1; 00218 00219 } 00220 00221 00222 if ( DNPfound ) { 00223 00224 if ( !confirmed_DNP_pos[ cand_vec[i] ] || 00225 !confirmed_DNP_pos[ cand_vec[j] ]) { 00226 num_pos++; 00227 } 00228 00229 00230 assert( cand_vec[i] < (int)confirmed_DNP_pos.size() ); 00231 assert( cand_vec[j] < (int)confirmed_DNP_pos.size() ); 00232 00233 confirmed_DNP_pos[ cand_vec[i] ] = 1; 00234 confirmed_DNP_pos[ cand_vec[j] ] = 1; 00235 set_DNPs( cand_vec[i], cand_vec[j], 00236 cand_bases[i], cand_bases[j], rownum_vec, 00237 i, j, cons_bases[i], cons_bases[j]); 00238 } 00239 00240 } 00241 } 00242 }
|
|
|
|
Definition at line 62 of file trapperDNP.cc. References cand_bases, cand_vec, column_is_candidate(), MAl_addon::get_max_col(), MAl_addon::get_min_col(), m, and num_candidates. Referenced by DNPclass(). 00063 {{{ 00064 00065 { 00066 00067 00068 int min = m->get_min_col(); 00069 int max = m->get_max_col(); 00070 00071 00072 00073 for( int i = min; i < max; i++ ) { 00074 if ( column_is_candidate(i) ) { 00075 cand_vec.push_back(i); 00076 } 00077 int proc = 100 * i / max; 00078 // if ( proc % 5 == 0 ) 00079 // cerr<<proc<<" % checked"<<endl; 00080 } 00081 assert( cand_vec.size() == cand_bases.size() ); 00082 num_candidates += (int)cand_vec.size(); 00083 }
|
|
Definition at line 279 of file trapperDNP.cc. References quality2error, and qualmin. Referenced by evaluate_DNP_candidate_positions(). 00284 {{{ 00285 00286 { 00287 //NOTA BENE! This function currently performs its analysis 00288 //with regard to the candidate base. That is, every base 00289 //present on the column is regarded as correct if it is of 00290 //another type than the candidate base. 00291 00292 //The two results are stored in expectation_val and returned 00293 //in col_prob, respectively. 00294 00295 num_err_first = num_err_second = 0; 00296 exp_err_first = exp_err_second = 0.0; 00297 expectation_val = 0; 00298 num_corr = 0; 00299 00300 //Pre-computation of necessary params 00301 00302 for( int i = 0; i < (int)first_col.size(); i++ ) { 00303 assert( first_qual_col[i] > -1 && first_qual_col[i] < 100 ); 00304 assert( second_qual_col[i] > -1 && second_qual_col[i] < 100 ); 00305 00306 int no_1_cand(0), no_2_cand(0); 00307 00308 exp_err_first += quality2error[ first_qual_col[i] ]; 00309 exp_err_second += quality2error[ second_qual_col[i] ]; 00310 00311 00312 if ( first_col[i] == cand_base_1 && first_qual_col[i] >= qualmin ) { 00313 num_err_first++; 00314 no_1_cand = 1; 00315 } 00316 if ( second_col[i] == cand_base_2 && second_qual_col[i] >= qualmin ) { 00317 num_err_second++; 00318 no_2_cand = 1; 00319 } 00320 if ( no_1_cand && no_2_cand ) 00321 num_corr++; 00322 } 00323 00324 00325 //Computation of the expectation value for the number of coinciding deviations 00326 00327 for( int i = 0; i < (int)first_col.size(); i++ ) { 00328 double err1 = quality2error[ first_qual_col[i] ]; 00329 double err2 = quality2error[ second_qual_col[i] ]; 00330 double new_sum = exp_err_first - err1; 00331 double Exp1 = num_err_first * err1 / ( num_err_first * err1 + new_sum * (1.0 - err1) ); 00332 new_sum = exp_err_second - err2; 00333 double Exp2 = num_err_second * err2 / ( num_err_second * err2 + new_sum * (1.0 - err2) ); 00334 00335 expectation_val += Exp1 * Exp2; 00336 } 00337 00338 }
|
|
Definition at line 424 of file trapperDNP.cc. References base_trans, MAl_addon::get_entry(), MAl_addon::get_num_rows(), MAl_addon::get_qual(), MAl_addon::get_row_end(), MAl_addon::get_row_start(), and m. Referenced by evaluate_DNP_candidate_positions(). 00428 {{{ 00429 00430 { 00431 firstDNPvec.erase( firstDNPvec.begin(), firstDNPvec.end() ); 00432 secondDNPvec.erase( secondDNPvec.begin(), secondDNPvec.end() ); 00433 first_qual_vec.erase( first_qual_vec.begin(), first_qual_vec.end() ); 00434 second_qual_vec.erase( second_qual_vec.begin(), second_qual_vec.end() ); 00435 rownum_vec.erase( rownum_vec.begin(), rownum_vec.end() ); 00436 00437 for( int j = 0; j < m->get_num_rows(); j++ ) { 00438 00439 if ( firstDNPindex >= m->get_row_end(j) || firstDNPindex < m->get_row_start(j) ) 00440 continue; 00441 if ( secondDNPindex >= m->get_row_end(j) || secondDNPindex < m->get_row_start(j) ) 00442 continue; 00443 00444 //Both bases come from the same read 00445 00446 char baseOnPos1 = base_trans[ m->get_entry(j, firstDNPindex) ]; 00447 char baseOnPos2 = base_trans[ m->get_entry(j, secondDNPindex) ]; 00448 00449 //EK 030408 00450 if(baseOnPos1 == 'x' || baseOnPos2 == 'x') 00451 continue; 00452 00453 assert(baseOnPos1 == 'a' || baseOnPos1 == 't' || baseOnPos1 == 'g' || 00454 baseOnPos1 == 'c' || baseOnPos1 == 'n' || baseOnPos1 == '-' || 00455 baseOnPos1 == 'x' || baseOnPos1 == '*' ); 00456 assert(baseOnPos2 == 'a' || baseOnPos2 == 't' || baseOnPos2 == 'g' || 00457 baseOnPos2 == 'c' || baseOnPos2 == 'n' || baseOnPos2 == '-' || 00458 baseOnPos2 == 'x' || baseOnPos2 == '*'); 00459 00460 int qualOnPos1 = m->get_qual(j, firstDNPindex); 00461 int qualOnPos2 = m->get_qual(j, secondDNPindex); 00462 00463 firstDNPvec.push_back(baseOnPos1); 00464 secondDNPvec.push_back(baseOnPos2); 00465 00466 first_qual_vec.push_back(qualOnPos1); 00467 second_qual_vec.push_back(qualOnPos2); 00468 00469 assert( j < m->get_num_rows() ); 00470 00471 rownum_vec.push_back( j ); 00472 } 00473 }
|
|
Definition at line 341 of file trapperDNP.cc. Referenced by evaluate_DNP_candidate_positions(). 00342 {{{ 00343 00344 { 00345 // if ( lambda < 0.0 ) { 00346 // cerr<<"lambda: "<<lambda<<endl; 00347 // } 00348 assert( lambda >= 0.0); 00349 assert( obs >= 0 ); 00350 double M = lambda; 00351 int N_in = obs; 00352 int N = N_in - 1; 00353 int J (0); 00354 double S = exp(-M); 00355 double res(0.0); 00356 00357 if(N < 0){ 00358 return 1.0; 00359 } 00360 00361 00362 if(S==0){ 00363 J = N; 00364 if(J > M){ 00365 J = int(M); 00366 } 00367 00368 double F(0.0); 00369 for( int i = 1; i <= J; i++ ) { 00370 F += log((double)i); 00371 } 00372 double L = -M-F+J*log(M); 00373 L = exp(L); 00374 F=1; 00375 00376 double T(0.0); 00377 00378 for( int i = 1; i <= J; i++ ) { 00379 F *= (J+1-i)/M; 00380 T += F; 00381 } 00382 00383 if(J==N){ 00384 T=(T+1)*L; 00385 res = 1.0 - T; 00386 00387 } 00388 else{ 00389 F=1; 00390 for( int i = 1; i <= N-J; i++ ) { 00391 F *= M/(i+J); 00392 T=T+F; 00393 } 00394 T=(T+1)*L; 00395 res = (1.0 - T); 00396 } 00397 00398 } 00399 else{ 00400 00401 double T=S; 00402 for( int i = 0; i <= N-1; i++ ) { 00403 S = S*M/(i+1); 00404 T += S; 00405 } 00406 res = (1.0 - T); 00407 } 00408 00409 if ( res < 0.0 ) { 00410 if ( -res < 0.0000000000001 ) 00411 res = 0.0; 00412 } 00413 else if ( res > 1.0 ) { 00414 if ( res - 1.0 < 0.0000000000001 ) 00415 res = 1.0; 00416 } 00417 assert( res <= 1.0 ); 00418 assert( res >= 0.0 ); 00419 00420 return res; 00421 }
|
|
Definition at line 247 of file trapperDNP.cc. References dnp_type(), MAl_addon::get_entry(), MAl_addon::get_qual(), MAl_addon::is_DNP(), m, num_found, qualmin, and MAl_addon::set_DNP(). Referenced by evaluate_DNP_candidate_positions(). 00250 {{{ 00251 00252 { 00253 00254 for( int i = 0; i < (int)rownum_vec.size(); i++ ) { 00255 00256 if ( m->get_entry( rownum_vec[i], col1 ) == DNPbase1 && 00257 m->get_entry( rownum_vec[i], col2 ) == DNPbase2 && 00258 m->get_qual(rownum_vec[i], col1) >= qualmin && 00259 m->get_qual(rownum_vec[i], col2) >= qualmin) { 00260 //Set DNP 00261 if ( !m->is_DNP( rownum_vec[i] , col1 ) ) { 00262 num_found++; 00263 } 00264 if ( !m->is_DNP( rownum_vec[i] , col2 ) ) { 00265 num_found++; 00266 } 00267 00268 00269 m->set_DNP( rownum_vec[i] , col1, true, ID1, dnp_type(DNPbase1, cons1) ); 00270 m->set_DNP( rownum_vec[i] , col2, true, ID2, dnp_type(DNPbase2, cons2)); 00271 00272 00273 } 00274 } 00275 }
|
|
Definition at line 24 of file trapperDNP.h. Referenced by column_is_candidate(), DNPclass(), and pick_base_row_and_qual_columns(). |
|
Definition at line 33 of file trapperDNP.h. Referenced by column_is_candidate(), evaluate_DNP_candidate_positions(), and find_DNP_candidate_positions(). |
|
Definition at line 32 of file trapperDNP.h. Referenced by evaluate_DNP_candidate_positions(), and find_DNP_candidate_positions(). |
|
Definition at line 21 of file trapperDNP.h. Referenced by evaluate_DNP_candidate_positions(). |
|
Definition at line 34 of file trapperDNP.h. Referenced by column_is_candidate(), and evaluate_DNP_candidate_positions(). |
|
Definition at line 29 of file trapperDNP.h. Referenced by column_is_candidate(), and evaluate_DNP_candidate_positions(). |
|
Definition at line 17 of file trapperDNP.h. Referenced by column_is_candidate(), evaluate_DNP_candidate_positions(), find_DNP_candidate_positions(), pick_base_row_and_qual_columns(), and set_DNPs(). |
|
Definition at line 26 of file trapperDNP.h. Referenced by find_DNP_candidate_positions(). |
|
Definition at line 18 of file trapperDNP.h. Referenced by DNPclass(), evaluate_DNP_candidate_positions(), and set_DNPs(). |
|
Definition at line 19 of file trapperDNP.h. Referenced by DNPclass(), and evaluate_DNP_candidate_positions(). |
|
Definition at line 30 of file trapperDNP.h. |
|
Definition at line 22 of file trapperDNP.h. Referenced by DNPclass(), and look_for_corr(). |
|
Definition at line 28 of file trapperDNP.h. Referenced by look_for_corr(), and set_DNPs(). |