Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

AceWriter Class Reference

#include <acewriteralgo.h>

Inheritance diagram for AceWriter:

Inheritance graph
[legend]
Collaboration diagram for AceWriter:

Collaboration graph
[legend]
List of all members.

Public Member Functions

 AceWriter (TrapperDoc *pDoc_, std::set< db_recno_t > &recnoList, AlgoParam *param)
void start ()

Private Member Functions

void write_ace_file ()
int get_consensus (std::vector< char > &cons, std::vector< int > &rowVec, int &num)
int get_realignment_row (const int row, std::vector< char > &aligned_read, int &row_start)

Detailed Description

Definition at line 4 of file acewriteralgo.h.


Constructor & Destructor Documentation

AceWriter::AceWriter TrapperDoc pDoc_,
std::set< db_recno_t > &  recnoList,
AlgoParam param
[inline]
 

Definition at line 7 of file acewriteralgo.h.

00007                                                                                    : 
00008     RAlgo(pDoc_, recnoList, param) {}


Member Function Documentation

int AceWriter::get_consensus std::vector< char > &  cons,
std::vector< int > &  rowVec,
int &  num
[private]
 

Referenced by write_ace_file().

int AceWriter::get_realignment_row const int  row,
std::vector< char > &  aligned_read,
int &  row_start
[private]
 

Referenced by write_ace_file().

void AceWriter::start  )  [virtual]
 

Implements RAlgo.

Definition at line 8 of file acewriteralgo.cc.

References write_ace_file().

00009 {
00010   write_ace_file();
00011 }

void AceWriter::write_ace_file  )  [private]
 

Definition at line 14 of file acewriteralgo.cc.

References get_consensus(), MAl_Readonly::get_name(), MAl_Readonly::get_num_seq(), get_realignment_row(), RAlgo::getMAl(), and MAl_Readonly::is_DNP().

Referenced by start().

00015 { 
00016   //This is adapted from old code, needs a rewrite!
00017   
00018   cerr<<"Writing ace file... ";
00019 
00020   string dir = ".";
00021   string file_name = "trappercontig";
00022 
00023 
00024   // The parameters name are taken from the documentation of the 
00025   // ace file format doc at; 
00026   // http://www.phrap.org/consed/distributions/README.13.0.txt
00027 
00028   // There's a lot of variables in this function. It's possible to
00029   // write this function with less variables, but thay are there
00030   // to make it easier to rewrite the function if you want to add stuff.
00031 
00032   // ----------------------------------------------------------------------
00033   // ---------- Declerations of parameters with default values!
00034   // ----------------------------------------------------------------------
00035 
00036   // AS parameters
00037   int tot_num_of_reads_in_ace_file;
00038   // CO parameters
00039   string contig_name = "contig1";
00040   int num_of_bases;
00041   int num_of_reads_in_contig;
00042   int num_of_segments_in_contig;
00043   vector<char> consensus; 
00044   // BQ parameters
00045   int num_of_unpadded_bases;
00046   vector<short> consensus_quality_values;
00047   consensus_quality_values.push_back(50); // The consensus will get 50
00048   // AF parameters
00049   vector<string> read_name;
00050   vector<char> UorC;
00051   vector<int> padded_start_consensus_position;
00052   // BS parameters
00053   vector<int> BS_padded_start_consensus_position;
00054   vector<int> BS_padded_end_consensus_position;
00055   vector<string> BS_read_name;
00056   // RD parameters
00057   vector<int> num_of_padded_bases; 
00058   int num_of_whole_read_info_items = 0;  // puch dakota!
00059   vector<int> num_of_read_tags;            
00060   vector< vector<char> > read_seq;
00061   // QA parameters
00062   vector<int> qual_clipping_start;
00063   vector<int> qual_clipping_end;
00064   vector<int> align_clipping_start;
00065   vector<int> align_clipping_end;
00066   // DS parameters
00067   vector<string> name_of_chromat_file;
00068   vector<string> name_of_phd_file;
00069   string date_of_phd_file = "Thu Mar 19 18:38:28 1998"; // Northwest Energy Review Transition Board - Mar 19, 1998 report
00070   // RT parameters
00071   vector< vector<string> > tag_type;
00072   string what_program_created_tag = "TRAPPER";
00073   vector< vector<int> > padded_read_pos_start;
00074   vector< vector<int> > padded_read_pos_end;
00075   string date_when_tag_was_created = "980319:183828";
00076   vector< vector<string> > tag_data;
00077   // CA & WA are at the moment not in this function!
00078 
00079 
00080   // ----------------------------------------------------------------------
00081   // ---------- Set parameter values.
00082   // ----------------------------------------------------------------------
00083   cerr<<" (Setting parameter values.) ";  
00084   
00085   vector<char> tmp_charvec;
00086   vector<int> tmp_start, tmp_end;
00087   vector<string> tmp_data, tmp_type;
00088   
00089   // ################################# Get consensus #################################
00090   vector<int> rowVec;
00091   int consensus_start = get_consensus(consensus, rowVec, num_of_unpadded_bases);
00092   num_of_bases = consensus.size();
00093   
00094   // ################################# Get info from the cosmid #################################
00095   int id;
00096   int begin_pos;
00097   int len;
00098   int aligned_len;
00099   string name;
00100   //bool OK;
00101   tot_num_of_reads_in_ace_file = 0;
00102 
00103   // Mod to output query sequence in ace file.. DN 040407 
00104 
00105   int num_rows = getMAl()->get_num_seq ();
00106   
00107 
00108   for(int i=0; i < num_rows; i++) { // Loop over all aligned reads + query
00109 
00110     // Make room
00111     // for RT
00112     tmp_start.clear(); tmp_end.clear();
00113     tmp_data.clear(); tmp_type.clear();
00114     // and seq
00115     tmp_charvec.clear();
00116 
00117       
00118     // Get the name
00119     id = i;
00120     name = getMAl()->get_name(id); 
00121     
00122     // ################################# Get seq from real_matrix
00123     if ( ! get_realignment_row(i, tmp_charvec, begin_pos) ) 
00124       cerr << "Id: " << id <<" have been removed from alignment!"<<endl;
00125     else {
00126       tot_num_of_reads_in_ace_file++; // One more not empty read;
00127 
00128       UorC.push_back( getMAl()->get_strand(id)[0] );
00129       
00130       
00131       // Store names
00132       read_name.push_back(name);
00133       name_of_phd_file.push_back( name + ".phd.1");
00134       name_of_chromat_file.push_back( name + ".ab1");
00135       
00136       aligned_len = tmp_charvec.size();
00137       
00138       //int offset = current_cos->get_beginGood(id) - current_cos->get_seq_begin(id);
00139       int offset=0;//No offset?
00140       
00141       // Set start of the read aginst the consensus!
00142       padded_start_consensus_position.push_back(begin_pos - consensus_start + 1 - offset);
00143       
00144       // ################################# Set RT:s 
00145       int paddedCount = 0;
00146       for( int j = 0; j < aligned_len; j++ ) {
00147 
00148         // Count gaps
00149         if ( tmp_charvec[j] == '*'  ) paddedCount++;
00150         
00151         // DNP position
00152 
00153 
00154         else if( getMAl()->is_DNP(id, j) ) {
00155           tmp_start.push_back( j + 1 + offset );
00156           tmp_end.push_back( j + 1 + offset );
00157           tmp_data.push_back("a DNP position");
00158           tmp_type.push_back("matchElsewhereHighQual");
00159         }
00160         
00161         // Marked region!
00162 //         else if ( tmp_charvec[j] == 'x' ) {
00163 //           tmp_start.push_back( j + 1 + offset );
00164 //           tmp_data.push_back("Marked region");
00165 //           tmp_type.push_back("significantDiscrepancy");
00166           
00167 //           // Go through the whole marked region
00168 //           while((tmp_charvec[j] == '*' || tmp_charvec[j] == 'x') && j < aligned_len  ) {
00169 //             if ( tmp_charvec[j] == '*') paddedCount++;
00170 //             else tmp_charvec[j] = current_cos->get_base(id, current_cos->get_beginGood(id) + j - paddedCount);
00171 //             j++;
00172 //           }
00173 //           tmp_end.push_back(j + offset);
00174 //         }
00175 
00176       }
00177 
00178       // ################################# Adds the bases that was not aligned
00179 
00180       vector<char> TheWholeSeq; // = head + tmp_charvec + tail
00181       // Head
00182 
00183       for( int j = 0; j < aligned_len; j++ ) 
00184         TheWholeSeq.push_back(tmp_charvec[j]);
00185       // Tail
00186       
00187       len = TheWholeSeq.size();
00188       
00189       
00190       // Save everything!
00191       num_of_read_tags.push_back(tmp_start.size());
00192       
00193       padded_read_pos_start.push_back(tmp_start);
00194       padded_read_pos_end.push_back(tmp_end);
00195       
00196       tag_type.push_back(tmp_type);
00197       tag_data.push_back(tmp_data);
00198       
00199       read_seq.push_back(TheWholeSeq);
00200       num_of_padded_bases.push_back(len);
00201       
00202       qual_clipping_start.push_back(1);
00203       qual_clipping_end.push_back(aligned_len);
00204       align_clipping_start.push_back(1);
00205       align_clipping_end.push_back(aligned_len);
00206     }
00207   }
00208   
00209   num_of_reads_in_contig = tot_num_of_reads_in_ace_file; // One contig in acefile
00210   
00211   // ################################# Set BS values! #################################
00212   // It will give bs that are sorted, nonoverlapping and match the consensus!
00213 
00214   int curr = rowVec[0];
00215   
00216   num_of_segments_in_contig = 1;
00217   BS_padded_start_consensus_position.push_back(1);
00218   string tmp = getMAl()->get_name(rowVec[0]);
00219   BS_read_name.push_back( tmp );
00220   
00221   for( int n = 1; n < (int)rowVec.size(); n++ ) {
00222     if ( curr != rowVec[n] ) {
00223       BS_padded_end_consensus_position.push_back(n);
00224       num_of_segments_in_contig++;
00225       BS_padded_start_consensus_position.push_back(n+1);
00226       tmp = getMAl()->get_name(rowVec[n]);
00227       BS_read_name.push_back( tmp );
00228       curr = rowVec[n];
00229     }
00230   }
00231   BS_padded_end_consensus_position.push_back(rowVec.size());
00232   
00233   
00234   // ----------------------------------------------------------------------
00235   // ---------- Write ace file.
00236   // ----------------------------------------------------------------------
00237   if ( tot_num_of_reads_in_ace_file == 0 )
00238     return;
00239   else {
00240     // Disk full?
00241     if(access(dir.data(), W_OK) == -1) { cerr << "Cannot write ace-files to " << dir << endl; exit(-1); }
00242     
00243     ofstream outFile((dir + "/" + file_name + ".ace").data());
00244     
00245     cerr<<" (Printing to file.) ";
00246     
00247     // write AS
00248     int num_contigs = 1;
00249     outFile<<"AS "<<num_contigs<<" "<<tot_num_of_reads_in_ace_file<<endl;
00250     outFile<<endl;
00251     
00252     // write CO
00253     outFile<<"CO "<<contig_name<<" "<<num_of_bases
00254            <<" "<<num_of_reads_in_contig<<" "<<num_of_segments_in_contig
00255            <<" U"<<endl;
00256     for( int i = 0; i < num_of_bases; i++ ) {
00257       if ( i != 0 && i % 50 == 0 )
00258         outFile<<endl;
00259       outFile<<consensus[i];
00260     }
00261     outFile<<endl;
00262     outFile<<endl;
00263     
00264     // write BQ
00265     outFile<<"BQ "<<endl;
00266     for( int i = 0; i < num_of_unpadded_bases; i++ ) {
00267       if ( i != 0 && i % 50 == 0 )
00268         outFile<<endl;
00269       outFile<<' '<<consensus_quality_values[0];
00270     }
00271     outFile<<endl;
00272     outFile<<endl;
00273   
00274     // write AF
00275     for( int i = 0; i < num_of_reads_in_contig; i++ ) {
00276       // Change parameters value!
00277       outFile<<"AF "<<read_name[i]<<" "<<UorC[i]
00278              <<" "<<padded_start_consensus_position[i]<<endl;
00279     }
00280     
00281     // write BS
00282     for( int i = 0; i < num_of_segments_in_contig; i++ ) {
00283       // Change parameters value!
00284       outFile<<"BS "<<BS_padded_start_consensus_position[i]
00285              <<" "<<BS_padded_end_consensus_position[i]
00286              <<" "<<BS_read_name[i]<<endl;
00287     }
00288     outFile<<endl;
00289     
00290     // write RD, QA & DS
00291     for( int i = 0; i < num_of_reads_in_contig; i++ ) {
00292       // Change parameters value!
00293       
00294       // write RD
00295       outFile<<"RD "<<read_name[i]<<" "<<num_of_padded_bases[i]
00296              <<" "<<num_of_whole_read_info_items
00297              <<" "<<num_of_read_tags[i]<<endl;
00298       for( int j = 0; j < num_of_padded_bases[i]; j++ ) {
00299         if ( j != 0 && j % 50 == 0 )
00300           outFile<<endl;
00301         outFile<<read_seq[i][j];
00302       }
00303       outFile<<endl;
00304       outFile<<endl;
00305       
00306       // write QA
00307       outFile<<"QA "<<qual_clipping_start[i]
00308              <<" "<<qual_clipping_end[i]
00309              <<" "<<align_clipping_start[i]
00310              <<" "<<align_clipping_end[i]<<endl;
00311       outFile<<endl;
00312       
00313       // write DS
00314       outFile<<"DS CHROMAT_FILE: "<<name_of_chromat_file[i]
00315              <<" PHD_FILE: "<<name_of_phd_file[i]
00316              <<" TIME: "<<date_of_phd_file<<endl;
00317       outFile<<endl;
00318       
00319       // write RT
00320       for( int j = 0; j < (int)padded_read_pos_start[i].size(); j++ ) {
00321         outFile<<"RT{"<<endl;
00322         outFile<<read_name[i]<<' '<<tag_type[i][j]<<' '
00323                <<what_program_created_tag<<' '
00324                <<padded_read_pos_start[i][j]<<' '
00325                <<padded_read_pos_end[i][j]<<' '
00326                <<date_when_tag_was_created<<endl;
00327         outFile<<tag_data[i][j]<<endl;
00328         outFile<<'}'<<endl;
00329         outFile<<endl;
00330       }
00331     }
00332 
00333     cerr<<" Done. "<<endl;
00334     outFile.close();
00335   }
00336   
00337 }


The documentation for this class was generated from the following files:
Generated on Fri Mar 17 17:44:57 2006 for trapper by  doxygen 1.4.4