trapper: readdata.h Source File

00001 /*******************************************************************************
00002  *                                                                             *
00003  *   Copyright (C) 2003  Erik Sjolund,  (<html>Erik Sj&ouml;lund</html>)       *
00004  *                       Center for Genomics and Bioinformatics,               *
00005  *                       Karolinska Institutet,                                *
00006  *                       Stockholm,                                            *
00007  *                       Sweden                                                *
00008  *                                                                             *
00009  *  Author: Erik Sjolund                                                       *
00010  *  Email: erik.sjolund@adivo.se                                               *
00011  *                                                                             *
00012  *******************************************************************************
00013  */
00014 #ifndef READDATA_H
00015 #define READDATA_H
00016 
00017 #include "generaldata.h"
00018 #include "trdb.h"
00019 #include "trappertypes.h"
00020 #include <string>
00021 #include <iosfwd>
00022 
00023 /** \brief Representing a read stored in the berkeley db. Holds its name and geometric placement
00024   * in the contig.
00025   *
00026   * The index value of this read, m_recno, is used as a secondary search index for subclasses of
00027   * FeatureData.
00028   */ 
00029 
00030 
00031 class ReadData : public GeneralData
00032 {
00033 public:
00034   ReadData( TR_DNA row = 0 , TR_DNA startPos = 0, TR_DNA endPos = 0, std::string name = "N/A", std::string mate = "N/A", std::size_t matelen = 0, std::string strand = "U", TR_DNA bg = 0, TR_DNA eg = 0) : GeneralData()
00035   {
00036     s.row = row;
00037     s.startPos = startPos;
00038     s.endPos = endPos;
00039 
00040     name_ = name;
00041     mate_ = mate;
00042     matelen_ = matelen;
00043     strand_ = strand;
00044     beginGood_ = bg;
00045     endGood_ = eg;
00046   }
00047   ReadData( const ReadData& other );
00048   ~ReadData();
00049   void print_debug_info();
00050   std::string uniqueName() { return std::string("ReadData"); }
00051   TrDb::IndexMap getIndexMap();
00052   /** Set the row where this "read" is placed in the contig */
00053   void setRow( TR_DNA row ) { s.row = row; }
00054   /** Set the start position in DNA coordinates where this "read" is placed in the contig */
00055   void setStartPos( TR_DNA startPos ) { s.startPos = startPos; }
00056   /** Set the end position in DNA coordinates where this "read" is placed in the contig */
00057   void setEndPos( TR_DNA endPos ) { s.endPos = endPos; }
00058   void setName( const std::string& n ) { name_ = n; }
00059   void setMate( const std::string& m ) { mate_ = m; }
00060   void setMateLength( const std::size_t& l ) { matelen_ = l; }
00061   void setStrand( const std::string& m ) { strand_ = m; }
00062   void setBeginGood( TR_DNA index ) { beginGood_ = index; }
00063   void setEndGood( TR_DNA index ) { endGood_ = index; }
00064   /** Returns the row where this "read" is placed in the contig */
00065   TR_DNA row() { return s.row; }
00066   /** Returns the start position in DNA coordinates where this "read" is placed in the contig */
00067   TR_DNA startPos() { return s.startPos; }
00068   /** Returns the end position in DNA coordinates where this "read" is placed in the contig */
00069   TR_DNA endPos() { return s.endPos; }
00070   std::string name() { return name_; }
00071   std::string mate() { return mate_; }
00072   std::size_t mateLength() { return matelen_; }
00073   std::string strand() { return strand_; }
00074   TR_DNA beginGood() { return beginGood_; }
00075   TR_DNA endGood() { return endGood_; }
00076   
00077 
00078   //Associate functions for secondary indices
00079   static int getRowPos(Db *dbp, const Dbt *pkey, const Dbt *pdata, Dbt *skey);
00080   static int getName(Db *dbp, const Dbt *pkey, const Dbt *pdata, Dbt *skey);
00081   //Compare functions for secondary indices
00082   static int bt_compare_rowPos(DB * db, const DBT *a, const DBT *b);
00083   static int bt_compare_end( DB * db, const DBT *dbt1, const DBT *dbt2);
00084   static int bt_compare_name( DB * db, const DBT *dbt1, const DBT *dbt2);
00085   
00086   
00087   /** \brief to keep the order of the data members when serializing/unserializing
00088    *
00089    * This might make the serializing/unserializing slightly faster ( probably not significant at all, not tested ).
00090    *   And specially the order is kept right between serializing and unserializing. But a problem with this 
00091    *  approach might be that the endian awareness of QDataStream might be lost, when we don't serialize the
00092    *  types one by one. It might be a problem when moving berkeley dbs around archictures. But berkeley db probably doesn't allow this anyway?*/
00093   struct StorageData
00094   {
00095     TR_DNA row;
00096     TR_DNA startPos;
00097     TR_DNA endPos;
00098   };
00099   void writeXml( std::ostream& stream );
00100   void readStream( QDataStream & stream );
00101   void writeStream( QDataStream & stream );
00102   void writeDom( QDomElement & elem );
00103   void readDom( QDomElement & elem );
00104   void readAttributes( const QXmlAttributes& attr );
00105 protected:
00106   StorageData s;
00107   std::string name_;
00108   std::string mate_;
00109   std::size_t matelen_;
00110   std::string strand_;
00111   TR_DNA beginGood_;
00112   TR_DNA endGood_;
00113   
00114 };
00115 
00116 #endif