Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

generaldata.h

Go to the documentation of this file.
00001 /*******************************************************************************
00002  *                                                                             *
00003  *   Copyright (C) 2003  Erik Sjolund,  (<html>Erik Sj&ouml;lund</html>)       *
00004  *                       Center for Genomics and Bioinformatics,               *
00005  *                       Karolinska Institutet,                                *
00006  *                       Stockholm,                                            *
00007  *                       Sweden                                                *
00008  *                                                                             *
00009  *  Author: Erik Sjolund                                                       *
00010  *  Email: erik.sjolund@adivo.se                                               *
00011  *                                                                             *
00012  *******************************************************************************
00013  */
00014 #ifndef GENERALDATA_H
00015 #define GENERALDATA_H
00016 
00017 // berkeley db
00018 #include <db_cxx.h>
00019 
00020 // qt classes
00021 #include <qdatastream.h>
00022 #include <qdom.h>
00023 #include <qxml.h>
00024 
00025 // stl
00026 #include <iostream>
00027 #include <string>
00028 #include <cassert>
00029 #include <iosfwd>
00030 
00031 #include "trdb.h"
00032 #include "trapperdoc.h"
00033 #include "generalmaker.h"
00034 #include "generaldata.h"
00035 
00036 
00037 class TrapperDoc;
00038 
00039 /**
00040   * \brief This class is the base class for all data items that are to be stored in the
00041   * Berkeley Db databases. It has methods for serializing its data to
00042   * and from a bytestream and has methods for writing the same data 
00043   * to and from an xml dom tree. It also lets you specify how this data should be
00044   * indexed in the Berkeley Db databases for very fast retrieval.
00045   *
00046   * The function of this class is to transport data in and out of the Berkeley db backend. 
00047   * It is important to realize that changing values of data members of GeneralData and its subclasses
00048   * will have no effect to other parts of your application before your GeneralData object has been
00049   * used in the Database interface. 
00050   * 
00051   * When you are implementing subclasses of GeneralData you will see the benefits of object orientation.
00052   * You just have to serialize/unserialize the extra data members of the subclass.
00053   * The parent class will have to serialize/unserialize its data members so you have to remember 
00054   * letting readStream() and writeStream() of your subclass call the readStream() resp. writeStream() of its parent class. 
00055   * Just be careful of the order 
00056   * of things when doing serializing and unserializing.
00057   * 
00058   * Right now there is a limit of 2^32 ( ca 4 billions ) of records that can be stored in the
00059   * berkeley db backend for each GeneralData subclass. This limit is easy to recognize from the 
00060   * fact that the index type, db_recno_t, is a u_int32_t.
00061   * For more information about berkeley db limits see: http://www.sleepycat.com/docs/ref/am_misc/dbsizes.html
00062   *
00063   *
00064   * Idea for the future to  maybe implement:
00065   * add hooks to GeneralData, like
00066   * virtual void hookBeforeCreate(Txnid *) and
00067   * virtual void hookBeforeRemove(Txnid *).  
00068   * Database::Creator would then call these hooks everytime a 
00069   * create or remove is to be done.
00070   * We would now have the possibility to add substring search functionality for the
00071   * dna sequence in the DnaStrData feature.
00072   * We implement a new Class, SubStrData : public GeneralData that stores e.g. sequences 
00073   * of length 12. 
00074   * The DnaStrData::hookBeforeCreate(Txnid *) creates each possible 12-length-substring as records
00075   * in the SubStrData.
00076   * The SubStrData::getIndexMap() should return two secondary indices: one with sorting of the DnaStrData recno,
00077   * and one with sorting in lexicographical order of the 12-length-substring
00078   */
00079 
00080 class GeneralData
00081 {
00082 public:
00083   GeneralData();
00084   virtual ~GeneralData(){}
00085   /**Prints some info for debugging, this should be removed in release version!   */
00086   virtual void print_debug_info() { std::cerr<<"No debug info for class "<<uniqueName()<<endl;}
00087   
00088   /** Returns the primary key index value of this GeneralData object. The primary key index value is  
00089    * unique among all stored objects of this class. But the primary key index values are
00090    * not unique between different subclasses of GeneralData. The primary key indices for GeneralData and its
00091    * subclasses are all implemented as primary databases of type Recno.
00092    * \sa http://www.sleepycat.com/docs/ref/am_conf/intro.html
00093    */ 
00094   db_recno_t getRecno( );
00095   
00096   /** Set the primary key index value of this GeneralData object. */ 
00097   void setRecno( db_recno_t recno ) { m_recno = recno; }
00098   
00099   /** Loads the state of this object from QXmlAttributes.This method is used for importing. 
00100    */
00101   virtual void readAttributes( const QXmlAttributes& attr )= 0;
00102   /** Writes the state of this object to a stream as an XML entry. This method is used for exporting
00103    */
00104   virtual void writeXml( std::ostream& stream )= 0;
00105   /** Loads the state of this object from a dom tree. Or in other words reads
00106    * in the values of the data members of this class from a dom tree underneath
00107    * the QDomElement elem. This method is used for importing. 
00108    */
00109   virtual void readDom( QDomElement & elem );
00110   
00111   /** Saves the state of this object to a dom tree. Or in other words writes
00112    * out the internal data held in this class to a dom tree underneath
00113    * the QDomElement elem. This method is used for exporting. 
00114    */
00115   virtual void writeDom( QDomElement & elem );
00116   
00117   /** Loads the state of this object from a byte stream. Or in other words reads
00118    * and unserializes a byte stream to set the values of this class object.
00119    * This method is used for loading a GeneralData object from Berkeley Db. 
00120    */
00121   
00122   virtual void readStream( QDataStream & stream ) = 0;
00123   
00124   /** Saves the state of this object to a byte stream. Or in other words serializes
00125    * and writes the internal data held in this class object to a byte stream.
00126    * This method is used for storing a GeneralData object into Berkeley Db. 
00127    */
00128   virtual void writeStream( QDataStream & stream ) = 0;
00129   
00130   /** Returns a TrDb::IndexMap that specifies which secondary indexes should be 
00131    * generated for stored data objects of this class in the Berkeley Db backend. 
00132    * Each record in the TrDb::IndexMap generates
00133    * a secondary index built as a btree ( a DB_BTREE in Berkeley Db wording ).
00134    * The secondary index is sorted which makes it possible to do queries like
00135    * "Give me the object that has the smallest value, bigger or equal than x".
00136    *
00137    * And about speed performance, a quote from Berkeley Db documentation says: 
00138    * "Searches take O(log base_b N) time, where base_b is the average number of 
00139    * keys per page, and N is the total number of keys stored"
00140    */
00141   virtual TrDb::IndexMap getIndexMap() = 0;
00142   
00143   /** Specifies a name that uniquely should identify this class. It will be used
00144    * for lookups in the GeneralMaker class.
00145    */  
00146   virtual std::string uniqueName() = 0;
00147 
00148   void copy(GeneralData* other);
00149   
00150 protected:
00151   db_recno_t m_recno;
00152 };
00153 
00154 /** \brief namespace for all interactions with the berkeley db 
00155  *
00156  */
00157 
00158 namespace Database
00159 {
00160   void setFromDbt( const Dbt * dbtData, GeneralData * general);
00161   
00162   /** \brief to create records in the berkeley db.
00163    * @param T the derived GeneralData class that the pointer data() points to. By having this template 
00164    * argument we avoid the need of dynamic_cast by the users of this class.
00165    * 
00166    */
00167   
00168   template <class T>
00169   class Creator
00170   {
00171   public:
00172     /** \brief constructor
00173      * @param doc doc with the berkeley dbs opened
00174      * @param dataType the lookup string for the derived GeneralData class that are to be stored.
00175      */
00176     Creator( TrapperDoc *doc, std::string dataType );
00177     ~Creator();
00178     /** \brief create a new record in the berkeley db
00179      *
00180      * @param useRecno if true save at the index place of given from the GeneralData::getRecno() of the data().
00181      * If false a new recno value is automatically chosen by berkeley db. 
00182      * @param txnid a transaction id to encompass this operation in. If NULL
00183      * this operation will not be transaction-protected.
00184      
00185     */
00186     db_recno_t create( bool useRecno, DbTxn *txnid = NULL );
00187     /** \brief Removes a record from the DB 
00188      */
00189     void destroy( DbTxn *txnid = NULL );
00190     /** \brief returns a pointer to the data 
00191      */
00192     T * data() { return m_data; }
00193   protected:
00194     T * m_data;
00195     Db * m_db;
00196   };
00197   
00198   template <class T>
00199   Database::Creator<T>::Creator( TrapperDoc * doc, std::string dataType )
00200   {
00201     m_data = dynamic_cast< T * >(GeneralMaker::newData( dataType ));
00202     Q_CHECK_PTR( m_data );
00203     std::string datatype = m_data->uniqueName();
00204     m_db = doc->findTrDb( datatype )->primaryDb();
00205 //     cerr<<"In Creator(), doc: "<<doc<<endl;
00206   }
00207   
00208   template <class T>
00209   Database::Creator<T>::~Creator()
00210   {
00211     Q_CHECK_PTR( m_data );
00212     delete m_data;
00213   }
00214   
00215   template <class T>
00216   db_recno_t Database::Creator<T>::create( bool useRecno, DbTxn *txnid )
00217   {
00218 //     std::cerr<<"In "<<std::endl;
00219 //     std::cerr<<"txnid: "<<txnid<<std::endl;
00220     QByteArray ar;
00221     QDataStream stream( ar, IO_WriteOnly );
00222 //     std::cerr<<"Check1 "<<std::endl;
00223     m_data->writeStream( stream );
00224 //     std::cerr<<"Check2 "<<std::endl;
00225     
00226     Dbt data( ar.data(), ar.size() );
00227 //     std::cerr<<"Check3 "<<std::endl;
00228     Dbt key;
00229 
00230     //     db_recno_t recno;
00231     db_recno_t the_recno(0);//EA
00232     u_int32_t flags = 0;
00233     if ( useRecno )
00234       {
00235 //         std::cerr<<"Check4:1 "<<std::endl;
00236         the_recno = m_data->getRecno();
00237         key.set_data( &the_recno );
00238         key.set_size( sizeof( the_recno ) );
00239       }
00240     else
00241       {
00242 //         std::cerr<<"Check4:2 "<<std::endl;
00243         flags |= DB_APPEND;
00244         assert( flags == DB_APPEND );
00245       }
00246 //     std::cerr<<"Check5 "<<std::endl;
00247 //     std::cerr<<"Db: "<<m_db<<std::endl;
00248 //     m_data->print_debug_info();
00249 //     std::cerr<<"Back from debug "<<endl;
00250     
00251     int ret;
00252     if ((ret = m_db->put(txnid, &key, &data, flags )) != 0)
00253       {
00254 //         std::cerr<<"Check5:1 "<<std::endl;
00255         m_db->err(ret, "error in create");
00256 //         std::cerr<<"Check5:2 "<<std::endl;
00257         exit( 1 );
00258       }
00259 //     std::cerr<<"Check6 "<<std::endl;
00260     db_recno_t recnoRet = * static_cast< db_recno_t * >( key.get_data() );
00261 //     std::cerr<<"Out "<<std::endl;
00262     return recnoRet;
00263   }
00264   template <class T>
00265   void Database::Creator<T>::destroy( DbTxn *txnid )
00266   {
00267     Dbt key;
00268 
00269     db_recno_t the_recno(0);
00270     u_int32_t flags = 0;
00271     the_recno = m_data->getRecno();
00272     key.set_data( &the_recno );
00273     key.set_size( sizeof( the_recno ) );
00274     
00275     int ret;
00276     if ((ret = m_db->del(txnid, &key, flags )) != 0)
00277       {
00278         m_db->err(ret, "error in Creator::destroy");
00279         exit( 1 );
00280       }
00281   }
00282   /** \brief to search records and iterate over a secondary index
00283    * @param T the derived GeneralData class that the pointer data() points to. By having this template 
00284    * argument we avoid the need of dynamic_cast by the users of this class.
00285    * 
00286    */
00287   template <class T>
00288   class SecondaryIterator
00289   {
00290   public:
00291     /** \brief constructor
00292      *
00293      * @param secondaryIndexStr the secondary index lookup string
00294      * @param doc doc with the berkeley dbs opened
00295      * @param generalDataType the lookup string for the derived GeneralData class that are to be stored.
00296      * @param txnid a transaction id to encompass this operation in. If NULL
00297      * this operation will not be transaction-protected.
00298      */
00299     SecondaryIterator( string secondaryIndexStr, TrapperDoc * doc, string generalDataType, DbTxn *txnid = NULL );
00300     ~SecondaryIterator();
00301     std::string name() { return m_name; }
00302     int set();
00303     int setRange();
00304     int nextdup();
00305     int next();
00306     int first();
00307     int last();
00308     int delCurrentInclDups();
00309     int pget( Dbt & key, u_int32_t flags );
00310     T * key() { return m_GeneralData_key; }
00311     T * answer() { return m_GeneralData_answer; }
00312     void closeCursor();
00313   protected:
00314     void fillSecondaryDataDbt( Dbt & secondaryKey, const Dbt & primaryData );
00315     T * m_GeneralData_key;
00316     T * m_GeneralData_answer;
00317     Dbc * m_cursor;
00318     TrDb::Index index;
00319     std::string m_name;
00320     DbTxn* txn;
00321   };
00322   
00323   template <class T>
00324   SecondaryIterator<T>::SecondaryIterator( string secondaryIndexStr, TrapperDoc * doc, string generalDataType, DbTxn *txnid )
00325   {
00326     assert( doc!= 0 );
00327     
00328     m_name = generalDataType;
00329     m_GeneralData_key = dynamic_cast< T * >(GeneralMaker::newData( generalDataType ));
00330     m_GeneralData_answer = dynamic_cast< T * >(GeneralMaker::newData( generalDataType ));
00331     index = doc->findTrDb( generalDataType )->secondaryIndex( secondaryIndexStr );
00332     Q_CHECK_PTR(m_GeneralData_key);
00333     Q_CHECK_PTR(m_GeneralData_answer);
00334 
00335     index.db->cursor( txnid , &m_cursor, 0 );
00336 
00337     txn = txnid;
00338   }
00339   
00340   template <class T>
00341   SecondaryIterator<T>::~SecondaryIterator( )
00342   {
00343     if ( m_cursor ) {
00344       m_cursor->close();
00345     }
00346    if ( m_GeneralData_key ) {
00347      delete m_GeneralData_key;
00348      m_GeneralData_key = NULL;
00349    }
00350     if ( m_GeneralData_answer ) {
00351       delete m_GeneralData_answer;
00352       m_GeneralData_answer = NULL;
00353     }
00354   }
00355   template <class T>
00356   void SecondaryIterator<T>::closeCursor()
00357   {
00358     assert( m_cursor != 0 );
00359     m_cursor->close();
00360     m_cursor = 0;
00361   }
00362   
00363   
00364   template <class T>
00365   int SecondaryIterator<T>::set()
00366   {
00367       
00368     Dbt secondaryKey;
00369     QByteArray ar;
00370     QDataStream stream( ar, IO_WriteOnly );
00371     m_GeneralData_key->writeStream( stream );
00372     Dbt primaryData( ar.data(), ar.size() );
00373     fillSecondaryDataDbt( secondaryKey, primaryData );
00374     return pget( secondaryKey, DB_SET );
00375   }
00376 
00377   template <class T>
00378   int SecondaryIterator<T>::delCurrentInclDups()
00379   {
00380     Dbt secondaryKey;
00381     QByteArray ar;
00382     QDataStream stream( ar, IO_WriteOnly );
00383     m_GeneralData_key->writeStream( stream );
00384     Dbt primaryData( ar.data(), ar.size() );
00385     fillSecondaryDataDbt( secondaryKey, primaryData );
00386     
00387     int ret;
00388     if ( (ret = index.db->del(txn, &secondaryKey, 0)) != 0 ){
00389       if ( ret != DB_NOTFOUND )
00390         {
00391           index.db->err(ret, "del() call in SecondaryIterator::delCurrentInclDups()");
00392         }
00393       
00394     }
00395     return ret;
00396   }
00397   
00398   
00399   template <class T>
00400   int SecondaryIterator<T>::setRange()
00401   {
00402     Dbt secondaryKey;
00403     QByteArray ar;
00404     QDataStream stream( ar, IO_WriteOnly );
00405     m_GeneralData_key->writeStream( stream );
00406     Dbt primaryData( ar.data(), ar.size() );
00407     fillSecondaryDataDbt( secondaryKey, primaryData );
00408     return pget( secondaryKey, DB_SET_RANGE );
00409   }
00410   
00411   template <class T>
00412   void SecondaryIterator<T>::fillSecondaryDataDbt( Dbt & secondaryKey, const Dbt & primaryData )
00413   {
00414     Dbt primaryKey;
00415     index.associate_func( index.db , &primaryKey, &primaryData, &secondaryKey);
00416     return;
00417   }
00418   
00419   template <class T>
00420   int SecondaryIterator<T>::pget( Dbt & key, u_int32_t flags )
00421   {
00422     if ( !m_cursor )
00423       index.db->cursor( txn , &m_cursor, 0 );
00424 
00425 
00426     Dbt primary_key;
00427     Dbt primary_data;
00428     int ret;
00429     if ((ret = m_cursor->pget(&key, &primary_key, &primary_data, flags )) != 0)
00430       {
00431         if ( ret != DB_NOTFOUND )
00432           {
00433             index.db->err(ret, "pget call in SecondaryIterator::pget");
00434           }
00435         //  throw DbException(ret);
00436       }
00437     else
00438       {
00439         Database::setFromDbt( &primary_data, m_GeneralData_answer );
00440         db_recno_t recno = * static_cast<db_recno_t *> ( primary_key.get_data() );
00441         m_GeneralData_answer->setRecno( recno );
00442       }
00443     return ret;
00444   }
00445   
00446   template <class T>
00447   int SecondaryIterator<T>::nextdup()
00448   {
00449     Dbt secondaryKey;
00450     return pget( secondaryKey, DB_NEXT_DUP );
00451   }
00452   
00453   template <class T>
00454   int SecondaryIterator<T>::next()
00455   {
00456     Dbt secondaryKey;
00457     return pget( secondaryKey, DB_NEXT );
00458   }
00459 
00460   template <class T>
00461   int SecondaryIterator<T>::first()
00462   {
00463     Dbt secondaryKey;
00464     return pget( secondaryKey, DB_FIRST );
00465   }
00466 
00467   template <class T>
00468   int SecondaryIterator<T>::last()
00469   {
00470     Dbt secondaryKey;
00471     return pget( secondaryKey, DB_LAST );
00472   }
00473 
00474   
00475   
00476   template <class T>
00477   class PrimaryIterator
00478   {
00479   public:
00480     PrimaryIterator( TrapperDoc * doc, string generalDataType, DbTxn *txnid = NULL );
00481     ~PrimaryIterator();
00482     int first();
00483     int next();
00484     int get( u_int32_t flags, Dbt & primary_key );
00485     T * key() { return m_GeneralData_key; }
00486     T * answer() { return m_GeneralData_answer; }
00487     int delCurrent();
00488     int setFromRecno( db_recno_t recno );
00489   protected:
00490     /**Not implemented... What's this for anyway??
00491      */
00492     void fillSecondaryDataDbt( Dbt & secondaryKey, const Dbt & primaryData );
00493     T * m_GeneralData_key;
00494     T * m_GeneralData_answer;
00495     Dbc * m_cursor;
00496     Db * m_db;
00497     std::string name;
00498   };
00499 
00500   
00501   template <class T>
00502   PrimaryIterator<T>::PrimaryIterator( TrapperDoc * doc, string generalDataType, DbTxn *txnid )
00503   {
00504     m_GeneralData_key = dynamic_cast< T * >(GeneralMaker::newData( generalDataType ));
00505     m_GeneralData_answer = dynamic_cast< T * >(GeneralMaker::newData( generalDataType ));
00506     m_db = doc->findTrDb( generalDataType )->primaryDb();
00507     Q_CHECK_PTR(m_GeneralData_key);
00508     Q_CHECK_PTR(m_GeneralData_answer);
00509     m_db->cursor( txnid , &m_cursor, 0 );
00510   }
00511   
00512   template <class T>
00513   PrimaryIterator<T>::~PrimaryIterator( )
00514   {
00515     m_cursor->close();
00516     if ( m_GeneralData_key )
00517       {
00518         delete m_GeneralData_key;
00519         m_GeneralData_key = NULL;
00520       }
00521     if ( m_GeneralData_answer )
00522       {
00523         delete m_GeneralData_answer;
00524         m_GeneralData_answer = NULL;
00525       }
00526   }
00527   
00528   template <class T>
00529   int PrimaryIterator<T>::delCurrent() 
00530   {
00531     int ret;
00532     if ((ret = m_cursor->del(0)) != 0)
00533       {
00534         m_db->err(ret, "get call in PrimaryIterator::get");
00535       }
00536     return ret;    
00537   }
00538   
00539   template <class T>
00540   int PrimaryIterator<T>::first()
00541   {
00542     Dbt key;
00543     return get( DB_FIRST, key );
00544   }
00545   
00546   template <class T>
00547   int PrimaryIterator<T>::get( u_int32_t flags, Dbt & primary_key )
00548   {
00549     Dbt primary_data;
00550     int ret;
00551     if ((ret = m_cursor->get
00552          (&primary_key, &primary_data, flags )) != 0)
00553       {
00554         if ( ret != DB_NOTFOUND )
00555           {
00556             m_db->err(ret, "get call in PrimaryIterator::get");
00557           }
00558         //  throw DbException(ret);
00559       }
00560     else
00561       {
00562         setFromDbt( &primary_data, m_GeneralData_answer );
00563         db_recno_t recno = * static_cast<db_recno_t *> ( primary_key.get_data() );
00564         m_GeneralData_answer->setRecno( recno );
00565       }
00566     return ret;
00567   }
00568   
00569   template <class T>
00570   int PrimaryIterator<T>::setFromRecno( db_recno_t recno )
00571   {
00572     Dbt key;
00573     key.set_size( sizeof( db_recno_t ));
00574     key.set_data( &recno );
00575     return get( DB_SET, key);
00576   }
00577   
00578   template <class T>
00579   int PrimaryIterator<T>::next()
00580   {
00581     Dbt key;
00582     return get( DB_NEXT, key );
00583   }
00584   
00585 }//END Database namespace
00586 
00587 #endif

Generated on Fri Mar 17 17:44:24 2006 for trapper by  doxygen 1.4.4