Main Page   Class Hierarchy   Compound List   File List   Compound Members  

database.h

00001 //-< DATABASE.H >----------------------------------------------------*--------*
00002 // FastDB                    Version 1.0         (c) 1999  GARRET    *     ?  *
00003 // (Main Memory Database Management System)                          *   /\|  *
00004 //                                                                   *  /  \  *
00005 //                          Created:     20-Nov-98    K.A. Knizhnik  * / [] \ *
00006 //                          Last update: 23-Dec-98    K.A. Knizhnik  * GARRET *
00007 //-------------------------------------------------------------------*--------*
00008 // Database management
00009 //-------------------------------------------------------------------*--------*
00010 
00011 #ifndef __DATABASE_H__
00012 #define __DATABASE_H__
00013 
00014 #include "class.h"
00015 #include "reference.h"
00016 #include "file.h"
00017 
00021 const size_t dbDefaultInitDatabaseSize = 1024*1024; // 1024*1024; default value of the dbInitSize argument of the dbDatabase constructor
00022 
00026 const size_t dbDefaultInitIndexSize = 512*1024; // 512*1024; default value of the dbInitIndexSize argument of the dbDatabase constructor (unit not shown in this header)
00027 
00031 const size_t dbDefaultExtensionQuantum = 4*1024*1024; // 4*1024*1024; default value of the dbExtensionQuantum argument of the dbDatabase constructor
00032 
00036 const unsigned dbMaxParallelSearchThreads = 64; // presumably the upper bound on threads used for a parallel search -- confirm against the implementation
00037 
00041 enum dbInternalObject {  // object kinds; dbDatabase::allocateObject() adds the value to the offset stored in the object index as a tag
00042     dbTableRow,  // 0: no tag bits set (dbDatabase::getRow() asserts this)
00043     dbPageObjectMarker,
00044     dbTtreeMarker,
00045     dbTtreeNodeMarker,
00046     dbHashTableMarker,
00047     dbHashTableItemMarker,
00048     
00049     dbInternalObjectMarker = 7 // mask for internal object markers (low three bits of an index entry)
00050 };
00051     
00052 const offs_t dbFreeHandleMarker = (offs_t)1 << (sizeof(offs_t)*8 - 1); // most significant bit of offs_t; dbDatabase::getRow() asserts it is clear in an index entry
00053 
00054 const size_t dbAllocationQuantumBits = 4;
00055 const size_t dbAllocationQuantum = 1 << dbAllocationQuantumBits; // 16
00056 const size_t dbPageBits = 12;
00057 const size_t dbPageSize = 1 << dbPageBits; // 4096
00058 const size_t dbIdsPerPage = dbPageSize / sizeof(oid_t); // number of oid_t values that fit in one page
00059 const size_t dbHandlesPerPage = dbPageSize / sizeof(offs_t); // number of offs_t index entries that fit in one page
00060 const size_t dbBitmapSegmentBits = dbPageBits + 3 + dbAllocationQuantumBits; // presumably log2 of the bytes covered by one bitmap page (8 bits per byte, one bit per quantum) -- confirm
00061 const size_t dbBitmapSegmentSize = 1 << dbBitmapSegmentBits;
00062 const size_t dbBitmapPages = 1 << (dbDatabaseOffsetBits-dbBitmapSegmentBits); // dbDatabaseOffsetBits is declared outside this header
00063 const size_t dbDirtyPageBitmapSize = 1 << (dbDatabaseOidBits-dbPageBits-3); // sizes dbMonitor::dirtyPagesMap; dbDatabaseOidBits is declared outside this header
00064 const size_t dbDefaultSelectionLimit = 2000000000;
00065 
00066 const int    dbBMsearchThreshold = 512; // NOTE(review): "BM" presumably stands for Boyer-Moore substring search -- confirm
00067 
00068 const char   dbMatchAnyOneChar = '_'; // presumably the single-character wildcard of pattern matching -- confirm
00069 const char   dbMatchAnySubstring = '%'; // presumably the any-substring wildcard of pattern matching -- confirm
00070 
00074 enum dbPredefinedIds {  // object identifiers with a fixed meaning
00075     dbInvalidId, // 0
00076     dbMetaTableId,  // 1
00077     dbBitmapId, // 2; ids dbBitmapId .. dbFirstUserId-1 form a range of dbBitmapPages ids
00078     dbFirstUserId = dbBitmapId + dbBitmapPages // first id after that range
00079 };
00080 
00084 class dbHeader {  // header record of the database; dbDatabase accesses it through its `header` pointer
00085   public:
00086     offs_t size;  // database file size
00087     int4   curr;  // current root (presumably an index into root[] below -- confirm)
00088     int4   dirty; // database was not closed normally
00089     int4   initialized; // database is initialized
00090     struct { 
00091         offs_t index;           // offset to object index
00092         offs_t shadowIndex;     // offset to shadow index
00093         oid_t  indexSize;       // size of object index
00094         oid_t  shadowIndexSize; // size of shadow index
00095         oid_t  indexUsed;       // used part of the index   
00096         oid_t  freeList;        // L1 list of free descriptors
00097     } root[2];                  // two root descriptors
00098     
00099     int4 majorVersion;
00100     int4 minorVersion;
00101 };
00102 
00103 union  dbSynthesizedAttribute;
00104 struct dbInheritedAttribute;
00105 class dbDatabaseThreadContext;
00106 class dbAnyCursor;
00107 class dbQuery;
00108 class dbExprNode;
00109 
00110 class dbMonitor {  // synchronization and bookkeeping state; dbDatabase holds it as dbSharedObject<dbMonitor>
00111   public:
00112     sharedsem_t sem;
00113     sharedsem_t mutatorSem;
00114     int  nReaders;
00115     int  nWriters;
00116     int  nWaitReaders;
00117     int  nWaitWriters;
00118     int  waitForUpgrade;
00119     int  forceCommitCount;
00120     int  backupInProgress;
00121     int  uncommittedChanges;
00122 
00123     int  curr;             // copy of the header's current root (presumably header->curr), used to allow read access 
00124                            // to the database during transaction commit
00125 
00126     int  commitInProgress;
00127     int  concurrentTransId;
00128 
00129     unsigned lastDeadlockRecoveryTime;
00130 
00131     int  version; 
00132     int  users;  
00133 
00134     dbProcessId ownerPid;
00135 
00136     dbDatabaseThreadContext*  delayedCommitContext;     // save context of delayed transaction
00137 
00138     int  dirtyPagesMap[dbDirtyPageBitmapSize/4]; // bitmap with one bit per object-index page; bits are set by dbDatabase::putRow()/put()
00139 };
00140 
00144 class FASTDB_DLL_ENTRY dbL2List {  // node of a circular doubly linked list; a node linked only to itself represents an empty list
00145   public:
00146     dbL2List* next; 
00147     dbL2List* prev; 
00148 
00149     void link(dbL2List* elem) {  // insert elem immediately after this node; elem is not unlinked from any list it is already in
00150         elem->prev = this;
00151         elem->next = next;
00152         next = next->prev = elem; // both the old successor's prev and this node's next now point to elem
00153     }
00154     void unlink() {  // remove this node from its list and leave it self-linked
00155         next->prev = prev;
00156         prev->next = next;
00157         next = prev = this;
00158     }
00159     bool isEmpty() {  // true when this node is linked only to itself
00160         return next == this;
00161     }
00162     void reset() {  // self-link without touching the former neighbours
00163         next = prev = this;
00164     }        
00165     dbL2List() {  // a new node starts self-linked
00166         next = prev = this;
00167     }
00168     ~dbL2List() {  // a destroyed node removes itself from whatever list it is in
00169         unlink();
00170     }
00171 };
00172 
00176 class FASTDB_DLL_ENTRY dbDatabase {  // database handle: open/close, transaction control, object-index access and storage allocation
00177     friend class dbSelection;
00178     friend class dbAnyCursor;
00179     friend class dbHashTable;
00180     friend class dbQuery;
00181     friend class dbTtree;
00182     friend class dbTtreeNode;
00183     friend class dbParallelQueryContext; 
00184     friend class dbServer;
00185     friend class dbColumnBinding;
00186     friend class dbUserFunctionArgument;
00187     friend class dbAnyContainer;
00188     friend class dbCLI;
00189     friend class GiSTdb;
00190   public:
00200     bool open(char const* databaseName, 
00201               char const* fileName = NULL, 
00202               time_t waitLockTimeoutMsec = INFINITE, 
00203               time_t commitDelaySec = 0);
00204 
00208     void close();
00209 
00213     void commit();
00214 
00219     void precommit();
00220     
00224     void rollback();
00225 
00232     void scheduleBackup(char const* fileName, time_t periodSec);
00233     
00238     void attach();
00239     
00240     enum DetachFlags {  // bit flags combined in the argument of detach()
00241         COMMIT          = 1,
00242         DESTROY_CONTEXT = 2
00243     };
00248     void detach(int flags = COMMIT|DESTROY_CONTEXT);
00249 
00253     void lock() { beginTransaction(dbExclusiveLock); } // the bool result of beginTransaction() is discarded
00254 
00263     bool backup(char const* file, bool compactify);
00264     
00269     void assign(dbTableDescriptor& desc) {  // bind a table descriptor to this database; asserts desc.tableId == 0
00270         assert(((void)"Table is not yet assigned to the database", 
00271                 desc.tableId == 0));
00272         desc.db = this; 
00273         desc.fixedDatabase = true;
00274     }
00275 
00283     void setConcurrency(unsigned nThreads);
00284 
00289     long getAllocatedSize() { return allocatedSize; } // returns the allocatedSize member
00290 
00295     long getDatabaseSize() { return header->size; } // header is dereferenced unconditionally; the offs_t value is converted to long
00296 
00297     enum dbErrorClass {  // error codes passed to handleError() and to a dbErrorHandler
00298         NoError, 
00299         QueryError,
00300         ArithmeticError,
00301         IndexOutOfRangeError,
00302         DatabaseOpenError,
00303         FileError,
00304         OutOfMemoryError,
00305         Deadlock,
00306         NullReferenceError,
00307         LockRevoked,
00308         FileLimitExeeded        // (sic) the misspelling is part of the public name
00309     };
00310     typedef void (*dbErrorHandler)(dbErrorClass error, char const* msg, int msgarg); 
00311 
00317     dbErrorHandler setErrorHandler(dbErrorHandler newHandler);        
00318 
00326     virtual void handleError(dbErrorClass error, char const* msg = NULL, 
00327                              int arg = 0); 
00328 
00335     void insertRecord(dbTableDescriptor* table, dbAnyReference* ref, 
00336                       void const* record);
00337 
00341     bool isOpen() const { return opened; }
00342 
00346     int  getVersion();
00347 
00352     void setFileSizeLimit(size_t limit) {  // stores limit in dbFileSizeLimit
00353         dbFileSizeLimit = limit;
00354     }
00355 
00356 #ifndef NO_MEMBER_TEMPLATES
00357 
00362     template<class T>
00363     dbReference<T> insert(T const& record) { // insert record into the table that lookupTable() returns for T::dbDescriptor
00364         dbReference<T> ref;
00365         insertRecord(lookupTable(&T::dbDescriptor), &ref, &record);
00366         return ref; // ref was handed to insertRecord() by address
00367     }
00368 #endif
00369 
00375     dbTableDescriptor* lookupTable(dbTableDescriptor* desc);
00376 
00377     enum dbAccessType {  // access mode chosen at construction and kept in the const member accessType
00378         dbReadOnly         = 0,
00379         dbAllAccess        = 1,
00380         dbConcurrentRead   = 2,
00381         dbConcurrentUpdate = 3
00382     };
00394     dbDatabase(dbAccessType type = dbAllAccess,
00395                size_t dbInitSize = dbDefaultInitDatabaseSize,
00396                size_t dbExtensionQuantum = dbDefaultExtensionQuantum,
00397                size_t dbInitIndexSize = dbDefaultInitIndexSize,
00398                int nThreads = 1 
00399                // Do not specify the last parameter - it is only for checking
00400                // that application and FastDB library were built with the 
00401                // same compiler options (-DNO_PTHREADS is critical)
00402                // Mismatched parameters should cause linker error
00403 #ifdef NO_PTHREADS
00404                , bool usePthreads = false
00405 #endif
00406                );
00410     virtual ~dbDatabase(); 
00411 
00417     static void cleanup();
00418     
00419     const dbAccessType accessType;
00420     const size_t initSize;
00421     const size_t extensionQuantum;
00422     const size_t initIndexSize;
00423 
00424     static unsigned dbParallelScanThreshold; 
00425 
00426   protected:
00427     static size_t internalObjectSize[]; // indexed by a dbInternalObject tag (see put() and allocateObject())
00428 
00429     dbThreadPool threadPool;
00430 
00431     dbThreadContext<dbDatabaseThreadContext> threadContext;
00432 
00433     byte*     baseAddr;         // base address of database file mapping
00434     dbHeader* header;           // database header (NOTE(review): presumably located at the start of the mapping -- confirm)
00435     offs_t*   currIndex;        // current database object index
00436     offs_t*   index[2];         // the two object index copies compared by putRow() and put()
00437     unsigned  parThreads;
00438     bool      modified;
00439 
00440     size_t    currRBitmapPage;  //current bitmap page for allocating records
00441     size_t    currRBitmapOffs;  //offset in current bitmap page for allocating
00442                                 //unaligned records
00443     size_t    currPBitmapPage;  //current bitmap page for allocating page objects
00444     size_t    currPBitmapOffs;  //offset in current bitmap page for allocating
00445                                 //page objects
00446     struct dbLocation {  // singly linked list item describing a position and a size
00447         offs_t      pos;
00448         size_t      size;
00449         dbLocation* next;
00450     };
00451     dbLocation* reservedChain;
00452     
00453     char*     databaseName;
00454     int       databaseNameLen;
00455     char*     fileName;
00456     int       version;
00457 
00458     size_t    mmapSize;
00459     
00460     size_t    committedIndexSize;
00461     size_t    currIndexSize;
00462     oid_t     updatedRecordId;
00463 
00464     unsigned  dbWaitLockTimeout;
00465 
00466     size_t    dbFileSizeLimit; // set by setFileSizeLimit()
00467 
00468     bool      uncommittedChanges;
00469 
00470     dbFile                    file;
00471     dbSharedObject<dbMonitor> shm;
00472     dbGlobalCriticalSection   cs;
00473     dbGlobalCriticalSection   mutatorCS;
00474     dbInitializationMutex     initMutex;
00475     dbSemaphore               writeSem; 
00476     dbSemaphore               readSem; 
00477     dbSemaphore               upgradeSem; 
00478     dbEvent                   backupCompletedEvent;
00479     dbMonitor*                monitor;
00480 
00481     dbTableDescriptor*        tables;
00482 
00483     int*                      bitmapPageAvailableSpace;
00484     bool                      opened; // value returned by isOpen()
00485 
00486     long                      allocatedSize; // value returned by getAllocatedSize()
00487     
00488     time_t                    commitDelay;     
00489     time_t                    commitTimeout;
00490     time_t                    commitTimerStarted;
00491     
00492     dbMutex                   delayedCommitStartTimerMutex;
00493     dbMutex                   delayedCommitStopTimerMutex;
00494     dbLocalEvent              delayedCommitStartTimerEvent; 
00495     dbEvent                   delayedCommitStopTimerEvent; 
00496     dbLocalEvent              commitThreadSyncEvent;
00497 
00498     dbMutex                   backupMutex;    
00499     dbLocalEvent              backupInitEvent;
00500     char*                     backupFileName;
00501     time_t                    backupPeriod;
00502     bool                      stopDelayedCommitThread;
00503 
00504     dbThread                  backupThread;
00505     dbThread                  commitThread;
00506  
00507     int                       accessCount;
00508 
00509     dbL2List                  threadContextList;
00510     dbMutex                   threadContextListMutex;
00511 
00512     dbErrorHandler            errorHandler;
00513 
00514     void delayedCommit();
00515     void backupScheduler();
00516 
00517     static void thread_proc delayedCommitProc(void* arg) {  // static trampoline: arg is cast to dbDatabase* and delayedCommit() is run on it
00518         ((dbDatabase*)arg)->delayedCommit();
00519     }
00520 
00521     static void thread_proc backupSchedulerProc(void* arg) {  // static trampoline: arg is cast to dbDatabase* and backupScheduler() is run on it
00522         ((dbDatabase*)arg)->backupScheduler();
00523     }
00524 
00529     void commit(dbDatabaseThreadContext* ctx);
00530 
00535     void restoreTablesConsistency();
00536 
00542     dbRecord* getRow(oid_t oid) {  // read access to a table row; asserts the index entry has neither the free marker nor tag bits set
00543         assert(!(currIndex[oid]&(dbFreeHandleMarker|dbInternalObjectMarker)));
00544         return (dbRecord*)(baseAddr + currIndex[oid]); 
00545     }
00546 
00556     dbRecord* putRow(oid_t oid, size_t newSize); 
00557 
00563     dbRecord* putRow(oid_t oid) {  // write access to a table row, keeping its current size
00564         if (oid < committedIndexSize && index[0][oid] == index[1][oid]) { // both index copies still hold the same offset for this oid
00565             size_t size = getRow(oid)->size;
00566             size_t pageNo = oid/dbHandlesPerPage;
00567             monitor->dirtyPagesMap[pageNo >> 5] |= 1 << (pageNo & 31); // mark the index page holding this oid as dirty
00568             cloneBitmap(currIndex[oid], size);
00569             allocate(size, oid); // presumably stores the new offset in currIndex[oid] -- confirm; note put() calls allocate() before cloneBitmap()
00570         } 
00571         return (dbRecord*)(baseAddr + currIndex[oid]); 
00572     }
00573 
00579     byte* get(oid_t oid) {  // read access to an object: the tag bits are masked off the index entry
00580         return baseAddr + (currIndex[oid] & ~dbInternalObjectMarker); 
00581     }
00582 
00588     byte* put(oid_t oid) {  // write access to an internal object; its size comes from internalObjectSize[tag]
00589         if (oid < committedIndexSize && index[0][oid] == index[1][oid]) { // both index copies still hold the same offset for this oid
00590             offs_t offs = currIndex[oid]; // saved before allocate() so the old location can be passed to cloneBitmap()
00591             size_t size = internalObjectSize[offs & dbInternalObjectMarker];
00592             size_t pageNo = oid/dbHandlesPerPage;
00593             monitor->dirtyPagesMap[pageNo >> 5] |= 1 << (pageNo & 31); // mark the index page holding this oid as dirty
00594             allocate(size, oid);
00595             cloneBitmap(offs & ~dbInternalObjectMarker, size);
00596         } 
00597         return baseAddr + (currIndex[oid] & ~dbInternalObjectMarker); 
00598     }
00599     
00612     bool isIndexApplicable(dbAnyCursor* cursor, 
00613                            dbExprNode* expr, dbExprNode* andExpr, 
00614                            dbFieldDescriptor* &indexedField);
00615 
00627     bool isIndexApplicable(dbAnyCursor* cursor, 
00628                            dbExprNode* expr, dbExprNode* andExpr);
00629 
00642     bool followInverseReference(dbExprNode* expr, dbExprNode* andExpr, 
00643                                 dbAnyCursor* cursor, oid_t iref);
00644 
00652     bool existsInverseReference(dbExprNode* expr, int nExistsClauses);
00653 
00663     static void _fastcall execute(dbExprNode* expr, 
00664                                   dbInheritedAttribute& iattr,
00665                                   dbSynthesizedAttribute& sattr);
00666 
00676     bool evaluate(dbExprNode* expr, oid_t oid, dbTable* table, dbAnyCursor* cursor);
00677 
00682     void select(dbAnyCursor* cursor);
00683 
00689     void select(dbAnyCursor* cursor, dbQuery& query);
00690 
00696     void traverse(dbAnyCursor* cursor, dbQuery& query);
00697 
00704     void update(oid_t oid, dbTableDescriptor* table, void const* record);
00705     
00711     void remove(dbTableDescriptor* table, oid_t oid);
00712 
00720     offs_t allocate(size_t size, oid_t oid = 0);
00721 
00727     void deallocate(offs_t pos, size_t size);
00728 
00734     void extend(offs_t size);
00735 
00741     void cloneBitmap(offs_t pos, size_t size);
00742 
00749     oid_t allocateId(int n = 1);
00750 
00757     void freeId(oid_t oid, int n = 1);
00758 
00763     void updateCursors(oid_t oid);
00764     
00768     void recovery();
00769 
00778     bool checkVersion();
00779 
00785     oid_t allocateObject(dbInternalObject marker) {  // create an internal object of the given kind and return its new id
00786         oid_t oid = allocateId();
00787         currIndex[oid] = allocate(internalObjectSize[marker]) + marker; // the kind is stored as a tag added to the offset
00788         return oid;
00789     }
00790 
00797     oid_t allocateRow(oid_t tableId, size_t size);
00798 
00804     void freeRow(oid_t tableId, oid_t oid);
00805 
00809     void freeObject(oid_t oid); 
00810     
00814     static void deleteCompiledQuery(dbExprNode* tree); 
00815 
00822     enum dbLockType {  // lock kinds accepted by beginTransaction()
00823         dbSharedLock, 
00824         dbExclusiveLock,
00825         dbCommitLock
00826     };
00827 
00828     bool beginTransaction(dbLockType);
00829 
00833     void endTransaction() {  // forwards to endTransaction(ctx) with the value of threadContext.get()
00834         endTransaction(threadContext.get());
00835     }
00836     
00841     void endTransaction(dbDatabaseThreadContext* ctx);
00842 
00847     void initializeMetaTable();
00848     
00856     bool loadScheme(bool alter);
00857 
00863     bool completeDescriptorsInitialization();
00864 
00870     void reformatTable(oid_t tableId, dbTableDescriptor* desc);
00871 
00879     bool addIndices(bool alter, dbTableDescriptor* desc);
00880 
00886     oid_t addNewTable(dbTableDescriptor* desc);
00887 
00893     void updateTableDescriptor(dbTableDescriptor* desc, oid_t tableId);
00894 
00903     void insertInverseReference(dbFieldDescriptor* fd, 
00904                                 oid_t reverseId, oid_t targetId);
00905 
00911     void removeInverseReferences(dbTableDescriptor* desc, oid_t oid);
00912 
00921     void removeInverseReference(dbFieldDescriptor* fd, 
00922                                 oid_t reverseId, oid_t targetId);
00923 
00928     void deleteTable(dbTableDescriptor* desc);
00929 
00934     void dropTable(dbTableDescriptor* desc);
00935 
00940     void createIndex(dbFieldDescriptor* fd);
00941 
00946     void createHashTable(dbFieldDescriptor* fd);
00947 
00952     void dropIndex(dbFieldDescriptor* fd);
00953 
00958     void dropHashTable(dbFieldDescriptor* fd);
00959 
00965     void linkTable(dbTableDescriptor* table, oid_t tableId);
00966 
00971     void unlinkTable(dbTableDescriptor* table);
00972 
00979     bool wasReserved(offs_t pos, size_t size);
00980 
00989     void reserveLocation(dbLocation& location, offs_t pos, size_t size);
00990 
00995     void commitLocation();
00996 
01002     dbTableDescriptor* findTable(char const* name);
01003     
01010     dbTableDescriptor* findTableByName(char const* name);
01011 };
01012 
01013 
01014 #ifdef REPLICATION_SUPPORT
01015 
01016 #include "sockio.h"
01017 
01018 class dbConnection {  // per-node connection state used by dbReplicatedDatabase: two sockets, three events and counters
01019  public:
01020     socket_t*    reqSock;  // deleted by the destructor
01021     socket_t*    respSock; // deleted by the destructor
01022     dbLocalEvent statusEvent;
01023     dbLocalEvent readyEvent;
01024     dbLocalEvent useEvent;
01025     dbMutex      writeCS;
01026     int          useCount;
01027     int          waitUseEventFlag;
01028     int          waitStatusEventFlag;
01029     int          status;
01030 
01031     dbConnection() {  // opens the three events, zeroes the counters and flags, and starts with no sockets
01032         readyEvent.open();
01033         useEvent.open();
01034         statusEvent.open();
01035         useCount = 0;
01036         waitUseEventFlag = 0;
01037         waitStatusEventFlag = 0;
01038         status = 0;
01039         reqSock = respSock = NULL;
01040     }
01041     ~dbConnection() {  // closes the events and deletes both sockets
01042         readyEvent.close();
01043         useEvent.close();
01044         statusEvent.close();
01045         delete reqSock;  // NOTE(review): no copy constructor or assignment is declared, so a copied object would delete the same sockets again
01046         delete respSock;
01047     }
01048 };
01049 
01050 class dbReplicatedDatabase : public dbDatabase {
01051     friend class dbFile;
01052   protected:
01053     char**        serverURL;
01054     int           nServers;
01055     int           id;
01056     dbConnection* con;
01057 
01058     enum NodeStatus { 
01059         ST_OFFLINE,  // node is not available 
01060         ST_ONLINE,   // node is available 
01061         ST_ACTIVE,   // primary node is running, replicating changes
01062         ST_STANDBY,  // standby node receives changes from primary node
01063         ST_RECOVERED // node is recovered after the fault
01064     };
01065     
01066     dbLocalEvent  startEvent;
01067     dbMutex       startCS;
01068     fd_set        inputSD;
01069     int           nInputSD;
01070 
01071     int           activeNodeId;
01072     dbMutex       sockCS;
01073     socket_t*     acceptSock;
01074     dbThread      readerThread;
01075 
01076     static void thread_proc dbReplicatedDatabase::startReader(void* arg);
01077 
01078     void reader();
01079 
01080   public:
01081     void deleteConnection(int nodeId);
01082     void lockConnection(int nodeId);
01083     void unlockConnection(int nodeId);
01084     void changeActiveNode();
01085     void addConnection(int nodeId, socket_t* s);
01086     bool writeReq(int nodeId, ReplicationRequest const& hdr, 
01087                   void* body = NULL, size_t bodySize = 0);
01088     bool writeResp(int nodeId, ReplicationRequest const& hdr);
01089 
01090     bool open(char const* databaseName, char const* fileName,
01091               int id, char* servers[], int nServers);
01092     virtual void close();
01093 
01094     static int dbPollInterval;
01095     static int dbWaitReadyTimeout;
01096     static int dbWaitStatusTimeout;
01097     static int dbRecoveryConnectionAttempts;
01098     static int dbStartupConnectionAttempts;
01099 };
01100 #endif
01101 
01102 template<class T>
01103 dbReference<T> insert(T const& record) {  // free-function insert: the database is the one returned by T::dbDescriptor.getDatabase()
01104     dbReference<T> ref;
01105     T::dbDescriptor.getDatabase()->insertRecord(&T::dbDescriptor, &ref, &record); // the result of getDatabase() is used without a null check
01106     return ref; // ref was handed to insertRecord() by address
01107 }
01108 
01109 #ifdef NO_MEMBER_TEMPLATES
01110 template<class T>
01111 dbReference<T> insert(dbDatabase& db, T const& record) { // free-function counterpart of the dbDatabase::insert member template, compiled only when NO_MEMBER_TEMPLATES is defined
01112     dbReference<T> ref;
01113     db.insertRecord(db.lookupTable(&T::dbDescriptor), &ref, &record); // the table descriptor is looked up in db before inserting
01114     return ref;
01115 }
01116 #endif
01117 
01121 class dbSearchContext {  // plain record of search parameters with all members public; presumably passed to index search routines -- confirm
01122   public:
01123     dbDatabase*     db;
01124     dbExprNode*     condition;
01125     dbAnyCursor*    cursor;
01126     char*           firstKey;          // presumably the lower bound of the searched key range -- confirm
01127     int             firstKeyInclusion; // presumably tells whether firstKey itself is included -- confirm
01128     char*           lastKey;           // presumably the upper bound of the searched key range -- confirm
01129     int             lastKeyInclusion;  // presumably tells whether lastKey itself is included -- confirm
01130     int             type;
01131     int             sizeofType;
01132     dbUDTComparator comparator;
01133     int             offs;
01134     int             probes;
01135 };
01136 
01137 
01138 #endif

Generated on Fri Nov 15 21:06:28 2002 for FastDB by doxygen1.2.15