00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #ifndef __DATABASE_H__
00012 #define __DATABASE_H__
00013
00014 #include "class.h"
00015 #include "reference.h"
00016 #include "file.h"
00017
/**
 * Default for the dbInitSize argument of the dbDatabase constructor
 * (1 MiB; the unit is presumably bytes - confirm against the implementation).
 */
const size_t dbDefaultInitDatabaseSize = 1024 * 1024;

/**
 * Default for the dbInitIndexSize argument of the dbDatabase constructor.
 * NOTE(review): whether this counts bytes or index entries is not visible
 * in this header.
 */
const size_t dbDefaultInitIndexSize = 512 * 1024;

/**
 * Default for the dbExtensionQuantum argument of the dbDatabase constructor
 * (4 * 1024 * 1024).
 */
const size_t dbDefaultExtensionQuantum = 4 * 1024 * 1024;

/**
 * Upper bound on the number of threads used for a parallel search
 * (as the name states; the constant is not referenced in this header).
 */
const unsigned dbMaxParallelSearchThreads = 64;
00037
/**
 * Kind tags for objects stored in the database.
 *
 * A tag is added to an object's offset when the object is allocated and is
 * used as an index into the table of internal object sizes;
 * dbInternalObjectMarker is the bit mask (low three bits) that extracts or
 * clears the tag from such an offset.
 */
enum dbInternalObject {
    dbTableRow            = 0,
    dbPageObjectMarker    = 1,
    dbTtreeMarker         = 2,
    dbTtreeNodeMarker     = 3,
    dbHashTableMarker     = 4,
    dbHashTableItemMarker = 5,

    // Mask covering every tag value above; not an object kind itself.
    dbInternalObjectMarker = 7
};
00051
00052 const offs_t dbFreeHandleMarker = (offs_t)1 << (sizeof(offs_t)*8 - 1);
00053
00054 const size_t dbAllocationQuantumBits = 4;
00055 const size_t dbAllocationQuantum = 1 << dbAllocationQuantumBits;
00056 const size_t dbPageBits = 12;
00057 const size_t dbPageSize = 1 << dbPageBits;
00058 const size_t dbIdsPerPage = dbPageSize / sizeof(oid_t);
00059 const size_t dbHandlesPerPage = dbPageSize / sizeof(offs_t);
00060 const size_t dbBitmapSegmentBits = dbPageBits + 3 + dbAllocationQuantumBits;
00061 const size_t dbBitmapSegmentSize = 1 << dbBitmapSegmentBits;
00062 const size_t dbBitmapPages = 1 << (dbDatabaseOffsetBits-dbBitmapSegmentBits);
00063 const size_t dbDirtyPageBitmapSize = 1 << (dbDatabaseOidBits-dbPageBits-3);
00064 const size_t dbDefaultSelectionLimit = 2000000000;
00065
00066 const int dbBMsearchThreshold = 512;
00067
00068 const char dbMatchAnyOneChar = '_';
00069 const char dbMatchAnySubstring = '%';
00070
00074 enum dbPredefinedIds {
00075 dbInvalidId,
00076 dbMetaTableId,
00077 dbBitmapId,
00078 dbFirstUserId = dbBitmapId + dbBitmapPages
00079 };
00080
00084 class dbHeader {
00085 public:
00086 offs_t size;
00087 int4 curr;
00088 int4 dirty;
00089 int4 initialized;
00090 struct {
00091 offs_t index;
00092 offs_t shadowIndex;
00093 oid_t indexSize;
00094 oid_t shadowIndexSize;
00095 oid_t indexUsed;
00096 oid_t freeList;
00097 } root[2];
00098
00099 int4 majorVersion;
00100 int4 minorVersion;
00101 };
00102
// Forward declarations of types that this header mentions only through
// pointers, references or template arguments.
union  dbSynthesizedAttribute;
struct dbInheritedAttribute;
class  dbDatabaseThreadContext;
class  dbAnyCursor;
class  dbQuery;
class  dbExprNode;
00109
00110 class dbMonitor {
00111 public:
00112 sharedsem_t sem;
00113 sharedsem_t mutatorSem;
00114 int nReaders;
00115 int nWriters;
00116 int nWaitReaders;
00117 int nWaitWriters;
00118 int waitForUpgrade;
00119 int forceCommitCount;
00120 int backupInProgress;
00121 int uncommittedChanges;
00122
00123 int curr;
00124
00125
00126 int commitInProgress;
00127 int concurrentTransId;
00128
00129 unsigned lastDeadlockRecoveryTime;
00130
00131 int version;
00132 int users;
00133
00134 dbProcessId ownerPid;
00135
00136 dbDatabaseThreadContext* delayedCommitContext;
00137
00138 int dirtyPagesMap[dbDirtyPageBitmapSize/4];
00139 };
00140
00144 class FASTDB_DLL_ENTRY dbL2List {
00145 public:
00146 dbL2List* next;
00147 dbL2List* prev;
00148
00149 void link(dbL2List* elem) {
00150 elem->prev = this;
00151 elem->next = next;
00152 next = next->prev = elem;
00153 }
00154 void unlink() {
00155 next->prev = prev;
00156 prev->next = next;
00157 next = prev = this;
00158 }
00159 bool isEmpty() {
00160 return next == this;
00161 }
00162 void reset() {
00163 next = prev = this;
00164 }
00165 dbL2List() {
00166 next = prev = this;
00167 }
00168 ~dbL2List() {
00169 unlink();
00170 }
00171 };
00172
00176 class FASTDB_DLL_ENTRY dbDatabase {
00177 friend class dbSelection;
00178 friend class dbAnyCursor;
00179 friend class dbHashTable;
00180 friend class dbQuery;
00181 friend class dbTtree;
00182 friend class dbTtreeNode;
00183 friend class dbParallelQueryContext;
00184 friend class dbServer;
00185 friend class dbColumnBinding;
00186 friend class dbUserFunctionArgument;
00187 friend class dbAnyContainer;
00188 friend class dbCLI;
00189 friend class GiSTdb;
00190 public:
00200 bool open(char const* databaseName,
00201 char const* fileName = NULL,
00202 time_t waitLockTimeoutMsec = INFINITE,
00203 time_t commitDelaySec = 0);
00204
00208 void close();
00209
00213 void commit();
00214
00219 void precommit();
00220
00224 void rollback();
00225
00232 void scheduleBackup(char const* fileName, time_t periodSec);
00233
00238 void attach();
00239
00240 enum DetachFlags {
00241 COMMIT = 1,
00242 DESTROY_CONTEXT = 2
00243 };
00248 void detach(int flags = COMMIT|DESTROY_CONTEXT);
00249
00253 void lock() { beginTransaction(dbExclusiveLock); }
00254
00263 bool backup(char const* file, bool compactify);
00264
00269 void assign(dbTableDescriptor& desc) {
00270 assert(((void)"Table is not yet assigned to the database",
00271 desc.tableId == 0));
00272 desc.db = this;
00273 desc.fixedDatabase = true;
00274 }
00275
00283 void setConcurrency(unsigned nThreads);
00284
00289 long getAllocatedSize() { return allocatedSize; }
00290
00295 long getDatabaseSize() { return header->size; }
00296
00297 enum dbErrorClass {
00298 NoError,
00299 QueryError,
00300 ArithmeticError,
00301 IndexOutOfRangeError,
00302 DatabaseOpenError,
00303 FileError,
00304 OutOfMemoryError,
00305 Deadlock,
00306 NullReferenceError,
00307 LockRevoked,
00308 FileLimitExeeded
00309 };
00310 typedef void (*dbErrorHandler)(dbErrorClass error, char const* msg, int msgarg);
00311
00317 dbErrorHandler setErrorHandler(dbErrorHandler newHandler);
00318
00326 virtual void handleError(dbErrorClass error, char const* msg = NULL,
00327 int arg = 0);
00328
00335 void insertRecord(dbTableDescriptor* table, dbAnyReference* ref,
00336 void const* record);
00337
00341 bool isOpen() const { return opened; }
00342
00346 int getVersion();
00347
00352 void setFileSizeLimit(size_t limit) {
00353 dbFileSizeLimit = limit;
00354 }
00355
00356 #ifndef NO_MEMBER_TEMPLATES
00357
00362 template<class T>
00363 dbReference<T> insert(T const& record) {
00364 dbReference<T> ref;
00365 insertRecord(lookupTable(&T::dbDescriptor), &ref, &record);
00366 return ref;
00367 }
00368 #endif
00369
00375 dbTableDescriptor* lookupTable(dbTableDescriptor* desc);
00376
00377 enum dbAccessType {
00378 dbReadOnly = 0,
00379 dbAllAccess = 1,
00380 dbConcurrentRead = 2,
00381 dbConcurrentUpdate = 3
00382 };
00394 dbDatabase(dbAccessType type = dbAllAccess,
00395 size_t dbInitSize = dbDefaultInitDatabaseSize,
00396 size_t dbExtensionQuantum = dbDefaultExtensionQuantum,
00397 size_t dbInitIndexSize = dbDefaultInitIndexSize,
00398 int nThreads = 1
00399
00400
00401
00402
00403 #ifdef NO_PTHREADS
00404 , bool usePthreads = false
00405 #endif
00406 );
00410 virtual ~dbDatabase();
00411
00417 static void cleanup();
00418
00419 const dbAccessType accessType;
00420 const size_t initSize;
00421 const size_t extensionQuantum;
00422 const size_t initIndexSize;
00423
00424 static unsigned dbParallelScanThreshold;
00425
00426 protected:
00427 static size_t internalObjectSize[];
00428
00429 dbThreadPool threadPool;
00430
00431 dbThreadContext<dbDatabaseThreadContext> threadContext;
00432
00433 byte* baseAddr;
00434 dbHeader* header;
00435 offs_t* currIndex;
00436 offs_t* index[2];
00437 unsigned parThreads;
00438 bool modified;
00439
00440 size_t currRBitmapPage;
00441 size_t currRBitmapOffs;
00442
00443 size_t currPBitmapPage;
00444 size_t currPBitmapOffs;
00445
00446 struct dbLocation {
00447 offs_t pos;
00448 size_t size;
00449 dbLocation* next;
00450 };
00451 dbLocation* reservedChain;
00452
00453 char* databaseName;
00454 int databaseNameLen;
00455 char* fileName;
00456 int version;
00457
00458 size_t mmapSize;
00459
00460 size_t committedIndexSize;
00461 size_t currIndexSize;
00462 oid_t updatedRecordId;
00463
00464 unsigned dbWaitLockTimeout;
00465
00466 size_t dbFileSizeLimit;
00467
00468 bool uncommittedChanges;
00469
00470 dbFile file;
00471 dbSharedObject<dbMonitor> shm;
00472 dbGlobalCriticalSection cs;
00473 dbGlobalCriticalSection mutatorCS;
00474 dbInitializationMutex initMutex;
00475 dbSemaphore writeSem;
00476 dbSemaphore readSem;
00477 dbSemaphore upgradeSem;
00478 dbEvent backupCompletedEvent;
00479 dbMonitor* monitor;
00480
00481 dbTableDescriptor* tables;
00482
00483 int* bitmapPageAvailableSpace;
00484 bool opened;
00485
00486 long allocatedSize;
00487
00488 time_t commitDelay;
00489 time_t commitTimeout;
00490 time_t commitTimerStarted;
00491
00492 dbMutex delayedCommitStartTimerMutex;
00493 dbMutex delayedCommitStopTimerMutex;
00494 dbLocalEvent delayedCommitStartTimerEvent;
00495 dbEvent delayedCommitStopTimerEvent;
00496 dbLocalEvent commitThreadSyncEvent;
00497
00498 dbMutex backupMutex;
00499 dbLocalEvent backupInitEvent;
00500 char* backupFileName;
00501 time_t backupPeriod;
00502 bool stopDelayedCommitThread;
00503
00504 dbThread backupThread;
00505 dbThread commitThread;
00506
00507 int accessCount;
00508
00509 dbL2List threadContextList;
00510 dbMutex threadContextListMutex;
00511
00512 dbErrorHandler errorHandler;
00513
00514 void delayedCommit();
00515 void backupScheduler();
00516
00517 static void thread_proc delayedCommitProc(void* arg) {
00518 ((dbDatabase*)arg)->delayedCommit();
00519 }
00520
00521 static void thread_proc backupSchedulerProc(void* arg) {
00522 ((dbDatabase*)arg)->backupScheduler();
00523 }
00524
00529 void commit(dbDatabaseThreadContext* ctx);
00530
00535 void restoreTablesConsistency();
00536
00542 dbRecord* getRow(oid_t oid) {
00543 assert(!(currIndex[oid]&(dbFreeHandleMarker|dbInternalObjectMarker)));
00544 return (dbRecord*)(baseAddr + currIndex[oid]);
00545 }
00546
00556 dbRecord* putRow(oid_t oid, size_t newSize);
00557
00563 dbRecord* putRow(oid_t oid) {
00564 if (oid < committedIndexSize && index[0][oid] == index[1][oid]) {
00565 size_t size = getRow(oid)->size;
00566 size_t pageNo = oid/dbHandlesPerPage;
00567 monitor->dirtyPagesMap[pageNo >> 5] |= 1 << (pageNo & 31);
00568 cloneBitmap(currIndex[oid], size);
00569 allocate(size, oid);
00570 }
00571 return (dbRecord*)(baseAddr + currIndex[oid]);
00572 }
00573
00579 byte* get(oid_t oid) {
00580 return baseAddr + (currIndex[oid] & ~dbInternalObjectMarker);
00581 }
00582
00588 byte* put(oid_t oid) {
00589 if (oid < committedIndexSize && index[0][oid] == index[1][oid]) {
00590 offs_t offs = currIndex[oid];
00591 size_t size = internalObjectSize[offs & dbInternalObjectMarker];
00592 size_t pageNo = oid/dbHandlesPerPage;
00593 monitor->dirtyPagesMap[pageNo >> 5] |= 1 << (pageNo & 31);
00594 allocate(size, oid);
00595 cloneBitmap(offs & ~dbInternalObjectMarker, size);
00596 }
00597 return baseAddr + (currIndex[oid] & ~dbInternalObjectMarker);
00598 }
00599
00612 bool isIndexApplicable(dbAnyCursor* cursor,
00613 dbExprNode* expr, dbExprNode* andExpr,
00614 dbFieldDescriptor* &indexedField);
00615
00627 bool isIndexApplicable(dbAnyCursor* cursor,
00628 dbExprNode* expr, dbExprNode* andExpr);
00629
00642 bool followInverseReference(dbExprNode* expr, dbExprNode* andExpr,
00643 dbAnyCursor* cursor, oid_t iref);
00644
00652 bool existsInverseReference(dbExprNode* expr, int nExistsClauses);
00653
00663 static void _fastcall execute(dbExprNode* expr,
00664 dbInheritedAttribute& iattr,
00665 dbSynthesizedAttribute& sattr);
00666
00676 bool evaluate(dbExprNode* expr, oid_t oid, dbTable* table, dbAnyCursor* cursor);
00677
00682 void select(dbAnyCursor* cursor);
00683
00689 void select(dbAnyCursor* cursor, dbQuery& query);
00690
00696 void traverse(dbAnyCursor* cursor, dbQuery& query);
00697
00704 void update(oid_t oid, dbTableDescriptor* table, void const* record);
00705
00711 void remove(dbTableDescriptor* table, oid_t oid);
00712
00720 offs_t allocate(size_t size, oid_t oid = 0);
00721
00727 void deallocate(offs_t pos, size_t size);
00728
00734 void extend(offs_t size);
00735
00741 void cloneBitmap(offs_t pos, size_t size);
00742
00749 oid_t allocateId(int n = 1);
00750
00757 void freeId(oid_t oid, int n = 1);
00758
00763 void updateCursors(oid_t oid);
00764
00768 void recovery();
00769
00778 bool checkVersion();
00779
00785 oid_t allocateObject(dbInternalObject marker) {
00786 oid_t oid = allocateId();
00787 currIndex[oid] = allocate(internalObjectSize[marker]) + marker;
00788 return oid;
00789 }
00790
00797 oid_t allocateRow(oid_t tableId, size_t size);
00798
00804 void freeRow(oid_t tableId, oid_t oid);
00805
00809 void freeObject(oid_t oid);
00810
00814 static void deleteCompiledQuery(dbExprNode* tree);
00815
00822 enum dbLockType {
00823 dbSharedLock,
00824 dbExclusiveLock,
00825 dbCommitLock
00826 };
00827
00828 bool beginTransaction(dbLockType);
00829
00833 void endTransaction() {
00834 endTransaction(threadContext.get());
00835 }
00836
00841 void endTransaction(dbDatabaseThreadContext* ctx);
00842
00847 void initializeMetaTable();
00848
00856 bool loadScheme(bool alter);
00857
00863 bool completeDescriptorsInitialization();
00864
00870 void reformatTable(oid_t tableId, dbTableDescriptor* desc);
00871
00879 bool addIndices(bool alter, dbTableDescriptor* desc);
00880
00886 oid_t addNewTable(dbTableDescriptor* desc);
00887
00893 void updateTableDescriptor(dbTableDescriptor* desc, oid_t tableId);
00894
00903 void insertInverseReference(dbFieldDescriptor* fd,
00904 oid_t reverseId, oid_t targetId);
00905
00911 void removeInverseReferences(dbTableDescriptor* desc, oid_t oid);
00912
00921 void removeInverseReference(dbFieldDescriptor* fd,
00922 oid_t reverseId, oid_t targetId);
00923
00928 void deleteTable(dbTableDescriptor* desc);
00929
00934 void dropTable(dbTableDescriptor* desc);
00935
00940 void createIndex(dbFieldDescriptor* fd);
00941
00946 void createHashTable(dbFieldDescriptor* fd);
00947
00952 void dropIndex(dbFieldDescriptor* fd);
00953
00958 void dropHashTable(dbFieldDescriptor* fd);
00959
00965 void linkTable(dbTableDescriptor* table, oid_t tableId);
00966
00971 void unlinkTable(dbTableDescriptor* table);
00972
00979 bool wasReserved(offs_t pos, size_t size);
00980
00989 void reserveLocation(dbLocation& location, offs_t pos, size_t size);
00990
00995 void commitLocation();
00996
01002 dbTableDescriptor* findTable(char const* name);
01003
01010 dbTableDescriptor* findTableByName(char const* name);
01011 };
01012
01013
01014 #ifdef REPLICATION_SUPPORT
01015
01016 #include "sockio.h"
01017
01018 class dbConnection {
01019 public:
01020 socket_t* reqSock;
01021 socket_t* respSock;
01022 dbLocalEvent statusEvent;
01023 dbLocalEvent readyEvent;
01024 dbLocalEvent useEvent;
01025 dbMutex writeCS;
01026 int useCount;
01027 int waitUseEventFlag;
01028 int waitStatusEventFlag;
01029 int status;
01030
01031 dbConnection() {
01032 readyEvent.open();
01033 useEvent.open();
01034 statusEvent.open();
01035 useCount = 0;
01036 waitUseEventFlag = 0;
01037 waitStatusEventFlag = 0;
01038 status = 0;
01039 reqSock = respSock = NULL;
01040 }
01041 ~dbConnection() {
01042 readyEvent.close();
01043 useEvent.close();
01044 statusEvent.close();
01045 delete reqSock;
01046 delete respSock;
01047 }
01048 };
01049
01050 class dbReplicatedDatabase : public dbDatabase {
01051 friend class dbFile;
01052 protected:
01053 char** serverURL;
01054 int nServers;
01055 int id;
01056 dbConnection* con;
01057
01058 enum NodeStatus {
01059 ST_OFFLINE,
01060 ST_ONLINE,
01061 ST_ACTIVE,
01062 ST_STANDBY,
01063 ST_RECOVERED
01064 };
01065
01066 dbLocalEvent startEvent;
01067 dbMutex startCS;
01068 fd_set inputSD;
01069 int nInputSD;
01070
01071 int activeNodeId;
01072 dbMutex sockCS;
01073 socket_t* acceptSock;
01074 dbThread readerThread;
01075
01076 static void thread_proc dbReplicatedDatabase::startReader(void* arg);
01077
01078 void reader();
01079
01080 public:
01081 void deleteConnection(int nodeId);
01082 void lockConnection(int nodeId);
01083 void unlockConnection(int nodeId);
01084 void changeActiveNode();
01085 void addConnection(int nodeId, socket_t* s);
01086 bool writeReq(int nodeId, ReplicationRequest const& hdr,
01087 void* body = NULL, size_t bodySize = 0);
01088 bool writeResp(int nodeId, ReplicationRequest const& hdr);
01089
01090 bool open(char const* databaseName, char const* fileName,
01091 int id, char* servers[], int nServers);
01092 virtual void close();
01093
01094 static int dbPollInterval;
01095 static int dbWaitReadyTimeout;
01096 static int dbWaitStatusTimeout;
01097 static int dbRecoveryConnectionAttempts;
01098 static int dbStartupConnectionAttempts;
01099 };
01100 #endif
01101
01102 template<class T>
01103 dbReference<T> insert(T const& record) {
01104 dbReference<T> ref;
01105 T::dbDescriptor.getDatabase()->insertRecord(&T::dbDescriptor, &ref, &record);
01106 return ref;
01107 }
01108
01109 #ifdef NO_MEMBER_TEMPLATES
01110 template<class T>
01111 dbReference<T> insert(dbDatabase& db, T const& record) {
01112 dbReference<T> ref;
01113 db.insertRecord(db.lookupTable(&T::dbDescriptor), &ref, &record);
01114 return ref;
01115 }
01116 #endif
01117
01121 class dbSearchContext {
01122 public:
01123 dbDatabase* db;
01124 dbExprNode* condition;
01125 dbAnyCursor* cursor;
01126 char* firstKey;
01127 int firstKeyInclusion;
01128 char* lastKey;
01129 int lastKeyInclusion;
01130 int type;
01131 int sizeofType;
01132 dbUDTComparator comparator;
01133 int offs;
01134 int probes;
01135 };
01136
01137
01138 #endif