#include #include #include #include #include #include #include #include #include "global.h" #include "sigil.h" #include "log.h" /* The database consists of two files; * An immutable log with timestamped entries of variable size (URLs); * An index mapping 20-bit values to corresponding log positions Terminology: * LOG * - an append-only log containing: - :<0> - /<0> * MAP * - a memory-mapped array of 1M 8-byte entries, used as a hashtable * IDX * - a 20-bit index into MAP * OFFSET * - an offset into LOG (for URLs); * NICK * - a 4-character mnemonic string convertible to an IDX An entry stored in MAP contains an offset to a URL, and some metadata. The index of an entry is hashed from the URL (linear probing used on collisions). A NICK may be used to communicate the IDX. */ #define IDX_ENTRY_SIZE 4 #define tIDX_ENTRY U32 // Index size is 20 bits... #define IDX_SIZE (0x100000 * IDX_ENTRY_SIZE) #define IDX_MASK 0xFFFFF // Each entry contains an offset into log #define OFF_MASK 0x7FFFFFFF /******************************************************************************* LOG Current log state is kept here. TIMESTAMPS ========== <:><0> The log contains timestamp records interleaved with other records. As a matter of policy, not every record is preceded with a stamp. Currently, the policy is: * Any 1K run of log data must contain at least one stamp; * If the previous stamp is more than 59 seconds in the past, a stamp now. To enforce this policy, the application keeps a variable last_stamp, a 64-bit value containing: | 0 - 31 | last-stamp-value | | 32- 63 | last stamp file position | ******************************************************************************/ #ifdef ERRLOG extern FILE* errlog; #endif tIDX_ENTRY* map; /******************************************************************************* ******************************************************************************/ /******************************************************************************* ******************************************************************************/ int sys_open(){ log_open(); int fd = open(DBFILE_MAP,O_RDWR); if(fd == -1) { printf("unable to open map file %s\n",DBFILE_MAP); return -1; } // map to memory map = (tIDX_ENTRY*) mmap(NULL,IDX_SIZE,PROT_READ|PROT_WRITE,MAP_SHARED,fd,0); close(fd); if(MAP_FAILED==map){ printf("mmap failed\n"); return -2; } else { return 0; } } /******************************************************************************* ******************************************************************************/ void sys_close(){ log_close(); munmap(map,IDX_SIZE); } /******************************************************************************* Given a URL, return an idx of the interned URL. ******************************************************************************/ U32 URL_check(char* str){ U32 idx = string_hash(str); // Linear-probe char buf[256]; U32 off; while((off=map[idx])){ // as long as a map exists for hash, log_read(off,buf); // look at what it refers to, and if(!strcmp(str,buf)) // see if it is our needle. If so, return idx; // we already have it, return it. idx = (idx+1) & IDX_MASK; // otherwise, linear-probe next. } return 0; } U32 URL_idx(char* str){ U32 idx = string_hash(str); // Linear-probe char buf[256]; U32 off; while((off=map[idx])){ // as long as a map exists for hash, log_read(off,buf); // look at what it refers to, and if(!strcmp(str,buf)) // see if it is our needle. If so, return idx; // we already have it, return it. idx = (idx+1) & IDX_MASK; // otherwise, linear-probe next. } // not found? create. idx points at a 0 U32 pos = log_write(str); map[idx]= pos; return idx; } /******************************************************************************* ******************************************************************************/ U32 idx_URL(U32 idx,char* buf){ U32 offset = map[idx]; //printf("db.c: offset:%X",offset); if(offset>0) return log_read(offset,buf); else return 0; }