spa/db.c

171 lines
4.3 KiB
C

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>
#include <time.h>
#include "global.h"
#include "sigil.h"
#include "log.h"
/*
The database consists of two files;
* An immutable log with timestamped entries of variable size (URLs);
* An index mapping 20-bit values to corresponding log positions
Terminology:
* LOG * - an append-only log containing:
- :<U32-timestamp><0>
- /<url-string><0>
* MAP * - a memory-mapped array of 1M 8-byte entries, used as a hashtable
* IDX * - a 20-bit index into MAP
* OFFSET * - an offset into LOG (for URLs);
* NICK * - a 4-character mnemonic string convertible to an IDX
An entry stored in MAP contains an offset to a URL, and some metadata. The
index of an entry is hashed from the URL (linear probing used on collisions).
A NICK may be used to communicate the IDX.
*/
#define IDX_ENTRY_SIZE 4
#define tIDX_ENTRY U32
// Index size is 20 bits...
#define IDX_SIZE (0x100000 * IDX_ENTRY_SIZE)
#define IDX_MASK 0xFFFFF
// Each entry contains an offset into log
#define OFF_MASK 0x7FFFFFFF
/*******************************************************************************
LOG
Current log state is kept here.
TIMESTAMPS
==========
<:><U32-time-stamp><0>
The log contains timestamp records interleaved with other records. As a matter
of policy, not every record is preceded with a stamp.
Currently, the policy is:
* Any 1K run of log data must contain at least one stamp;
* If the previous stamp is more than 59 seconds in the past, a stamp now.
To enforce this policy, the application keeps a variable last_stamp, a 64-bit
value containing:
| 0 - 31 | last-stamp-value |
| 32- 63 | last stamp file position |
******************************************************************************/
#ifdef ERRLOG
extern FILE* errlog;
#endif
tIDX_ENTRY* map;
/*******************************************************************************
******************************************************************************/
/*******************************************************************************
******************************************************************************/
int sys_open(){
log_open();
int fd = open(DBFILE_MAP,O_RDWR);
if(fd == -1) {
printf("unable to open map file %s\n",DBFILE_MAP);
return -1;
}
// map to memory
map = (tIDX_ENTRY*) mmap(NULL,IDX_SIZE,PROT_READ|PROT_WRITE,MAP_SHARED,fd,0);
close(fd);
if(MAP_FAILED==map){
printf("mmap failed\n");
return -2;
} else {
return 0;
}
}
/*******************************************************************************
******************************************************************************/
void sys_close(){
log_close();
munmap(map,IDX_SIZE);
}
/*******************************************************************************
Given a URL, return an idx of the interned URL.
******************************************************************************/
U32 URL_check(char* str){
U32 idx = string_hash(str);
// Linear-probe
char buf[256];
U32 off;
while((off=map[idx])){ // as long as a map exists for hash,
log_read(off,buf); // look at what it refers to, and
if(!strcmp(str,buf)) // see if it is our needle. If so,
return idx; // we already have it, return it.
idx = (idx+1) & IDX_MASK; // otherwise, linear-probe next.
}
return 0;
}
U32 URL_idx(char* str){
U32 idx = string_hash(str);
// Linear-probe
char buf[256];
U32 off;
while((off=map[idx])){ // as long as a map exists for hash,
log_read(off,buf); // look at what it refers to, and
if(!strcmp(str,buf)) // see if it is our needle. If so,
return idx; // we already have it, return it.
idx = (idx+1) & IDX_MASK; // otherwise, linear-probe next.
}
// not found? create. idx points at a 0
U32 pos = log_write(str);
map[idx]= pos;
return idx;
}
/*******************************************************************************
******************************************************************************/
U32 idx_URL(U32 idx,char* buf){
U32 offset = map[idx];
//printf("db.c: offset:%X",offset);
if(offset>0)
return log_read(offset,buf);
else
return 0;
}