2022-03-04 22:37:21 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
#include <time.h>
|
|
|
|
|
|
|
|
#include "global.h"
|
|
|
|
#include "sigil.h"
|
|
|
|
#include "log.h"
|
|
|
|
/*
|
|
|
|
|
|
|
|
The database consists of two files;
|
|
|
|
* An immutable log with timestamped entries of variable size (URLs);
|
|
|
|
* An index mapping 20-bit values to corresponding log positions
|
|
|
|
|
|
|
|
|
|
|
|
Terminology:
|
|
|
|
* LOG * - an append-only log containing:
|
|
|
|
- :<U32-timestamp><0>
|
|
|
|
- /<url-string><0>
|
|
|
|
|
|
|
|
* MAP * - a memory-mapped array of 1M 8-byte entries, used as a hashtable
|
|
|
|
|
|
|
|
* IDX * - a 20-bit index into MAP
|
|
|
|
|
|
|
|
* OFFSET * - an offset into LOG (for URLs);
|
|
|
|
|
|
|
|
* NICK * - a 4-character mnemonic string convertible to an IDX
|
|
|
|
|
|
|
|
An entry stored in MAP contains an offset to a URL, and some metadata. The
|
|
|
|
index of an entry is hashed from the URL (linear probing used on collisions).
|
|
|
|
A NICK may be used to communicate the IDX.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define IDX_ENTRY_SIZE 4
|
|
|
|
#define tIDX_ENTRY U32
|
|
|
|
// Index size is 20 bits...
|
|
|
|
#define IDX_SIZE (0x100000 * IDX_ENTRY_SIZE)
|
|
|
|
#define IDX_MASK 0xFFFFF
|
|
|
|
// Each entry contains an offset into log
|
|
|
|
#define OFF_MASK 0x7FFFFFFF
|
|
|
|
|
|
|
|
/*******************************************************************************
|
|
|
|
|
|
|
|
LOG
|
|
|
|
|
|
|
|
Current log state is kept here.
|
|
|
|
|
|
|
|
TIMESTAMPS
|
|
|
|
==========
|
|
|
|
|
|
|
|
<:><U32-time-stamp><0>
|
|
|
|
|
|
|
|
The log contains timestamp records interleaved with other records. As a matter
|
|
|
|
of policy, not every record is preceded with a stamp.
|
|
|
|
|
|
|
|
Currently, the policy is:
|
|
|
|
* Any 1K run of log data must contain at least one stamp;
|
|
|
|
* If the previous stamp is more than 59 seconds in the past, a stamp now.
|
|
|
|
|
|
|
|
To enforce this policy, the application keeps a variable last_stamp, a 64-bit
|
|
|
|
value containing:
|
|
|
|
| 0 - 31 | last-stamp-value |
|
|
|
|
| 32- 63 | last stamp file position |
|
|
|
|
|
|
|
|
******************************************************************************/
|
|
|
|
#ifdef ERRLOG
|
|
|
|
extern FILE* errlog;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tIDX_ENTRY* map;
|
|
|
|
/*******************************************************************************
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
******************************************************************************/
|
|
|
|
|
|
|
|
/*******************************************************************************
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
******************************************************************************/
|
|
|
|
int sys_open(){
|
|
|
|
log_open();
|
|
|
|
int fd = open(DBFILE_MAP,O_RDWR);
|
|
|
|
if(fd == -1) {
|
|
|
|
printf("unable to open map file %s\n",DBFILE_MAP);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
// map to memory
|
|
|
|
map = (tIDX_ENTRY*) mmap(NULL,IDX_SIZE,PROT_READ|PROT_WRITE,MAP_SHARED,fd,0);
|
|
|
|
close(fd);
|
|
|
|
|
|
|
|
if(MAP_FAILED==map){
|
|
|
|
printf("mmap failed\n");
|
|
|
|
return -2;
|
|
|
|
} else {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*******************************************************************************
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
******************************************************************************/
|
|
|
|
void sys_close(){
|
|
|
|
log_close();
|
|
|
|
munmap(map,IDX_SIZE);
|
|
|
|
}
|
|
|
|
/*******************************************************************************
|
|
|
|
|
|
|
|
Given a URL, return an idx of the interned URL.
|
|
|
|
|
|
|
|
******************************************************************************/
|
2022-03-07 22:04:22 +00:00
|
|
|
U32 URL_check(char* str){
|
|
|
|
U32 idx = string_hash(str);
|
|
|
|
|
|
|
|
// Linear-probe
|
|
|
|
char buf[256];
|
|
|
|
U32 off;
|
|
|
|
while((off=map[idx])){ // as long as a map exists for hash,
|
|
|
|
log_read(off,buf); // look at what it refers to, and
|
|
|
|
if(!strcmp(str,buf)) // see if it is our needle. If so,
|
|
|
|
return idx; // we already have it, return it.
|
|
|
|
idx = (idx+1) & IDX_MASK; // otherwise, linear-probe next.
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-03-04 22:37:21 +00:00
|
|
|
U32 URL_idx(char* str){
|
|
|
|
U32 idx = string_hash(str);
|
|
|
|
|
|
|
|
// Linear-probe
|
|
|
|
char buf[256];
|
|
|
|
U32 off;
|
|
|
|
while((off=map[idx])){ // as long as a map exists for hash,
|
|
|
|
log_read(off,buf); // look at what it refers to, and
|
|
|
|
if(!strcmp(str,buf)) // see if it is our needle. If so,
|
|
|
|
return idx; // we already have it, return it.
|
|
|
|
idx = (idx+1) & IDX_MASK; // otherwise, linear-probe next.
|
|
|
|
}
|
|
|
|
// not found? create. idx points at a 0
|
|
|
|
U32 pos = log_write(str);
|
|
|
|
map[idx]= pos;
|
|
|
|
return idx;
|
|
|
|
}
|
|
|
|
/*******************************************************************************
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
******************************************************************************/
|
|
|
|
U32 idx_URL(U32 idx,char* buf){
|
|
|
|
U32 offset = map[idx];
|
|
|
|
//printf("db.c: offset:%X",offset);
|
|
|
|
|
|
|
|
if(offset>0)
|
|
|
|
return log_read(offset,buf);
|
|
|
|
else
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|