209 lines
5.2 KiB
C
209 lines
5.2 KiB
C
#include <stdio.h>
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <fcntl.h>
|
|
#include <unistd.h>
|
|
#include <sys/mman.h>
|
|
|
|
#include <string.h>
|
|
#include <time.h>
|
|
|
|
#include "global.h"
|
|
#include "sigil.h"
|
|
/*
|
|
|
|
The database consists of two files;
|
|
* An immutable log with timestamped entries of variable size (URLs);
|
|
* An index mapping 20-bit values to corresponding log positions
|
|
|
|
|
|
Terminology:
|
|
* LOG * - an append-only log containing:
|
|
- :<U32-timestamp><0>
|
|
- /<url-string><0>
|
|
|
|
* MAP * - a memory-mapped array of 1M (2^20) entries of IDX_ENTRY_SIZE bytes each, used as a hashtable
|
|
|
|
* IDX * - a 20-bit index into MAP
|
|
|
|
* OFFSET * - an offset into LOG (for URLs);
|
|
|
|
* NICK * - a 4-character mnemonic string convertible to an IDX
|
|
|
|
An entry stored in MAP contains an offset to a URL, and some metadata. The
|
|
index of an entry is hashed from the URL (linear probing used on collisions).
|
|
A NICK may be used to communicate the IDX.
|
|
*/
|
|
|
|
// Size in bytes of one index entry; presumably sizeof(tIDX_ENTRY) -- confirm.
#define IDX_ENTRY_SIZE 4
|
|
// Type used for one index entry.
#define tIDX_ENTRY U32
|
|
// Index size is 20 bits...
|
|
// Total index size in bytes: 0x100000 (2^20) entries of IDX_ENTRY_SIZE bytes each.
#define IDX_SIZE (0x100000 * IDX_ENTRY_SIZE)
|
|
// Mask keeping the low 20 bits of a value, i.e. the range of a valid index.
#define IDX_MASK 0xFFFFF
|
|
// Each entry contains an offset into log
|
|
// Mask keeping the low 31 bits of an entry.
// NOTE(review): presumably the top bit carries entry metadata -- confirm
// against the code that reads and writes index entries.
#define OFF_MASK 0x7FFFFFFF
|
|
|
|
/*******************************************************************************
|
|
|
|
LOG
|
|
|
|
Current log state is kept here.
|
|
|
|
TIMESTAMPS
|
|
==========
|
|
|
|
<:><U32-time-stamp><0>
|
|
|
|
The log contains timestamp records interleaved with other records. As a matter
|
|
of policy, not every record is preceded with a stamp.
|
|
|
|
Currently, the policy is:
|
|
* Any 1K run of log data must contain at least one stamp;
|
|
* If the previous stamp is more than 59 seconds in the past, a stamp is written now.
|
|
|
|
To enforce this policy, the application keeps a variable last_stamp, a 64-bit
|
|
value containing:
|
|
| 0 - 31 | last-stamp-value |
|
|
| 32- 63 | last stamp file position |
|
|
|
|
******************************************************************************/
|
|
#ifdef ERRLOG
|
|
extern FILE* errlog;
|
|
#endif
|
|
|
|
// Stream for the log file; opened in append mode by log_open() and closed by
// log_close(). All stamp and URL writes go through it.
FILE* flog;
|
|
// Packed record of the most recent stamp: stamp value in bits 0..31, log file
// position in bits 32..63. Zero until log_open() or stamp_write() sets it.
U64 last_stamp = 0;
|
|
|
|
// Stamp value held in the low 32 bits of last_stamp.
#define LS_STAMP ((U32)(last_stamp & 0xFFFFFFFF))
|
|
// Log file position held in the high 32 bits of last_stamp.
#define LS_POS ((U32)(last_stamp>>32))
|
|
// Bytes in one stamp record as written by stamp_write(): ':' + 4-byte value + NUL.
#define STAMP_SIZE 6
|
|
|
|
/*******************************************************************************
|
|
|
|
stamp_write Given now, a U32 current time in seconds, and here, the current
position in the file, write a stamp record to the log and remember it in
last_stamp. Returns the number of bytes written.
|
|
|
|
******************************************************************************/
|
|
/* Emit one stamp record to the open log stream and remember it.

   The record is ':' followed by the 4 raw bytes of `now` (host byte order)
   and a NUL terminator. last_stamp is updated to hold `here` in its high
   32 bits and `now` in its low 32 bits.

   now   current time in seconds
   here  log position at which the record is being written
   Returns the record length in bytes (always 6, the value of STAMP_SIZE). */
U32 stamp_write(U32 now,U32 here){
  U64 packed = (U64)here;            // position occupies bits 32..63
  packed <<= 32;
  packed |= now;                     // stamp value occupies bits 0..31
  last_stamp = packed;

  fputc(':',flog);                   // record tag
  fwrite(&now,1,4,flog);             // 4-byte stamp value
  fputc(0,flog);                     // record terminator
  return 6;
}
|
|
/*******************************************************************************
|
|
|
|
stamp_maybe_write Consult the stamp policy and write a stamp when needed
|
|
|
|
|
|
******************************************************************************/
|
|
/* Apply the stamp policy at log position `here`.

   A stamp is written when the last stamp is more than 59 seconds old, or
   when it lies more than (1024 - STAMP_SIZE) bytes behind `here`.

   Returns the number of bytes written: stamp_write()'s result when a stamp
   was emitted, 0 otherwise. */
U32 stamp_maybe_write(U32 here){
  U32 now = time(NULL);
  U32 age_seconds  = now - LS_STAMP;   // time since the last stamp
  U32 bytes_behind = here - LS_POS;    // log data since the last stamp

  if(age_seconds <= 59 && bytes_behind <= (1024-STAMP_SIZE))
    return 0;                          // policy satisfied: no stamp needed

  return stamp_write(now,here);
}
|
|
|
|
/*******************************************************************************
|
|
|
|
|
|
|
|
******************************************************************************/
|
|
/* log_stamp_of(U32 pos)
|
|
|
|
Return stamp (low U32) and position thereof (high U32)
|
|
|
|
*/
|
|
/* Find the most recent stamp in the (up to) 1K of log data preceding `pos`.

   The log file is opened separately for reading. The window is scanned
   backward for a NUL (the previous record's terminator) followed by ':'.
   Such a pair marks a stamp record, whose 4-byte value follows the ':'.

   A stamp at file offset 0 has no preceding NUL, so it is checked for
   separately when the window starts at the beginning of the file.

   pos  log position to search backward from
   Returns the stamp value in the low 32 bits and, in the high 32 bits, the
   file position of that 4-byte value (one past the ':'). Returns 0 when the
   log cannot be opened or read, or when no stamp is found. */
U64 log_stamp_of(U32 pos){
  enum { WINDOW = 1024 };
  char buf[WINDOW];

  // Clamp the window to the start of the file: logs shorter than 1K must not
  // produce a wrapped-around seek offset.
  U32 span  = pos < WINDOW ? pos : WINDOW;
  U32 start = pos - span;

  FILE* f = fopen(DBFILE_LOG,"r");
  if(!f)
    return 0;

  size_t got = 0;                    // bytes of buf that hold real file data
  if(fseek(f,(long)start,SEEK_SET)==0)
    got = fread(buf,1,span,f);
  fclose(f);

  U32 stamp;
  const size_t need = 2 + sizeof stamp;   // NUL + ':' + stamp value

  if(got >= need){
    // Scan from the last position where a complete match fits, backward.
    for(size_t i = got - need + 1; i-- > 0; ){
      if(buf[i]==0 && buf[i+1]==':'){
        // memcpy: the value is not aligned inside buf
        memcpy(&stamp,buf+i+2,sizeof stamp);
        U32 value_pos = start + (U32)i + 2;
        return (((U64)value_pos)<<32) | stamp;
      }
    }
  }

  // First record of the file: a stamp here is not preceded by a NUL.
  if(start==0 && got >= 1 + sizeof stamp && buf[0]==':'){
    memcpy(&stamp,buf+1,sizeof stamp);
    return (((U64)1)<<32) | stamp;
  }

  return 0;
}
|
|
|
|
/*******************************************************************************
|
|
|
|
|
|
|
|
******************************************************************************/
|
|
|
|
void log_open(){
|
|
flog = fopen(DBFILE_LOG,"a");
|
|
if(!flog){
|
|
perror("fucked");
|
|
}
|
|
U32 here = ftell(flog);
|
|
last_stamp = log_stamp_of(here);
|
|
// printf("HERE: %d Now: %d\n",here,LS_STAMP);
|
|
}
|
|
|
|
/*******************************************************************************
|
|
|
|
|
|
|
|
******************************************************************************/
|
|
void log_close(){
|
|
fclose(flog);
|
|
}
|
|
|
|
/*******************************************************************************
|
|
|
|
|
|
|
|
******************************************************************************/
|
|
/* log_write
|
|
Write a URL into the log, returning a file position of the entry.
|
|
If necessary, precede the entry with a timestamp.
|
|
|
|
|
|
*/
|
|
U32 log_write(char* url){
|
|
U32 here = ((U32)ftell(flog));
|
|
here += stamp_maybe_write(here);
|
|
|
|
#ifdef ERRLOG
|
|
fputs(url,errlog);
|
|
fputc('\n',errlog);
|
|
|
|
#endif
|
|
|
|
fputs(url,flog);
|
|
|
|
putc(0,flog); // null-term
|
|
return here;
|
|
}
|
|
/*******************************************************************************
|
|
|
|
|
|
|
|
******************************************************************************/
|
|
// log_read read log entry at pos into buf
|
|
/* Read up to 256 bytes of log data starting at `pos` into `buf`.

   The log file is opened separately for reading and closed again before
   returning. The data is copied raw: this function adds no terminator, and
   the bytes may run past the end of the record that starts at `pos`.

   pos  log position to read from
   buf  destination; must have room for at least 256 bytes
   Returns the number of bytes read, or 0 when the log cannot be opened or
   the position cannot be reached. */
U32 log_read(U32 pos,char* buf){
  FILE* f = fopen(DBFILE_LOG,"r");
  if(!f)
    return 0;

  U32 got = 0;
  if(fseek(f,pos,SEEK_SET)==0)       // never read from an unintended position
    got = (U32)fread(buf,1,256,f);

  fclose(f);
  return got;
}
|
|
/*******************************************************************************
|
|
|
|
|
|
|
|
******************************************************************************/
|