This commit is contained in:
stacksmith 2022-03-04 17:37:21 -05:00
commit 42d5556630
15 changed files with 1307 additions and 0 deletions

51
Makefile Normal file
View File

@ -0,0 +1,51 @@
# http://creativecommons.org/publicdomain/zero/1.0/
bin := ~/bin
SPADIR := $(HOME)/.spa
logfile := '"$(SPADIR)/spa.log"'
map20file := '"$(SPADIR)/spa.map20"'
all: link
gemtext.o: gemtext.c global.h
gcc -Wall -c gemtext.c
log.o: log.c global.h
gcc -Wall -c -DDBFILE_LOG=$(logfile) -DDBFILE_MAP=$(map20file) log.c
sigil.o: sigil.c global.h
gcc -Wall -c sigil.c
db.o: db.c global.h log.h
gcc -c -Wall -DDBFILE_LOG=$(logfile) -DDBFILE_MAP=$(map20file) db.c
comm.o: comm.c global.h
gcc -Wall -c comm.c
url.o: url.c global.h
gcc -Wall -c url.c
main.o: main.c global.h
gcc -Wall -c main.c
link: main.o db.o comm.o url.o sigil.o gemtext.o log.o
gcc -Wall -o spa main.o db.o comm.o url.o sigil.o gemtext.o log.o
strip spa
mv spa $(bin)/
lookup.o: lookup.c db.h global.h sigil.h
gcc -Wall -c lookup.c
lookup: lookup.o db.o log.o sigil.o
gcc -Wall -o lookup lookup.o db.o log.o sigil.o
mv lookup $(bin)/
# Remove build products and the installed binary. Both rm calls use -f so the
# target also succeeds when spa has not been built or installed yet.
clean:
	rm -f *~ *# *.o spa
	rm -f $(bin)/spa
wipe:
rm -f $(SPADIR)/spa.log
rm -f $(SPADIR)/spa.map20
truncate -s 1K $(SPADIR)/spa.log
truncate -s 4M $(SPADIR)/spa.map20

36
README.md Normal file
View File

@ -0,0 +1,36 @@
# spa - a Spartan Client powered by HorseWare
WIP!
`spa` is an experimental terminal client for the [Spartan Protocol](https://portal.mozz.us/spartan/spartan.mozz.us/), a minimized, unencrypted version of the Gemini protocol.
`spa` is built as a proof of concept and a testbed for a slightly off-the-beaten path technology inspired by Whinam methodologies.
On the surface it is just a simplistic browser. Internally, an immutable logbase tracks every spartan URL ever encountered, assigning 4-character sigils for display and access. Sigils may be used instead of the URLs they represent, and are much easier to memorize, type in, or otherwise keep track of. The database acts as a URL-shortener, a bookmark system, and a history log.
## Usage
`spa <url>` to visit a spartan URL
`spa <sigil>` to visit a sigil
Currently, the output is sent to the terminal, after generating appropriate sigils. Further navigation using sigils is now possible by re-invoking spa.
### Sigils
Sigils are 4-character mnemonics assigned for each URL visited or rendered by `spa`. Each character may be an uppercase alpha character (but not 'O' or 'I') or a digit (but not '0' or '1'). This allows for 32 possibilities representing 5 bits; 4 such characters provide for 20 bits, or 1M (1048576) possible URLs (sufficient for Spartan!)
## Requirements
A linux machine with a GCC compiler.
## Resources
The compiled binary is < 20K.
Upon installation, a `spa.log` file is created in the `.spa` directory. It will contain every spartan URL seen by the system. Each URL is stored once and only once. In addition, a `spa.map20` file is created containing an index for fast lookup (hashtable). The index file is fixed at 4MB, but it is a sparse file and will generally take up substantially less (and never more) space.
### Installing
Modify the Makefile to set your build folder, the folder into which spa will be placed (I keep a ~/bin directory which is in my PATH).
Create a .spa directory in your HOME (unless you want it elsewhere, but change the Makefile accordingly);
`make` to build spa and move it to the specified bin directory.
`make wipe` to create the log and index files in the .spa directory.

66
comm.c Normal file
View File

@ -0,0 +1,66 @@
/*
spb - a very simple toy spartan client
*/
#include <stdio.h> //printf
#include <string.h> //strlen
#include <sys/socket.h> //socket
#include <arpa/inet.h> //inet_addr
#include <unistd.h>
#include <netdb.h>
#include "global.h"
char req[1000];
char reply[0x10000];
/*
hostname_to_ip - resolve a host name to an IPv4 address.

Returns the first address reported by gethostbyname(), in the byte order the
resolver delivers it (ready to be stored in sin_addr.s_addr), or 0 when the
lookup fails or does not yield a 4-byte IPv4 address.
*/
long hostname_to_ip(char * hostname){
  struct hostent *he = gethostbyname(hostname);
  if (he == NULL) {
    herror("gethostbyname");
    return 0;
  }
  // Only a 4-byte AF_INET entry fits the sockaddr_in the caller fills in.
  if (he->h_addrtype != AF_INET ||
      he->h_length != (int)sizeof(struct in_addr) ||
      he->h_addr_list[0] == NULL)
    return 0;
  // Copy exactly the four address bytes. Reading the entry through a long*
  // would fetch eight bytes on LP64 systems and break the aliasing rules.
  struct in_addr addr;
  memcpy(&addr, he->h_addr_list[0], sizeof addr);
  return (long)addr.s_addr;
}
struct sockaddr_in server;
/*
get - send a Spartan request for path to host and copy the raw reply into f.

The request line is "<host> <path> 0\r\n" (no upload body). A port of 0
selects the default port 300.

Returns 0 on success, or a negative code:
  -1 socket could not be created     -2 host did not resolve
  -3 connect failed                  -4 send failed
  -5 host and path do not fit the request buffer
The socket is closed on every path.
*/
int get(FILE* f,char* host, U32 port,char* path){
  // Build the request before touching the network so an oversized host/path
  // is rejected instead of overrunning req[].
  int reqlen = snprintf(req, sizeof req, "%s %s 0\r\n", host, path);
  if (reqlen < 0 || (size_t)reqlen >= sizeof req) {
    puts("Request too long");
    return -5;
  }

  int sock = socket(AF_INET , SOCK_STREAM , 0);
  if (sock == -1){
    printf("Could not create socket\n");
    return -1;
  }

  int rc = 0;
  ssize_t len;

  if(!(server.sin_addr.s_addr = hostname_to_ip(host))) {
    rc = -2;
    goto out;
  }
  server.sin_family = AF_INET;
  server.sin_port = htons(port?port:300);
  if (connect(sock , (struct sockaddr*)&server , sizeof(struct sockaddr_in)) < 0) {
    perror("Could not connect");
    rc = -3;
    goto out;
  }
  if( send(sock , req , (size_t)reqlen , 0) < 0) {
    puts("Send failed");
    rc = -4;
    goto out;
  }
  // Stream the reply until the server closes the connection.
  while( (len = recv(sock, reply, sizeof(reply), 0)) > 0){
    fwrite(reply,1,(size_t)len,f);
  }

out:
  close(sock);
  return rc;
}

155
db.c Normal file
View File

@ -0,0 +1,155 @@
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>
#include <time.h>
#include "global.h"
#include "sigil.h"
#include "log.h"
/*
The database consists of two files;
* An immutable log with timestamped entries of variable size (URLs);
* An index mapping 20-bit values to corresponding log positions
Terminology:
* LOG * - an append-only log containing:
- :<U32-timestamp><0>
- /<url-string><0>
* MAP * - a memory-mapped array of 1M 4-byte entries, used as a hashtable
* IDX * - a 20-bit index into MAP
* OFFSET * - an offset into LOG (for URLs);
* NICK * - a 4-character mnemonic string convertible to an IDX
An entry stored in MAP contains an offset to a URL, and some metadata. The
index of an entry is hashed from the URL (linear probing used on collisions).
A NICK may be used to communicate the IDX.
*/
#define IDX_ENTRY_SIZE 4
#define tIDX_ENTRY U32
// Index size is 20 bits...
#define IDX_SIZE (0x100000 * IDX_ENTRY_SIZE)
#define IDX_MASK 0xFFFFF
// Each entry contains an offset into log
#define OFF_MASK 0x7FFFFFFF
/*******************************************************************************
LOG
Current log state is kept here.
TIMESTAMPS
==========
<:><U32-time-stamp><0>
The log contains timestamp records interleaved with other records. As a matter
of policy, not every record is preceded with a stamp.
Currently, the policy is:
* Any 1K run of log data must contain at least one stamp;
* If the previous stamp is more than 59 seconds in the past, a stamp now.
To enforce this policy, the application keeps a variable last_stamp, a 64-bit
value containing:
| 0 - 31 | last-stamp-value |
| 32- 63 | last stamp file position |
******************************************************************************/
#ifdef ERRLOG
extern FILE* errlog;
#endif
tIDX_ENTRY* map;
/*******************************************************************************
******************************************************************************/
/*******************************************************************************
******************************************************************************/
int sys_open(){
log_open();
int fd = open(DBFILE_MAP,O_RDWR);
if(fd == -1) {
printf("unable to open map file %s\n",DBFILE_MAP);
return -1;
}
// map to memory
map = (tIDX_ENTRY*) mmap(NULL,IDX_SIZE,PROT_READ|PROT_WRITE,MAP_SHARED,fd,0);
close(fd);
if(MAP_FAILED==map){
printf("mmap failed\n");
return -2;
} else {
return 0;
}
}
/*******************************************************************************
******************************************************************************/
// sys_close - shut the database down: close the log, then unmap the index.
void sys_close(){
log_close();
munmap(map,IDX_SIZE);
}
/*******************************************************************************
URL_idx - intern a URL and return its idx.

The idx starts at the 20-bit hash of str. Occupied slots are compared with
the log entry they point to; on a mismatch the next slot is tried (linear
probing). When an empty slot is reached the URL is appended to the log and
the slot is set to the new entry's position.
******************************************************************************/
U32 URL_idx(char* str){
  U32 idx = string_hash(str);
  // log_read() delivers at most 256 bytes and does not promise a terminator,
  // so keep one spare byte and terminate here before comparing.
  char buf[257];
  U32 off;
  while((off=map[idx])){            // as long as the slot is taken,
    U32 got = log_read(off,buf);    // look at what it refers to, and
    buf[got < 256 ? got : 256] = 0;
    if(!strcmp(str,buf))            // see if it is our needle. If so,
      return idx;                   // we already have it, return it.
    idx = (idx+1) & IDX_MASK;       // otherwise, linear-probe next.
  }
  // Not found: idx now names an empty slot, so create the entry there.
  U32 pos = log_write(str);
  map[idx]= pos;
  return idx;
}
/*******************************************************************************
******************************************************************************/
/*
idx_URL - read the URL stored under idx into buf.

Returns whatever log_read() returns for the slot's log position, or 0 when
idx lies outside the 20-bit index range or the slot is empty. The range
check matters because sigil_idx() reports a bad sigil as (U32)-1.
*/
U32 idx_URL(U32 idx,char* buf){
  if(idx > IDX_MASK)
    return 0;
  U32 offset = map[idx];
  if(!offset)
    return 0;
  return log_read(offset,buf);
}

6
db.h Normal file
View File

@ -0,0 +1,6 @@
// Open the log and memory-map the index; returns 0 on success, negative on failure.
int sys_open();
// Close the log and unmap the index.
void sys_close();
// Encode idx as a 4-character sigil; writes 4 characters plus a terminating 0.
void idx_sigil(U32 idx,char*sigil);
// Decode a 4-character sigil into its idx.
U32 sigil_idx(char* sigil);
// Return the idx under which the URL str is stored, appending it to the log when new.
U32 URL_idx(char* str);
// Read the log entry referenced by idx into buf; returns 0 when the slot is empty.
U32 idx_URL(U32 idx,char* buf);

165
gemtext.c Normal file
View File

@ -0,0 +1,165 @@
#include <stdio.h>
#include <string.h>
#include "global.h"
#include "db.h"
#include "url.h"
#include <stdlib.h> //exit
extern sUrl base; //the url being rendered
/* skip_to_ws - return a pointer to the first space, tab or newline in p, or
   to the terminating 0 when p contains none of them. */
char* skip_to_ws(char* p){
  return p + strcspn(p," \t\n");
}
/* skip_ws - return a pointer to the first character of p that is not a
   space, tab or newline (possibly the terminating 0). */
char* skip_ws(char*p){
  return p + strspn(p," \t\n");
}
/* url_length - number of characters in url before the first space, tab,
   newline or the end of the string. */
U32 url_length(char* url){
  char* stop = url;
  while(*stop && ' ' != *stop && '\t' != *stop && '\n' != *stop)
    stop++;
  return stop - url;
}
char linebuf[4096];
/*******************************************************************************
render_link_line - Pull out the URL, normalize, and check with the db for the
sigil. Format line for display.
******************************************************************************/
void render_link_line(FILE*out,char*p){
// printf("[%s]\n",p);
char* u = skip_ws(p+2);
U32 ulen = url_length(u);
sUrl url;
if(url_is_full(u)){
sUrl_set(&url,u,ulen);
sUrl_full(&url);
} else {
sUrl_copy(&url,&base);
sUrl_file(&url);
// printf("[r[%*.*s]r]",url_len,url_len,url_start);
//printf("[b[%s]b]",base.buf);
sUrl_rel(&url,u,ulen);
// printf("[[%s]]",url.buf);
}
// make a shortcut!
// printf("[[[%s]]]", url.buf);
if(!sUrl_norm(&url)){
// URL normalized OK. Only spartan urls are databased.
if(!(strncmp("spartan://",url.buf,10))) {
char sigil[5];
U32 idx = URL_idx(url.buf);
idx_sigil(idx,sigil);
//fprintf(out,"\033[33m%s\033[0m ",sigil);
fprintf(out,"\033[33m%s\033[0m ",sigil);
p = skip_ws(u+ulen);
if(*p)
fprintf(out,"\033[92m%s\033[0m",p);
else {
fprintf(out,"\033[92m%*.*s\033[0m\n",ulen,ulen,u);
}
} else { //non-spartan urls are printed as-is
fprintf(out,"\033[95m???? %s\033[0m",u);
}
} else { //an attempt to manipulate a path
fprintf(out,"\033[91mXXXX %s\033[0m",u);
}
}
/*******************************************************************************
render_head - Render a header line, # ## or ###
******************************************************************************/
/* render_head - print a heading line. The leading '#' characters are counted
   and dropped; one, two or three of them select a colour, any other count
   prints the text without a colour prefix. The colour reset always follows. */
void render_head(FILE* out, char*p){
  static const char* const colour[] = {
    "", "\033[93m", "\033[94;1m", "\033[94m"
  };
  int level = 0;
  while('#' == *p){
    p++;
    level++;
  }
  if(level >= 1 && level <= 3)
    fputs(colour[level],out);
  fprintf(out,"%s\033[0m",p);
}
/*******************************************************************************
render_quote - Render a > quote line
******************************************************************************/
/* render_quote - print a '>' quote line in the quote colour, followed by the
   colour reset. */
void render_quote(FILE* out,char*p){
  fputs("\033[36m",out);
  fputs(p,out);
  fputs("\033[0m",out);
}
/*******************************************************************************
render_line - dispatch to a proper line handler.
*******************************************************************************/
/*
Return 0 for normal, 1 for block-quote status.
*/
/*
render_line - render one input line p to out.

blockquote is the state returned by the previous call: non-zero while inside
a ``` fenced block. A line starting with ``` toggles the state and prints
nothing. Inside a fence the line is copied verbatim; outside, it is handed to
the link, heading or quote renderer according to its first characters, and
copied verbatim otherwise.

Returns the new state: 1 inside a fenced block, 0 otherwise.
*/
int render_line(FILE* out,char* p, int blockquote){
  if(!strncmp("```",p,3))
    return blockquote ? 0 : 1;

  if(blockquote){
    fputs(p,out);
    return 1;
  }

  switch(*p){
  case '=':
    if('>'==p[1]){
      render_link_line(out,p);
      return 0;
    }
    break;  // a lone '=' is ordinary text; it must not fall into the heading case
  case '#':
    render_head(out,p);
    return 0;
  case '>':
    render_quote(out,p);
    return 0;
  default:
    break;
  }
  fputs(p,out);
  return 0;
}
/* render_file1 - read in line by line and render each line to out, carrying
   the fenced-block state from one line to the next. */
void render_file1(FILE* in,FILE* out){
  int in_block = 0;
  for(;;){
    if(!fgets(linebuf,4096,in))
      break;
    in_block = render_line(out,linebuf,in_block);
  }
}
/*******************************************************************************
render_file - Render the entire in file to out file
******************************************************************************/
/* Reads in one line at a time into linebuf and passes it to render_line,
   feeding each call the fenced-block state returned by the previous one. */
void render_file(FILE* in,FILE* out){
  int fenced = 0;
  while(NULL != fgets(linebuf,sizeof linebuf,in)){
    fenced = render_line(out,linebuf,fenced);
  }
}

14
global.h Normal file
View File

@ -0,0 +1,14 @@
#ifndef GLOBAL_H
#define GLOBAL_H
#include <stdint.h>
/* Fixed-width unsigned shorthands used throughout the project. */
typedef uint8_t U8;
typedef uint16_t U16;
typedef uint32_t U32;
typedef uint64_t U64;
/* Application constants */
// DBFILE_LOG and DBFILE_MAP are not defined here: the Makefile passes them
// with -D when compiling log.c and db.c. Example values:
//#define DBFILE_LOG "/home/stack/.spa/spa.log"
//#define DBFILE_MAP "/home/stack/.spa/spa.map20"
// tempfiles are dumped into working directory
#define TEMPFILE_GMI "tempfile.gmi"
#define TEMPFILE_OUT "tempfile.out"
#endif /* GLOBAL_H */

208
log.c Normal file
View File

@ -0,0 +1,208 @@
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>
#include <time.h>
#include "global.h"
#include "sigil.h"
/*
The database consists of two files;
* An immutable log with timestamped entries of variable size (URLs);
* An index mapping 20-bit values to corresponding log positions
Terminology:
* LOG * - an append-only log containing:
- :<U32-timestamp><0>
- /<url-string><0>
* MAP * - a memory-mapped array of 1M 4-byte entries, used as a hashtable
* IDX * - a 20-bit index into MAP
* OFFSET * - an offset into LOG (for URLs);
* NICK * - a 4-character mnemonic string convertible to an IDX
An entry stored in MAP contains an offset to a URL, and some metadata. The
index of an entry is hashed from the URL (linear probing used on collisions).
A NICK may be used to communicate the IDX.
*/
#define IDX_ENTRY_SIZE 4
#define tIDX_ENTRY U32
// Index size is 20 bits...
#define IDX_SIZE (0x100000 * IDX_ENTRY_SIZE)
#define IDX_MASK 0xFFFFF
// Each entry contains an offset into log
#define OFF_MASK 0x7FFFFFFF
/*******************************************************************************
LOG
Current log state is kept here.
TIMESTAMPS
==========
<:><U32-time-stamp><0>
The log contains timestamp records interleaved with other records. As a matter
of policy, not every record is preceded with a stamp.
Currently, the policy is:
* Any 1K run of log data must contain at least one stamp;
* If the previous stamp is more than 59 seconds in the past, a stamp now.
To enforce this policy, the application keeps a variable last_stamp, a 64-bit
value containing:
| 0 - 31 | last-stamp-value |
| 32- 63 | last stamp file position |
******************************************************************************/
#ifdef ERRLOG
extern FILE* errlog;
#endif
FILE* flog;
U64 last_stamp = 0;
#define LS_STAMP ((U32)(last_stamp & 0xFFFFFFFF))
#define LS_POS ((U32)(last_stamp>>32))
#define STAMP_SIZE 6
/*******************************************************************************
stamp_write Given now, a U32 current time in seconds, and here, the current
position in the file
******************************************************************************/
/* Appends a stamp record (':', the four bytes of now, then a 0 byte) to the
   log and remembers it in last_stamp: now in the low half, here in the high
   half. Returns the record size. */
U32 stamp_write(U32 now,U32 here){
  last_stamp = (((U64)here) << 32) | now;
  fputc(':',flog);
  fwrite(&now,sizeof now,1,flog);
  fputc(0,flog);
  return STAMP_SIZE;
}
/*******************************************************************************
stamp_maybe_write Consult the stamp policy and write a stamp when needed
******************************************************************************/
/* Writes a stamp at position here when the last stamp is more than 59
   seconds old, or lies far enough back that a 1K run would otherwise hold no
   stamp. Returns the number of bytes written (0 when no stamp was needed). */
U32 stamp_maybe_write(U32 here){
  U32 now = time(NULL);
  int stale   = (now - LS_STAMP) > 59;
  int too_far = (here - LS_POS) > (1024-STAMP_SIZE);
  if(!(stale || too_far))
    return 0;
  return stamp_write(now,here);
}
/*******************************************************************************
******************************************************************************/
/* log_stamp_of(U32 pos)
Find the most recent stamp in the (up to) 1K of log data that precedes pos.

Return the stamp value (low U32) and the file position of its timestamp
bytes (high U32), or 0 when the log cannot be read or holds no stamp in that
window.
*/
U64 log_stamp_of(U32 pos){
  char buf[1024];
  // Logs shorter than the window are scanned from their beginning.
  U32 start = (pos > sizeof buf) ? pos - (U32)sizeof buf : 0;

  FILE* f = fopen(DBFILE_LOG,"r");
  if(!f)
    return 0;
  size_t got = 0;
  if(0 == fseek(f,(long)start,SEEK_SET))
    got = fread(buf,1,pos - start,f);
  fclose(f);

  // Scan backward for '<0>:'. Only bytes actually read are examined, and the
  // four timestamp bytes after the ':' must lie inside them as well.
  for(long i = (long)got - STAMP_SIZE; i >= 0; i--){
    if((buf[i]==0)&&(buf[i+1]==':')){
      U32 stamp;
      memcpy(&stamp,buf+i+2,sizeof stamp);  // the bytes may be misaligned
      printf("stamp detected at %08x\n",start+(U32)i+1);
      return (((U64)(start+(U32)i+2))<<32) | stamp;
    }
  }
  return 0;
}
/*******************************************************************************
******************************************************************************/
void log_open(){
flog = fopen(DBFILE_LOG,"a");
if(!flog){
perror("fucked");
}
U32 here = ftell(flog);
last_stamp = log_stamp_of(here);
// printf("HERE: %d Now: %d\n",here,LS_STAMP);
}
/*******************************************************************************
******************************************************************************/
/* log_close - close the log stream if it is open. Safe to call when the log
   was never opened or has already been closed. */
void log_close(){
  if(flog){
    fclose(flog);
    flog = NULL;
  }
}
/*******************************************************************************
******************************************************************************/
/* log_write
Write a URL into the log, returning a file position of the entry.
If necessary, precede the entry with a timestamp.
The entry is flushed before returning: log_read() opens its own read handle,
so a record still sitting in flog's buffer would be invisible to lookups made
later in the same run.
*/
U32 log_write(char* url){
  U32 here = ((U32)ftell(flog));
  here += stamp_maybe_write(here);
#ifdef ERRLOG
  fputs(url,errlog);
  fputc('\n',errlog);
#endif
  fputs(url,flog);
  putc(0,flog);       // null-term
  fflush(flog);
  return here;
}
/*******************************************************************************
******************************************************************************/
// log_read - read up to 256 bytes of the log starting at pos into buf, which
// must hold at least 256 bytes. Returns the number of bytes read; 0 when the
// log cannot be opened or positioned. buf is always terminated within those
// 256 bytes, so an entry of 256 or more characters is cut to 255.
U32 log_read(U32 pos,char* buf){
  U32 ret = 0;
  FILE* f = fopen(DBFILE_LOG,"r");
  if(f){
    if(0 == fseek(f,(long)pos,SEEK_SET))
      ret = fread(buf,1,256,f);
    fclose(f);
  }
  buf[ret < 256 ? ret : 255] = 0;
  return ret;
}
/*******************************************************************************
******************************************************************************/

5
log.h Normal file
View File

@ -0,0 +1,5 @@
// Search the 1K of log before pos for the latest stamp; the result packs the
// stamp (low 32 bits) with its position (high 32 bits), or is 0 when none is found.
U64 log_stamp_of(U32 pos);
// Open the log for appending and pick up the most recent stamp.
void log_open();
// Close the log stream.
void log_close();
// Append url to the log, preceded by a stamp when the policy asks for one;
// returns the file position of the entry.
U32 log_write(char* url);
// Read up to 256 bytes of the log starting at pos into buf; returns the byte count.
U32 log_read(U32 pos,char* buf);

33
lookup.c Normal file
View File

@ -0,0 +1,33 @@
#include <stdio.h>
#include <string.h>
#include "global.h"
#include "db.h"
FILE* errlog;
#undef ERRLOG
/*
doit - look up the sigil req and print "[sigil](@byte-offset-in-map)url".

Returns 0 when the sigil resolves to a stored URL; otherwise prints
"Not Found" and returns 1.
*/
int doit(char* req){
  char buf[1024];
  U32 idx = sigil_idx(req);
  // A sigil outside the 20-bit range (sigil_idx() reports bad input as
  // (U32)-1) must not be used to index the map.
  U32 got = (idx > 0xFFFFF) ? 0 : idx_URL(idx,buf);
  if(!got) {
    printf("Not Found\n");
    return 1;
  }
  // The log read is bounded but not necessarily terminated.
  buf[got < sizeof buf ? got : sizeof buf - 1] = 0;
  printf("[%s](@%X)%s\n",req,idx*4,buf);
  return 0;
}
/* lookup XXXX - print the URL stored under the 4-character sigil XXXX.
   Exit status: 0 when found, 1 on a usage error, when the database cannot be
   opened, or when the sigil is unknown. */
int main(int argc,char*argv[]){
  // argv[1] only exists when exactly one argument was given.
  if(argc != 2 || strlen(argv[1])!=4) {
    printf("Usage: lookup XXXX<enter>, where XXXX is a 4-char sigil\n");
    return 1;
  }
  int ret = sys_open() ? 1 : doit(argv[1]);
  sys_close();
  return ret;
}

87
main.c Normal file
View File

@ -0,0 +1,87 @@
#include <stdio.h>
#include <string.h>
#include "global.h"
#include "db.h"
#include "url.h"
#include <stdlib.h> //exit
#define ERRLOG 1
FILE* errlog;
// Defined in comm.c, where it returns an int status (0 on success, negative
// on failure); the declaration must agree with that definition.
extern int get(FILE* f,char*host,U32 port,char* path);
// Defined in gemtext.c.
extern void render_file(FILE*in,FILE* out);
/* fetch_sUrl
Fetch the resource named by a parsed sUrl into f: the host (without any
":port" suffix), the port and the path are taken from purl and handed to
get(). Nothing is fetched when the host part does not fit the local buffer.
********/
void fetch_sUrl(FILE* f,sUrl* purl){
  char host[256];
  U32 host_len = sUrl_host_length(purl);
  // Also catches oPath < oHost, where the unsigned length wraps to a huge value.
  if(host_len >= sizeof host){
    fprintf(stderr,"spa: malformed URL\n");
    return;
  }
  memcpy(host,purl->buf+purl->oHost,host_len);
  host[host_len]=0;
  char* colon = strchr(host,':');   // port?
  if(colon) *colon=0;
  U32 port = sUrl_port(purl);
  char* path = sUrl_path(purl);
  get(f,host,port,path);
}
sUrl base; // extern'ed in gemtext.c
/*
doit - fetch and render the page named by req.

req is either a 4-character sigil, which is resolved through the database,
or a full URL, which is registered with the database first. The reply is
stored in TEMPFILE_GMI and rendered to stdout.

Returns 0 on success and 1 when the sigil is unknown, req is not a full URL,
the URL cannot be parsed, or the temp file cannot be created.
*/
int doit(char* req){
  char buf[1024];
  char* p = req;
  // If the parameter is exactly 4 long, it is a sigil
  if(4==strlen(req)){
    U32 idx = sigil_idx(req);
    // Bad sigils come back as (U32)-1; never index the map with them.
    U32 got = (idx > 0xFFFFF) ? 0 : idx_URL(idx,buf);
    if(!got){
      printf("Not Found\n");
      return 1;
    }
    buf[got < sizeof buf ? got : sizeof buf - 1] = 0;
    printf("idx %4.4X %s\n",idx,buf);
    p = buf;
  } else { // it must be a full url to start with
    if(!url_is_full(req))
      return 1;
    URL_idx(req); // check the root URL with the db
  }
  if(sUrl_set(&base,p,strlen(p)) || sUrl_full(&base))
    return 1;

  FILE* fgmi = fopen(TEMPFILE_GMI,"w+");
  if(!fgmi){
    perror(TEMPFILE_GMI);
    return 1;
  }
  FILE* fout = stdout;
  fetch_sUrl(fgmi,&base);
  fseek(fgmi,0,SEEK_SET);
  render_file(fgmi,fout);
  fclose(fgmi);
  return 0;
}
extern int index_create();
/* spa <spartan-url> | spa <sigil>
   Opens the database, fetches and renders the request, and returns the
   status of the first step that failed (0 on success, -1 on a usage error). */
int main(int argc,char*argv[]){
  printf("ARGC: %d\n",argc);
  if(argc != 2){
    printf("spa is an experimental Spartan protocol browser\nUsage:\n spa <spartan-url>\n spa <sigil>\n");
    return -1;
  }
  errlog = fopen("errlog.txt","a");
  int failed = sys_open();
  if(!failed){
    failed = doit(argv[1]);
  }
  sys_close();
  if(errlog)            // the error log may have failed to open
    fclose(errlog);
  return failed;
}

99
sigil.c Normal file
View File

@ -0,0 +1,99 @@
#include <stdio.h>
/*
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>
#include <time.h>
*/
#include <ctype.h> //toupper
#include "global.h"
/* SIGILS
A sigil is shortcut to a URL previously seen by the system and databased.
In this application, a sigil corresponds to a 20-bit value, encoded as a 4-char
string (using only capital letters and numbers, but not O,I,0,or 1 to avoid
confusion). Sigils are meant to be somewhat easy to recognize.
Sigils are used as indices into the URL database, resolving as URLS.
Sigils are generated as FNV1a hashes of URLs provided.
*/
#define IDX_SIZE (0x100000 * IDX_ENTRY_SIZE)
#define IDX_MASK 0xFFFFF
#define FNV_OFFSET 14695981039346656037UL
#define FNV_PRIME 1099511628211UL
// working character map, 32 characters
static char charmap[]="ABCDEFGHJKLMNPQRSTUVWXYZ23456789";
/*******************************************************************************
******************************************************************************/
/*******************************************************************************
string_hash - generate a FNV1A hash of 20 bits
******************************************************************************/
/* Folds every byte of str into a 64-bit FNV-1a accumulator (xor, then
   multiply by the prime) and keeps the low 20 bits. */
U32 string_hash(const char* str) {
  const unsigned char* s = (const unsigned char*)str;
  uint64_t acc = FNV_OFFSET;
  while(*s){
    acc = (acc ^ (uint64_t)*s) * FNV_PRIME;
    s++;
  }
  return acc & IDX_MASK;
}
/*******************************************************************************
char_to_c5bit Convert a 5-bit encoded as a character to a 5-bit value
******************************************************************************/
/* char_to_c5bit - map a sigil character (either case) to its 5-bit value,
   i.e. its position in charmap. Returns -1 for characters that are not part
   of the sigil alphabet. */
int char_to_c5bit(int ch){
  // toupper() is only defined for unsigned char values and EOF; callers pass
  // plain chars, which may be negative.
  int c = toupper((unsigned char)ch);
  for(int i=31;i>=0;i--)
    if(c==charmap[i])
      return i;
  return -1;
}
/*******************************************************************************
idx_sigil Convert a 20-bit idx to a 4-char sigil
******************************************************************************/
/* Writes four charmap characters for the four 5-bit groups of idx, most
   significant group first, followed by a terminating 0 (5 bytes in all). */
void idx_sigil(U32 idx,char* sigil){
  sigil[0] = charmap[(idx >> 15) & 0x1F];
  sigil[1] = charmap[(idx >> 10) & 0x1F];
  sigil[2] = charmap[(idx >> 5) & 0x1F];
  sigil[3] = charmap[idx & 0x1F];
  sigil[4] = 0;
}
/*******************************************************************************
sigil_idx Convert a 4-char null-termed sigil to a 20-bit idx
******************************************************************************/
U32 sigil_idx(char* sigil){
int result = 0;
for(int i=0;i<4;i++){
int code = char_to_c5bit(*sigil++);
if(result<0)
return -1;
result = (result << 5) + code;
}
// printf("Converted sigil %s to idx %X\n",sigil,result);
return result;
}

5
sigil.h Normal file
View File

@ -0,0 +1,5 @@
// Encode idx as a 4-character sigil; writes 4 characters plus a terminating 0.
void idx_sigil(U32 idx,char* sigil);
// Decode a 4-character sigil into its idx.
U32 sigil_idx(char* sigil);
// FNV-1a hash of str, reduced to 20 bits.
U32 string_hash(const char* str);

350
url.c Normal file
View File

@ -0,0 +1,350 @@
#include <stdio.h>
#include <string.h>
#include "global.h"
#include "url.h"
/*
[scheme[p://] [host[:port]] [/path] [?query]
*/
/*******************************************************************************
URL processing
URLs start their life as strings from the user or links embedded in gmi files.
sUrl structure encapsulate a local copy of the URL and facilitate normalization.
An sUrl may be initialized in one of the following ways:
* copied from another sUrl;
* set from a full sUrl;
* set from a partial url using another, base sUrl.
It is processed in order to set internal component offsets.
It is normalized to eliminate . and .. components.
Notes:
port is tracked as an actual port and not an offset!
oFile is used only for base sUrls, and is normally not searched/initialized.
******************************************************************************/
/* Returns 1 when url begins with "spartan://", 0 otherwise. */
int url_is_spartan(char* url){
  static const char scheme[] = "spartan://";
  return 0 == strncmp(scheme,url,sizeof scheme - 1);
}
/* Returns 1 when url contains "://" anywhere, 0 otherwise. */
int url_is_full(char* url){
  return strstr(url,"://") ? 1 : 0;
}
/* Returns a pointer into purl's buffer at the recorded path offset. */
char* sUrl_path(sUrl* purl){
  return &purl->buf[purl->oPath];
}
/* Returns the port number recorded in purl. */
U16 sUrl_port(sUrl* purl){
  U16 port = purl->port;
  return port;
}
/*******************************************************************************
sUrl_dump - print the sUrl: scheme, host, port and path when the path offset
has been set, otherwise the raw buffer marked as invalid.
******************************************************************************/
void sUrl_dump(sUrl* purl){
  printf("sUrl at %p\n",(void*)purl);   // %p requires a void*
  int ohost = purl->oHost;
  int opath = purl->oPath;
  char* buf = purl->buf;
  if(!opath){
    printf("invalid:[%s]\n",buf);
    return;
  }
  int hostlen = opath-ohost;
  printf("scheme: %*.*s\n",ohost,ohost,buf);
  printf("host: %*.*s\n",hostlen,hostlen,buf+ohost);
  printf("port: %d\n",purl->port);
  printf("path: %s\n",buf+opath);
}
// fputs(purl->buf,f);
/*******************************************************************************
sUrl_set - load the first len characters of str into the sUrl (the whole
string when len is 0). Offsets are cleared and the port reset to 300, so the
sUrl needs further processing!

Return: 0 on success; 1 if the text plus its terminator does not fit.
******************************************************************************/
int sUrl_set(sUrl*p,char*str,U32 len){
  if(!len)
    len = strlen(str);
  // buf holds URL_MAX bytes including the terminator written below.
  if (len >= URL_MAX) return 1; // didn't fit
  p->oHost = 0;
  p->oPath = 0;
  p->oFile = 0;
  p->port = 300;
  strncpy(p->buf,str,len);
  p->buf[len]=0;
  return 0;
}
/*******************************************************************************
Copy the src sUrl into the destingation sUrl, along with all components.
******************************************************************************/
/* Duplicates the whole of src (buffer and component fields) into dst and
   returns dst. */
sUrl* sUrl_copy(sUrl* dst, sUrl* src){
  *dst = *src;
  return dst;
}
/*******************************************************************************
file Return the pointer to the file component of this URL, and set the internal
offset.
******************************************************************************/
/* sUrl_file - locate the file component (the text after the last '/') and
   record its offset in oFile. When the buffer holds no '/' at all, the whole
   buffer is treated as the file component. Returns a pointer to it. */
char* sUrl_file(sUrl*p){
  char* slash = strrchr(p->buf,'/');
  char* pfile = slash ? slash + 1 : p->buf;   // never compute NULL + 1
  p->oFile = pfile - (p->buf);
  return pfile;
}
U32 host_port(char* p){
char c;
do {
c=*p++;
if(!c) return 300; // at end? default
if('/'==c) return 300; // at /? default
if(':'==c) break; // at : parse
} while (1);
U32 val = 0;
while((c=*p++)) {
c-= '0';
if( (c<0) || (c>9))
return 0; //invalid port
val = (val * 10) + c;
}
return val;
}
/*******************************************************************************
full - parse the buffer of this sUrl to set the internal offsets (but not file),
assuming a full URL (it must have a ://).
Return: 0 on success; 1 if not a full URL.
******************************************************************************/
/* url_do_full -- Try to parse a full URL */
//
int sUrl_full(sUrl* purl){
char* p = strstr(purl->buf,"://"); //if scheme is there it ends with spartan|://|host|/|
if(!p) return 1; //no scheme sigil = error!
p+=3;
U32 port = host_port(p);
if(!p) return 1; //invalid port
purl->port = port;
purl->oHost = p-(purl->buf);
char* path = strchr(p+3,'/'); //host ends with a /path
if(!path) { // if no path provided,
path = p + 3 + (strlen(p+3));
*path = '/';
*(path+1)=0;
}
purl->oPath = path - purl->buf;
return 0;
}
/*******************************************************************************
rel - given a base sUrl and a string with a local url, mutate the sUrl into a
full url. A local url starting with '/' replaces everything from the path
offset; any other replaces everything from the file offset (see sUrl_file).
Text that does not fit the buffer is truncated.
******************************************************************************/
void sUrl_rel(sUrl*base,char* str,U32 len){
  U32 at = ('/'==*str) ? base->oPath : base->oFile;
  if(at >= URL_MAX)
    return;                        // offset already outside the buffer
  U32 room = URL_MAX - 1 - at;     // keep one byte for the terminator
  if(len > room)
    len = room;
  char* dst = base->buf + at;
  strncpy(dst,str,len);
  dst[len]=0;
}
/*******************************************************************************
norm - given a full but not canonical sUrl, process to eliminate ./ and ../
return: 0 success
1 error
******************************************************************************/
// Walk backward from the character before p, no further than start, and
// return the first '/' met; NULL when there is none.
char* _norm_backup(char* p,char* start){
  while(p > start){
    p--;
    if('/'==*p)
      return p;
  }
  return NULL;
}
/*
sUrl_norm - rewrite the path of a full sUrl in place so that "." and ".."
segments are resolved. Consecutive special segments (for example "/./../")
are all processed.

Return: 0 on success; 1 when a "../" would climb above the start of the
path; 2 when a trailing ".." would.
*/
int sUrl_norm(sUrl* purl){
  char* start = purl->buf + purl->oPath;
  char* src = start;
  char* dst = start;
  char c;
  while((c = (*dst++ = *src++))){
    if('/' != c)
      continue;
    // A '/' was just copied: src is on the first character of the next
    // segment and dst just past the copied '/'. Consume every "./" and "../"
    // that follows, re-examining after each one so that none slips through.
    for(;;){
      if(!strncmp(src,"./",2)){
        src += 2;
      } else if(!strncmp(src,"../",3)){
        if(!(dst = _norm_backup(dst-1,start)))
          return 1;
        dst++;                       // keep the parent's slash
        src += 3;
      } else {
        break;
      }
    }
    if(!strcmp(src,"..")){           // path ends in "..": drop last segment
      if(!(dst = _norm_backup(dst-1,start)))
        return 2;
      dst++;                         // leave the slash
      *dst = 0;
      break;
    }
    if(!strcmp(src,".")){            // path ends in ".": keep the slash only
      *dst = 0;
      break;
    }
  }
  *start = '/';
  return 0;
}
/*******************************************************************************
******************************************************************************/
/* Distance from the host offset to the path offset, i.e. the length of the
   "host[:port]" text once the sUrl has been parsed. */
U32 sUrl_host_length(sUrl* purl){
  U32 path_at = purl->oPath;
  U32 host_at = purl->oHost;
  return path_at - host_at;
}
/*
char URL_base[1024];
int baselen;
int bhost_off;
int bpath_off;
*/
/*
char buf[2048];
char* scheme;
char* host;
char* path;
int url_make_base(){
strcpy(URL_base,scheme);
baselen = strlen(URL_base);
bhost_off = host - scheme;
bpath_off = path - scheme;
return 0;
}
int url_do_part(char*src){
scheme = src-baselen;
strncpy(scheme,URL_base,baselen);
host = scheme + bhost_off;
path = scheme + bpath_off;
return 0;
}
*/
/* url_do_full -- Try to parse a full URL */
/*
int url_do_full(char*src){
char* p = strstr(src,"://"); //if scheme is there it ends with spartan|://|host|/|
if(!p)
return 1; //no scheme sigil = error!
scheme = src; //scheme at start
host = p+3; //host starts after ://
path = strchr(host,'/'); //host ends with a /path
if(!path) { // if no path provided,
int i = strlen(host); // append a '/' and make one.
path = host+i;
*path='/';
*(path+1)=0;
}
return 0;
}
//int url_is_full(char*src){
char* path_norm(char* path){
*(path-1) = 0;
char* src = path;
char* dst = path;
char c;
while((c=(*dst++ = *src++))){
if('/'==c){
if(!strncmp(src,"../",3)){
if(!(dst = path_backup(dst-1)))
return NULL;
src += 2;
} else if(!strncmp(src,"./",2)) {
src += 2;
} else if(!strcmp(src,"..")){
if(!(dst = path_backup(dst-1)))
return NULL;
dst++; // leave the slash
src += 2;
// printf("src:[%s]; dst:[%s];\n",src,dst);
} else if(!strcmp(src,".")){
src++;
}
}
}
return path;
}
*/
/*
int main(int argc,char**argv)
{
sUrl base;
sUrl_set(&base,"spartan://192.168.0.2:8888/servlet/rece/holo");
sUrl_full(&base);
printf("FILE: %s\n",sUrl_file(&base));
sUrl url;
sUrl_copy(&url, &base);
sUrl_rel(&url,argv[1]);
int q = sUrl_norm(&url);
printf("normalize returned %d\n",q);
sUrl_print(&url);
return 0;
// strcpy(URL_base,"spartan://192.168.0.2:8888/servlet/rece/");
//baselen = strlen(URL_base);
//bhost_off = 10;
//bpath_off = 26;
*/
/*
memset(buf,0,2048);
strcpy(buf+256,argv[1]);
q = url_do_full(buf+256);
int lScheme = (int)(host-scheme);
printf("Returned %d\n",q);
printf("scheme: %*.*s\n",lScheme,lScheme,scheme);
printf("host: %s\n",host);
printf("path: %s\n",path);
*/
// char* p = path_norm(path);
//printf("norm: %s\n",p);
//}

27
url.h Normal file
View File

@ -0,0 +1,27 @@
// Size of sUrl.buf in bytes.
#define URL_MAX 2044
// A URL kept in a private buffer together with the offsets of its parts.
typedef struct sUrl {
U8 oHost; // offset of the host within buf (8 bits, so at most 255)
U8 oPath; // offset of the path within buf (8 bits, so at most 255)
U16 oFile; // offset of the file component; set by sUrl_file()
U16 port; // _ACTUAL_ port number specified in the URL,
U16 res; // NOTE(review): not referenced in the visible sources - presumably reserved; confirm
char buf[URL_MAX];
} sUrl;
// Non-zero when url starts with "spartan://".
int url_is_spartan(char* url);
// Non-zero when url contains "://".
int url_is_full(char* url);
// Port from a "host[:port]..." string: 300 when absent, 0 when invalid.
U32 host_port(char* host);
// Print the components of purl to stdout.
void sUrl_dump(sUrl* purl);
// Load len characters of str (all of it when len is 0) and reset the offsets; returns 1 if it does not fit.
int sUrl_set(sUrl*p,char*str,U32 len);
// Copy src, with all components, into dst; returns dst.
sUrl* sUrl_copy(sUrl* dst, sUrl* src);
// Find the file component (after the last '/'), record it in oFile and return a pointer to it.
char* sUrl_file(sUrl* purl);
// Parse a full URL to set oHost, oPath and port; returns 0 on success.
int sUrl_full(sUrl* purl);
// Overwrite the path (str starts with '/') or the file component of purl with str.
void sUrl_rel(sUrl*purl,char* str,U32 len);
// Resolve ./ and ../ in the path; returns 0 on success.
int sUrl_norm(sUrl* purl);
// Pointer to the path inside purl's buffer.
char* sUrl_path(sUrl* purl);
// Port recorded in purl.
U16 sUrl_port(sUrl* purl);
// Length of the host part (oPath - oHost).
U32 sUrl_host_length(sUrl* purl);