Merge pull request 'regex' (#7) from regex into master

Reviewed-on: #7
This commit is contained in:
solene 2022-08-19 13:13:19 +00:00
commit 3ca16cf38f
5 changed files with 117 additions and 137 deletions

45
main.c
View File

@ -3,25 +3,20 @@
int int
main(int argc, char **argv) main(int argc, char **argv)
{ {
char request[GEMINI_REQUEST_MAX] = {'\0'}; char request[GEMINI_REQUEST_MAX] = {'\0'};
char user[_SC_LOGIN_NAME_MAX] = {'\0'}; char user[_SC_LOGIN_NAME_MAX] = {'\0'};
char hostname[GEMINI_REQUEST_MAX] = {'\0'}; char hostname[GEMINI_REQUEST_MAX] = {'\0'};
char query[PATH_MAX] = {'\0'}; char path[GEMINI_REQUEST_MAX] = {'\0'};
char path[PATH_MAX] = {'\0'}; char query[GEMINI_REQUEST_MAX] = {'\0'};
char chroot_dir[PATH_MAX] = DEFAULT_CHROOT; char chroot_dir[PATH_MAX] = DEFAULT_CHROOT;
char file[FILENAME_MAX] = DEFAULT_INDEX; int option = 0;
char dir[PATH_MAX] = {'\0'}; int virtualhost = 0;
int option = 0;
int virtualhost = 0;
/* /*
* request : contain the whole request from client : gemini://...\r\n * request : contain the whole request from client : gemini://...\r\n
* user : username, used in drop_privileges() * user : username, used in drop_privileges()
* hostname : extracted from hostname. used with virtualhosts and cgi SERVER_NAME * hostname : extracted from hostname. used with virtualhosts and cgi SERVER_NAME
* query : file requested in cgi : gemini://...?query * query : after a ? in cgi : gemini://...?query
* file : file basename to display. Empty if a directory has been requested
* dir : directory requested. vger will chdir() in to find file
* pos : used to parse request and split into interesting parts
*/ */
while ((option = getopt(argc, argv, ":d:l:m:u:c:vi")) != -1) { while ((option = getopt(argc, argv, ":d:l:m:u:c:vi")) != -1) {
@ -56,10 +51,9 @@ main(int argc, char **argv)
*/ */
drop_privileges(user, chroot_dir, cgi_dir); drop_privileges(user, chroot_dir, cgi_dir);
check_request(request); read_request(request);
get_hostname(request, hostname, sizeof(hostname)); split_request(request, hostname, path, query);
get_path(request, path, sizeof(path), virtualhost, hostname); set_path(path, sizeof(path), virtualhost, hostname);
get_query(path, query, sizeof(query));
/* percent decode */ /* percent decode */
uridecode(query); uridecode(query);
@ -69,20 +63,13 @@ main(int argc, char **argv)
if (*cgi_dir) if (*cgi_dir)
if (do_cgi(chroot_dir, cgi_dir, path, hostname, query) == 0) if (do_cgi(chroot_dir, cgi_dir, path, hostname, query) == 0)
stop(EXIT_SUCCESS, NULL); stop(EXIT_SUCCESS, NULL);
/* *** from here, cgi didn't run *** */
/* *** from here, cgi didn't run *** /* check if path available */
* check if path available check_path(path, sizeof(path), virtualhost, strlen(hostname));
*/
check_path(path, sizeof(path), hostname, virtualhost);
/* split dir and filename */
get_dir_file(path, dir, sizeof(dir), file, sizeof(file));
/* go to dir */
echdir(dir);
/* regular file to stdout */ /* regular file to stdout */
display_file(file); display_file(path);
stop(EXIT_SUCCESS, NULL); stop(EXIT_SUCCESS, NULL);
} }

15
utils.c
View File

@ -1,5 +1,8 @@
#include <sys/types.h>
#include <err.h> #include <err.h>
#include <errno.h> #include <errno.h>
#include <regex.h>
#include <stdarg.h> #include <stdarg.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -115,3 +118,15 @@ print_file(FILE *fd)
datasent += fwrite(buffer, 1, nread, stdout); datasent += fwrite(buffer, 1, nread, stdout);
return datasent; return datasent;
} }
/*
 * Copy the part of str described by the regex match m into dst and
 * NUL-terminate it.
 *
 * str : the string that was matched
 * m   : start/end offsets (rm_so, rm_eo) of the substring inside str
 * dst : destination buffer. No size is passed, so the caller must
 *       provide room for the substring plus the terminating '\0'.
 *
 * dst is left untouched when the substring is empty or when the offsets
 * do not describe a valid range (negative start, or end before start).
 */
void
getsubexp(const char *str, regmatch_t m, char *dst)
{
	size_t len = 0;

	/*
	 * Compare the signed offsets before converting to size_t: a
	 * negative difference would otherwise wrap to a huge length.
	 */
	if (m.rm_so < 0 || m.rm_eo <= m.rm_so)
		return; /* skip empty or unmatched substring */

	len = (size_t)(m.rm_eo - m.rm_so);
	memcpy(dst, str + m.rm_so, len);
	dst[len] = '\0';
}

View File

@ -1,3 +1,4 @@
void getsubexp(const char *, regmatch_t, char *);
void echdir (const char *); void echdir (const char *);
void epledge(const char *, const char *); void epledge(const char *, const char *);
void eunveil(const char *, const char *); void eunveil(const char *, const char *);

158
vger.c
View File

@ -9,6 +9,7 @@
#include <fcntl.h> #include <fcntl.h>
#include <limits.h> #include <limits.h>
#include <pwd.h> #include <pwd.h>
#include <regex.h>
#include <stdarg.h> #include <stdarg.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -42,8 +43,8 @@ stop(const int r, const char *fmt, ...)
va_start(ap2, fmt); va_start(ap2, fmt);
vfprintf(stderr, fmt, ap2); vfprintf(stderr, fmt, ap2);
va_end(ap2); va_end(ap2);
}
}
exit(r); exit(r);
} }
@ -167,23 +168,23 @@ drop_privileges(const char *user, const char *chroot_dir, const char *cgi_dir)
} }
ssize_t ssize_t
display_file(const char *fname) display_file(const char *path)
{ {
FILE *fd = NULL; FILE *fd = NULL;
const char *file_mime; const char *file_mime;
/* /*
* special case : fname empty. The user requested just a dir name * special case : path ends with "/". The user requested a dir
*/ */
if ((strlen(fname) == 0) && (doautoidx)) { if ((path[strlen(path)-1] == '/') && (doautoidx)) {
/* no index.gmi, so display autoindex if enabled */ /* no index.gmi, so display autoindex if enabled */
_datasiz += autoindex("."); _datasiz += autoindex(path);
return _datasiz; return _datasiz;
} }
/* open the file requested */ /* open the file requested */
if ((fd = fopen(fname, "r")) != NULL) { if ((fd = fopen(path, "r")) != NULL) {
file_mime = get_file_mime(fname, default_mime); file_mime = get_file_mime(path, default_mime);
if (strcmp(file_mime, "text/gemini") == 0) if (strcmp(file_mime, "text/gemini") == 0)
status(20, "%s; %s", file_mime, lang); status(20, "%s; %s", file_mime, lang);
else else
@ -218,7 +219,6 @@ do_cgi(const char *chroot_dir, const char *cgi_dir, const char *path, const char
/* check if path starts with cgi_dir /* check if path starts with cgi_dir
* compare beginning of path with cgi_dir * compare beginning of path with cgi_dir
* path + 2 : skip "./"
* cgi_dir + strlen(chrootdir) (skip chrootdir) * cgi_dir + strlen(chrootdir) (skip chrootdir)
*/ */
@ -227,7 +227,7 @@ do_cgi(const char *chroot_dir, const char *cgi_dir, const char *path, const char
while (*cgirp == '/') while (*cgirp == '/')
estrlcpy(cgirp, cgirp+1, sizeof(cgirp)); estrlcpy(cgirp, cgirp+1, sizeof(cgirp));
if (strncmp(cgirp, path+2, strlen(cgirp)) != 0) if (strncmp(cgirp, path, strlen(cgirp)) != 0)
return 1; /* not in cgi_dir, go to display_file */ return 1; /* not in cgi_dir, go to display_file */
/* set env variables for CGI /* set env variables for CGI
@ -248,11 +248,11 @@ do_cgi(const char *chroot_dir, const char *cgi_dir, const char *path, const char
*/ */
/* find next item after cgi_dir in path: /* find next item after cgi_dir in path:
* path + 2 (skip "./") + strlen(cgirp) + 1 (skip '/') * path + strlen(cgirp) + 1 (skip '/')
*/ */
/* cgi file to execute */ /* cgi file to execute */
estrlcpy(cgifp, path + 2 + strlen(cgirp) + 1, sizeof(cgifp)); estrlcpy(cgifp, path + strlen(cgirp) + 1, sizeof(cgifp));
if (!(*cgifp)) /* problem with cgi file, abort */ if (!(*cgifp)) /* problem with cgi file, abort */
return 1; return 1;
@ -324,27 +324,16 @@ cgi(const char *cgicmd)
} }
/*
 * Remove every trailing '/' from path, in place.
 *
 * path : NUL-terminated string, modified directly.
 *
 * An empty string is left as is; a string made only of slashes
 * becomes the empty string.
 */
void
strip_trailing_slash(char *path)
{
	size_t end = strlen(path);

	/*
	 * Walk back from the last character. Testing end > 0 first keeps
	 * the index from wrapping below zero when path is only slashes.
	 */
	while (end > 0 && path[end - 1] == '/')
		path[--end] = '\0';
}
char * char *
check_request(char *request) read_request(char *request)
{ {
/* /*
* read the request, check for errors and sanitize the input * read the request, check for errors and sanitize the input
*/ */
char *pos = NULL; char *pos = NULL;
/* read 1024 +1 chars from stdin to get the request (1024 + \0) */ /* read 1024 +1 chars from stdin to get the request (1024 + \0) */
if (fgets(request, GEMINI_REQUEST_MAX, stdin) == NULL) { if (fgets(request, GEMINI_REQUEST_MAX, stdin) == NULL) {
/* EOF reached before reading anything */ /* EOF reached before reading anything */
if (feof(stdin)) { if (feof(stdin)) {
@ -367,22 +356,9 @@ check_request(char *request)
/* remove \r\n at the end of string */ /* remove \r\n at the end of string */
request[strcspn(request, "\r\n")] = '\0'; request[strcspn(request, "\r\n")] = '\0';
/*
* check if the beginning of the request starts with
* gemini://
*/
if (strncmp(request, "gemini://", GEMINI_PART) != 0) {
/* error code url malformed */
status(59, "request «%s» doesn't match gemini://", request);
stop(EXIT_FAILURE, "request «%s» doesn't match gemini://", request);
}
/* save request for logs */ /* save request for logs */
estrlcpy(_request, request, sizeof(_request)); estrlcpy(_request, request, sizeof(_request));
/* remove the gemini:// part */
memmove(request, request + GEMINI_PART, strlen(request) + 1 - GEMINI_PART);
/* remove all "/.." for safety reasons */ /* remove all "/.." for safety reasons */
while ((pos = strstr(request, "/..")) != NULL) while ((pos = strstr(request, "/..")) != NULL)
memmove(request, pos + 3, strlen(pos) + 1 - 3); /* "/.." = 3 */ memmove(request, pos + 3, strlen(pos) + 1 - 3); /* "/.." = 3 */
@ -391,48 +367,26 @@ check_request(char *request)
} }
char * char *
get_hostname(const char *request, char *hstnm, size_t hstnmsiz) set_path(char *path, size_t pathsiz, int virtualhost, const char *hostname)
{ {
char *pos = NULL; char tmp[GEMINI_REQUEST_MAX] = {'\0'};
/* first make a copy of request */ if (strlen(path) == 0) /* this is root dir */
estrlcpy(hstnm, request, hstnmsiz); estrlcpy(path, "./", pathsiz);
/* look for hostname : stops at first '/' if any */
if ( (pos = strchr(hstnm, '/')) != NULL)
pos[0] = '\0'; /* end string at the end of hostname */
/* check if client added :port at end of hostname and remove it */
if ( (pos = strchr(hstnm, ':')) != NULL)
pos[0] = '\0'; /* end string at : */
return hstnm;
}
char *
get_path(const char *request, char *path, size_t pathsiz, int virtualhost, const char *hostname)
{
char *pos = NULL;
/* path must be relative to chroot */
estrlcpy(path, "./", pathsiz);
/* path is in a subdir named hostname */ /* path is in a subdir named hostname */
if (virtualhost) { if (virtualhost) {
estrlcat(path, hostname, pathsiz); estrlcpy(tmp, hostname, sizeof(tmp));
estrlcat(path, "/", pathsiz); estrlcat(tmp, "/", sizeof(tmp));
estrlcat(tmp, path, sizeof(tmp));
estrlcpy(path, tmp, pathsiz);
} }
/* path is after hostname/ */
pos = strchr(request, '/');
if (pos != NULL) /* append the path. pos +1 to remove leading '/' */
estrlcat(path, pos+1, pathsiz);
return path; return path;
} }
void void
check_path(char *path, size_t pathsiz, const char *hstnm, int virtualhost) check_path(char *path, size_t pathsiz, int virtualhost, size_t hstnm_o)
{ {
struct stat sb = {0}; struct stat sb = {0};
char tmp[PATH_MAX] = {'\0'}; char tmp[PATH_MAX] = {'\0'};
@ -452,17 +406,13 @@ check_path(char *path, size_t pathsiz, const char *hstnm, int virtualhost)
/* check if dir path end with "/" */ /* check if dir path end with "/" */
if (path[strlen(path) - 1] != '/') { if (path[strlen(path) - 1] != '/') {
/* redirect to the dir with appropriate ending '/' */ /* redirect to the dir with appropriate ending '/' */
estrlcpy(tmp, "/", sizeof(tmp));
/* remove leading '.' for redirection*/ if (virtualhost) /* skip hostname */
if (virtualhost) /* remove ./host.name */ estrlcat(tmp, path+hstnm_o+1, sizeof(tmp));
memmove(path, path+2+strlen(hstnm),
strlen(path + 2) + strlen(hstnm) + 1);
else else
memmove(path, path+1, estrlcat(tmp, path, sizeof(tmp));
strlen(path + 1) + 1); /* +1 for \0 */ estrlcat(tmp, "/", sizeof(tmp));
status(31, "%s", tmp);
estrlcat(path, "/", pathsiz);
status(31, "%s", path);
stop(EXIT_SUCCESS, NULL); stop(EXIT_SUCCESS, NULL);
} }
/* check if DEFAULT_INDEX exists in directory */ /* check if DEFAULT_INDEX exists in directory */
@ -475,31 +425,39 @@ check_path(char *path, size_t pathsiz, const char *hstnm, int virtualhost)
} }
void void
get_dir_file(char *path, char *dir, size_t dirsiz, char *file, size_t filesiz) split_request(const char *request, char *hostname, char *path, char *query)
{ {
char *pos = NULL; size_t nmatch = SE_MAX; /* 3 "()" + 1 for whole match */
char buf[BUFSIZ] = {'\0'}; /* to handle error messages */
int ret = 0;
regex_t greg; /* compiled gemini regex */
regmatch_t match[SE_MAX]; /* matches founds */
pos = strrchr(path, '/'); ret = regcomp(&greg, _gemini_regex, REG_EXTENDED);
if (pos != NULL) { if (ret != 0) {
estrlcpy(file, pos+1, filesiz); /* +1 : not heading / */ regerror(ret, &greg, buf, sizeof(buf));
pos[0] = '\0'; /* stop path at file */ regfree(&greg);
estrlcpy(dir, path, dirsiz); status(50, "Internal server error");
} else { stop(EXIT_FAILURE, "%s", buf);
estrlcpy(file, path, filesiz);
} }
}
char * ret = regexec(&greg, request, nmatch, match, 0);
get_query(char *path, char *query, size_t querysiz) if (ret != 0) {
{ regerror(ret, &greg, buf, sizeof(buf));
char *pos = NULL; regfree(&greg);
status(59, "Malformed request");
stop(EXIT_FAILURE, "Malformed request, error:%s", buf);
}
/* remove a query string before percent decoding */ /* one may want to check the return of getsubexp
/* look for "?" if any to set query for cgi, remove it */ * and change memcpy to strlcpy
pos = strchr(path, '?'); * to make sure we didn't try to copy too long
if (pos != NULL) { * and that string isn't trunkated.
estrlcpy(query, pos + 1, querysiz); * It is unlikely to happen since dest string are as long as request
pos[0] = '\0'; /* path end where query begins */ */
} getsubexp(request, match[1], hostname);
return query; getsubexp(request, match[2], path);
getsubexp(request, match[3], query);
regfree(&greg);
} }

35
vger.h
View File

@ -11,6 +11,28 @@
*/ */
#define GEMINI_REQUEST_MAX 1025 #define GEMINI_REQUEST_MAX 1025
/* max subexpression in regex : 3 + 1 */
#define SE_MAX 4
/* gemini_regex:
 * =============
 * POSIX extended regular expression splitting a gemini request into
 * three subexpressions (plus the whole match).
 *
 * ^gemini://+ : scheme; "/+" also accepts gemini:///
 * 1: hostname
 *    ([^/?:]*) :
 *        catch everything, stop when /, ? or : is found
 *        don't catch :port
 *        (inside a bracket expression '|' and '^' are ordinary
 *        characters, so the delimiters are only listed once)
 * :?[0-9]* : skip :1234 (port number) if any
 *        written without parentheses so that the subexpression
 *        numbering stays 1, 2, 3
 * / * : skip "/" if any
 * 2: path
 *    ([^?]*) :
 *        catch everything and stop at ? if any
 * 3: query
 *    [?]?(.*)$ :
 *        catch everything after ? if any
 */
static const char *_gemini_regex =
	"^gemini://+([^/\?:]*):?[0-9]*/*([^\?]*)[\?]?(.*)$";
/* global vars */ /* global vars */
static int _retcode = 0; static int _retcode = 0;
static ssize_t _datasiz = 0; static ssize_t _datasiz = 0;
@ -19,19 +41,16 @@ static char _request[GEMINI_REQUEST_MAX] = {'\0'};
/* functions */ /* functions */
ssize_t autoindex(const char *); ssize_t autoindex(const char *);
void cgi(const char *); void cgi(const char *);
char * check_request(char *); char * read_request(char *);
void check_path(char *, size_t, const char *, int); void check_path(char *, size_t, int, size_t);
ssize_t display_file(const char *); ssize_t display_file(const char *);
int do_cgi(const char *, const char *, const char *, const char *, const char *); int do_cgi(const char *, const char *, const char *, const char *, const char *);
void drop_privileges(const char *, const char *, const char *); void drop_privileges(const char *, const char *, const char *);
void get_dir_file(char *, char *, size_t, char *, size_t); char * set_path(char *, size_t, int, const char *);
char * get_hostname(const char *, char *, size_t); void split_request(const char *, char *, char *, char *);
char * get_path(const char *, char *, size_t, int, const char *);
char * get_query(char *, char *, size_t);
void status(const int, const char *, ...); void status(const int, const char *, ...);
void strip_trailing_slash(char *);
int uridecode (char *);
void stop(const int, const char *, ...); void stop(const int, const char *, ...);
int uridecode (char *);
#endif // vger_h_INCLUDED #endif // vger_h_INCLUDED