diff --git a/main.c b/main.c index fae3d9e..e9c7b92 100644 --- a/main.c +++ b/main.c @@ -3,25 +3,20 @@ int main(int argc, char **argv) { - char request[GEMINI_REQUEST_MAX] = {'\0'}; - char user[_SC_LOGIN_NAME_MAX] = {'\0'}; - char hostname[GEMINI_REQUEST_MAX] = {'\0'}; - char query[PATH_MAX] = {'\0'}; - char path[PATH_MAX] = {'\0'}; - char chroot_dir[PATH_MAX] = DEFAULT_CHROOT; - char file[FILENAME_MAX] = DEFAULT_INDEX; - char dir[PATH_MAX] = {'\0'}; - int option = 0; - int virtualhost = 0; + char request[GEMINI_REQUEST_MAX] = {'\0'}; + char user[_SC_LOGIN_NAME_MAX] = {'\0'}; + char hostname[GEMINI_REQUEST_MAX] = {'\0'}; + char path[GEMINI_REQUEST_MAX] = {'\0'}; + char query[GEMINI_REQUEST_MAX] = {'\0'}; + char chroot_dir[PATH_MAX] = DEFAULT_CHROOT; + int option = 0; + int virtualhost = 0; /* * request : contain the whole request from client : gemini://...\r\n * user : username, used in drop_privileges() * hostname : extracted from hostname. used with virtualhosts and cgi SERVER_NAME - * query : file requested in cgi : gemini://...?query - * file : file basename to display. Emtpy is a directory has been requested - * dir : directory requested. vger will chdir() in to find file - * pos : used to parse request and split into interesting parts + * query : after a ? 
in cgi : gemini://...?query */ while ((option = getopt(argc, argv, ":d:l:m:u:c:vi")) != -1) { @@ -56,10 +51,9 @@ main(int argc, char **argv) */ drop_privileges(user, chroot_dir, cgi_dir); - check_request(request); - get_hostname(request, hostname, sizeof(hostname)); - get_path(request, path, sizeof(path), virtualhost, hostname); - get_query(path, query, sizeof(query)); + read_request(request); + split_request(request, hostname, path, query); + set_path(path, sizeof(path), virtualhost, hostname); /* percent decode */ uridecode(query); @@ -69,20 +63,13 @@ main(int argc, char **argv) if (*cgi_dir) if (do_cgi(chroot_dir, cgi_dir, path, hostname, query) == 0) stop(EXIT_SUCCESS, NULL); + /* *** from here, cgi didn't run *** */ - /* *** from here, cgi didn't run *** - * check if path available - */ - check_path(path, sizeof(path), hostname, virtualhost); - - /* split dir and filename */ - get_dir_file(path, dir, sizeof(dir), file, sizeof(file)); - - /* go to dir */ - echdir(dir); + /* check if path available */ + check_path(path, sizeof(path), virtualhost, strlen(hostname)); /* regular file to stdout */ - display_file(file); + display_file(path); stop(EXIT_SUCCESS, NULL); } diff --git a/utils.c b/utils.c index 50e2191..89bc991 100644 --- a/utils.c +++ b/utils.c @@ -1,5 +1,8 @@ +#include + #include #include +#include #include #include #include @@ -115,3 +118,15 @@ print_file(FILE *fd) datasent += fwrite(buffer, 1, nread, stdout); return datasent; } + +void +getsubexp(const char *str, regmatch_t m, char *dst) +{ + size_t len = 0; + + if ((len = m.rm_eo - m.rm_so) > 0) { /* skip empty substring */ + len = m.rm_eo - m.rm_so; + memcpy(dst, str + m.rm_so, len); + dst[len] = '\0'; + } +} diff --git a/utils.h b/utils.h index 982a3e0..72d64e8 100644 --- a/utils.h +++ b/utils.h @@ -1,3 +1,4 @@ +void getsubexp(const char *, regmatch_t, char *); void echdir (const char *); void epledge(const char *, const char *); void eunveil(const char *, const char *); diff --git a/vger.c b/vger.c 
index d02f420..f4e0b97 100644 --- a/vger.c +++ b/vger.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -42,8 +43,8 @@ stop(const int r, const char *fmt, ...) va_start(ap2, fmt); vfprintf(stderr, fmt, ap2); va_end(ap2); - } + } exit(r); } @@ -167,23 +168,23 @@ drop_privileges(const char *user, const char *chroot_dir, const char *cgi_dir) } ssize_t -display_file(const char *fname) +display_file(const char *path) { FILE *fd = NULL; const char *file_mime; /* - * special case : fname empty. The user requested just a dir name + * special case : path ends with "/". The user requested a dir */ - if ((strlen(fname) == 0) && (doautoidx)) { + if ((path[strlen(path)-1] == '/') && (doautoidx)) { /* no index.gmi, so display autoindex if enabled */ - _datasiz += autoindex("."); + _datasiz += autoindex(path); return _datasiz; } /* open the file requested */ - if ((fd = fopen(fname, "r")) != NULL) { - file_mime = get_file_mime(fname, default_mime); + if ((fd = fopen(path, "r")) != NULL) { + file_mime = get_file_mime(path, default_mime); if (strcmp(file_mime, "text/gemini") == 0) status(20, "%s; %s", file_mime, lang); else @@ -218,7 +219,6 @@ do_cgi(const char *chroot_dir, const char *cgi_dir, const char *path, const char /* check if path starts with cgi_dir * compare beginning of path with cgi_dir - * path + 2 : skip "./" * cgi_dir + strlen(chrootdir) (skip chrootdir) */ @@ -227,7 +227,7 @@ do_cgi(const char *chroot_dir, const char *cgi_dir, const char *path, const char while (*cgirp == '/') estrlcpy(cgirp, cgirp+1, sizeof(cgirp)); - if (strncmp(cgirp, path+2, strlen(cgirp)) != 0) + if (strncmp(cgirp, path, strlen(cgirp)) != 0) return 1; /* not in cgi_dir, go to display_file */ /* set env variables for CGI @@ -248,11 +248,11 @@ do_cgi(const char *chroot_dir, const char *cgi_dir, const char *path, const char */ /* find next item after cgi_dir in path: - * path + 2 (skip "./") + strlen(cgirp) + 1 (skip '/') + * path + strlen(cgirp) + 1 (skip '/') 
*/ /* cgi file to execute */ - estrlcpy(cgifp, path + 2 + strlen(cgirp) + 1, sizeof(cgifp)); + estrlcpy(cgifp, path + strlen(cgirp) + 1, sizeof(cgifp)); if (!(*cgifp)) /* problem with cgi file, abort */ return 1; @@ -324,27 +324,16 @@ cgi(const char *cgicmd) } -void -strip_trailing_slash(char *path) -{ - size_t end = strlen(path); - if (end == 0) - return; - end--; - while (path[end] == '/') - path[end--] = '\0'; -} - char * -check_request(char *request) +read_request(char *request) { /* * read the request, check for errors and sanitize the input */ char *pos = NULL; - /* read 1024 +1 chars from stdin to get the request (1024 + \0) */ + if (fgets(request, GEMINI_REQUEST_MAX, stdin) == NULL) { /* EOF reached before reading anything */ if (feof(stdin)) { @@ -367,22 +356,9 @@ check_request(char *request) /* remove \r\n at the end of string */ request[strcspn(request, "\r\n")] = '\0'; - /* - * check if the beginning of the request starts with - * gemini:// - */ - if (strncmp(request, "gemini://", GEMINI_PART) != 0) { - /* error code url malformed */ - status(59, "request «%s» doesn't match gemini://", request); - stop(EXIT_FAILURE, "request «%s» doesn't match gemini://", request); - } - /* save request for logs */ estrlcpy(_request, request, sizeof(_request)); - /* remove the gemini:// part */ - memmove(request, request + GEMINI_PART, strlen(request) + 1 - GEMINI_PART); - /* remove all "/.." for safety reasons */ while ((pos = strstr(request, "/..")) != NULL) memmove(request, pos + 3, strlen(pos) + 1 - 3); /* "/.." 
= 3 */ @@ -391,48 +367,26 @@ check_request(char *request) } char * -get_hostname(const char *request, char *hstnm, size_t hstnmsiz) +set_path(char *path, size_t pathsiz, int virtualhost, const char *hostname) { - char *pos = NULL; + char tmp[GEMINI_REQUEST_MAX] = {'\0'}; - /* first make a copy of request */ - estrlcpy(hstnm, request, hstnmsiz); - - /* look for hostname : stops at first '/' if any */ - if ( (pos = strchr(hstnm, '/')) != NULL) - pos[0] = '\0'; /* end string at the end of hostname */ - - /* check if client added :port at end of hostname and remove it */ - if ( (pos = strchr(hstnm, ':')) != NULL) - pos[0] = '\0'; /* end string at : */ - - return hstnm; -} - -char * -get_path(const char *request, char *path, size_t pathsiz, int virtualhost, const char *hostname) -{ - char *pos = NULL; - - /* path must be relative to chroot */ - estrlcpy(path, "./", pathsiz); + if (strlen(path) == 0) /* this is root dir */ + estrlcpy(path, "./", pathsiz); /* path is in a subdir named hostname */ if (virtualhost) { - estrlcat(path, hostname, pathsiz); - estrlcat(path, "/", pathsiz); + estrlcpy(tmp, hostname, sizeof(tmp)); + estrlcat(tmp, "/", sizeof(tmp)); + estrlcat(tmp, path, sizeof(tmp)); + estrlcpy(path, tmp, pathsiz); } - /* path is after hostname/ */ - pos = strchr(request, '/'); - if (pos != NULL) /* append the path. pos +1 to remove leading '/' */ - estrlcat(path, pos+1, pathsiz); - return path; } void -check_path(char *path, size_t pathsiz, const char *hstnm, int virtualhost) +check_path(char *path, size_t pathsiz, int virtualhost, size_t hstnm_o) { struct stat sb = {0}; char tmp[PATH_MAX] = {'\0'}; @@ -452,17 +406,13 @@ check_path(char *path, size_t pathsiz, const char *hstnm, int virtualhost) /* check if dir path end with "/" */ if (path[strlen(path) - 1] != '/') { /* redirect to the dir with appropriate ending '/' */ - - /* remove leading '.' 
for redirection*/ - if (virtualhost) /* remove ./host.name */ - memmove(path, path+2+strlen(hstnm), - strlen(path + 2) + strlen(hstnm) + 1); + estrlcpy(tmp, "/", sizeof(tmp)); + if (virtualhost) /* skip hostname */ + estrlcat(tmp, path+hstnm_o+1, sizeof(tmp)); else - memmove(path, path+1, - strlen(path + 1) + 1); /* +1 for \0 */ - - estrlcat(path, "/", pathsiz); - status(31, "%s", path); + estrlcat(tmp, path, sizeof(tmp)); + estrlcat(tmp, "/", sizeof(tmp)); + status(31, "%s", tmp); stop(EXIT_SUCCESS, NULL); } /* check if DEFAULT_INDEX exists in directory */ @@ -475,31 +425,39 @@ check_path(char *path, size_t pathsiz, const char *hstnm, int virtualhost) } void -get_dir_file(char *path, char *dir, size_t dirsiz, char *file, size_t filesiz) +split_request(const char *request, char *hostname, char *path, char *query) { - char *pos = NULL; + size_t nmatch = SE_MAX; /* 3 "()" + 1 for whole match */ + char buf[BUFSIZ] = {'\0'}; /* to handle error messages */ + int ret = 0; + regex_t greg; /* compiled gemini regex */ + regmatch_t match[SE_MAX]; /* matches found */ - pos = strrchr(path, '/'); - if (pos != NULL) { - estrlcpy(file, pos+1, filesiz); /* +1 : not heading / */ - pos[0] = '\0'; /* stop path at file */ - estrlcpy(dir, path, dirsiz); - } else { - estrlcpy(file, path, filesiz); + ret = regcomp(&greg, _gemini_regex, REG_EXTENDED); + if (ret != 0) { + regerror(ret, &greg, buf, sizeof(buf)); + regfree(&greg); + status(50, "Internal server error"); + stop(EXIT_FAILURE, "%s", buf); } -} -char * -get_query(char *path, char *query, size_t querysiz) -{ - char *pos = NULL; + ret = regexec(&greg, request, nmatch, match, 0); + if (ret != 0) { + regerror(ret, &greg, buf, sizeof(buf)); + regfree(&greg); + status(59, "Malformed request"); + stop(EXIT_FAILURE, "Malformed request, error:%s", buf); + } - /* remove a query string before percent decoding */ - /* look for "?" 
if any to set query for cgi, remove it */ - pos = strchr(path, '?'); - if (pos != NULL) { - estrlcpy(query, pos + 1, querysiz); - pos[0] = '\0'; /* path end where query begins */ - } - return query; + /* one may want to check the return of getsubexp + * and change memcpy to strlcpy + * to make sure we didn't try to copy too long + * and that string isn't truncated. + * It is unlikely to happen since dest strings are as long as request + */ + getsubexp(request, match[1], hostname); + getsubexp(request, match[2], path); + getsubexp(request, match[3], query); + + regfree(&greg); } diff --git a/vger.h b/vger.h index 0585e4a..8246f1f 100644 --- a/vger.h +++ b/vger.h @@ -11,6 +11,28 @@ */ #define GEMINI_REQUEST_MAX 1025 +/* max subexpression in regex : 3 + 1 */ +#define SE_MAX 4 + +/* gemini_regex: + * ============= + * ^gemini://+ : in case of gemini:/// + * 1: hostname + * ([^/|^\?|^:]*) : + * catch everything, stop when /, ? or : is found + * don't catch :port + * [:[0-9]*]? : skip :1234 (port number) if any + * / * : skip "/" if any + * 2: path + * ([^\?]*) : + * catch everything and stop at ? if any + * 3 : query + * [\?]?(.*)$: + * catch everything after ? 
if any + */ +static const char *_gemini_regex = + "^gemini://+([^/|^\?|^:]*)[:[0-9]*]?/*([^\?]*)[\?]?(.*)$"; + /* global vars */ static int _retcode = 0; static ssize_t _datasiz = 0; @@ -19,19 +41,16 @@ static char _request[GEMINI_REQUEST_MAX] = {'\0'}; /* functions */ ssize_t autoindex(const char *); void cgi(const char *); -char * check_request(char *); -void check_path(char *, size_t, const char *, int); +char * read_request(char *); +void check_path(char *, size_t, int, size_t); ssize_t display_file(const char *); int do_cgi(const char *, const char *, const char *, const char *, const char *); void drop_privileges(const char *, const char *, const char *); -void get_dir_file(char *, char *, size_t, char *, size_t); -char * get_hostname(const char *, char *, size_t); -char * get_path(const char *, char *, size_t, int, const char *); -char * get_query(char *, char *, size_t); +char * set_path(char *, size_t, int, const char *); +void split_request(const char *, char *, char *, char *); void status(const int, const char *, ...); -void strip_trailing_slash(char *); -int uridecode (char *); void stop(const int, const char *, ...); +int uridecode (char *); #endif // vger_h_INCLUDED