Merge pull request 'regex' (#7) from regex into master

Reviewed-on: #7
This commit is contained in:
solene 2022-08-19 13:13:19 +00:00
commit 3ca16cf38f
5 changed files with 117 additions and 137 deletions

45
main.c
View File

@ -3,25 +3,20 @@
int
main(int argc, char **argv)
{
char request[GEMINI_REQUEST_MAX] = {'\0'};
char user[_SC_LOGIN_NAME_MAX] = {'\0'};
char hostname[GEMINI_REQUEST_MAX] = {'\0'};
char query[PATH_MAX] = {'\0'};
char path[PATH_MAX] = {'\0'};
char chroot_dir[PATH_MAX] = DEFAULT_CHROOT;
char file[FILENAME_MAX] = DEFAULT_INDEX;
char dir[PATH_MAX] = {'\0'};
int option = 0;
int virtualhost = 0;
char request[GEMINI_REQUEST_MAX] = {'\0'};
char user[_SC_LOGIN_NAME_MAX] = {'\0'};
char hostname[GEMINI_REQUEST_MAX] = {'\0'};
char path[GEMINI_REQUEST_MAX] = {'\0'};
char query[GEMINI_REQUEST_MAX] = {'\0'};
char chroot_dir[PATH_MAX] = DEFAULT_CHROOT;
int option = 0;
int virtualhost = 0;
/*
* request : contain the whole request from client : gemini://...\r\n
* user : username, used in drop_privileges()
* hostname : extracted from request. used with virtualhosts and cgi SERVER_NAME
* query : file requested in cgi : gemini://...?query
* file : file basename to display. Empty if a directory has been requested
* dir : directory requested. vger will chdir() in to find file
* pos : used to parse request and split into interesting parts
* query : after a ? in cgi : gemini://...?query
*/
while ((option = getopt(argc, argv, ":d:l:m:u:c:vi")) != -1) {
@ -56,10 +51,9 @@ main(int argc, char **argv)
*/
drop_privileges(user, chroot_dir, cgi_dir);
check_request(request);
get_hostname(request, hostname, sizeof(hostname));
get_path(request, path, sizeof(path), virtualhost, hostname);
get_query(path, query, sizeof(query));
read_request(request);
split_request(request, hostname, path, query);
set_path(path, sizeof(path), virtualhost, hostname);
/* percent decode */
uridecode(query);
@ -69,20 +63,13 @@ main(int argc, char **argv)
if (*cgi_dir)
if (do_cgi(chroot_dir, cgi_dir, path, hostname, query) == 0)
stop(EXIT_SUCCESS, NULL);
/* *** from here, cgi didn't run *** */
/* *** from here, cgi didn't run ***
* check if path available
*/
check_path(path, sizeof(path), hostname, virtualhost);
/* split dir and filename */
get_dir_file(path, dir, sizeof(dir), file, sizeof(file));
/* go to dir */
echdir(dir);
/* check if path available */
check_path(path, sizeof(path), virtualhost, strlen(hostname));
/* regular file to stdout */
display_file(file);
display_file(path);
stop(EXIT_SUCCESS, NULL);
}

15
utils.c
View File

@ -1,5 +1,8 @@
#include <sys/types.h>
#include <err.h>
#include <errno.h>
#include <regex.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@ -115,3 +118,15 @@ print_file(FILE *fd)
datasent += fwrite(buffer, 1, nread, stdout);
return datasent;
}
/*
 * getsubexp: copy the part of str described by the match m into dst
 * and NUL-terminate it.
 *
 * str : string the offsets in m refer to (the one given to regexec())
 * m   : one entry of the regmatch_t array filled by regexec()
 * dst : destination buffer. Its size is not known here, so the caller
 *       must provide room for (m.rm_eo - m.rm_so) + 1 bytes.
 *
 * dst is left untouched when the subexpression did not take part in the
 * match (offsets set to -1), when it matched an empty string, or when the
 * offsets are inconsistent.
 */
void
getsubexp(const char *str, regmatch_t m, char *dst)
{
	size_t len = 0;

	/*
	 * Test the signed offsets before converting to size_t: a negative
	 * difference would otherwise wrap around to a huge length.
	 */
	if (m.rm_so < 0 || m.rm_eo <= m.rm_so)
		return; /* skip unmatched or empty substring */

	len = (size_t)(m.rm_eo - m.rm_so);
	memcpy(dst, str + m.rm_so, len);
	dst[len] = '\0';
}

View File

@ -1,3 +1,4 @@
void getsubexp(const char *, regmatch_t, char *);
void echdir (const char *);
void epledge(const char *, const char *);
void eunveil(const char *, const char *);

158
vger.c
View File

@ -9,6 +9,7 @@
#include <fcntl.h>
#include <limits.h>
#include <pwd.h>
#include <regex.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@ -42,8 +43,8 @@ stop(const int r, const char *fmt, ...)
va_start(ap2, fmt);
vfprintf(stderr, fmt, ap2);
va_end(ap2);
}
}
exit(r);
}
@ -167,23 +168,23 @@ drop_privileges(const char *user, const char *chroot_dir, const char *cgi_dir)
}
ssize_t
display_file(const char *fname)
display_file(const char *path)
{
FILE *fd = NULL;
const char *file_mime;
/*
* special case : fname empty. The user requested just a dir name
* special case : path ends with "/". The user requested a dir
*/
if ((strlen(fname) == 0) && (doautoidx)) {
if ((path[strlen(path)-1] == '/') && (doautoidx)) {
/* no index.gmi, so display autoindex if enabled */
_datasiz += autoindex(".");
_datasiz += autoindex(path);
return _datasiz;
}
/* open the file requested */
if ((fd = fopen(fname, "r")) != NULL) {
file_mime = get_file_mime(fname, default_mime);
if ((fd = fopen(path, "r")) != NULL) {
file_mime = get_file_mime(path, default_mime);
if (strcmp(file_mime, "text/gemini") == 0)
status(20, "%s; %s", file_mime, lang);
else
@ -218,7 +219,6 @@ do_cgi(const char *chroot_dir, const char *cgi_dir, const char *path, const char
/* check if path starts with cgi_dir
* compare beginning of path with cgi_dir
* path + 2 : skip "./"
* cgi_dir + strlen(chrootdir) (skip chrootdir)
*/
@ -227,7 +227,7 @@ do_cgi(const char *chroot_dir, const char *cgi_dir, const char *path, const char
while (*cgirp == '/')
estrlcpy(cgirp, cgirp+1, sizeof(cgirp));
if (strncmp(cgirp, path+2, strlen(cgirp)) != 0)
if (strncmp(cgirp, path, strlen(cgirp)) != 0)
return 1; /* not in cgi_dir, go to display_file */
/* set env variables for CGI
@ -248,11 +248,11 @@ do_cgi(const char *chroot_dir, const char *cgi_dir, const char *path, const char
*/
/* find next item after cgi_dir in path:
* path + 2 (skip "./") + strlen(cgirp) + 1 (skip '/')
* path + strlen(cgirp) + 1 (skip '/')
*/
/* cgi file to execute */
estrlcpy(cgifp, path + 2 + strlen(cgirp) + 1, sizeof(cgifp));
estrlcpy(cgifp, path + strlen(cgirp) + 1, sizeof(cgifp));
if (!(*cgifp)) /* problem with cgi file, abort */
return 1;
@ -324,27 +324,16 @@ cgi(const char *cgicmd)
}
void
strip_trailing_slash(char *path)
{
size_t end = strlen(path);
if (end == 0)
return;
end--;
while (path[end] == '/')
path[end--] = '\0';
}
char *
check_request(char *request)
read_request(char *request)
{
/*
* read the request, check for errors and sanitize the input
*/
char *pos = NULL;
/* read 1024 +1 chars from stdin to get the request (1024 + \0) */
if (fgets(request, GEMINI_REQUEST_MAX, stdin) == NULL) {
/* EOF reached before reading anything */
if (feof(stdin)) {
@ -367,22 +356,9 @@ check_request(char *request)
/* remove \r\n at the end of string */
request[strcspn(request, "\r\n")] = '\0';
/*
* check if the beginning of the request starts with
* gemini://
*/
if (strncmp(request, "gemini://", GEMINI_PART) != 0) {
/* error code url malformed */
status(59, "request «%s» doesn't match gemini://", request);
stop(EXIT_FAILURE, "request «%s» doesn't match gemini://", request);
}
/* save request for logs */
estrlcpy(_request, request, sizeof(_request));
/* remove the gemini:// part */
memmove(request, request + GEMINI_PART, strlen(request) + 1 - GEMINI_PART);
/* remove all "/.." for safety reasons */
while ((pos = strstr(request, "/..")) != NULL)
memmove(request, pos + 3, strlen(pos) + 1 - 3); /* "/.." = 3 */
@ -391,48 +367,26 @@ check_request(char *request)
}
char *
get_hostname(const char *request, char *hstnm, size_t hstnmsiz)
set_path(char *path, size_t pathsiz, int virtualhost, const char *hostname)
{
char *pos = NULL;
char tmp[GEMINI_REQUEST_MAX] = {'\0'};
/* first make a copy of request */
estrlcpy(hstnm, request, hstnmsiz);
/* look for hostname : stops at first '/' if any */
if ( (pos = strchr(hstnm, '/')) != NULL)
pos[0] = '\0'; /* end string at the end of hostname */
/* check if client added :port at end of hostname and remove it */
if ( (pos = strchr(hstnm, ':')) != NULL)
pos[0] = '\0'; /* end string at : */
return hstnm;
}
char *
get_path(const char *request, char *path, size_t pathsiz, int virtualhost, const char *hostname)
{
char *pos = NULL;
/* path must be relative to chroot */
estrlcpy(path, "./", pathsiz);
if (strlen(path) == 0) /* this is root dir */
estrlcpy(path, "./", pathsiz);
/* path is in a subdir named hostname */
if (virtualhost) {
estrlcat(path, hostname, pathsiz);
estrlcat(path, "/", pathsiz);
estrlcpy(tmp, hostname, sizeof(tmp));
estrlcat(tmp, "/", sizeof(tmp));
estrlcat(tmp, path, sizeof(tmp));
estrlcpy(path, tmp, pathsiz);
}
/* path is after hostname/ */
pos = strchr(request, '/');
if (pos != NULL) /* append the path. pos +1 to remove leading '/' */
estrlcat(path, pos+1, pathsiz);
return path;
}
void
check_path(char *path, size_t pathsiz, const char *hstnm, int virtualhost)
check_path(char *path, size_t pathsiz, int virtualhost, size_t hstnm_o)
{
struct stat sb = {0};
char tmp[PATH_MAX] = {'\0'};
@ -452,17 +406,13 @@ check_path(char *path, size_t pathsiz, const char *hstnm, int virtualhost)
/* check if dir path end with "/" */
if (path[strlen(path) - 1] != '/') {
/* redirect to the dir with appropriate ending '/' */
/* remove leading '.' for redirection*/
if (virtualhost) /* remove ./host.name */
memmove(path, path+2+strlen(hstnm),
strlen(path + 2) + strlen(hstnm) + 1);
estrlcpy(tmp, "/", sizeof(tmp));
if (virtualhost) /* skip hostname */
estrlcat(tmp, path+hstnm_o+1, sizeof(tmp));
else
memmove(path, path+1,
strlen(path + 1) + 1); /* +1 for \0 */
estrlcat(path, "/", pathsiz);
status(31, "%s", path);
estrlcat(tmp, path, sizeof(tmp));
estrlcat(tmp, "/", sizeof(tmp));
status(31, "%s", tmp);
stop(EXIT_SUCCESS, NULL);
}
/* check if DEFAULT_INDEX exists in directory */
@ -475,31 +425,39 @@ check_path(char *path, size_t pathsiz, const char *hstnm, int virtualhost)
}
void
get_dir_file(char *path, char *dir, size_t dirsiz, char *file, size_t filesiz)
split_request(const char *request, char *hostname, char *path, char *query)
{
char *pos = NULL;
size_t nmatch = SE_MAX; /* 3 "()" + 1 for whole match */
char buf[BUFSIZ] = {'\0'}; /* to handle error messages */
int ret = 0;
regex_t greg; /* compiled gemini regex */
regmatch_t match[SE_MAX]; /* matches founds */
pos = strrchr(path, '/');
if (pos != NULL) {
estrlcpy(file, pos+1, filesiz); /* +1 : not heading / */
pos[0] = '\0'; /* stop path at file */
estrlcpy(dir, path, dirsiz);
} else {
estrlcpy(file, path, filesiz);
ret = regcomp(&greg, _gemini_regex, REG_EXTENDED);
if (ret != 0) {
regerror(ret, &greg, buf, sizeof(buf));
regfree(&greg);
status(50, "Internal server error");
stop(EXIT_FAILURE, "%s", buf);
}
}
char *
get_query(char *path, char *query, size_t querysiz)
{
char *pos = NULL;
ret = regexec(&greg, request, nmatch, match, 0);
if (ret != 0) {
regerror(ret, &greg, buf, sizeof(buf));
regfree(&greg);
status(59, "Malformed request");
stop(EXIT_FAILURE, "Malformed request, error:%s", buf);
}
/* remove a query string before percent decoding */
/* look for "?" if any to set query for cgi, remove it */
pos = strchr(path, '?');
if (pos != NULL) {
estrlcpy(query, pos + 1, querysiz);
pos[0] = '\0'; /* path end where query begins */
}
return query;
/* one may want to check the return of getsubexp
 * and change memcpy to strlcpy
 * to make sure we didn't try to copy too much
 * and that the string isn't truncated.
 * It is unlikely to happen since the destination strings are as long as the request
 */
getsubexp(request, match[1], hostname);
getsubexp(request, match[2], path);
getsubexp(request, match[3], query);
regfree(&greg);
}

35
vger.h
View File

@ -11,6 +11,28 @@
*/
#define GEMINI_REQUEST_MAX 1025
/* max subexpression in regex : 3 + 1 */
#define SE_MAX 4
/* gemini_regex:
 * =============
 * POSIX extended regex splitting a request into hostname, path and query.
 * Only the three "()" below are subexpressions, so SE_MAX (3 + 1 for the
 * whole match) stays valid.
 *
 * Inside a bracket expression "|", "[" and a non-leading "^" are ordinary
 * characters: alternation and grouping can not be written between "[]".
 *
 * ^gemini://+ : scheme, "/+" in case of gemini:///
 * 1: hostname
 *    ([^/?:]*) :
 *       catch everything, stop when /, ? or : is found
 *       don't catch :port
 * :?[0-9]*    : skip :1234 (port number) if any.
 *               Written without "()" to keep subexpression numbers.
 * / *         : skip "/" if any
 * 2: path
 *    ([^?]*) :
 *       catch everything and stop at ? if any
 * 3: query
 *    [?]?(.*)$ :
 *       catch everything after ? if any
 */
static const char *_gemini_regex =
	"^gemini://+([^/?:]*):?[0-9]*/*([^?]*)[?]?(.*)$";
/* global vars */
static int _retcode = 0;
static ssize_t _datasiz = 0;
@ -19,19 +41,16 @@ static char _request[GEMINI_REQUEST_MAX] = {'\0'};
/* functions */
ssize_t autoindex(const char *);
void cgi(const char *);
char * check_request(char *);
void check_path(char *, size_t, const char *, int);
char * read_request(char *);
void check_path(char *, size_t, int, size_t);
ssize_t display_file(const char *);
int do_cgi(const char *, const char *, const char *, const char *, const char *);
void drop_privileges(const char *, const char *, const char *);
void get_dir_file(char *, char *, size_t, char *, size_t);
char * get_hostname(const char *, char *, size_t);
char * get_path(const char *, char *, size_t, int, const char *);
char * get_query(char *, char *, size_t);
char * set_path(char *, size_t, int, const char *);
void split_request(const char *, char *, char *, char *);
void status(const int, const char *, ...);
void strip_trailing_slash(char *);
int uridecode (char *);
void stop(const int, const char *, ...);
int uridecode (char *);
#endif // vger_h_INCLUDED