From f388d2a57ad742c872f4d7e368738660b9768292 Mon Sep 17 00:00:00 2001
From: prx
Date: Thu, 18 Aug 2022 10:53:08 +0200
Subject: [PATCH] improve regex to handle :1234 in url

Stop the hostname capture at ':' and skip an optional ":port" before
the path, so that the port is not captured with the hostname.

The port is skipped with ":?[0-9]*" and not "[:[0-9]*]?": the latter is
parsed as the bracket expression "[:[0-9]" (any of ':', '[' or a digit)
repeated, followed by an optional literal ']'. A parenthesised
"(:[0-9]*)?" is avoided on purpose, since it would add a capture group
and shift the match[] indexes read by split_request().

Log the parsed hostname, path and query at LOG_DEBUG. A priority of
LOG_DAEMON alone carries severity 0, which is LOG_EMERG.
---
 vger.c |  3 +++
 vger.h | 11 +++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/vger.c b/vger.c
index ab14d5e..e4b537d 100644
--- a/vger.c
+++ b/vger.c
@@ -527,6 +527,9 @@ split_request(const char *request, char *hostname, char *path, char *query)
 	getsubexp(request, match[1], hostname);
 	getsubexp(request, match[2], path);
 	getsubexp(request, match[3], query);
+	syslog(LOG_DAEMON | LOG_DEBUG, "hostname:%s", hostname);
+	syslog(LOG_DAEMON | LOG_DEBUG, "path:%s", path);
+	syslog(LOG_DAEMON | LOG_DEBUG, "query:%s", query);
 
 	regfree(&greg);
 }
diff --git a/vger.h b/vger.h
index 04b0ade..3f0f926 100644
--- a/vger.h
+++ b/vger.h
@@ -18,9 +18,11 @@
  * =============
  * ^gemini://+ : in case of gemini:///
  * 1: hostname
- *     ([^/|^\?]*) :
- *         catch everything, stop when / or ? is found
- *         then skip multiple /
+ *     ([^/|^\?|^:]*) :
+ *         catch everything, stop when /, ? or : is found
+ *         don't catch :port
+ *     :?[0-9]* : skip :1234 (port number) if any
+ *     / * : skip "/" if any
  * 2: path
  *     ([^\?]*) :
  *         catch everything and stop at ? if any
@@ -28,7 +30,8 @@
  *     [\?]?(.*)$:
  *         catch everything after ? if any
  */
-static const char *_gemini_regex = "^gemini://+([^/|^\?]*)/*([^\?]*)[\?]?(.*)$";
+static const char *_gemini_regex =
+    "^gemini://+([^/|^\?|^:]*):?[0-9]*/*([^\?]*)[\?]?(.*)$";
 
 /* global vars */
 static int _retcode = 0;