diff --git a/CHANGELOG.gmi b/CHANGELOG.gmi
index 490d094..582abab 100644
--- a/CHANGELOG.gmi
+++ b/CHANGELOG.gmi
@@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.
=> https://keepachangelog.com/en/1.0.0/ The format is based on keep a Changelog.
=> https://semver.org/spec/v2.0.0.html And this project adheres to Semantic Versioning.
+## [1.2.0] - 2021-03-19
+* Removes the line-wise "^" prefix that disabled text decoration for a single line.
+* CSS is no longer incorporated in the HTML page.
+* Perform sanity checks against unauthorized file access.
+* Properly close tags when the page ends in a non-null mode.
+* Split HTML generation in two: parsing and translating.
+* Create classes to handle gemtext parsing and translating.
+* Create class to generate back gemtext (for future test cases).
+* Fix: 404 doesn't occur for an empty file.
+* The 404 page is now fully generated by HtmGem itself.
+
## [1.1.0] - 2021-03-14
* File download when using "source" as a style.
* Improves the regex.
diff --git a/index.gmi b/index.gmi
index 7998350..7367289 100644
--- a/index.gmi
+++ b/index.gmi
@@ -118,8 +118,7 @@ La décoration du texte, qui interprête le **gras** par exemple, ne fait pas pa
### Désactiver la décoration du texte
On peut :
-* commencer la ligne par **^**,
-* faire un bloc non décoré avec **^^^**,
+* désactiver et activer la décoration du texte avec une ligne **^^^**,
* ajouter ce qui suit à la **réécriture** d’URL :
> &textDecoration=0
diff --git a/index.php b/index.php
index 57af85c..a551626 100644
--- a/index.php
+++ b/index.php
@@ -1,305 +1,72 @@
");
}
-?>
-
-
-
-Installation de HtmGem
-
-
-
-
-\n\n";
- die();
+ $t = new \htmgem\GemTextTranslate_html(@file_get_contents("index.gmi"));
+ echo $t->getFullHtml();
+ exit();
}
-######################################## /Installation page
+
+$documentRoot = $_SERVER['DOCUMENT_ROOT'];
# Removes the headling and trailling slashes, to be sure there's not any.
$filePath = rtrim($_SERVER['DOCUMENT_ROOT'], "/")."/".ltrim($url, "/");
-$fileContents = @file_get_contents($filePath);
+# Resolve both paths first: a lexical prefix test is defeated by "../" segments.
+# NOTE(review): symbolic links leading outside the web directory now get a 404 too.
+$realFilePath = realpath($filePath);
+$realRoot = realpath($documentRoot);
+// Says 404 even if the file exists to not give any information.
+$go404 = (
+ false === $realFilePath # missing file
+ || false === $realRoot # unusable web directory
+ || !preg_match("/\.gmi$/", $url) # not ending with .gmi
+ || 0 !== strpos($realFilePath, rtrim($realRoot, DIRECTORY_SEPARATOR).DIRECTORY_SEPARATOR) # not in web directory
+);
-
-######################################## 404 page
-if (!file_exists($filePath) || !preg_match("/\.gmi$/", $url)) {
+/* 404 page
+ */
+if ($go404) {
error_log("HtmGem: 404 $url $filePath");
- http_response_code(404); ?>
-
-
-
-
-
-
-
- $url Recharger 🔄
-
-=> /
+=> .. 🔄 🔄
EOF;
-echo translateGemToHtml($text404);
-echo "\n";
-die();
+ $t = new \htmgem\GemTextTranslate_html($page404);
+ echo $t->getFullHtml();
+ exit();
}
-######################################## /404 page
+# Set to false only if textDecoration=0 is passed in the request (e.g. in the URL).
+$textDecoration = "0" != @$_REQUEST['textDecoration'];
+
+$fileContents = @file_get_contents($filePath);
# Removes the Byte Order Mark
$fileContents = preg_replace("/\xEF\xBB\xBF/", "", $fileContents);
-/**
- * Replaces markups things like __underlined__ to underlined.
- * @param $instruction the characters to replace, ex. _
- * @param $markup the markup to replace to, ex. "u" to get …
- * @param &$text where to replace.
+/* CSS and special style management
*/
-function markupPreg($instruction, $markup, &$text) {
- $output = $text;
-
- # Replaces couples "__word__" into "word".
- $output = mb_ereg_replace("${instruction}(.+?)${instruction}", "<{$markup}>\\1{$markup}>", $output);
-
- # Replaces a remaining __ into "…" to the end of the line.
- $output = mb_ereg_replace("${instruction}(.+)?", "<{$markup}>\\1{$markup}>", $output);
-
- $text = $output;
-}
-
-
-/**
- * Adds text attributes sucj as underline, bold, … to $line
- * @param $line the line to process
- */
-function addTextAttributes(&$line) {
- global $textDecoration;
- if (!$textDecoration) return;
- markupPreg("__", "u", $line);
- markupPreg("\*\*", "strong", $line);
- markupPreg("//", "em", $line);
- markupPreg("~~", "del", $line);
-}
-
-/**
- * Prepares the raw text to be displayed in HTML environment:
- * * Escapes the HTML entities yet contained in the Gemtext.
- * * Puts thin unbrakable spaces before some characters.
- * @param $text1, $text2 texts to process
- */
-function htmlPrepare(&$text) {
- $text = htmlspecialchars($text, ENT_HTML5|ENT_NOQUOTES, "UTF-8", false);
- $text = mb_ereg_replace("\ ([?!:;»€$])", NARROW_NO_BREAK_SPACE."\\1", $text);
- $text = mb_ereg_replace("([«])\ ", "\\1".NARROW_NO_BREAK_SPACE, $text); # Espace fine insécable
-
- # Warning: using a monospace font editor may not display dashes as they should be!
- # Adds no-break spaces to stick the (EM/EN dashes) to words : aaaaaa – bb – ccccc ==> aaaaaa –$bb$– ccccc
- $text = mb_ereg_replace("([".DASHES."]) ([^".DASHES.".]+) ([".DASHES."])", "\\1".NARROW_NO_BREAK_SPACE."\\2".NARROW_NO_BREAK_SPACE."\\3", $text);
-
- # Adds no-break space to stick the (EM/EN dashes) to words : aaaaaa – bb. ==> aaaaaa –$bb.
- $text = mb_ereg_replace("([—–]) ([^.]+)\.", "\\1".NARROW_NO_BREAK_SPACE."\\2.", $text);
-}
-
-function translateGemToHtml($fileContents) {
- $fileLines = preg_split("/\n/", $fileContents);
- if (empty($fileLines[-1])) array_pop($fileLines); # Don't output a last empty line
- ob_start();
- $mode = null;
- $mode_textAttributes = true;
- foreach ($fileLines as $line) {
- $reDoCount = 0;
- $mode_textAttributes_temp = false;
- while (true) {
- if ($reDoCount>2) {
- error_log("HtmGem: Too many loops, mode == '$mode'");
- $mode = null;
- $reDoCount = 0;
- break;
- }
- $reDoCount += 1;
- $line1 = substr($line, 0, 1); // $line can be modified
- $line2 = substr($line, 0, 2); // in the meantime.
- $line3 = substr($line, 0, 3);
- if (is_null($mode)) {
- if (empty($line)) {
- echo "
\n";
- } elseif ('^^^' == $line3) {
- $mode_textAttributes = !$mode_textAttributes;
- } elseif ('^' == $line1 and !$mode_textAttributes_temp) {
- if (preg_match("/^\^\s*(.+)$/", $line, $parts)) {
- $line = $parts[1];
- $mode_textAttributes_temp = true;
- } else {
- $mode = "raw";
- }
- continue;
- } elseif ("#" == $line1) {
- if (preg_match("/^(#{1,3})\s*(.+)/", $line, $sharps)) {
- $h_level = strlen($sharps[1]);
- $text = $sharps[2];
- htmlPrepare($text);
- switch ($h_level) {
- case 1: echo "".$text."
\n"; break;
- case 2: echo "".$text."
\n"; break;
- case 3: echo "".$text."
\n"; break;
- }
- } else {
- $mode = "raw";
- continue;
- }
- } elseif ("=>" == $line2) {
- if (preg_match("/^=>\s*([^\s]+)(?:\s+(.*))?$/", $line, $linkParts)) {
- $url_link = $linkParts[1];
- $url_label = @$linkParts[2];
- preg_match("/^([^:]+):/", $url_link, $matches);
- $url_protocol = @$matches[1];
- if (empty($url_protocol)) $url_protocol = "local";
- if (empty(trim($url_label))) {
- $url_label = $url_link;
- } else {
- // the label is humain-made, apply formatting
- htmlPrepare($url_label);
- if ($mode_textAttributes xor $mode_textAttributes_temp) addTextAttributes($url_label);
- }
- echo "$url_label
\n";
- } else {
- $mode = "raw";
- continue;
- }
- } elseif ("```" == $line3) {
- preg_match("/^```\s*(.*)$/", $line, $matches);
- $alt_text = trim($matches[1]);
- if (empty($alt_text)) {
- echo "\n";
- } else {
- echo "\n";
- }
- $mode="pre";
- } elseif (">" == $line1) {
- echo "\n";
- $mode = "quote";
- continue;
- } elseif ("*" == $line1) {
- echo "\n";
- $mode = "ul";
- continue;
- } else {
- $mode = "raw";
- continue;
- }
- } else {
- if ("raw"==$mode) {
- if (empty($line)) {
- $line = " ";
- } else {
- htmlPrepare($line);
- if ($mode_textAttributes xor $mode_textAttributes_temp)
- addTextAttributes($line);
- }
- echo "$line
\n";
- $mode = null;
- } elseif ("pre"==$mode) {
- if ("```" == $line3) {
- echo "
\n";
- $mode = null;
- } else {
- htmlPrepare($line);
- echo $line."\n";
- }
- } elseif ("quote"==$mode) {
- if (">" == $line1) {
- preg_match("/^>\s*(.*)$/", $line, $quoteParts);
- $quote = $quoteParts[1];
- if (empty($quote))
- echo "
\n";
- else {
- htmlPrepare($quote);
- if ($mode_textAttributes xor $mode_textAttributes_temp)
- addTextAttributes($line);
- echo "".$quote."
\n";
- }
- } else {
- echo "\n";
- $mode = null;
- continue;
- }
- } elseif ("ul"==$mode) {
- if ("*" == $line1) {
- preg_match("/^\*\s*(.*)$/", $line, $ulParts);
- $li = $ulParts[1];
- if (empty($li)) {
- echo "
\n";
- } else {
- htmlPrepare($li);
- if ($mode_textAttributes xor $mode_textAttributes_temp) addTextAttributes($li);
- echo "".$li."\n";
- }
- } else {
- echo "\n";
- $mode = null;
- continue;
- }
- } else {
- die("Unexpected mode: $mode!");
- }
- }
- break; // exits the while(true) as no continue occured
- } // while(true)
- }
- $html = ob_get_contents();
- ob_clean();
- return $html;
-}
-
-# Gets the page title: the first occurrence with # at the line start
-mb_ereg("#\s*([^\n]+)\n", $fileContents, $matches);
-$page_title = @$matches[1];
-
-###################################### CSS Management
-/**
-* if &style=source displays the source directly and stops.
-* if there's a filename.css besides filename.gmi, use the css and stops.
-* if &style= then embbed the default style, and stops.
-* if &style= then use htmgem/word.css
-* if &style=/… then use the … as as stylesheet.
-**/
+$style = @$_REQUEST['style'];
if ("source" == $style) {
$basename = basename($filePath);
header("Cache-Control: public");
@@ -307,9 +74,12 @@ if ("source" == $style) {
header("Content-Type: text/plain");
header("Content-Transfer-Encoding: binary");
header('Content-Length: ' . filesize($filePath));
- readfile($filePath);
+ echo $fileContents;
exit();
} elseif ("pre" == $style) {
+ # Gets the page title: the first occurrence with # at the line start
+ mb_ereg("#\s*([^\n]+)\n", $fileContents, $matches);
+ $page_title = @$matches[1];
$fileContents = htmlspecialchars($fileContents, ENT_HTML5|ENT_NOQUOTES, "UTF-8", false);
echo <<
@@ -318,52 +88,32 @@ if ("source" == $style) {
$page_title
-$fileContents
+
+$fileContents