From dcc36b1d662c2339a388567274ca372bb4deb140 Mon Sep 17 00:00:00 2001
From: Christophe HENRY <christophe.henry@sbgodin.fr>
Date: Tue, 9 Mar 2021 00:48:07 +0100
Subject: [PATCH] Setup easy installation on shared host

---
 htmgem.php | 254 -------------------------------------------------
 index.gmi  |  34 +++++++
 index.php  | 272 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 306 insertions(+), 254 deletions(-)
 delete mode 100644 htmgem.php
 create mode 100644 index.gmi
 create mode 100644 index.php
diff --git a/htmgem.php b/htmgem.php
deleted file mode 100644
index d547081..0000000
--- a/htmgem.php
+++ /dev/null
@@ -1,254 +0,0 @@
-<?php
-
-mb_internal_encoding("UTF-8");
-mb_regex_encoding("UTF-8");
-
-if (isset($_REQUEST["url"]))
-    $url = $_REQUEST["url"];
-elseif (isset($_SERVER["QUERY_STRING"]))
-    $url = "/".$_SERVER["QUERY_STRING"];
-else
-    $url = "/index.gmi";
-
-$GMI_DIR = $_SERVER['DOCUMENT_ROOT'];
-
-$filePath = $GMI_DIR.$url;
-$fileContents = @file_get_contents($filePath);
-if (!$fileContents) {
-    http_response_code(404);
-    die("404: $url");
-}
-
-# Removes the Byte Order Mark
-$fileContents = preg_replace("/\xEF\xBB\xBF/", "", $fileContents);
-
-$fileLines = preg_split("/\n/", $fileContents);
-
-/**
- * Replaces markups things like __underlined__ to <u>underlined</u>.
- * @param $instruction the characters to replace, ex. _
- * @param $markup the markup to replace to, ex. "u" to get <u>…</u>
- * @param &$text where to replace.
- */
-function markupPreg($instruction, $markup, &$text) {
-    $output = $text;
-
-    # Replaces couples "__word__" into "<i>word</i>".
-    $output = mb_ereg_replace("${instruction}(.+?)${instruction}", "<{$markup}>\\1</{$markup}>", $output);
-
-    # Replaces a remaining __ into "<i>…</i>" to the end of the line.
-    $output = mb_ereg_replace("${instruction}(.+)?", "<{$markup}>\\1</{$markup}>", $output);
-
-    $text = $output;
-}
-
-
-/**
- * Adds text attributes sucj as underline, bold, … to $line
- * @param $line the line to process
- */
-function addTextAttributes(&$line) {
-    markupPreg("__",   "u",   $line);
-    markupPreg("\*\*", "strong",   $line);
-    markupPreg("//",   "em",   $line);
-    markupPreg("~~",   "del", $line);
-}
-
-define("NARROW_NO_BREAK_SPACE", "&#8239;");
-define("DASHES"
-    ,"‒" # U+2012 Figure Dash
-    ."–" # U+2013 En Dash
-    ."—" # U+2014 Em Dash
-    ."⸺" # U+2E3A Two-Em Dash
-    ."⸻" # U+2E3B Three-Em Dash (Three times larger than a single char)
-);
-
-/**
- * Prepares the raw text to be displayed in HTML environment:
- * * Escapes the HTML entities yet contained in the Gemtext.
- * * Puts thin unbrakable spaces before some characters.
- * @param $text1, $text2 texts to process
- */
-function htmlPrepare(&$text) {
-    $text = htmlspecialchars($text, ENT_HTML5|ENT_NOQUOTES, "UTF-8", false);
-    $text = mb_ereg_replace("\ ([?!:;»€$])", NARROW_NO_BREAK_SPACE."\\1", $text);
-    $text = mb_ereg_replace("([«])\ ", "\\1".NARROW_NO_BREAK_SPACE, $text); # Espace fine insécable
-
-    # Warning: using a monospace font editor may not display dashes as they should be!
-    # Adds no-break spaces to stick the (EM/EN dashes) to words : aaaaaa – bb – ccccc ==> aaaaaa –$bb$– ccccc
-    $text = mb_ereg_replace("([".DASHES."]) ([^".DASHES.".]+) ([".DASHES."])", "\\1".NARROW_NO_BREAK_SPACE."\\2".NARROW_NO_BREAK_SPACE."\\3", $text);
-
-    # Adds no-break space to stick the (EM/EN dashes) to words : aaaaaa – bb. ==> aaaaaa –$bb.
-    $text = mb_ereg_replace("([—–]) ([^.]+).", "\\1".NARROW_NO_BREAK_SPACE."\\2.", $text);
-}
-
-ob_start();
-$mode = null;
-$mode_textAttributes = true;
-foreach ($fileLines as $line) {
-    $reDoCount = 0;
-    $mode_textAttributes_temp = false;
-    while (true) {
-        if ($reDoCount>2) {
-            error_log("HtmGem: Too many loops, mode == '$mode'");
-            $mode = null;
-            $reDoCount = 0;
-            break;
-        }
-        $reDoCount += 1;
-        $line1 = substr($line, 0, 1); // $line can be modified
-        $line2 = substr($line, 0, 2); // in the meantime.
-        $line3 = substr($line, 0, 3);
-        if (is_null($mode)) {
-            if (empty($line)) {
-                echo "<p>&nbsp;</p>\n";
-            } elseif ('^^^' == $line3) {
-                if (preg_match("/^\^\^\^\s+(.*)$/", $line)) {
-                    $mode_textAttributes = !$mode_textAttributes;
-                } else {
-                    $mode = "raw";
-                    continue;
-                }
-            } elseif ('^' == $line1 and !$mode_textAttributes_temp) {
-                preg_match("/^\^\s*(.*)$/", $line, $parts);
-                $line = $parts[1];
-                $mode_textAttributes_temp = true;
-                continue;
-            } elseif ("#" == $line1) {
-                preg_match("/^(#{1,3})\s*(.*)/", $line, $sharps);
-                $h_level = strlen($sharps[1]);
-                $text = $sharps[2];
-                htmlPrepare($text);
-                switch ($h_level) {
-                    case 1: echo "<h1>".$text."</h1>\n"; break;
-                    case 2: echo "<h2>".$text."</h2>\n"; break;
-                    case 3: echo "<h3>".$text."</h3>\n"; break;
-                }
-            } elseif ("=>" == $line2) {
-                if (preg_match("/^=>\s*([^\s]+)(?:\s+(.*))?$/", $line, $linkParts)) {
-                    $url_link = $linkParts[1];
-                    $url_label = @$linkParts[2];
-                    preg_match("/^([^:]+):/", $url_link, $matches);
-                    $url_protocol = @$matches[1];
-                    if (empty($url_protocol)) $url_protocol = "local";
-                    if (empty(trim($url_label))) {
-                        $url_label = $url_link;
-                    } else {
-                        // the label is humain-made, apply formatting
-                        htmlPrepare($url_label);
-                    }
-                    echo "<p><a class='$url_protocol' href='$url_link'>$url_label</a></p>\n";
-                } else {
-                    $mode = "raw";
-                    continue;
-                }
-            } elseif ("```" == $line3) {
-                if (preg_match("/^```\s*(.*)$/", $line, $matches)) {
-                    $alt_text = trim(@$matches[1]);
-                    if (!(empty($alt_text))) {
-                        echo "<pre alt='$alt_text' title='$alt_text'>\n";
-                    } else {
-                        echo "<pre>\n";
-                    }
-                }
-                $mode="pre";
-            } elseif (">" == $line1) {
-                $mode = "quote";
-                preg_match("/^>\s*(.*)$/", $line, $quoteParts);
-                $quote = $quoteParts[1];
-                echo "<blockquote>\n";
-                if (empty($quote))
-                    echo "<p>&nbsp;</p>\n";
-                else
-                    htmlPrepare($quote);
-                if ($mode_textAttributes xor $mode_textAttributes_temp) addTextAttributes($line);
-                    echo "<p>".$quote."</p>\n";
-            } elseif ("* " == $line2) {
-                echo "<ul>\n";
-                $mode = "ul";
-                continue;
-            } else {
-                $mode = "raw";
-                continue;
-            }
-        } else {
-            if ("raw"==$mode) {
-                htmlPrepare($line);
-                if ($mode_textAttributes xor $mode_textAttributes_temp) addTextAttributes($line);
-                if (empty($line)) $line = "&nbsp;";
-                echo "<p>$line</p>\n";
-                $mode = null;
-            } elseif ("pre"==$mode) {
-                if ("```" == $line3) {
-                    echo "</pre>\n";
-                    $mode = null;
-                } else {
-                    htmlPrepare($line);
-                    echo $line."\n";
-                }
-            } elseif ("quote"==$mode) {
-                if (">" == $line1) {
-                    preg_match("/^>\s*(.*)$/", $line, $quoteParts);
-                    $quote = $quoteParts[1];
-                    if (empty($quote))
-                        echo "<p>&nbsp;</p>\n";
-                    else
-                        htmlPrepare($quote);
-                        echo "<p>".$quote."</p>\n";
-                } else {
-                    echo "</blockquote>\n";
-                    $mode = null;
-                    continue;
-                }
-            } elseif ("ul"==$mode) {
-                if ("* " == $line2) {
-                    preg_match("/^\*\s*(.*)$/", $line, $ulParts);
-                    $li = $ulParts[1];
-                    if (empty($li)) {
-                        echo "<li>&nbsp;\n";
-                    } else {
-                        htmlPrepare($li);
-                        if ($mode_textAttributes xor $mode_textAttributes_temp) addTextAttributes($li);
-                        echo "<li>".$li."\n";
-                    }
-                } else {
-                    echo "</ul>\n";
-                    $mode = null;
-                    continue;
-                }
-            } else {
-                die("Unexpected mode: $mode!");
-            }
-        }
-        break; // exits the while(true) as no continue occured
-    }
-}
-$body = ob_get_contents();
-ob_clean();
-
-# Gets the page title: the first occurrence with # at the line start
-mb_ereg("#\s*([^\n]+)\n", $fileContents, $matches);
-$page_title = @$matches[1];
-
-# <!-- link type="text/css" rel="StyleSheet" href="/htmgem.css" -->
-echo <<<EOL
-<!DOCTYPE html>
-<html lang="fr">
-<head>
-<title>$page_title</title>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-<style>
-
-EOL;
-include("htmgem.css");
-echo <<<EOL
-</style>
-</head>
-<body>
-EOL;
-
-echo "\n".$body;
-echo "</body>\n</html>\n";
-ob_end_flush();
-
-?>
diff --git a/index.gmi b/index.gmi
new file mode 100644
index 0000000..2f0304d
--- /dev/null
+++ b/index.gmi
@@ -0,0 +1,34 @@
+# HtmGem, voir Gemini à travers HTTP
+Une fois les fichiers copiés dans votre hébergement, par exemple ///htmgem//, il ne reste qu’à ajouter une règle de ré-écriture d’URL (//URL Rewriting//). Si vous pouvez lire ce texte en ayant demandé //index.php// avec le navigateur, il ne reste que cette dernière étape à franchir 😄
+
+### Nginx
+```
+# Ce texte doit être placé dans la configuration du serveur.
+index index.php index.gmi index.html;
+rewrite ^(.+\.gmi)$ htmgem/?url=$1;
+```
+
+### Apache
+```
+# Ce texte peut se placer à la racine du site dans un fichier ".htaccess".
+DirectoryIndex index.php index.gmi index.html
+RewriteEngine on
+RewriteRule ^(.+\.gmi)$ htmgem/?url=$1
+```
+
+Par la suite,
+* Quand un fichier se terminant par //.gmi// sera demandé, HtmGem le traduira à la volée en HTML.
+* Quand seul le répertoire sera demandé (http:/​/site.tld/path) le fichier //index.gmi// sera automatiquement choisi.
+
+## Tester HtmGem
+Il est supposé que HtmGem est installé à la racine du site dans un répertoire //HtmGem//. Sinon, il faudra simplement adapter les liens.
+
+🢂 Appeler cette présente page directement, en cliquant ci-après :
+=> index.gmi
+
+🢂 Enfin, pour vérifier que le fichier //index.gmi// est automatiquement appelé quand le fichier //**.gmi**// n’est pas indiqué :
+=> / Lien vers la racine du site
+
+# Quand tout marche…
+Parmi les fichiers distribués, seuls sont requis :
+* htmgem/index.php
diff --git a/index.php b/index.php
new file mode 100644
index 0000000..778d98f
--- /dev/null
+++ b/index.php
@@ -0,0 +1,272 @@
+<?php
+
+mb_internal_encoding("UTF-8");
+mb_regex_encoding("UTF-8");
+
+define("NARROW_NO_BREAK_SPACE", "&#8239;");
+define("DASHES"
+    ,"‒" # U+2012 Figure Dash
+    ."–" # U+2013 En Dash
+    ."—" # U+2014 Em Dash
+    ."⸺" # U+2E3A Two-Em Dash
+    ."⸻" # U+2E3B Three-Em Dash (Three times larger than a single char)
+);
+
+$url = @$_REQUEST["url"];
+if (!is_string($url) || empty($url)) { # no usable file requested (absent, empty or array): installation page
+?>
+
+<!DOCTYPE html>
+<html lang="fr">
+<head>
+<title>Installation de HtmGem</title>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<style>
+<?php include("htmgem.css"); ?>
+</style>
+</head>
+<body>
+<?php
+    echo translateGemToHtml(@file_get_contents("index.gmi"));
+    echo "</body>\n</html>\n";
+    die();
+}
+
+# Serves the file only if its resolved path lies under the document root, so that
+# "?url=/../secret" cannot escape it. NOTE(review): non-.gmi files (PHP sources!) are still served.
+$GMI_DIR = realpath($_SERVER['DOCUMENT_ROOT']);
+$filePath = realpath($GMI_DIR."/".ltrim($url, "/")); # a relative $url is accepted too
+$isServable = $GMI_DIR !== false && $filePath !== false && strpos($filePath, rtrim($GMI_DIR, DIRECTORY_SEPARATOR).DIRECTORY_SEPARATOR) === 0;
+$fileContents = $isServable ? @file_get_contents($filePath) : false;
+if (!$fileContents) {
+    http_response_code(404);
+    die("404: ".htmlspecialchars($url, ENT_QUOTES|ENT_HTML5, "UTF-8")); # $url is user input: escaped
+}
+
+# Removes the Byte Order Mark
+$fileContents = preg_replace("/\xEF\xBB\xBF/", "", $fileContents);
+
+
+/**
+ * Wraps the text enclosed by a marker into an HTML element, ex. __underlined__ to <u>underlined</u>.
+ * @param $instruction the marker, inserted as-is in a regular expression (callers escape it, ex. "\*\*")
+ * @param $markup the name of the HTML element to produce, ex. "u" to get <u>…</u>
+ * @param &$text the text to process, modified in place.
+ */
+function markupPreg($instruction, $markup, &$text) {
+    # "{$var}" is used in the patterns: the "${var}" interpolation is deprecated as of PHP 8.2.
+
+    # Replaces couples, ex. "__word__" into "<u>word</u>" (lazy match: the nearest closing marker wins).
+    $output = mb_ereg_replace("{$instruction}(.+?){$instruction}", "<{$markup}>\\1</{$markup}>", $text);
+
+    # Replaces a remaining marker, ex. "__word" into "<u>word</u>", to the end of the line.
+    $output = mb_ereg_replace("{$instruction}(.+)?", "<{$markup}>\\1</{$markup}>", $output);
+
+    $text = $output;
+}
+
+
+/**
+ * Applies the inline text attributes (underline, bold, emphasis, strike-through) to $line.
+ * @param &$line the line to process, modified in place
+ */
+function addTextAttributes(&$line) {
+    # marker (a regular expression fragment) => HTML element, applied in this order
+    $attributes = array("__" => "u", "\*\*" => "strong", "//" => "em", "~~" => "del");
+    foreach ($attributes as $marker => $element)
+        markupPreg($marker, $element, $line);
+}
+
+/**
+ * Prepares the raw text to be displayed in HTML environment:
+ * * Escapes the HTML special characters, leaving alone the entities already written in the Gemtext.
+ * * Puts narrow no-break spaces next to some punctuation marks, guillemets and dashes.
+ * @param &$text the text to process, modified in place
+ */
+function htmlPrepare(&$text) {
+    $text = htmlspecialchars($text, ENT_HTML5|ENT_NOQUOTES, "UTF-8", false);
+    $text = mb_ereg_replace("\ ([?!:;»€$])", NARROW_NO_BREAK_SPACE."\\1", $text);
+    $text = mb_ereg_replace("([«])\ ", "\\1".NARROW_NO_BREAK_SPACE, $text); # narrow no-break space
+
+    # Warning: using a monospace font editor may not display dashes as they should be!
+    # Adds no-break spaces to stick the (EM/EN dashes) to words : aaaaaa – bb – ccccc ==> aaaaaa –$bb$– ccccc
+    $text = mb_ereg_replace("([".DASHES."]) ([^".DASHES.".]+) ([".DASHES."])", "\\1".NARROW_NO_BREAK_SPACE."\\2".NARROW_NO_BREAK_SPACE."\\3", $text);
+
+    # Sticks the dash to the words up to the next full stop: aaaaaa – bb. ==> aaaaaa –$bb. (the final dot is escaped: bare, it matched any character and turned it into ".")
+    $text = mb_ereg_replace("([—–]) ([^.]+)\.", "\\1".NARROW_NO_BREAK_SPACE."\\2.", $text);
+}
+
+/**
+ * Translates a Gemtext document into an HTML fragment: headings, links, lists,
+ * quotes, preformatted blocks and paragraphs.
+ * @param $fileContents the Gemtext source, with LF or CRLF line endings
+ * @return the HTML fragment, meant to be placed inside <body>
+ */
+function translateGemToHtml($fileContents) {
+    $fileLines = preg_split("/\r?\n/", $fileContents);
+    ob_start();
+    $mode = null; # null between blocks, else the block being read: "raw", "pre", "quote" or "ul"
+    $mode_textAttributes = true; # toggled by the "^^^ " lines
+    foreach ($fileLines as $line) {
+        $reDoCount = 0;
+        $mode_textAttributes_temp = false; # a leading "^" inverts $mode_textAttributes for this line only
+        while (true) { # each "continue" processes the same line again, after a mode or line change
+            # The longest legitimate chain takes four passes: close a block, strip "^", open a block, render.
+            if ($reDoCount>3) {
+                error_log("HtmGem: Too many loops, mode == '$mode'");
+                $mode = null;
+                $reDoCount = 0;
+                break;
+            }
+            $reDoCount += 1;
+            $line1 = substr($line, 0, 1); // $line can be modified
+            $line2 = substr($line, 0, 2); // in the meantime.
+            $line3 = substr($line, 0, 3);
+            if (is_null($mode)) {
+                if ("" === $line) { # not empty(): a line made of "0" is real content
+                    echo "<p>&nbsp;</p>\n";
+                } elseif ('^^^' == $line3) {
+                    if (preg_match("/^\^\^\^\s+(.*)$/", $line)) {
+                        $mode_textAttributes = !$mode_textAttributes;
+                    } else {
+                        $mode = "raw";
+                        continue;
+                    }
+                } elseif ('^' == $line1 and !$mode_textAttributes_temp) {
+                    preg_match("/^\^\s*(.*)$/", $line, $parts);
+                    $line = $parts[1];
+                    $mode_textAttributes_temp = true;
+                    continue;
+                } elseif ("#" == $line1) {
+                    preg_match("/^(#{1,3})\s*(.*)/", $line, $sharps);
+                    $h_level = strlen($sharps[1]); # 1 to 3, as enforced by the pattern
+                    $text = $sharps[2];
+                    htmlPrepare($text);
+                    echo "<h$h_level>".$text."</h$h_level>\n";
+                } elseif ("=>" == $line2) {
+                    if (preg_match("/^=>\s*([^\s]+)(?:\s+(.*))?$/", $line, $linkParts)) {
+                        # The link comes from the document: escaped, quotes included, as it lands in attributes.
+                        $url_link = htmlspecialchars($linkParts[1], ENT_QUOTES|ENT_HTML5, "UTF-8", false);
+                        $url_label = isset($linkParts[2]) ? $linkParts[2] : "";
+                        $url_protocol = preg_match("/^([^:]+):/", $url_link, $matches) ? $matches[1] : "local";
+                        if ("" === trim($url_label)) {
+                            $url_label = $url_link;
+                        } else {
+                            // the label is human-made, apply formatting
+                            htmlPrepare($url_label);
+                        }
+                        echo "<p><a class='$url_protocol' href='$url_link'>$url_label</a></p>\n";
+                    } else {
+                        $mode = "raw";
+                        continue;
+                    }
+                } elseif ("```" == $line3) {
+                    $alt_text = preg_match("/^```\s*(.*)$/", $line, $matches) ? trim($matches[1]) : "";
+                    $alt_text = htmlspecialchars($alt_text, ENT_QUOTES|ENT_HTML5, "UTF-8", false);
+                    if ("" !== $alt_text) {
+                        echo "<pre alt='$alt_text' title='$alt_text'>\n";
+                    } else {
+                        echo "<pre>\n";
+                    }
+                    $mode="pre";
+                } elseif (">" == $line1) {
+                    echo "<blockquote>\n";
+                    $mode = "quote";
+                    continue; # the line itself is rendered by the "quote" mode, as for the lists
+                } elseif ("* " == $line2) {
+                    echo "<ul>\n";
+                    $mode = "ul";
+                    continue;
+                } else {
+                    $mode = "raw";
+                    continue;
+                }
+            } else {
+                if ("raw"==$mode) {
+                    htmlPrepare($line);
+                    if ($mode_textAttributes xor $mode_textAttributes_temp) addTextAttributes($line);
+                    if ("" === (string)$line) $line = "&nbsp;";
+                    echo "<p>$line</p>\n";
+                    $mode = null;
+                } elseif ("pre"==$mode) {
+                    if ("```" == $line3) {
+                        echo "</pre>\n";
+                        $mode = null;
+                    } else {
+                        htmlPrepare($line);
+                        echo $line."\n";
+                    }
+                } elseif ("quote"==$mode) {
+                    if (">" == $line1) {
+                        preg_match("/^>\s*(.*)$/", $line, $quoteParts);
+                        $quote = $quoteParts[1];
+                        if ("" === $quote) {
+                            echo "<p>&nbsp;</p>\n";
+                        } else {
+                            htmlPrepare($quote);
+                            if ($mode_textAttributes xor $mode_textAttributes_temp) addTextAttributes($quote);
+                            echo "<p>".$quote."</p>\n";
+                        }
+                    } else {
+                        echo "</blockquote>\n";
+                        $mode = null;
+                        continue;
+                    }
+                } elseif ("ul"==$mode) {
+                    if ("* " == $line2) {
+                        preg_match("/^\*\s*(.*)$/", $line, $ulParts);
+                        $li = $ulParts[1];
+                        if ("" === $li) {
+                            echo "<li>&nbsp;\n";
+                        } else {
+                            htmlPrepare($li);
+                            if ($mode_textAttributes xor $mode_textAttributes_temp) addTextAttributes($li);
+                            echo "<li>".$li."\n";
+                        }
+                    } else {
+                        echo "</ul>\n";
+                        $mode = null;
+                        continue;
+                    }
+                } else {
+                    die("Unexpected mode: $mode!");
+                }
+            }
+            break; // exits the while(true) as no continue occured
+        }
+    }
+    # Closes the block left open when the text ends inside it.
+    if ("pre" == $mode) echo "</pre>\n";
+    elseif ("quote" == $mode) echo "</blockquote>\n";
+    elseif ("ul" == $mode) echo "</ul>\n";
+    $html = ob_get_contents();
+    ob_clean(); # empties the buffer but keeps it open: the final ob_end_flush() of the script needs one
+    return $html;
+}
+
+# Gets the page title: the text of the first line starting with "#" ("m" flag: "^" matches at every line start).
+# It comes from the document, hence it is escaped before landing in the title element.
+$page_title = preg_match("/^#+[ \t]*([^\r\n]+)/m", $fileContents, $matches) ? htmlspecialchars($matches[1], ENT_QUOTES|ENT_HTML5, "UTF-8", false) : "";
+
+# <!-- link type="text/css" rel="StyleSheet" href="/htmgem.css" -->
+echo <<<EOL
+<!DOCTYPE html>
+<html lang="fr">
+<head>
+<title>$page_title</title>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<style>
+
+EOL;
+include("htmgem.css");
+echo <<<EOL
+</style>
+</head>
+<body>
+EOL;
+
+echo "\n".translateGemToHtml($fileContents);
+echo "</body>\n</html>\n";
+ob_end_flush(); # flushes and closes the output buffer opened by translateGemToHtml()
+
+?>