diff --git a/TODO b/TODO index e72a6a2..875dc2b 100644 --- a/TODO +++ b/TODO @@ -13,3 +13,9 @@ * Manage anchors * Page menu on upper and lower sides: [parent dir] [root dir] [inline images] [raw response] [view cert] * Check unicode capability +* Manage different type of carriage return: CR CR/LF LF +* Do not output an empty line for the last carriage return of the document +* Test on a shared hosting +* Be able to navigate (custom the links) when using htmgen.php?url=… +* Text localisation +* Search all cases where a narrow no-break space would apply diff --git a/htmgem.php b/htmgem.php index d7e3e88..826d248 100644 --- a/htmgem.php +++ b/htmgem.php @@ -13,35 +13,16 @@ else $GMI_DIR = $_SERVER['DOCUMENT_ROOT']; $filePath = $GMI_DIR.$url; -$fileContent = @file_get_contents($filePath); -if (!$fileContent) { +$fileContents = @file_get_contents($filePath); +if (!$fileContents) { http_response_code(404); die("404: $url"); } -$fileLines = preg_split("/\n/", $fileContent); +# Removes the Byte Order Mark +$fileContents = preg_replace("/\xEF\xBB\xBF/", "", $fileContents); -ob_start(); - - -echo(<< - - - HTM Gem - - - - - - -EOL); +$fileLines = preg_split("/\n/", $fileContents); /** * Replaces markups things like __underlined__ to underlined. @@ -73,16 +54,21 @@ function addTextAttributes(&$line) { markupPreg("~~", "del", $line); } +define("NARROW_NO_BREAK_SPACE", " "); + /** - * Escapes the HTML entities yet contained in the Gemtext, keeps multiple spaces. + * Prepares the raw text to be displayed in HTML environment: + * * Escapes the HTML entities yet contained in the Gemtext. + * * Puts thin unbrakable spaces before some characters. * @param $text1, $text2 texts to process */ -function htmlEscape(&$text) { +function htmlPrepare(&$text) { $text = htmlspecialchars($text, ENT_HTML5|ENT_NOQUOTES, "UTF-8", false); - $text = mb_ereg_replace("\ ([?!:;»€$])", " \\1", $text); # Espace fine insécable - $text = mb_ereg_replace("([«])\ ", "\\1 ", $text); # Espace fine insécable + $text = mb_ereg_replace("\ ([?!:;»€$])", NARROW_NO_BREAK_SPACE."\\1", $text); + $text = mb_ereg_replace("([«])\ ", "\\1".NARROW_NO_BREAK_SPACE, $text); # Espace fine insécable } +ob_start(); $mode = null; $mode_textAttributes = true; foreach ($fileLines as $line) { @@ -95,77 +81,73 @@ foreach ($fileLines as $line) { $line3 = substr($line, 0, 3); if (is_null($mode)) { if (empty($line)) { - print("

 

\n"); - } elseif (b"\xEF\xBB\xBF" == $line3) { - # Removes the Byte Order Mark - $line = substr($line, 3); - continue; + echo "

 

\n"; } elseif ("#" == $line1) { preg_match("/^(#{1,3})\s*(.*)/", $line, $sharps); $h_level = strlen($sharps[1]); $text = $sharps[2]; - htmlEscape($text); + htmlPrepare($text); switch ($h_level) { - case 1: print("

".$text."

\n"); break; - case 2: print("

".$text."

\n"); break; - case 3: print("

".$text."

\n"); break; + case 1: echo "

".$text."

\n"; break; + case 2: echo "

".$text."

\n"; break; + case 3: echo "

".$text."

\n"; break; } } elseif ("=>" == $line2) { preg_match("/^=>\s*([^\s]+)(\s+(.*))?$/", $line, $linkParts); $url_link = $linkParts[1]; - $url_label = $linkParts[2]; + $url_label = @$linkParts[2]; if (empty(trim($url_label))) { $url_label = $url_link; } else { // the label is humain-made, apply formatting - htmlEscape($url_label); + htmlPrepare($url_label); } - print("

".$url_label."

\n"); + echo "

".$url_label."

\n"; } elseif ('"""' == $line3) { $mode_textAttributes = !$mode_textAttributes; } elseif ("```" == $line3) { $mode="pre"; - print("
\n");
+                echo "
\n";
             } elseif (">" == $line1) {
                 $mode = "quote";
                 preg_match("/^>\s*(.*)$/", $line, $quoteParts);
                 $quote = $quoteParts[1];
-                print("
\n"); + echo "
\n"; if (empty($quote)) - print("

 

\n"); + echo "

 

\n"; else - htmlEscape($quote); + htmlPrepare($quote); if ($mode_textAttributes) addTextAttributes($line); - print("

".$quote."

\n"); + echo "

".$quote."

\n"; } elseif ("*" == $line1 && "**" != $line2) { $mode = "ul"; - print("
    \n"); + echo "
      \n"; continue; } else { - htmlEscape($line); + htmlPrepare($line); if ($mode_textAttributes) addTextAttributes($line); - print("

      $line

      \n"); + echo "

      $line

      \n"; } } elseif ("pre"==$mode) { if ("```" == $line3) { $mode=null; - print("
\n"); + echo "
\n"; } else { - htmlEscape($line); - print($line."\n"); + htmlPrepare($line); + echo $line."\n"; } } elseif ("quote"==$mode) { if (">" == $line1) { preg_match("/^>\s*(.*)$/", $line, $quoteParts); $quote = $quoteParts[1]; if (empty($quote)) - print("

 

\n"); + echo "

 

\n"; else - htmlEscape($quote); - print("

".$quote."

\n"); + htmlPrepare($quote); + echo "

".$quote."

\n"; } else { $mode=null; - print("\n"); + echo "\n"; continue; } } elseif ("ul"==$mode) { @@ -173,21 +155,45 @@ foreach ($fileLines as $line) { preg_match("/^\*\s*(.*)$/", $line, $ulParts); $li = $ulParts[1]; if (empty($li)) - print("
  •  \n"); + echo "
  •  \n"; else - htmlEscape($li); + htmlPrepare($li); addTextAttributes($li); - print("
  • ".$li."\n"); + echo "
  • ".$li."\n"; } else { $mode = null; - print("\n"); + echo "\n"; continue; } } - break; // Do one loop, except if required + break; } } +$body = ob_get_contents(); +ob_clean(); +# Gets the page title: the first occurrence with # at the line start +mb_ereg("#\s*([^\n]+)\n", $fileContents, $matches); +$page_title = @$matches[1]; + +# +echo << + + +$page_title + + + + +EOL; + +echo "\n".$body; echo "\n\n"; ob_end_flush(); + ?>