HtmGem is a PHP program whose goal is to make Gemini files reachable through HTTP. It can be used on a shared host. https://gmi.sbgodin.fr/htmgem
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

423 lines
16 KiB

<?php declare(strict_types=1);
namespace htmgem;
// The mb_* string and regex functions used in this file (mb_split,
// mb_ereg_replace) are configured to treat their input as UTF-8.
mb_internal_encoding("UTF-8");
mb_regex_encoding("UTF-8");
/**
 * Resolves a path by interpreting "/", "." and ".." segments.
 *
 * Empty segments (repeated or trailing slashes) and "." segments are dropped;
 * a ".." segment removes the segment that precedes it. A leading "/" is kept.
 *
 * @param string $path the path to normalise
 * @return string the normalised path; "" when $path is not a non-empty string,
 *                and "/" as soon as a ".." tries to climb above the first segment
 */
function resolve_path($path) {
    // Test for "" explicitly: empty() would also reject the valid path "0".
    if (!is_string($path) || "" === $path) return "";
    $absolute = "/" === $path[0];
    $chunks = array();
    foreach (explode("/", $path) as $part) {
        // "" comes from doubled, leading or trailing slashes.
        if ("" === $part || "." === $part) continue;
        if (".." === $part) {
            // array_pop() yields null on an empty stack: ".." went above the root.
            if (null === array_pop($chunks)) return "/";
        } else {
            $chunks[] = $part;
        }
    }
    $output = implode("/", $chunks);
    return $absolute ? "/".$output : $output;
}
/**
 * Splits a path (expected to be free of "." and "..") into its segments and
 * maps each segment to the cumulative path that leads to it.
 *
 * Ex. /dir1/dir2/page.gmi
 *   --> "dir1"     --> "/dir1"
 *   --> "dir2"     --> "/dir1/dir2"
 *   --> "page.gmi" --> "/dir1/dir2/page.gmi"
 *
 * The segment name is the array key, so a name occurring twice in the path
 * keeps only the link of its last occurrence.
 *
 * @param $path   the path to split; empty segments are ignored
 * @param $prefix text prepended to every generated link
 * @return array  segment => prefixed cumulative path, in path order;
 *                an empty array when the path has no segment
 */
function split_path_links($path, $prefix="") {
    $segments = array_filter(explode("/", $path), 'strlen');
    if (!$segments) return array();

    $links = array();
    // An absolute path keeps its leading slash on every cumulative link.
    $trail = ("/" == $path[0]) ? "/" : "";
    $separator = "";
    foreach ($segments as $segment) {
        $trail = $trail.$separator.$segment;
        $links[$segment] = $prefix.$trail;
        // Every segment after the first is joined with a slash.
        $separator = "/";
    }
    return $links;
}
/**
 * Parses a gemtext document and yields, one node at a time, its internal
 * representation. This is a generator: nothing is parsed until it is iterated.
 *
 * Yielded nodes are arrays keyed by "mode":
 *   - "^^^"             : array("mode")                  text decoration toggle
 *   - "#", "##", "###"  : array("mode", "title")
 *   - "=>"              : array("mode", "link", "text")
 *   - "```"             : array("mode", "alt", "texts")  preformatted block
 *   - ">"               : array("mode", "texts")         consecutive quote lines
 *   - "*"               : array("mode", "texts")         consecutive list items
 *   - ""                : array("mode", "text")          plain text line
 *
 * @param string $fileContents the gemtext to parse; anything that is not a
 *                             non-empty string yields no node
 */
function gemtextParser($fileContents) {
    // Test for "" explicitly: empty() would also discard the one-character document "0".
    if (!is_string($fileContents) || "" === $fileContents) return array();
    $fileContents = rtrim($fileContents); // removes last empty line
    $fileLines = mb_split("\n|\r\n?", $fileContents); // Unix, Mac, Windows line feeds
    $mode = null;       // null outside a block, else the marker of the open block
    $current = array(); // the multi-line node being accumulated
    foreach ($fileLines as $line) {
        // The line is never modified below, so its prefixes are computed once.
        $line1 = substr($line, 0, 1);
        $line2 = substr($line, 0, 2);
        $line3 = substr($line, 0, 3);
        $reDoCount = 0;
        while (true) {
            /* The continue instruction is used to make another turn when there is a transition
             * between two modes: the line that closes a quote or a list must be parsed
             * again as a regular line. One extra turn is the maximum expected. */
            if ($reDoCount>1) {
                die("HtmGem: Too many loops, mode == '$mode'");
            }
            $reDoCount += 1;
            if (is_null($mode)) {
                if ('^^^' == $line3) {
                    yield array("mode" => "^^^");
                } elseif ("#" == $line1) {
                    preg_match("/^(#{1,3})\s*(.+)?/", $line, $matches);
                    yield array("mode" => $matches[1], "title" => trim($matches[2]??""));
                } elseif ("=>" == $line2) {
                    // No match at all (a bare "=>") leaves both link and text empty.
                    preg_match("/^=>\s*([^\s]+)(?:\s+(.*))?$/", $line, $matches);
                    yield array("mode" => "=>", "link" => trim($matches[1]??""), "text" => trim($matches[2]??""));
                } elseif ("```" == $line3) {
                    preg_match("/^```\s*(.*)$/", $line, $matches);
                    $current = array("mode" => "```", "alt" => trim($matches[1]), "texts" => array());
                    $mode = "```";
                } elseif (">" == $line1) {
                    preg_match("/^>\s*(.*)$/", $line, $matches);
                    $current = array("mode" => ">", "texts" => array(trim($matches[1])));
                    $mode = ">";
                } elseif ("*" == $line1) {
                    preg_match("/^\*\s*(.*)$/", $line, $matches);
                    $current = array("mode" => "*", "texts" => array(trim($matches[1])));
                    $mode = "*";
                } else {
                    // text_line
                    yield array("mode" => "", "text" => rtrim($line));
                }
            } else {
                if ("```" == $mode) {
                    if ("```" == $line3) {
                        // The closing fence is consumed: no second turn.
                        yield $current;
                        $current = array();
                        $mode = null;
                    } else {
                        $current["texts"] []= rtrim($line); // No ltrim() as it's a preformatted text!
                    }
                } elseif (">" == $mode) {
                    if (">" == $line1) {
                        preg_match("/^>\s*(.*)$/", $line, $matches);
                        $current["texts"] []= trim($matches[1]);
                    } else {
                        yield $current;
                        $current = array();
                        $mode = null;
                        continue; // parse this same line again, outside the quote
                    }
                } elseif ("*" == $mode) {
                    if ("*" == $line1) {
                        preg_match("/^\*\s*(.*)$/", $line, $matches);
                        $current["texts"] []= trim($matches[1]);
                    } else {
                        yield $current;
                        $current = array();
                        $mode = null;
                        continue; // parse this same line again, outside the list
                    }
                } else {
                    die("Unexpected mode: $mode!");
                }
            }
            break; // exits the while(true) as no continue occurred
        } // while(true)
    } // foreach
    if ($current) yield $current; # File ends before the block.
} // gemtextParser
/**
 * Translates the internal format back into gemtext.
 * Use cases:
 *
 * - test suites
 * - easier serialisation, as the result is plain text
 * - normalisation (trimming spaces for instance)
 */
class GemtextTranslate_gemtext {
    /** Nodes yielded by gemtextParser() for the given text. */
    public $parsedGemtext;
    /** The regenerated gemtext, filled in by translate(). */
    public $translatedGemtext = "";

    /**
     * Parses the given gemtext and immediately regenerates it.
     * @param $parsedGemtext the raw gemtext (despite its name, it is parsed here)
     */
    function __construct($parsedGemtext) {
        // Only non-string empty values become ""; the document "0" must be kept as is.
        if (!is_string($parsedGemtext) && empty($parsedGemtext)) $parsedGemtext = "";
        // to delete the last empty lines
        $parsedGemtext = rtrim($parsedGemtext);
        // The text must be parsed
        $this->parsedGemtext = gemtextParser($parsedGemtext);
        $this->translate();
    }

    /**
     * Walks the parsed nodes and writes one gemtext line per item into
     * $this->translatedGemtext.
     */
    protected function translate() {
        $output = "";
        foreach ($this->parsedGemtext as $node) {
            $mode = $node["mode"];
            switch($mode) {
                case "":
                    $output .= $node["text"]."\n";
                    break;
                case "*":
                    foreach ($node["texts"] as $text) {
                        $output .= "* $text\n";
                    }
                    break;
                case "```":
                    $alt = $node["alt"];
                    // Compare with "": empty() would drop an alt text of "0".
                    if ("" === $alt)
                        $output .= "```\n";
                    else
                        $output .= "``` $alt\n";
                    foreach ($node["texts"] as $text) {
                        $output .= "$text\n";
                    }
                    $output .= "```\n";
                    break;
                case ">":
                    foreach ($node["texts"] as $text) {
                        // An empty quote line is written without a trailing space.
                        if ("" === $text)
                            $output .= ">\n";
                        else
                            $output .= "> $text\n";
                    }
                    break;
                case "=>":
                    $linkText = $node["text"];
                    $link = $node["link"];
                    // Each present part gets its own leading space.
                    if ("" !== $linkText) $linkText = " $linkText";
                    if ("" !== $link) $link = " $link";
                    $output .= "=>".$link.$linkText."\n";
                    break;
                case "#":
                case "##":
                case "###":
                    $output .= "$mode ".$node["title"]."\n";
                    break;
                case "^^^":
                    $output .= "^^^\n";
                    break;
                default:
                    die("Unknown mode: '{$node["mode"]}'\n");
            }
        }
        $this->translatedGemtext = $output;
    }

    /** @return string the regenerated gemtext */
    public function __toString() {
        return $this->translatedGemtext;
    }
} // GemtextTranslate_gemtext
/**
 * Translates the internal format to HTML.
 *
 * The HTML is built by the constructor and stored in $translatedGemtext;
 * the first level-1 heading found becomes the page title.
 */
class GemtextTranslate_html {
    protected $cssList = array();
    protected $pageTitle = "";
    public $translatedGemtext;
    /** Prefix prepended to local links when URL rewriting is off, or null. */
    public $urlPrefix;
    /** Directory of the current page, used to resolve local links. */
    public $currentPageDir;
    /** List of nodes produced by gemtextParser(). */
    public $parsedGemtext;

    /**
     * @param $parsedGemtext the raw gemtext (despite its name, it is parsed here)
     * @param $textDecoration bool to interpret or not the text decoration
     * @param $urlPrefix the prefix to prepend if the URL rewriting is not on
     * @param $currentPageDir the current directory, to be used without URL rewriting
     */
    function __construct($parsedGemtext, $textDecoration=true, $urlPrefix=null, $currentPageDir=null) {
        $this->urlPrefix = $urlPrefix;
        $this->currentPageDir = $currentPageDir;
        // Only non-string empty values become ""; the document "0" must be kept as is.
        if (!is_string($parsedGemtext) && empty($parsedGemtext)) $parsedGemtext = "";
        // to delete the last empty lines
        $parsedGemtext = rtrim($parsedGemtext);
        // The parser is a one-shot generator: keep its nodes in an array so
        // that the public translate() can be called more than once.
        $this->parsedGemtext = iterator_to_array(gemtextParser($parsedGemtext), false);
        $this->translate($textDecoration);
    }

    /** Registers a CSS entry; the list is only stored, not used by translate(). */
    function addCss($css) {
        $this->cssList []= $css;
    }

    /** @return array the CSS entries registered with addCss() */
    function getCss() { return $this->cssList; }

    /** @return string the escaped text of the first level-1 heading, "" if none */
    function getTitle() { return $this->pageTitle; }

    const NARROW_NO_BREAK_SPACE = "&#8239;";
    # Written as escapes so that the characters survive editors and copy/paste.
    const DASHES
        ="\u{2012}" # U+2012 Figure Dash
        ."\u{2013}" # U+2013 En Dash
        ."\u{2014}" # U+2014 Em Dash
        ."\u{2E3A}" # U+2E3A Two-Em Dash
        ."\u{2E3B}" # U+2E3B Three-Em Dash (Three times larger than a single char)
        ;

    /**
     * Replaces markup such as __underlined__ by <u>underlined</u>.
     * @param $instruction the marker, written as a regular expression, ex. "__"
     * @param $markup the HTML element to produce, ex. "u" to get <u>…</u>
     * @param &$text where to replace.
     */
    protected static function markupPreg($instruction, $markup, &$text) {
        $output = $text;
        # Replaces pairs such as "__word__" by "<u>word</u>".
        $output = mb_ereg_replace("{$instruction}(.+?){$instruction}", "<{$markup}>\\1</{$markup}>", $output);
        # A remaining, unpaired marker applies up to the end of the line.
        $output = mb_ereg_replace("{$instruction}(.+)?", "<{$markup}>\\1</{$markup}>", $output);
        $text = $output;
    }

    /**
     * Adds text attributes such as underline, bold, … to $line
     * @param &$line the line to process, already HTML-escaped
     */
    protected static function addTextDecoration(&$line) {
        self::markupPreg("__", "u", $line);
        self::markupPreg("\*\*", "strong", $line);
        self::markupPreg("//", "em", $line);
        self::markupPreg("~~", "del", $line);
    }

    /**
     * Prepares the raw text to be displayed in HTML environment:
     * * Escapes the HTML special characters contained in the Gemtext.
     * * Puts narrow no-break spaces next to some punctuation and dashes.
     * An empty text becomes a no-break space entity.
     * @param &$text the text to process
     */
    protected static function htmlPrepare(&$text) {
        // Compare with "": empty() would also replace the text "0".
        if (null === $text || "" === $text) {
            $text = "&nbsp;";
        } else {
            $text = htmlspecialchars($text, ENT_HTML5|ENT_QUOTES, "UTF-8", true);
            # Narrow no-break space before some punctuation marks…
            $text = mb_ereg_replace("\ ([?!:;»€$])", self::NARROW_NO_BREAK_SPACE."\\1", $text);
            # … and after an opening guillemet.
            $text = mb_ereg_replace("([«])\ ", "\\1".self::NARROW_NO_BREAK_SPACE, $text);
            # Warning: using a monospace font editor may not display dashes as they should be!
            # Adds no-break spaces to stick the (EM/EN dashes) to words : aaaaaa – bb – ccccc ==> aaaaaa –$bb$– ccccc
            $text = mb_ereg_replace("([".self::DASHES."]) ([^".self::DASHES.".]+) ([".self::DASHES."])", "\\1".self::NARROW_NO_BREAK_SPACE."\\2".self::NARROW_NO_BREAK_SPACE."\\3", $text);
            # Adds no-break space to stick the (EM/EN dashes) to words : aaaaaa – bb. ==> aaaaaa –$bb.
            $text = mb_ereg_replace("([\u{2014}\u{2013}]) ([^.]+)\.", "\\1".self::NARROW_NO_BREAK_SPACE."\\2.", $text);
        }
    }

    /** Replaces every run of spaces (0x20) in $text by a single space. */
    protected static function spacesCompress(&$text) {
        $text = preg_replace("/ +/", " ", $text);
    }

    /**
     * Common pipeline for inline text: compress spaces, escape for HTML and,
     * when asked, interpret the text decoration.
     * @return string the HTML fragment
     */
    protected static function renderInline($text, $textDecoration) {
        self::spacesCompress($text);
        self::htmlPrepare($text);
        if ($textDecoration) self::addTextDecoration($text);
        return $text;
    }

    /**
     * Builds the HTML from the parsed nodes and stores it in $translatedGemtext.
     * @param $textDecoration bool initial state of the text decoration; each
     *                        "^^^" node met in the document toggles it
     */
    public function translate($textDecoration=true) {
        $output = "";
        foreach ($this->parsedGemtext as $node) {
            $mode = $node["mode"];
            switch($mode) {
                case "":
                    $output .= "<p>".self::renderInline($node["text"], $textDecoration)."</p>\n";
                    break;
                case "*":
                    $output .= "<ul>\n";
                    foreach ($node["texts"] as $text) {
                        $output .= "<li>".self::renderInline($text, $textDecoration)."\n";
                    }
                    $output .= "</ul>\n";
                    break;
                case "```":
                    $text = implode("\n", $node["texts"]);
                    self::htmlPrepare($text);
                    // The alt text comes from the document: escape it for the attribute.
                    $alt = htmlspecialchars($node["alt"], ENT_HTML5|ENT_QUOTES, "UTF-8");
                    $output .= "<pre alt='$alt'>\n$text\n</pre>\n";
                    break;
                case ">":
                    $output .= "<blockquote>\n";
                    foreach ($node["texts"] as $text) {
                        $output .= "<p>".self::renderInline($text, $textDecoration)."</p>\n";
                    }
                    $output .= "</blockquote>\n";
                    break;
                case "=>":
                    $rawLink = $node["link"];
                    $linkText = $node["text"];
                    if ("" === $linkText) {
                        // No label: the link itself is displayed, without
                        // decoration so that "//" in URLs is left alone.
                        $linkText = $rawLink;
                        self::htmlPrepare($linkText);
                    } else {
                        $linkText = self::renderInline($linkText, $textDecoration);
                    }
                    // Always escape the link as it ends up in the href and class
                    // attributes. Entities already present are not double-encoded.
                    $link = htmlspecialchars($rawLink, ENT_HTML5|ENT_QUOTES, "UTF-8", false);
                    // Whatever precedes the first ":" is taken as the protocol.
                    $protocol = preg_match("/^([^:]+):/", $link, $matches) ? $matches[1] : "local";
                    if ("local"==$protocol) {
                        if (!is_null($this->urlPrefix)) { // No URL rewriting
                            $link = $this->currentPageDir."/".$link;
                            $link = resolve_path($link);
                            $link = $this->urlPrefix.$link;
                        }
                        $newWindow = "";
                    } else {
                        $newWindow = "target='_blank' ";
                    }
                    $output .= "<p><a {$newWindow}class='$protocol' href='$link'>$linkText</a></p>\n";
                    break;
                case "#":
                case "##":
                case "###":
                    $title = $node["title"];
                    self::spacesCompress($title);
                    self::htmlPrepare($title);
                    // Only the first level-1 heading gives the page its title.
                    if ("#" === $mode && "" === $this->pageTitle) $this->pageTitle = $title;
                    $level = strlen($mode); // "#" => h1, "##" => h2, "###" => h3
                    $output .= "<h$level>$title</h$level>\n";
                    break;
                case "^^^":
                    $textDecoration = !$textDecoration;
                    break;
                default:
                    die("Unknown mode: '{$node["mode"]}'\n");
            }
        }
        $this->translatedGemtext = $output;
    }
} // GemTextTranslate_html
?>