HtmGem/lib-htmgem.inc.php

425 lines
16 KiB
PHP
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php declare(strict_types=1);
namespace htmgem;
// The multibyte regex functions used in this file (mb_split, mb_ereg_replace)
// must interpret both subjects and patterns as UTF-8.
mb_internal_encoding("UTF-8");
mb_regex_encoding("UTF-8");
/**
 * Resolves $path, interpreting "/", "." and ".." segments.
 *
 * Empty segments (repeated or trailing slashes) and "." segments are dropped;
 * a ".." segment removes the previously kept segment. A leading "/" is kept.
 *
 * @param string|null $path the path to resolve
 * @return string the resolved path; "" when $path is not a string or is empty;
 *                "/" as soon as a ".." tries to go above the first segment
 */
function resolve_path($path) {
    // Strict test instead of empty(): "0" is a perfectly valid path.
    if (!is_string($path) || "" === $path) return "";
    $absolute = "/" === $path[0];
    $chunks = array();
    foreach (explode("/", $path) as $part) {
        // "" comes from doubled/leading/trailing slashes, "." means "here".
        if ("" === $part || "." === $part) continue;
        if (".." === $part) {
            // array_pop() returns null on an empty stack: we went above the limit.
            if (is_null(array_pop($chunks))) return "/";
        } else {
            $chunks[] = $part;
        }
    }
    $output = implode("/", $chunks);
    return $absolute ? "/".$output : $output;
}
/**
 * Splits a path (expected to be free of "." and "..") into its segments, each
 * one associated with the cumulative path leading to it.
 *
 * Ex. /dir1/dir2/page.gmi
 * --> "dir1"     --> "/dir1"
 * --> "dir2"     --> "/dir1/dir2"
 * --> "page.gmi" --> "/dir1/dir2/page.gmi"
 *
 * The result is keyed by segment name, so a name appearing twice in the path
 * keeps a single entry (holding the last cumulative path).
 *
 * @param $path the path to split; empty segments are ignored
 * @param $prefix text prepended to every cumulative path
 * @return array segment name => $prefix followed by the cumulative path
 */
function split_path_links($path, $prefix="") {
    $segments = array_filter(explode("/", $path), 'strlen');
    if (empty($segments)) return array();
    // An absolute path keeps its leading slash in every cumulative path.
    $accumulated = ("/" == $path[0]) ? "/" : "";
    $links = array();
    $isFirst = true;
    foreach ($segments as $segment) {
        // No separator before the first segment, a slash before the others.
        $accumulated .= ($isFirst ? "" : "/").$segment;
        $isFirst = false;
        $links[$segment] = $prefix.$accumulated;
    }
    return $links;
}
/**
 * Parses a gemtext and generates its internal format, one node per yield.
 *
 * Every node is an array with a "mode" key:
 *  - ""               : text line, with "text"
 *  - "#", "##", "###" : heading, with "title"
 *  - "=>"             : link, with "link" and "text"
 *  - "```"            : preformatted block, with "alt" and "texts" (lines)
 *  - ">"              : quote block, with "texts" (lines)
 *  - "*"              : list block, with "texts" (items)
 *  - "^^^"            : marker line, no other key
 *
 * Consecutive ">" lines (resp. "*" lines) are gathered into one node. A block
 * still open when the text ends is yielded as is.
 *
 * @param str $fileContents the gemtext to parse
 * @return \Generator yields the nodes; yields nothing for a non-string or
 *                    empty $fileContents
 */
function gemtextParser($fileContents) {
    // Strict test instead of empty(): a document containing only "0" is valid.
    if (!is_string($fileContents) || "" === $fileContents) return array();
    $fileContents = rtrim($fileContents); // removes last empty line
    $fileLines = mb_split("\n|\r\n?", $fileContents); // Unix, Mac, Windows line feeds
    if (!is_array($fileLines)) {
        // mb_split() is documented to return false on failure: split byte-wise
        // rather than silently iterating over a non-array.
        $fileLines = preg_split("/\n|\r\n?/", $fileContents);
    }
    $mode = null;       // null: outside any block, else "```", ">" or "*"
    $current = array(); // the block node being filled
    foreach ($fileLines as $line) {
        $reDoCount = 0;
        while (true) {
            /* The continue instruction is used to make another turn when there is a transition
             * between two modes. */
            if ($reDoCount>1) {
                die("HtmGem: Too many loops, mode == '$mode'");
            }
            $reDoCount += 1;
            // The line markers are pure ASCII, byte-wise prefixes are enough.
            $line1 = substr($line, 0, 1);
            $line2 = substr($line, 0, 2);
            $line3 = substr($line, 0, 3);
            if (is_null($mode)) {
                if ('^^^' == $line3) {
                    yield array("mode" => "^^^");
                } elseif ("#" == $line1) {
                    preg_match("/^(#{1,3})\s*(.+)?/", $line, $matches);
                    yield array("mode" => $matches[1], "title" => trim($matches[2]??""));
                } elseif ("=>" == $line2) {
                    // The pattern does not match a "=>" without URL, hence the ?? "".
                    preg_match("/^=>\s*([^\s]+)(?:\s+(.*))?$/", $line, $matches);
                    yield array("mode" => "=>", "link" => trim($matches[1]??""), "text" => trim($matches[2]??""));
                } elseif ("```" == $line3) {
                    preg_match("/^```\s*(.*)$/", $line, $matches);
                    $current = array("mode" => "```", "alt" => trim($matches[1]), "texts" => array());
                    $mode="```";
                } elseif (">" == $line1) {
                    preg_match("/^>\s*(.*)$/", $line, $matches);
                    $current = array("mode" => ">", "texts" => array(trim($matches[1])));
                    $mode = ">";
                } elseif ("*" == $line1) {
                    preg_match("/^\*\s*(.*)$/", $line, $matches);
                    $current = array("mode" => "*", "texts" => array(trim($matches[1])));
                    $mode = "*";
                } else {
                    // text_line
                    yield array("mode"=>"", "text" => rtrim($line));
                }
            } else {
                if ("```"==$mode) {
                    if ("```" == $line3) {
                        // Closing fence: the block is complete.
                        yield $current;
                        $current = array();
                        $mode = null;
                    } else {
                        $current["texts"] []= rtrim($line); // No ltrim() as it's a preformatted text!
                    }
                } elseif (">"==$mode) {
                    if (">" == $line1) {
                        preg_match("/^>\s*(.*)$/", $line, $matches);
                        $current["texts"] []= trim($matches[1]);
                    } else {
                        // End of the quote: emit it and parse this line again outside any block.
                        yield $current;
                        $current = array();
                        $mode = null;
                        continue;
                    }
                } elseif ("*"==$mode) {
                    if ("*" == $line1) {
                        preg_match("/^\*\s*(.*)$/", $line, $matches);
                        $current["texts"] []= trim($matches[1]);
                    } else {
                        // End of the list: emit it and parse this line again outside any block.
                        yield $current;
                        $current = array();
                        $mode = null;
                        continue;
                    }
                } else {
                    die("Unexpected mode: $mode!");
                }
            }
            break; // exits the while(true) as no continue occurred
        } // while(true)
    } // foreach
    if ($current) yield $current; // File ends before the block.
} // gemtextParser
/**
 * Translates the internal format back into a gemtext.
 * Use cases:
 *
 * - test suites
 * - serialisation easier with a text content
 * - normalization (trimming spaces for instance)
 */
class GemtextTranslate_gemtext {
    /** @var iterable the nodes produced by gemtextParser() */
    public $parsedGemtext;
    /** @var string the regenerated gemtext, also returned by __toString() */
    public $translatedGemtext = "";

    /**
     * Parses the given gemtext and immediately translates it.
     * @param $parsedGemtext the raw gemtext (despite its name, it is parsed here)
     */
    function __construct($parsedGemtext) {
        // Strict test instead of empty(): the text "0" must not be discarded.
        if (!is_string($parsedGemtext)) $parsedGemtext = "";
        // to delete the last empty lines
        $parsedGemtext = rtrim($parsedGemtext);
        // The text must be parsed
        $this->parsedGemtext = gemtextParser($parsedGemtext);
        $this->translate();
    }

    /**
     * Tells whether $value carries some text. Unlike empty(), "0" counts as text.
     */
    private static function hasText($value) {
        return is_string($value) && "" !== $value;
    }

    /**
     * Walks through the nodes and builds the normalized gemtext into
     * $this->translatedGemtext. Stops the script on an unknown node mode.
     */
    protected function translate() {
        $output = "";
        foreach ($this->parsedGemtext as $node) {
            $mode = $node["mode"];
            switch($mode) {
                case "":
                    $output .= $node["text"]."\n";
                    break;
                case "*":
                    foreach ($node["texts"] as $text) {
                        $output .= "* $text\n";
                    }
                    break;
                case "```":
                    $alt = $node["alt"];
                    if (self::hasText($alt))
                        $output .= "``` $alt\n";
                    else
                        $output .= "```\n";
                    foreach ($node["texts"] as $text) {
                        $output .= "$text\n";
                    }
                    $output .= "```\n";
                    break;
                case ">":
                    foreach ($node["texts"] as $text) {
                        // An empty quote line has no trailing space.
                        if (self::hasText($text))
                            $output .= "> $text\n";
                        else
                            $output .= ">\n";
                    }
                    break;
                case "=>":
                    $linkText = $node["text"];
                    $link = $node["link"];
                    $linkText = self::hasText($linkText) ? " $linkText" : "";
                    $link = self::hasText($link) ? " $link" : "";
                    $output .= "=>".$link.$linkText."\n";
                    break;
                case "#":
                case "##":
                case "###":
                    $output .= "$mode ".$node["title"]."\n";
                    break;
                case "^^^":
                    $output .= "^^^\n";
                    break;
                default:
                    die("Unknown mode: '{$node["mode"]}'\n");
            }
        }
        $this->translatedGemtext = $output;
    }

    /** @return string the normalized gemtext */
    public function __toString() {
        return $this->translatedGemtext;
    }
} // GemtextTranslate_gemtext
/**
 * Translates the internal format to HTML
 */
class GemtextTranslate_html {
    protected $cssList = array();
    /** @var string the HTML-prepared title of the first level-1 heading met */
    protected $pageTitle = "";
    /** @var string the generated HTML */
    public $translatedGemtext;
    /** @var iterable the nodes produced by gemtextParser() */
    public $parsedGemtext = array();
    /** @var string|null prefix prepended to local links; null when URL rewriting is on */
    public $urlPrefix = null;
    /** @var string|null directory of the current page, used together with $urlPrefix */
    public $currentPageDir = null;

    /**
     * Parses the given gemtext and immediately translates it to HTML.
     * @param $parsedGemtext the raw gemtext (despite its name, it is parsed here)
     * @param $textDecoration bool to interpret or not the text decoration
     * @param $urlPrefix the prefix to prepend if the URL rewriting is not on
     * @param $currentPageDir the current directory, to be used without URL rewriting
     */
    function __construct($parsedGemtext, $textDecoration=true, $urlPrefix=null, $currentPageDir=null) {
        $this->urlPrefix = $urlPrefix;
        $this->currentPageDir = $currentPageDir;
        // Strict test instead of empty(): the text "0" must not be discarded.
        if (!is_string($parsedGemtext)) $parsedGemtext = "";
        // to delete the last empty lines
        $parsedGemtext = rtrim($parsedGemtext);
        // The text must be parsed. The nodes are stored as an array so that the
        // public translate() can be called more than once.
        $parsed = gemtextParser($parsedGemtext);
        $this->parsedGemtext = is_array($parsed) ? $parsed : iterator_to_array($parsed, false);
        $this->translate($textDecoration);
    }

    function addCss($css) {
        $this->cssList []= $css;
    }

    function getCss() { return $this->cssList; }

    function getTitle() { return $this->pageTitle; }

    const NARROW_NO_BREAK_SPACE = "&#8239;";

    // Written with escapes: these characters are easily confused or lost in editors.
    const DASHES
        = "\u{2012}" # U+2012 Figure Dash
        . "\u{2013}" # U+2013 En Dash
        . "\u{2014}" # U+2014 Em Dash
        . "\u{2E3A}" # U+2E3A Two-Em Dash
        . "\u{2E3B}" # U+2E3B Three-Em Dash (Three times larger than a single char)
        ;

    /**
     * Replaces markups things like __underlined__ to <u>underlined</u>.
     * @param $instruction the characters to replace, as a regex fragment, ex. __
     * @param $markup the markup to replace to, ex. "u" to get <u>…</u>
     * @param &$text where to replace.
     */
    protected static function markupPreg($instruction, $markup, &$text) {
        $output = $text;
        # Replaces couples "__word__" into "<u>word</u>".
        $output = mb_ereg_replace("{$instruction}(.+?){$instruction}", "<{$markup}>\\1</{$markup}>", $output);
        # Replaces a remaining __ into "<u>…</u>" to the end of the line.
        $output = mb_ereg_replace("{$instruction}(.+)?", "<{$markup}>\\1</{$markup}>", $output);
        $text = $output;
    }

    /**
     * Adds text attributes such as underline, bold, … to $line
     * @param $line the line to process
     */
    protected static function addTextDecoration(&$line) {
        self::markupPreg("__", "u", $line);
        self::markupPreg("\*\*", "strong", $line);
        self::markupPreg("//", "em", $line);
        self::markupPreg("~~", "del", $line);
    }

    /**
     * Prepares the raw text to be displayed in HTML environment:
     * * Escapes the HTML special characters contained in the Gemtext.
     * * Puts narrow no-break spaces next to some characters.
     * An empty text becomes "&nbsp;".
     * @param &$text the text to process, modified in place
     */
    protected static function htmlPrepare(&$text) {
        // Strict test instead of empty(): the text "0" must be displayed.
        if (!is_string($text) || "" === $text) {
            $text = "&nbsp;";
            return;
        }
        $nnbsp = self::NARROW_NO_BREAK_SPACE;
        $dashes = self::DASHES;
        $text = htmlspecialchars($text, ENT_HTML5|ENT_QUOTES, "UTF-8", true);
        # Narrow no-break space before ? ! : ; » € $ and after «
        $text = mb_ereg_replace("\ ([?!:;»€$])", $nnbsp."\\1", $text);
        $text = mb_ereg_replace("([«])\ ", "\\1".$nnbsp, $text);
        # Warning: using a monospace font editor may not display dashes as they should be!
        # Adds no-break spaces to stick the dashes to words : aaaaaa - bb - ccccc ==> aaaaaa -$bb$- ccccc
        $text = mb_ereg_replace("([{$dashes}]) ([^{$dashes}.]+) ([{$dashes}])", "\\1".$nnbsp."\\2".$nnbsp."\\3", $text);
        # Adds no-break space to stick the (EM/EN) dash to words : aaaaaa - bb. ==> aaaaaa -$bb.
        $text = mb_ereg_replace("([\u{2014}\u{2013}]) ([^.]+)\.", "\\1".$nnbsp."\\2.", $text);
    }

    /**
     * Replaces several spaces (0x20) by only one.
     * @param &$text the text to process, modified in place; a non-string becomes ""
     */
    protected static function spacesCompress(&$text) {
        // Strict test instead of empty(): the text "0" must be kept.
        if (!is_string($text)) $text = "";
        $text = preg_replace("/ +/", " ", $text);
    }

    /**
     * Builds the HTML of a link node.
     * @param $node the "=>" node, with "link" and "text"
     * @param $textDecoration bool to interpret or not the text decoration
     * @return string a paragraph holding the anchor
     */
    private function translateLink($node, $textDecoration) {
        $link = $node["link"];
        $linkText = $node["text"];
        if (!is_string($linkText) || "" === $linkText) {
            // No text: the link itself is displayed.
            $linkText = $link;
            self::htmlPrepare($linkText);
        } else {
            self::spacesCompress($linkText);
            self::htmlPrepare($linkText);
            if ($textDecoration) self::addTextDecoration($linkText);
        }
        // The link goes into a quoted attribute: always escape quotes, "<" and ">".
        // Don't double encode, so "I'm&gt" becomes "I&apos;m&gt".
        $link = htmlspecialchars((string)$link, ENT_HTML5|ENT_QUOTES, "UTF-8", false);
        // Everything before the first ":" is taken as the protocol.
        preg_match("/^([^:]+):/", $link, $matches);
        $protocol = $matches[1] ?? "local";
        if ("local"==$protocol) {
            if (!is_null($this->urlPrefix)) { // No URL rewriting
                $link = $this->currentPageDir."/".$link;
                $link = resolve_path($link);
                $link = $this->urlPrefix.$link;
            }
            $newWindow = "";
        } else {
            $newWindow = "target='_blank' ";
        }
        return "<p><a {$newWindow}class='$protocol' href='$link'>$linkText</a></p>\n";
    }

    /**
     * Walks through the nodes and builds the HTML into $this->translatedGemtext.
     * A "^^^" node toggles the text decoration for the following nodes.
     * Stops the script on an unknown node mode.
     * @param $textDecoration bool to interpret or not the text decoration
     */
    public function translate($textDecoration=true) {
        $output = "";
        foreach ($this->parsedGemtext as $node) {
            $mode = $node["mode"];
            switch($mode) {
                case "":
                    $text = $node["text"];
                    self::spacesCompress($text);
                    self::htmlPrepare($text);
                    if ($textDecoration) self::addTextDecoration($text);
                    $output .= "<p>$text</p>\n";
                    break;
                case "*":
                    $output .= "<ul>\n";
                    foreach ($node["texts"] as $text) {
                        self::spacesCompress($text);
                        self::htmlPrepare($text);
                        if ($textDecoration) self::addTextDecoration($text);
                        $output .= "<li>$text\n";
                    }
                    $output .= "</ul>\n";
                    break;
                case "```":
                    $text = implode("\n", $node["texts"]);
                    self::htmlPrepare($text);
                    // The alt text goes into a quoted attribute: escape it.
                    $alt = htmlspecialchars((string)$node["alt"], ENT_HTML5|ENT_QUOTES, "UTF-8");
                    $output .= "<pre alt='$alt'>\n$text\n</pre>\n";
                    break;
                case ">":
                    $output .= "<blockquote>\n";
                    foreach ($node["texts"] as $text) {
                        self::spacesCompress($text);
                        self::htmlPrepare($text);
                        if ($textDecoration) self::addTextDecoration($text);
                        $output .= "<p>$text</p>\n";
                    }
                    $output .= "</blockquote>\n";
                    break;
                case "=>":
                    $output .= $this->translateLink($node, $textDecoration);
                    break;
                case "#":
                case "##":
                case "###":
                    $title = $node["title"];
                    self::spacesCompress($title);
                    self::htmlPrepare($title);
                    $level = strlen($mode);
                    // The first level-1 heading gives the page title.
                    if (1 === $level && "" === (string)$this->pageTitle) $this->pageTitle = $title;
                    $output .= "<h{$level}>$title</h{$level}>\n";
                    break;
                case "^^^":
                    $textDecoration = !$textDecoration;
                    break;
                default:
                    die("Unknown mode: '{$node["mode"]}'\n");
            }
        }
        $this->translatedGemtext = $output;
    }
} // GemTextTranslate_html
?>