From 8cf174ecb3b3e113e0a051eea04c74272d09e675 Mon Sep 17 00:00:00 2001 From: Christophe HENRY Date: Sun, 4 Apr 2021 22:23:38 +0200 Subject: [PATCH] Handles line feeds: Unix, Mac, Windows --- lib-htmgem.inc.php | 2 +- tests/files_with_html/linefeeds-utf-16.txt | Bin 0 -> 82 bytes .../files_with_html/linefeeds-utf-16.txt.html | 8 ++++++++ tests/files_with_html/linefeeds.txt | 4 ++++ tests/files_with_html/linefeeds.txt.html | 8 ++++++++ tests/translateToGemtextTest.php | 2 +- tests/translateToHtmlTest.php | 18 +++++++++++++++++- tests/utils.inc.php | 4 ++-- 8 files changed, 41 insertions(+), 5 deletions(-) create mode 100644 tests/files_with_html/linefeeds-utf-16.txt create mode 100644 tests/files_with_html/linefeeds-utf-16.txt.html create mode 100644 tests/files_with_html/linefeeds.txt create mode 100644 tests/files_with_html/linefeeds.txt.html diff --git a/lib-htmgem.inc.php b/lib-htmgem.inc.php index 6ad1ed3..f93baf4 100644 --- a/lib-htmgem.inc.php +++ b/lib-htmgem.inc.php @@ -12,7 +12,7 @@ mb_regex_encoding("UTF-8"); function gemtextParser($fileContents) { if (empty($fileContents)) return array(); $fileContents = rtrim($fileContents); // removes last empty line - $fileLines = explode("\n", $fileContents); + $fileLines = mb_split("\R", $fileContents); // Unix, Mac, Windows line feeds $mode = null; $current = array(); foreach ($fileLines as $line) { diff --git a/tests/files_with_html/linefeeds-utf-16.txt b/tests/files_with_html/linefeeds-utf-16.txt new file mode 100644 index 0000000000000000000000000000000000000000..62a3f6f2dc38c10d03382038f3dbbc6f82af1cdd GIT binary patch literal 82 scmezWPm4i;p_CzyA(Nqkfr|me%VkJpNM_&#vr`!I8H#~yhzv360ADB!i~s-t literal 0 HcmV?d00001 diff --git a/tests/files_with_html/linefeeds-utf-16.txt.html b/tests/files_with_html/linefeeds-utf-16.txt.html new file mode 100644 index 0000000..6fe31fe --- /dev/null +++ b/tests/files_with_html/linefeeds-utf-16.txt.html @@ -0,0 +1,8 @@ + diff --git a/tests/files_with_html/linefeeds.txt 
b/tests/files_with_html/linefeeds.txt new file mode 100644 index 0000000..ae9d1ae --- /dev/null +++ b/tests/files_with_html/linefeeds.txt @@ -0,0 +1,4 @@ +* unix +* mac * dos +* unix +* mac * dos diff --git a/tests/files_with_html/linefeeds.txt.html b/tests/files_with_html/linefeeds.txt.html new file mode 100644 index 0000000..6fe31fe --- /dev/null +++ b/tests/files_with_html/linefeeds.txt.html @@ -0,0 +1,8 @@ + diff --git a/tests/translateToGemtextTest.php b/tests/translateToGemtextTest.php index 4c70684..4e1943a 100644 --- a/tests/translateToGemtextTest.php +++ b/tests/translateToGemtextTest.php @@ -51,7 +51,7 @@ final class translateToGemtextTest extends TestCase { #TODO: don't stop when problems are found, list all the faulty files public function test_translate_gemtext_files(): void { - foreach(getGmiFiles(dirname(__FILE__)."/..") as $filePathname) { + foreach(getFiles(dirname(__FILE__)."/..", "gmi") as $filePathname) { $fileContent = file_get_contents($filePathname); \htmgem\io\convertToUTF8($fileContent); $this->assertSame( diff --git a/tests/translateToHtmlTest.php b/tests/translateToHtmlTest.php index 0fd78aa..fb6977d 100644 --- a/tests/translateToHtmlTest.php +++ b/tests/translateToHtmlTest.php @@ -94,7 +94,7 @@ final class translateToHtmlTest extends TestCase { /** NOTE: the UTF-16 files must result in the same content as UTF-8 ones. * command to convert from UTF-8 to UTF-16: iconv -f utf8 -r utf16 text.gmi */ - foreach(getGmiFiles(dirname(__FILE__)."/files_with_html") as $filePathname) { + foreach(getFiles(dirname(__FILE__)."/files_with_html", "gmi") as $filePathname) { $fileContentGmi = file_get_contents($filePathname); \htmgem\io\convertToUTF8($fileContentGmi); $fileContentHtml = file_get_contents($filePathname.".html"); @@ -106,4 +106,20 @@ final class translateToHtmlTest extends TestCase { } } + public function test_line_feeds(): void { + /** NOTE: the UTF-16 files must result in the same content as UTF-8 ones. 
+ * command to convert from UTF-8 to UTF-16: iconv -f utf8 -t utf16 text.gmi + */ + foreach(getFiles(dirname(__FILE__)."/files_with_html", "txt") as $filePathname) { + $fileContentGmi = file_get_contents($filePathname); + \htmgem\io\convertToUTF8($fileContentGmi); + $fileContentHtml = file_get_contents($filePathname.".html"); + $this->assertSame( + $fileContentHtml, + translateHtml($fileContentGmi), + "Line feeds, translation to HTML: $filePathname" + ); + } + } + } diff --git a/tests/utils.inc.php b/tests/utils.inc.php index 52824e8..533336e 100644 --- a/tests/utils.inc.php +++ b/tests/utils.inc.php @@ -1,6 +1,6 @@ getFilename(); $filePathname = $fileinfo->getPathname(); $extension = $fileinfo->getExtension(); - if ("gmi" == $extension) { + if ($targetExtension == $extension) { yield $filePathname; } }