From 8ff6721b9b87e200bfe9c5bd73d3c1cdb61c3bf7 Mon Sep 17 00:00:00 2001 From: Ben Harris Date: Wed, 17 Aug 2022 15:20:41 -0400 Subject: [PATCH] add fulltext.php --- fulltext/.gitignore | 1 + fulltext/composer.json | 14 +++ fulltext/composer.lock | 224 +++++++++++++++++++++++++++++++++++++++++ fulltext/fulltext.php | 23 +++++ 4 files changed, 262 insertions(+) create mode 100644 fulltext/.gitignore create mode 100644 fulltext/composer.json create mode 100644 fulltext/composer.lock create mode 100644 fulltext/fulltext.php diff --git a/fulltext/.gitignore b/fulltext/.gitignore new file mode 100644 index 0000000..57872d0 --- /dev/null +++ b/fulltext/.gitignore @@ -0,0 +1 @@ +/vendor/ diff --git a/fulltext/composer.json b/fulltext/composer.json new file mode 100644 index 0000000..615a6b0 --- /dev/null +++ b/fulltext/composer.json @@ -0,0 +1,14 @@ +{ + "name": "benharri/fulltext", + "type": "project", + "require": { + "j0k3r/php-readability": "^2.0" + }, + "license": "MIT", + "authors": [ + { + "name": "Ben Harris", + "email": "ben@tilde.team" + } + ] +} diff --git a/fulltext/composer.lock b/fulltext/composer.lock new file mode 100644 index 0000000..04d244b --- /dev/null +++ b/fulltext/composer.lock @@ -0,0 +1,224 @@ +{ + "_readme": [ + "This file locks the dependencies of your project to a known state", + "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", + "This file is @generated automatically" + ], + "content-hash": "d5efce9175131a522632dce527f8a3ce", + "packages": [ + { + "name": "j0k3r/php-readability", + "version": "2.0.1", + "source": { + "type": "git", + "url": "https://github.com/j0k3r/php-readability.git", + "reference": "6689f199562836142fa4c3e82b1885ff56255ccb" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/j0k3r/php-readability/zipball/6689f199562836142fa4c3e82b1885ff56255ccb", + "reference": "6689f199562836142fa4c3e82b1885ff56255ccb", + "shasum": "" + }, + "require": { + "ext-mbstring": "*", + "masterminds/html5": "^2.7", + "php": ">=7.2.0", + "psr/log": "^1.0" + }, + "require-dev": { + "friendsofphp/php-cs-fixer": "^3.0", + "monolog/monolog": "^1.24|^2.1", + "phpstan/phpstan": "^1.3", + "phpstan/phpstan-phpunit": "^1.0", + "rector/rector": "^0.12.15", + "symfony/phpunit-bridge": "^4.4|^5.3|^6.0" + }, + "suggest": { + "ext-tidy": "Used to clean up given HTML and to avoid problems with bad HTML structure." + }, + "type": "library", + "autoload": { + "psr-4": { + "Readability\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "Apache-2.0" + ], + "authors": [ + { + "name": "Jeremy Benoist", + "email": "jeremy.benoist@gmail.com", + "homepage": "http://www.j0k3r.net", + "role": "Developer" + }, + { + "name": "DitherSky", + "homepage": "https://github.com/Dither", + "role": "Developer (https://github.com/Dither/full-text-rss)" + }, + { + "name": "Keyvan Minoukadeh", + "email": "keyvan@keyvan.net", + "homepage": "http://keyvan.net", + "role": "Developer (ported original JS code to PHP)" + }, + { + "name": "Arc90", + "homepage": "http://arc90.com", + "role": "Developer (original JS version)" + } + ], + "description": "Automatic article extraction from HTML", + "keywords": [ + "article", + "article extraction", + "content", + "content extraction", + "extraction", + "html" + ], + "support": { + "issues": "https://github.com/j0k3r/php-readability/issues", + "source": "https://github.com/j0k3r/php-readability/tree/2.0.1" + }, + "funding": [ + { + "url": "https://github.com/j0k3r", + "type": "github" + } + ], + "time": "2022-06-13T07:13:23+00:00" + }, + { + "name": "masterminds/html5", + "version": "2.7.5", + "source": { + "type": "git", + "url": "https://github.com/Masterminds/html5-php.git", + "reference": "f640ac1bdddff06ea333a920c95bbad8872429ab" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/Masterminds/html5-php/zipball/f640ac1bdddff06ea333a920c95bbad8872429ab", + "reference": "f640ac1bdddff06ea333a920c95bbad8872429ab", + "shasum": "" + }, + "require": { + "ext-ctype": "*", + "ext-dom": "*", + "ext-libxml": "*", + "php": ">=5.3.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.8.35 || ^5.7.21 || ^6 || ^7" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.7-dev" + } + }, + "autoload": { + "psr-4": { + "Masterminds\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Matt Butcher", + "email": "technosophos@gmail.com" + }, + { + "name": "Matt Farina", + "email": "matt@mattfarina.com" + }, + { + "name": "Asmir Mustafic", + "email": "goetas@gmail.com" + } + ], + "description": "An HTML5 parser and serializer.", + "homepage": "http://masterminds.github.io/html5-php", + "keywords": [ + "HTML5", + "dom", + "html", + "parser", + "querypath", + "serializer", + "xml" + ], + "support": { + "issues": "https://github.com/Masterminds/html5-php/issues", + "source": "https://github.com/Masterminds/html5-php/tree/2.7.5" + }, + "time": "2021-07-01T14:25:37+00:00" + }, + { + "name": "psr/log", + "version": "1.1.4", + "source": { + "type": "git", + "url": "https://github.com/php-fig/log.git", + "reference": "d49695b909c3b7628b6289db5479a1c204601f11" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-fig/log/zipball/d49695b909c3b7628b6289db5479a1c204601f11", + "reference": "d49695b909c3b7628b6289db5479a1c204601f11", + "shasum": "" + }, + "require": { + "php": ">=5.3.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.1.x-dev" + } + }, + "autoload": { + "psr-4": { + "Psr\\Log\\": "Psr/Log/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "PHP-FIG", + "homepage": "https://www.php-fig.org/" + } + ], + "description": "Common interface for logging libraries", + "homepage": "https://github.com/php-fig/log", + "keywords": [ + "log", + "psr", + "psr-3" + ], + "support": { + "source": "https://github.com/php-fig/log/tree/1.1.4" + }, + "time": "2021-05-03T11:20:27+00:00" + } + ], + "packages-dev": [], + "aliases": [], + "minimum-stability": "stable", + "stability-flags": [], + "prefer-stable": false, + "prefer-lowest": false, + "platform": [], + "platform-dev": [], + "plugin-api-version": "2.2.0" +} diff --git a/fulltext/fulltext.php b/fulltext/fulltext.php new file mode 100644 index 0000000..c8d38cf --- /dev/null +++ b/fulltext/fulltext.php @@ -0,0 +1,23 @@ +init(); + +if ($result) { + header('Content-type: text/plain'); + echo $readability->getTitle()->textContent; + echo PHP_EOL; + echo wordwrap($readability->getContent()->textContent); +} +else { + echo 'failed'; +} +