add fulltext.php

This commit is contained in:
Ben Harris 2022-08-17 15:20:41 -04:00
parent 2fe80adbf7
commit 8ff6721b9b
4 changed files with 262 additions and 0 deletions

1
fulltext/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/vendor/

14
fulltext/composer.json Normal file
View File

@ -0,0 +1,14 @@
{
"name": "benharri/fulltext",
"type": "project",
"require": {
"j0k3r/php-readability": "^2.0"
},
"license": "MIT",
"authors": [
{
"name": "Ben Harris",
"email": "ben@tilde.team"
}
]
}

224
fulltext/composer.lock generated Normal file
View File

@ -0,0 +1,224 @@
{
"_readme": [
"This file locks the dependencies of your project to a known state",
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "d5efce9175131a522632dce527f8a3ce",
"packages": [
{
"name": "j0k3r/php-readability",
"version": "2.0.1",
"source": {
"type": "git",
"url": "https://github.com/j0k3r/php-readability.git",
"reference": "6689f199562836142fa4c3e82b1885ff56255ccb"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/j0k3r/php-readability/zipball/6689f199562836142fa4c3e82b1885ff56255ccb",
"reference": "6689f199562836142fa4c3e82b1885ff56255ccb",
"shasum": ""
},
"require": {
"ext-mbstring": "*",
"masterminds/html5": "^2.7",
"php": ">=7.2.0",
"psr/log": "^1.0"
},
"require-dev": {
"friendsofphp/php-cs-fixer": "^3.0",
"monolog/monolog": "^1.24|^2.1",
"phpstan/phpstan": "^1.3",
"phpstan/phpstan-phpunit": "^1.0",
"rector/rector": "^0.12.15",
"symfony/phpunit-bridge": "^4.4|^5.3|^6.0"
},
"suggest": {
"ext-tidy": "Used to clean up given HTML and to avoid problems with bad HTML structure."
},
"type": "library",
"autoload": {
"psr-4": {
"Readability\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"Apache-2.0"
],
"authors": [
{
"name": "Jeremy Benoist",
"email": "jeremy.benoist@gmail.com",
"homepage": "http://www.j0k3r.net",
"role": "Developer"
},
{
"name": "DitherSky",
"homepage": "https://github.com/Dither",
"role": "Developer (https://github.com/Dither/full-text-rss)"
},
{
"name": "Keyvan Minoukadeh",
"email": "keyvan@keyvan.net",
"homepage": "http://keyvan.net",
"role": "Developer (ported original JS code to PHP)"
},
{
"name": "Arc90",
"homepage": "http://arc90.com",
"role": "Developer (original JS version)"
}
],
"description": "Automatic article extraction from HTML",
"keywords": [
"article",
"article extraction",
"content",
"content extraction",
"extraction",
"html"
],
"support": {
"issues": "https://github.com/j0k3r/php-readability/issues",
"source": "https://github.com/j0k3r/php-readability/tree/2.0.1"
},
"funding": [
{
"url": "https://github.com/j0k3r",
"type": "github"
}
],
"time": "2022-06-13T07:13:23+00:00"
},
{
"name": "masterminds/html5",
"version": "2.7.5",
"source": {
"type": "git",
"url": "https://github.com/Masterminds/html5-php.git",
"reference": "f640ac1bdddff06ea333a920c95bbad8872429ab"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/Masterminds/html5-php/zipball/f640ac1bdddff06ea333a920c95bbad8872429ab",
"reference": "f640ac1bdddff06ea333a920c95bbad8872429ab",
"shasum": ""
},
"require": {
"ext-ctype": "*",
"ext-dom": "*",
"ext-libxml": "*",
"php": ">=5.3.0"
},
"require-dev": {
"phpunit/phpunit": "^4.8.35 || ^5.7.21 || ^6 || ^7"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.7-dev"
}
},
"autoload": {
"psr-4": {
"Masterminds\\": "src"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Matt Butcher",
"email": "technosophos@gmail.com"
},
{
"name": "Matt Farina",
"email": "matt@mattfarina.com"
},
{
"name": "Asmir Mustafic",
"email": "goetas@gmail.com"
}
],
"description": "An HTML5 parser and serializer.",
"homepage": "http://masterminds.github.io/html5-php",
"keywords": [
"HTML5",
"dom",
"html",
"parser",
"querypath",
"serializer",
"xml"
],
"support": {
"issues": "https://github.com/Masterminds/html5-php/issues",
"source": "https://github.com/Masterminds/html5-php/tree/2.7.5"
},
"time": "2021-07-01T14:25:37+00:00"
},
{
"name": "psr/log",
"version": "1.1.4",
"source": {
"type": "git",
"url": "https://github.com/php-fig/log.git",
"reference": "d49695b909c3b7628b6289db5479a1c204601f11"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/php-fig/log/zipball/d49695b909c3b7628b6289db5479a1c204601f11",
"reference": "d49695b909c3b7628b6289db5479a1c204601f11",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.1.x-dev"
}
},
"autoload": {
"psr-4": {
"Psr\\Log\\": "Psr/Log/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "PHP-FIG",
"homepage": "https://www.php-fig.org/"
}
],
"description": "Common interface for logging libraries",
"homepage": "https://github.com/php-fig/log",
"keywords": [
"log",
"psr",
"psr-3"
],
"support": {
"source": "https://github.com/php-fig/log/tree/1.1.4"
},
"time": "2021-05-03T11:20:27+00:00"
}
],
"packages-dev": [],
"aliases": [],
"minimum-stability": "stable",
"stability-flags": [],
"prefer-stable": false,
"prefer-lowest": false,
"platform": [],
"platform-dev": [],
"plugin-api-version": "2.2.0"
}

23
fulltext/fulltext.php Normal file
View File

@ -0,0 +1,23 @@
<?php

declare(strict_types=1);

/**
 * Full-text extraction endpoint.
 *
 * Reads the `url` query parameter, downloads that page, runs it through
 * Readability and prints the article title followed by the word-wrapped
 * article text as plain text.
 *
 * Responses (all text/plain):
 *   200  title, a newline, then the wrapped article text
 *   400  the `url` parameter is missing, not a string, or not http(s)
 *   502  the page could not be downloaded (body: "failed")
 *   422  Readability could not extract content (body: "failed")
 */

// Resolve the autoloader relative to this file so the script works no matter
// what the current working directory is, and stop immediately if it is missing.
require_once __DIR__ . '/vendor/autoload.php';

use Readability\Readability;

// Every response from this script, including errors, is plain text.
header('Content-type: text/plain');

// PHP has already percent-decoded $_GET; decoding a second time would corrupt
// URLs that legitimately contain encoded characters such as %2B or %25.
$url = $_GET['url'] ?? null;
if (!is_string($url) || $url === '') {
    http_response_code(400);
    echo 'missing url parameter';
    exit;
}

// Only fetch over HTTP(S). Without this check file_get_contents() would also
// read local paths and other stream wrappers (file://, php://, ...).
// NOTE(review): hosts on the internal network are still reachable over HTTP;
// add host filtering if this endpoint is exposed to untrusted users.
$scheme = parse_url($url, PHP_URL_SCHEME);
if (!is_string($scheme) || !in_array(strtolower($scheme), ['http', 'https'], true)) {
    http_response_code(400);
    echo 'unsupported url';
    exit;
}

// file_get_contents() returns false (and emits a warning) when the download
// fails; do not hand that on to Readability.
$html = file_get_contents($url);
if ($html === false) {
    http_response_code(502);
    echo 'failed';
    exit;
}

$readability = new Readability($html, $url);
if (!$readability->init()) {
    http_response_code(422);
    echo 'failed';
    exit;
}

echo $readability->getTitle()->textContent;
echo PHP_EOL;
echo wordwrap($readability->getContent()->textContent);