You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

100 lines
2.1 KiB

<?php
namespace p3k\XRay\Formats;
use DOMDocument, DOMXPath;
use HTMLPurifier, HTMLPurifier_Config;
/**
 * Contract implemented by every format handler in this namespace.
 *
 * Both methods are static and receive a URL. Their return convention is not
 * visible from this declaration; going by the names they are presumably
 * boolean predicates -- confirm against the concrete implementations.
 */
interface iFormat {

  /**
   * Presumably reports whether the host part of the URL belongs to this format.
   *
   * @param mixed $url URL to inspect (expected form not shown here -- TODO confirm)
   */
  public static function matches_host($url);

  /**
   * Presumably reports whether the URL as a whole is handled by this format.
   *
   * @param mixed $url URL to inspect (expected form not shown here -- TODO confirm)
   */
  public static function matches($url);

}
/**
 * Base class for format handlers.
 *
 * Supplies the helpers shared by concrete formats: the "unknown" fallback
 * result, HTML-to-DOM loading, and HTMLPurifier-based sanitizing/stripping.
 * The iFormat methods are left for subclasses to implement.
 */
abstract class Format implements iFormat {

  /**
   * Build the fallback result used when nothing could be recognised.
   *
   * @return array Always `['data' => ['type' => 'unknown']]`.
   */
  protected static function _unknown() {
    return [
      'data' => [
        'type' => 'unknown'
      ]
    ];
  }

  /**
   * Parse an HTML string into a DOM document plus an XPath helper bound to it.
   *
   * @param string $html Markup to parse.
   * @return array Two-element list: `[DOMDocument, DOMXPath]` on success, or
   *               `[null, null]` when the input is empty / not a string or
   *               libxml reports that it could not load the markup.
   */
  protected static function _loadHTML($html) {
    // Nothing to parse. On PHP 8 an empty source makes loadHTML() throw a
    // ValueError, which the @ operator below would not silence.
    if(!is_string($html) || $html === '') {
      return [null, null];
    }

    $doc = new DOMDocument();

    // @ silences the parser warnings libxml raises for malformed markup.
    // Failure is signalled by the return value; the $doc object itself is
    // always truthy, so testing it can never detect a failed load.
    $loaded = @$doc->loadHTML($html);
    if(!$loaded) {
      return [null, null];
    }

    return [$doc, new DOMXPath($doc)];
  }

  /**
   * Reduce HTML to a fixed whitelist of elements using HTMLPurifier.
   *
   * @param string $html     Markup to clean.
   * @param bool   $allowImg When true (the default) `img` is added to the whitelist.
   * @return string Purified markup with surrounding whitespace trimmed.
   */
  protected static function sanitizeHTML($html, $allowImg=true) {
    $allowed = [
      'a',
      'abbr',
      'b',
      'code',
      'del',
      'em',
      'i',
      'q',
      'strike',
      'strong',
      'time',
      'blockquote',
      'pre',
      'p',
      'h1',
      'h2',
      'h3',
      'h4',
      'h5',
      'h6',
      'ul',
      'li',
      'ol'
    ];
    if($allowImg) {
      $allowed[] = 'img';
    }

    $config = self::_purifierConfig($allowed);

    // Register <time> (with a free-text datetime attribute) on the raw HTML
    // definition so the whitelist entry above refers to a defined element.
    $def = $config->getHTMLDefinition(true);
    $def->addElement(
      'time',
      'Inline',
      'Inline',
      'Common',
      [
        'datetime' => 'Text'
      ]
    );

    // Override the allowed classes to only support Microformats2 classes
    $def->manager->attrTypes->set('Class', new HTMLPurifier_AttrDef_HTML_Microformats2());

    $purifier = new HTMLPurifier($config);
    $sanitized = $purifier->purify($html);

    // Turn the &#xD; character reference back into a literal carriage return
    $sanitized = str_replace("&#xD;", "\r", $sanitized);

    return trim($sanitized);
  }

  /**
   * Return a plaintext version of the input HTML.
   *
   * Every element except `br` is removed by HTMLPurifier, each remaining
   * line-break tag becomes a newline, and entities are decoded last.
   *
   * @param string $html Markup to convert.
   * @return string Plain text with surrounding whitespace trimmed.
   */
  protected static function stripHTML($html) {
    $purifier = new HTMLPurifier(self::_purifierConfig(['br']));
    $text = $purifier->purify($html);
    $text = str_replace("&#xD;", "\r", $text);

    // Convert break tags while the text is still entity-encoded, so only real
    // tags match: an escaped literal such as "&lt;br&gt;" must survive as text.
    // The pattern covers "<br>", the XHTML form "<br />" that HTMLPurifier emits
    // by default, and break tags carrying attributes.
    $withBreaks = preg_replace('/<br\b[^>]*>/i', "\n", $text);
    if($withBreaks !== null) {
      $text = $withBreaks;
    }

    return trim(html_entity_decode($text));
  }

  /**
   * Create an HTMLPurifier configuration with the definition cache disabled
   * and the element whitelist restricted to the given names.
   *
   * @param array $allowedElements Element names to permit.
   * @return HTMLPurifier_Config
   */
  private static function _purifierConfig(array $allowedElements) {
    $config = HTMLPurifier_Config::createDefault();
    $config->set('Cache.DefinitionImpl', null);
    $config->set('HTML.AllowedElements', $allowedElements);
    return $config;
  }

}