You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

89 lines
1.7 KiB

  1. <?php
  2. namespace p3k\XRay\Formats;
  3. use DOMDocument, DOMXPath;
  4. use HTMLPurifier, HTMLPurifier_Config;
  /**
   * Contract every format class has to satisfy: two static URL predicates.
   *
   * NOTE(review): no implementation is visible in this file, so the exact
   * meaning of each predicate below is inferred from its name only — confirm
   * against the concrete format classes.
   */
  interface iFormat {

    /**
     * Presumably reports whether the host part of $url is one this format
     * handles — TODO confirm against implementations.
     *
     * @param mixed $url URL to test (presumably a string)
     * @return mixed presumably a boolean
     */
    public static function matches_host($url);

    /**
     * Presumably reports whether $url as a whole is one this format
     * handles — TODO confirm against implementations.
     *
     * @param mixed $url URL to test (presumably a string)
     * @return mixed presumably a boolean
     */
    public static function matches($url);

  }
  /**
   * Abstract base implementing iFormat that offers protected static helpers
   * to its subclasses: a canned "unknown" result, HTML loading into a
   * DOMDocument/DOMXPath pair, HTML sanitizing through HTMLPurifier, and
   * plain tag stripping.
   *
   * The helpers deliberately carry no native type declarations so that
   * subclasses overriding them without types stay compatible.
   */
  abstract class Format implements iFormat {

    /**
     * Build the result returned when the content type cannot be determined.
     *
     * @return array ['data' => ['type' => 'unknown']]
     */
    protected static function _unknown() {
      return [
        'data' => [
          'type' => 'unknown'
        ]
      ];
    }

    /**
     * Parse an HTML string into a DOM document and a matching XPath object.
     *
     * @param string $html HTML source to parse
     * @return array [DOMDocument, DOMXPath] on success, [null, null] when
     *               the input is empty or the parser reports failure
     */
    protected static function _loadHTML($html) {
      // Empty input cannot be parsed: DOMDocument::loadHTML() throws a
      // ValueError on PHP 8 and warns/returns false on older versions.
      if($html === null || $html === false || $html === '') {
        return [null, null];
      }

      $doc = new DOMDocument();

      // The @ silences the warnings libxml raises for imperfect markup.
      // The boolean result is what signals failure; the DOMDocument object
      // itself is always truthy, so testing it can never detect an error.
      $loaded = @$doc->loadHTML($html);
      if(!$loaded) {
        return [null, null];
      }

      $xpath = new DOMXPath($doc);

      return [$doc, $xpath];
    }

    /**
     * Reduce HTML to a fixed whitelist of elements using HTMLPurifier.
     *
     * @param string $html HTML to sanitize
     * @return string the purified HTML, with every "&#xD;" character
     *                reference replaced by a literal carriage return
     */
    protected static function sanitizeHTML($html) {
      $allowedElements = [
        'a',
        'abbr',
        'b',
        'code',
        'del',
        'em',
        'i',
        'img',
        'q',
        'strike',
        'strong',
        'time',
        'blockquote',
        'pre',
        'p',
        'h1',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
        'ul',
        'li',
        'ol'
      ];

      $config = HTMLPurifier_Config::createDefault();
      // A null implementation turns off HTMLPurifier's definition cache.
      $config->set('Cache.DefinitionImpl', null);
      $config->set('HTML.AllowedElements', $allowedElements);

      // Request the raw (editable) HTML definition so it can be extended.
      $def = $config->getHTMLDefinition(true);

      // Register <time>: inline element, inline contents, the common
      // attribute collection, plus a free-text datetime attribute.
      $def->addElement(
        'time',
        'Inline',
        'Inline',
        'Common',
        [
          'datetime' => 'Text'
        ]
      );

      // Override the allowed classes to only support Microformats2 classes
      $def->manager->attrTypes->set('Class', new HTMLPurifier_AttrDef_HTML_Microformats2());

      $purifier = new HTMLPurifier($config);
      $sanitized = $purifier->purify($html);

      // Turn the carriage-return character reference back into a raw "\r".
      $sanitized = str_replace("&#xD;","\r",$sanitized);

      return $sanitized;
    }

    /**
     * Remove all tags from the given HTML and trim surrounding whitespace.
     *
     * @param string $html HTML to strip
     * @return string the remaining text
     */
    protected static function stripHTML($html) {
      return trim(strip_tags($html));
    }

  }