You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

150 lines
3.5 KiB

  1. <?php
  2. namespace p3k\XRay\Formats;
  3. use DOMDocument, DOMXPath;
  4. use HTMLPurifier, HTMLPurifier_Config;
  /**
   * Contract implemented by the format classes in this namespace.
   *
   * Both methods are static and take a single URL argument. Their exact
   * semantics are defined by the implementing classes, which are not part of
   * this file.
   */
  interface iFormat {

    /**
     * NOTE(review): presumably reports whether the host of $url is one this
     * format handles; confirm against the implementing classes.
     *
     * @param mixed $url URL to test (presumably a string; verify against callers)
     * @return mixed
     */
    public static function matches_host($url);

    /**
     * NOTE(review): presumably reports whether $url as a whole is one this
     * format handles; confirm against the implementing classes.
     *
     * @param mixed $url URL to test (presumably a string; verify against callers)
     * @return mixed
     */
    public static function matches($url);

  }
  /**
   * Abstract base class implementing iFormat.
   *
   * It does not implement the iFormat methods itself; it only supplies
   * protected static helpers to its subclasses: a default "unknown" result,
   * an HTML-to-DOM loader, and two HTMLPurifier-based cleaners (one that
   * keeps a whitelist of markup, one that reduces HTML to plain text).
   */
  abstract class Format implements iFormat {

    /**
     * Build the result structure whose data type is 'unknown'.
     *
     * @return array ['data' => ['type' => 'unknown']]
     */
    protected static function _unknown() {
      return [
        'data' => [
          'type' => 'unknown',
        ],
      ];
    }

    /**
     * Parse an HTML string into a DOMDocument and a DOMXPath bound to it.
     *
     * @param string $html HTML source to parse
     * @return array [DOMDocument, DOMXPath] on success, [null, null] when the
     *               input is empty, not a string, or cannot be parsed
     */
    protected static function _loadHTML($html) {
      // DOMDocument::loadHTML() does not accept empty input (ValueError on
      // PHP 8, warning + false before that) and throws a TypeError for
      // non-strings, so report those as "could not load" up front.
      if(!is_string($html) || $html === '') {
        return [null, null];
      }

      $doc = new DOMDocument();

      // Collect libxml parse warnings internally instead of emitting PHP
      // warnings for malformed real-world markup, then restore the caller's
      // previous setting.
      $previous = libxml_use_internal_errors(true);
      $loaded = $doc->loadHTML($html);
      libxml_clear_errors();
      libxml_use_internal_errors($previous);

      // The success of the parse is reported by the return value of
      // loadHTML(); the DOMDocument object itself is always truthy.
      if(!$loaded) {
        return [null, null];
      }

      return [$doc, new DOMXPath($doc)];
    }

    /**
     * Run HTML through HTMLPurifier, keeping only a fixed whitelist of elements.
     *
     * @param string       $html     HTML fragment to clean
     * @param bool         $allowImg when true, <img> is added to the whitelist
     * @param string|false $baseURL  when truthy, URIs are made absolute against this base
     * @return string the purified HTML, trimmed
     */
    protected static function sanitizeHTML($html, $allowImg=true, $baseURL=false) {
      $allowed = [
        'a',
        'abbr',
        'b',
        'br',
        'code',
        'del',
        'em',
        'i',
        'q',
        'strike',
        'strong',
        'time',
        'blockquote',
        'pre',
        'p',
        'h1',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
        'ul',
        'li',
        'ol',
        'span',
      ];

      if($allowImg) {
        $allowed[] = 'img';
      }

      $config = HTMLPurifier_Config::createDefault();
      $config->set('Cache.DefinitionImpl', null);

      if(\p3k\XRay\allow_iframe_video()) {
        $allowed[] = 'iframe';
        $config->set('HTML.SafeIframe', true);
        $config->set('URI.SafeIframeRegexp', '%^(https?:)?//(www\.youtube(?:-nocookie)?\.com/embed/|player\.vimeo\.com/video/)%');
        $config->set('AutoFormat.RemoveEmpty', true);
        // Removes iframe in case it has no src. This strips the non-allowed domains.
        $config->set('AutoFormat.RemoveEmpty.Predicate', ['iframe' => [0 => 'src']]);
      }

      $config->set('HTML.AllowedElements', $allowed);

      if($baseURL) {
        $config->set('URI.MakeAbsolute', true);
        $config->set('URI.Base', $baseURL);
      }

      // Fetch the HTML definition so it can be extended below, before the
      // purifier is constructed from this config.
      $def = $config->getHTMLDefinition(true);

      // add HTML <time> element
      $def->addElement(
        'time',
        'Inline',
        'Inline',
        'Common',
        [
          'datetime' => 'Text',
        ]
      );

      /*
      // This isn't working right now, not sure why
      // http://developers.whatwg.org/the-video-element.html#the-video-element
      $def->addElement(
        'video',
        'Block',
        'Optional: (source, Flow) | (Flow, source) | Flow',
        'Common',
        [
          'src' => 'URI',
          'type' => 'Text',
          'width' => 'Length',
          'height' => 'Length',
          'poster' => 'URI',
          'preload' => 'Enum#auto,metadata,none',
          'controls' => 'Bool',
        ]
      );
      $def->addElement(
        'source',
        'Block',
        'Flow',
        'Common',
        [
          'src' => 'URI',
          'type' => 'Text',
        ]
      );
      */

      // Override the allowed classes to only support Microformats2 classes
      $def->manager->attrTypes->set('Class', new HTMLPurifier_AttrDef_HTML_Microformats2());

      $purifier = new HTMLPurifier($config);
      $sanitized = $purifier->purify($html);

      // Turn the carriage-return entity in the purifier output back into a
      // literal carriage return.
      $sanitized = str_replace("&#xD;", "\r", $sanitized);

      return trim($sanitized);
    }

    /**
     * Return a plaintext version of the input HTML.
     *
     * Every element except <br> is removed by HTMLPurifier; the remaining
     * <br> tags become newlines and HTML entities are decoded.
     *
     * @param string $html HTML fragment to convert
     * @return string the plain text, trimmed
     */
    protected static function stripHTML($html) {
      $config = HTMLPurifier_Config::createDefault();
      $config->set('Cache.DefinitionImpl', null);
      $config->set('HTML.AllowedElements', ['br']);

      $purifier = new HTMLPurifier($config);
      $sanitized = $purifier->purify($html);
      $sanitized = str_replace("&#xD;", "\r", $sanitized);

      // Convert real <br> tags to newlines BEFORE decoding entities.
      // Otherwise escaped literal text such as "&lt;br&gt;" would be decoded
      // to "<br>" and then wrongly turned into a newline as well.
      $sanitized = str_replace(['<br>', '<br />'], "\n", $sanitized);

      // Explicit flags and charset keep the result independent of the PHP
      // version (the default flags of html_entity_decode changed in PHP 8.1).
      $sanitized = html_entity_decode($sanitized, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8');

      return trim($sanitized);
    }

  }