|
|
- <?php
- namespace p3k\XRay\Formats;
-
- use HTMLPurifier, HTMLPurifier_Config;
- use DOMDocument, DOMXPath;
- use p3k\XRay\Formats;
- use PicoFeed\Reader\Reader;
- use PicoFeed\PicoFeedException;
-
- class XML extends Format {
-
- public static function matches_host($url) { return true; }
- public static function matches($url) { return true; }
-
- public static function parse($xml, $url) {
- $result = [
- 'data' => [
- 'type' => 'unknown',
- ],
- 'url' => $url,
- ];
-
- try {
- $reader = new Reader();
- $parser = $reader->getParser($url, $xml, '');
- $feed = $parser->execute();
-
- $result['data']['type'] = 'feed';
- $result['data']['items'] = [];
-
- foreach($feed->getItems() as $item) {
- $result['data']['items'][] = self::_hEntryFromFeedItem($item, $feed);
- }
-
- } catch(PicoFeedException $e) {
-
- }
-
- return $result;
- }
-
- private static function _hEntryFromFeedItem($item, $feed) {
- $entry = [
- 'type' => 'entry',
- 'author' => [
- 'name' => null,
- 'url' => null,
- 'photo' => null
- ]
- ];
-
- if(is_array($guid=$item->getTag('guid')) && count($guid))
- $entry['uid'] = $guid[0];
- elseif(is_array($guid=$item->getTag('id')) && count($guid))
- $entry['uid'] = $guid[0];
-
- if($item->getUrl())
- $entry['url'] = $item->getUrl();
-
- if($item->getPublishedDate())
- $entry['published'] = $item->getPublishedDate()->format('c');
-
- if($item->getContent())
- $entry['content'] = [
- 'html' => self::sanitizeHTML($item->getContent()),
- 'text' => self::stripHTML($item->getContent())
- ];
-
- if($item->getTitle() && $item->getTitle() != $item->getUrl()) {
- $title = $item->getTitle();
- $entry['name'] = $title;
-
- // Check if the title is a prefix of the content and drop if so
- if(isset($entry['content'])) {
- if(substr($title, -3) == '...' || substr($title, -1) == '…') {
- if(substr($title, -3) == '...') {
- $trimmedTitle = substr($title, 0, -3);
- } else {
- $trimmedTitle = substr($title, 0, -1);
- }
- if(substr($entry['content']['text'], 0, strlen($trimmedTitle)) == $trimmedTitle) {
- unset($entry['name']);
- }
- }
- }
- }
-
- if($item->getAuthor()) {
- $entry['author']['name'] = $item->getAuthor();
- }
-
- if($item->getAuthorUrl()) {
- $entry['author']['url'] = $item->getAuthorUrl();
- } else if($feed->siteUrl) {
- $entry['author']['url'] = $feed->siteUrl;
- }
-
- if($item->getEnclosureType()) {
- $prop = false;
- switch($item->getEnclosureType()) {
- case 'audio/mpeg':
- $prop = 'audio'; break;
- case 'image/jpeg':
- case 'image/png':
- case 'image/gif':
- $prop = 'photo'; break;
- }
- if($prop)
- $entry[$prop] = [$item->getEnclosureUrl()];
- }
-
- return $entry;
- }
-
- }
|