Browse Source

Merge pull request #111 from barnabywalters/php-mf2@0.5

Allow installation alongside php-mf2 v0.5
pull/119/head
Aaron Parecki 2 years ago
committed by GitHub
parent
commit
0ece8bb47b
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 278 additions and 601 deletions
  1. +7
    -2
      README.md
  2. +1
    -1
      composer.json
  3. +178
    -587
      composer.lock
  4. +15
    -1
      lib/XRay.php
  5. +35
    -10
      lib/XRay/Formats/Mf2.php
  6. +38
    -0
      tests/LibraryTest.php
  7. +4
    -0
      tests/bootstrap.php

+ 7
- 2
README.md View File

@ -69,11 +69,16 @@ In both cases, you can add an additional parameter to configure various options
Additional parameters are supported when making requests that use the Twitter or GitHub API. See the Authentication section below for details.
The XRay constructor can optionally be passed an array of default options, which will be applied in
addition to (and can be overridden by) the options passed to individual `parse()` calls.
```php
$xray = new p3k\XRay();
$xray = new p3k\XRay([
'timeout' => 30 // Time out all requests that take longer than 30s
]);
$parsed = $xray->parse('https://aaronparecki.com/2017/04/28/9/', [
'timeout' => 30
'timeout' => 40 // Override the default 30s timeout for this specific request
]);
$parsed = $xray->parse('https://aaronparecki.com/2017/04/28/9/', $html, [

+ 1
- 1
composer.json View File

@ -5,7 +5,7 @@
"homepage": "https://github.com/aaronpk/XRay",
"description": "X-Ray returns structured data from any URL",
"require": {
"mf2/mf2": "^0.4",
"mf2/mf2": "^0.4||^0.5",
"ezyang/htmlpurifier": "^4.10",
"indieweb/link-rel-parser": "^0.1.0",
"dg/twitter-php": "^3.0.0",

+ 178
- 587
composer.lock
File diff suppressed because it is too large
View File


+ 15
- 1
lib/XRay.php View File

@ -4,17 +4,26 @@ namespace p3k;
class XRay {
public $http;
public function __construct() {
private $defaultOptions = [];
/**
 * Create the HTTP client and remember the default options.
 *
 * @param array $options Default options, later merged underneath the options given to
 *                       individual calls. A non-array value is ignored and the defaults
 *                       stay empty.
 */
public function __construct($options=[]) {
  $userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 p3k/XRay';
  $this->http = new HTTP($userAgent);

  // Only an array can serve as the defaults; anything else leaves them untouched.
  if (!is_array($options)) {
    return;
  }

  $this->defaultOptions = $options;
}
/**
 * Run the Rels parser for a URL.
 *
 * @param string $url  URL handed to XRay\Rels::parse().
 * @param array  $opts Per-call options; these take precedence over the constructor defaults.
 * @return mixed Whatever XRay\Rels::parse() returns.
 */
public function rels($url, $opts=[]) {
  $relParser = new XRay\Rels($this->http);

  // Defaults go first so that same-named per-call options replace them.
  $effectiveOpts = array_merge($this->defaultOptions, $opts);

  return $relParser->parse($url, $effectiveOpts);
}
/**
 * Run the Feeds finder for a URL.
 *
 * @param string $url  URL handed to XRay\Feeds::find().
 * @param array  $opts Per-call options; these take precedence over the constructor defaults.
 * @return mixed Whatever XRay\Feeds::find() returns.
 */
public function feeds($url, $opts=[]) {
  $feedFinder = new XRay\Feeds($this->http);

  // Defaults go first so that same-named per-call options replace them.
  $effectiveOpts = array_merge($this->defaultOptions, $opts);

  return $feedFinder->find($url, $effectiveOpts);
}
@ -35,6 +44,9 @@ class XRay {
}
$parser = new XRay\Parser($this->http);
// Merge provided options with default options, allowing provided options to override defaults.
$opts = array_merge($this->defaultOptions, $opts);
$result = $parser->parse([
'body' => $body,
'url' => $url,
@ -51,6 +63,8 @@ class XRay {
public function process($url, $mf2json, $opts=[]) {
$parser = new XRay\Parser($this->http);
// Merge provided options with default options, allowing provided options to override defaults.
$opts = array_merge($this->defaultOptions, $opts);
$result = $parser->parse([
'body' => $mf2json,
'url' => $url,

+ 35
- 10
lib/XRay/Formats/Mf2.php View File

@ -356,7 +356,6 @@ class Mf2 extends Format {
private static function collectArrayURLValues($properties, $item, &$data, &$refs, &$http) {
$keys = [];
foreach($properties as $p) {
if(array_key_exists($p, $item['properties'])) {
foreach($item['properties'][$p] as $v) {
@ -365,6 +364,15 @@ class Mf2 extends Format {
$data[$p][] = $v;
$keys[] = $p;
}
elseif(self::isImgAlt($v)) {
// For the moment, disregard the alt value and output a string for compatibility with current consuming code.
$imgURL = $v['value'];
if (is_string($imgURL) and self::isURL($imgURL)) {
if(!array_key_exists($p, $data)) $data[$p] = [];
$data[$p][] = $imgURL;
$keys[] = $p;
}
}
elseif(self::isMicroformat($v) && ($u=self::getPlaintext($v, 'url')) && self::isURL($u)) {
if(!array_key_exists($p, $data)) $data[$p] = [];
$data[$p][] = $u;
@ -697,18 +705,22 @@ class Mf2 extends Format {
if($p == 'url' && $authorURL) {
// If there is a matching author URL, use that one
$found = false;
foreach($item['properties']['url'] as $url) {
if(self::isURL($url)) {
$url = \p3k\XRay\normalize_url($url);
if($url == \p3k\XRay\normalize_url($authorURL)) {
$data['url'] = $url;
$found = true;
if (array_key_exists('url', $item['properties']) and is_array($item['properties']['url'])) {
foreach($item['properties']['url'] as $url) {
if(self::isURL($url)) {
$url = \p3k\XRay\normalize_url($url);
if($url == \p3k\XRay\normalize_url($authorURL)) {
$data['url'] = $url;
$found = true;
}
}
}
if(!$found && self::isURL($item['properties']['url'][0])) {
$data['url'] = $item['properties']['url'][0];
}
}
if(!$found && self::isURL($item['properties']['url'][0])) {
$data['url'] = $item['properties']['url'][0];
}
} else if(($v = self::getPlaintext($item, $p)) !== null) {
// Make sure the URL property is actually a URL
if($p == 'url' || $p == 'photo') {
@ -881,6 +893,13 @@ class Mf2 extends Format {
return false;
}
/**
 * Tell whether a property value is an image-with-alt structure: an array without
 * numeric keys (per hasNumericKeys()) that carries both a 'value' and an 'alt' key.
 *
 * @param mixed $mf Property value to inspect.
 * @return bool
 */
private static function isImgAlt($mf) {
  if (!is_array($mf)) {
    return false;
  }

  if (self::hasNumericKeys($mf)) {
    return false;
  }

  return array_key_exists('value', $mf) && array_key_exists('alt', $mf);
}
private static function isMicroformat($mf) {
return is_array($mf)
and !self::hasNumericKeys($mf)
@ -909,6 +928,12 @@ class Mf2 extends Format {
$value = $mf2['properties'][$k][0];
if(is_string($value)) {
return $value;
}
elseif(self::isImgAlt($value)) {
// For back-compatibility, assume that the consuming code wants the URL value.
if (is_string($value['value'])) {
return $value['value'];
}
} elseif(self::isMicroformat($value) && array_key_exists('value', $value)) {
return $value['value'];
}

+ 38
- 0
tests/LibraryTest.php View File

@ -71,4 +71,42 @@ class LibraryTest extends PHPUnit\Framework\TestCase
$this->assertArrayNotHasKey('html', $data);
}
/**
 * An h-card with no URL property must still parse as a card, with the page URL
 * used as its url.
 */
public function testHandlesHCardWithoutURLProperty()
{
  $pageURL = 'http://example.com/';
  $markup = '<p class="h-card">The Mythical URLless Person</p>';

  $parser = new p3k\XRay();
  $result = $parser->parse($pageURL, $markup);
  $card = $result['data'];

  $this->assertEquals('card', $card['type']);
  // When the h-card is the main data on the page but has no URL property, the page URL is filled in.
  $this->assertEquals($pageURL, $card['url']);
}
/**
 * Options given to the XRay constructor must act as defaults for parse() and
 * must be overridable by the options passed to an individual parse() call.
 */
public function testDefaultOptionsAreUsed()
{
  $pageURL = 'http://example.com/';
  $markup = '<p class="h-card">A Person</p>';
  $flagOn = ['include_original' => true];
  $flagOff = ['include_original' => false];

  $withDefaults = new p3k\XRay($flagOn);
  $plain = new p3k\XRay();

  // Sanity check first: the option under test has to change the output at all.
  $fromDefaults = $withDefaults->parse($pageURL, $markup);
  $fromPlain = $plain->parse($pageURL, $markup);
  $this->assertNotEquals($fromDefaults, $fromPlain);

  // A constructor default must behave exactly like the same option passed to parse().
  $fromDefaults = $withDefaults->parse($pageURL, $markup);
  $fromExplicit = $plain->parse($pageURL, $markup, $flagOn);
  $this->assertEquals($fromDefaults, $fromExplicit);

  // A per-call option must override the default (not checked property by property, but good enough).
  $overridden = $withDefaults->parse($pageURL, $markup, $flagOff);
  $fromPlain = $plain->parse($pageURL, $markup);
  $this->assertEquals($overridden, $fromPlain);
}
}

+ 4
- 0
tests/bootstrap.php View File

@ -2,6 +2,10 @@
const TESTING = true;
require __DIR__ . '/../vendor/autoload.php';
// TODO: fix the many things causing deprecation warnings!
// For the moment, report all errors except for deprecation warnings during testing.
error_reporting(E_ALL ^ E_DEPRECATED);
// Load config file if present, otherwise use default
if(file_exists(dirname(__FILE__).'/../config.php')) {
include dirname(__FILE__).'/../config.php';

Loading…
Cancel
Save