Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 2 additions & 11 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on:
pull_request:

env:
COVERAGE_PHP_VERSION: "8.3"
COVERAGE_PHP_VERSION: "8.4"

jobs:
phpunit:
Expand All @@ -16,18 +16,9 @@ jobs:
fail-fast: false
matrix:
php-version:
- "7.2"
- "7.3"
- "7.4"
- "8.0"
- "8.1"
- "8.2"
- "8.3"
- "8.4"
deps:
- "highest"
include:
- php-version: "7.2"
deps: "lowest"

steps:
- name: Checkout
Expand Down
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ composer require brick/structured-data

### Requirements

This library requires PHP 7.2 or later. It makes use of the following extensions:
This library requires PHP 8.4 or later. It makes use of the following extensions:

- [dom](https://www.php.net/manual/en/book.dom.php)
- [json](https://www.php.net/manual/en/book.json.php)
Expand Down Expand Up @@ -54,13 +54,13 @@ interface Brick\StructuredData\Reader
/**
* Reads the items contained in the given document.
*
* @param DOMDocument $document The DOM document to read.
* @param string $url The URL the document was retrieved from. This will be used only to resolve relative
* URLs in property values. No attempt will be performed to connect to this URL.
* @param HTMLDocument $document The DOM document to read.
* @param string $url The URL the document was retrieved from. This will be used only to resolve relative
* URLs in property values. No attempt will be performed to connect to this URL.
*
* @return Item[] The top-level items.
*/
public function read(DOMDocument $document, string $url) : array;
public function read(HTMLDocument $document, string $url) : array;
}
```

Expand Down Expand Up @@ -92,7 +92,7 @@ use Brick\StructuredData\Item;
$microdataReader = new MicrodataReader();

// Wrap into HTMLReader to be able to read HTML strings or files directly,
// i.e. without manually converting them to DOMDocument instances first
// i.e. without manually converting them to HTMLDocument instances first
$htmlReader = new HTMLReader($microdataReader);

// Replace this URL with that of a website you know is using Microdata
Expand Down
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
],
"license": "MIT",
"require": {
"php": "^7.2 || ^8.0",
"php": "^8.4",
"ext-dom": "*",
"ext-json": "*",
"ext-libxml": "*",
Expand Down
26 changes: 11 additions & 15 deletions src/DOMBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,33 @@

namespace Brick\StructuredData;

use DOMDocument;
use DOM\HTMLDocument;

use const DOM\HTML_NO_DEFAULT_NS;

class DOMBuilder
{
/**
* Builds a DOMDocument from an HTML string.
* Builds an HTMLDocument from an HTML string.
*
* @param string $html
*
* @return DOMDocument
* @return HTMLDocument
*/
public static function fromHTML(string $html) : DOMDocument
public static function fromHTML(string $html) : HTMLDocument
{
$document = new DOMDocument();
$document->loadHTML($html, LIBXML_NOWARNING | LIBXML_NOERROR);

return $document;
return HTMLDocument::createFromString($html, LIBXML_NOERROR | HTML_NO_DEFAULT_NS);
}

/**
* Builds a DOMDocument from an HTML file.
* Builds an HTMLDocument from an HTML file.
*
* @param string $file
*
* @return DOMDocument
* @return HTMLDocument
*/
public static function fromHTMLFile(string $file) : DOMDocument
public static function fromHTMLFile(string $file) : HTMLDocument
{
$document = new DOMDocument();
$document->loadHTMLFile($file, LIBXML_NOWARNING | LIBXML_NOERROR);

return $document;
return HTMLDocument::createFromFile($file, LIBXML_NOERROR | HTML_NO_DEFAULT_NS);
}
}
8 changes: 4 additions & 4 deletions src/Reader.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

namespace Brick\StructuredData;

use DOMDocument;
use DOM\HTMLDocument;

/**
* Common interface for readers of each format: Microdata, RDFa Lite, JSON-LD.
Expand All @@ -14,11 +14,11 @@ interface Reader
/**
* Reads the items contained in the given document.
*
* @param DOMDocument $document The DOM document to read.
* @param string $url The URL the document was retrieved from. This will be used only to resolve relative
* @param HTMLDocument $document The DOM document to read.
* @param string $url The URL the document was retrieved from. This will be used only to resolve relative
* URLs in property values. The implementation must not attempt to connect to this URL.
*
* @return Item[] The top-level items.
*/
public function read(DOMDocument $document, string $url) : array;
public function read(HTMLDocument $document, string $url) : array;
}
12 changes: 6 additions & 6 deletions src/Reader/JsonLdReader.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@

use stdClass;

use DOMDocument;
use DOMNode;
use DOMXPath;
use DOM\HTMLDocument;
use DOM\Node;
use DOM\XPath;

use Sabre\Uri\InvalidUriException;
use function Sabre\Uri\build;
Expand Down Expand Up @@ -54,9 +54,9 @@ public function __construct(array $iriProperties = [])
/**
* @inheritDoc
*/
public function read(DOMDocument $document, string $url) : array
public function read(HTMLDocument $document, string $url) : array
{
$xpath = new DOMXPath($document);
$xpath = new XPath($document);

$nodes = $xpath->query('//script[@type="application/ld+json"]');
$nodes = iterator_to_array($nodes);
Expand All @@ -65,7 +65,7 @@ public function read(DOMDocument $document, string $url) : array
return [];
}

$items = array_map(function(DOMNode $node) use ($url) {
$items = array_map(function(Node $node) use ($url) {
return $this->readJson($node->textContent, $url);
}, $nodes);

Expand Down
34 changes: 17 additions & 17 deletions src/Reader/MicrodataReader.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
use Brick\StructuredData\Item;
use Brick\StructuredData\Reader;

use DOMDocument;
use DOMNode;
use DOMXPath;
use DOM\HTMLDocument;
use DOM\Node;
use DOM\XPath;

use Sabre\Uri\InvalidUriException;
use function Sabre\Uri\resolve;
Expand All @@ -26,9 +26,9 @@ class MicrodataReader implements Reader
/**
* @inheritDoc
*/
public function read(DOMDocument $document, string $url) : array
public function read(HTMLDocument $document, string $url) : array
{
$xpath = new DOMXPath($document);
$xpath = new XPath($document);

/**
* An item is a top-level Microdata item if its element does not have an itemprop attribute.
Expand All @@ -38,21 +38,21 @@ public function read(DOMDocument $document, string $url) : array
$nodes = $xpath->query('//*[@itemscope and not(@itemprop)]');
$nodes = iterator_to_array($nodes);

return array_map(function(DOMNode $node) use ($xpath, $url) {
return array_map(function(Node $node) use ($xpath, $url) {
return $this->nodeToItem($node, $xpath, $url);
}, $nodes);
}

/**
* Extracts information from a DOMNode into an Item.
* Extracts information from a Node into an Item.
*
* @param DOMNode $node A DOMNode representing an element with the itemscope attribute.
* @param DOMXPath $xpath A DOMXPath object created from the node's document element.
* @param string $url The URL the document was retrieved from, for relative URL resolution.
* @param Node $node A Node representing an element with the itemscope attribute.
* @param XPath $xpath An XPath object created from the node's document element.
* @param string $url The URL the document was retrieved from, for relative URL resolution.
*
* @return Item
*/
private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url) : Item
private function nodeToItem(Node $node, XPath $xpath, string $url) : Item
{
$itemid = $node->attributes->getNamedItem('itemid');

Expand Down Expand Up @@ -99,7 +99,7 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url) : Item

// Exclude properties that are inside a nested item; XPath does not seem to provide a way to do this.
// See: https://stackoverflow.com/q/26365495/759866
$itemprops = array_filter($itemprops, function(DOMNode $itemprop) use ($node, $xpath) {
$itemprops = array_filter($itemprops, function(Node $itemprop) use ($node, $xpath) {
for (;;) {
$itemprop = $itemprop->parentNode;

Expand All @@ -118,7 +118,7 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url) : Item

$vocabularyIdentifier = $this->getVocabularyIdentifier($types);

/** @var DOMNode[] $itemprops */
/** @var Node[] $itemprops */
foreach ($itemprops as $itemprop) {
/**
* An element introducing a property can introduce multiple properties at once, to avoid duplication when
Expand Down Expand Up @@ -155,13 +155,13 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url) : Item
/**
* https://www.w3.org/TR/microdata/#values
*
* @param DOMNode $node A DOMNode representing an element with the itemprop attribute.
* @param DOMXPath $xpath A DOMXPath object created from the node's document element.
* @param string $url The URL the document was retrieved from, for relative URL resolution.
* @param Node $node A Node representing an element with the itemprop attribute.
* @param XPath $xpath An XPath object created from the node's document element.
* @param string $url The URL the document was retrieved from, for relative URL resolution.
*
* @return Item|string
*/
private function getPropertyValue(DOMNode $node, DOMXPath $xpath, string $url)
private function getPropertyValue(Node $node, XPath $xpath, string $url)
{
/**
* If the element also has an itemscope attribute: the value is the item created by the element.
Expand Down
34 changes: 17 additions & 17 deletions src/Reader/RdfaLiteReader.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
use Brick\StructuredData\Item;
use Brick\StructuredData\Reader;

use DOMDocument;
use DOMNode;
use DOMXPath;
use DOM\HTMLDocument;
use DOM\Node;
use DOM\XPath;

use Sabre\Uri\InvalidUriException;
use function Sabre\Uri\resolve;
Expand Down Expand Up @@ -87,34 +87,34 @@ class RdfaLiteReader implements Reader
/**
* @inheritDoc
*/
public function read(DOMDocument $document, string $url) : array
public function read(HTMLDocument $document, string $url) : array
{
$xpath = new DOMXPath($document);
$xpath = new XPath($document);

/**
* Top-level items have a typeof attribute and no property attribute.
*/
$nodes = $xpath->query('//*[@typeof and not(@property)]');
$nodes = iterator_to_array($nodes);

return array_map(function(DOMNode $node) use ($xpath, $url) {
return array_map(function(Node $node) use ($xpath, $url) {
return $this->nodeToItem($node, $xpath, $url, self::PREDEFINED_PREFIXES, null);
}, $nodes);
}

/**
* Extracts information from a DOMNode into an Item.
* Extracts information from a Node into an Item.
*
* @param DOMNode $node A DOMNode representing an element with the typeof attribute.
* @param DOMXPath $xpath A DOMXPath object created from the node's document element.
* @param Node $node A Node representing an element with the typeof attribute.
* @param XPath $xpath An XPath object created from the node's document element.
* @param string $url The URL the document was retrieved from, for relative URL resolution.
* @param string[] $prefixes The prefixes in use, as a map of prefix to vocabulary URL.
* @param string|null $vocabulary The URL of the vocabulary in use, if any.
* This is the content of the vocab attribute of the closest item ancestor.
*
* @return Item
*/
private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url, array $prefixes, ?string $vocabulary) : Item
private function nodeToItem(Node $node, XPath $xpath, string $url, array $prefixes, ?string $vocabulary) : Item
{
$vocabulary = $this->updateVocabulary($node, $vocabulary);

Expand Down Expand Up @@ -162,7 +162,7 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url, array $

// Exclude properties that are inside a nested item; XPath does not seem to provide a way to do this.
// See: https://stackoverflow.com/q/26365495/759866
$properties = array_filter($properties, function(DOMNode $itemprop) use ($node, $xpath) {
$properties = array_filter($properties, function(Node $itemprop) use ($node, $xpath) {
for (;;) {
$itemprop = $itemprop->parentNode;

Expand All @@ -179,7 +179,7 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url, array $
return false;
});

/** @var DOMNode[] $properties */
/** @var Node[] $properties */
foreach ($properties as $property) {
$names = $property->attributes->getNamedItem('property')->textContent;

Expand Down Expand Up @@ -263,12 +263,12 @@ private function isValidAbsoluteURL(string $url) : bool
/**
* Replaces the current vocabulary with the one from the vocab attribute of the current node, if set.
*
* @param DOMNode $node The DOMNode that may contain a vocab attribute.
* @param Node $node The Node that may contain a vocab attribute.
* @param string|null $vocabulary The URL of the vocabulary in use, if any.
*
* @return string|null The updated vocabulary URL, if any.
*/
private function updateVocabulary(DOMNode $node, ?string $vocabulary) : ?string
private function updateVocabulary(Node $node, ?string $vocabulary) : ?string
{
$vocab = $node->attributes->getNamedItem('vocab');

Expand Down Expand Up @@ -314,15 +314,15 @@ private function checkVocabularyUrl(string $url) : ?string
/**
* https://www.w3.org/TR/microdata/#values
*
* @param DOMNode $node A DOMNode representing an element with the property attribute.
* @param DOMXPath $xpath A DOMXPath object created from the node's document element.
* @param Node $node A Node representing an element with the property attribute.
* @param XPath $xpath An XPath object created from the node's document element.
* @param string $url The URL the document was retrieved from, for relative URL resolution.
* @param string[] $prefixes The prefixes in use, as a map of prefix to vocabulary URL.
* @param string|null $vocabulary The URL of the vocabulary in use, if any.
*
* @return Item|string
*/
private function getPropertyValue(DOMNode $node, DOMXPath $xpath, string $url, array $prefixes, ?string $vocabulary)
private function getPropertyValue(Node $node, XPath $xpath, string $url, array $prefixes, ?string $vocabulary)
{
// If the element also has a typeof attribute, create an item from the element
$attr = $node->attributes->getNamedItem('typeof');
Expand Down
4 changes: 2 additions & 2 deletions src/Reader/ReaderChain.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

use Brick\StructuredData\Reader;

use DOMDocument;
use DOM\HTMLDocument;

/**
* Chains several schema readers and returns the aggregate results.
Expand All @@ -31,7 +31,7 @@ public function __construct(Reader ...$readers)
/**
* @inheritDoc
*/
public function read(DOMDocument $document, string $url) : array
public function read(HTMLDocument $document, string $url) : array
{
if (! $this->readers) {
return [];
Expand Down