Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions src/Readability.php
Original file line number Diff line number Diff line change
Expand Up @@ -281,11 +281,77 @@ public function init(): bool
*/
public function postProcessContent(\DOMElement $articleContent): void
{
$this->fixIncompleteURLs($articleContent);
if ($this->convertLinksToFootnotes && !preg_match('/\bwiki/', $this->url)) {
$this->addFootnotes($articleContent);
}
}

private function parse_url($url): ?array{
$urlInfo = @parse_url($url);
if (false === $urlInfo) {
$this->logger->error('Failed to parse URL: ' . $url);
return NULL;
}

if (!isset($urlInfo['path'])) {
$urlInfo['path'] = '/';
}

// 构建基础URL和路径URL
$urlInfo['base_url'] = $urlInfo['scheme'] . '://' . $urlInfo['host'];
if (isset($urlInfo['port']) && $urlInfo['port'] && 80 !== $urlInfo['port'] && 443 !== $urlInfo['port']) {
$urlInfo['base_url'] .= ':' . $urlInfo['port'];
}

$urlInfo['path_url'] = $urlInfo['base_url'] . rtrim(dirname($urlInfo['path']), '/') . '/';

return $urlInfo;
}

/**
* Check and fix incomplete URLs in content.
*/
public function fixIncompleteURLs($articleContent): void{
$urlInfo = $this->parse_url($this->url);
if (!$urlInfo || !isset($urlInfo['scheme'], $urlInfo['host'])) {
$this->logger->error('Invalid URL provided for post-processing: ' . $this->url);
return;
}
// Process all <a> and <img> elements to ensure their href/src attributes are complete URLs.
$this->processUrlElements($articleContent, 'a', 'href', $urlInfo);
$this->processUrlElements($articleContent, 'img', 'src', $urlInfo);
}

private function processUrlElements($articleContent, string $tagName, string $attribute, array $urlInfo): void{
$elements = $articleContent->getElementsByTagName($tagName);
for ($i = 0; $i < $elements->length; ++$i) {
$element = $elements->item($i);
$url = $element->getAttribute($attribute);
$completedUrl = $this->completeUrl($url, $urlInfo);
$element->setAttribute($attribute, $completedUrl);
}
}

private function completeUrl(string $url, array $urlInfo): string{
if (@parse_url($url, PHP_URL_HOST)) {
return $url;
}

if (strpos($url, '#') === 0 || strpos($url, 'javascript:') === 0) {
return $url;
}

if (strpos($url, '/') === 0) {
return $urlInfo['base_url'] . $url;
}

$result = $urlInfo['path_url'] . $url;
$result = preg_replace('/\/[^\/]+\/\.\.\//', '/', $result);
$result = preg_replace('/\/\.\//', '/', $result);
return $result;
}

/**
* For easier reading, convert this document to have footnotes at the bottom rather than inline links.
*
Expand Down