@@ -120,7 +120,7 @@ class Readability implements LoggerAwareInterface
120120 */
121121 protected ?string $ domainRegExp = null ;
122122
123- protected ?\ DOMElement $ body = null ;
123+ protected ?JSLikeHTMLElement $ body = null ;
124124
125125 /**
126126 * @var ?string Cache the body HTML in case we need to re-use it later
@@ -262,6 +262,7 @@ public function init(): bool
262262
263263 // Assume successful outcome
264264 $ this ->success = true ;
265+ /** @var \DOMNodeList<JSLikeHTMLElement> */
265266 $ bodyElems = $ this ->dom ->getElementsByTagName ('body ' );
266267
267268 // WTF multiple body nodes?
@@ -284,7 +285,7 @@ public function init(): bool
284285 $ articleTitle = $ this ->getArticleTitle ();
285286 $ articleContent = $ this ->grabArticle ();
286287
287- if (! $ articleContent ) {
288+ if (null === $ articleContent ) {
288289 $ this ->success = false ;
289290 $ articleContent = $ this ->dom ->createElement ('div ' );
290291 $ articleContent ->setAttribute ('class ' , 'readability-content ' );
@@ -423,7 +424,7 @@ public function prepArticle(\DOMNode $articleContent): void
423424 }
424425
425426 // Remove service data-candidate attribute.
426- /** @var \DOMNodeList<\DOMElement > */
427+ /** @var \DOMNodeList<JSLikeHTMLElement > */
427428 $ elems = $ xpath ->query ('.//*[@data-candidate] ' , $ articleContent );
428429 for ($ i = $ elems ->length - 1 ; $ i >= 0 ; --$ i ) {
429430 $ elems ->item ($ i )->removeAttribute ('data-candidate ' );
@@ -519,7 +520,7 @@ public function getInnerText(?\DOMNode $e, bool $normalizeSpaces = true, bool $f
519520 /**
520521 * Remove the style attribute on every $e and under.
521522 */
522- public function cleanStyles (\ DOMElement $ e ): void
523+ public function cleanStyles (JSLikeHTMLElement $ e ): void
523524 {
524525 if (\is_object ($ e )) {
525526 $ elems = $ e ->getElementsByTagName ('* ' );
@@ -552,7 +553,7 @@ public function getWordCount(string $text): int
552553 * This is the amount of text that is inside a link divided by the total text in the node.
553554 * Can exclude external references to differentiate between simple text and menus/infoblocks.
554555 */
555- public function getLinkDensity (\ DOMElement $ e , bool $ excludeExternal = false ): float
556+ public function getLinkDensity (JSLikeHTMLElement $ e , bool $ excludeExternal = false ): float
556557 {
557558 $ links = $ e ->getElementsByTagName ('a ' );
558559 $ textLength = mb_strlen ($ this ->getInnerText ($ e , true , true ));
@@ -575,7 +576,7 @@ public function getLinkDensity(\DOMElement $e, bool $excludeExternal = false): f
575576 /**
576577 * Get an element relative weight.
577578 */
578- public function getWeight (\ DOMElement $ e ): int
579+ public function getWeight (JSLikeHTMLElement $ e ): int
579580 {
580581 if (!$ this ->flagIsActive (self ::FLAG_WEIGHT_ATTRIBUTES )) {
581582 return 0 ;
@@ -606,7 +607,7 @@ public function killBreaks(JSLikeHTMLElement $node): void
606607 *
607608 * Updated 2012-09-18 to preserve youtube/vimeo iframes
608609 */
609- public function clean (\ DOMElement $ e , string $ tag ): void
610+ public function clean (JSLikeHTMLElement $ e , string $ tag ): void
610611 {
611612 $ targetList = $ e ->getElementsByTagName ($ tag );
612613 $ isEmbed = ('audio ' === $ tag || 'video ' === $ tag || 'iframe ' === $ tag || 'object ' === $ tag || 'embed ' === $ tag );
@@ -638,7 +639,7 @@ public function clean(\DOMElement $e, string $tag): void
638639 * "Fishy" is an algorithm based on content length, classnames,
639640 * link density, number of images & embeds, etc.
640641 */
641- public function cleanConditionally (\ DOMElement $ e , string $ tag ): void
642+ public function cleanConditionally (JSLikeHTMLElement $ e , string $ tag ): void
642643 {
643644 if (!$ this ->flagIsActive (self ::FLAG_CLEAN_CONDITIONALLY )) {
644645 return ;
@@ -751,7 +752,7 @@ public function cleanConditionally(\DOMElement $e, string $tag): void
751752 /**
752753 * Clean out spurious headers from an Element. Checks things like classnames and link density.
753754 */
754- public function cleanHeaders (\ DOMElement $ e ): void
755+ public function cleanHeaders (JSLikeHTMLElement $ e ): void
755756 {
756757 for ($ headerIndex = 1 ; $ headerIndex < 3 ; ++$ headerIndex ) {
757758 $ headers = $ e ->getElementsByTagName ('h ' . $ headerIndex );
@@ -791,7 +792,7 @@ public function removeFlag(int $flag): void
791792 /**
792793 * Get the article title as an H1.
793794 */
794- protected function getArticleTitle (): \ DOMElement
795+ protected function getArticleTitle (): JSLikeHTMLElement
795796 {
796797 try {
797798 $ curTitle = $ origTitle = $ this ->getInnerText ($ this ->dom ->getElementsByTagName ('title ' )->item (0 ));
@@ -861,7 +862,7 @@ protected function prepDocument(): void
861862 * Initialize a node with the readability object. Also checks the
862863 * className/id for special names to add to its score.
863864 */
864- protected function initializeNode (\ DOMElement $ node ): void
865+ protected function initializeNode (JSLikeHTMLElement $ node ): void
865866 {
866867 if (!isset ($ node ->tagName )) {
867868 return ;
@@ -929,10 +930,8 @@ protected function initializeNode(\DOMElement $node): void
929930 /**
930931 * Using a variety of metrics (content score, classname, element types), find the content that is
931932 * most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
932- *
933- * @return \DOMElement|false
934933 */
935- protected function grabArticle (?\ DOMElement $ page = null )
934+ protected function grabArticle (?JSLikeHTMLElement $ page = null ): ? JSLikeHTMLElement
936935 {
937936 if (!$ page ) {
938937 $ page = $ this ->dom ;
@@ -1078,7 +1077,7 @@ protected function grabArticle(?\DOMElement $page = null)
10781077
10791078 foreach ($ ancestors as $ level => $ ancestor ) {
10801079 if (!$ ancestor ->nodeName || !$ ancestor ->parentNode ) {
1081- return false ;
1080+ return null ;
10821081 }
10831082
10841083 if (!$ ancestor ->hasAttribute ('readability ' )) {
@@ -1103,13 +1102,13 @@ protected function grabArticle(?\DOMElement $page = null)
11031102 * This is faster to do before scoring but safer after.
11041103 */
11051104 if ($ this ->flagIsActive (self ::FLAG_STRIP_UNLIKELYS ) && $ xpath ) {
1106- /** @var \DOMNodeList<\DOMElement > */
1105+ /** @var \DOMNodeList<JSLikeHTMLElement > */
11071106 $ candidates = $ xpath ->query ('.//*[(self::footer and count(//footer)<2) or (self::aside and count(//aside)<2)] ' , $ page ->documentElement );
11081107
11091108 for ($ c = $ candidates ->length - 1 ; $ c >= 0 ; --$ c ) {
11101109 $ node = $ candidates ->item ($ c );
11111110 // node should be readable but not inside of an article otherwise it's probably non-readable block
1112- if ($ node ->hasAttribute ('readability ' ) && (int ) $ node ->getAttributeNode ('readability ' )->value < 40 && ($ node ->parentNode instanceof \DOMElement ? 0 !== strcasecmp ($ node ->parentNode ->tagName , 'article ' ) : true )) {
1111+ if ($ node ->hasAttribute ('readability ' ) && (int ) $ node ->getAttributeNode ('readability ' )->value < 40 && ($ node ->parentNode instanceof JSLikeHTMLElement ? 0 !== strcasecmp ($ node ->parentNode ->tagName , 'article ' ) : true )) {
11131112 $ this ->logger ->debug ('Removing unlikely candidate (using note) ' . $ node ->getNodePath () . ' by " ' . $ node ->tagName . '" with readability ' . self ::getContentScore ($ node ));
11141113 $ node ->parentNode ->removeChild ($ node );
11151114 }
@@ -1130,7 +1129,7 @@ protected function grabArticle(?\DOMElement $page = null)
11301129 $ topCandidates = array_fill (0 , 5 , null );
11311130 if ($ xpath ) {
11321131 // Using array of DOMElements after deletion is a path to DOOMElement.
1133- /** @var \DOMNodeList<\DOMElement > */
1132+ /** @var \DOMNodeList<JSLikeHTMLElement > */
11341133 $ candidates = $ xpath ->query ('.//*[@data-candidate] ' , $ page ->documentElement );
11351134 $ this ->logger ->debug ('Candidates: ' . $ candidates ->length );
11361135
@@ -1157,7 +1156,7 @@ protected function grabArticle(?\DOMElement $page = null)
11571156 }
11581157 }
11591158
1160- /** @var \DOMNodeList<\DOMElement > */
1159+ /** @var array<int, JSLikeHTMLElement|null> */
11611160 $ topCandidates = array_filter (
11621161 $ topCandidates ,
11631162 fn ($ v , $ idx ) => 0 === $ idx || null !== $ v ,
@@ -1250,7 +1249,7 @@ protected function grabArticle(?\DOMElement $page = null)
12501249 if (0 === strcasecmp ($ tagName , 'td ' ) || 0 === strcasecmp ($ tagName , 'tr ' )) {
12511250 $ up = $ topCandidate ;
12521251
1253- if ($ up ->parentNode instanceof \DOMElement ) {
1252+ if ($ up ->parentNode instanceof JSLikeHTMLElement ) {
12541253 $ up = $ up ->parentNode ;
12551254
12561255 if (0 === strcasecmp ($ up ->tagName , 'table ' )) {
@@ -1280,19 +1279,19 @@ protected function grabArticle(?\DOMElement $page = null)
12801279 $ siblingNode = $ siblingNodes ->item ($ s );
12811280 $ siblingNodeName = $ siblingNode ->nodeName ;
12821281 $ append = false ;
1283- $ this ->logger ->debug ('Looking at sibling node: ' . $ siblingNode ->getNodePath () . (($ siblingNode instanceof \DOMElement && $ siblingNode ->hasAttribute ('readability ' )) ? (' with score ' . $ siblingNode ->getAttribute ('readability ' )) : '' ));
1282+ $ this ->logger ->debug ('Looking at sibling node: ' . $ siblingNode ->getNodePath () . (($ siblingNode instanceof JSLikeHTMLElement && $ siblingNode ->hasAttribute ('readability ' )) ? (' with score ' . $ siblingNode ->getAttribute ('readability ' )) : '' ));
12841283
12851284 if ($ siblingNode ->isSameNode ($ topCandidate )) {
12861285 $ append = true ;
12871286 } else {
12881287 $ contentBonus = 0 ;
12891288
12901289 // Give a bonus if sibling nodes and top candidates have the same classname.
1291- if ($ siblingNode instanceof \DOMElement && $ siblingNode ->getAttribute ('class ' ) === $ topCandidate ->getAttribute ('class ' ) && '' !== $ topCandidate ->getAttribute ('class ' )) {
1290+ if ($ siblingNode instanceof JSLikeHTMLElement && $ siblingNode ->getAttribute ('class ' ) === $ topCandidate ->getAttribute ('class ' ) && '' !== $ topCandidate ->getAttribute ('class ' )) {
12921291 $ contentBonus += ((int ) $ topCandidate ->getAttribute ('readability ' )) * 0.2 ;
12931292 }
12941293
1295- if ($ siblingNode instanceof \DOMElement && $ siblingNode ->hasAttribute ('readability ' ) && (((int ) $ siblingNode ->getAttribute ('readability ' )) + $ contentBonus ) >= $ siblingScoreThreshold ) {
1294+ if ($ siblingNode instanceof JSLikeHTMLElement && $ siblingNode ->hasAttribute ('readability ' ) && (((int ) $ siblingNode ->getAttribute ('readability ' )) + $ contentBonus ) >= $ siblingScoreThreshold ) {
12961295 $ append = true ;
12971296 } elseif (0 === strcasecmp ($ siblingNodeName , 'p ' )) {
12981297 $ linkDensity = (int ) $ this ->getLinkDensity ($ siblingNode );
@@ -1369,7 +1368,7 @@ protected function grabArticle(?\DOMElement $page = null)
13691368 return $ this ->grabArticle ($ this ->body );
13701369 }
13711370
1372- return false ;
1371+ return null ;
13731372 }
13741373
13751374 return $ articleContent ;
@@ -1379,7 +1378,7 @@ protected function grabArticle(?\DOMElement $page = null)
13791378 * Get an element weight by attribute.
13801379 * Uses regular expressions to tell if this element looks good or bad.
13811380 */
1382- protected function weightAttribute (\ DOMElement $ element , string $ attribute ): int
1381+ protected function weightAttribute (JSLikeHTMLElement $ element , string $ attribute ): int
13831382 {
13841383 if (!$ element ->hasAttribute ($ attribute )) {
13851384 return 0 ;
@@ -1423,7 +1422,7 @@ protected function reinitBody(): void
14231422 *
14241423 * @param callable(float): float $f
14251424 */
1426- private static function updateContentScore (\ DOMElement $ element , callable $ f ): void
1425+ private static function updateContentScore (JSLikeHTMLElement $ element , callable $ f ): void
14271426 {
14281427 $ readabilityAttr = $ element ->getAttributeNode ('readability ' );
14291428 $ prevScore = (float ) $ readabilityAttr ->value ;
@@ -1433,7 +1432,7 @@ private static function updateContentScore(\DOMElement $element, callable $f): v
14331432 /**
14341433 * Gets the content score for given element: the float value of its 'readability' attribute, or 0 when the element has no such attribute.
14351434 */
1436- private static function getContentScore (\ DOMElement $ element ): float
1435+ private static function getContentScore (JSLikeHTMLElement $ element ): float
14371436 {
14381437 return $ element ->hasAttribute ('readability ' ) ? (float ) $ element ->getAttribute ('readability ' ) : 0 ;
14391438 }
@@ -1505,11 +1504,11 @@ private function loadHtml(): void
15051504 $ this ->dom ->registerNodeClass (\DOMElement::class, JSLikeHTMLElement::class);
15061505 }
15071506
1508- private function getAncestors (\ DOMElement $ node , int $ maxDepth = 0 ): array
1507+ private function getAncestors (JSLikeHTMLElement $ node , int $ maxDepth = 0 ): array
15091508 {
15101509 $ ancestors = [];
15111510 $ i = 0 ;
1512- while ($ node ->parentNode instanceof \DOMElement ) {
1511+ while ($ node ->parentNode instanceof JSLikeHTMLElement ) {
15131512 $ ancestors [] = $ node ->parentNode ;
15141513 if (++$ i === $ maxDepth ) {
15151514 break ;
@@ -1537,7 +1536,7 @@ private function isPhrasingContent($node): bool
15371536 );
15381537 }
15391538
1540- private function hasSingleTagInsideElement (\ DOMElement $ node , string $ tag ): bool
1539+ private function hasSingleTagInsideElement (JSLikeHTMLElement $ node , string $ tag ): bool
15411540 {
15421541 if (1 !== $ node ->childNodes ->length || $ node ->childNodes ->item (0 )->nodeName !== $ tag ) {
15431542 return false ;
@@ -1557,7 +1556,7 @@ private function hasSingleTagInsideElement(\DOMElement $node, string $tag): bool
15571556 * Tidy must be configured to not clean the input for this function to
15581557 * work as expected, see $this->tidy_config['clean']
15591558 */
1560- private function isNodeVisible (\ DOMElement $ node ): bool
1559+ private function isNodeVisible (JSLikeHTMLElement $ node ): bool
15611560 {
15621561 return !(
15631562 $ node ->hasAttribute ('style ' )
0 commit comments