Skip to content

Commit 27536ca

Browse files
committed
Update readability
1 parent 54d595a commit 27536ca

File tree

3 files changed

+1033
-520
lines changed

3 files changed

+1033
-520
lines changed

ext/readability-master/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ The `options` object accepts a number of properties, all optional:
3737
* `disableJSONLD` (boolean, default `false`): when extracting page metadata, Readability gives precedence to Schema.org fields specified in the JSON-LD format. Set this option to `true` to skip JSON-LD parsing.
3838
* `serializer` (function, default `el => el.innerHTML`) controls how the `content` property returned by the `parse()` method is produced from the root DOM element. It may be useful to specify the `serializer` as the identity function (`el => el`) to obtain a DOM element instead of a string for `content` if you plan to process it further.
3939
* `allowedVideoRegex` (RegExp, default `undefined`): a regular expression that matches video URLs that should be allowed to be included in the article content. If `undefined`, the [default regex](https://github.com/mozilla/readability/blob/8e8ec27cd2013940bc6f3cc609de10e35a1d9d86/Readability.js#L133) is applied.
40+
* `linkDensityModifier` (number, default `0`): a number that is added to the base link density threshold during the shadiness checks. Use a negative value to lower the threshold and penalize nodes with a high link density, or a positive value to raise it and tolerate them.
4041

4142
### `parse()`
4243

ext/readability-master/Readability-readerable.js

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,23 @@
2222
var REGEXPS = {
2323
// NOTE: These two regular expressions are duplicated in
2424
// Readability.js. Please keep both copies in sync.
25-
unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,
26-
okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i,
25+
unlikelyCandidates:
26+
/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,
27+
okMaybeItsACandidate: /and|article|body|column|content|main|mathjax|shadow/i,
2728
};
2829

2930
function isNodeVisible(node) {
30-
// Have to null-check node.style and node.className.indexOf to deal with SVG and MathML nodes.
31-
return (!node.style || node.style.display != "none")
32-
&& !node.hasAttribute("hidden")
31+
// Have to null-check node.style and node.className.includes to deal with SVG and MathML nodes.
32+
return (
33+
(!node.style || node.style.display != "none") &&
34+
!node.hasAttribute("hidden") &&
3335
//check for "fallback-image" so that wikimedia math images are displayed
34-
&& (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || (node.className && node.className.indexOf && node.className.indexOf("fallback-image") !== -1));
36+
(!node.hasAttribute("aria-hidden") ||
37+
node.getAttribute("aria-hidden") != "true" ||
38+
(node.className &&
39+
node.className.includes &&
40+
node.className.includes("fallback-image")))
41+
);
3542
}
3643

3744
/**
@@ -49,7 +56,11 @@ function isProbablyReaderable(doc, options = {}) {
4956
options = { visibilityChecker: options };
5057
}
5158

52-
var defaultOptions = { minScore: 20, minContentLength: 140, visibilityChecker: isNodeVisible };
59+
var defaultOptions = {
60+
minScore: 20,
61+
minContentLength: 140,
62+
visibilityChecker: isNodeVisible,
63+
};
5364
options = Object.assign(defaultOptions, options);
5465

5566
var nodes = doc.querySelectorAll("p, pre, article");
@@ -79,8 +90,10 @@ function isProbablyReaderable(doc, options = {}) {
7990
}
8091

8192
var matchString = node.className + " " + node.id;
82-
if (REGEXPS.unlikelyCandidates.test(matchString) &&
83-
!REGEXPS.okMaybeItsACandidate.test(matchString)) {
93+
if (
94+
REGEXPS.unlikelyCandidates.test(matchString) &&
95+
!REGEXPS.okMaybeItsACandidate.test(matchString)
96+
) {
8497
return false;
8598
}
8699

@@ -103,6 +116,7 @@ function isProbablyReaderable(doc, options = {}) {
103116
}
104117

105118
if (typeof module === "object") {
119+
/* eslint-disable-next-line no-redeclare */
106120
/* global module */
107121
module.exports = isProbablyReaderable;
108122
}

0 commit comments

Comments
 (0)