You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
* Tries to insert a `meta[charset]` tag into the proper place in the passed HTML document.
1459
+
*
1460
+
* `DOMDocument::loadHTML` will parse HTML documents as ISO-8859-1 if there is no `meta[charset]` tag.
1461
+
* This means that UTF-8-encoded HTML fragments, such as those coming from the JSON-LD `articleBody` field, would be parsed with an incorrect encoding.
1462
+
* Unfortunately, we cannot just put the tag at the start of the HTML fragment, since that would cause the parser to auto-insert an `html` element, losing the attributes of the original `html` tag.
1463
+
*
1464
+
* @param string $html UTF-8 encoded document
1465
+
*/
1466
+
privatestaticfunctionensureMetaCharset($html)
1467
+
{
1468
+
$charsetTag = '<meta charset="utf-8">';
1469
+
1470
+
// Only look at the first 1024 bytes since, according to the HTML5 specification,
1471
+
// that’s where <meta> elements declaring a character encoding must be located.
@@ -106,7 +106,6 @@ public function testInitDivP()
106
106
publicfunctiontestInitDiv()
107
107
{
108
108
$readability = $this->getReadability('<div>' . str_repeat('This is the awesome content :)', 7) . '</div>', 'http://0.0.0.0');
109
-
$readability->debug = true;
110
109
$res = $readability->init();
111
110
112
111
$this->assertTrue($res);
@@ -120,7 +119,6 @@ public function testInitDiv()
120
119
publicfunctiontestWithFootnotes()
121
120
{
122
121
$readability = $this->getReadability('<div>' . str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7) . '</div>', 'http://0.0.0.0');
123
-
$readability->debug = true;
124
122
$readability->convertLinksToFootnotes = true;
125
123
$res = $readability->init();
126
124
@@ -137,7 +135,6 @@ public function testWithFootnotes()
137
135
publicfunctiontestStandardClean()
138
136
{
139
137
$readability = $this->getReadability('<div><h2>Title</h2>' . str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7) . '<a href="#nofollow" rel="nofollow">will NOT be removed</a></div>', 'http://0.0.0.0');
140
-
$readability->debug = true;
141
138
$readability->lightClean = false;
142
139
$res = $readability->init();
143
140
@@ -154,7 +151,6 @@ public function testStandardClean()
154
151
publicfunctiontestWithIframe()
155
152
{
156
153
$readability = $this->getReadability('<div><h2>Title</h2>' . str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7) . '<p>This is an awesome text with some links, here there are <iframe src="http://youtube.com/test" href="#nofollow" rel="nofollow"></iframe><iframe>http://soundcloud.com/test</iframe></p></div>', 'http://0.0.0.0');
157
-
$readability->debug = true;
158
154
$res = $readability->init();
159
155
160
156
$this->assertTrue($res);
@@ -169,7 +165,6 @@ public function testWithIframe()
169
165
publicfunctiontestWithArticle()
170
166
{
171
167
$readability = $this->getReadability('<article><p>' . str_repeat('This is an awesome text with some links, here there are: the awesome', 20) . '</p><p>This is an awesome text with some links, here there are <iframe src="http://youtube.com/test" href="#nofollow" rel="nofollow"></iframe></p></article>', 'http://0.0.0.0');
172
-
$readability->debug = true;
173
168
$res = $readability->init();
174
169
175
170
$this->assertTrue($res);
@@ -184,7 +179,6 @@ public function testWithArticle()
184
179
publicfunctiontestWithAside()
185
180
{
186
181
$readability = $this->getReadability('<article>' . str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7) . '<footer><aside>' . str_repeat('<p>This is an awesome text with some links, here there are</p>', 8) . '</aside></footer></article>', 'http://0.0.0.0');
187
-
$readability->debug = true;
188
182
$res = $readability->init();
189
183
190
184
$this->assertTrue($res);
@@ -199,7 +193,6 @@ public function testWithAside()
199
193
publicfunctiontestWithClasses()
200
194
{
201
195
$readability = $this->getReadability('<article>' . str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7) . '<div style="display:none">' . str_repeat('<p class="clock">This text should be removed</p>', 10) . '</div></article>', 'http://0.0.0.0');
202
-
$readability->debug = true;
203
196
$res = $readability->init();
204
197
205
198
$this->assertTrue($res);
@@ -214,7 +207,6 @@ public function testWithClasses()
214
207
publicfunctiontestWithClassesWithoutLightClean()
215
208
{
216
209
$readability = $this->getReadability('<article>' . str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7) . '<div style="display:none">' . str_repeat('<p class="clock">This text should be removed</p>', 10) . '</div></article>', 'http://0.0.0.0');
217
-
$readability->debug = true;
218
210
$readability->lightClean = false;
219
211
$res = $readability->init();
220
212
@@ -230,7 +222,6 @@ public function testWithClassesWithoutLightClean()
230
222
publicfunctiontestWithTd()
231
223
{
232
224
$readability = $this->getReadability('<table><tr>' . str_repeat('<td><p>This is an awesome text with some links, here there are the awesome</td>', 7) . '</tr></table>', 'http://0.0.0.0');
233
-
$readability->debug = true;
234
225
$res = $readability->init();
235
226
236
227
$this->assertTrue($res);
@@ -243,7 +234,6 @@ public function testWithTd()
243
234
publicfunctiontestWithSameClasses()
244
235
{
245
236
$readability = $this->getReadability('<article class="awesomecontent">' . str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7) . '<div class="awesomecontent">This text is also an awesome text and you should know that !</div></article>', 'http://0.0.0.0');
246
-
$readability->debug = true;
247
237
$res = $readability->init();
248
238
249
239
$this->assertTrue($res);
@@ -257,7 +247,6 @@ public function testWithSameClasses()
257
247
publicfunctiontestWithScript()
258
248
{
259
249
$readability = $this->getReadability('<article class="awesomecontent">' . str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7) . '<p><script>This text is also an awesome text and you should know that !</script></p></article>', 'http://0.0.0.0');
260
-
$readability->debug = true;
261
250
$res = $readability->init();
262
251
263
252
$this->assertTrue($res);
@@ -271,7 +260,6 @@ public function testWithScript()
271
260
publicfunctiontestTitle()
272
261
{
273
262
$readability = $this->getReadability('<title>this is my title</title><article class="awesomecontent">' . str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7) . '<p></p></article>', 'http://0.0.0.0');
274
-
$readability->debug = true;
275
263
$res = $readability->init();
276
264
277
265
$this->assertTrue($res);
@@ -285,7 +273,6 @@ public function testTitle()
285
273
publicfunctiontestTitleWithDash()
286
274
{
287
275
$readability = $this->getReadability('<title> title2 - title3 </title><article class="awesomecontent">' . str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7) . '<p></p></article>', 'http://0.0.0.0');
288
-
$readability->debug = true;
289
276
$res = $readability->init();
290
277
291
278
$this->assertTrue($res);
@@ -299,7 +286,6 @@ public function testTitleWithDash()
299
286
publicfunctiontestTitleWithDoubleDot()
300
287
{
301
288
$readability = $this->getReadability('<title> title2 : title3 </title><article class="awesomecontent">' . str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7) . '<p></p></article>', 'http://0.0.0.0');
302
-
$readability->debug = true;
303
289
$res = $readability->init();
304
290
305
291
$this->assertTrue($res);
@@ -313,7 +299,6 @@ public function testTitleWithDoubleDot()
313
299
publicfunctiontestTitleTooShortUseH1()
314
300
{
315
301
$readability = $this->getReadability('<title>too short</title><h1>this is my h1 title !</h1><article class="awesomecontent">' . str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7) . '<p></p></article>', 'http://0.0.0.0');
316
-
$readability->debug = true;
317
302
$res = $readability->init();
318
303
319
304
$this->assertTrue($res);
@@ -365,7 +350,6 @@ public function testAutoClosingIframeNotThrowingException()
0 commit comments