Skip to content

Commit 7c9e1aa

Browse files
author
Nito
committed
Static array deleted
1 parent 43647e2 commit 7c9e1aa

File tree

1 file changed

+8
-11
lines changed

1 file changed

+8
-11
lines changed

src/LanguageData.php

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@
1616
limitations under the License.
1717
*/
1818

19+
// This file matches the trained data of the Ngrams database; it has to be updated whenever a new database is trained.
20+
1921
namespace Nitotm\Eld;
2022

21-
require_once __DIR__.'/LanguageSubset.php';
23+
require_once __DIR__ . '/LanguageSubset.php';
2224

2325
class LanguageData extends LanguageSubset
2426
{
@@ -35,16 +37,10 @@ class LanguageData extends LanguageSubset
3537

3638
// ['Amharic', 'Arabic', 'Azerbaijani (Latin)', 'Belarusian', 'Bulgarian', 'Bengali', 'Catalan', 'Czech', 'Danish', 'German', 'Greek', 'English', 'Spanish', 'Estonian', 'Basque', 'Persian', 'Finnish', 'French', 'Gujarati', 'Hebrew', 'Hindi', 'Croatian', 'Hungarian', 'Armenian', 'Icelandic', 'Italian', 'Japanese', 'Georgian', 'Kannada', 'Korean', 'Kurdish (Arabic)', 'Lao', 'Lithuanian', 'Latvian', 'Malayalam', 'Marathi', 'Malay (Latin)', 'Dutch', 'Norwegian', 'Oriya', 'Punjabi', 'Polish', 'Portuguese', 'Romanian', 'Russian', 'Slovak', 'Slovene', 'Albanian', 'Serbian (Cyrillic)', 'Swedish', 'Tamil', 'Telugu', 'Thai', 'Tagalog', 'Turkish', 'Ukrainian', 'Urdu', 'Vietnamese', 'Yoruba', 'Chinese'];
3739

38-
// Predeclared for speed.
39-
protected $langScore
40-
= [
41-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
43-
];
44-
4540
// Deprecated for now. Some languages score higher with the same amount of text; this multiplier evens it out for multi-language strings.
4641
//protected $scoreNormalizer = [0.7, 1, 1, 1, 1, 0.6, 0.98, 1, 1, 1, 0.9, 1, 1, 1, 1, 1, 1, 1, 0.6, 1, 0.7, 1, 1, 0.9, 1, 1, 0.8, 0.6, 0.6, 1, 1, 0.5, 1, 1, 0.6, 0.7, 1, 0.95, 1, 0.6, 0.6, 1, 1, 1, 1, 1, 1, 0.9, 1, 1, 0.6, 0.6, 0.7, 0.9, 1, 1, 1, 0.8, 1, 1.7];
4742

43+
protected $langScore;
4844
protected $avgScore
4945
= [
5046
0.0661, 0.0237, 0.0269, 0.0227, 0.0234, 0.1373, 0.0246, 0.0242, 0.0277, 0.0275, 0.0369, 0.0378, 0.0252,
@@ -54,10 +50,11 @@ class LanguageData extends LanguageSubset
5450
0.0882, 0.0368, 0.0258, 0.0206, 0.0282, 0.0467, 0.0329, 0.0152
5551
];
5652

57-
function __construct($subsetFile = false)
53+
function __construct(string $ngramsFile = 'ngrams-m.php')
5854
{
59-
// Opcache needs to be active, so the load of this database array does not add overhead.
60-
require __DIR__.'/ngrams/'.(! $subsetFile ? 'ngrams-m.php' : $subsetFile);
55+
// Opcache needs to be active so that loading the database array does not add overhead.
56+
require __DIR__ . '/ngrams/' . $ngramsFile;
6157
// Internal reference: _ngrams_newAddEnd4gramExtra_1-4_2824 + _ngrams_charUtf8_1-1_2291
58+
$this->langScore = array_fill(0, count($this->langCodes), 0);
6259
}
6360
}

0 commit comments

Comments
 (0)