1616limitations under the License.
1717*/
1818
19+ // This file matches the trained data of the Ngrams database; it has to be updated whenever a new database is trained.
20+
1921namespace Nitotm \Eld ;
2022
21- require_once __DIR__ . '/LanguageSubset.php ' ;
23+ require_once __DIR__ . '/LanguageSubset.php ' ;
2224
2325class LanguageData extends LanguageSubset
2426{
@@ -35,16 +37,10 @@ class LanguageData extends LanguageSubset
3537
3638 // ['Amharic', 'Arabic', 'Azerbaijani (Latin)', 'Belarusian', 'Bulgarian', 'Bengali', 'Catalan', 'Czech', 'Danish', 'German', 'Greek', 'English', 'Spanish', 'Estonian', 'Basque', 'Persian', 'Finnish', 'French', 'Gujarati', 'Hebrew', 'Hindi', 'Croatian', 'Hungarian', 'Armenian', 'Icelandic', 'Italian', 'Japanese', 'Georgian', 'Kannada', 'Korean', 'Kurdish (Arabic)', 'Lao', 'Lithuanian', 'Latvian', 'Malayalam', 'Marathi', 'Malay (Latin)', 'Dutch', 'Norwegian', 'Oriya', 'Punjabi', 'Polish', 'Portuguese', 'Romanian', 'Russian', 'Slovak', 'Slovene', 'Albanian', 'Serbian (Cyrillic)', 'Swedish', 'Tamil', 'Telugu', 'Thai', 'Tagalog', 'Turkish', 'Ukrainian', 'Urdu', 'Vietnamese', 'Yoruba', 'Chinese'];
3739
38- // Predeclared for speed.
39- protected $ langScore
40- = [
41- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
42- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
43- ];
44-
4540 // Deprecated for now. Some languages score higher with the same amount of text; this multiplier evens it out for multi-language strings.
4641 //protected $scoreNormalizer = [0.7, 1, 1, 1, 1, 0.6, 0.98, 1, 1, 1, 0.9, 1, 1, 1, 1, 1, 1, 1, 0.6, 1, 0.7, 1, 1, 0.9, 1, 1, 0.8, 0.6, 0.6, 1, 1, 0.5, 1, 1, 0.6, 0.7, 1, 0.95, 1, 0.6, 0.6, 1, 1, 1, 1, 1, 1, 0.9, 1, 1, 0.6, 0.6, 0.7, 0.9, 1, 1, 1, 0.8, 1, 1.7];
4742
43+ protected $ langScore ;
4844 protected $ avgScore
4945 = [
5046 0.0661 , 0.0237 , 0.0269 , 0.0227 , 0.0234 , 0.1373 , 0.0246 , 0.0242 , 0.0277 , 0.0275 , 0.0369 , 0.0378 , 0.0252 ,
@@ -54,10 +50,11 @@ class LanguageData extends LanguageSubset
5450 0.0882 , 0.0368 , 0.0258 , 0.0206 , 0.0282 , 0.0467 , 0.0329 , 0.0152
5551 ];
5652
57- function __construct ($ subsetFile = false )
53+ function __construct (string $ ngramsFile = ' ngrams-m.php ' )
5854 {
59- // Opcache needs to be active, so the load of this database array does not add overhead.
60- require __DIR__ . '/ngrams/ ' .(! $ subsetFile ? ' ngrams-m.php ' : $ subsetFile );
55+ // Opcache needs to be active, so the load of the database array does not add overhead.
56+ require __DIR__ . '/ngrams/ ' . $ ngramsFile ;
6157 // Internal reference: _ngrams_newAddEnd4gramExtra_1-4_2824 + _ngrams_charUtf8_1-1_2291
58+ $ this ->langScore = array_fill (0 , count ($ this ->langCodes ), 0 );
6259 }
6360}
0 commit comments