Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests/initialization.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ START_TEST (initialize_using_lang_code_custom_symbols_dir)
varnam *handle;
int exitcode;

// Copy a sample vst file to the temp
/* Copy a sample vst file to the temp */
exitcode = system ("cp ../schemes/ml.vst /tmp");

varnam_set_symbols_dir("/tmp");
Expand Down
19 changes: 19 additions & 0 deletions tests/transliteration.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,24 @@ START_TEST (indic_digit_rendering)
}
END_TEST

START_TEST (no_chil_combos)
{
    varray *words;
    vword *word;
    int rc;

    /* Regression test for
     * https://github.com/varnamproject/libvarnam/issues/166
     *
     * When the learnings contain a word that ends with an anusvaram or a
     * chil (for example മലയാളം), adding a combination after it must not
     * produce incorrect output such as മലയാളംോ. */

    rc = varnam_transliterate (varnam_instance, "malayalamO", &words);
    assert_success (rc);

    /* Two suggestions are expected; the first one is checked for the
     * correct form. */
    ck_assert_int_eq (varray_length (words), 2);

    word = varray_get (words, 0);
    ck_assert_str_eq (word->text, "മലയാളമോ");
}
END_TEST

TCase* get_transliteration_tests()
{
TCase* tcase = tcase_create("transliteration");
Expand All @@ -123,5 +141,6 @@ TCase* get_transliteration_tests()
tcase_add_test (tcase, dependent_vowel_rendering);
tcase_add_test (tcase, cancellation_character_should_force_independent_vowel_form);
tcase_add_test (tcase, indic_digit_rendering);
tcase_add_test (tcase, no_chil_combos);
return tcase;
}
30 changes: 30 additions & 0 deletions words-table.c
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,26 @@ print_tokens_array(varray *tokens)

}

/* Replaces the last chil letter token with its root consonant,
 * e.g. ൽ with ല.
 *
 * tokens is treated as an array of token arrays. Only the last token of the
 * last inner array is examined: when its tag is "chill", value3 is copied
 * over value1 in place.
 *
 * Returns the same tokens pointer that was passed in. When tokens is NULL or
 * empty, or its last inner array is empty, nothing is modified. */
varray*
replace_last_chil(varray *tokens)
{
    varray *last_group;
    vtoken *last_token;

    /* Nothing was tokenized, so there is no trailing chil to replace. */
    if (tokens == NULL || varray_length (tokens) <= 0) {
        return tokens;
    }

    last_group = varray_get (tokens, varray_length (tokens) - 1);
    if (last_group == NULL || varray_length (last_group) <= 0) {
        return tokens;
    }

    last_token = varray_get (last_group, varray_length (last_group) - 1);
    if (last_token == NULL) {
        return tokens;
    }

    /* NOTE(review): assumes value3 of a "chill" token holds the root
     * consonant - confirm against the scheme definition. */
    if (strcmp (last_token->tag, "chill") == 0) {
        strcpy (last_token->value1, last_token->value3);
    }

    return tokens;
}

/* This function learns all possibilities of writing the word and its prefixes.
* It finds cartesian product of the tokens passed in and process each product.
* tokens will be a multidimensional array */
Expand Down Expand Up @@ -867,6 +887,16 @@ vwt_tokenize_pattern (varnam *handle, const char *pattern, varray *result)
rc = vst_tokenize (handle, strbuf_to_s(match), VARNAM_TOKENIZER_VALUE, VARNAM_MATCH_EXACT, tokens);
if (rc) return rc;

/**
* Suppose varnam learnings has the word `kilivaathil => കിളിവാതിൽ`.
* When Varnam finds this word, it uses the learned word and then tokenizes the rest of the input, which produces chil combinations.
* Prevent this by replacing end chil with its root consonant.
* https://github.com/varnamproject/libvarnam/issues/166
*/
if (strcmp(handle->internal->scheme_details->langCode, "ml") == 0) {
replace_last_chil(tokens);
}

add_tokens (handle, tokens, result, first_match);
varray_clear (tokens);
}
Expand Down