-
Couldn't load subscription status.
- Fork 4
Adding the lowercase aspect #5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
84c6637
6437d46
d8decf3
95accbf
01b87b4
b4dd625
8096acf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| from wildnlp.aspects import LowerCase | ||
|
|
||
|
|
||
def test_single_word():
    """A single capitalised word comes back fully lower-cased."""
    lowercase = LowerCase()
    assert lowercase("Language") == "language"
|
|
||
|
|
||
def test_sentence():
    """A full sentence is lower-cased; spacing and punctuation are kept."""
    original = "EU rejects German call to boycott British lamb."
    expected = "eu rejects german call to boycott british lamb."

    assert LowerCase()(original) == expected
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| from .base import Aspect | ||
|
|
||
|
|
||
class LowerCase(Aspect):
    """Aspect that converts every word of a sentence to lower case.
    """

    def __call__(self, sentence):
        """Return `sentence` with each space-separated word lower-cased.

        The sentence is split on single spaces. Empty chunks (produced by
        leading, trailing or consecutive spaces) are passed through as-is,
        so the original spacing is reproduced when the pieces are joined.

        :param sentence: text to transform
        :return: the lower-cased text
        """
        lowered = []
        for chunk in sentence.split(' '):
            if chunk == '':
                # The helper rejects empty words, so keep them untouched.
                lowered.append('')
            else:
                lowered.append(self._lowercase_word(chunk))
        return " ".join(lowered)

    @staticmethod
    def _lowercase_word(word):
        """Lower-case a single, non-empty word.

        :param word: the word to transform
        :return: `word` converted to lower case
        :raises ValueError: if `word` is empty
        """
        if len(word) > 0:
            return word.lower()
        raise ValueError("Can't lowercase empty words")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,14 +54,17 @@ def load(self, path): | |
| processed = self._process_sample(sample) | ||
| self._data.append(processed) | ||
|
|
||
| def apply(self, aspect, apply_to_ne=False): | ||
| def apply(self, aspect, apply_to_ne=False, apply_to_both=False): | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe it would be better to rewrite it so for example there is the |
||
| """ | ||
|
|
||
| :param aspect: transformation function | ||
|
|
||
| :param apply_to_ne: if `False`, transformation won't be applied | ||
| to Named Entities. If `True`, transformation | ||
| will be applied only to Named Entities. | ||
| :param apply_to_both: if `True`, transformation will be applied | ||
| to both the Named Entities and other tokens. | ||
|
|
||
|
|
||
| :return: modified dataset in the following form: | ||
|
|
||
|
|
@@ -82,10 +85,13 @@ def apply(self, aspect, apply_to_ne=False): | |
| for entry in self._data: | ||
| tags = entry['ner_tags'] | ||
|
|
||
| if apply_to_ne is False: | ||
| non_ner = np.where(tags == 'O')[0] | ||
| if not apply_to_both: | ||
| if apply_to_ne is False: | ||
| non_ner = np.where(tags == 'O')[0] | ||
| else: | ||
| non_ner = np.where(tags != 'O')[0] | ||
| else: | ||
| non_ner = np.where(tags != 'O')[0] | ||
| non_ner = range(len(entry['tokens'])) | ||
|
|
||
| if len(non_ner) == 0: | ||
| modified.append(entry) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add the info that it's especially important for NER tasks?