MI2DataLab · lunayach · Jul 17, 2019 · Jul 18, 2019 · Jul 17, 2019 · Jul 18, 2019
diff --git a/docs/source/aspects.rst b/docs/source/aspects.rst
@@ -100,6 +100,14 @@ WhiteSpaces
 -----------------------
 
 .. autoclass:: wildnlp.aspects.white_spaces.WhiteSpaces
+   :members:
+   :special-members: __init__
+   :show-inheritance:
+
+LowerCase
+-----------------------
+
+.. autoclass:: wildnlp.aspects.lowercase.LowerCase
    :members:
    :special-members: __init__
    :show-inheritance:
diff --git a/tests/aspects/test_lowercase.py b/tests/aspects/test_lowercase.py
@@ -0,0 +1,12 @@
+from wildnlp.aspects import LowerCase
+
+
+def test_single_word():
+    """A lone capitalised word is returned fully lower-cased."""
+    assert LowerCase()("Language") == "language"
+
+
+def test_sentence():
+    """Every word of a mixed-case sentence is lower-cased."""
+    lowered = LowerCase()("EU rejects German call to boycott British lamb.")
+    assert lowered == "eu rejects german call to boycott british lamb."
diff --git a/wildnlp/aspects/__init__.py b/wildnlp/aspects/__init__.py
@@ -9,3 +9,4 @@
 from .change_char import ChangeChar
 from .white_spaces import WhiteSpaces
 from .add_sub_string import AddSubString
+from .lowercase import LowerCase
diff --git a/wildnlp/aspects/lowercase.py b/wildnlp/aspects/lowercase.py
@@ -0,0 +1,18 @@
+from .base import Aspect
+
+
+class LowerCase(Aspect):
+    """Aspect that lower-cases every word of a sentence."""
+
+    def __call__(self, sentence):
+        # Split on ' ' (not on any whitespace) so runs of spaces are kept.
+        pieces = sentence.split(' ')
+        return " ".join(self._lowercase_word(piece) if piece else piece
+                        for piece in pieces)
+
+    @staticmethod
+    def _lowercase_word(word):
+        """Return `word` lower-cased; raise ValueError if it is empty."""
+        if len(word) == 0:
+            raise ValueError("Can't lowercase empty words")
+        return word.lower()
diff --git a/wildnlp/datasets/conll.py b/wildnlp/datasets/conll.py
@@ -54,14 +54,17 @@ def load(self, path):
                 processed = self._process_sample(sample)
                 self._data.append(processed)
 
-    def apply(self, aspect, apply_to_ne=False):
+    def apply(self, aspect, apply_to_ne=False, apply_to_both=False):
         """
 
         :param aspect: transformation function
 
         :param apply_to_ne: if `False`, transformation won't be applied
                             to Named Entities. If `True`, transformation
                             will be applied only to Named Entities.
+        :param apply_to_both: if `True`, transformation will be applied
+                            to both the Named Entities and other tokens.
+
 
         :return: modified dataset in the following form:
 
@@ -82,10 +85,13 @@ def apply(self, aspect, apply_to_ne=False):
         for entry in self._data:
             tags = entry['ner_tags']
 
-            if apply_to_ne is False:
-                non_ner = np.where(tags == 'O')[0]
+            if not apply_to_both:
+                if apply_to_ne is False:
+                    non_ner = np.where(tags == 'O')[0]
+                else:
+                    non_ner = np.where(tags != 'O')[0]
             else:
-                non_ner = np.where(tags != 'O')[0]
+                non_ner = range(len(entry['tokens']))
 
             if len(non_ner) == 0:
                 modified.append(entry)