diff --git a/mapper.py b/mapper.py index 9fa3def..4efaece 100644 --- a/mapper.py +++ b/mapper.py @@ -6,9 +6,15 @@ # remove leading and trailing whitespace line = line.strip() + # make lower case + line = line.lower() + # split the line into words; splits on any whitespace words = line.split() + # set of stopwords + stopwords = set(['the', 'and', 'a', 'for', 'I', 'go', 'four', 'three', 'be', 'not', 'to', 'railing']) + # output tuples (word, 1) in tab-delimited format for word in words: print '%s\t%s' % (word, "1")