-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnltk_learn.py
More file actions
35 lines (26 loc) · 970 Bytes
/
nltk_learn.py
File metadata and controls
35 lines (26 loc) · 970 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from nltk import tag, tokenize, chunk
# Named-entity chunking demo: tokenize the sentence, tag each token with its
# part of speech, chunk the tagged tokens into a tree, then display the tree.
example_text = "At least nine tenths of the students passed."
sent = tokenize.word_tokenize(example_text)
tagged_sent = tag.pos_tag(sent)
tree = chunk.ne_chunk(tagged_sent)
tree.draw()
from nltk.corpus import stopwords
# Load the English stopword list and collapse it into a set before printing.
english_stopwords = stopwords.words('english')
stop = set(english_stopwords)
print(stop)
import nltk
from nltk import CFG
# Toy context-free grammar for the "elephant in my pajamas" example.
# A PP can attach either to a noun phrase (NP -> Det N PP) or to a verb
# phrase (VP -> VP PP), so a sentence ending in a PP can be derived in more
# than one way under these rules.
groucho_grammar = CFG.fromstring("""
S -> NP VP
PP -> P NP
NP -> Det N | Det N PP | 'I'
VP -> V NP | VP PP
Det -> 'an' | 'my'
N -> 'elephant' | 'pajamas'
V -> 'shot'
P -> 'in'
""")
# Parse the example sentence with groucho_grammar and print each parse tree.
# The grammar's terminals are single words, so the sentence is split into
# tokens before being handed to the parser.
sent = nltk.word_tokenize('I shot an elephant in my pajamas')
parser = nltk.ChartParser(groucho_grammar)
trees = parser.parse(sent)

# The print call must be indented so it is the body of the loop; without the
# indentation the script fails to compile with an IndentationError.
for tree in trees:
    print(tree)