Skip to content

Commit 7252a93

Browse files
authored
Merge pull request #33 from nielstron/feat/prefix_cfg
Add a prefix language generation
2 parents 358ac36 + 3526fee commit 7252a93

File tree

2 files changed

+105
-0
lines changed

2 files changed

+105
-0
lines changed

pyformlang/cfg/cfg.py

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1106,3 +1106,54 @@ def is_normal_form(self):
11061106
"""
11071107
return all(
11081108
production.is_normal_form() for production in self._productions)
1109+
1110+
def get_prefix_language(self):
    """
    Generates the prefix language of the CFL, i.e., the language of all
    prefixes of valid words

    Based on https://cs.uwaterloo.ca/~s4bendav/files/CS360S21Lec10.pdf

    The grammar is first brought into normal form, so every production
    is either X -> c or X -> Y Z. Each variable X then gets a marked
    copy X_0 (built by ``to_zero``) that derives the prefixes of the
    words derived by X:

    * X -> c    gives  X_0 -> c   and  X_0 -> epsilon
    * X -> Y Z  gives  X_0 -> Y Z_0  and  X_0 -> Y_0

    The original productions are kept because the unmarked Y in
    X_0 -> Y Z_0 still has to derive a complete word.

    Returns
    ----------
    cfg : CFG
        A grammar whose start symbol is the marked copy of the start
        symbol of the normal form. An empty CFG is returned when this
        language is empty.
    """
    if self.is_empty():
        return CFG()
    cfg = self
    if not self.is_normal_form():
        cfg = cfg.to_normal_form()
    # Useless variables have no word to take a prefix of; drop them so
    # their marked copies cannot derive anything either.
    cfg = cfg.remove_useless_symbols()

    def to_zero(var: Variable):
        # Marked ("prefix") copy of a variable
        return Variable((var.value, 0))

    new_variables = set(cfg.variables)
    new_variables.update(to_zero(var) for var in cfg.variables)
    new_productions = list(cfg.productions)
    for p in cfg.productions:
        head_zero = to_zero(p.head)
        if len(p.body) == 1:
            # the production is of the form X -> c
            new_productions.append(Production(head_zero, p.body))
            new_productions.append(Production(head_zero, [Epsilon()]))
        else:
            # the production is of the form X -> Y Z
            new_productions.append(Production(
                head_zero,
                [p.body[0], to_zero(p.body[1])]
            ))
            new_productions.append(Production(
                head_zero,
                [to_zero(p.body[0])]
            ))
    # The empty word is a prefix of every word. Only the marked start
    # symbol may derive it: giving the unmarked start symbol an epsilon
    # production would let it vanish wherever it occurs inside a body
    # (e.g. "ab" would be accepted for S -> a S b | c).
    new_productions.append(
        Production(to_zero(cfg.start_symbol), [Epsilon()]))
    return CFG(
        variables=new_variables,
        terminals=set(cfg.terminals) | {Epsilon()},
        productions=new_productions,
        start_symbol=to_zero(cfg.start_symbol),
    )
1154+
1155+
def get_suffix_language(self):
    """
    Generates the suffix language of the CFL, i.e., the language
    containing all suffixes of valid words

    The grammar is reversed, the prefix language of the reversed
    grammar is computed, and that result is reversed again.
    """
    mirrored = self.reverse()
    mirrored_prefixes = mirrored.get_prefix_language()
    return mirrored_prefixes.reverse()

pyformlang/cfg/tests/test_cfg.py

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -858,6 +858,60 @@ def test_to_text_epsilon(self):
858858
cfg = CFG.from_text("S -> a S b | a b epsilon")
859859
assert cfg.contains(["a", "b"])
860860

861+
def test_prefix_lang(self):
    """ Tests the generation of the prefix language """
    grammar = CFG.from_text("S -> a S b | a b | c")
    prefix_grammar = grammar.get_prefix_language()
    complete_words = ("ab", "aabb")
    proper_prefixes = ("aaa", "aaac", "aaacb")
    rejected_words = ("abb", "aba", "aaaba", "bbb")

    # The empty word is a prefix of every word
    assert prefix_grammar.contains("")
    # Complete words are accepted by both grammars
    for complete in complete_words:
        assert grammar.contains(complete)
        assert prefix_grammar.contains(complete)
    # Proper prefixes are accepted by the prefix grammar only
    for partial in proper_prefixes:
        assert not grammar.contains(partial)
        assert prefix_grammar.contains(partial)
    # Words that cannot be completed are rejected by both grammars
    for rejected in rejected_words:
        assert not grammar.contains(rejected)
        assert not prefix_grammar.contains(rejected)
886+
887+
def test_suffix_lang(self):
    """ Tests the generation of the suffix language """
    grammar = CFG.from_text("S -> a S b | a b | c")
    suffix_grammar = grammar.get_suffix_language()
    complete_words = ("ab", "aabb")
    proper_suffixes = ("bbb", "cbbb", "acbbb")
    rejected_words = ("aab", "aba", "aaaba", "abbbba", "bbbbab", "aaa")

    # The empty word is a suffix of every word
    assert suffix_grammar.contains("")
    # Complete words are accepted by both grammars
    for complete in complete_words:
        assert grammar.contains(complete)
        assert suffix_grammar.contains(complete)
    # Proper suffixes are accepted by the suffix grammar only
    for partial in proper_suffixes:
        assert not grammar.contains(partial)
        assert suffix_grammar.contains(partial)
    # Words that are not the end of any valid word are rejected by both
    for rejected in rejected_words:
        assert not grammar.contains(rejected)
        assert not suffix_grammar.contains(rejected)
914+
861915

862916
def get_example_text_duplicate():
863917
""" Duplicate text """

0 commit comments

Comments
 (0)