Skip to content

Commit edb974a

Browse files
committed
add production and start symbol adding for cfg, correct epsilon equality
1 parent f24329e commit edb974a

File tree

19 files changed

+111
-76
lines changed

19 files changed

+111
-76
lines changed

pyformlang/cfg/cfg.py

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -57,23 +57,13 @@ def __init__(self,
5757
start_symbol = to_variable(start_symbol)
5858
self._variables.add(start_symbol)
5959
self._start_symbol = start_symbol
60-
if productions is not None:
61-
productions = set(productions)
62-
self._productions = productions or set()
63-
for production in self._productions:
64-
self.__initialize_production_in_cfg(production)
60+
self._productions = set()
61+
for production in productions or set():
62+
self.add_production(production)
6563
self._impacts: Dict[CFGObject, List[Tuple[CFGObject, int]]] = {}
6664
self._remaining_lists: Dict[CFGObject, List[int]] = {}
6765
self._added_impacts: Set[CFGObject] = set()
6866

69-
def __initialize_production_in_cfg(self, production: Production) -> None:
70-
self._variables.add(production.head)
71-
for cfg_object in production.body:
72-
if isinstance(cfg_object, Terminal):
73-
self._terminals.add(cfg_object)
74-
elif isinstance(cfg_object, Variable):
75-
self._variables.add(cfg_object)
76-
7767
def get_generating_symbols(self) -> Set[CFGObject]:
7868
""" Gives the objects which are generating in the CFG
7969
@@ -846,12 +836,13 @@ def get_words(self, max_length: int = -1) -> Iterable[List[Terminal]]:
846836
for production in productions:
847837
body = production.body
848838
if len(body) == 1 and isinstance(body[0], Terminal):
839+
word = [body[0]]
849840
if len(gen_d[production.head]) == 1:
850841
gen_d[production.head].append([])
851-
if [body[0]] not in gen_d[production.head][-1]:
852-
gen_d[production.head][-1].append([body[0]])
842+
if word not in gen_d[production.head][-1]:
843+
gen_d[production.head][-1].append(word)
853844
if production.head == cfg.start_symbol:
854-
yield [body[0]]
845+
yield word
855846
# Complete what is missing
856847
current_length = 2
857848
total_no_modification = 0

pyformlang/cfg/cfg_variable_converter.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,23 @@
22

33
from typing import Dict, List, AbstractSet, Tuple, Optional, Hashable
44

5-
from ..objects.cfg_objects import Variable, CFGConvertible
5+
from ..objects.cfg_objects import Variable, CFGObjectConvertible
66

77

88
class CFGVariableConverter:
99
"""A CFG Variable Converter"""
1010

1111
def __init__(self,
12-
states: AbstractSet[CFGConvertible],
13-
stack_symbols: AbstractSet[CFGConvertible]) -> None:
12+
states: AbstractSet[CFGObjectConvertible],
13+
stack_symbols: AbstractSet[CFGObjectConvertible]) -> None:
1414
self._counter = 0
15-
self._inverse_states_d: Dict[CFGConvertible, int] = {}
15+
self._inverse_states_d: Dict[CFGObjectConvertible, int] = {}
1616
self._counter_state = 0
1717
for self._counter_state, state in enumerate(states):
1818
self._inverse_states_d[state] = self._counter_state
1919
state.index_cfg_converter = self._counter_state
2020
self._counter_state += 1
21-
self._inverse_stack_symbol_d: Dict[CFGConvertible, int] = {}
21+
self._inverse_stack_symbol_d: Dict[CFGObjectConvertible, int] = {}
2222
self._counter_symbol = 0
2323
for self._counter_symbol, symbol in enumerate(stack_symbols):
2424
self._inverse_stack_symbol_d[symbol] = self._counter_symbol
@@ -29,7 +29,7 @@ def __init__(self,
2929
for _ in range(len(stack_symbols))] for _ in
3030
range(len(states))]
3131

32-
def _get_state_index(self, state: CFGConvertible) -> int:
32+
def _get_state_index(self, state: CFGObjectConvertible) -> int:
3333
"""Get the state index"""
3434
if state.index_cfg_converter is None:
3535
if state not in self._inverse_states_d:
@@ -38,7 +38,7 @@ def _get_state_index(self, state: CFGConvertible) -> int:
3838
state.index_cfg_converter = self._inverse_states_d[state]
3939
return state.index_cfg_converter
4040

41-
def _get_symbol_index(self, symbol: CFGConvertible) -> int:
41+
def _get_symbol_index(self, symbol: CFGObjectConvertible) -> int:
4242
"""Get the symbol index"""
4343
if symbol.index_cfg_converter is None:
4444
if symbol not in self._inverse_stack_symbol_d:
@@ -48,9 +48,9 @@ def _get_symbol_index(self, symbol: CFGConvertible) -> int:
4848
return symbol.index_cfg_converter
4949

5050
def to_cfg_combined_variable(self,
51-
state0: CFGConvertible,
52-
stack_symbol: CFGConvertible,
53-
state1: CFGConvertible) -> Variable:
51+
state0: CFGObjectConvertible,
52+
stack_symbol: CFGObjectConvertible,
53+
state1: CFGObjectConvertible) -> Variable:
5454
""" Conversion used in the to_pda method """
5555
i_stack_symbol, i_state0, i_state1 = self._get_indexes(
5656
stack_symbol, state0, state1)
@@ -74,19 +74,19 @@ def _create_new_variable(self,
7474
return temp
7575

7676
def set_valid(self,
77-
state0: CFGConvertible,
78-
stack_symbol: CFGConvertible,
79-
state1: CFGConvertible) -> None:
77+
state0: CFGObjectConvertible,
78+
stack_symbol: CFGObjectConvertible,
79+
state1: CFGObjectConvertible) -> None:
8080
"""Set valid"""
8181
i_stack_symbol, i_state0, i_state1 = self._get_indexes(
8282
stack_symbol, state0, state1)
8383
prev = self._conversions[i_state0][i_stack_symbol][i_state1]
8484
self._conversions[i_state0][i_stack_symbol][i_state1] = (True, prev[1])
8585

8686
def is_valid_and_get(self,
87-
state0: CFGConvertible,
88-
stack_symbol: CFGConvertible,
89-
state1: CFGConvertible) -> Optional[Variable]:
87+
state0: CFGObjectConvertible,
88+
stack_symbol: CFGObjectConvertible,
89+
state1: CFGObjectConvertible) -> Optional[Variable]:
9090
"""Check if valid and get"""
9191
i_state0 = self._get_state_index(state0)
9292
i_stack_symbol = self._get_symbol_index(stack_symbol)
@@ -102,9 +102,9 @@ def is_valid_and_get(self,
102102
return current[1]
103103

104104
def _get_indexes(self,
105-
stack_symbol: CFGConvertible,
106-
state0: CFGConvertible,
107-
state1: CFGConvertible) \
105+
stack_symbol: CFGObjectConvertible,
106+
state0: CFGObjectConvertible,
107+
state1: CFGObjectConvertible) \
108108
-> Tuple[int, int, int]:
109109
i_state0 = self._get_state_index(state0)
110110
i_stack_symbol = self._get_symbol_index(stack_symbol)

pyformlang/cfg/formal_grammar.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,21 @@ def start_symbol(self) -> Optional[Variable]:
6767
"""
6868
return self._start_symbol
6969

70-
@start_symbol.setter
71-
def start_symbol(self, symbol: Hashable) -> None:
72-
""" Sets the start symbol of the grammar """
73-
self._start_symbol = to_variable(symbol) \
74-
if symbol is not None else None
70+
def add_production(self, production: Production) -> None:
71+
""" Adds the given production to the grammar """
72+
self.variables.update(production.variables)
73+
self.terminals.update(production.terminals)
74+
self.productions.add(production)
75+
76+
def add_start_symbol(self, symbol: Hashable) -> None:
77+
""" Adds the start symbol to the grammar """
78+
symbol = to_variable(symbol)
79+
self.variables.add(symbol)
80+
self._start_symbol = symbol
81+
82+
def remove_start_symbol(self) -> None:
83+
""" Removes the start symbol from the grammar """
84+
self._start_symbol = None
7585

7686
@abstractmethod
7787
def copy(self: GrammarT) -> GrammarT:

pyformlang/cfg/tests/test_cfg.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -847,6 +847,31 @@ def test_copy(self):
847847
assert cfg.start_symbol == cfg_copy.start_symbol
848848
assert cfg is not cfg_copy
849849

850+
def test_add_production(self):
851+
text_example = get_example_text_duplicate()
852+
cfg = CFG.from_text(text_example)
853+
assert Epsilon() not in cfg.terminals
854+
production = Production(Variable("K"),
855+
[Epsilon(), Terminal("a"), Variable("B")])
856+
cfg.add_production(production)
857+
assert production in cfg.productions
858+
assert "K" in cfg.variables
859+
assert "a" in cfg.terminals
860+
assert "B" in cfg.variables
861+
assert Epsilon() not in cfg.terminals
862+
863+
def test_start_symbol(self):
864+
cfg = CFG()
865+
assert not cfg.variables
866+
assert not cfg.start_symbol
867+
cfg.add_start_symbol("S")
868+
assert cfg.start_symbol == "S"
869+
assert "S" in cfg.variables
870+
cfg.remove_start_symbol()
871+
assert not cfg.start_symbol
872+
cfg.remove_start_symbol()
873+
assert not cfg.start_symbol
874+
850875

851876
def get_example_text_duplicate():
852877
""" Duplicate text """

pyformlang/finite_automaton/finite_automaton.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ class FiniteAutomaton(Iterable[Tuple[State, Symbol, State]]):
4040
A set of final or accepting states. It is a subset of states.
4141
"""
4242

43+
@abstractmethod
4344
def __init__(self) -> None:
4445
self._states: Set[State]
4546
self._input_symbols: Set[Symbol]

pyformlang/finite_automaton/tests/test_epsilon.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44

55
from pyformlang.finite_automaton import Epsilon
6-
from pyformlang.finite_automaton import Symbol
6+
from pyformlang.finite_automaton import State, Symbol
77

88

99
class TestEpsilon:
@@ -18,3 +18,5 @@ def test_epsilon(self):
1818
assert eps0 != symb
1919
assert "epsilon" == Epsilon()
2020
assert Epsilon() == "ɛ"
21+
assert Symbol("ɛ") != Epsilon()
22+
assert Epsilon() != State("epsilon")

pyformlang/objects/base_epsilon.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ def __init__(self) -> None:
2121
super().__init__("epsilon")
2222

2323
def __eq__(self, other: Any) -> bool:
24-
return isinstance(other, BaseEpsilon) or other in EPSILON_SYMBOLS
24+
return isinstance(other, BaseEpsilon) \
25+
or not isinstance(other, FormalObject) and other in EPSILON_SYMBOLS
2526

2627
def __hash__(self) -> int:
2728
return super().__hash__()

pyformlang/objects/cfg_objects/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
from .terminal import Terminal
66
from .epsilon import Epsilon
77
from .production import Production
8-
from .cfg_convertible import CFGConvertible
8+
from .cfg_object_convertible import CFGObjectConvertible
99

1010

1111
__all__ = ["CFGObject",
1212
"Variable",
1313
"Terminal",
1414
"Epsilon",
1515
"Production",
16-
"CFGConvertible"]
16+
"CFGObjectConvertible"]

pyformlang/objects/cfg_objects/cfg_object.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,10 @@
22

33
from abc import abstractmethod
44

5-
from .cfg_convertible import CFGConvertible
6-
from ..formal_object import FormalObject
5+
from .cfg_object_convertible import CFGObjectConvertible
76

87

9-
class CFGObject(CFGConvertible, FormalObject):
8+
class CFGObject(CFGObjectConvertible):
109
""" An object in a CFG
1110
1211
Parameters
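pyformlang/objects/cfg_objects/cfg_convertible.py → pyformlang/objects/cfg_objects/cfg_object_convertible.py

Lines changed: 8 additions & 1 deletion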
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,18 @@
11
""" Interface representing the ability of conversion to cfg object """
22

33
from typing import Optional, Any
4+
from abc import abstractmethod
45

6+
from ..formal_object import FormalObject
57

6-
class CFGConvertible:
8+
9+
class CFGObjectConvertible(FormalObject):
710
""" Interface representing the ability of conversion to cfg object """
811

912
def __init__(self, *args: Any, **kwargs: Any) -> None:
1013
super().__init__(*args, **kwargs)
1114
self.index_cfg_converter: Optional[int] = None
15+
16+
@abstractmethod
17+
def __repr__(self) -> str:
18+
raise NotImplementedError

0 commit comments

Comments
 (0)