Skip to content

Commit f0f6fc9

Browse files
add JSON -> LarkElement deserializer;
batch of other changes
1 parent 5a10fec commit f0f6fc9

File tree

10 files changed

+365
-71
lines changed

10 files changed

+365
-71
lines changed

hcl2/const.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22

33
START_LINE_KEY = "__start_line__"
44
END_LINE_KEY = "__end_line__"
5+
IS_BLOCK = "__is_block__"
Lines changed: 256 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,68 @@
11
import json
2+
from functools import lru_cache
23
from typing import Any, TextIO, List
34

5+
from regex import regex
6+
7+
from hcl2 import parses
8+
from hcl2.const import IS_BLOCK
49
from hcl2.rule_transformer.rules.abstract import LarkElement, LarkRule
10+
from hcl2.rule_transformer.rules.base import (
11+
BlockRule,
12+
AttributeRule,
13+
BodyRule,
14+
StartRule,
15+
)
16+
from hcl2.rule_transformer.rules.containers import (
17+
TupleRule,
18+
ObjectRule,
19+
ObjectElemRule,
20+
ObjectElemKeyExpressionRule,
21+
ObjectElemKeyDotAccessor,
22+
ObjectElemKeyRule,
23+
)
24+
from hcl2.rule_transformer.rules.expressions import ExprTermRule
25+
from hcl2.rule_transformer.rules.literal_rules import (
26+
IdentifierRule,
27+
IntLitRule,
28+
FloatLitRule,
29+
)
30+
from hcl2.rule_transformer.rules.strings import (
31+
StringRule,
32+
InterpolationRule,
33+
StringPartRule,
34+
)
35+
from hcl2.rule_transformer.rules.tokens import (
36+
NAME,
37+
EQ,
38+
DBLQUOTE,
39+
STRING_CHARS,
40+
ESCAPED_INTERPOLATION,
41+
INTERP_START,
42+
RBRACE,
43+
IntLiteral,
44+
FloatLiteral,
45+
RSQB,
46+
LSQB,
47+
COMMA,
48+
DOT,
49+
LBRACE,
50+
)
51+
from hcl2.rule_transformer.transformer import RuleTransformer
552
from hcl2.rule_transformer.utils import DeserializationOptions
653

754

855
class Deserializer:
956
def __init__(self, options=DeserializationOptions()):
1057
self.options = options
1158

59+
@property
def _transformer(self) -> RuleTransformer:
    """Return this instance's ``RuleTransformer``, creating it on first access.

    The transformer is kept in an instance attribute instead of an
    ``lru_cache`` around the getter: such a cache is keyed on ``self``, so it
    holds a reference to every instance it has seen and needs instances to be
    hashable.
    """
    try:
        return self._transformer_instance
    except AttributeError:
        # first access on this instance: build once, reuse afterwards
        self._transformer_instance = RuleTransformer()
        return self._transformer_instance
63+
1264
def load_python(self, value: Any) -> LarkElement:
    """Deserialize an already-decoded Python value and wrap the result in a StartRule."""
    root = self._deserialize(value)
    return StartRule([root])
1466

1567
def loads(self, value: str) -> LarkElement:
    """Decode a JSON document from a string and deserialize the decoded value."""
    decoded = json.loads(value)
    return self.load_python(decoded)
@@ -19,13 +71,209 @@ def load(self, file: TextIO) -> LarkElement:
1971
return self.loads(file.read())
2072

2173
def _deserialize(self, value: Any) -> LarkElement:
    """Dispatch a decoded value to the matching deserializer.

    Lists become tuples, dicts become either a body (when a block marker is
    found anywhere inside them) or an object, and everything else is handled
    as a scalar.
    """
    if isinstance(value, list):
        return self._deserialize_list(value)

    if not isinstance(value, dict):
        return self._deserialize_text(value)

    if self._contains_block_marker(value):
        return BodyRule(self._deserialize_block_elements(value))

    return self._deserialize_object(value)
85+
86+
def _deserialize_block_elements(self, value: dict) -> List[LarkRule]:
    """Turn the entries of a body dict into block and attribute rules.

    Entries whose content is a list of blocks yield one block rule per list
    item (the entry's key is the first label); every other entry except the
    IS_BLOCK marker itself yields an attribute rule.
    """
    elements = []

    for name, content in value.items():
        if self._is_block(content):
            elements.extend(
                self._deserialize_block(name, block) for block in content
            )
        elif name != IS_BLOCK:
            elements.append(self._deserialize_attribute(name, content))

    return elements
101+
102+
def _deserialize_text(self, value) -> LarkRule:
    """Deserialize a scalar (non-dict, non-list) value.

    Native Python scalars are mapped by their type. Strings are probed as an
    integer, then as a finite float, then as a double-quoted string, then as
    a ``${...}`` expression, and finally fall back to an identifier.
    """
    # bool must be tested before int: bool is an int subclass and int(True) == 1,
    # which would turn booleans into integer literals
    if isinstance(value, bool):
        return self._deserialize_identifier(str(value).lower())

    if value is None:
        # int(None) raises TypeError (not ValueError), so None has to be handled
        # before any numeric probing; it is emitted as the identifier "null"
        return self._deserialize_identifier("null")

    if isinstance(value, int):
        return IntLitRule([IntLiteral(value)])

    if isinstance(value, float):
        # keep floats as floats: int(3.7) would silently truncate to 3
        return FloatLitRule([FloatLiteral(value)])

    if not isinstance(value, str):
        return self._deserialize_identifier(str(value))

    try:
        return IntLitRule([IntLiteral(int(value))])
    except ValueError:
        pass

    try:
        float_val = float(value)
    except ValueError:
        pass
    else:
        # float() also parses "nan", "inf" and "infinity"; only finite results
        # are treated as numeric literals, the rest fall through as text
        if float_val == float_val and abs(float_val) != float("inf"):
            return FloatLitRule([FloatLiteral(float_val)])

    if value.startswith('"') and value.endswith('"'):
        return self._deserialize_string(value)

    if self._is_expression(value):
        return self._deserialize_expression(value)

    return self._deserialize_identifier(value)
128+
129+
def _deserialize_identifier(self, value: str) -> IdentifierRule:
    """Wrap ``value`` in a NAME token and return it as an IdentifierRule."""
    name_token = NAME(value)
    return IdentifierRule([name_token])
131+
132+
def _deserialize_string(self, value: str) -> StringRule:
    """Deserialize a double-quoted string, splitting out its interpolations.

    The enclosing quotes are removed once, before splitting. Removing them
    from each part separately also strips quotes that belong to the content,
    e.g. the escaped quote ending the first part of ``"a\\"${x}b"``, and
    drops a part that consists of a single quote.
    """
    content = value
    if content.startswith('"'):
        content = content[1:]
    if content.endswith('"'):
        content = content[:-1]

    # NOTE(review): (?R) recurses into the whole pattern, which starts with "$",
    # so a bare "{...}" nested inside an interpolation does not appear to be
    # matched -- confirm against inputs such as "${merge({a = 1}, b)}"
    pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})")
    parts = [part for part in pattern.split(content) if part != ""]
    # e.g. 'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}']
    #      'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}']

    string_parts = [self._deserialize_string_part(part) for part in parts]
    return StringRule([DBLQUOTE(), *string_parts, DBLQUOTE()])
153+
154+
def _deserialize_string_part(self, value: str) -> StringPartRule:
    """Classify one piece of a string as escaped interpolation, interpolation or plain characters."""
    closed = value.endswith("}")

    if closed and value.startswith("$${"):
        # "$${...}" is kept verbatim as a single token
        child = ESCAPED_INTERPOLATION(value)
    elif closed and value.startswith("${"):
        child = InterpolationRule(
            [INTERP_START(), self._deserialize_expression(value), RBRACE()]
        )
    else:
        child = STRING_CHARS(value)

    return StringPartRule([child])
168+
169+
def _deserialize_expression(self, value: str) -> ExprTermRule:
    """Deserialize a ``${...}`` expression string by parsing it as HCL2 code.

    Rather than recognising the kind of expression by hand, the expression is
    placed on the right-hand side of a throwaway attribute, parsed, transformed
    into a rule tree, and read back from that attribute.
    """
    # drop the leading "${" and the trailing "}"
    inner = value[2:-1]
    snippet = f"temp = {inner}"

    parsed_tree = parses(snippet)
    rules_tree = self._transformer.transform(parsed_tree)

    # the snippet's only attribute is the first child of the body
    attribute = rules_tree.body.children[0]
    return attribute.expression
184+
185+
def _deserialize_block(self, first_label: str, value: dict) -> BlockRule:
    """Deserialize one block: collect its labels, then deserialize its body.

    Starting from ``value``, every dict that is not marked with IS_BLOCK and
    has exactly one key (ignoring IS_BLOCK) is treated as another label level.
    Whatever remains once that stops holding is deserialized as the body.
    """
    labels = [first_label]
    body = value

    while isinstance(body, dict) and not body.get(IS_BLOCK):
        candidates = [key for key in body if key != IS_BLOCK]
        if len(candidates) != 1:
            # zero or several keys: this dict is the body itself
            break
        (label,) = candidates
        labels.append(label)
        body = body[label]

    children = [self._deserialize(label) for label in labels]
    children.append(self._deserialize(body))
    return BlockRule(children)
205+
206+
def _deserialize_attribute(self, name: str, value: Any) -> AttributeRule:
    """Build ``name = value`` as an AttributeRule, with the value wrapped in an ExprTermRule."""
    identifier = self._deserialize_identifier(name)
    equals = EQ()
    expression = ExprTermRule([self._deserialize(value)])
    return AttributeRule([identifier, equals, expression])
213+
214+
def _deserialize_list(self, value: List) -> TupleRule:
    """Deserialize a list into a TupleRule; each element is followed by a COMMA token."""
    children = []

    for element in value:
        item = self._deserialize(element)
        # every tuple element has to sit inside an ExprTermRule
        wrapped = item if isinstance(item, ExprTermRule) else ExprTermRule([item])
        children.extend((wrapped, COMMA()))

    return TupleRule([LSQB(), *children, RSQB()])
225+
226+
def _deserialize_object(self, value: dict) -> ObjectRule:
    """Deserialize a dict into an ObjectRule with one element per key/value pair."""
    elements = [
        self._deserialize_object_elem(elem_key, elem_value)
        for elem_key, elem_value in value.items()
    ]
    return ObjectRule([LBRACE(), *elements, RBRACE()])
231+
232+
def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule:
    """Deserialize one ``key = value`` entry of an object.

    The key becomes an expression key (``${...}``), a dot accessor
    (``a.b.c``) or a plain scalar key. The value goes through the general
    ``_deserialize`` dispatcher so that nested objects and lists are handled,
    the same way ``_deserialize_attribute`` treats attribute values.
    """
    # a quoted key is a string even when it contains dots
    quoted = key.startswith('"') and key.endswith('"')

    if self._is_expression(key):
        key_rule = ObjectElemKeyExpressionRule([self._deserialize_expression(key)])
    elif "." in key and not quoted:
        # NOTE(review): numeric keys such as "1.5" still take this branch -- confirm
        # whether they should be deserialized as float literals instead
        children = []
        for part in key.split("."):
            if children:
                # dots go between identifiers only, never after the last one
                children.append(DOT())
            children.append(self._deserialize_identifier(part))
        key_rule = ObjectElemKeyDotAccessor(children)
    else:
        key_rule = self._deserialize_text(key)

    return ObjectElemRule(
        [
            ObjectElemKeyRule([key_rule]),
            EQ(),
            ExprTermRule([self._deserialize(value)]),
        ]
    )
252+
253+
def _is_expression(self, value: str) -> bool:
254+
return value.startswith("${") and value.endswith("}")
255+
256+
def _is_block(self, value: Any) -> bool:
257+
"""Simple check: if it's a list containing dicts with IS_BLOCK markers"""
258+
if not isinstance(value, list) or len(value) == 0:
259+
return False
23260

24-
def _deserialize_dict(self, value: dict) -> LarkRule:
25-
pass
261+
# Check if any item in the list has IS_BLOCK marker (directly or nested)
262+
for item in value:
263+
if isinstance(item, dict) and self._contains_block_marker(item):
264+
return True
26265

27-
def _deserialize_list(self, value: List) -> LarkRule:
28-
pass
266+
return False
29267

30-
def _deserialize_expression(self, value: str) -> LarkRule:
31-
pass
268+
def _contains_block_marker(self, obj: dict) -> bool:
269+
"""Recursively check if a dict contains IS_BLOCK marker anywhere"""
270+
if obj.get(IS_BLOCK):
271+
return True
272+
for value in obj.values():
273+
if isinstance(value, dict) and self._contains_block_marker(value):
274+
return True
275+
if isinstance(value, list):
276+
for element in value:
277+
if self._contains_block_marker(element):
278+
return True
279+
return False

hcl2/rule_transformer/rules/abstract.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def serialize(
3636

3737

3838
class LarkToken(LarkElement, ABC):
39-
def __init__(self, value: Union[str, int]):
39+
def __init__(self, value: Union[str, int, float]):
4040
self._value = value
4141
super().__init__()
4242

@@ -100,7 +100,6 @@ def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None):
100100

101101
for index, child in enumerate(children):
102102
if child is not None:
103-
print(child)
104103
child.set_index(index)
105104
child.set_parent(self)
106105

hcl2/rule_transformer/rules/base.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33

44
from lark.tree import Meta
55

6-
from hcl2.dict_transformer import START_LINE, END_LINE
6+
from hcl2.const import IS_BLOCK
77
from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken
88
from hcl2.rule_transformer.rules.expressions import ExpressionRule
9+
from hcl2.rule_transformer.rules.literal_rules import IdentifierRule
10+
from hcl2.rule_transformer.rules.strings import StringRule
911
from hcl2.rule_transformer.rules.tokens import NAME, EQ
1012

1113
from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule
@@ -42,7 +44,7 @@ class BodyRule(LarkRule):
4244
_children: List[
4345
Union[
4446
NewLineOrCommentRule,
45-
# AttributeRule,
47+
AttributeRule,
4648
"BlockRule",
4749
]
4850
]
@@ -58,6 +60,7 @@ def serialize(
5860
attributes: List[AttributeRule] = []
5961
comments = []
6062
inline_comments = []
63+
6164
for child in self._children:
6265

6366
if isinstance(child, BlockRule):
@@ -116,7 +119,11 @@ def serialize(
116119

117120
class BlockRule(LarkRule):
118121

119-
_children: Tuple[BodyRule]
122+
_children: Tuple[
123+
IdentifierRule,
124+
Optional[Union[IdentifierRule, StringRule]],
125+
BodyRule,
126+
]
120127

121128
def __init__(self, children, meta: Optional[Meta] = None):
122129
super().__init__(children, meta)
@@ -141,15 +148,11 @@ def serialize(
141148
self, options=SerializationOptions(), context=SerializationContext()
142149
) -> Any:
143150
result = self._body.serialize(options)
151+
if options.explicit_blocks:
152+
result.update({IS_BLOCK: True})
153+
144154
labels = self._labels
145155
for label in reversed(labels[1:]):
146156
result = {label.serialize(options): result}
147157

148-
result.update(
149-
{
150-
START_LINE: self._meta.line,
151-
END_LINE: self._meta.end_line,
152-
}
153-
)
154-
155158
return result

hcl2/rule_transformer/rules/functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def arguments(self) -> Optional[ArgumentsRule]:
7878

7979
def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any:
8080
result = (
81-
f"{":".join(identifier.serialize(options, context) for identifier in self.identifiers)}"
81+
f"{"::".join(identifier.serialize(options, context) for identifier in self.identifiers)}"
8282
f"({self.arguments.serialize(options, context) if self.arguments else ""})"
8383
)
8484
if not context.inside_dollar_string:

hcl2/rule_transformer/rules/literal_rules.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,6 @@ def lark_name() -> str:
4343
return "float_lit"
4444

4545

46-
class StringPartRule(TokenRule):
47-
@staticmethod
48-
def lark_name() -> str:
49-
return "string_part"
50-
51-
5246
class BinaryOperatorRule(TokenRule):
5347
@staticmethod
5448
def lark_name() -> str:

0 commit comments

Comments
 (0)