11import json
2+ from functools import lru_cache
23from typing import Any , TextIO , List
34
5+ from regex import regex
6+
7+ from hcl2 import parses
8+ from hcl2 .const import IS_BLOCK
49from hcl2 .rule_transformer .rules .abstract import LarkElement , LarkRule
10+ from hcl2 .rule_transformer .rules .base import (
11+ BlockRule ,
12+ AttributeRule ,
13+ BodyRule ,
14+ StartRule ,
15+ )
16+ from hcl2 .rule_transformer .rules .containers import (
17+ TupleRule ,
18+ ObjectRule ,
19+ ObjectElemRule ,
20+ ObjectElemKeyExpressionRule ,
21+ ObjectElemKeyDotAccessor ,
22+ ObjectElemKeyRule ,
23+ )
24+ from hcl2 .rule_transformer .rules .expressions import ExprTermRule
25+ from hcl2 .rule_transformer .rules .literal_rules import (
26+ IdentifierRule ,
27+ IntLitRule ,
28+ FloatLitRule ,
29+ )
30+ from hcl2 .rule_transformer .rules .strings import (
31+ StringRule ,
32+ InterpolationRule ,
33+ StringPartRule ,
34+ )
35+ from hcl2 .rule_transformer .rules .tokens import (
36+ NAME ,
37+ EQ ,
38+ DBLQUOTE ,
39+ STRING_CHARS ,
40+ ESCAPED_INTERPOLATION ,
41+ INTERP_START ,
42+ RBRACE ,
43+ IntLiteral ,
44+ FloatLiteral ,
45+ RSQB ,
46+ LSQB ,
47+ COMMA ,
48+ DOT ,
49+ LBRACE ,
50+ )
51+ from hcl2 .rule_transformer .transformer import RuleTransformer
552from hcl2 .rule_transformer .utils import DeserializationOptions
653
754
855class Deserializer :
956 def __init__ (self , options = DeserializationOptions ()):
1057 self .options = options
1158
59+ @property
60+ @lru_cache
61+ def _transformer (self ) -> RuleTransformer :
62+ return RuleTransformer ()
63+
1264 def load_python (self , value : Any ) -> LarkElement :
13- pass
65+ return StartRule ([ self . _deserialize ( value )])
1466
1567 def loads (self , value : str ) -> LarkElement :
1668 return self .load_python (json .loads (value ))
@@ -19,13 +71,209 @@ def load(self, file: TextIO) -> LarkElement:
1971 return self .loads (file .read ())
2072
2173 def _deserialize (self , value : Any ) -> LarkElement :
22- pass
74+ if isinstance (value , dict ):
75+ if self ._contains_block_marker (value ):
76+ elements = self ._deserialize_block_elements (value )
77+ return BodyRule (elements )
78+
79+ return self ._deserialize_object (value )
80+
81+ if isinstance (value , list ):
82+ return self ._deserialize_list (value )
83+
84+ return self ._deserialize_text (value )
85+
86+ def _deserialize_block_elements (self , value : dict ) -> List [LarkRule ]:
87+ children = []
88+
89+ for key , value in value .items ():
90+ if self ._is_block (value ):
91+ # this value is a list of blocks, iterate over each block and deserialize them
92+ for block in value :
93+ children .append (self ._deserialize_block (key , block ))
94+ else :
95+
96+ # otherwise it's just an attribute
97+ if key != IS_BLOCK :
98+ children .append (self ._deserialize_attribute (key , value ))
99+
100+ return children
101+
102+ def _deserialize_text (self , value ) -> LarkRule :
103+ try :
104+ int_val = int (value )
105+ return IntLitRule ([IntLiteral (int_val )])
106+ except ValueError :
107+ pass
108+
109+ try :
110+ float_val = float (value )
111+ return FloatLitRule ([FloatLiteral (float_val )])
112+ except ValueError :
113+ pass
114+
115+ if isinstance (value , str ):
116+ if value .startswith ('"' ) and value .endswith ('"' ):
117+ return self ._deserialize_string (value )
118+
119+ if self ._is_expression (value ):
120+ return self ._deserialize_expression (value )
121+
122+ return self ._deserialize_identifier (value )
123+
124+ elif isinstance (value , bool ):
125+ return self ._deserialize_identifier (str (value ).lower ())
126+
127+ return self ._deserialize_identifier (str (value ))
128+
129+ def _deserialize_identifier (self , value : str ) -> IdentifierRule :
130+ return IdentifierRule ([NAME (value )])
131+
132+ def _deserialize_string (self , value : str ) -> StringRule :
133+ result = []
134+
135+ pattern = regex .compile (r"(\${1,2}\{(?:[^{}]|(?R))*\})" )
136+ parts = [part for part in pattern .split (value ) if part != "" ]
137+ # e.g. 'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}']
138+ # 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}']
139+
140+ for part in parts :
141+ if part == '"' :
142+ continue
143+
144+ if part .startswith ('"' ):
145+ part = part [1 :]
146+ if part .endswith ('"' ):
147+ part = part [:- 1 ]
148+
149+ e = self ._deserialize_string_part (part )
150+ result .append (e )
151+
152+ return StringRule ([DBLQUOTE (), * result , DBLQUOTE ()])
153+
154+ def _deserialize_string_part (self , value : str ) -> StringPartRule :
155+ if value .startswith ("$${" ) and value .endswith ("}" ):
156+ return StringPartRule ([ESCAPED_INTERPOLATION (value )])
157+
158+ if value .startswith ("${" ) and value .endswith ("}" ):
159+ return StringPartRule (
160+ [
161+ InterpolationRule (
162+ [INTERP_START (), self ._deserialize_expression (value ), RBRACE ()]
163+ )
164+ ]
165+ )
166+
167+ return StringPartRule ([STRING_CHARS (value )])
168+
169+ def _deserialize_expression (self , value : str ) -> ExprTermRule :
170+ """Deserialize an expression string into an ExprTermRule."""
171+ # instead of processing expression manually and trying to recognize what kind of expression it is,
172+ # turn it into HCL2 code and parse it with lark:
173+
174+ # unwrap from ${ and }
175+ value = value [2 :- 1 ]
176+ # create HCL2 snippet
177+ value = f"temp = { value } "
178+ # parse the above
179+ parsed_tree = parses (value )
180+ # transform parsed tree into LarkElement tree
181+ rules_tree = self ._transformer .transform (parsed_tree )
182+ # extract expression from the tree
183+ return rules_tree .body .children [0 ].expression
184+
185+ def _deserialize_block (self , first_label : str , value : dict ) -> BlockRule :
186+ """Deserialize a block by extracting labels and body"""
187+ labels = [first_label ]
188+ body = value
189+
190+ # Keep peeling off single-key layers until we hit the body (dict with IS_BLOCK)
191+ while isinstance (body , dict ) and not body .get (IS_BLOCK ):
192+ non_block_keys = [k for k in body .keys () if k != IS_BLOCK ]
193+ if len (non_block_keys ) == 1 :
194+ # This is another label level
195+ label = non_block_keys [0 ]
196+ labels .append (label )
197+ body = body [label ]
198+ else :
199+ # Multiple keys = this is the body
200+ break
201+
202+ return BlockRule (
203+ [* [self ._deserialize (label ) for label in labels ], self ._deserialize (body )]
204+ )
205+
206+ def _deserialize_attribute (self , name : str , value : Any ) -> AttributeRule :
207+ children = [
208+ self ._deserialize_identifier (name ),
209+ EQ (),
210+ ExprTermRule ([self ._deserialize (value )]),
211+ ]
212+ return AttributeRule (children )
213+
214+ def _deserialize_list (self , value : List ) -> TupleRule :
215+ children = []
216+ for element in value :
217+ deserialized = self ._deserialize (element )
218+ if not isinstance (deserialized , ExprTermRule ):
219+ # whatever an element of the list is, it has to be nested inside ExprTermRule
220+ deserialized = ExprTermRule ([deserialized ])
221+ children .append (deserialized )
222+ children .append (COMMA ())
223+
224+ return TupleRule ([LSQB (), * children , RSQB ()])
225+
226+ def _deserialize_object (self , value : dict ) -> ObjectRule :
227+ children = []
228+ for key , value in value .items ():
229+ children .append (self ._deserialize_object_elem (key , value ))
230+ return ObjectRule ([LBRACE (), * children , RBRACE ()])
231+
232+ def _deserialize_object_elem (self , key : str , value : Any ) -> ObjectElemRule :
233+ if self ._is_expression (key ):
234+ key = ObjectElemKeyExpressionRule ([self ._deserialize_expression (key )])
235+ elif "." in key :
236+ parts = key .split ("." )
237+ children = []
238+ for part in parts :
239+ children .append (self ._deserialize_identifier (part ))
240+ children .append (DOT ())
241+ key = ObjectElemKeyDotAccessor (children [:- 1 ]) # without the last comma
242+ else :
243+ key = self ._deserialize_text (key )
244+
245+ return ObjectElemRule (
246+ [
247+ ObjectElemKeyRule ([key ]),
248+ EQ (),
249+ ExprTermRule ([self ._deserialize_text (value )]),
250+ ]
251+ )
252+
253+ def _is_expression (self , value : str ) -> bool :
254+ return value .startswith ("${" ) and value .endswith ("}" )
255+
256+ def _is_block (self , value : Any ) -> bool :
257+ """Simple check: if it's a list containing dicts with IS_BLOCK markers"""
258+ if not isinstance (value , list ) or len (value ) == 0 :
259+ return False
23260
24- def _deserialize_dict (self , value : dict ) -> LarkRule :
25- pass
261+ # Check if any item in the list has IS_BLOCK marker (directly or nested)
262+ for item in value :
263+ if isinstance (item , dict ) and self ._contains_block_marker (item ):
264+ return True
26265
27- def _deserialize_list (self , value : List ) -> LarkRule :
28- pass
266+ return False
29267
30- def _deserialize_expression (self , value : str ) -> LarkRule :
31- pass
268+ def _contains_block_marker (self , obj : dict ) -> bool :
269+ """Recursively check if a dict contains IS_BLOCK marker anywhere"""
270+ if obj .get (IS_BLOCK ):
271+ return True
272+ for value in obj .values ():
273+ if isinstance (value , dict ) and self ._contains_block_marker (value ):
274+ return True
275+ if isinstance (value , list ):
276+ for element in value :
277+ if self ._contains_block_marker (element ):
278+ return True
279+ return False
0 commit comments