
transformer overhaul draft #203

Draft: wants to merge 7 commits into main

2 changes: 1 addition & 1 deletion hcl2/api.py
@@ -3,7 +3,7 @@

from lark.tree import Tree
from hcl2.parser import parser, reconstruction_parser
-from hcl2.transformer import DictTransformer
+from hcl2.dict_transformer import DictTransformer
from hcl2.reconstructor import HCLReconstructor, HCLReverseTransformer


1 change: 1 addition & 0 deletions hcl2/const.py
@@ -2,3 +2,4 @@

START_LINE_KEY = "__start_line__"
END_LINE_KEY = "__end_line__"
+IS_BLOCK = "__is_block__"
4 changes: 4 additions & 0 deletions hcl2/transformer.py → hcl2/dict_transformer.py
@@ -277,6 +277,10 @@ def heredoc_template_trim(self, args: List) -> str:
def new_line_or_comment(self, args: List) -> _DiscardType:
return Discard

+    # def EQ(self, args: List):
+    #     print("EQ", args)
+    #     return args
+
def for_tuple_expr(self, args: List) -> str:
args = self.strip_new_line_tokens(args)
for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]])
4 changes: 2 additions & 2 deletions hcl2/parser.py
@@ -12,7 +12,7 @@
def parser() -> Lark:
"""Build standard parser for transforming HCL2 text into python structures"""
return Lark.open(
"hcl2.lark",
"rule_transformer/hcl2.lark",
parser="lalr",
cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar
rel_to=__file__,
@@ -29,7 +29,7 @@ def reconstruction_parser() -> Lark:
if necessary.
"""
return Lark.open(
"hcl2.lark",
"rule_transformer/hcl2.lark",
parser="lalr",
# Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed:
#
7 changes: 6 additions & 1 deletion hcl2/reconstructor.py
@@ -167,12 +167,17 @@ def _should_add_space(self, rule, current_terminal, is_block_label: bool = False
if self._is_equals_sign(current_terminal):
return True

+        if is_block_label:
+            pass
+            # print(rule, self._last_rule, current_terminal, self._last_terminal)
+
if is_block_label and isinstance(rule, Token) and rule.value == "string":
if (
current_terminal == self._last_terminal == Terminal("DBLQUOTE")
or current_terminal == Terminal("DBLQUOTE")
and self._last_terminal == Terminal("NAME")
and self._last_terminal == Terminal("IDENTIFIER")
):
# print("true")
return True

# if we're in a ternary or binary operator, add space around the operator
Empty file.
279 changes: 279 additions & 0 deletions hcl2/rule_transformer/deserializer.py
@@ -0,0 +1,279 @@
import json
from functools import lru_cache
from typing import Any, TextIO, List

import regex

from hcl2 import parses
from hcl2.const import IS_BLOCK
from hcl2.rule_transformer.rules.abstract import LarkElement, LarkRule
from hcl2.rule_transformer.rules.base import (
BlockRule,
AttributeRule,
BodyRule,
StartRule,
)
from hcl2.rule_transformer.rules.containers import (
TupleRule,
ObjectRule,
ObjectElemRule,
ObjectElemKeyExpressionRule,
ObjectElemKeyDotAccessor,
ObjectElemKeyRule,
)
from hcl2.rule_transformer.rules.expressions import ExprTermRule
from hcl2.rule_transformer.rules.literal_rules import (
IdentifierRule,
IntLitRule,
FloatLitRule,
)
from hcl2.rule_transformer.rules.strings import (
StringRule,
InterpolationRule,
StringPartRule,
)
from hcl2.rule_transformer.rules.tokens import (
NAME,
EQ,
DBLQUOTE,
STRING_CHARS,
ESCAPED_INTERPOLATION,
INTERP_START,
RBRACE,
IntLiteral,
FloatLiteral,
RSQB,
LSQB,
COMMA,
DOT,
LBRACE,
)
from hcl2.rule_transformer.transformer import RuleTransformer
from hcl2.rule_transformer.utils import DeserializationOptions


class Deserializer:
    """Deserialize JSON text or Python structures (dicts using the IS_BLOCK
    marker for block bodies) into a LarkElement rule tree."""

    def __init__(self, options=DeserializationOptions()):
        self.options = options

@property
@lru_cache
def _transformer(self) -> RuleTransformer:
return RuleTransformer()

def load_python(self, value: Any) -> LarkElement:
return StartRule([self._deserialize(value)])

def loads(self, value: str) -> LarkElement:
return self.load_python(json.loads(value))

def load(self, file: TextIO) -> LarkElement:
return self.loads(file.read())

def _deserialize(self, value: Any) -> LarkElement:
if isinstance(value, dict):
if self._contains_block_marker(value):
elements = self._deserialize_block_elements(value)
return BodyRule(elements)

return self._deserialize_object(value)

if isinstance(value, list):
return self._deserialize_list(value)

return self._deserialize_text(value)

    def _deserialize_block_elements(self, value: dict) -> List[LarkRule]:
        children = []

        for key, item in value.items():
            if self._is_block(item):
                # this value is a list of blocks; deserialize each block in turn
                for block in item:
                    children.append(self._deserialize_block(key, block))
            elif key != IS_BLOCK:
                # otherwise it's just an attribute (the IS_BLOCK marker itself is skipped)
                children.append(self._deserialize_attribute(key, item))

        return children

    def _deserialize_text(self, value) -> LarkRule:
        # booleans and floats must be handled before the generic int() attempt:
        # isinstance(True, int) is True and int(1.5) silently truncates to 1
        if isinstance(value, bool):
            return self._deserialize_identifier(str(value).lower())

        if isinstance(value, float):
            return FloatLitRule([FloatLiteral(value)])

        try:
            int_val = int(value)
            return IntLitRule([IntLiteral(int_val)])
        except (TypeError, ValueError):
            pass

        try:
            float_val = float(value)
            return FloatLitRule([FloatLiteral(float_val)])
        except (TypeError, ValueError):
            pass

        if isinstance(value, str):
            if value.startswith('"') and value.endswith('"'):
                return self._deserialize_string(value)

            if self._is_expression(value):
                return self._deserialize_expression(value)

            return self._deserialize_identifier(value)

        return self._deserialize_identifier(str(value))

def _deserialize_identifier(self, value: str) -> IdentifierRule:
return IdentifierRule([NAME(value)])

def _deserialize_string(self, value: str) -> StringRule:
result = []

pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})")
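        # \${1,2} matches both ${...} interpolations and $${...} escapes; (?R)
        # recurses into nested interpolations, and the outer capturing group keeps
        # the matched interpolation substrings in the split() result below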
parts = [part for part in pattern.split(value) if part != ""]
# e.g. 'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}']
# 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}']

for part in parts:
if part == '"':
continue

if part.startswith('"'):
part = part[1:]
if part.endswith('"'):
part = part[:-1]

e = self._deserialize_string_part(part)
result.append(e)

return StringRule([DBLQUOTE(), *result, DBLQUOTE()])

def _deserialize_string_part(self, value: str) -> StringPartRule:
if value.startswith("$${") and value.endswith("}"):
return StringPartRule([ESCAPED_INTERPOLATION(value)])

if value.startswith("${") and value.endswith("}"):
return StringPartRule(
[
InterpolationRule(
[INTERP_START(), self._deserialize_expression(value), RBRACE()]
)
]
)

return StringPartRule([STRING_CHARS(value)])

def _deserialize_expression(self, value: str) -> ExprTermRule:
"""Deserialize an expression string into an ExprTermRule."""
# instead of processing expression manually and trying to recognize what kind of expression it is,
# turn it into HCL2 code and parse it with lark:
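        # e.g. "${var.region}" is unwrapped, parsed as the snippet 'temp = var.region',
        # and the expression term for var.region is pulled out of the resulting tree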

# unwrap from ${ and }
value = value[2:-1]
# create HCL2 snippet
value = f"temp = {value}"
# parse the above
parsed_tree = parses(value)
# transform parsed tree into LarkElement tree
rules_tree = self._transformer.transform(parsed_tree)
# extract expression from the tree
return rules_tree.body.children[0].expression

def _deserialize_block(self, first_label: str, value: dict) -> BlockRule:
"""Deserialize a block by extracting labels and body"""
labels = [first_label]
body = value

# Keep peeling off single-key layers until we hit the body (dict with IS_BLOCK)
while isinstance(body, dict) and not body.get(IS_BLOCK):
non_block_keys = [k for k in body.keys() if k != IS_BLOCK]
if len(non_block_keys) == 1:
# This is another label level
label = non_block_keys[0]
labels.append(label)
body = body[label]
else:
# Multiple keys = this is the body
break

return BlockRule(
[*[self._deserialize(label) for label in labels], self._deserialize(body)]
)

def _deserialize_attribute(self, name: str, value: Any) -> AttributeRule:
children = [
self._deserialize_identifier(name),
EQ(),
ExprTermRule([self._deserialize(value)]),
]
return AttributeRule(children)

def _deserialize_list(self, value: List) -> TupleRule:
children = []
for element in value:
deserialized = self._deserialize(element)
if not isinstance(deserialized, ExprTermRule):
# whatever an element of the list is, it has to be nested inside ExprTermRule
deserialized = ExprTermRule([deserialized])
children.append(deserialized)
children.append(COMMA())

return TupleRule([LSQB(), *children, RSQB()])

    def _deserialize_object(self, value: dict) -> ObjectRule:
        children = []
        for key, item in value.items():
            children.append(self._deserialize_object_elem(key, item))
        return ObjectRule([LBRACE(), *children, RBRACE()])

def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule:
if self._is_expression(key):
key = ObjectElemKeyExpressionRule([self._deserialize_expression(key)])
elif "." in key:
parts = key.split(".")
children = []
for part in parts:
children.append(self._deserialize_identifier(part))
children.append(DOT())
            key = ObjectElemKeyDotAccessor(children[:-1])  # drop the trailing DOT
else:
key = self._deserialize_text(key)

return ObjectElemRule(
[
ObjectElemKeyRule([key]),
EQ(),
ExprTermRule([self._deserialize_text(value)]),
]
)

def _is_expression(self, value: str) -> bool:
return value.startswith("${") and value.endswith("}")

def _is_block(self, value: Any) -> bool:
"""Simple check: if it's a list containing dicts with IS_BLOCK markers"""
if not isinstance(value, list) or len(value) == 0:
return False

# Check if any item in the list has IS_BLOCK marker (directly or nested)
for item in value:
if isinstance(item, dict) and self._contains_block_marker(item):
return True

return False

def _contains_block_marker(self, obj: dict) -> bool:
"""Recursively check if a dict contains IS_BLOCK marker anywhere"""
if obj.get(IS_BLOCK):
return True
        for value in obj.values():
            if isinstance(value, dict) and self._contains_block_marker(value):
                return True
            if isinstance(value, list):
                for element in value:
                    # list elements may be plain scalars; only recurse into dicts
                    if isinstance(element, dict) and self._contains_block_marker(element):
                        return True
return False
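
A minimal usage sketch for reviewers (not part of the diff), following the conventions visible in the deserializer above: quoted HCL strings keep their double quotes, a block is a list of dicts whose innermost body dict carries the IS_BLOCK marker, and the resource names below are illustrative only.

import json

from hcl2.const import IS_BLOCK
from hcl2.rule_transformer.deserializer import Deserializer

data = {
    "region": '"us-east-1"',  # quoted string -> HCL string literal
    "count": 3,               # bare int -> integer literal
    "resource": [             # list of block dicts -> nested HCL blocks
        {"aws_s3_bucket": {"example": {IS_BLOCK: True, "bucket": '"my-bucket"'}}},
    ],
}

deserializer = Deserializer()
tree = deserializer.load_python(data)             # StartRule wrapping a BodyRule
same_tree = deserializer.loads(json.dumps(data))  # equivalent, from JSON text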