Skip to content

Commit d8ac92d

Browse files
add heredoc rules and deserialization;
require heredoc openers to be on their own separate line in lark grammar; whitespace trimming based on current implementation in dict_transformer.py;
1 parent f0f6fc9 commit d8ac92d

File tree

6 files changed

+123
-12
lines changed

6 files changed

+123
-12
lines changed

hcl2/rule_transformer/deserializer.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import json
22
from functools import lru_cache
3-
from typing import Any, TextIO, List
3+
from typing import Any, TextIO, List, Union
44

55
from regex import regex
66

@@ -31,6 +31,8 @@
3131
StringRule,
3232
InterpolationRule,
3333
StringPartRule,
34+
HeredocTemplateRule,
35+
HeredocTrimTemplateRule,
3436
)
3537
from hcl2.rule_transformer.rules.tokens import (
3638
NAME,
@@ -47,9 +49,11 @@
4749
COMMA,
4850
DOT,
4951
LBRACE,
52+
HEREDOC_TRIM_TEMPLATE,
53+
HEREDOC_TEMPLATE,
5054
)
5155
from hcl2.rule_transformer.transformer import RuleTransformer
52-
from hcl2.rule_transformer.utils import DeserializationOptions
56+
from hcl2.rule_transformer.utils import DeserializationOptions, HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN
5357

5458

5559
class Deserializer:
@@ -99,7 +103,7 @@ def _deserialize_block_elements(self, value: dict) -> List[LarkRule]:
99103

100104
return children
101105

102-
def _deserialize_text(self, value) -> LarkRule:
106+
def _deserialize_text(self, value: Any) -> LarkRule:
103107
try:
104108
int_val = int(value)
105109
return IntLitRule([IntLiteral(int_val)])
@@ -114,6 +118,16 @@ def _deserialize_text(self, value) -> LarkRule:
114118

115119
if isinstance(value, str):
116120
if value.startswith('"') and value.endswith('"'):
121+
if not self.options.heredocs_to_strings and value.startswith('"<<-'):
122+
match = HEREDOC_TRIM_PATTERN.match(value[1:-1])
123+
if match:
124+
return self._deserialize_heredoc(value[1:-1], True)
125+
126+
if not self.options.heredocs_to_strings and value.startswith('"<<'):
127+
match = HEREDOC_PATTERN.match(value[1:-1])
128+
if match:
129+
return self._deserialize_heredoc(value[1:-1], False)
130+
117131
return self._deserialize_string(value)
118132

119133
if self._is_expression(value):
@@ -131,11 +145,12 @@ def _deserialize_identifier(self, value: str) -> IdentifierRule:
131145

132146
def _deserialize_string(self, value: str) -> StringRule:
133147
result = []
134-
135-
pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})")
136-
parts = [part for part in pattern.split(value) if part != ""]
148+
# split string into individual parts based on lark grammar
137149
# e.g. 'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}']
138150
# 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}']
151+
pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})")
152+
parts = [part for part in pattern.split(value) if part != ""]
153+
139154

140155
for part in parts:
141156
if part == '"':
@@ -166,6 +181,11 @@ def _deserialize_string_part(self, value: str) -> StringPartRule:
166181

167182
return StringPartRule([STRING_CHARS(value)])
168183

184+
def _deserialize_heredoc(self, value: str, trim: bool) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]:
    """Wrap raw heredoc text in the matching heredoc rule.

    :param value: heredoc text as passed by the caller (in ``_deserialize_text``
        this is the JSON string with its surrounding double quotes removed).
    :param trim: ``True`` selects the ``<<-`` variant (HeredocTrimTemplateRule),
        ``False`` the plain ``<<`` variant (HeredocTemplateRule).
    :return: a rule whose only child is the corresponding heredoc token.
    """
    if not trim:
        return HeredocTemplateRule([HEREDOC_TEMPLATE(value)])
    return HeredocTrimTemplateRule([HEREDOC_TRIM_TEMPLATE(value)])
188+
169189
def _deserialize_expression(self, value: str) -> ExprTermRule:
170190
"""Deserialize an expression string into an ExprTermRule."""
171191
# instead of processing expression manually and trying to recognize what kind of expression it is,

hcl2/rule_transformer/hcl2.lark

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ ELLIPSIS : "..."
6767
COLONS: "::"
6868

6969
// Heredocs
70-
HEREDOC_TEMPLATE : /<<(?P<heredoc>[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc)\n/
71-
HEREDOC_TEMPLATE_TRIM : /<<-(?P<heredoc_trim>[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/
70+
HEREDOC_TEMPLATE : /<<(?P<heredoc>[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc)\n/
71+
HEREDOC_TEMPLATE_TRIM : /<<-(?P<heredoc_trim>[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/
7272

7373
// Ignore whitespace (but not newlines, as they're significant in HCL)
7474
%ignore /[ \t]+/

hcl2/rule_transformer/rules/strings.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import sys
12
from typing import Tuple, List, Any, Union
23

34
from hcl2.rule_transformer.rules.abstract import LarkRule
@@ -8,11 +9,15 @@
89
DBLQUOTE,
910
STRING_CHARS,
1011
ESCAPED_INTERPOLATION,
12+
HEREDOC_TEMPLATE,
13+
HEREDOC_TRIM_TEMPLATE,
1114
)
1215
from hcl2.rule_transformer.utils import (
1316
SerializationOptions,
1417
SerializationContext,
1518
to_dollar_string,
19+
HEREDOC_TRIM_PATTERN,
20+
HEREDOC_PATTERN,
1621
)
1722

1823

@@ -71,3 +76,71 @@ def serialize(
7176
self, options=SerializationOptions(), context=SerializationContext()
7277
) -> Any:
7378
return '"' + "".join(part.serialize() for part in self.string_parts) + '"'
79+
80+
81+
class HeredocTemplateRule(LarkRule):
    """Rule holding a single HEREDOC_TEMPLATE token (a ``<<NAME`` heredoc)."""

    _children: Tuple[HEREDOC_TEMPLATE]
    # characters stripped from the right end of the serialized heredoc
    _trim_chars = "\n\t "

    @staticmethod
    def lark_name() -> str:
        """Return the rule name this class is registered under."""
        return "heredoc_template"

    @property
    def heredoc(self):
        """The heredoc token, i.e. the first (and only expected) child."""
        return self.children[0]

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Serialize the heredoc into a double-quoted string.

        When ``options.preserve_heredocs`` is true the token text is kept
        as-is; otherwise only group 2 of ``HEREDOC_PATTERN`` (the text captured
        between the opener line and the closing delimiter) is kept. In both
        cases trailing newlines, tabs and spaces are removed before quoting.

        :raises RuntimeError: if the heredoc has to be unwrapped but the token
            text does not match ``HEREDOC_PATTERN``.
        """
        heredoc = self.heredoc.serialize(options, context)

        if options.preserve_heredocs:
            body = heredoc
        else:
            found = HEREDOC_PATTERN.match(heredoc)
            if found is None:
                raise RuntimeError(f"Invalid Heredoc token: {heredoc}")
            body = found.group(2)

        return '"' + body.rstrip(self._trim_chars) + '"'
108+
109+
110+
class HeredocTrimTemplateRule(HeredocTemplateRule):
    """Rule holding a single HEREDOC_TRIM_TEMPLATE token (a ``<<-NAME`` heredoc).

    NOTE(review): ``lark_name()`` returns "heredoc_trim_template" while the
    transformer callback in transformer.py is named ``heredoc_template_trim``;
    confirm which one matches the grammar rule name.
    """

    _children: Tuple[HEREDOC_TRIM_TEMPLATE]

    @staticmethod
    def lark_name() -> str:
        """Return the rule name this class is registered under."""
        return "heredoc_trim_template"

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Serialize the heredoc into a double-quoted, de-indented string.

        See https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#template-expressions
        Heredocs declared with "<<-" have their common leading indentation
        removed: the minimum number of leading spaces over all lines is
        computed and that many characters are cut from the start of each line.

        When ``options.preserve_heredocs`` is false, only group 2 of
        ``HEREDOC_TRIM_PATTERN`` (the text between the opener line and the
        closing delimiter) is processed; otherwise the whole token text is.

        :raises RuntimeError: if the heredoc has to be unwrapped but the token
            text does not match ``HEREDOC_TRIM_PATTERN``.
        """
        heredoc = self.heredoc.serialize(options, context)

        if not options.preserve_heredocs:
            match = HEREDOC_TRIM_PATTERN.match(heredoc)
            if not match:
                raise RuntimeError(f"Invalid Heredoc token: {heredoc}")
            heredoc = match.group(2)

        lines = heredoc.rstrip(self._trim_chars).split("\n")

        # Blank (empty or whitespace-only) lines are excluded from the
        # minimum: counting them as "0 leading spaces" would disable trimming
        # for any heredoc that merely contains an empty line.
        min_spaces = min(
            (len(line) - len(line.lstrip(" ")) for line in lines if line.strip()),
            default=0,  # every line is blank -> nothing to trim
        )

        # Cut the common indentation from content lines; blank lines are left
        # untouched so no whitespace other than the shared prefix is dropped.
        trimmed = [line[min_spaces:] if line.strip() else line for line in lines]
        return '"' + "\n".join(trimmed) + '"'
146+

hcl2/rule_transformer/rules/tokens.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,12 +67,14 @@ def serialize_conversion(self) -> Callable[[Any], str]:
6767

6868

6969
# explicitly define various kinds of string-based tokens for type hinting
70-
# variable value
70+
# variable values
7171
NAME = StringToken["NAME"]
7272
STRING_CHARS = StringToken["STRING_CHARS"]
7373
ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"]
7474
BINARY_OP = StringToken["BINARY_OP"]
75-
# static value
75+
# Heredoc tokens are parametrized from StringToken directly, like the other
# variable-value tokens here, instead of from the STRING_CHARS alias.
# NOTE(review): the grammar terminal in hcl2.lark is HEREDOC_TEMPLATE_TRIM,
# while this token is named HEREDOC_TRIM_TEMPLATE - confirm the intended name.
HEREDOC_TEMPLATE = StringToken["HEREDOC_TEMPLATE"]
HEREDOC_TRIM_TEMPLATE = StringToken["HEREDOC_TRIM_TEMPLATE"]
77+
# static values
7678
EQ = StaticStringToken[("EQ", "=")]
7779
COLON = StaticStringToken[("COLON", ":")]
7880
LPAR = StaticStringToken[("LPAR", "(")]

hcl2/rule_transformer/transformer.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@
4444
from hcl2.rule_transformer.rules.strings import (
4545
InterpolationRule,
4646
StringRule,
47-
StringPartRule,
47+
StringPartRule,
48+
HeredocTemplateRule,
49+
HeredocTrimTemplateRule,
4850
)
4951
from hcl2.rule_transformer.rules.tokens import (
5052
NAME,
@@ -127,6 +129,14 @@ def string_part(self, meta: Meta, args) -> StringPartRule:
127129
def interpolation(self, meta: Meta, args) -> InterpolationRule:
128130
return InterpolationRule(args, meta)
129131

132+
@v_args(meta=True)
def heredoc_template(self, meta: Meta, args) -> HeredocTemplateRule:
    """Build a HeredocTemplateRule from the given children and meta.

    Presumably invoked by Lark for the grammar rule of the same name;
    confirm against hcl2.lark.
    """
    rule = HeredocTemplateRule(args, meta)
    return rule
135+
136+
@v_args(meta=True)
def heredoc_template_trim(self, meta: Meta, args) -> HeredocTrimTemplateRule:
    """Build a HeredocTrimTemplateRule from the given children and meta.

    NOTE(review): HeredocTrimTemplateRule.lark_name() returns
    "heredoc_trim_template", which differs from this method's name; confirm
    which spelling matches the grammar rule.
    """
    rule = HeredocTrimTemplateRule(args, meta)
    return rule
139+
130140
@v_args(meta=True)
131141
def expr_term(self, meta: Meta, args) -> ExprTermRule:
132142
return ExprTermRule(args, meta)

hcl2/rule_transformer/utils.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
1+
import re
12
from contextlib import contextmanager
23
from dataclasses import dataclass, replace
34
from typing import Generator
45

6+
# Heredoc matchers. Group 1 is the delimiter name; group 2 is everything after
# the opener line up to the last occurrence of that delimiter.
# Plain form, opened with "<<NAME":
HEREDOC_PATTERN = re.compile(
    r"<<([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1",
    flags=re.DOTALL,
)
# Indented form, opened with "<<-NAME":
HEREDOC_TRIM_PATTERN = re.compile(
    r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1",
    flags=re.DOTALL,
)
8+
9+
510

611
@dataclass
712
class SerializationOptions:
@@ -10,11 +15,12 @@ class SerializationOptions:
1015
wrap_objects: bool = False
1116
wrap_tuples: bool = False
1217
explicit_blocks: bool = True
18+
preserve_heredocs: bool = True
1319

1420

1521
@dataclass
1622
class DeserializationOptions:
17-
pass
23+
heredocs_to_strings: bool = False
1824

1925

2026
@dataclass

0 commit comments

Comments
 (0)