Skip to content

Commit 686414c

Browse files
committed
adds more cautious parsing options
1 parent 19e0c7a commit 686414c

File tree

1 file changed

+72
-16
lines changed

1 file changed

+72
-16
lines changed

refinery/lib/deobfuscation.py

Lines changed: 72 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import ast
77
import re
88

9-
from typing import Any
9+
from typing import Any, NamedTuple
1010

1111

1212
class ExpressionParsingFailure(ValueError):
@@ -21,6 +21,8 @@ class ExpressionParsingFailure(ValueError):
2121
ast.BitOr,
2222
ast.BitXor,
2323
ast.Constant,
24+
ast.Name,
25+
ast.Load,
2426
ast.Div,
2527
ast.FloorDiv,
2628
ast.Invert,
@@ -39,14 +41,15 @@ class ExpressionParsingFailure(ValueError):
3941
})
4042

4143

42-
def cautious_eval(
44+
def cautious_parse(
4345
definition: str,
4446
size_limit: int | None = None,
4547
walker: ast.NodeTransformer | None = None,
4648
environment: dict[str, Any] | None = None,
47-
) -> Any:
49+
allow_variables: bool = True,
50+
) -> ast.Expression:
4851
"""
49-
Very, very, very, very, very carefully evaluate a Python expression.
52+
Very, very, very, very, very carefully parse a Python expression.
5053
"""
5154
definition = re.sub(R'\s+', '', definition)
5255

@@ -57,14 +60,15 @@ def __init__(self, msg):
5760
if size_limit and len(definition) > size_limit:
5861
raise Abort(F'Size limit {size_limit} was exceeded while parsing')
5962

60-
test = definition
61-
if environment:
62-
for symbol in environment:
63-
test = re.sub(RF'\b{symbol}\b', '', test)
64-
if any(x not in '.^%|&~<>()-+/*0123456789xabcdefABCDEF' for x in test):
65-
raise Abort('Unknown characters in expression')
63+
if not allow_variables:
64+
test = definition
65+
if environment:
66+
for symbol in environment:
67+
test = re.sub(RF'\b{symbol}\b', '', test)
68+
if any(x not in '.^%|&~<>()-+/*0123456789xabcdefABCDEF' for x in test):
69+
raise Abort('Unknown characters in expression')
6670
try:
67-
expression = ast.parse(definition)
71+
expression = ast.parse(definition, mode='eval')
6872
except Exception:
6973
raise Abort('Python AST parser failed')
7074

@@ -74,19 +78,41 @@ def __init__(self, msg):
7478
nodes = ast.walk(expression)
7579

7680
try:
77-
assert type(next(nodes)) == ast.Module
78-
assert type(next(nodes)) == ast.Expr
79-
except (StopIteration, AssertionError):
81+
if type(next(nodes)) != ast.Expression:
82+
raise ValueError
83+
except (StopIteration, ValueError):
8084
raise Abort('Not a Python expression')
8185

8286
nodes = list(nodes)
8387
types = {type(node) for node in nodes}
8488

8589
if not types <= _ALLOWED_NODE_TYPES:
8690
problematic = types - _ALLOWED_NODE_TYPES
87-
raise Abort('Expression contains operations that are not allowed: {}'.format(', '.join(str(p) for p in problematic)))
91+
raise ExpressionParsingFailure(
92+
'Expression contains operations that are not allowed: {}'.format(
93+
', '.join(str(p) for p in problematic)))
94+
95+
return expression
8896

89-
return eval(definition, environment)
97+
98+
def cautious_eval(
    definition: str,
    size_limit: int | None = None,
    walker: ast.NodeTransformer | None = None,
    environment: dict[str, Any] | None = None,
) -> Any:
    """
    Very, very, very, very, very carefully evaluate a Python expression.

    The expression is first vetted by `cautious_parse` with `allow_variables=False`, so
    that apart from the symbols listed in `environment`, only characters from a fixed
    numeric/operator alphabet are accepted. The vetted tree is then compiled and
    evaluated with `environment` providing the values of all variables.

    Any exception raised by `cautious_parse` is propagated unchanged. A name that
    survives parsing but has no value in `environment` (for example `dead`, which
    consists only of hexadecimal letters) is reported as an `ExpressionParsingFailure`
    rather than leaking a `NameError`, so that `cautious_eval_or_default` can fall back
    to its default value.
    """
    tree = cautious_parse(
        definition,
        size_limit,
        walker,
        environment,
        allow_variables=False,
    )
    code = compile(tree, filename='[code]', mode='eval')
    # eval() inserts a '__builtins__' entry into the globals mapping it receives. Work on
    # a copy so the caller's dictionary is never modified, and provide an empty builtins
    # table so names such as 'exec' (made up entirely of permitted characters) do not
    # resolve to real builtin objects.
    scope: dict[str, Any] = dict(environment) if environment else {}
    scope.setdefault('__builtins__', {})
    try:
        return eval(code, scope)
    except NameError as error:
        raise ExpressionParsingFailure(
            F'Expression refers to an unknown variable: {error!s}') from error
90116

91117

92118
def cautious_eval_or_default(
@@ -96,7 +122,37 @@ def cautious_eval_or_default(
96122
walker: ast.NodeTransformer | None = None,
97123
environment: dict[str, Any] | None = None,
98124
):
125+
"""
126+
Very, very, very, very, very carefully parse a Python expression or return a default value.
127+
"""
99128
try:
100129
return cautious_eval(definition, size_limit, walker, environment)
101130
except ExpressionParsingFailure:
102131
return default
132+
133+
134+
class NamesInExpression(NamedTuple):
    """
    The variable names that occur in a parsed expression, grouped by the context in which
    the `ast.Name` node appears. Each dictionary maps a variable name to one AST node that
    refers to it.
    """
    # names whose node context is ast.Load
    loaded: dict[str, ast.Name]
    # names whose node context is ast.Store
    stored: dict[str, ast.Name]
    # names whose node context is neither ast.Load nor ast.Store
    others: dict[str, ast.Name]
138+
139+
140+
def names_in_expression(expression: ast.Expression) -> NamesInExpression:
    """
    Take a parsed expression and extract the names of all variables that are accessed.
    This returns a `refinery.lib.deobfuscation.NamesInExpression` tuple where loaded,
    stored, and otherwise accessed variables are exposed as dictionaries that map their
    name to the corresponding AST node.

    When the same name occurs more than once in the same context, the dictionary entry
    refers to the node that `ast.walk` yields last.
    """
    result = NamesInExpression({}, {}, {})
    for node in ast.walk(expression):
        if not isinstance(node, ast.Name):
            continue
        # Pick the dictionary that matches the context of this name node.
        if isinstance(node.ctx, ast.Load):
            bucket = result.loaded
        elif isinstance(node.ctx, ast.Store):
            bucket = result.stored
        else:
            bucket = result.others
        bucket[node.id] = node
    return result

0 commit comments

Comments
 (0)