66import ast
77import re
88
9- from typing import Any
9+ from typing import Any , NamedTuple
1010
1111
1212class ExpressionParsingFailure (ValueError ):
@@ -21,6 +21,8 @@ class ExpressionParsingFailure(ValueError):
2121 ast .BitOr ,
2222 ast .BitXor ,
2323 ast .Constant ,
24+ ast .Name ,
25+ ast .Load ,
2426 ast .Div ,
2527 ast .FloorDiv ,
2628 ast .Invert ,
@@ -39,14 +41,15 @@ class ExpressionParsingFailure(ValueError):
3941})
4042
4143
42- def cautious_eval (
44+ def cautious_parse (
4345 definition : str ,
4446 size_limit : int | None = None ,
4547 walker : ast .NodeTransformer | None = None ,
4648 environment : dict [str , Any ] | None = None ,
47- ) -> Any :
49+ allow_variables : bool = True ,
50+ ) -> ast .Expression :
4851 """
49- Very, very, very, very, very carefully evaluate a Python expression.
52+ Very, very, very, very, very carefully parse a Python expression.
5053 """
5154 definition = re .sub (R'\s+' , '' , definition )
5255
@@ -57,14 +60,15 @@ def __init__(self, msg):
5760 if size_limit and len (definition ) > size_limit :
5861 raise Abort (F'Size limit { size_limit } was exceeded while parsing' )
5962
60- test = definition
61- if environment :
62- for symbol in environment :
63- test = re .sub (RF'\b{ symbol } \b' , '' , test )
64- if any (x not in '.^%|&~<>()-+/*0123456789xabcdefABCDEF' for x in test ):
65- raise Abort ('Unknown characters in expression' )
63+ if not allow_variables :
64+ test = definition
65+ if environment :
66+ for symbol in environment :
67+ test = re .sub (RF'\b{ symbol } \b' , '' , test )
68+ if any (x not in '.^%|&~<>()-+/*0123456789xabcdefABCDEF' for x in test ):
69+ raise Abort ('Unknown characters in expression' )
6670 try :
67- expression = ast .parse (definition )
71+ expression = ast .parse (definition , mode = 'eval' )
6872 except Exception :
6973 raise Abort ('Python AST parser failed' )
7074
@@ -74,19 +78,41 @@ def __init__(self, msg):
7478 nodes = ast .walk (expression )
7579
7680 try :
77- assert type (next (nodes )) == ast .Module
78- assert type ( next ( nodes )) == ast . Expr
79- except (StopIteration , AssertionError ):
81+ if type (next (nodes )) != ast .Expression :
82+ raise ValueError
83+ except (StopIteration , ValueError ):
8084 raise Abort ('Not a Python expression' )
8185
8286 nodes = list (nodes )
8387 types = {type (node ) for node in nodes }
8488
8589 if not types <= _ALLOWED_NODE_TYPES :
8690 problematic = types - _ALLOWED_NODE_TYPES
87- raise Abort ('Expression contains operations that are not allowed: {}' .format (', ' .join (str (p ) for p in problematic )))
91+ raise ExpressionParsingFailure (
92+ 'Expression contains operations that are not allowed: {}' .format (
93+ ', ' .join (str (p ) for p in problematic )))
94+
95+ return expression
8896
89- return eval (definition , environment )
97+
def cautious_eval(
    definition: str,
    size_limit: int | None = None,
    walker: ast.NodeTransformer | None = None,
    environment: dict[str, Any] | None = None,
) -> Any:
    """
    Very, very, very, very, very carefully evaluate a Python expression.

    The expression is first validated by `cautious_parse` with `allow_variables=False`; the
    arguments `size_limit`, `walker`, and `environment` are forwarded to it unchanged. The
    returned tree is then compiled in `eval` mode and evaluated. Any exception raised by
    `cautious_parse` propagates to the caller.

    Names in the expression are resolved against `environment` only: The evaluation runs with
    an empty set of builtins and does not fall back to the globals of this module. The given
    `environment` is used as the local scope and is never modified.
    """
    tree = cautious_parse(
        definition,
        size_limit,
        walker,
        environment,
        allow_variables=False,
    )
    code = compile(tree, filename='[code]', mode='eval')
    # NOTE: eval on externally supplied expressions is only acceptable because the tree was
    # validated above. A fresh globals dictionary with empty builtins is used so that
    # (1) eval does not insert a '__builtins__' entry into the caller's environment, and
    # (2) short names such as 'exec' that pass the character filter cannot resolve to builtins.
    return eval(code, {'__builtins__': {}}, environment)
90116
91117
92118def cautious_eval_or_default (
@@ -96,7 +122,37 @@ def cautious_eval_or_default(
96122 walker : ast .NodeTransformer | None = None ,
97123 environment : dict [str , Any ] | None = None ,
98124):
125+ """
126+ Very, very, very, very, very carefully evaluate a Python expression or return a default value.
127+ """
99128 try :
100129 return cautious_eval (definition , size_limit , walker , environment )
101130 except ExpressionParsingFailure :
102131 return default
132+
133+
class NamesInExpression(NamedTuple):
    """
    Result type of `names_in_expression`: three dictionaries that each map a variable name to
    an `ast.Name` node with that identifier, grouped by the context of the node.
    """
    # names whose node context is ast.Load
    loaded: dict[str, ast.Name]
    # names whose node context is ast.Store
    stored: dict[str, ast.Name]
    # names with any other node context
    others: dict[str, ast.Name]
138+
139+
def names_in_expression(expression: ast.Expression):
    """
    Collect every variable name that occurs in a parsed expression. The result is a
    `refinery.lib.deobfuscation.NamesInExpression` tuple whose three dictionaries contain the
    loaded, the stored, and all otherwise accessed variables, respectively. Each dictionary
    maps the variable name to an AST node for it; when a name occurs more than once in the
    same context, the node that is visited last by `ast.walk` is kept.
    """
    loaded: dict[str, ast.Name] = {}
    stored: dict[str, ast.Name] = {}
    others: dict[str, ast.Name] = {}
    for node in ast.walk(expression):
        if isinstance(node, ast.Name):
            context = node.ctx
            # select the dictionary that matches the access context of this name
            if isinstance(context, ast.Load):
                bucket = loaded
            elif isinstance(context, ast.Store):
                bucket = stored
            else:
                bucket = others
            bucket[node.id] = node
    return NamesInExpression(loaded, stored, others)
0 commit comments