Skip to content

Commit 769bc01

Browse files
committed
Fully tested SStub pattern detection
1 parent 1464b94 commit 769bc01

File tree

5 files changed

+1809
-24
lines changed

5 files changed

+1809
-24
lines changed

code_diff/gumtree/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from .editmap import gumtree_editmap
33
from .chawathe import compute_chawathe_edit_script
44
from .ops import (Update, Insert, Delete, Move)
5-
from .ops import serialize_script
5+
from .ops import serialize_script, deserialize_script
66

77
# Edit script ----------------------------------------------------------------
88

code_diff/gumtree/ops.py

Lines changed: 209 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
from dataclasses import dataclass
23
from typing import Any, Tuple
34

@@ -27,8 +28,6 @@ class Delete(EditOperation):
2728

2829
# Serialization --------------------------------
2930

30-
import json
31-
3231

3332
def _serialize_new_node(new_node_index, node):
3433

@@ -93,3 +92,211 @@ def serialize_script(edit_script, indent = 0):
9392
return "[\n%s\n]" % (",\n").join(sedit_script)
9493

9594
return "[%s]" % ", ".join(sedit_script)
95+
96+
97+
98+
# Deserialize --------------------------------------------------------------------------------------------------------------------------------
99+
100+
class DASTNode:
    """Lightweight stand-in for an AST node read back from a serialized script.

    Stores the node ``type``, its ``position`` (kept exactly as passed in) and
    an optional ``text``.
    """

    def __init__(self, type, position, text = None):
        self.type, self.position, self.text = type, position, text

    def __repr__(self):
        # Text is passed through str() so a missing text renders as "None".
        fields = (self.type, str(self.text), self.position)
        return "Node(%s, %s, %s)" % fields
109+
110+
111+
class InsertNode:
    """Placeholder for a node that is created by an insert operation.

    Carries the identifier it is referenced by (``node_id``), its ``type``
    and an optional ``text``.
    """

    def __init__(self, node_id, type, text = None):
        self.node_id, self.type, self.text = node_id, type, text

    def __repr__(self):
        # Text is passed through str() so a missing text renders as "None".
        fields = (self.node_id, self.type, str(self.text))
        return "%s(%s, %s)" % fields
120+
121+
122+
def _split_args(inst):
    """Split the top-level arguments of a serialized call-like string.

    For an input such as ``"Update((a, b), c)"`` the text between the first
    opening bracket and its matching closing bracket is cut at every comma
    that sits at bracket depth one, yielding ``["(a, b)", "c"]``. Each piece
    is stripped of surrounding whitespace.

    Brackets, commas and quotes are ignored when they are part of a
    ``c:c`` token (the same punctuation character on both sides of a colon,
    e.g. ``(:(``), and everything between a pair of quote characters is
    skipped.

    Args:
        inst: The serialized instruction or node description.

    Returns:
        A list with the stripped argument strings; empty if no complete
        bracket pair was found.
    """
    special = ("(", ")", ",", "\"", "\'")
    quotes = ("\"", "\'")
    last = len(inst) - 1

    cuts = []
    bracket_open = 0
    str_open = False

    for i, c in enumerate(inst):

        # Look around for the "c:c" pattern. Both index reads are guarded:
        # without the guards inst[i + 2] can run past the end of the string
        # and inst[i - 2] can silently wrap around to the end for i == 1.
        if 0 < i < last and c in special:
            if i >= 2 and inst[i - 1] == ":" and inst[i - 2] == c:
                continue
            if i + 2 <= last and inst[i + 1] == ":" and inst[i + 2] == c:
                continue

        if c in quotes:
            str_open = not str_open

        # Inside a quoted section nothing is structural.
        if str_open:
            continue

        if c == "(":
            if bracket_open == 0:
                cuts.append(i)
            bracket_open += 1
            continue

        if c == ")":
            bracket_open -= 1
            if bracket_open == 0:
                cuts.append(i)
            continue

        if bracket_open == 1 and c == ",":
            cuts.append(i)

    # Consecutive cut positions delimit one argument each.
    return [inst[start + 1: end].strip() for start, end in zip(cuts, cuts[1:])]
153+
154+
155+
def _deserialize_insert_node(node_registry, node_info):
    """Turn the serialized description of an inserted node into an InsertNode.

    A description without an opening bracket (or one of the bracket tokens
    ``(:(`` / ``):)``) is parsed with ``_parse_type`` and returned as a fresh
    node with id ``"T"``. Otherwise the description is split into a type and
    an id; the node for that id is looked up in ``node_registry`` and created
    and registered there on first sight.
    """
    is_plain_token = "(" not in node_info or node_info in ["(:(", "):)"]
    if is_plain_token:
        token_type, token_text = _parse_type(node_info)
        return InsertNode("T", token_type, token_text)

    node_type, node_id = _split_args(node_info)

    # Register the node the first time its id appears so that later
    # references resolve to the very same object.
    if node_id not in node_registry:
        node_registry[node_id] = InsertNode(node_id, node_type)

    return node_registry[node_id]
168+
169+
170+
def _parse_type(node_type):
    """Separate a ``type:text`` label at its first colon.

    Returns a two-element list ``[type, text]`` when a colon is present and
    the tuple ``(node_type, None)`` when it is not.
    """
    head, colon, tail = node_type.partition(":")

    if not colon:
        return node_type, None

    return [head, tail]
174+
175+
176+
def _deserialize_node(node_registry, node_info):
    """Resolve a serialized node reference.

    A description without an opening bracket is treated as an id: the
    registered node is returned if the id is known, otherwise a new
    InsertNode of type ``"unknown"`` is returned (it is not registered).
    A bracketed description is split into a ``type[:text]`` label and a
    position and returned as a DASTNode.
    """
    if "(" not in node_info:
        if node_info in node_registry:
            return node_registry[node_info]
        return InsertNode(node_info, "unknown")

    type_label, ast_position = _split_args(node_info)
    ast_type, ast_text = _parse_type(type_label)
    return DASTNode(ast_type, ast_position, text = ast_text)
187+
188+
189+
def _deserialize_update(node_registry, inst):
    """Build an Update from a serialized ``Update(target, value)`` string.

    The first argument is resolved through ``_deserialize_node``; the second
    is passed on as the raw argument string.
    """
    target_info, new_value = _split_args(inst)
    return Update(_deserialize_node(node_registry, target_info), new_value)
193+
194+
195+
def _deserialize_insert(node_registry, inst):
    """Build an Insert from a serialized three-argument insert string.

    The arguments are, in order, the new node, the target node and the
    position. The new node is resolved first, then the target; the position
    is converted to ``int`` and ``-1`` is passed as the fourth Insert
    argument.
    """
    new_info, target_info, position = _split_args(inst)

    inserted = _deserialize_insert_node(node_registry, new_info)
    target = _deserialize_node(node_registry, target_info)

    return Insert(target, inserted, int(position), -1)
202+
203+
204+
def _deserialize_delete(node_registry, inst):
    """Build a Delete from a serialized delete string.

    Only the first argument is used; it is resolved through
    ``_deserialize_node``.
    """
    target_info = _split_args(inst)[0]
    return Delete(_deserialize_node(node_registry, target_info))
208+
209+
210+
def _deserialize_move(node_registry, inst):
    """Build a Move from a serialized three-argument move string.

    The arguments are read as (from node, to node, position). Both nodes are
    resolved through ``_deserialize_node`` (from node first) and handed to
    Move as ``Move(to_node, from_node, int(position))``.
    """
    from_info, to_info, position = _split_args(inst)

    moved = _deserialize_node(node_registry, from_info)
    destination = _deserialize_node(node_registry, to_info)

    return Move(destination, moved, int(position))
215+
216+
217+
def deserialize_script(script_string):
    """Parse a multi-line serialized edit script back into operations.

    The first and the last line of ``script_string`` are dropped (they are
    expected to hold the enclosing ``[`` and ``]``); every remaining line is
    stripped and dispatched on its prefix (``Update``, ``Insert``,
    ``Delete`` or ``Move``). Inserted nodes are shared between operations
    through a registry that lives for the duration of the call.

    Args:
        script_string: The serialized script, one operation per line.

    Returns:
        A list with one operation object per non-blank instruction line.

    Raises:
        ValueError: If a non-blank line does not start with a known
            operation name.
    """
    script = []
    node_registry = {}

    for instruction in script_string.split("\n")[1:-1]:
        instruction = instruction.strip()

        # Blank lines carry no operation. Previously they re-appended the
        # preceding operation (or failed on the very first line).
        if not instruction:
            continue

        if instruction.startswith("Update"):
            op = _deserialize_update(node_registry, instruction)
        elif instruction.startswith("Insert"):
            op = _deserialize_insert(node_registry, instruction)
        elif instruction.startswith("Delete"):
            op = _deserialize_delete(node_registry, instruction)
        elif instruction.startswith("Move"):
            op = _deserialize_move(node_registry, instruction)
        else:
            # Fail loudly instead of silently duplicating the previous op.
            raise ValueError("Unknown edit operation: %r" % instruction)

        script.append(op)

    return script
238+
239+
240+
# Fast serialize -----------------------------------------------------------------------------------------------------------------------------
241+
242+
def _json_serialize_new_node(new_node_index, node):
    """Return the ``N<k>`` label for a node identified by ``node.node_id``.

    Ids are numbered in order of first appearance; ``new_node_index`` maps
    each id to its number and is extended in place for unseen ids.
    """
    # len() is evaluated before the insertion, so a new id receives the next
    # free number.
    slot = new_node_index.setdefault(node.node_id, len(new_node_index))
    return "N%d" % slot
248+
249+
250+
def _json_serialize_ast_node(node):
    """Flatten an AST node into ``[label, a, b, c, d]``.

    ``label`` is the node type, followed by ``":" + text`` when the node has
    a non-empty text. The four remaining entries are ``position[0][0]``,
    ``position[0][1]``, ``position[1][0]`` and ``position[1][1]``
    (presumably start/end coordinates - confirm against the node type).
    """
    first, second = node.position[0], node.position[1]

    label = node.type
    if node.text:
        label = label + ":" + node.text

    return [label, first[0], first[1], second[0], second[1]]
257+
258+
259+
def _json_serialize_node(new_node_index, node):
    """Serialize a node for the JSON format.

    Nodes that expose a ``node_id`` attribute are rendered as an ``N<k>``
    label via ``_json_serialize_new_node``; all others are rendered via
    ``_json_serialize_ast_node``.
    """
    if not hasattr(node, 'node_id'):
        return _json_serialize_ast_node(node)

    return _json_serialize_new_node(new_node_index, node)
265+
266+
267+
def json_serialize(edit_script):
    """Serialize an edit script into a compact JSON string.

    Every operation becomes a JSON array that starts with the operation's
    class name and its serialized target node:

    * ``Update``: ``[name, target, value]``
    * ``Insert``: ``[name, target, new_node]`` where ``new_node`` is
      ``[type, "N<k>"]`` for a node without text and ``["type:text", "T"]``
      for a node with text
    * ``Move``:   ``[name, target, node, position]``
    * ``Delete``: ``[name, target]``

    Operations of any other class are skipped.

    Args:
        edit_script: Iterable of edit operations.

    Returns:
        The JSON encoded list of operations.
    """
    edit_ops = []
    new_node_index = {}

    for operation in edit_script:
        operation_name = operation.__class__.__name__
        target_node_str = _json_serialize_node(new_node_index, operation.target_node)

        if operation_name == "Update":
            edit_ops.append([operation_name, target_node_str, operation.value])

        elif operation_name == "Insert":
            # The inserted node is indexed like a (type, text) pair here.
            new_node = operation.node

            if new_node[1] is None:
                # Node without text: register it under the operation's
                # insert id so later references can use the N<k> label.
                new_node_index[operation.insert_id] = len(new_node_index)
                new_node_str = [new_node[0], "N%d" % new_node_index[operation.insert_id]]
            else: # Leaf node
                new_node_str = ["%s:%s" % new_node, "T"]

            # NOTE(review): unlike Move, the insert position is not emitted;
            # confirm whether the JSON reader needs it.
            edit_ops.append([operation_name, target_node_str, new_node_str])

        elif operation_name == "Move":
            # Use the JSON node serializer (not the text one) so the moved
            # node is encoded the same way as every other node in the output.
            new_node_str = _json_serialize_node(new_node_index, operation.node)

            edit_ops.append([operation_name, target_node_str, new_node_str, operation.position])

        elif operation_name == "Delete":
            edit_ops.append([operation_name, target_node_str])

    return json.dumps(edit_ops)
300+
301+
302+
# Fast deserialize ----------------------------------------------------------------------

0 commit comments

Comments
 (0)