Skip to content

Commit 6cf363e

Browse files
committed
GumTree Serialization
1 parent ace1cdf commit 6cf363e

File tree

3 files changed

+94
-8
lines changed

3 files changed

+94
-8
lines changed

code_diff/gumtree/__init__.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,40 @@
22
from .editmap import gumtree_editmap
33
from .chawathe import compute_chawathe_edit_script
44
from .ops import (Update, Insert, Delete, Move)
5+
from .ops import serialize_script
56

67
# Edit script ----------------------------------------------------------------
78

8-
def compute_edit_script(source_ast, target_ast, min_height = 1, max_size = 1000, min_dice = 0.5):
    """Compute the edit script that turns source_ast into target_ast.

    The isomorphic mapping is computed with gumtree_isomap, starting at
    min_height and retrying with a smaller height as long as the mapping
    is empty and the height is still positive. The mapping is then handed
    to gumtree_editmap (together with max_size and min_dice, which are
    forwarded unchanged) and finally to compute_chawathe_edit_script.

    Returns an EditScript wrapping the resulting operations. When neither
    tree has children, the script consists of a single leaf update.
    """

    # Both ASTs are bare leaves: one update operation is the whole script.
    both_are_leaves = len(source_ast.children) == 0 and len(target_ast.children) == 0
    if both_are_leaves:
        return EditScript([_update_leaf(source_ast, target_ast)])

    # Lower the height threshold step by step until a mapping is found
    # or height 0 has been tried.
    height = min_height
    while True:
        isomap = gumtree_isomap(source_ast, target_ast, height)
        if len(isomap) != 0 or height <= 0:
            break
        height -= 1

    editmap = gumtree_editmap(isomap, source_ast, target_ast, max_size, min_dice)
    operations = compute_chawathe_edit_script(editmap, source_ast, target_ast)

    return EditScript(operations)
2525

26+
2627
# Update leaf ----------------------------------------------------------------
2728

2829
def _update_leaf(source_ast, target_ast):
    """Create an Update operation on source_ast that carries target_ast.text."""
    new_text = target_ast.text
    return Update(source_ast, new_text)
31+
32+
33+
# Edit script ----------------------------------------------------------------
34+
35+
class EditScript(list):
    """A list of edit operations whose repr is the serialized script."""

    def __init__(self, operations):
        """Fill the script with the given iterable of operations."""
        list.__init__(self, operations)

    def __repr__(self):
        """Return the string produced by serialize_script for this script."""
        serialized = serialize_script(self)
        return serialized

code_diff/gumtree/chawathe.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ def compute_chawathe_edit_script(editmap, source, target):
3333
op = Insert(
3434
parent_partner.delegate,
3535
(target_node.type, target_node.text),
36-
k
36+
k,
37+
-1
3738
)
3839
edit_script.append(op)
3940
node = parent_partner.apply(op)
@@ -150,10 +151,15 @@ def _partner_child(c, o, src_partner = False):
150151

151152
class InsertNode:
152153

154+
INSERT_COUNT = 0
155+
153156
def __init__(self, type, text = None, children = None):
154157
self.type = type
155158
self.text = text
156159

160+
self.node_id = InsertNode.INSERT_COUNT
161+
InsertNode.INSERT_COUNT += 1
162+
157163
self.parent = None
158164
self.children = children if children is not None else []
159165

@@ -226,6 +232,7 @@ def apply(self, operation):
226232

227233
if isinstance(operation, Insert):
228234
node = InsertNode(*operation.node)
235+
operation.insert_id = node.node_id
229236
wn = self.src._access_wn(node)
230237
self.children.insert(operation.position, wn)
231238

code_diff/gumtree/ops.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class Update(EditOperation):
1313
class Insert(EditOperation):
1414
node: Tuple[str, Any]
1515
position: int
16+
insert_id: int # This is necessary to keep track of nodes (TODO: Better solution?)
1617

1718
@dataclass
1819
class Move(EditOperation):
@@ -21,4 +22,70 @@ class Move(EditOperation):
2122

2223
@dataclass
class Delete(EditOperation):
    """Delete operation; declares no fields beyond those of EditOperation."""
26+
27+
28+
# Serialization --------------------------------
29+
30+
import json
31+
32+
33+
def _serialize_new_node(new_node_index, node):
34+
35+
if node.node_id not in new_node_index:
36+
new_node_index[node.node_id] = len(new_node_index)
37+
38+
return "N%d" % new_node_index[node.node_id]
39+
40+
def _serialize_ast_node(node):
41+
position = node.position
42+
node_text = node.type
43+
44+
if node.text: node_text += ":" + node.text
45+
46+
return "(%s, line %d:%d - %d:%d)" % (node_text, position[0][0], position[0][1], position[1][0], position[1][1])
47+
48+
49+
def _serialize_node(new_node_index, node):
    """Serialize a node, choosing the format by whether it has a node_id.

    Nodes with a node_id attribute are rendered through
    _serialize_new_node (using new_node_index); all other nodes are
    rendered through _serialize_ast_node.
    """
    if not hasattr(node, 'node_id'):
        return _serialize_ast_node(node)

    return _serialize_new_node(new_node_index, node)
55+
56+
57+
def serialize_script(edit_script):
    """Serialize an edit script into a JSON array of operation strings.

    Each Update, Insert, Move or Delete operation (recognised by its
    class name) becomes one string such as "Update(<target>, <value>)".
    Operations of any other class produce no entry, although their
    target node is still passed through _serialize_node.

    Returns the JSON text produced by json.dumps with indent=2.
    """

    entries = []
    new_node_index = {}  # node id -> running number used in "N<k>" labels

    for op in edit_script:

        kind = op.__class__.__name__
        # Serialized up front for every operation; this may register the
        # target in new_node_index.
        target = _serialize_node(new_node_index, op.target_node)

        if kind == "Update":
            entry = "%s(%s, %s)" % (kind, target, op.value)

        elif kind == "Delete":
            entry = "%s(%s)" % (kind, target)

        elif kind == "Move":
            moved = _serialize_node(new_node_index, op.node)
            entry = "%s(%s, %s, %d)" % (kind, target, moved, op.position)

        elif kind == "Insert":
            new_node = op.node

            if new_node[1] is None:
                # No text: record the insert id under the next running
                # number so the node can be referred to by its label.
                number = len(new_node_index)
                new_node_index[op.insert_id] = number
                inserted = "(%s, %s)" % (new_node[0], "N%d" % number)
            else:  # Leaf node
                inserted = "%s:%s" % new_node

            entry = "%s(%s, %s, %d)" % (kind, target, inserted, op.position)

        else:
            continue

        entries.append(entry)

    return json.dumps(entries, indent=2)

0 commit comments

Comments
 (0)