1
+ import json
1
2
from dataclasses import dataclass
2
3
from typing import Any , Tuple
3
4
@@ -27,8 +28,6 @@ class Delete(EditOperation):
27
28
28
29
# Serialization --------------------------------
29
30
30
- import json
31
-
32
31
33
32
def _serialize_new_node (new_node_index , node ):
34
33
@@ -93,3 +92,211 @@ def serialize_script(edit_script, indent = 0):
93
92
return "[\n %s\n ]" % (",\n " ).join (sedit_script )
94
93
95
94
return "[%s]" % ", " .join (sedit_script )
95
+
96
+
97
+
98
+ # Deserialize --------------------------------------------------------------------------------------------------------------------------------
99
+
100
class DASTNode:
    """Plain record for a node rebuilt from a serialized edit script.

    Stores the three values handed to the constructor unchanged:
    ``type``, ``position`` and the optional ``text``.
    """

    def __init__(self, type, position, text=None):
        # The parameter name ``type`` shadows the builtin, but it is part of
        # the constructor's keyword interface and therefore kept.
        self.text = text
        self.position = position
        self.type = type

    def __repr__(self):
        """Return ``Node(<type>, <text>, <position>)``."""
        shown = (self.type, str(self.text), self.position)
        return "Node(%s, %s, %s)" % shown
109
+
110
+
111
class InsertNode:
    """Plain record for a node that is referred to by an identifier.

    Stores ``node_id``, ``type`` and the optional ``text`` exactly as given.
    """

    def __init__(self, node_id, type, text=None):
        # ``type`` shadows the builtin; kept because it is part of the
        # constructor's keyword interface.
        self.text = text
        self.type = type
        self.node_id = node_id

    def __repr__(self):
        """Return ``<node_id>(<type>, <text>)``."""
        shown = (self.node_id, self.type, str(self.text))
        return "%s(%s, %s)" % shown
120
+
121
+
122
+ def _split_args (inst ):
123
+ args = []
124
+
125
+ bracket_open = 0
126
+ str_open = False
127
+ for i , c in enumerate (inst ):
128
+
129
+ # Lookahead
130
+ if i > 0 and i < len (inst ) - 1 and c in ["(" , ")" , "," , "\" " , "\' " ]:
131
+ if inst [i - 1 ] == ":" and inst [i - 2 ] == c : continue
132
+ if inst [i + 1 ] == ":" and inst [i + 2 ] == c : continue
133
+
134
+ if c in ["\" " , "\' " ]:
135
+ str_open = not str_open
136
+
137
+ if str_open : continue
138
+
139
+ if c == "(" :
140
+ if bracket_open == 0 : args .append (i )
141
+ bracket_open += 1
142
+ continue
143
+
144
+ if c == ")" :
145
+ bracket_open -= 1
146
+ if bracket_open == 0 : args .append (i )
147
+ continue
148
+
149
+ if bracket_open == 1 and c == "," :
150
+ args .append (i )
151
+
152
+ return [inst [args [i - 1 ] + 1 : args [i ]].strip () for i in range (1 , len (args ))]
153
+
154
+
155
def _deserialize_insert_node(node_registry, node_info):
    """Build (or look up) the ``InsertNode`` described by ``node_info``.

    Strings without ``(`` -- and the two literal tokens ``(:(`` and ``):)``
    -- are parsed with ``_parse_type`` and returned as a fresh node whose id
    is ``"T"``; such nodes are not stored in the registry. Any other string
    is split into ``(type, id)`` with ``_split_args``; the registry entry
    for that id is returned when present, otherwise a new node is created,
    registered under the id and returned.

    Args:
        node_registry: dict mapping node ids to nodes; may be extended.
        node_info: serialized description of the inserted node.
    """
    is_plain_token = "(" not in node_info or node_info in ["(:(", "):)"]
    if is_plain_token:
        return InsertNode("T", *_parse_type(node_info))

    kind, ident = _split_args(node_info)

    if ident not in node_registry:
        created = InsertNode(ident, kind)
        node_registry[ident] = created
        return created

    return node_registry[ident]
168
+
169
+
170
+ def _parse_type (node_type ):
171
+ if ":" in node_type :
172
+ return node_type .split (":" , 1 )
173
+ return node_type , None
174
+
175
+
176
+ def _deserialize_node (node_registry , node_info ):
177
+
178
+ if "(" in node_info :
179
+ ast_type , ast_position = _split_args (node_info )
180
+ ast_type , ast_text = _parse_type (ast_type )
181
+ return DASTNode (ast_type , ast_position , text = ast_text )
182
+
183
+ if node_info in node_registry :
184
+ return node_registry [node_info ]
185
+
186
+ return InsertNode (node_info , "unknown" )
187
+
188
+
189
def _deserialize_update(node_registry, inst):
    """Rebuild an ``Update`` from its serialized instruction.

    The instruction must split into exactly two arguments: the target node
    reference and the update value, which is passed on as a string.
    """
    raw_target, new_value = _split_args(inst)
    return Update(_deserialize_node(node_registry, raw_target), new_value)
193
+
194
+
195
def _deserialize_insert(node_registry, inst):
    """Rebuild an ``Insert`` from its serialized instruction.

    The instruction must split into exactly three arguments: the new node,
    the target node reference and the position. The fourth ``Insert``
    argument is always ``-1``.
    """
    raw_new, raw_target, raw_position = _split_args(inst)

    # The inserted node is resolved first so that it is registered before
    # the target reference is looked up.
    inserted = _deserialize_insert_node(node_registry, raw_new)
    target = _deserialize_node(node_registry, raw_target)

    return Insert(target, inserted, int(raw_position), -1)
202
+
203
+
204
def _deserialize_delete(node_registry, inst):
    """Rebuild a ``Delete`` from its serialized instruction.

    Only the first argument of the instruction (the target node reference)
    is used.
    """
    raw_target = _split_args(inst)[0]
    return Delete(_deserialize_node(node_registry, raw_target))
208
+
209
+
210
def _deserialize_move(node_registry, inst):
    """Rebuild a ``Move`` from its serialized instruction.

    The instruction must split into exactly three arguments. The first and
    second are node references, the third is the integer position. Note that
    ``Move`` receives the second reference before the first.
    """
    raw_from, raw_to, raw_position = _split_args(inst)

    # Resolution order (first reference, then second) is kept as-is.
    source = _deserialize_node(node_registry, raw_from)
    destination = _deserialize_node(node_registry, raw_to)

    return Move(destination, source, int(raw_position))
215
+
216
+
217
def deserialize_script(script_string):
    """Parse a multi-line serialized edit script into a list of operations.

    The string is split on newlines and its first and last lines are
    discarded; each remaining line is stripped and dispatched on its prefix
    (``Update``, ``Insert``, ``Delete`` or ``Move``). A single node registry
    is shared across all lines so that later instructions can refer to nodes
    introduced by earlier ones.

    A string with fewer than three lines therefore yields an empty list.

    Args:
        script_string: serialized edit script, one instruction per line
            between a leading and a trailing line.

    Returns:
        List of deserialized operations in input order.

    Raises:
        ValueError: if a non-blank line does not start with a known
            operation name.
    """
    instructions = script_string.split("\n")[1:-1]

    script = []
    node_registry = {}
    for instruction in instructions:
        instruction = instruction.strip()

        # Blank lines carry no operation. Previously they re-appended the
        # operation of the preceding line (or raised NameError if first).
        if not instruction:
            continue

        if instruction.startswith("Update"):
            op = _deserialize_update(node_registry, instruction)
        elif instruction.startswith("Insert"):
            op = _deserialize_insert(node_registry, instruction)
        elif instruction.startswith("Delete"):
            op = _deserialize_delete(node_registry, instruction)
        elif instruction.startswith("Move"):
            op = _deserialize_move(node_registry, instruction)
        else:
            # Fail loudly instead of silently duplicating the previous op.
            raise ValueError("Unknown edit operation: %r" % instruction)

        script.append(op)

    return script
238
+
239
+
240
+ # Fast serialize -----------------------------------------------------------------------------------------------------------------------------
241
+
242
+ def _json_serialize_new_node (new_node_index , node ):
243
+
244
+ if node .node_id not in new_node_index :
245
+ new_node_index [node .node_id ] = len (new_node_index )
246
+
247
+ return "N%d" % new_node_index [node .node_id ]
248
+
249
+
250
+ def _json_serialize_ast_node (node ):
251
+ position = node .position
252
+ node_text = node .type
253
+
254
+ if node .text : node_text += ":" + node .text
255
+
256
+ return [node_text , position [0 ][0 ], position [0 ][1 ], position [1 ][0 ], position [1 ][1 ]]
257
+
258
+
259
def _json_serialize_node(new_node_index, node):
    """Serialize ``node`` for the JSON edit-script format.

    Objects that have a ``node_id`` attribute are encoded as an ``N<k>``
    label via ``_json_serialize_new_node``; everything else is encoded by
    ``_json_serialize_ast_node``.
    """
    if not hasattr(node, "node_id"):
        return _json_serialize_ast_node(node)
    return _json_serialize_new_node(new_node_index, node)
265
+
266
+
267
def json_serialize(edit_script):
    """Encode an edit script as a JSON array of operation records.

    Each operation is identified by its class name and becomes one list:

    * ``Update`` -> ``[name, target, value]``
    * ``Insert`` -> ``[name, target, new_node]``
    * ``Move``   -> ``[name, target, node, position]``
    * ``Delete`` -> ``[name, target]``

    Operations of any other class are skipped, although their
    ``target_node`` is still serialized (which can allocate an index slot).

    Args:
        edit_script: iterable of operation objects exposing ``target_node``
            plus the per-operation attributes read below.

    Returns:
        The JSON text produced by ``json.dumps``.
    """
    new_node_index = {}
    edit_ops = []

    for operation in edit_script:
        kind = operation.__class__.__name__
        # The target is always serialized first; this fixes the order in
        # which new-node labels are handed out.
        target = _json_serialize_node(new_node_index, operation.target_node)

        if kind == "Update":
            record = [kind, target, operation.value]

        elif kind == "Insert":
            inserted = operation.node

            if inserted[1] is None:
                # Second component missing: allocate a fresh label keyed by
                # the operation's insert_id.
                slot = len(new_node_index)
                new_node_index[operation.insert_id] = slot
                encoded = [inserted[0], "N%d" % slot]
            else:  # Leaf node
                encoded = ["%s:%s" % inserted, "T"]

            # NOTE(review): no position is emitted for Insert, unlike Move
            # -- confirm this is intended.
            record = [kind, target, encoded]

        elif kind == "Move":
            # NOTE(review): this calls `_serialize_node`, not the
            # `_json_serialize_node` helper used for the target -- confirm
            # which encoding is wanted here.
            moved = _serialize_node(new_node_index, operation.node)
            record = [kind, target, moved, operation.position]

        elif kind == "Delete":
            record = [kind, target]

        else:
            continue

        edit_ops.append(record)

    return json.dumps(edit_ops)
300
+
301
+
302
+ # Fast deserialize ----------------------------------------------------------------------
0 commit comments