@@ -191,10 +191,19 @@ def worker(func, args, return_dict, key):
191191
192192
193193# layout解析
194- def layout_parse_and_callback (file_id , file_name : str , contents : bytes , callbacks : list , user : str = None , parser_context_param = None ):
194+ def layout_parse_and_callback (file_id , file_name : str , contents : bytes , callbacks : list , user_info : dict = None ,
195+ parser_context_param = None ):
195196 try :
196197 parser_context .register_all (parser_context_param )
197- parser_context .register_user (user )
198+ # 在子进程中重新设置用户信息
199+ if user_info .get ('user' ):
200+ parser_context .register_user (user_info ['user' ])
201+ if user_info .get ('ak_code' ):
202+ parser_context .register_ak_code (user_info ['ak_code' ])
203+ if user_info .get ('ak_sha' ):
204+ parser_context .register_ak_sha (user_info ['ak_sha' ])
205+ logger_context .get_logger ().info (
206+ f"子进程验证 - ak_code: { parser_context .get_ak_code ()} , ak_sha: { parser_context .get_ak_sha ()} , user: { parser_context .get_user ()} " )
198207 # 获取版面解析结果
199208 layout_result_json , layout_result_text = layout_parse (file_name , contents , file_id )
200209 # 解析失败,直接回调
@@ -214,10 +223,21 @@ def layout_parse_and_callback(file_id, file_name: str, contents: bytes, callback
214223
215224
216225# domtree解析
217- def domtree_parse_and_callback (file_id , file_name : str , contents : bytes , callbacks : list , user : str = None , parser_context_param = None ):
226+ def domtree_parse_and_callback (file_id , file_name : str , contents : bytes , callbacks : list , user_info : dict = None ,
227+ parser_context_param = None ):
218228 try :
219229 parser_context .register_all (parser_context_param )
220- parser_context .register_user (user )
230+ # 在子进程中重新设置用户信息
231+ if user_info :
232+ if user_info .get ('user' ):
233+ parser_context .register_user (user_info ['user' ])
234+ if user_info .get ('ak_code' ):
235+ parser_context .register_ak_code (user_info ['ak_code' ])
236+ if user_info .get ('ak_sha' ):
237+ parser_context .register_ak_sha (user_info ['ak_sha' ])
238+ logger_context .get_logger ().info (f"domtree子进程开始: file_id={ file_id } " )
239+ logger_context .get_logger ().info (
240+ f"子进程验证 - ak_code: { parser_context .get_ak_code ()} , ak_sha: { parser_context .get_ak_sha ()} , user: { parser_context .get_user ()} " )
221241 # 获取domtree解析结果
222242 parse_succeed , parse_result , markdown_res = domtree_parse (file_name , contents , file_id )
223243 # 解析失败,直接回调
@@ -396,10 +416,19 @@ def parse_result_layout_and_domtree(file_info, callbacks: list):
396416
397417 parse_contents = pdf_stream if pdf_stream else contents
398418
419+ # 准备 user_info 字典
420+ user_info = {
421+ 'user' : parser_context .get_user (),
422+ 'ak_code' : parser_context .get_ak_code (),
423+ 'ak_sha' : parser_context .get_ak_sha ()
424+ }
425+
399426 p1 = multiprocessing .Process (target = worker , args = (
400- layout_parse_and_callback , (file_id , file_name , parse_contents , callbacks , parser_context .get_user (), parser_context ), return_dict , 'layout_parse' ))
427+ layout_parse_and_callback , (file_id , file_name , parse_contents , callbacks , user_info , parser_context ),
428+ return_dict , 'layout_parse' ))
401429 p2 = multiprocessing .Process (target = worker , args = (
402- domtree_parse_and_callback , (file_id , file_name , parse_contents , callbacks , parser_context .get_user (), parser_context ), return_dict , 'domtree_parse' ))
430+ domtree_parse_and_callback , (file_id , file_name , parse_contents , callbacks , user_info , parser_context ),
431+ return_dict , 'domtree_parse' ))
403432 p1 .start ()
404433 p2 .start ()
405434
0 commit comments