Skip to content

Commit d154bad

Browse files
authored
Merge pull request #28 from LianjiaTech/opt_ocr_limit
多进程透传ak信息
2 parents ea67793 + 60c4c26 commit d154bad

File tree

1 file changed: +35 -6 lines changed

1 file changed

+35
-6
lines changed

services/parse_manager.py

Lines changed: 35 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -191,10 +191,19 @@ def worker(func, args, return_dict, key):
191191

192192

193193
# layout解析
194-
def layout_parse_and_callback(file_id, file_name: str, contents: bytes, callbacks: list, user: str = None, parser_context_param=None):
194+
def layout_parse_and_callback(file_id, file_name: str, contents: bytes, callbacks: list, user_info: dict = None,
195+
parser_context_param=None):
195196
try:
196197
parser_context.register_all(parser_context_param)
197-
parser_context.register_user(user)
198+
# 在子进程中重新设置用户信息
199+
if user_info.get('user'):
200+
parser_context.register_user(user_info['user'])
201+
if user_info.get('ak_code'):
202+
parser_context.register_ak_code(user_info['ak_code'])
203+
if user_info.get('ak_sha'):
204+
parser_context.register_ak_sha(user_info['ak_sha'])
205+
logger_context.get_logger().info(
206+
f"子进程验证 - ak_code: {parser_context.get_ak_code()}, ak_sha: {parser_context.get_ak_sha()}, user: {parser_context.get_user()}")
198207
# 获取版面解析结果
199208
layout_result_json, layout_result_text = layout_parse(file_name, contents, file_id)
200209
# 解析失败,直接回调
@@ -214,10 +223,21 @@ def layout_parse_and_callback(file_id, file_name: str, contents: bytes, callback
214223

215224

216225
# domtree解析
217-
def domtree_parse_and_callback(file_id, file_name: str, contents: bytes, callbacks: list, user: str = None, parser_context_param=None):
226+
def domtree_parse_and_callback(file_id, file_name: str, contents: bytes, callbacks: list, user_info: dict = None,
227+
parser_context_param=None):
218228
try:
219229
parser_context.register_all(parser_context_param)
220-
parser_context.register_user(user)
230+
# 在子进程中重新设置用户信息
231+
if user_info:
232+
if user_info.get('user'):
233+
parser_context.register_user(user_info['user'])
234+
if user_info.get('ak_code'):
235+
parser_context.register_ak_code(user_info['ak_code'])
236+
if user_info.get('ak_sha'):
237+
parser_context.register_ak_sha(user_info['ak_sha'])
238+
logger_context.get_logger().info(f"domtree子进程开始: file_id={file_id}")
239+
logger_context.get_logger().info(
240+
f"子进程验证 - ak_code: {parser_context.get_ak_code()}, ak_sha: {parser_context.get_ak_sha()}, user: {parser_context.get_user()}")
221241
# 获取domtree解析结果
222242
parse_succeed, parse_result, markdown_res = domtree_parse(file_name, contents, file_id)
223243
# 解析失败,直接回调
@@ -396,10 +416,19 @@ def parse_result_layout_and_domtree(file_info, callbacks: list):
396416

397417
parse_contents = pdf_stream if pdf_stream else contents
398418

419+
# 准备 user_info 字典
420+
user_info = {
421+
'user': parser_context.get_user(),
422+
'ak_code': parser_context.get_ak_code(),
423+
'ak_sha': parser_context.get_ak_sha()
424+
}
425+
399426
p1 = multiprocessing.Process(target=worker, args=(
400-
layout_parse_and_callback, (file_id, file_name, parse_contents, callbacks, parser_context.get_user(), parser_context), return_dict, 'layout_parse'))
427+
layout_parse_and_callback, (file_id, file_name, parse_contents, callbacks, user_info, parser_context),
428+
return_dict, 'layout_parse'))
401429
p2 = multiprocessing.Process(target=worker, args=(
402-
domtree_parse_and_callback, (file_id, file_name, parse_contents, callbacks, parser_context.get_user(), parser_context), return_dict, 'domtree_parse'))
430+
domtree_parse_and_callback, (file_id, file_name, parse_contents, callbacks, user_info, parser_context),
431+
return_dict, 'domtree_parse'))
403432
p1.start()
404433
p2.start()
405434

0 commit comments

Comments (0)