@@ -85,14 +85,19 @@ def __read_rar_archive(self, path: str, tmp_dir: str, need_content_analysis: boo
8585 yield self .__save_archive_file (tmp_dir = tmp_dir , file_name = name , file = file , need_content_analysis = need_content_analysis )
8686
def __read_7z_archive(self, path: str, tmp_dir: str, need_content_analysis: bool) -> Iterator[AttachedFile]:
    """
    Unpack a 7z archive and yield one attached file per extracted file.

    The archive is extracted in full into a private temporary directory, which is
    removed once the generator is exhausted or closed. Each extracted file is then
    opened in binary mode and handed to ``__save_archive_file``.

    :param path: path to the 7z archive on disk
    :param tmp_dir: directory forwarded to ``__save_archive_file`` for the saved files
    :param need_content_analysis: flag forwarded unchanged to ``__save_archive_file``
    :return: iterator over the values returned by ``__save_archive_file``
    """
    import os
    import tempfile

    import py7zr

    # The scratch directory is deliberately named differently from the `tmp_dir`
    # argument: it only holds the raw extraction result and is discarded afterwards.
    with tempfile.TemporaryDirectory() as extract_dir:
        with py7zr.SevenZipFile(path, "r") as arch_file:
            arch_file.extractall(extract_dir)

        for dir_path, dir_names, file_names in os.walk(extract_dir):
            # os.walk lists entries in arbitrary order; sorting the directory names
            # in place (top-down walk) and the file names makes the yield order stable.
            dir_names.sort()
            for file_name in sorted(file_names):
                file_path = os.path.join(dir_path, file_name)
                with open(file_path, "rb") as file:
                    yield self.__save_archive_file(tmp_dir=tmp_dir, file_name=file_name, file=file, need_content_analysis=need_content_analysis)
96101
97102 def __save_archive_file (self , tmp_dir : str , file_name : str , file : IO [bytes ], need_content_analysis : bool ) -> AttachedFile :
98103 import os
0 commit comments