Skip to content

Commit 357ef9f

Browse files
committed
python: add tests with many whitespaces
1 parent 2750c2b commit 357ef9f

File tree

1 file changed

+57
-0
lines changed

1 file changed

+57
-0
lines changed

python/tests/test_magika_python_module.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,63 @@ def test_magika_module_with_python_and_non_python_content() -> None:
222222
assert res.prediction.output.label == ContentTypeLabel.TXT
223223

224224

225+
def test_magika_module_with_whitespaces() -> None:
    """Check whitespace-only inputs of many sizes across all identify APIs.

    For each size, a buffer made only of ASCII spaces is passed to
    ``identify_bytes``, ``identify_stream`` and ``identify_path``. Every call
    must succeed, report ``ContentTypeLabel.UNDEFINED`` as the ``dl`` label
    and ``ContentTypeLabel.TXT`` as the ``output`` label.

    The sizes are 1 plus the values at, just below and just above each
    size-related field of the model config (and a few sums/multiples of
    them), presumably to probe off-by-one behavior at those boundaries.
    """
    m = Magika()
    cfg = m._model_config

    # Pivot sizes taken from the model config; each one is tested at
    # pivot - 1, pivot and pivot + 1.
    pivots = (
        cfg.min_file_size_for_dl,
        cfg.beg_size,
        cfg.end_size,
        cfg.beg_size + cfg.end_size,
        cfg.block_size,
        2 * cfg.block_size,
        4 * cfg.block_size,
    )
    # A set removes sizes that coincide when two pivots are close together.
    ws_nums = sorted(
        {1} | {pivot + delta for pivot in pivots for delta in (-1, 0, 1)}
    )

    # One temporary directory is enough: the file is overwritten each round.
    with tempfile.TemporaryDirectory() as td:
        tf_path = Path(td) / "test.bin"

        for ws_num in ws_nums:
            print(f"Calling identify_bytes with {ws_num} whitespaces")
            content = b" " * ws_num
            ctx = f"{ws_num} whitespaces"

            # Separate asserts (checked in the original order) so that a
            # failure names the exact condition, API and input size.
            res = m.identify_bytes(content)
            assert res.ok, f"identify_bytes not ok with {ctx}"
            assert (
                res.dl.label == ContentTypeLabel.UNDEFINED
            ), f"identify_bytes: unexpected dl label with {ctx}"
            assert (
                res.output.label == ContentTypeLabel.TXT
            ), f"identify_bytes: unexpected output label with {ctx}"

            res = m.identify_stream(io.BytesIO(content))
            assert res.ok, f"identify_stream not ok with {ctx}"
            assert (
                res.dl.label == ContentTypeLabel.UNDEFINED
            ), f"identify_stream: unexpected dl label with {ctx}"
            assert (
                res.output.label == ContentTypeLabel.TXT
            ), f"identify_stream: unexpected output label with {ctx}"

            tf_path.write_bytes(content)
            res = m.identify_path(tf_path)
            assert res.ok, f"identify_path not ok with {ctx}"
            assert (
                res.dl.label == ContentTypeLabel.UNDEFINED
            ), f"identify_path: unexpected dl label with {ctx}"
            assert (
                res.output.label == ContentTypeLabel.TXT
            ), f"identify_path: unexpected output label with {ctx}"
280+
281+
225282
def test_magika_module_with_different_prediction_modes() -> None:
226283
model_dir = utils.get_default_model_dir()
227284
m = Magika(model_dir=model_dir, prediction_mode=PredictionMode.BEST_GUESS)

0 commit comments

Comments
 (0)