@@ -222,6 +222,63 @@ def test_magika_module_with_python_and_non_python_content() -> None:
222
222
assert res .prediction .output .label == ContentTypeLabel .TXT
223
223
224
224
225
def test_magika_module_with_whitespaces() -> None:
    """Check that whitespace-only content is identified as TXT at boundary sizes.

    The content is a run of ASCII spaces whose length sits on, just below, and
    just above each size threshold read from the model configuration
    (``min_file_size_for_dl``, ``beg_size``, ``end_size``, their sum, and 1x,
    2x, 4x ``block_size``), plus the single-byte case.

    Every size is exercised through all three entry points
    (``identify_bytes``, ``identify_stream``, ``identify_path``), and each
    result must be ok, carry an ``UNDEFINED`` deep-learning label, and a
    ``TXT`` output label.
    """
    m = Magika()
    cfg = m._model_config

    # Thresholds around which off-by-one mistakes are most likely to show up.
    boundary_sizes = (
        cfg.min_file_size_for_dl,
        cfg.beg_size,
        cfg.end_size,
        cfg.beg_size + cfg.end_size,
        cfg.block_size,
        2 * cfg.block_size,
        4 * cfg.block_size,
    )

    # A set collapses sizes that coincide (e.g. when two thresholds are equal
    # or adjacent); sorting keeps the run order deterministic.
    ws_nums = sorted(
        {1} | {size + delta for size in boundary_sizes for delta in (-1, 0, 1)}
    )

    for ws_num in ws_nums:
        print(f"Calling identify_bytes with {ws_num} whitespaces")
        content = b" " * ws_num

        # Separate asserts so a failure reports exactly which expectation broke.
        res = m.identify_bytes(content)
        assert res.ok
        assert res.dl.label == ContentTypeLabel.UNDEFINED
        assert res.output.label == ContentTypeLabel.TXT

        res = m.identify_stream(io.BytesIO(content))
        assert res.ok
        assert res.dl.label == ContentTypeLabel.UNDEFINED
        assert res.output.label == ContentTypeLabel.TXT

        # identify_path needs a real file on disk; the directory is removed
        # when the context manager exits.
        with tempfile.TemporaryDirectory() as td:
            tf_path = Path(td) / "test.bin"
            tf_path.write_bytes(content)
            res = m.identify_path(tf_path)
            assert res.ok
            assert res.dl.label == ContentTypeLabel.UNDEFINED
            assert res.output.label == ContentTypeLabel.TXT
280
+
281
+
225
282
def test_magika_module_with_different_prediction_modes () -> None :
226
283
model_dir = utils .get_default_model_dir ()
227
284
m = Magika (model_dir = model_dir , prediction_mode = PredictionMode .BEST_GUESS )
0 commit comments