@@ -22,7 +22,7 @@
 
 from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
 from timm.layers import PatchEmbed, Mlp, DropPath, to_2tuple, trunc_normal_, _assert, ClassifierHead,\
-    resample_patch_embed, ndgrid
+    resample_patch_embed, ndgrid, get_act_layer, LayerType
 from ._builder import build_model_with_cfg
 from ._features_fx import register_notrace_function
 from ._registry import generate_default_cfgs, register_model, register_model_deprecations
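The import change above pulls in get_act_layer and the LayerType alias so the activation can be given by name (a string) or as a module/callable. A minimal sketch of the expected resolution, assuming timm's standard activation factory (the concrete class returned for 'gelu' may be timm's own GELU wrapper rather than nn.GELU):

import torch
import torch.nn as nn
from timm.layers import get_act_layer

act_cls = get_act_layer('gelu')           # name resolves to an activation class
act = act_cls()                           # instantiate like any nn.Module
print(act(torch.zeros(2, 3)).shape)       # torch.Size([2, 3])
assert get_act_layer(nn.ReLU) is nn.ReLU  # non-string inputs pass through unchanged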
@@ -206,7 +206,7 @@ def __init__(
             proj_drop: float = 0.,
             attn_drop: float = 0.,
             drop_path: float = 0.,
-            act_layer: nn.Module = nn.GELU,
+            act_layer: LayerType = "gelu",
             norm_layer: nn.Module = nn.LayerNorm,
             pretrained_window_size: _int_or_tuple_2_t = 0,
     ) -> None:
@@ -235,6 +235,7 @@ def __init__(
         self.shift_size: Tuple[int, int] = ss
         self.window_area = self.window_size[0] * self.window_size[1]
         self.mlp_ratio = mlp_ratio
+        act_layer = get_act_layer(act_layer)
 
         self.attn = WindowAttention(
             dim,
@@ -372,6 +373,7 @@ def __init__(
             proj_drop: float = 0.,
             attn_drop: float = 0.,
             drop_path: float = 0.,
+            act_layer: Union[str, Callable] = 'gelu',
             norm_layer: nn.Module = nn.LayerNorm,
             pretrained_window_size: _int_or_tuple_2_t = 0,
             output_nchw: bool = False,
@@ -390,6 +392,7 @@ def __init__(
             proj_drop: Projection dropout rate
             attn_drop: Attention dropout rate.
             drop_path: Stochastic depth rate.
+            act_layer: Activation layer type.
             norm_layer: Normalization layer.
             pretrained_window_size: Local window size in pretraining.
             output_nchw: Output tensors on NCHW format instead of NHWC.
@@ -424,6 +427,7 @@ def __init__(
                 proj_drop=proj_drop,
                 attn_drop=attn_drop,
                 drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path,
+                act_layer=act_layer,
                 norm_layer=norm_layer,
                 pretrained_window_size=pretrained_window_size,
             )
@@ -471,6 +475,7 @@ def __init__(
             proj_drop_rate: float = 0.,
             attn_drop_rate: float = 0.,
             drop_path_rate: float = 0.1,
+            act_layer: Union[str, Callable] = 'gelu',
             norm_layer: Callable = nn.LayerNorm,
             pretrained_window_sizes: Tuple[int, ...] = (0, 0, 0, 0),
             **kwargs,
@@ -492,6 +497,7 @@ def __init__(
             attn_drop_rate: Attention dropout rate.
             drop_path_rate: Stochastic depth rate.
             norm_layer: Normalization layer.
+            act_layer: Activation layer type.
             patch_norm: If True, add normalization after patch embedding.
             pretrained_window_sizes: Pretrained window sizes of each layer.
             output_fmt: Output tensor format if not None, otherwise output 'NHWC' by default.
@@ -541,6 +547,7 @@ def __init__(
                 proj_drop=proj_drop_rate,
                 attn_drop=attn_drop_rate,
                 drop_path=dpr[i],
+                act_layer=act_layer,
                 norm_layer=norm_layer,
                 pretrained_window_size=pretrained_window_sizes[i],
             )]
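With act_layer now threaded from SwinTransformerV2 through each stage down to the blocks, the activation can be swapped at construction time. A hedged usage sketch (the model name below and the forwarding of the act_layer kwarg through timm.create_model are assumptions, not part of this diff):

import timm

model = timm.create_model(
    'swinv2_tiny_window8_256',  # assumed registered swinv2 variant
    pretrained=False,
    act_layer='silu',           # string form, resolved per block via get_act_layer
)

Passing an activation class such as torch.nn.ReLU should work equally well, since get_act_layer returns non-string arguments unchanged.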