@@ -172,31 +172,40 @@ function Base.show(io::IO, l::Dense)
172
172
end
173
173
174
174
"""
    Diagonal(size::Integer...; bias=true, init=ones32)
    Diagonal(scale::AbstractArray, [bias])

Create an element-wise linear layer, which performs

    y = scale .* x .+ bias

with no activation function.

The learnable scale & bias are initialised `init(size...)` and `zeros32(size...)`,
with `init=ones32` by default. You may specify the function `init`,
turn off trainable bias with `bias=false`, or provide the array(s) explicitly.

Used by [`LayerNorm`](@ref).
"""
struct Diagonal{A<:AbstractArray, B}
  scale::A
  bias::B
  # Inner constructor: `bias` may be `true` (trainable zeros of matching size),
  # `false` (no bias term), or an explicit array — `create_bias` resolves it
  # against `size(W)` so `scale` and `bias` broadcast together.
  function Diagonal(W::M, bias = true) where M<:AbstractArray
    b = create_bias(W, bias, size(W)...)
    return new{M, typeof(b)}(W, b)
  end
end
191
198
192
# Build from dimensions: the scale array comes from `init(sz...)` — `ones32` by
# default — while `bias` is forwarded unchanged to the array constructor.
Diagonal(sz::Integer...; bias = true, init = ones32) = Diagonal(init(sz...), bias)

# Register the layer's fields with Functors.jl so parameters are collected
# and mapped (e.g. by `fmap` / training utilities).
@functor Diagonal

# Forward pass: element-wise affine transform, broadcasting over `x`.
(a::Diagonal)(x) = a.scale .* x .+ a.bias
function Base.show(io::IO, l::Diagonal)
  # Display the stored dimensions, e.g. `Diagonal(2, 3)`; the bias flag is
  # printed explicitly when absent, since it cannot be read off the size.
  dims = join(size(l.scale), ", ")
  print(io, "Diagonal(", dims)
  if l.bias == false
    print(io, "; bias=false")
  end
  print(io, ")")
end
201
210
202
211
"""
0 commit comments