package my_nn

// linear is a fully-connected layer

import (
	"math"

	"git.andr3h3nriqu3s.com/andr3/gotch/nn"
	"git.andr3h3nriqu3s.com/andr3/gotch/ts"

	"github.com/charmbracelet/log"
)

// LinearConfig is a configuration for a linear layer
type LinearConfig struct {
	WsInit nn.Init // initial weights
	BsInit nn.Init // optional initial bias
	Bias   bool
}

// DefaultLinearConfig creates a default LinearConfig with
// weights initialized using KaimingUniform and Bias set to true.
func DefaultLinearConfig() *LinearConfig {
	negSlope := math.Sqrt(5)
	return &LinearConfig{
		// NOTE: KaimingUniform causes a memory leak due to ts.Uniform()!
		// Avoid using it for now.
		WsInit: nn.NewKaimingUniformInit(nn.WithKaimingNegativeSlope(negSlope)),
		BsInit: nil,
		Bias:   true,
	}
}
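
// linearConfigNoBias is a small illustrative sketch (not used elsewhere in
// this file): it shows how the defaults above can be overridden, here by
// disabling the bias and swapping the Kaiming initializer for the same
// uniform initializer NewLinear uses for the bias. The -0.1/0.1 bounds are
// arbitrary example values, not taken from the original code.
func linearConfigNoBias() *LinearConfig {
	cfg := DefaultLinearConfig()
	cfg.Bias = false                          // skip creating the bias tensor
	cfg.WsInit = nn.NewUniformInit(-0.1, 0.1) // plain uniform init for the weights
	return cfg
}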

// Linear is a fully-connected (linear) layer
type Linear struct {
	Ws          *ts.Tensor
	weight_name string
	Bs          *ts.Tensor
	bias_name   string
}

// NewLinear creates a new linear layer
// y = x*wT + b
// inDim - input dimension (x) [input features - columns]
// outDim - output dimension (y) [output features - columns]
// NOTE: the weight variable is created with shape{outDim, inDim} and is then
// stored transposed (shape{inDim, outDim}) so Forward can matmul it directly;
// b will have shape{outDim}
func NewLinear(vs *Path, inDim, outDim int64, c *LinearConfig) *Linear {
	var bias_name string
	var bs *ts.Tensor
	var err error
	if c.Bias {
		switch {
		case c.BsInit == nil:
			// No explicit bias init was given: fall back to a uniform init
			// bounded by 1/sqrt(fanIn).
			shape := []int64{inDim, outDim}
			fanIn, _, err := nn.CalculateFans(shape)
			or_panic(err)
			bound := 0.0
			if fanIn > 0 {
				bound = 1 / math.Sqrt(float64(fanIn))
			}
			bsInit := nn.NewUniformInit(-bound, bound)
			bs, bias_name, err = vs.NewVarNamed("bias", []int64{outDim}, bsInit)
			or_panic(err)

			// TODO: find a better way to do this
			bs, err = bs.T(true)
			or_panic(err)
			bs, err = bs.T(true)
			or_panic(err)

			bs, err = bs.SetRequiresGrad(true, true)
			or_panic(err)

			err = bs.RetainGrad(false)
			or_panic(err)

			vs.varstore.UpdateVarTensor(bias_name, bs, true)

		case c.BsInit != nil:
			bs, bias_name, err = vs.NewVarNamed("bias", []int64{outDim}, c.BsInit)
			or_panic(err)
		}
	}

	// The weight is created as {outDim, inDim} and stored transposed so that
	// Forward can multiply the input by it without a further transpose.
	ws, weight_name, err := vs.NewVarNamed("weight", []int64{outDim, inDim}, c.WsInit)
	or_panic(err)

	ws, err = ws.T(true)
	or_panic(err)

	ws, err = ws.SetRequiresGrad(true, true)
	or_panic(err)

	err = ws.RetainGrad(false)
	or_panic(err)

	// Store the adjusted tensor back into the varstore under its registered name.
	vs.varstore.UpdateVarTensor(weight_name, ws, true)

	return &Linear{
		Ws:          ws,
		weight_name: weight_name,
		Bs:          bs,
		bias_name:   bias_name,
	}
}
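
// exampleLinearStack is a hedged sketch (not referenced elsewhere) of how a
// couple of these layers might be built on a Path; the dimensions are
// arbitrary illustration values, and whether both layers can share the same
// Path depends on how NewVarNamed scopes variable names. Forward below
// already applies ReLU, so no extra activation is added between the layers.
func exampleLinearStack(vs *Path) []*Linear {
	cfg := DefaultLinearConfig()
	return []*Linear{
		NewLinear(vs, 3, 2, cfg), // 3 input features -> 2 output features
		NewLinear(vs, 2, 1, cfg), // 2 input features -> 1 output feature
	}
}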

// Debug logs the maximum value of the gradients currently held by the
// weight and bias tensors.
func (l *Linear) Debug() {
	log.Info("Ws", "ws", l.Ws.MustGrad(false).MustMax(false).Float64Values())
	log.Info("Bs", "bs", l.Bs.MustGrad(false).MustMax(false).Float64Values())
}

// ExtractFromVarstore refreshes the layer's tensors from the given VarStore
// using the names they were registered under in NewLinear.
func (l *Linear) ExtractFromVarstore(vs *VarStore) {
	l.Ws = vs.GetTensorOfVar(l.weight_name)
	l.Bs = vs.GetTensorOfVar(l.bias_name)
}

// Implement `Module` for `Linear` struct:
// =======================================

// Forward passes the input node through the linear layer.
// NOTE:
// - It assumes the input node has 2 dimensions (a matrix). For the matrix
//   multiplication to work, the input must have the same number of columns
//   as `inDim`: the weight matrix is stored transposed (shape{inDim, outDim}),
//   so no further transpose happens here.
// - The input node should have shape `shape{batch size, input features}`
//   (shape{batchSize, inDim}). The input features count is `inDim` and the
//   output features count is `outDim` from the `LinearConfig` struct.
// - This implementation also applies ReLU to the result, so the output is
//   relu(x*w + b), not just the affine map.
//
// Example:
//
// inDim := 3
// outDim := 2
// batchSize := 4
// stored weights Ws: 3x2 (inDim x outDim)
// [ 1 1
//   1 1
//   1 1 ]
//
// input node: 4x3 (batchSize x inDim)
// [ 1 1 1
//   1 1 1
//   1 1 1
//   1 1 1 ]
//
// output (before bias and ReLU): 4x2
// [ 3 3
//   3 3
//   3 3
//   3 3 ]
func (l *Linear) Forward(xs *ts.Tensor) (retVal *ts.Tensor) {
	mul, err := xs.Matmul(l.Ws, false)
	or_panic(err)
	if l.Bs != nil {
		mul, err = mul.Add(l.Bs, false)
		or_panic(err)
	}

	// Apply ReLU as part of the layer (see the NOTE above).
	out, err := mul.Relu(false)
	or_panic(err)

	return out
}

// ForwardT implements the ModuleT interface for the Linear layer.
//
// NOTE: the train param is not used.
func (l *Linear) ForwardT(xs *ts.Tensor, train bool) (retVal *ts.Tensor) {
	mul, err := xs.Matmul(l.Ws, true)
	or_panic(err)

	if l.Bs != nil {
		mul, err = mul.Add(l.Bs, true)
		or_panic(err)
	}

	out, err := mul.Relu(true)
	or_panic(err)
	return out
}
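
// forwardAll is a small illustrative helper (not referenced elsewhere) showing
// how a stack of layers built like the sketch after NewLinear can be chained.
// Each Linear already applies ReLU inside Forward, so this is a plain fold
// over the layers.
func forwardAll(layers []*Linear, xs *ts.Tensor) *ts.Tensor {
	out := xs
	for _, l := range layers {
		out = l.Forward(out)
	}
	return out
}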