package my_nn

// linear is a fully-connected layer

import (
	"math"

	"git.andr3h3nriqu3s.com/andr3/gotch/nn"
	"git.andr3h3nriqu3s.com/andr3/gotch/ts"
	"github.com/charmbracelet/log"
)

// LinearConfig is a configuration for a linear layer
type LinearConfig struct {
	WsInit nn.Init // initial weights
	BsInit nn.Init // optional initial bias
	Bias   bool
}

// DefaultLinearConfig creates a default LinearConfig with
// weights initialized using KaimingUniform and Bias set to true.
func DefaultLinearConfig() *LinearConfig {
	negSlope := math.Sqrt(5)
	return &LinearConfig{
		// NOTE: KaimingUniform causes a memory leak due to ts.Uniform().
		// Avoid using it for now.
		WsInit: nn.NewKaimingUniformInit(nn.WithKaimingNegativeSlope(negSlope)),
		BsInit: nil,
		Bias:   true,
	}
}

// Linear is a linear fully-connected layer
type Linear struct {
	Ws          *ts.Tensor
	weight_name string
	Bs          *ts.Tensor
	bias_name   string
}

// NewLinear creates a new linear layer
// y = x*wT + b
// inDim  - input dimension (x)  [input features - columns]
// outDim - output dimension (y) [output features - columns]
//
// NOTE: w is created with shape{outDim, inDim} and then stored transposed
// (shape{inDim, outDim}) so Forward can call Matmul directly; b has
// shape{outDim}.
func NewLinear(vs *Path, inDim, outDim int64, c *LinearConfig) *Linear {
	var bias_name string
	var bs *ts.Tensor
	var err error
	if c.Bias {
		switch {
		case c.BsInit == nil:
			shape := []int64{inDim, outDim}
			fanIn, _, err := nn.CalculateFans(shape)
			or_panic(err)

			bound := 0.0
			if fanIn > 0 {
				bound = 1 / math.Sqrt(float64(fanIn))
			}
			bsInit := nn.NewUniformInit(-bound, bound)
			bs, bias_name, err = vs.NewVarNamed("bias", []int64{outDim}, bsInit)
			or_panic(err)

			// Find a better way to do this
			bs, err = bs.T(true)
			or_panic(err)
			bs, err = bs.T(true)
			or_panic(err)
			bs, err = bs.SetRequiresGrad(true, true)
			or_panic(err)
			err = bs.RetainGrad(false)
			or_panic(err)

			vs.varstore.UpdateVarTensor(bias_name, bs, true)
		case c.BsInit != nil:
			bs, bias_name, err = vs.NewVarNamed("bias", []int64{outDim}, c.BsInit)
			or_panic(err)
		}
	}

	ws, weight_name, err := vs.NewVarNamed("weight", []int64{outDim, inDim}, c.WsInit)
	or_panic(err)

	ws, err = ws.T(true)
	or_panic(err)
	ws, err = ws.SetRequiresGrad(true, true)
	or_panic(err)
	err = ws.RetainGrad(false)
	or_panic(err)

	vs.varstore.UpdateVarTensor(weight_name, ws, true)

	return &Linear{
		Ws:          ws,
		weight_name: weight_name,
		Bs:          bs,
		bias_name:   bias_name,
	}
}

// Debug logs the maximum gradient value of the weights and the bias.
func (l *Linear) Debug() {
	log.Info("Ws", "ws", l.Ws.MustGrad(false).MustMax(false).Float64Values())
	log.Info("Bs", "bs", l.Bs.MustGrad(false).MustMax(false).Float64Values())
}

// ExtractFromVarstore reloads the layer's tensors from the given VarStore by name.
func (l *Linear) ExtractFromVarstore(vs *VarStore) {
	l.Ws = vs.GetTensorOfVar(l.weight_name)
	l.Bs = vs.GetTensorOfVar(l.bias_name)
}
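// newLinearSketch is an illustrative usage sketch, not part of the original
// API: it assumes the caller already holds a *Path from this package's
// VarStore (construction of the VarStore itself is not shown here). With
// inDim=3 and outDim=2 the resulting layer stores Ws with shape {3, 2}
// (already transposed) and Bs with shape {2}.
func newLinearSketch(vs *Path) *Linear {
	// Default config: Kaiming-uniform weights, bias enabled.
	cfg := DefaultLinearConfig()
	return NewLinear(vs, 3, 2, cfg)
}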
// Implement `Module` for `Linear` struct:
// =======================================

// Forward passes the input node through the linear layer.
//
// NOTE:
//   - It assumes the input node is 2-dimensional (a matrix). For the matrix
//     multiplication to work, the input node must have `inDim` columns,
//     matching the rows of the stored `Ws` (the weights are kept transposed,
//     shape {inDim, outDim}, so no transpose is needed at forward time).
//   - The input node should have shape {batchSize, inDim}; `inDim` and
//     `outDim` are the NewLinear parameters. The output has shape
//     {batchSize, outDim}.
//   - A ReLU activation is applied to the result before it is returned.
//
// Example:
//
//	inDim := 3
//	outDim := 2
//	batchSize := 4
//
//	weights: 2x3 (outDim x inDim, before the transpose done in NewLinear)
//	[ 1 1 1
//	  1 1 1 ]
//
//	input node: 4x3 (batchSize x inDim)
//	[ 1 1 1
//	  1 1 1
//	  1 1 1
//	  1 1 1 ]
//
//	output (before bias): 4x2, every entry equals 3
func (l *Linear) Forward(xs *ts.Tensor) (retVal *ts.Tensor) {
	mul, err := xs.Matmul(l.Ws, false)
	or_panic(err)

	if l.Bs != nil {
		mul, err = mul.Add(l.Bs, false)
		or_panic(err)
	}

	out, err := mul.Relu(false)
	or_panic(err)
	return out
}

// ForwardT implements the ModuleT interface for the Linear layer.
//
// NOTE: the train parameter is not used. Unlike Forward, the intermediate
// tensors are released after each op (the `true` del flag).
func (l *Linear) ForwardT(xs *ts.Tensor, train bool) (retVal *ts.Tensor) {
	mul, err := xs.Matmul(l.Ws, true)
	or_panic(err)

	if l.Bs != nil {
		mul, err = mul.Add(l.Bs, true)
		or_panic(err)
	}

	out, err := mul.Relu(true)
	or_panic(err)
	return out
}
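// forwardSketch is an illustrative usage sketch, not part of the original
// API: it assumes `l` was built by NewLinear (e.g. inDim=3, outDim=2) and
// that `xs` is a 2-d tensor of shape {batchSize, inDim} already on the
// correct device, mirroring the documented example above.
func forwardSketch(l *Linear, xs *ts.Tensor) *ts.Tensor {
	// Returns a {batchSize, outDim} tensor with ReLU already applied.
	return l.Forward(xs)
}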