diff --git a/example/linear/main.go b/example/linear/main.go
new file mode 100644
index 0000000..9367b68
--- /dev/null
+++ b/example/linear/main.go
@@ -0,0 +1,17 @@
+package main
+
+import (
+	"github.com/sugarme/gotch"
+	"github.com/sugarme/gotch/nn"
+)
+
+func main() {
+
+	vs := nn.NewVarStore(gotch.CPU)
+
+	path := vs.Root()
+
+	l := nn.NewLinear(path, 4, 3, nn.DefaultLinearConfig())
+
+	l.Bs.Print()
+}
diff --git a/example/mnist/mnist b/example/mnist/mnist
deleted file mode 100755
index cd4b4ee..0000000
Binary files a/example/mnist/mnist and /dev/null differ
diff --git a/example/mnist/nn.go b/example/mnist/nn.go
index bf50e65..07498eb 100644
--- a/example/mnist/nn.go
+++ b/example/mnist/nn.go
@@ -16,19 +16,24 @@ const (
 	LabelNN       int64  = 10
 	MnistDirNN    string = "../../data/mnist"
 
-	epochsNN    = 200
+	epochsNN    = 3
 	batchSizeNN = 256
 
 	LrNN = 1e-3
 )
 
+var l nn.Linear
+
 func netInit(vs nn.Path) ts.Module {
 	n := nn.Seq()
 
-	n.Add(nn.NewLinear(vs.Sub("layer1"), ImageDimNN, HiddenNodesNN, nn.DefaultLinearConfig()))
-	n.AddFn(func(xs ts.Tensor) ts.Tensor {
+	l = nn.NewLinear(vs.Sub("layer1"), ImageDimNN, HiddenNodesNN, nn.DefaultLinearConfig())
+
+	n.Add(l)
+
+	n.AddFn(nn.ForwardWith(func(xs ts.Tensor) ts.Tensor {
 		return xs.MustRelu()
-	})
+	}))
 
 	n.Add(nn.NewLinear(vs, HiddenNodesNN, LabelNN, nn.DefaultLinearConfig()))
 
@@ -46,13 +51,19 @@ func runNN() {
 		log.Fatal(err)
 	}
 
+	bsClone := l.Bs.MustShallowClone()
+
 	for epoch := 0; epoch < epochsNN; epoch++ {
 		loss := net.Forward(ds.TrainImages).CrossEntropyForLogits(ds.TrainLabels)
 		opt.BackwardStep(loss)
 
+		fmt.Printf("Bs vals: %v\n", bsClone.MustToString(int64(1)))
+
+		lossVal := loss.MustShallowClone().MustView([]int64{-1}).MustFloat64Value([]int64{0})
+
 		testAccuracy := net.Forward(ds.TestImages).AccuracyForLogits(ds.TestLabels).MustView([]int64{-1}).MustFloat64Value([]int64{0})
 
-		fmt.Printf("Epoch: %v - Loss: %.3f - Test accuracy: %.2f%%\n", epoch, loss, testAccuracy*100)
+		fmt.Printf("Epoch: %v - Loss: %.3f - Test accuracy: %.2f%%\n", epoch, lossVal, testAccuracy*100)
 	}
 
 }
diff --git a/example/varstore/main.go b/example/varstore/main.go
new file mode 100644
index 0000000..06584d4
--- /dev/null
+++ b/example/varstore/main.go
@@ -0,0 +1,26 @@
+package main
+
+import (
+	"fmt"
+
+	"github.com/sugarme/gotch"
+	"github.com/sugarme/gotch/nn"
+)
+
+func main() {
+
+	vs := nn.NewVarStore(gotch.CPU)
+
+	fmt.Printf("Is VarStore emptry? %v\n ", vs.IsEmpty())
+
+	path := vs.Root()
+
+	init := nn.NewKaimingUniformInit()
+
+	init.InitTensor([]int64{1, 4}, gotch.CPU).Print()
+
+	path.NewVar("layer1", []int64{1, 10}, nn.NewKaimingUniformInit())
+
+	fmt.Printf("Is VarStore emptry? %v\n ", vs.IsEmpty())
+
+}
diff --git a/libtch/c-generated-sample.go b/libtch/c-generated-sample.go
index 578f7cd..5ff16f1 100644
--- a/libtch/c-generated-sample.go
+++ b/libtch/c-generated-sample.go
@@ -257,3 +257,13 @@ func AtgRelu(ptr *Ctensor, self Ctensor) {
 func AtgRelu_(ptr *Ctensor, self Ctensor) {
 	C.atg_relu_(ptr, self)
 }
+
+// void atg_t(tensor *, tensor self);
+func AtgT(ptr *Ctensor, self Ctensor) {
+	C.atg_t(ptr, self)
+}
+
+// void atg_t_(tensor *, tensor self);
+func AtgT_(ptr *Ctensor, self Ctensor) {
+	C.atg_t_(ptr, self)
+}
diff --git a/nn/linear.go b/nn/linear.go
index 27925d0..4c647ff 100644
--- a/nn/linear.go
+++ b/nn/linear.go
@@ -3,6 +3,8 @@ package nn
 // linear is a fully-connected layer
 
 import (
+	"math"
+
 	"github.com/sugarme/gotch"
 	ts "github.com/sugarme/gotch/tensor"
 )
@@ -16,8 +18,8 @@ type LinearConfig struct {
 
 // DefaultLinearConfig creates default LinearConfig with
 // weights initiated using KaimingUniform and Bias is set to true
-func DefaultLinearConfig() *LinearConfig {
-	return &LinearConfig{
+func DefaultLinearConfig() LinearConfig {
+	return LinearConfig{
 		WsInit: NewKaimingUniformInit(),
 		BsInit: nil,
 		Bias:   true,
@@ -35,7 +37,7 @@ type Linear struct {
 // inDim - input dimension (x) [input features - columns]
 // outDim - output dimension (y) [output features - columns]
 // NOTE: w will have shape{outDim, inDim}; b will have shape{outDim}
-func NewLinear(vs Path, inDim, outDim int64, c *LinearConfig) *Linear {
+func NewLinear(vs Path, inDim, outDim int64, c LinearConfig) Linear {
 
 	var bs ts.Tensor
 	// bs has size of output dimension
@@ -43,10 +45,17 @@ func NewLinear(vs Path, inDim, outDim int64, c *LinearConfig) *Linear {
 	case false:
 		bs = ts.MustZeros([]int64{outDim}, gotch.Float.CInt(), vs.Device().CInt())
 	case true:
-		bs = vs.NewVar("bias", []int64{outDim}, c.BsInit)
+		switch {
+		case c.BsInit == nil:
+			bound := 1.0 / math.Sqrt(float64(inDim))
+			bsInit := NewUniformInit(-bound, bound)
+			bs = vs.NewVar("bias", []int64{outDim}, bsInit)
+		case c.BsInit != nil:
+			bs = vs.NewVar("bias", []int64{outDim}, c.BsInit)
+		}
 	}
 
-	return &Linear{
+	return Linear{
 		Ws: vs.NewVar("weight", []int64{outDim, inDim}, c.WsInit),
 		Bs: bs,
 	}
@@ -80,7 +89,7 @@ func NewLinear(vs Path, inDim, outDim int64, c *LinearConfig) *Linear {
 // 	  1 1 1
 // 	  1 1 1
 // 		1 1 1 ]
-func (l *Linear) Forward(xs ts.Tensor) (retVal ts.Tensor) {
+func (l Linear) Forward(xs ts.Tensor) (retVal ts.Tensor) {
 
-	return xs.MustMatMul(l.Ws).MustAdd(l.Bs)
+	return xs.MustMatMul(l.Ws.MustT()).MustAdd(l.Bs)
 }
diff --git a/nn/optimizer.go b/nn/optimizer.go
index d7fb131..9f7c731 100644
--- a/nn/optimizer.go
+++ b/nn/optimizer.go
@@ -44,8 +44,10 @@ func defaultBuild(config OptimizerConfig, vs VarStore, lr float64) (retVal Optim
 	vs.variables.mutex.Lock()
 	defer vs.variables.mutex.Unlock()
 
-	if err = opt.AddParameters(vs.variables.TrainableVariable); err != nil {
-		return retVal, err
+	if len(vs.variables.TrainableVariable) > 0 {
+		if err = opt.AddParameters(vs.variables.TrainableVariable); err != nil {
+			return retVal, err
+		}
 	}
 
 	return Optimizer{
@@ -220,6 +222,7 @@ func (opt *Optimizer) Step() {
 
 // BackwardStep applies a backward step pass, update the gradients, and performs an optimization step.
 func (opt *Optimizer) BackwardStep(loss ts.Tensor) {
+
 	opt.addMissingVariables()
 	err := opt.opt.ZeroGrad()
 	if err != nil {
diff --git a/nn/sequential.go b/nn/sequential.go
index 99ff3a5..b4f50cc 100644
--- a/nn/sequential.go
+++ b/nn/sequential.go
@@ -39,9 +39,9 @@ func (s *Sequential) Add(l ts.Module) {
 //
 // NOTE: fn should have signature `func(t ts.Tensor) ts.Tensor`
 // and it implements Module interface
-func (s *Sequential) AddFn(fn interface{}) {
+func (s *Sequential) AddFn(fn ts.Module) {
 
-	s.Add(fn.(ts.Module))
+	s.Add(fn)
 }
 
 // ForwardAll applies the forward pass and returns the output for each layer.
@@ -144,18 +144,18 @@ func (s *SequentialT) Add(l ts.ModuleT) {
 //
 // NOTE: fn should have signature `func(t ts.Tensor) ts.Tensor`
 // and it implements Module interface
-func (s *SequentialT) AddFn(fn interface{}) {
+func (s *SequentialT) AddFn(fn ts.ModuleT) {
 
-	s.Add(fn.(ts.ModuleT))
+	s.Add(fn)
 }
 
 // AddFn appends a closure after all the current layers.
 //
 // NOTE: fn should have signature `func(t ts.Tensor, train bool) ts.Tensor`
 // and it implements Module interface
-func (s *SequentialT) AddFnT(fn interface{}) {
+func (s *SequentialT) AddFnT(fn ts.ModuleT) {
 
-	s.Add(fn.(ts.ModuleT))
+	s.Add(fn)
 }
 
 // ForwardAll applies the forward pass and returns the output for each layer.
@@ -176,3 +176,21 @@ func (s *SequentialT) ForwardAllT(xs ts.Tensor, train bool, opts ...uint8) (retV
 
 	return retVal
 }
+
+// ForwardWith is a handler function to implement Module interface for
+// any (anonymous) function it wraps.
+//
+// Ref. https://stackoverflow.com/a/42182987
+// NOTE: Specifically, `ForwardWith` is used to wrap anonymous function
+// as input parameter of `AddFn` Sequential method.
+type ForwardWith func(ts.Tensor) ts.Tensor
+
+func (fw ForwardWith) Forward(xs ts.Tensor) ts.Tensor {
+	return fw(xs)
+}
+
+type ForwardTWith func(ts.Tensor, bool) ts.Tensor
+
+func (fw ForwardTWith) ForwardT(xs ts.Tensor, train bool) ts.Tensor {
+	return fw(xs, train)
+}
diff --git a/nn/varstore.go b/nn/varstore.go
index 3870027..0ea149b 100644
--- a/nn/varstore.go
+++ b/nn/varstore.go
@@ -57,6 +57,9 @@ func NewVarStore(device gotch.Device) VarStore {
 	}
 }
 
+// NOTE:
+// To get (initiate) a path, call vs.Root()
+
 // VarStore methods:
 // =================
 
@@ -417,9 +420,10 @@ func (p *Path) OnesNoTrain(name string, dims []int64) (retVal ts.Tensor) {
 // will be tracked.
 // The variable uses a float tensor initialized as per the
 // related argument.
-func (p *Path) NewVar(name string, dims []int64, init Init) (retVal ts.Tensor) {
+func (p *Path) NewVar(name string, dims []int64, ini Init) (retVal ts.Tensor) {
+
+	v := ini.InitTensor(dims, p.varstore.device)
 
-	v := init.InitTensor(dims, p.varstore.device)
 	return p.add(name, v, true)
 }
 
diff --git a/tensor/tensor-generated-sample.go b/tensor/tensor-generated-sample.go
index 64849ee..a381739 100644
--- a/tensor/tensor-generated-sample.go
+++ b/tensor/tensor-generated-sample.go
@@ -774,3 +774,38 @@ func (ts Tensor) MustRelu() (retVal Tensor) {
 
 	return retVal
 }
+
+func (ts Tensor) T() (retVal Tensor, err error) {
+	ptr := (*lib.Ctensor)(unsafe.Pointer(C.malloc(0)))
+	defer C.free(unsafe.Pointer(ptr))
+
+	lib.AtgT(ptr, ts.ctensor)
+	err = TorchErr()
+	if err != nil {
+		return retVal, err
+	}
+
+	retVal = Tensor{ctensor: *ptr}
+
+	return retVal, nil
+}
+
+func (ts Tensor) MustT() (retVal Tensor) {
+	retVal, err := ts.T()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	return retVal
+}
+
+func (ts Tensor) T_() {
+	ptr := (*lib.Ctensor)(unsafe.Pointer(C.malloc(0)))
+	defer C.free(unsafe.Pointer(ptr))
+
+	lib.AtgT_(ptr, ts.ctensor)
+	err := TorchErr()
+	if err != nil {
+		log.Fatal(err)
+	}
+}