Converted to pointer receivers in the tensor APIs and the tensor and nn sub-packages

sugarme 2020-10-31 19:25:32 +11:00
parent 59ea5f0e1b
commit 75a7d89b92
32 changed files with 30763 additions and 26902 deletions

View File

@ -5,6 +5,7 @@
- **GoTch** is a Go binding for the C++ Libtorch library, for developing and implementing deep learning projects in Go.
- This package provides a thin wrapper over Libtorch to make use of its tensor APIs and CUDA support while staying as close to idiomatic Go as possible.
- There are about **1129** auto-generated tensor APIs.
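
For orientation, a minimal sketch of how the pointer-based API reads after this commit (hypothetical example; `MustZeros`, `MustSize`, and `MustDrop` follow the `MustXxx` wrapper convention used throughout this diff):

```go
package main

import (
	"fmt"

	"github.com/sugarme/gotch"
	ts "github.com/sugarme/gotch/tensor"
)

func main() {
	// Factory functions and methods now return *ts.Tensor.
	x := ts.MustZeros([]int64{2, 3}, gotch.Float, gotch.CPU)
	fmt.Println(x.MustSize()) // [2 3]
	x.MustDrop()              // explicitly free the underlying C tensor
}
```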
## Dependencies

View File

@ -1,7 +1,6 @@
(* Automatically generate the C++ -> C -> Go bindings.
This takes as input the Descriptions.yaml file that gets generated when
func (Func.c_go_args_list func) building PyTorch from source.
Run with: dune exec gen/gen.exe
*)
open Base
@ -347,15 +346,15 @@ module Func = struct
| Bool -> "bool"
| Int64 -> "int64"
| Double -> "float64"
| Tensor -> "Tensor"
| TensorOption -> "Tensor"
| Tensor -> "*Tensor"
| TensorOption -> "*Tensor"
| IntList -> "[]int64"
| TensorList -> "[]Tensor"
| String -> "string"
(* TODO. Struct{Kind gotch.DType Device gotch.Device} *)
(* E.g. `type KindDevice struct{}` *)
| TensorOptions -> "gotch.KindDevice"
| Scalar -> "Scalar"
| Scalar -> "*Scalar"
| ScalarType -> "gotch.DType"
| Device -> "gotch.Device"
in
@ -396,9 +395,9 @@ module Func = struct
(* printf "t name: %s\n" t.name ; *)
let returns =
match t.returns with
| `fixed 1 -> "retVal Tensor"
| `fixed 1 -> "retVal *Tensor"
| `fixed v ->
List.init v ~f:(fun i -> Printf.sprintf "retVal%d Tensor" i)
List.init v ~f:(fun i -> Printf.sprintf "retVal%d *Tensor" i)
|> String.concat ~sep:", " |> Printf.sprintf "%s"
| `dynamic -> "retVal []Tensor"
in
@ -698,7 +697,7 @@ let write_wrapper funcs filename =
match func.returns with
| `dynamic ->
pm "\n" ;
if is_method then pm "func(ts Tensor) %s(" gofunc_name
if is_method then pm "func(ts *Tensor) %s(" gofunc_name
else pm "func %s(" gofunc_name ;
pm "%s" go_args_list ;
pm ")(%s) { \n" (Func.go_return_type func ~fallible:true) ;
@ -714,13 +713,13 @@ let write_wrapper funcs filename =
pm " }\n" ;
(* NOTE. if in_place method, no retVal return *)
if not (Func.is_inplace func) then
pm " retVal = Tensor{ctensor: *ptr}\n" ;
pm " retVal = &Tensor{ctensor: *ptr}\n" ;
pm " \n" ;
pm " return %s\n" (Func.go_return_notype func ~fallible:true) ;
pm "} \n"
| `fixed 1 ->
pm "\n" ;
if is_method then pm "func(ts Tensor) %s(" gofunc_name
if is_method then pm "func(ts *Tensor) %s(" gofunc_name
else pm "func %s(" gofunc_name ;
pm "%s" go_args_list ;
pm ")(%s) { \n" (Func.go_return_type func ~fallible:true) ;
@ -736,7 +735,7 @@ let write_wrapper funcs filename =
pm " }\n" ;
(* NOTE. if in_place method, no retVal return *)
if not (Func.is_inplace func) then
pm " retVal = Tensor{ctensor: *ptr}\n" ;
pm " retVal = &Tensor{ctensor: *ptr}\n" ;
pm " \n" ;
pm " return %s\n" (Func.go_return_notype func ~fallible:true) ;
pm "} \n"
@ -804,7 +803,7 @@ let write_must_wrapper funcs filename =
match func.returns with
| `dynamic ->
pm "\n" ;
if is_method then pm "func(ts Tensor) %s(" gofunc_name
if is_method then pm "func(ts *Tensor) %s(" gofunc_name
else pm "func Must%s(" gofunc_name ;
pm "%s" go_args_list ;
pm ")(%s) { \n" (Func.go_return_type func ~fallible:false) ;
@ -821,7 +820,7 @@ let write_must_wrapper funcs filename =
pm "} \n"
| `fixed 1 ->
pm "\n" ;
if is_method then pm "func(ts Tensor) Must%s(" gofunc_name
if is_method then pm "func(ts *Tensor) Must%s(" gofunc_name
else pm "func Must%s(" gofunc_name ;
pm "%s" go_args_list ;
pm ")(%s) { \n" (Func.go_return_type func ~fallible:false) ;

File diff suppressed because it is too large.

View File

@ -17,8 +17,8 @@ type BatchNormConfig struct {
BsInit Init
}
func DefaultBatchNormConfig() BatchNormConfig {
return BatchNormConfig{
func DefaultBatchNormConfig() *BatchNormConfig {
return &BatchNormConfig{
CudnnEnable: true,
Eps: 1e-5,
Momentum: 0.1,
@ -29,17 +29,17 @@ func DefaultBatchNormConfig() BatchNormConfig {
// A batch-normalization layer.
type BatchNorm struct {
config BatchNormConfig
RunningMean ts.Tensor
RunningVar ts.Tensor
Ws ts.Tensor
Bs ts.Tensor
config *BatchNormConfig
RunningMean *ts.Tensor
RunningVar *ts.Tensor
Ws *ts.Tensor
Bs *ts.Tensor
Nd uint
}
// NewBatchNorm creates a new BatchNorm layer
func NewBatchNorm(vs Path, nd uint, outDim int64, config BatchNormConfig) BatchNorm {
return BatchNorm{
func NewBatchNorm(vs Path, nd uint, outDim int64, config *BatchNormConfig) *BatchNorm {
return &BatchNorm{
config: config,
RunningMean: vs.ZerosNoTrain("running_mean", []int64{outDim}),
RunningVar: vs.OnesNoTrain("running_var", []int64{outDim}),
@ -52,7 +52,7 @@ func NewBatchNorm(vs Path, nd uint, outDim int64, config BatchNormConfig) BatchN
//
// The input shape is assumed to be (N, C, L). Normalization
// is performed over the first batch dimension N.
func BatchNorm1D(vs Path, outDim int64, config BatchNormConfig) BatchNorm {
func BatchNorm1D(vs Path, outDim int64, config *BatchNormConfig) *BatchNorm {
return NewBatchNorm(vs, 1, outDim, config)
}
@ -60,7 +60,7 @@ func BatchNorm1D(vs Path, outDim int64, config BatchNormConfig) BatchNorm {
//
// The input shape is assumed to be (N, C, H, W). Normalization
// is performed over the first batch dimension N.
func BatchNorm2D(vs Path, outDim int64, config BatchNormConfig) BatchNorm {
func BatchNorm2D(vs Path, outDim int64, config *BatchNormConfig) *BatchNorm {
return NewBatchNorm(vs, 2, outDim, config)
}
@ -68,14 +68,14 @@ func BatchNorm2D(vs Path, outDim int64, config BatchNormConfig) BatchNorm {
//
// The input shape is assumed to be (N, C, D, H, W). Normalization
// is performed over the first batch dimension N.
func BatchNorm3D(vs Path, outDim int64, config BatchNormConfig) BatchNorm {
func BatchNorm3D(vs Path, outDim int64, config *BatchNormConfig) *BatchNorm {
return NewBatchNorm(vs, 3, outDim, config)
}
// Implement ModuleT interface for BatchNorm:
// ==========================================
func (bn BatchNorm) ForwardT(xs ts.Tensor, train bool) (retVal ts.Tensor) {
func (bn *BatchNorm) ForwardT(xs *ts.Tensor, train bool) (retVal *ts.Tensor) {
dim := xs.Dim()
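
A minimal usage sketch of the pointerized batch-norm API (hypothetical dimensions; assumes the imports from the earlier sketch plus `nn "github.com/sugarme/gotch/nn"`; note that `BatchNorm2D` still takes `Path` by value in this commit, hence the dereference):

```go
vs := nn.NewVarStore(gotch.CPU)
bn := nn.BatchNorm2D(*vs.Root().Sub("bn"), 16, nn.DefaultBatchNormConfig())

x := ts.MustRandn([]int64{4, 16, 8, 8}, gotch.Float, gotch.CPU)
y := bn.ForwardT(x, true) // train=true: normalize with batch statistics
fmt.Println(y.MustSize()) // [4 16 8 8]
```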

View File

@ -42,8 +42,8 @@ type ConvTranspose3DConfig struct {
}
// DefaultConvTranspose1DConfig creates a default ConvTranspose1DConfig
func DefaultConvTranspose1DConfig() ConvTranspose1DConfig {
return ConvTranspose1DConfig{
func DefaultConvTranspose1DConfig() *ConvTranspose1DConfig {
return &ConvTranspose1DConfig{
Stride: []int64{1},
Padding: []int64{0},
OutputPadding: []int64{0},
@ -56,83 +56,107 @@ func DefaultConvTranspose1DConfig() ConvTranspose1DConfig {
}
type ConvTranspose1D struct {
Ws ts.Tensor
Bs ts.Tensor // optional
Config ConvTranspose1DConfig
Ws *ts.Tensor
Bs *ts.Tensor // optional
Config *ConvTranspose1DConfig
}
func NewConvTranspose1D(vs *Path, inDim, outDim int64, ksizes []int64, cfg ConvTranspose1DConfig) ConvTranspose1D {
func NewConvTranspose1D(vs *Path, inDim, outDim int64, ksizes []int64, cfg *ConvTranspose1DConfig) *ConvTranspose1D {
if len(ksizes) != 1 {
log.Fatalf("NewConvTranspose1D method call: Kernel size should be 1. Got %v\n", len(ksizes))
}
var conv ConvTranspose1D
conv.Config = cfg
if cfg.Bias {
conv.Bs = vs.NewVar("bias", []int64{outDim}, cfg.BsInit)
}
var (
ws *ts.Tensor
bs *ts.Tensor
)
weightSize := []int64{outDim, int64(inDim / cfg.Groups)}
weightSize = append(weightSize, ksizes...)
conv.Ws = vs.NewVar("weight", weightSize, cfg.WsInit)
ws = vs.NewVar("weight", weightSize, cfg.WsInit)
return conv
if cfg.Bias {
bs = vs.NewVar("bias", []int64{outDim}, cfg.BsInit)
}
return &ConvTranspose1D{
Ws: ws,
Bs: bs,
Config: cfg,
}
}
type ConvTranspose2D struct {
Ws ts.Tensor
Bs ts.Tensor // optional
Config ConvTranspose2DConfig
Ws *ts.Tensor
Bs *ts.Tensor // optional
Config *ConvTranspose2DConfig
}
func NewConvTranspose2D(vs *Path, inDim, outDim int64, ksizes []int64, cfg ConvTranspose2DConfig) ConvTranspose2D {
func NewConvTranspose2D(vs *Path, inDim, outDim int64, ksizes []int64, cfg *ConvTranspose2DConfig) *ConvTranspose2D {
if len(ksizes) != 2 {
log.Fatalf("NewConvTranspose2D method call: Kernel size should be 2. Got %v\n", len(ksizes))
}
var conv ConvTranspose2D
conv.Config = cfg
var (
ws *ts.Tensor
bs *ts.Tensor
)
if cfg.Bias {
conv.Bs = vs.NewVar("bias", []int64{outDim}, cfg.BsInit)
bs = vs.NewVar("bias", []int64{outDim}, cfg.BsInit)
}
weightSize := []int64{outDim, int64(inDim / cfg.Groups)}
weightSize = append(weightSize, ksizes...)
conv.Ws = vs.NewVar("weight", weightSize, cfg.WsInit)
ws = vs.NewVar("weight", weightSize, cfg.WsInit)
return conv
return &ConvTranspose2D{
Ws: ws,
Bs: bs,
Config: cfg,
}
}
type ConvTranspose3D struct {
Ws ts.Tensor
Bs ts.Tensor // optional
Config ConvTranspose3DConfig
Ws *ts.Tensor
Bs *ts.Tensor // optional
Config *ConvTranspose3DConfig
}
func NewConvTranspose3D(vs *Path, inDim, outDim int64, ksizes []int64, cfg ConvTranspose3DConfig) ConvTranspose3D {
func NewConvTranspose3D(vs *Path, inDim, outDim int64, ksizes []int64, cfg *ConvTranspose3DConfig) *ConvTranspose3D {
if len(ksizes) != 3 {
log.Fatalf("NewConvTranspose3D method call: Kernel size should be 3. Got %v\n", len(ksizes))
}
var conv ConvTranspose3D
conv.Config = cfg
var (
ws *ts.Tensor
bs *ts.Tensor
)
if cfg.Bias {
conv.Bs = vs.NewVar("bias", []int64{outDim}, cfg.BsInit)
bs = vs.NewVar("bias", []int64{outDim}, cfg.BsInit)
}
weightSize := []int64{outDim, int64(inDim / cfg.Groups)}
weightSize = append(weightSize, ksizes...)
conv.Ws = vs.NewVar("weight", weightSize, cfg.WsInit)
ws = vs.NewVar("weight", weightSize, cfg.WsInit)
return conv
return &ConvTranspose3D{
Ws: ws,
Bs: bs,
Config: cfg,
}
}
// Implement Module for Conv1D, Conv2D, Conv3D:
// ============================================
func (c ConvTranspose1D) Forward(xs ts.Tensor) ts.Tensor {
func (c *ConvTranspose1D) Forward(xs *ts.Tensor) *ts.Tensor {
return ts.MustConvTranspose1d(xs, c.Ws, c.Bs, c.Config.Stride, c.Config.Padding, c.Config.OutputPadding, c.Config.Groups, c.Config.Dilation)
}
func (c ConvTranspose2D) Forward(xs ts.Tensor) ts.Tensor {
func (c *ConvTranspose2D) Forward(xs *ts.Tensor) *ts.Tensor {
return ts.MustConvTranspose2d(xs, c.Ws, c.Bs, c.Config.Stride, c.Config.Padding, c.Config.OutputPadding, c.Config.Groups, c.Config.Dilation)
}
func (c ConvTranspose3D) Forward(xs ts.Tensor) ts.Tensor {
func (c *ConvTranspose3D) Forward(xs *ts.Tensor) *ts.Tensor {
return ts.MustConvTranspose3d(xs, c.Ws, c.Bs, c.Config.Stride, c.Config.Padding, c.Config.OutputPadding, c.Config.Groups, c.Config.Dilation)
}
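
A usage sketch for the transposed-convolution constructors above (hypothetical dimensions; `vs` is a `*nn.VarStore` as in the earlier sketches):

```go
root := vs.Root() // *nn.Path, which NewConvTranspose1D now expects
tconv := nn.NewConvTranspose1D(root, 8, 4, []int64{3}, nn.DefaultConvTranspose1DConfig())

x := ts.MustRandn([]int64{1, 8, 10}, gotch.Float, gotch.CPU)
y := tconv.Forward(x)
// Stride 1, padding 0: Lout = (10-1)*1 - 2*0 + (3-1) + 1 = 12
fmt.Println(y.MustSize()) // [1 4 12]
```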

View File

@ -40,8 +40,8 @@ type Conv3DConfig struct {
}
// DefaultConv1DConfig creates a default 1D ConvConfig
func DefaultConv1DConfig() Conv1DConfig {
return Conv1DConfig{
func DefaultConv1DConfig() *Conv1DConfig {
return &Conv1DConfig{
Stride: []int64{1},
Padding: []int64{0},
Dilation: []int64{1},
@ -53,8 +53,8 @@ func DefaultConv1DConfig() Conv1DConfig {
}
// DefaultConv2DConfig creates a default 2D ConvConfig
func DefaultConv2DConfig() Conv2DConfig {
return Conv2DConfig{
func DefaultConv2DConfig() *Conv2DConfig {
return &Conv2DConfig{
Stride: []int64{1, 1},
Padding: []int64{0, 0},
Dilation: []int64{1, 1},
@ -66,60 +66,78 @@ func DefaultConv2DConfig() Conv2DConfig {
}
type Conv1D struct {
Ws ts.Tensor
Bs ts.Tensor // optional
Config Conv1DConfig
Ws *ts.Tensor
Bs *ts.Tensor // optional
Config *Conv1DConfig
}
func NewConv1D(vs *Path, inDim, outDim, k int64, cfg Conv1DConfig) Conv1D {
var conv Conv1D
conv.Config = cfg
func NewConv1D(vs *Path, inDim, outDim, k int64, cfg *Conv1DConfig) *Conv1D {
var (
ws *ts.Tensor
bs *ts.Tensor
)
if cfg.Bias {
conv.Bs = vs.NewVar("bias", []int64{outDim}, cfg.BsInit)
bs = vs.NewVar("bias", []int64{outDim}, cfg.BsInit)
}
weightSize := []int64{outDim, int64(inDim / cfg.Groups)}
weightSize = append(weightSize, k)
conv.Ws = vs.NewVar("weight", weightSize, cfg.WsInit)
ws = vs.NewVar("weight", weightSize, cfg.WsInit)
return conv
return &Conv1D{
Ws: ws,
Bs: bs,
Config: cfg,
}
}
type Conv2D struct {
Ws ts.Tensor
Bs ts.Tensor // optional
Config Conv2DConfig
Ws *ts.Tensor
Bs *ts.Tensor // optional
Config *Conv2DConfig
}
func NewConv2D(vs Path, inDim, outDim int64, k int64, cfg Conv2DConfig) Conv2D {
var conv Conv2D
conv.Config = cfg
func NewConv2D(vs Path, inDim, outDim int64, k int64, cfg *Conv2DConfig) *Conv2D {
var (
ws *ts.Tensor
bs *ts.Tensor
)
if cfg.Bias {
conv.Bs = vs.NewVar("bias", []int64{outDim}, cfg.BsInit)
bs = vs.NewVar("bias", []int64{outDim}, cfg.BsInit)
}
weightSize := []int64{outDim, int64(inDim / cfg.Groups)}
weightSize = append(weightSize, k, k)
conv.Ws = vs.NewVar("weight", weightSize, cfg.WsInit)
ws = vs.NewVar("weight", weightSize, cfg.WsInit)
return conv
return &Conv2D{
Ws: ws,
Bs: bs,
Config: cfg,
}
}
type Conv3D struct {
Ws ts.Tensor
Bs ts.Tensor // optional
Config Conv3DConfig
Ws *ts.Tensor
Bs *ts.Tensor // optional
Config *Conv3DConfig
}
func NewConv3D(vs *Path, inDim, outDim, k int64, cfg Conv3DConfig) Conv3D {
var conv Conv3D
conv.Config = cfg
func NewConv3D(vs *Path, inDim, outDim, k int64, cfg *Conv3DConfig) *Conv3D {
var (
ws *ts.Tensor
bs *ts.Tensor
)
if cfg.Bias {
conv.Bs = vs.NewVar("bias", []int64{outDim}, cfg.BsInit)
bs = vs.NewVar("bias", []int64{outDim}, cfg.BsInit)
}
weightSize := []int64{outDim, int64(inDim / cfg.Groups)}
weightSize = append(weightSize, k, k, k)
conv.Ws = vs.NewVar("weight", weightSize, cfg.WsInit)
ws = vs.NewVar("weight", weightSize, cfg.WsInit)
return conv
return &Conv3D{
Ws: ws,
Bs: bs,
Config: cfg,
}
}
type Conv interface{}
@ -175,38 +193,51 @@ func buildConvConfig(ksizes []int64) interface{} {
func NewConv(vs Path, inDim, outDim int64, ksizes []int64, config interface{}) Conv {
configT := reflect.TypeOf(config)
var (
ws *ts.Tensor
bs *ts.Tensor
)
switch {
case len(ksizes) == 1 && configT.Name() == "Conv1DConfig":
var conv Conv1D
conv.Config = config.(Conv1DConfig)
if config.(Conv1DConfig).Bias {
conv.Bs = vs.NewVar("bias", []int64{outDim}, config.(Conv1DConfig).BsInit)
cfg := config.(Conv1DConfig)
if cfg.Bias {
bs = vs.NewVar("bias", []int64{outDim}, cfg.BsInit)
}
weightSize := []int64{outDim, int64(inDim / config.(Conv1DConfig).Groups)}
weightSize := []int64{outDim, int64(inDim / cfg.Groups)}
weightSize = append(weightSize, ksizes...)
conv.Ws = vs.NewVar("weight", weightSize, config.(Conv1DConfig).WsInit)
return conv
ws = vs.NewVar("weight", weightSize, cfg.WsInit)
return &Conv1D{
Ws: ws,
Bs: bs,
Config: &cfg,
}
case len(ksizes) == 2 && configT.Name() == "Conv2DConfig":
var conv Conv2D
conv.Config = config.(Conv2DConfig)
if config.(Conv2DConfig).Bias {
conv.Bs = vs.NewVar("bias", []int64{outDim}, config.(Conv2DConfig).BsInit)
cfg := config.(Conv2DConfig)
if cfg.Bias {
bs = vs.NewVar("bias", []int64{outDim}, cfg.BsInit)
}
weightSize := []int64{outDim, int64(inDim / config.(Conv2DConfig).Groups)}
weightSize := []int64{outDim, int64(inDim / cfg.Groups)}
weightSize = append(weightSize, ksizes...)
conv.Ws = vs.NewVar("weight", weightSize, config.(Conv2DConfig).WsInit)
return conv
ws = vs.NewVar("weight", weightSize, config.(Conv2DConfig).WsInit)
return &Conv2D{
Ws: ws,
Bs: bs,
Config: &cfg,
}
case len(ksizes) == 3 && configT.Name() == "Conv3DConfig":
var conv Conv3D
conv.Config = config.(Conv3DConfig)
if config.(Conv3DConfig).Bias {
conv.Bs = vs.NewVar("bias", []int64{outDim}, config.(Conv3DConfig).BsInit)
cfg := config.(Conv3DConfig)
if cfg.Bias {
bs = vs.NewVar("bias", []int64{outDim}, cfg.BsInit)
}
weightSize := []int64{outDim, int64(inDim / config.(Conv3DConfig).Groups)}
weightSize := []int64{outDim, int64(inDim / cfg.Groups)}
weightSize = append(weightSize, ksizes...)
conv.Ws = vs.NewVar("weight", weightSize, config.(Conv3DConfig).WsInit)
return conv
ws = vs.NewVar("weight", weightSize, cfg.WsInit)
return &Conv3D{
Ws: ws,
Bs: bs,
Config: &cfg,
}
default:
err := fmt.Errorf("Expected nd length from 1 to 3. Got %v\n", len(ksizes))
panic(err)
@ -216,14 +247,14 @@ func NewConv(vs Path, inDim, outDim int64, ksizes []int64, config interface{}) C
// Implement Module for Conv1D, Conv2D, Conv3D:
// ============================================
func (c Conv1D) Forward(xs ts.Tensor) ts.Tensor {
func (c *Conv1D) Forward(xs *ts.Tensor) *ts.Tensor {
return ts.MustConv1d(xs, c.Ws, c.Bs, c.Config.Stride, c.Config.Padding, c.Config.Dilation, c.Config.Groups)
}
func (c Conv2D) Forward(xs ts.Tensor) ts.Tensor {
func (c *Conv2D) Forward(xs *ts.Tensor) *ts.Tensor {
return ts.MustConv2d(xs, c.Ws, c.Bs, c.Config.Stride, c.Config.Padding, c.Config.Dilation, c.Config.Groups)
}
func (c Conv3D) Forward(xs ts.Tensor) ts.Tensor {
func (c *Conv3D) Forward(xs *ts.Tensor) *ts.Tensor {
return ts.MustConv3d(xs, c.Ws, c.Bs, c.Config.Stride, c.Config.Padding, c.Config.Dilation, c.Config.Groups)
}
@ -232,13 +263,13 @@ func (c Conv3D) Forward(xs ts.Tensor) ts.Tensor {
// NOTE: the `train` param is not used. Should it be?
func (c Conv1D) ForwardT(xs ts.Tensor, train bool) ts.Tensor {
func (c *Conv1D) ForwardT(xs *ts.Tensor, train bool) *ts.Tensor {
return ts.MustConv1d(xs, c.Ws, c.Bs, c.Config.Stride, c.Config.Padding, c.Config.Dilation, c.Config.Groups)
}
func (c Conv2D) ForwardT(xs ts.Tensor, train bool) ts.Tensor {
func (c *Conv2D) ForwardT(xs *ts.Tensor, train bool) *ts.Tensor {
return ts.MustConv2d(xs, c.Ws, c.Bs, c.Config.Stride, c.Config.Padding, c.Config.Dilation, c.Config.Groups)
}
func (c Conv3D) ForwardT(xs ts.Tensor, train bool) ts.Tensor {
func (c *Conv3D) ForwardT(xs *ts.Tensor, train bool) *ts.Tensor {
return ts.MustConv3d(xs, c.Ws, c.Bs, c.Config.Stride, c.Config.Padding, c.Config.Dilation, c.Config.Groups)
}
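
And the counterpart for the plain convolution layers (hypothetical dimensions; note `NewConv2D` still takes `Path` by value here, unlike `NewConv1D` and `NewConv3D`, which take `*Path`):

```go
conv := nn.NewConv2D(*vs.Root(), 3, 16, 3, nn.DefaultConv2DConfig())

x := ts.MustRandn([]int64{1, 3, 28, 28}, gotch.Float, gotch.CPU)
y := conv.Forward(x) // stride 1, no padding: 28 - 3 + 1 = 26
fmt.Println(y.MustSize()) // [1 16 26 26]
```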

View File

@ -7,36 +7,36 @@ import (
)
type Func struct {
f func(ts.Tensor) ts.Tensor
f func(*ts.Tensor) *ts.Tensor
}
func NewFunc(fn func(ts.Tensor) ts.Tensor) (retVal Func) {
func NewFunc(fn func(*ts.Tensor) *ts.Tensor) (retVal Func) {
return Func{f: fn}
}
// Implement Module interface for Func:
// ====================================
func (fn Func) Forward(xs ts.Tensor) (retVal ts.Tensor) {
func (fn Func) Forward(xs *ts.Tensor) (retVal *ts.Tensor) {
return fn.f(xs)
}
// ForwardT implements ModuleT for Func object as well.
//
// NOTE: train param will not be used.
func (fn Func) ForwardT(xs ts.Tensor, train bool) (retVal ts.Tensor) {
func (fn Func) ForwardT(xs *ts.Tensor, train bool) (retVal *ts.Tensor) {
return fn.f(xs)
}
type FuncT struct {
f func(ts.Tensor, bool) ts.Tensor
f func(*ts.Tensor, bool) *ts.Tensor
}
func NewFuncT(fn func(ts.Tensor, bool) ts.Tensor) (retVal FuncT) {
func NewFuncT(fn func(*ts.Tensor, bool) *ts.Tensor) (retVal FuncT) {
return FuncT{f: fn}
}
// Implement ModuleT interface for FuncT:
// ====================================
func (fn FuncT) ForwardT(xs ts.Tensor, train bool) (retVal ts.Tensor) {
func (fn FuncT) ForwardT(xs *ts.Tensor, train bool) (retVal *ts.Tensor) {
return fn.f(xs, train)
}
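
`Func` wraps an arbitrary closure as a module. A sketch (assumes the generated `MustRelu` wrapper from the auto-generated tensor APIs):

```go
relu := nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor {
	return xs.MustRelu(false) // del=false: keep the input tensor alive
})

y := relu.Forward(x) // x is any *ts.Tensor
```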

View File

@ -11,10 +11,10 @@ import (
type Init interface {
// creates a new tensor with the specified initialization
InitTensor(dims []int64, device gotch.Device) (retVal ts.Tensor)
InitTensor(dims []int64, device gotch.Device) (retVal *ts.Tensor)
// re-initializes (in-place) an existing tensor with the specified initialization
Set(tensor ts.Tensor)
Set(tensor *ts.Tensor)
}
// constInit:
@ -28,7 +28,7 @@ func NewConstInit(v float64) constInit {
return constInit{v}
}
func (c constInit) InitTensor(dims []int64, device gotch.Device) (retVal ts.Tensor) {
func (c constInit) InitTensor(dims []int64, device gotch.Device) (retVal *ts.Tensor) {
var err error
kind := gotch.Float
switch {
@ -50,7 +50,7 @@ func (c constInit) InitTensor(dims []int64, device gotch.Device) (retVal ts.Tens
return retVal
}
func (c constInit) Set(tensor ts.Tensor) {
func (c constInit) Set(tensor *ts.Tensor) {
var err error
scalarVal := ts.FloatScalar(c.value)
if err != nil {
@ -71,7 +71,7 @@ func NewRandnInit(mean, stdev float64) randnInit {
return randnInit{mean, stdev}
}
func (r randnInit) InitTensor(dims []int64, device gotch.Device) (retVal ts.Tensor) {
func (r randnInit) InitTensor(dims []int64, device gotch.Device) (retVal *ts.Tensor) {
var err error
rand.Seed(86)
@ -92,9 +92,9 @@ func (r randnInit) InitTensor(dims []int64, device gotch.Device) (retVal ts.Tens
}
func (r randnInit) Set(tensor ts.Tensor) {
func (r randnInit) Set(tensor *ts.Tensor) {
var (
randnTs ts.Tensor
randnTs *ts.Tensor
err error
)
@ -128,7 +128,7 @@ func NewUniformInit(lo, up float64) uniformInit {
return uniformInit{lo, up}
}
func (u uniformInit) InitTensor(dims []int64, device gotch.Device) (retVal ts.Tensor) {
func (u uniformInit) InitTensor(dims []int64, device gotch.Device) (retVal *ts.Tensor) {
var err error
kind := gotch.Float
retVal = ts.MustZeros(dims, kind, device)
@ -139,7 +139,7 @@ func (u uniformInit) InitTensor(dims []int64, device gotch.Device) (retVal ts.Te
return retVal
}
func (u uniformInit) Set(tensor ts.Tensor) {
func (u uniformInit) Set(tensor *ts.Tensor) {
tensor.Uniform_(u.lo, u.up)
}
@ -152,7 +152,7 @@ func NewKaimingUniformInit() kaimingUniformInit {
return kaimingUniformInit{}
}
func (k kaimingUniformInit) InitTensor(dims []int64, device gotch.Device) (retVal ts.Tensor) {
func (k kaimingUniformInit) InitTensor(dims []int64, device gotch.Device) (retVal *ts.Tensor) {
var fanIn int64
if len(dims) == 0 {
log.Fatalf("KaimingUniformInit method call: dims (%v) should have length >= 1", dims)
@ -191,7 +191,7 @@ func factorial(n int64) (result int64) {
return 1
}
func (k kaimingUniformInit) Set(tensor ts.Tensor) {
func (k kaimingUniformInit) Set(tensor *ts.Tensor) {
dims, err := tensor.Size()
if err != nil {
log.Fatalf("uniformInit - Set method call error: %v\n", err)
@ -218,12 +218,12 @@ func NewGlorotNInit() glorotNInit {
return glorotNInit{}
}
func (gl glorotNInit) InitTensor(dims []int64, device gotch.Device) (retVal ts.Tensor) {
func (gl glorotNInit) InitTensor(dims []int64, device gotch.Device) (retVal *ts.Tensor) {
// TODO: implement
return
}
func (gl glorotNInit) Set(tensor ts.Tensor) {
func (gl glorotNInit) Set(tensor *ts.Tensor) {
// TODO: implement
}
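
A sketch of using an `Init` implementation directly (hypothetical shapes):

```go
// Materialize a fresh tensor with Kaiming-uniform values:
w := nn.NewKaimingUniformInit().InitTensor([]int64{64, 128}, gotch.CPU)
fmt.Println(w.MustSize()) // [64 128]

// Re-initialize an existing tensor in place:
nn.NewUniformInit(-0.1, 0.1).Set(w)
```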

View File

@ -14,8 +14,8 @@ type LayerNormConfig struct {
BsInit Init
}
func DefaultLayerNormConfig() LayerNormConfig {
return LayerNormConfig{
func DefaultLayerNormConfig() *LayerNormConfig {
return &LayerNormConfig{
CudnnEnable: true,
Eps: 1e-5,
ElementwiseAffine: true,
@ -26,30 +26,30 @@ func DefaultLayerNormConfig() LayerNormConfig {
// A layer-normalization layer.
type LayerNorm struct {
Config LayerNormConfig
Ws ts.Tensor // optional
Bs ts.Tensor // optional
Config *LayerNormConfig
Ws *ts.Tensor // optional
Bs *ts.Tensor // optional
NormalizedShape []int64
}
func NewLayerNorm(vs Path, normalizedShape []int64, config LayerNormConfig) LayerNorm {
func NewLayerNorm(vs Path, normalizedShape []int64, config *LayerNormConfig) *LayerNorm {
var (
ws ts.Tensor
bs ts.Tensor
ws *ts.Tensor
bs *ts.Tensor
)
if config.ElementwiseAffine {
ws = vs.NewVar("weight", normalizedShape, config.WsInit)
bs = vs.NewVar("bias", normalizedShape, config.BsInit)
}
return LayerNorm{config, ws, bs, normalizedShape}
return &LayerNorm{config, ws, bs, normalizedShape}
}
// Implement Module interface for LayerNorm:
// =========================================
func (ln LayerNorm) Forward(xs ts.Tensor) (retVal ts.Tensor) {
func (ln *LayerNorm) Forward(xs *ts.Tensor) (retVal *ts.Tensor) {
return ts.MustLayerNorm(xs, ln.NormalizedShape, ln.Ws, ln.Bs, ln.Config.Eps, ln.Config.CudnnEnable)
}
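
Usage sketch (hypothetical shapes; `NewLayerNorm` takes `Path` by value in this commit):

```go
ln := nn.NewLayerNorm(*vs.Root(), []int64{256}, nn.DefaultLayerNormConfig())

x := ts.MustRandn([]int64{8, 256}, gotch.Float, gotch.CPU)
y := ln.Forward(x) // normalized over the trailing 256 features
```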

View File

@ -18,8 +18,8 @@ type LinearConfig struct {
// DefaultLinearConfig creates default LinearConfig with
// weights initialized using KaimingUniform and Bias set to true
func DefaultLinearConfig() LinearConfig {
return LinearConfig{
func DefaultLinearConfig() *LinearConfig {
return &LinearConfig{
WsInit: NewKaimingUniformInit(),
BsInit: nil,
Bias: true,
@ -28,8 +28,8 @@ func DefaultLinearConfig() LinearConfig {
// Linear is a linear fully-connected layer
type Linear struct {
Ws ts.Tensor
Bs ts.Tensor
Ws *ts.Tensor
Bs *ts.Tensor
}
// NewLinear creates a new linear layer
@ -37,9 +37,9 @@ type Linear struct {
// inDim - input dimension (x) [input features - columns]
// outDim - output dimension (y) [output features - columns]
// NOTE: w will have shape{outDim, inDim}; b will have shape{outDim}
func NewLinear(vs Path, inDim, outDim int64, c LinearConfig) Linear {
func NewLinear(vs Path, inDim, outDim int64, c *LinearConfig) *Linear {
var bs ts.Tensor
var bs *ts.Tensor
// bs has size of output dimension
switch c.Bias {
case false:
@ -55,7 +55,7 @@ func NewLinear(vs Path, inDim, outDim int64, c LinearConfig) Linear {
}
}
return Linear{
return &Linear{
Ws: vs.NewVar("weight", []int64{outDim, inDim}, c.WsInit).MustT(false),
Bs: bs,
}
@ -89,7 +89,7 @@ func NewLinear(vs Path, inDim, outDim int64, c LinearConfig) Linear {
// 1 1 1
// 1 1 1
// 1 1 1 ]
func (l Linear) Forward(xs ts.Tensor) (retVal ts.Tensor) {
func (l *Linear) Forward(xs *ts.Tensor) (retVal *ts.Tensor) {
mul := xs.MustMatmul(l.Ws, false)
return mul.MustAdd(l.Bs, true)
@ -98,7 +98,7 @@ func (l Linear) Forward(xs ts.Tensor) (retVal ts.Tensor) {
// ForwardT implements ModuleT interface for Linear layer.
//
// NOTE: train param will not be used.
func (l Linear) ForwardT(xs ts.Tensor, train bool) (retVal ts.Tensor) {
func (l *Linear) ForwardT(xs *ts.Tensor, train bool) (retVal *ts.Tensor) {
mul := xs.MustMatmul(l.Ws, false)
return mul.MustAdd(l.Bs, true)
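
A usage sketch matching the Forward implementation above (`NewLinear` stores `Ws` pre-transposed, so `Forward` computes `xs·Ws + Bs`; dimensions are hypothetical):

```go
lin := nn.NewLinear(*vs.Root(), 784, 10, nn.DefaultLinearConfig())

x := ts.MustRandn([]int64{32, 784}, gotch.Float, gotch.CPU)
logits := lin.Forward(x)
fmt.Println(logits.MustSize()) // [32 10]
```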

View File

@ -10,7 +10,7 @@ import (
// Optimizer is a struct object to run gradient descent.
type Optimizer struct {
opt ts.COptimizer
opt *ts.COptimizer
// variables Variables // having embedded sync.Mutex
variablesInOptimizer uint8
config interface{}
@ -18,7 +18,7 @@ type Optimizer struct {
// OptimizerConfig defines Optimizer configurations. These configs can be used to build optimizer.
type OptimizerConfig interface {
buildCOpt(lr float64) (retVal ts.COptimizer, err error)
buildCOpt(lr float64) (*ts.COptimizer, error)
// Build builds an optimizer with the specified learning rate handling variables stored in `vs`.
//
@ -29,11 +29,11 @@ type OptimizerConfig interface {
// (config AdamOptimizerConfig) Build(vs VarStore, lr float64) (retVal Optimizer, err error){
// return defaultBuild(config, vs, lr)
// }
Build(vs VarStore, lr float64) (retVal Optimizer, err error)
Build(vs *VarStore, lr float64) (*Optimizer, error)
}
// defaultBuild is the default Build method for the OptimizerConfig interface
func defaultBuild(config OptimizerConfig, vs VarStore, lr float64) (retVal Optimizer, err error) {
func defaultBuild(config OptimizerConfig, vs *VarStore, lr float64) (retVal *Optimizer, err error) {
opt, err := config.buildCOpt(lr)
if err != nil {
@ -43,7 +43,7 @@ func defaultBuild(config OptimizerConfig, vs VarStore, lr float64) (retVal Optim
var parameters []ts.Tensor
for _, v := range vs.Vars.TrainableVariables {
param := v.MustShallowClone()
parameters = append(parameters, param)
parameters = append(parameters, *param)
}
if len(vs.Vars.TrainableVariables) > 0 {
@ -54,7 +54,7 @@ func defaultBuild(config OptimizerConfig, vs VarStore, lr float64) (retVal Optim
// TODO: should we clone or copy?
return Optimizer{
return &Optimizer{
opt: opt,
// variables: vs.Vars,
variablesInOptimizer: uint8(len(vs.Vars.TrainableVariables)),
@ -74,8 +74,8 @@ type SGDConfig struct {
}
// DefaultSGDConfig creates SGDConfig with default values.
func DefaultSGDConfig() SGDConfig {
return SGDConfig{
func DefaultSGDConfig() *SGDConfig {
return &SGDConfig{
Momentum: 0.0,
Dampening: 0.0,
Wd: 0.0,
@ -84,8 +84,8 @@ func DefaultSGDConfig() SGDConfig {
}
// NewSGDConfig creates the configuration for an SGD optimizer with the specified values
func NewSGDConfig(momentum, dampening, wd float64, nesterov bool) (retVal SGDConfig) {
return SGDConfig{
func NewSGDConfig(momentum, dampening, wd float64, nesterov bool) *SGDConfig {
return &SGDConfig{
Momentum: momentum,
Dampening: dampening,
Wd: wd,
@ -94,11 +94,11 @@ func NewSGDConfig(momentum, dampening, wd float64, nesterov bool) (retVal SGDCon
}
// Implement OptimizerConfig interface for SGDConfig
func (c SGDConfig) buildCOpt(lr float64) (retVal ts.COptimizer, err error) {
func (c *SGDConfig) buildCOpt(lr float64) (*ts.COptimizer, error) {
return ts.Sgd(lr, c.Momentum, c.Dampening, c.Wd, c.Nesterov)
}
func (c SGDConfig) Build(vs VarStore, lr float64) (retVal Optimizer, err error) {
func (c *SGDConfig) Build(vs *VarStore, lr float64) (*Optimizer, error) {
return defaultBuild(c, vs, lr)
}
@ -112,8 +112,8 @@ type AdamConfig struct {
}
// DefaultAdamConfig creates AdamConfig with default values
func DefaultAdamConfig() AdamConfig {
return AdamConfig{
func DefaultAdamConfig() *AdamConfig {
return &AdamConfig{
Beta1: 0.9,
Beta2: 0.999,
Wd: 0.0,
@ -121,8 +121,8 @@ func DefaultAdamConfig() AdamConfig {
}
// NewAdamConfig creates AdamConfig with specified values
func NewAdamConfig(beta1, beta2, wd float64) AdamConfig {
return AdamConfig{
func NewAdamConfig(beta1, beta2, wd float64) *AdamConfig {
return &AdamConfig{
Beta1: beta1,
Beta2: beta2,
Wd: wd,
@ -130,11 +130,11 @@ func NewAdamConfig(beta1, beta2, wd float64) AdamConfig {
}
// Implement OptimizerConfig interface for AdamConfig
func (c AdamConfig) buildCOpt(lr float64) (retVal ts.COptimizer, err error) {
func (c *AdamConfig) buildCOpt(lr float64) (*ts.COptimizer, error) {
return ts.Adam(lr, c.Beta1, c.Beta2, c.Wd)
}
func (c AdamConfig) Build(vs VarStore, lr float64) (retVal Optimizer, err error) {
func (c *AdamConfig) Build(vs *VarStore, lr float64) (*Optimizer, error) {
return defaultBuild(c, vs, lr)
}
@ -150,8 +150,8 @@ type RMSPropConfig struct {
}
// DefaultRMSPropConfig creates RMSPropConfig with default values
func DefaultRMSPropConfig() RMSPropConfig {
return RMSPropConfig{
func DefaultRMSPropConfig() *RMSPropConfig {
return &RMSPropConfig{
Alpha: 0.99,
Eps: 1e-8,
Wd: 0.0,
@ -161,8 +161,8 @@ func DefaultRMSPropConfig() RMSPropConfig {
}
// NewRMSPropConfig creates RMSPropConfig with specified values
func NewRMSPropConfig(alpha, eps, wd, momentum float64, centered bool) RMSPropConfig {
return RMSPropConfig{
func NewRMSPropConfig(alpha, eps, wd, momentum float64, centered bool) *RMSPropConfig {
return &RMSPropConfig{
Alpha: alpha,
Eps: eps,
Wd: wd,
@ -172,11 +172,11 @@ func NewRMSPropConfig(alpha, eps, wd, momentum float64, centered bool) RMSPropCo
}
// Implement OptimizerConfig interface for RMSPropConfig
func (c RMSPropConfig) buildCOpt(lr float64) (retVal ts.COptimizer, err error) {
func (c *RMSPropConfig) buildCOpt(lr float64) (*ts.COptimizer, error) {
return ts.RmsProp(lr, c.Alpha, c.Eps, c.Wd, c.Momentum, c.Centered)
}
func (c RMSPropConfig) Build(vs VarStore, lr float64) (retVal Optimizer, err error) {
func (c *RMSPropConfig) Build(vs *VarStore, lr float64) (*Optimizer, error) {
return defaultBuild(c, vs, lr)
}
@ -229,7 +229,7 @@ func (opt *Optimizer) Step() {
}
// BackwardStep applies a backward pass, updates the gradients, and performs an optimization step.
func (opt *Optimizer) BackwardStep(loss ts.Tensor) {
func (opt *Optimizer) BackwardStep(loss *ts.Tensor) {
opt.addMissingVariables()
@ -250,7 +250,7 @@ func (opt *Optimizer) BackwardStep(loss ts.Tensor) {
// BackwardStepClip applies a backward pass, updates the gradients, and performs an optimization step.
//
// The gradients are clipped based on `max` before being applied.
func (opt *Optimizer) BackwardStepClip(loss ts.Tensor, max float64) {
func (opt *Optimizer) BackwardStepClip(loss *ts.Tensor, max float64) {
opt.addMissingVariables()
err := opt.opt.ZeroGrad()
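
A training-step sketch against the pointerized optimizer API (`vs` is a `*nn.VarStore`; `MustMseLoss` is a hypothetical stand-in for whatever loss the model uses):

```go
opt, err := nn.DefaultAdamConfig().Build(vs, 1e-3)
if err != nil {
	log.Fatal(err)
}

loss := logits.MustMseLoss(target, 1, false) // hypothetical; reduction=1 (mean)
opt.BackwardStep(loss)                       // zero grads, backward pass, step
```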

View File

@ -15,33 +15,33 @@ type RNN interface {
// Applies a single step of the recurrent network.
//
// The input should have dimensions [batch_size, features].
Step(input ts.Tensor, inState State) State
Step(input *ts.Tensor, inState State) State
// Applies multiple steps of the recurrent network.
//
// The input should have dimensions [batch_size, seq_len, features].
// The initial state is the result of applying zero_state.
Seq(input ts.Tensor) (ts.Tensor, State)
Seq(input *ts.Tensor) (*ts.Tensor, State)
// Applies multiple steps of the recurrent network.
//
// The input should have dimensions [batch_size, seq_len, features].
SeqInit(input ts.Tensor, inState State) (ts.Tensor, State)
SeqInit(input *ts.Tensor, inState State) (*ts.Tensor, State)
}
// The state for an LSTM network; it contains two tensors.
type LSTMState struct {
Tensor1 ts.Tensor
Tensor2 ts.Tensor
Tensor1 *ts.Tensor
Tensor2 *ts.Tensor
}
// The hidden state vector, which is also the output of the LSTM.
func (ls LSTMState) H() (retVal ts.Tensor) {
func (ls *LSTMState) H() *ts.Tensor {
return ls.Tensor1.MustShallowClone()
}
// The cell state vector.
func (ls LSTMState) C() (retVal ts.Tensor) {
func (ls *LSTMState) C() *ts.Tensor {
return ls.Tensor2.MustShallowClone()
}
@ -57,8 +57,8 @@ type RNNConfig struct {
}
// DefaultRNNConfig creates a default RNN configuration
func DefaultRNNConfig() RNNConfig {
return RNNConfig{
func DefaultRNNConfig() *RNNConfig {
return &RNNConfig{
HasBiases: true,
NumLayers: 1,
Dropout: float64(0.0),
@ -74,12 +74,12 @@ func DefaultRNNConfig() RNNConfig {
type LSTM struct {
flatWeights []ts.Tensor
hiddenDim int64
config RNNConfig
config *RNNConfig
device gotch.Device
}
// NewLSTM creates a LSTM layer.
func NewLSTM(vs Path, inDim, hiddenDim int64, cfg RNNConfig) (retVal LSTM) {
func NewLSTM(vs *Path, inDim, hiddenDim int64, cfg *RNNConfig) *LSTM {
var numDirections int64 = 1
if cfg.Bidirectional {
@ -100,7 +100,7 @@ func NewLSTM(vs Path, inDim, hiddenDim int64, cfg RNNConfig) (retVal LSTM) {
bIh := vs.Zeros("b_ih", []int64{gateDim})
bHh := vs.Zeros("b_hh", []int64{gateDim})
flatWeights = append(flatWeights, wIh, wHh, bIh, bHh)
flatWeights = append(flatWeights, *wIh, *wHh, *bIh, *bHh)
}
}
@ -112,7 +112,7 @@ func NewLSTM(vs Path, inDim, hiddenDim int64, cfg RNNConfig) (retVal LSTM) {
ts.Must_CudnnRnnFlattenWeight(flatWeights, 4, inDim, 2, hiddenDim, cfg.NumLayers, cfg.BatchFirst, cfg.Bidirectional)
}
return LSTM{
return &LSTM{
flatWeights: flatWeights,
hiddenDim: hiddenDim,
config: cfg,
@ -124,7 +124,7 @@ func NewLSTM(vs Path, inDim, hiddenDim int64, cfg RNNConfig) (retVal LSTM) {
// Implement RNN interface for LSTM:
// =================================
func (l LSTM) ZeroState(batchDim int64) (retVal State) {
func (l *LSTM) ZeroState(batchDim int64) (retVal State) {
var numDirections int64 = 1
if l.config.Bidirectional {
numDirections = 2
@ -144,7 +144,7 @@ func (l LSTM) ZeroState(batchDim int64) (retVal State) {
return retVal
}
func (l LSTM) Step(input ts.Tensor, inState State) (retVal State) {
func (l *LSTM) Step(input *ts.Tensor, inState State) (retVal State) {
ip := input.MustUnsqueeze(1, false)
output, state := l.SeqInit(ip, inState)
@ -156,7 +156,7 @@ func (l LSTM) Step(input ts.Tensor, inState State) (retVal State) {
return state
}
func (l LSTM) Seq(input ts.Tensor) (output ts.Tensor, state State) {
func (l *LSTM) Seq(input *ts.Tensor) (output *ts.Tensor, state State) {
batchDim := input.MustSize()[0]
inState := l.ZeroState(batchDim)
@ -169,9 +169,9 @@ func (l LSTM) Seq(input ts.Tensor) (output ts.Tensor, state State) {
return output, state
}
func (l LSTM) SeqInit(input ts.Tensor, inState State) (ts.Tensor, State) {
func (l *LSTM) SeqInit(input *ts.Tensor, inState State) (*ts.Tensor, State) {
output, h, c := input.MustLstm([]ts.Tensor{inState.(LSTMState).Tensor1, inState.(LSTMState).Tensor2}, l.flatWeights, l.config.HasBiases, l.config.NumLayers, l.config.Dropout, l.config.Train, l.config.Bidirectional, l.config.BatchFirst)
output, h, c := input.MustLstm([]ts.Tensor{*inState.(LSTMState).Tensor1, *inState.(LSTMState).Tensor2}, l.flatWeights, l.config.HasBiases, l.config.NumLayers, l.config.Dropout, l.config.Train, l.config.Bidirectional, l.config.BatchFirst)
return output, LSTMState{
Tensor1: h,
@ -181,10 +181,10 @@ func (l LSTM) SeqInit(input ts.Tensor, inState State) (ts.Tensor, State) {
// GRUState is a GRU state. It contains a single tensor.
type GRUState struct {
Tensor ts.Tensor
Tensor *ts.Tensor
}
func (gs GRUState) Value() ts.Tensor {
func (gs *GRUState) Value() *ts.Tensor {
return gs.Tensor
}
@ -194,12 +194,12 @@ func (gs GRUState) Value() ts.Tensor {
type GRU struct {
flatWeights []ts.Tensor
hiddenDim int64
config RNNConfig
config *RNNConfig
device gotch.Device
}
// NewGRU creates a new GRU layer
func NewGRU(vs Path, inDim, hiddenDim int64, cfg RNNConfig) (retVal GRU) {
func NewGRU(vs *Path, inDim, hiddenDim int64, cfg *RNNConfig) (retVal *GRU) {
var numDirections int64 = 1
if cfg.Bidirectional {
numDirections = 2
@ -222,7 +222,7 @@ func NewGRU(vs Path, inDim, hiddenDim int64, cfg RNNConfig) (retVal GRU) {
bIh := vs.Zeros("b_ih", []int64{gateDim})
bHh := vs.Zeros("b_hh", []int64{gateDim})
flatWeights = append(flatWeights, wIh, wHh, bIh, bHh)
flatWeights = append(flatWeights, *wIh, *wHh, *bIh, *bHh)
}
}
@ -232,7 +232,7 @@ func NewGRU(vs Path, inDim, hiddenDim int64, cfg RNNConfig) (retVal GRU) {
ts.Must_CudnnRnnFlattenWeight(flatWeights, 4, inDim, 3, hiddenDim, cfg.NumLayers, cfg.BatchFirst, cfg.Bidirectional)
}
return GRU{
return &GRU{
flatWeights: flatWeights,
hiddenDim: hiddenDim,
config: cfg,
@ -243,7 +243,7 @@ func NewGRU(vs Path, inDim, hiddenDim int64, cfg RNNConfig) (retVal GRU) {
// Implement RNN interface for GRU:
// ================================
func (g GRU) ZeroState(batchDim int64) (retVal State) {
func (g *GRU) ZeroState(batchDim int64) (retVal State) {
var numDirections int64 = 1
if g.config.Bidirectional {
numDirections = 2
@ -257,7 +257,7 @@ func (g GRU) ZeroState(batchDim int64) (retVal State) {
return GRUState{Tensor: tensor}
}
func (g GRU) Step(input ts.Tensor, inState State) (retVal State) {
func (g *GRU) Step(input *ts.Tensor, inState State) (retVal State) {
unsqueezedInput := input.MustUnsqueeze(1, false)
output, state := g.SeqInit(unsqueezedInput, inState)
@ -269,7 +269,7 @@ func (g GRU) Step(input ts.Tensor, inState State) (retVal State) {
return state
}
func (g GRU) Seq(input ts.Tensor) (output ts.Tensor, state State) {
func (g *GRU) Seq(input *ts.Tensor) (output *ts.Tensor, state State) {
batchDim := input.MustSize()[0]
inState := g.ZeroState(batchDim)
@ -281,7 +281,7 @@ func (g GRU) Seq(input ts.Tensor) (output ts.Tensor, state State) {
return output, state
}
func (g GRU) SeqInit(input ts.Tensor, inState State) (ts.Tensor, State) {
func (g *GRU) SeqInit(input *ts.Tensor, inState State) (*ts.Tensor, State) {
output, h := input.MustGru(inState.(GRUState).Tensor, g.flatWeights, g.config.HasBiases, g.config.NumLayers, g.config.Dropout, g.config.Train, g.config.Bidirectional, g.config.BatchFirst)
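
A sketch of running the LSTM over a batch of sequences (hypothetical dimensions; `NewLSTM` now takes a `*Path`):

```go
lstm := nn.NewLSTM(vs.Root(), 10, 20, nn.DefaultRNNConfig()) // in=10, hidden=20

x := ts.MustRandn([]int64{5, 7, 10}, gotch.Float, gotch.CPU) // [batch, seq, features]
out, state := lstm.Seq(x)
fmt.Println(out.MustSize()) // [5 7 20]
_ = state                   // LSTMState holding h and c
```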

View File

@ -10,7 +10,7 @@ import (
ts "github.com/sugarme/gotch/tensor"
)
func gruTest(rnnConfig nn.RNNConfig, t *testing.T) {
func gruTest(rnnConfig *nn.RNNConfig, t *testing.T) {
var (
batchDim int64 = 5
@ -47,7 +47,7 @@ func gruTest(rnnConfig nn.RNNConfig, t *testing.T) {
input = ts.MustRandn([]int64{batchDim, seqLen, inputDim}, gotch.Float, gotch.CPU)
output, _ = gru.Seq(input)
wantSeq := []int64{batchDim, seqLen, outputDim * numDirections}
gotSeq := output.(ts.Tensor).MustSize()
gotSeq := output.(*ts.Tensor).MustSize()
if !reflect.DeepEqual(wantSeq, gotSeq) {
fmt.Println("Seq test:")
@ -75,7 +75,7 @@ func TestGRU(t *testing.T) {
gruTest(cfg, t)
}
func lstmTest(rnnConfig nn.RNNConfig, t *testing.T) {
func lstmTest(rnnConfig *nn.RNNConfig, t *testing.T) {
var (
batchDim int64 = 5
@ -121,7 +121,7 @@ func lstmTest(rnnConfig nn.RNNConfig, t *testing.T) {
output, _ = lstm.Seq(input)
wantSeq := []int64{batchDim, seqLen, outputDim * numDirections}
gotSeq := output.(ts.Tensor).MustSize()
gotSeq := output.(*ts.Tensor).MustSize()
if !reflect.DeepEqual(wantSeq, gotSeq) {
fmt.Println("Seq test:")

View File

@ -14,15 +14,15 @@ type Sequential struct {
}
// Seq creates a new empty sequential layer
func Seq() Sequential {
return Sequential{layers: make([]ts.Module, 0)}
func Seq() *Sequential {
return &Sequential{layers: make([]ts.Module, 0)}
}
// Sequential methods:
//====================
// Len returns the number of sub-layers embedded in this layer
func (s Sequential) Len() (retVal int64) {
func (s *Sequential) Len() (retVal int64) {
return int64(len(s.layers))
}
@ -47,7 +47,7 @@ func (s *Sequential) AddFn(fn ts.Module) {
}
// ForwardAll applies the forward pass and returns the output for each layer.
func (s *Sequential) ForwardAll(xs ts.Tensor, opts ...uint8) (retVal []ts.Tensor) {
func (s *Sequential) ForwardAll(xs *ts.Tensor, opts ...uint8) (retVal []ts.Tensor) {
var n uint8 = uint8(len(s.layers))
if len(opts) > 0 {
@ -55,11 +55,11 @@ func (s *Sequential) ForwardAll(xs ts.Tensor, opts ...uint8) (retVal []ts.Tensor
}
if s.IsEmpty() {
return []ts.Tensor{xs.MustShallowClone()}
return []ts.Tensor{*xs.MustShallowClone()}
}
for i := 0; i < int(n); i++ {
retVal = append(retVal, s.layers[i].Forward(xs))
retVal = append(retVal, *s.layers[i].Forward(xs))
}
return retVal
@ -76,7 +76,7 @@ func WithUint8(n uint8) func() uint8 {
// ==========================================
// Forward implements Module interface for Sequential
func (s *Sequential) Forward(xs ts.Tensor) (retVal ts.Tensor) {
func (s *Sequential) Forward(xs *ts.Tensor) (retVal *ts.Tensor) {
if s.IsEmpty() {
return xs.MustShallowClone()
}
@ -85,12 +85,12 @@ func (s *Sequential) Forward(xs ts.Tensor) (retVal ts.Tensor) {
outs := make([]ts.Tensor, len(s.layers))
for i := 0; i < len(s.layers); i++ {
if i == 0 {
outs[0] = s.layers[i].Forward(xs)
outs[0] = *s.layers[i].Forward(xs)
defer outs[0].MustDrop()
} else if i == len(s.layers)-1 {
return s.layers[i].Forward(outs[i-1])
return s.layers[i].Forward(&outs[i-1])
} else {
outs[i] = s.layers[i].Forward(outs[i-1])
outs[i] = *s.layers[i].Forward(&outs[i-1])
defer outs[i].MustDrop()
}
}
@ -104,8 +104,8 @@ type SequentialT struct {
}
// SeqT creates a new empty sequential layer.
func SeqT() SequentialT {
return SequentialT{
func SeqT() *SequentialT {
return &SequentialT{
layers: make([]ts.ModuleT, 0),
}
}
@ -140,7 +140,7 @@ func (s *SequentialT) IsEmpty() (retVal bool) {
* return currTs
* }
* */
func (s SequentialT) ForwardT(xs ts.Tensor, train bool) (retVal ts.Tensor) {
func (s *SequentialT) ForwardT(xs *ts.Tensor, train bool) (retVal *ts.Tensor) {
if s.IsEmpty() {
return xs.MustShallowClone()
}
@ -149,12 +149,12 @@ func (s SequentialT) ForwardT(xs ts.Tensor, train bool) (retVal ts.Tensor) {
outs := make([]ts.Tensor, len(s.layers))
for i := 0; i < len(s.layers); i++ {
if i == 0 {
outs[0] = s.layers[i].ForwardT(xs, train)
outs[0] = *s.layers[i].ForwardT(xs, train)
defer outs[0].MustDrop()
} else if i == len(s.layers)-1 {
return s.layers[i].ForwardT(outs[i-1], train)
return s.layers[i].ForwardT(&outs[i-1], train)
} else {
outs[i] = s.layers[i].ForwardT(outs[i-1], train)
outs[i] = *s.layers[i].ForwardT(&outs[i-1], train)
defer outs[i].MustDrop()
}
}
@ -187,7 +187,7 @@ func (s *SequentialT) AddFnT(fn ts.ModuleT) {
}
// ForwardAll applies the forward pass and returns the output for each layer.
func (s *SequentialT) ForwardAllT(xs ts.Tensor, train bool, opts ...uint8) (retVal []ts.Tensor) {
func (s *SequentialT) ForwardAllT(xs *ts.Tensor, train bool, opts ...uint8) (retVal []ts.Tensor) {
var n uint8 = uint8(len(s.layers))
if len(opts) > 0 {
@ -195,13 +195,13 @@ func (s *SequentialT) ForwardAllT(xs ts.Tensor, train bool, opts ...uint8) (retV
}
if s.IsEmpty() {
return []ts.Tensor{xs.MustShallowClone()}
return []ts.Tensor{*xs.MustShallowClone()}
}
currTs := xs
for i := 0; i < int(n); i++ {
res := s.layers[i].ForwardT(currTs, train)
retVal = append(retVal, res)
retVal = append(retVal, *res)
currTs = res
}
@ -214,15 +214,15 @@ func (s *SequentialT) ForwardAllT(xs ts.Tensor, train bool, opts ...uint8) (retV
// Ref. https://stackoverflow.com/a/42182987
// NOTE: Specifically, `ForwardWith` is used to wrap anonymous function
// as input parameter of `AddFn` Sequential method.
type ForwardWith func(ts.Tensor) ts.Tensor
type ForwardWith func(*ts.Tensor) *ts.Tensor
func (fw ForwardWith) Forward(xs ts.Tensor) ts.Tensor {
func (fw ForwardWith) Forward(xs *ts.Tensor) *ts.Tensor {
return fw(xs)
}
type ForwardTWith func(ts.Tensor, bool) ts.Tensor
type ForwardTWith func(*ts.Tensor, bool) *ts.Tensor
func (fw ForwardTWith) ForwardT(xs ts.Tensor, train bool) ts.Tensor {
func (fw ForwardTWith) ForwardT(xs *ts.Tensor, train bool) *ts.Tensor {
return fw(xs, train)
}
@ -235,7 +235,7 @@ func (fw ForwardTWith) ForwardT(xs ts.Tensor, train bool) ts.Tensor {
// This does not seem to work in Go.
// There are two ways to get around it: one is to freeze the VarStore, the other is
// to manually set autograd on the `loss` tensor, i.e. `loss = loss.MustSetRequiresGrad(true)`
func BatchAccuracyForLogits(vs VarStore, m ts.ModuleT, xs, ys ts.Tensor, d gotch.Device, batchSize int) (retVal float64) {
func BatchAccuracyForLogits(vs *VarStore, m ts.ModuleT, xs, ys *ts.Tensor, d gotch.Device, batchSize int) (retVal float64) {
var (
sumAccuracy float64 = 0.0
@ -272,7 +272,7 @@ func BatchAccuracyForLogits(vs VarStore, m ts.ModuleT, xs, ys ts.Tensor, d gotch
// BatchAccuracyForLogitsIdx is an alternative to BatchAccuracyForLogits to
// calculate accuracy for specified batch on module weight. It uses tensor
// indexing instead of Iter2
func BatchAccuracyForLogitsIdx(vs VarStore, m ts.ModuleT, xs, ys ts.Tensor, d gotch.Device, batchSize int) (retVal float64) {
func BatchAccuracyForLogitsIdx(vs *VarStore, m ts.ModuleT, xs, ys *ts.Tensor, d gotch.Device, batchSize int) (retVal float64) {
var (
sumAccuracy float64 = 0.0
sampleCount float64 = 0.0
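
A composition sketch for `Sequential` (hypothetical; assumes `Sequential` also has an `Add` method for `ts.Module` values, alongside the `AddFn` shown above):

```go
seq := nn.Seq()
seq.Add(nn.NewLinear(*vs.Root(), 784, 128, nn.DefaultLinearConfig()))
seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.MustRelu(false) }))
seq.Add(nn.NewLinear(*vs.Root(), 128, 10, nn.DefaultLinearConfig()))

y := seq.Forward(x) // intermediate tensors are dropped inside Forward
```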

View File

@ -14,8 +14,8 @@ type EmbeddingConfig struct {
PaddingIdx int64
}
func DefaultEmbeddingConfig() EmbeddingConfig {
return EmbeddingConfig{
func DefaultEmbeddingConfig() *EmbeddingConfig {
return &EmbeddingConfig{
Sparse: false,
ScaleGradByFreq: false,
WsInit: NewRandnInit(0.0, 1.0),
@ -28,13 +28,13 @@ func DefaultEmbeddingConfig() EmbeddingConfig {
// An embedding layer acts as a simple lookup table that stores embeddings.
// This is commonly used to store word embeddings.
type Embedding struct {
Ws ts.Tensor
config EmbeddingConfig
Ws *ts.Tensor
config *EmbeddingConfig
}
// NewEmbedding creates a new Embedding
func NewEmbedding(vs Path, numEmbeddings int64, embeddingDim int64, config EmbeddingConfig) Embedding {
return Embedding{
func NewEmbedding(vs *Path, numEmbeddings int64, embeddingDim int64, config *EmbeddingConfig) *Embedding {
return &Embedding{
Ws: vs.NewVar("weight", []int64{numEmbeddings, embeddingDim}, config.WsInit),
config: config,
}
@ -44,11 +44,11 @@ func NewEmbedding(vs Path, numEmbeddings int64, embeddingDim int64, config Embed
// =========================================
// Forward implements Module interface for Embedding
func (e Embedding) Forward(xs ts.Tensor) (retVal ts.Tensor) {
func (e *Embedding) Forward(xs *ts.Tensor) *ts.Tensor {
return ts.MustEmbedding(e.Ws, xs, e.config.PaddingIdx, e.config.ScaleGradByFreq, e.config.Sparse)
}
// ForwardT implements ModuleT interface for Embedding
func (e Embedding) ForwardT(xs ts.Tensor, train bool) (retVal ts.Tensor) {
func (e *Embedding) ForwardT(xs *ts.Tensor, train bool) *ts.Tensor {
return ts.MustEmbedding(e.Ws, xs, e.config.PaddingIdx, e.config.ScaleGradByFreq, e.config.Sparse)
}
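
A lookup sketch (hypothetical vocabulary and ids; `MustOfSlice` builds an int64 tensor from a Go slice):

```go
emb := nn.NewEmbedding(vs.Root(), 1000, 64, nn.DefaultEmbeddingConfig())

ids := ts.MustOfSlice([]int64{1, 42, 7}) // token indices
vecs := emb.Forward(ids)
fmt.Println(vecs.MustSize()) // [3 64]
```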

View File

@ -9,7 +9,7 @@ import (
ts "github.com/sugarme/gotch/tensor"
)
func embeddingTest(embeddingConfig nn.EmbeddingConfig, t *testing.T) {
func embeddingTest(embeddingConfig *nn.EmbeddingConfig, t *testing.T) {
var (
batchDim int64 = 5

View File

@ -20,7 +20,7 @@ const SEP = "."
// however the tensor is not set to require gradients.
type Variables struct {
mutex *sync.Mutex
NamedVariables map[string]ts.Tensor
NamedVariables map[string]*ts.Tensor
TrainableVariables []ts.Tensor
}
@ -45,14 +45,14 @@ type Entry struct {
}
// NewVarStore creates a new variable store located on the specified device
func NewVarStore(device gotch.Device) VarStore {
func NewVarStore(device gotch.Device) *VarStore {
variables := Variables{
mutex: &sync.Mutex{},
NamedVariables: make(map[string]ts.Tensor, 0),
NamedVariables: make(map[string]*ts.Tensor, 0),
TrainableVariables: make([]ts.Tensor, 0),
}
return VarStore{
return &VarStore{
device: device,
Vars: variables,
}
@ -94,7 +94,7 @@ func (vs *VarStore) TrainableVariables() (retVal []ts.Tensor) {
retVal = vs.Vars.TrainableVariables
for _, t := range vs.Vars.TrainableVariables {
retVal = append(retVal, t.MustShallowClone())
retVal = append(retVal, *t.MustShallowClone())
}
return retVal
@ -108,7 +108,7 @@ func (vs *VarStore) Variables() (retVal map[string]ts.Tensor) {
retVal = make(map[string]ts.Tensor, 0)
for k, v := range vs.Vars.NamedVariables {
retVal[k] = v.MustShallowClone()
retVal[k] = *v.MustShallowClone()
}
return retVal
@ -119,8 +119,8 @@ func (vs *VarStore) Variables() (retVal map[string]ts.Tensor) {
// NOTE: Variables are named and organized using paths. This function returns
// the top level path for the var store and can be combined with '/'
// to create sub-paths.
func (vs *VarStore) Root() (retVal Path) {
return Path{
func (vs *VarStore) Root() *Path {
return &Path{
path: []string{},
varstore: vs,
}
@ -130,7 +130,7 @@ func (vs *VarStore) Root() (retVal Path) {
//
// NOTE: Weight values for all the tensors currently stored in the
// var-store gets saved in the given file.
func (vs *VarStore) Save(filepath string) (err error) {
func (vs *VarStore) Save(filepath string) error {
vs.Vars.mutex.Lock()
defer vs.Vars.mutex.Unlock()
@ -155,7 +155,7 @@ func (vs *VarStore) Save(filepath string) (err error) {
// for these tensors are modified.
// It will throw error if name of the loaded tensors can not find
// in the current var-store named tensors set.
func (vs *VarStore) Load(filepath string) (err error) {
func (vs *VarStore) Load(filepath string) error {
namedTensors, err := ts.LoadMultiWithDevice(filepath, vs.device)
if err != nil {
return err
@ -163,7 +163,7 @@ func (vs *VarStore) Load(filepath string) (err error) {
var namedTensorsMap map[string]ts.Tensor = make(map[string]ts.Tensor, 0)
for _, namedTensor := range namedTensors {
namedTensorsMap[namedTensor.Name] = namedTensor.Tensor
namedTensorsMap[namedTensor.Name] = *namedTensor.Tensor
}
// Match and in-place copy value (update) from newly loaded tensors
@ -190,7 +190,7 @@ func (vs *VarStore) Load(filepath string) (err error) {
}
ts.NoGrad(func() {
vs.Vars.NamedVariables[tsName].Copy_(currTs)
vs.Vars.NamedVariables[tsName].Copy_(&currTs)
})
}
return nil
@ -213,7 +213,7 @@ func (vs *VarStore) LoadPartial(filepath string) (retVal []string, err error) {
return nil, err
}
var namedTensorsMap map[string]ts.Tensor = make(map[string]ts.Tensor, 0)
var namedTensorsMap map[string]*ts.Tensor = make(map[string]*ts.Tensor, 0)
for _, namedTensor := range namedTensors {
namedTensorsMap[namedTensor.Name] = namedTensor.Tensor
}
@ -226,7 +226,7 @@ func (vs *VarStore) LoadPartial(filepath string) (retVal []string, err error) {
defer vs.Vars.mutex.Unlock()
for tsName := range vs.Vars.NamedVariables {
var currTs ts.Tensor
var currTs *ts.Tensor
var ok bool
// missing variable
@ -320,7 +320,7 @@ func (vs *VarStore) Copy(src VarStore) (err error) {
// =============
// Sub gets a sub-path of the given path.
func (p *Path) Sub(str string) (retVal Path) {
func (p *Path) Sub(str string) *Path {
if strings.Contains(str, SEP) {
log.Fatalf("Sub name cannot contain %v (%v)\n", SEP, str)
@ -328,7 +328,7 @@ func (p *Path) Sub(str string) (retVal Path) {
path := p.path
path = append(path, str)
return Path{
return &Path{
path: path,
varstore: p.varstore,
}
@ -355,7 +355,7 @@ func (p *Path) getpath(name string) (retVal string) {
}
}
func (p *Path) add(name string, newTs ts.Tensor, trainable bool) (retVal ts.Tensor) {
func (p *Path) add(name string, newTs *ts.Tensor, trainable bool) (retVal *ts.Tensor) {
path := p.getpath(name)
p.varstore.Vars.mutex.Lock()
@ -366,7 +366,7 @@ func (p *Path) add(name string, newTs ts.Tensor, trainable bool) (retVal ts.Tens
}
var (
tensor ts.Tensor
tensor *ts.Tensor
err error
)
if trainable {
@ -379,7 +379,7 @@ func (p *Path) add(name string, newTs ts.Tensor, trainable bool) (retVal ts.Tens
}
if trainable {
p.varstore.Vars.TrainableVariables = append(p.varstore.Vars.TrainableVariables, tensor)
p.varstore.Vars.TrainableVariables = append(p.varstore.Vars.TrainableVariables, *tensor)
}
p.varstore.Vars.NamedVariables[path] = tensor
@ -387,7 +387,7 @@ func (p *Path) add(name string, newTs ts.Tensor, trainable bool) (retVal ts.Tens
return tensor
}
func (p *Path) getOrAddWithLock(name string, tensor ts.Tensor, trainable bool, variables Variables) (retVal ts.Tensor) {
func (p *Path) getOrAddWithLock(name string, tensor *ts.Tensor, trainable bool, variables Variables) (retVal *ts.Tensor) {
path := p.getpath(name)
// if found, return it
@ -397,7 +397,7 @@ func (p *Path) getOrAddWithLock(name string, tensor ts.Tensor, trainable bool, v
// not found, add it
var err error
var ttensor ts.Tensor
var ttensor *ts.Tensor
if trainable {
ttensor, err = tensor.SetRequiresGrad(true, false)
if err != nil {
@ -408,7 +408,7 @@ func (p *Path) getOrAddWithLock(name string, tensor ts.Tensor, trainable bool, v
}
if trainable {
variables.TrainableVariables = append(variables.TrainableVariables, ttensor)
variables.TrainableVariables = append(variables.TrainableVariables, *ttensor)
}
variables.NamedVariables[path] = ttensor
@ -422,7 +422,7 @@ func (p *Path) getOrAddWithLock(name string, tensor ts.Tensor, trainable bool, v
// has the specified shape. The variable will not be trainable so
// gradients will not be tracked.
// The variable uses a float tensor initialized with zeros.
func (p *Path) ZerosNoTrain(name string, dims []int64) (retVal ts.Tensor) {
func (p *Path) ZerosNoTrain(name string, dims []int64) (retVal *ts.Tensor) {
device := p.Device()
z, err := ts.Zeros(dims, gotch.Float, device)
@ -439,7 +439,7 @@ func (p *Path) ZerosNoTrain(name string, dims []int64) (retVal ts.Tensor) {
// has the specified shape. The variable will not be trainable so
// gradients will not be tracked.
// The variable uses a float tensor initialized with ones.
func (p *Path) OnesNoTrain(name string, dims []int64) (retVal ts.Tensor) {
func (p *Path) OnesNoTrain(name string, dims []int64) (retVal *ts.Tensor) {
device := p.Device()
z, err := ts.Ones(dims, gotch.Float, device)
@ -457,7 +457,7 @@ func (p *Path) OnesNoTrain(name string, dims []int64) (retVal ts.Tensor) {
// will be tracked.
// The variable uses a float tensor initialized as per the
// related argument.
func (p *Path) NewVar(name string, dims []int64, ini Init) (retVal ts.Tensor) {
func (p *Path) NewVar(name string, dims []int64, ini Init) (retVal *ts.Tensor) {
v := ini.InitTensor(dims, p.varstore.device)
@ -470,7 +470,7 @@ func (p *Path) NewVar(name string, dims []int64, ini Init) (retVal ts.Tensor) {
// has the specified shape. The variable is trainable, its gradient
// will be tracked.
// The variable uses a float tensor initialized with zeros.
func (p *Path) Zeros(name string, dims []int64) (retVal ts.Tensor) {
func (p *Path) Zeros(name string, dims []int64) (retVal *ts.Tensor) {
return p.NewVar(name, dims, NewConstInit(0.0))
}
@ -481,7 +481,7 @@ func (p *Path) Zeros(name string, dims []int64) (retVal ts.Tensor) {
// has the specified shape. The variable is trainable, its gradient
// will be tracked.
// The variable uses a float tensor initialized with ones.
func (p *Path) Ones(name string, dims []int64) (retVal ts.Tensor) {
func (p *Path) Ones(name string, dims []int64) (retVal *ts.Tensor) {
return p.NewVar(name, dims, NewConstInit(1.0))
}
@ -493,7 +493,7 @@ func (p *Path) Ones(name string, dims []int64) (retVal ts.Tensor) {
// will be tracked.
// The variable uses a float tensor initialized randomly using a
// standard normal distribution.
func (p *Path) RandnStandard(name string, dims []int64) (retVal ts.Tensor) {
func (p *Path) RandnStandard(name string, dims []int64) (retVal *ts.Tensor) {
return p.NewVar(name, dims, NewRandnInit(0.0, 1.0))
}
@ -505,7 +505,7 @@ func (p *Path) RandnStandard(name string, dims []int64) (retVal ts.Tensor) {
// will be tracked.
// The variable uses a float tensor initialized randomly using a
// normal distribution with the specified mean and standard deviation.
func (p *Path) Randn(name string, dims []int64, mean float64, stdev float64) (retVal ts.Tensor) {
func (p *Path) Randn(name string, dims []int64, mean float64, stdev float64) (retVal *ts.Tensor) {
return p.NewVar(name, dims, NewRandnInit(mean, stdev))
}
@ -517,7 +517,7 @@ func (p *Path) Randn(name string, dims []int64, mean float64, stdev float64) (re
// will be tracked.
// The variable uses a float tensor initialized randomly using a
// uniform distribution between the specified bounds.
func (p *Path) Uniform(name string, dims []int64, lo, up float64) (retVal ts.Tensor) {
func (p *Path) Uniform(name string, dims []int64, lo, up float64) (retVal *ts.Tensor) {
return p.NewVar(name, dims, NewUniformInit(lo, up))
}
@ -529,7 +529,7 @@ func (p *Path) Uniform(name string, dims []int64, lo, up float64) (retVal ts.Ten
// will be tracked.
// The variable uses a float tensor initialized randomly using a
// uniform distribution which bounds follow Kaiming initialization.
func (p *Path) KaimingUniform(name string, dims []int64) (retVal ts.Tensor) {
func (p *Path) KaimingUniform(name string, dims []int64) (retVal *ts.Tensor) {
return p.NewVar(name, dims, NewKaimingUniformInit())
}
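For orientation, a minimal usage sketch (not part of this commit's diff) of the Path API under the new pointer-receiver signatures. nn.NewVarStore and Root are assumed from the existing var-store API; the variable names and shapes are illustrative.

package main

import (
    "fmt"

    "github.com/sugarme/gotch"
    "github.com/sugarme/gotch/nn"
)

func main() {
    vs := nn.NewVarStore(gotch.CPU)
    root := vs.Root()

    // Each helper now returns a *ts.Tensor instead of a ts.Tensor value.
    w := root.KaimingUniform("weight", []int64{784, 128})
    b := root.Zeros("bias", []int64{128})

    fmt.Println(w.MustSize(), b.MustSize()) // [784 128] [128]
}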
@ -541,7 +541,7 @@ func (p *Path) KaimingUniform(name string, dims []int64) (retVal ts.Tensor) {
// will be tracked.
// The variable uses a float tensor initialized by copying some
// given tensor.
func (p *Path) VarCopy(name string, t ts.Tensor) (retVal ts.Tensor) {
func (p *Path) VarCopy(name string, t *ts.Tensor) (retVal *ts.Tensor) {
size, err := t.Size()
if err != nil {
@ -557,7 +557,7 @@ func (p *Path) VarCopy(name string, t ts.Tensor) (retVal ts.Tensor) {
}
// Get gets the tensor corresponding to a given name if present.
func (p *Path) Get(name string) (retVal ts.Tensor, err error) {
func (p *Path) Get(name string) (retVal *ts.Tensor, err error) {
p.varstore.Vars.mutex.Lock()
defer p.varstore.Vars.mutex.Unlock()
@ -572,11 +572,11 @@ func (p *Path) Get(name string) (retVal ts.Tensor, err error) {
}
// Entry gets the entry corresponding to a given name for in-place manipulation.
func (p *Path) Entry(name string) (retVal Entry) {
func (p *Path) Entry(name string) *Entry {
p.varstore.Vars.mutex.Lock()
defer p.varstore.Vars.mutex.Unlock()
return Entry{
return &Entry{
name: name,
variables: &p.varstore.Vars,
path: p,
@ -592,14 +592,14 @@ func (p *Path) Entry(name string) (retVal Entry) {
// var store, the corresponding tensor is returned. Otherwise a new
// variable is added to the var-store with the entry name and is
// initialized according to the init parameter.
func (e *Entry) OrVar(dims []int64, init Init) (retVal ts.Tensor) {
func (e *Entry) OrVar(dims []int64, init Init) (retVal *ts.Tensor) {
v := init.InitTensor(dims, e.path.varstore.device)
return e.path.getOrAddWithLock(e.name, v, true, *e.variables)
}
// Returns the existing entry if present, otherwise creates a new variable.
func (e *Entry) OrVarCopy(tensor ts.Tensor) (retVal ts.Tensor) {
func (e *Entry) OrVarCopy(tensor *ts.Tensor) (retVal *ts.Tensor) {
size, err := tensor.Size()
if err != nil {
@ -615,50 +615,50 @@ func (e *Entry) OrVarCopy(tensor ts.Tensor) (retVal ts.Tensor) {
}
// Returns the existing entry if present, otherwise creates a new variable.
func (e *Entry) OrKaimingUniform(dims []int64) (retVal ts.Tensor) {
func (e *Entry) OrKaimingUniform(dims []int64) (retVal *ts.Tensor) {
return e.OrVar(dims, NewKaimingUniformInit())
}
// OrOnes returns the existing entry if present, otherwise creates a new variable.
func (e *Entry) OrOnes(dims []int64) (retVal ts.Tensor) {
func (e *Entry) OrOnes(dims []int64) (retVal *ts.Tensor) {
return e.OrVar(dims, NewConstInit(1.0))
}
// OrOnesNoTrain returns the existing entry if present, otherwise creates a new variable.
func (e *Entry) OrOnesNoTrain(dims []int64) (retVal ts.Tensor) {
func (e *Entry) OrOnesNoTrain(dims []int64) (retVal *ts.Tensor) {
o := ts.MustOnes(dims, gotch.Float, e.path.Device())
return e.path.getOrAddWithLock(e.name, o, true, *e.variables)
}
// OrRandn returns the existing entry if present, otherwise creates a new variable.
func (e *Entry) OrRandn(dims []int64, mean, stdev float64) (retVal ts.Tensor) {
func (e *Entry) OrRandn(dims []int64, mean, stdev float64) (retVal *ts.Tensor) {
return e.OrVar(dims, NewRandnInit(mean, stdev))
}
// OrRandnStandard returns the existing entry if present, otherwise creates a new variable.
func (e *Entry) OrRandnStandard(dims []int64) (retVal ts.Tensor) {
func (e *Entry) OrRandnStandard(dims []int64) (retVal *ts.Tensor) {
return e.OrVar(dims, NewRandnInit(0.0, 1.0))
}
// OrUniform returns the existing entry if present, otherwise creates a new variable.
func (e *Entry) OrUniform(dims []int64, lo, up float64) (retVal ts.Tensor) {
func (e *Entry) OrUniform(dims []int64, lo, up float64) (retVal *ts.Tensor) {
return e.OrVar(dims, NewUniformInit(lo, up))
}
// OrZeros returns the existing entry if present, otherwise creates a new variable.
func (e *Entry) OrZeros(dims []int64) (retVal ts.Tensor) {
func (e *Entry) OrZeros(dims []int64) (retVal *ts.Tensor) {
return e.OrVar(dims, NewConstInit(0.0))
}
// OrZerosNoTrain returns the existing entry if present, otherwise creates a new variable.
func (e *Entry) OrZerosNoTrain(dims []int64) (retVal ts.Tensor) {
func (e *Entry) OrZerosNoTrain(dims []int64) (retVal *ts.Tensor) {
z := ts.MustZeros(dims, gotch.Float, e.path.Device())
return e.path.getOrAddWithLock(e.name, z, true, *e.variables)
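The Entry workflow can be sketched the same way: Entry is now handed out as a *Entry and the Or* helpers return *ts.Tensor. The entry name and dims below are hypothetical.

package main

import (
    "fmt"

    "github.com/sugarme/gotch"
    "github.com/sugarme/gotch/nn"
)

func main() {
    vs := nn.NewVarStore(gotch.CPU)
    e := vs.Root().Entry("embedding")

    // The first call creates the variable; a repeated call with the
    // same name returns the existing tensor instead.
    emb := e.OrKaimingUniform([]int64{1000, 64})
    fmt.Println(emb.MustSize()) // [1000 64]
}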

View File

@ -46,7 +46,7 @@ func TestSaveLoad(t *testing.T) {
panic(err)
}
add := func(vs nn.Path) (ts.Tensor, ts.Tensor) {
add := func(vs *nn.Path) (*ts.Tensor, *ts.Tensor) {
subA := vs.Sub("a")
subB := subA.Sub("b")
v := subB.Ones("t2", []int64{3})

View File

@ -16,8 +16,8 @@ import (
// containing a (potentially random) slice of each of the two input
// tensors.
type Iter2 struct {
xs Tensor
ys Tensor
xs *Tensor
ys *Tensor
batchIndex int64
batchSize int64
totalSize int64
@ -38,12 +38,16 @@ type Iter2 struct {
// * `xs` - the features to be used by the model.
// * `ys` - the targets that the model attempts to predict.
// * `batch_size` - the size of batches to be returned.
func NewIter2(xs, ys Tensor, batchSize int64) (retVal Iter2, err error) {
func NewIter2(xs, ys *Tensor, batchSize int64) (*Iter2, error) {
var (
iter *Iter2
err error
)
totalSize := xs.MustSize()[0]
if ys.MustSize()[0] != totalSize {
err = fmt.Errorf("Different dimension for the two inputs: %v - %v", xs.MustSize(), ys.MustSize())
return retVal, err
return nil, err
}
// xsClone, err := xs.ZerosLike(false)
@ -58,7 +62,7 @@ func NewIter2(xs, ys Tensor, batchSize int64) (retVal Iter2, err error) {
// }
// ysClone.Copy_(ys)
retVal = Iter2{
iter = &Iter2{
xs: xs.MustShallowClone(),
ys: ys.MustShallowClone(),
// xs: xsClone,
@ -69,7 +73,7 @@ func NewIter2(xs, ys Tensor, batchSize int64) (retVal Iter2, err error) {
returnSmallLastBatch: false,
}
return retVal, nil
return iter, nil
}
// MustNewIter2 returns a new iterator.
@ -84,14 +88,14 @@ func NewIter2(xs, ys Tensor, batchSize int64) (retVal Iter2, err error) {
// * `xs` - the features to be used by the model.
// * `ys` - the targets that the model attempts to predict.
// * `batch_size` - the size of batches to be returned.
func MustNewIter2(xs, ys Tensor, batchSize int64) (retVal Iter2) {
retVal, err := NewIter2(xs, ys, batchSize)
func MustNewIter2(xs, ys *Tensor, batchSize int64) *Iter2 {
iter, err := NewIter2(xs, ys, batchSize)
if err != nil {
log.Fatal(err)
}
return retVal
return iter
}
// Shuffle shuffles the dataset.
@ -108,20 +112,20 @@ func (it *Iter2) Shuffle() {
}
// ToDevice transfers the mini-batches to a specified device.
func (it Iter2) ToDevice(device gotch.Device) (retVal Iter2) {
func (it *Iter2) ToDevice(device gotch.Device) *Iter2 {
it.device = device
return it
}
// ReturnSmallLastBatch, when set, causes the iterator to return the last batch even if it is smaller than the batch size.
func (it Iter2) ReturnSmallLastBatch() (retVal Iter2) {
func (it *Iter2) ReturnSmallLastBatch() *Iter2 {
it.returnSmallLastBatch = true
return it
}
type Iter2Item struct {
Data Tensor
Label Tensor
Data *Tensor
Label *Tensor
}
// Next implements iterator for Iter2
@ -148,7 +152,7 @@ func (it *Iter2) Next() (item Iter2Item, ok bool) {
}
}
func (it Iter2) Drop() {
func (it *Iter2) Drop() {
it.xs.MustDrop()
it.ys.MustDrop()
}
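A sketch of driving the updated Iter2, assuming the tensor package is imported as ts (as in the tests); shapes are illustrative.

package main

import (
    "fmt"

    "github.com/sugarme/gotch"
    ts "github.com/sugarme/gotch/tensor"
)

func main() {
    xs := ts.MustZeros([]int64{10, 4}, gotch.Float, gotch.CPU)
    ys := ts.MustZeros([]int64{10}, gotch.Float, gotch.CPU)

    it := ts.MustNewIter2(xs, ys, 3) // now a *Iter2
    it.Shuffle()

    for item, ok := it.Next(); ok; item, ok = it.Next() {
        fmt.Println(item.Data.MustSize(), item.Label.MustSize())
        item.Data.MustDrop()
        item.Label.MustDrop()
    }
    it.Drop()
}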
@ -156,17 +160,17 @@ func (it Iter2) Drop() {
// TextData represents text data in a tensor of runes (uint8)
// and its corresponding string
type TextData struct {
Data Tensor // frequency (occurrence) of byte value from input text
CharForLabel []rune // unique rune values from input text
Data *Tensor // frequency (occurrence) of byte value from input text
CharForLabel []rune // unique rune values from input text
}
// TextDataIter is a text data iterator
type TextDataIter struct {
Data Tensor
Data *Tensor
SeqLen int64
BatchIndex int64
BatchSize int64
Indexes Tensor
Indexes *Tensor
IndexesLen int64
}
@ -179,17 +183,17 @@ type TextDataIter struct {
// will be labelled with a new label (index)
// Data: tensor of labels
// CharForLabel: []rune (unique runes from text input)
func NewTextData(filename string) (retVal TextData, err error) {
func NewTextData(filename string) (*TextData, error) {
filePath, err := filepath.Abs(filename)
if err != nil {
return retVal, err
return nil, err
}
r, err := os.Open(filePath)
buffer, err := ioutil.ReadAll(r)
if err != nil {
return retVal, err
return nil, err
}
var labelForChar map[byte]uint8 = make(map[byte]uint8, 0)
@ -216,35 +220,35 @@ func NewTextData(filename string) (retVal TextData, err error) {
data := MustOfSlice(dataIndexes)
return TextData{
return &TextData{
Data: data,
CharForLabel: charForLabel,
}, nil
}
// Labels returns the number of different characters (runes) used by the dataset.
func (td TextData) Labels() (retVal int64) {
func (td *TextData) Labels() (retVal int64) {
return int64(len(td.CharForLabel))
}
// CloneData returns a shallow copy of the data.
func (td TextData) CloneData() (retVal Tensor) {
func (td *TextData) CloneData() *Tensor {
return td.Data.MustShallowClone()
}
// LabelForChar returns the corresponding `char` (rune) for the
// specified label input
func (td TextData) LabelForChar(label int64) (retVal rune) {
func (td *TextData) LabelForChar(label int64) rune {
return td.CharForLabel[int(label)]
}
// IterShuffle returns a batch iterator over the dataset.
// Each sample is made of seq_len characters.
func (td TextData) IterShuffle(seqLen int64, batchSize int64) (retVal TextDataIter) {
func (td *TextData) IterShuffle(seqLen int64, batchSize int64) *TextDataIter {
indexesLen := td.Data.MustSize()[0] - seqLen + 1
return TextDataIter{
return &TextDataIter{
Data: td.Data.MustShallowClone(),
SeqLen: seqLen,
BatchIndex: 0,
@ -255,12 +259,12 @@ func (td TextData) IterShuffle(seqLen int64, batchSize int64) (retVal TextDataIt
}
// Next implements iterator for TextDataIter
func (tdi *TextDataIter) Next() (retVal Tensor, ok bool) {
func (tdi *TextDataIter) Next() (*Tensor, bool) {
start := tdi.BatchIndex * tdi.BatchSize
size := min(tdi.BatchSize, tdi.IndexesLen-start)
if size < tdi.BatchSize {
return retVal, false
return nil, false
}
tdi.BatchIndex += 1
@ -276,10 +280,10 @@ func (tdi *TextDataIter) Next() (retVal Tensor, ok bool) {
for _, idx := range indexes {
narrowIdx := NewNarrow(idx, idx+tdi.SeqLen)
idxTs := tdi.Data.Idx(narrowIdx)
batch = append(batch, idxTs)
batch = append(batch, *idxTs)
}
retVal = MustStack(batch, 0)
retVal := MustStack(batch, 0)
// Delete intermediate tensors
for _, xs := range batch {
@ -289,7 +293,7 @@ func (tdi *TextDataIter) Next() (retVal Tensor, ok bool) {
return retVal, true
}
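And a sketch of the TextData flow under the new signatures; "input.txt" is a placeholder path.

package main

import (
    "fmt"
    "log"

    ts "github.com/sugarme/gotch/tensor"
)

func main() {
    td, err := ts.NewTextData("input.txt") // now returns *TextData
    if err != nil {
        log.Fatal(err)
    }
    fmt.Println("distinct runes:", td.Labels())

    iter := td.IterShuffle(32, 8) // seqLen=32, batchSize=8
    for batch, ok := iter.Next(); ok; batch, ok = iter.Next() {
        fmt.Println(batch.MustSize()) // [8 32]
        batch.MustDrop()
    }
}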
func min(v1, v2 int64) (retVal int64) {
func min(v1, v2 int64) int64 {
if v1 < v2 {
return v1
}

View File

@ -9,22 +9,20 @@ import (
)
// LoadHwc returns a tensor of shape [height, width, channels] on success.
func LoadHwc(path string) (retVal Tensor, err error) {
func LoadHwc(path string) (*Tensor, error) {
ctensor := lib.AtLoadImage(path)
err = TorchErr()
err := TorchErr()
if err != nil {
return retVal, err
return nil, err
}
retVal = Tensor{ctensor}
return retVal, nil
return &Tensor{ctensor}, nil
}
// SaveHwc saves an image from a tensor. It expects a tensor of shape [height,
// width, channels]
func SaveHwc(ts Tensor, path string) (err error) {
func SaveHwc(ts *Tensor, path string) error {
lib.AtSaveImage(ts.ctensor, path)
return TorchErr()
@ -32,14 +30,13 @@ func SaveHwc(ts Tensor, path string) (err error) {
// ResizeHwc expects a tensor of shape [height, width, channels].
// On success returns a tensor of shape [height, width, channels].
func ResizeHwc(ts Tensor, outWidth, outHeight int64) (retVal Tensor, err error) {
func ResizeHwc(ts *Tensor, outWidth, outHeight int64) (*Tensor, error) {
ctensor := lib.AtResizeImage(ts.ctensor, outWidth, outHeight)
err = TorchErr()
err := TorchErr()
if err != nil {
return retVal, err
return nil, err
}
retVal = Tensor{ctensor}
return retVal, nil
return &Tensor{ctensor}, nil
}
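A hedged example of the image helpers after the change; file paths are placeholders.

package main

import (
    "log"

    ts "github.com/sugarme/gotch/tensor"
)

func main() {
    img, err := ts.LoadHwc("in.png") // [height, width, channels]
    if err != nil {
        log.Fatal(err)
    }
    resized, err := ts.ResizeHwc(img, 224, 224)
    if err != nil {
        log.Fatal(err)
    }
    if err := ts.SaveHwc(resized, "out.png"); err != nil {
        log.Fatal(err)
    }
}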

View File

@ -79,7 +79,7 @@ type Narrow struct {
Start int64
End int64
}
type IndexSelect struct{ Index Tensor }
type IndexSelect struct{ Index *Tensor }
type InsertNewAxis struct{}
// NewSelect creates a tensor indexer with the given index.
@ -93,7 +93,7 @@ func NewNarrow(start, end int64) Narrow {
return Narrow{Start: start, End: end}
}
func NewIndexSelect(ts Tensor) IndexSelect {
func NewIndexSelect(ts *Tensor) IndexSelect {
return IndexSelect{Index: ts}
}
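A small sketch of the indexing API with the new pointer types; the shapes are illustrative.

package main

import (
    "fmt"

    "github.com/sugarme/gotch"
    ts "github.com/sugarme/gotch/tensor"
)

func main() {
    x := ts.MustZeros([]int64{6, 4}, gotch.Float, gotch.CPU)
    idx := ts.TensorFrom([]int64{0, 2})

    // Indexers apply left to right: rows 1..3 first, then columns 0 and 2.
    y := x.Idx([]ts.TensorIndexer{ts.NewNarrow(1, 3), ts.NewIndexSelect(idx)})
    fmt.Println(y.MustSize()) // [2 2]
}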
@ -130,7 +130,7 @@ type IndexOp interface {
//
// NOTE:
// - `index`: expects type `TensorIndexer` or `[]TensorIndexer`
func (ts *Tensor) Idx(index interface{}) (retVal Tensor) {
func (ts *Tensor) Idx(index interface{}) (retVal *Tensor) {
// indexTyp := reflect.TypeOf(index)
indexVal := reflect.ValueOf(index)
@ -196,7 +196,7 @@ func (ts *Tensor) Idx(index interface{}) (retVal Tensor) {
// Tensor Methods:
// ===============
func (ts Tensor) indexer(indexSpec []TensorIndexer) (retVal Tensor, err error) {
func (ts *Tensor) indexer(indexSpec []TensorIndexer) (retVal *Tensor, err error) {
// Make sure the number of non-newaxis indexes does not exceed the number of dimensions
var numNewAxis int = 0
@ -221,7 +221,7 @@ func (ts Tensor) indexer(indexSpec []TensorIndexer) (retVal Tensor, err error) {
// If `spec` is `IndexSelect` type and
if reflect.TypeOf(spec).Name() == "IndexSelect" {
if reflect.ValueOf(spec).Kind() == reflect.Struct {
inputTensor := reflect.ValueOf(spec).FieldByName("Index").Interface().(Tensor)
inputTensor := reflect.ValueOf(spec).FieldByName("Index").Interface().(*Tensor)
// 1. If its input tensor has dimension > 1, throw an error.
inputTensorShape, err := inputTensor.Size()
@ -249,9 +249,9 @@ func (ts Tensor) indexer(indexSpec []TensorIndexer) (retVal Tensor, err error) {
// Now, apply indexing from left to right.
var (
currTensor Tensor = ts.MustShallowClone()
currIdx int64 = 0
nextTensor Tensor
currTensor *Tensor = ts.MustShallowClone()
currIdx int64 = 0
nextTensor *Tensor
nextIdx int64
)
@ -282,8 +282,8 @@ func (ts Tensor) indexer(indexSpec []TensorIndexer) (retVal Tensor, err error) {
return retVal, err
}
nextIdx = currIdx + 1
case "IndexSelect": // 1 field `(Index Tensor)`
indexTensor := reflect.ValueOf(spec).FieldByName("Index").Interface().(Tensor)
case "IndexSelect": // 1 field `(Index *Tensor)`
indexTensor := reflect.ValueOf(spec).FieldByName("Index").Interface().(*Tensor)
device, err := currTensor.Device()
if err != nil {
return retVal, err
@ -307,7 +307,7 @@ func (ts Tensor) indexer(indexSpec []TensorIndexer) (retVal Tensor, err error) {
return retVal, nil
}
func (ts Tensor) mustIndexer(indexSpec []TensorIndexer) (retVal Tensor) {
func (ts *Tensor) mustIndexer(indexSpec []TensorIndexer) (retVal *Tensor) {
retVal, err := ts.indexer(indexSpec)
if err != nil {
panic(err)

View File

@ -14,27 +14,27 @@ type Iterator interface {
type Iterable struct {
Index int64
Len int64
Content Tensor
Content *Tensor
ItemKind gotch.DType
}
// Next implements Iterator interface
func (it *Iterable) Next() (retVal interface{}, ok bool) {
func (it *Iterable) Next() (item interface{}, ok bool) {
if it.Index == it.Len {
return retVal, false
return nil, false
}
var err error
switch it.ItemKind.Kind().String() {
case "int64":
retVal, err = it.Content.Int64Value([]int64{it.Index})
item, err = it.Content.Int64Value([]int64{it.Index})
if err != nil {
log.Fatal(err)
}
it.Index += 1
case "float64":
retVal, err = it.Content.Float64Value([]int64{it.Index})
item, err = it.Content.Float64Value([]int64{it.Index})
if err != nil {
log.Fatal(err)
}
@ -44,22 +44,22 @@ func (it *Iterable) Next() (retVal interface{}, ok bool) {
log.Fatal(err)
}
return retVal, true
return item, true
}
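Iterating over a 1D tensor then looks like this sketch; TensorFrom is assumed from the same package.

package main

import (
    "fmt"
    "log"

    "github.com/sugarme/gotch"
    ts "github.com/sugarme/gotch/tensor"
)

func main() {
    t := ts.TensorFrom([]int64{3, 1, 4, 1, 5})

    it, err := t.Iter(gotch.Int64) // now a *Iterable
    if err != nil {
        log.Fatal(err)
    }
    for v, ok := it.Next(); ok; v, ok = it.Next() {
        fmt.Println(v.(int64))
    }
}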
// Iter creates an iterable object with specified item type.
func (ts Tensor) Iter(dtype gotch.DType) (retVal Iterable, err error) {
func (ts *Tensor) Iter(dtype gotch.DType) (*Iterable, error) {
num, err := ts.Size1() // size for 1D tensor
if err != nil {
return retVal, err
return nil, err
}
tmp, err := ts.ShallowClone()
if err != nil {
return retVal, err
return nil, err
}
content := tmp.MustTotype(dtype, true)
return Iterable{
return &Iterable{
Index: 0,
Len: num,
Content: content,

View File

@ -950,7 +950,7 @@ func ModuleLoadDataOnDevice(stream io.Reader, device gotch.Device) (retVal CModu
}
// Performs the forward pass for a model on some specified tensor inputs.
func (cm CModule) ForwardTs(tensors []Tensor) (retVal Tensor, err error) {
func (cm CModule) ForwardTs(tensors []Tensor) (retVal *Tensor, err error) {
var ctensors []lib.Ctensor
for _, t := range tensors {
ctensors = append(ctensors, t.ctensor)
@ -994,7 +994,7 @@ func (cm CModule) ForwardTs(tensors []Tensor) (retVal Tensor, err error) {
return retVal, err
}
return Tensor{ctensor}, nil
return &Tensor{ctensor}, nil
}
// Performs the forward pass for a model on some specified ivalue input.
@ -1066,9 +1066,9 @@ func (cm CModule) To(device gotch.Device, kind gotch.DType, nonBlocking bool) {
// Implement Module for CModule:
// =============================
func (cm CModule) Forward(tensor Tensor) (retVal Tensor, err error) {
func (cm CModule) Forward(tensor *Tensor) (retVal *Tensor, err error) {
var tensors []Tensor = []Tensor{tensor}
var tensors []Tensor = []Tensor{*tensor}
return cm.ForwardTs(tensors)
}
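A usage sketch for CModule under the new Forward signature. It assumes the package's ModuleLoad loader; "model.pt" is a placeholder for a scripted PyTorch module.

package main

import (
    "fmt"
    "log"

    ts "github.com/sugarme/gotch/tensor"
)

func main() {
    foo, err := ts.ModuleLoad("model.pt")
    if err != nil {
        log.Fatal(err)
    }

    in := ts.TensorFrom([]int64{42})
    out, err := foo.Forward(in) // takes and returns a *Tensor now
    if err != nil {
        log.Fatal(err)
    }
    fmt.Println(out.MustSize())
}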
@ -1076,7 +1076,7 @@ func (cm CModule) Forward(tensor Tensor) (retVal Tensor, err error) {
// ======================================
// Apply forwards tensor itself through a module.
func (ts Tensor) ApplyCModule(m CModule) (retVal Tensor) {
func (ts *Tensor) ApplyCModule(m CModule) (retVal *Tensor) {
retVal, err := m.Forward(ts)
if err != nil {
log.Fatal(err)

View File

@ -59,7 +59,7 @@ func TestModuleForwardTs(t *testing.T) {
ts1 := ts.TensorFrom([]int64{42})
ts2 := ts.TensorFrom([]int64{1337})
res, err := foo.ForwardTs([]ts.Tensor{ts1, ts2})
res, err := foo.ForwardTs([]ts.Tensor{*ts1, *ts2})
if err != nil {
t.Error(err)
}
@ -83,8 +83,8 @@ func TestModuleForwardIValue(t *testing.T) {
ts1 := ts.TensorFrom([]int64{42})
ts2 := ts.TensorFrom([]int64{1337})
iv1 := ts.NewIValue(ts1)
iv2 := ts.NewIValue(ts2)
iv1 := ts.NewIValue(*ts1)
iv2 := ts.NewIValue(*ts2)
got, err := foo.ForwardIs([]ts.IValue{iv1, iv2})
if err != nil {
@ -93,7 +93,7 @@ func TestModuleForwardIValue(t *testing.T) {
expectedTs1 := ts.TensorFrom([]int64{1421})
expectedTs2 := ts.TensorFrom([]int64{-1295})
want := ts.NewIValue([]ts.Tensor{expectedTs1, expectedTs2})
want := ts.NewIValue([]ts.Tensor{*expectedTs1, *expectedTs2})
if !reflect.DeepEqual(want.Name(), got.Name()) {
t.Errorf("Expected Ivalue Name: %v\n", want.Name())

View File

@ -9,7 +9,7 @@ package tensor
// be registered, and will have their parameters converted too when you call .cuda(), etc.
type Module interface {
// ModuleT
Forward(xs Tensor) Tensor
Forward(xs *Tensor) *Tensor
}
// ModuleT is a `Module` with an additional train parameter
@ -17,7 +17,7 @@ type Module interface {
// between training and evaluation. E.g. When using dropout or batch-normalization.
type ModuleT interface {
// Forward(xs Tensor) Tensor
ForwardT(xs Tensor, train bool) Tensor
ForwardT(xs *Tensor, train bool) *Tensor
}
/*
@ -99,18 +99,18 @@ type ModuleT interface {
// ======================================
// Apply forwards tensor itself through a module.
func (ts Tensor) Apply(m Module) (retVal Tensor) {
func (ts *Tensor) Apply(m Module) (retVal *Tensor) {
return m.Forward(ts)
}
// Apply forwards tensor itself through a module T.
func (ts Tensor) ApplyT(m ModuleT, train bool) (retVal Tensor) {
func (ts *Tensor) ApplyT(m ModuleT, train bool) (retVal *Tensor) {
return m.ForwardT(ts, train)
}
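A sketch of implementing the updated Module interface; MustMul1 and FloatScalar are assumed from the auto-generated tensor and scalar APIs.

package main

import (
    "fmt"

    "github.com/sugarme/gotch"
    ts "github.com/sugarme/gotch/tensor"
)

// doubler implements Module, whose Forward now passes tensors by pointer.
type doubler struct{}

func (d doubler) Forward(xs *ts.Tensor) *ts.Tensor {
    return xs.MustMul1(ts.FloatScalar(2.0), false)
}

func main() {
    x := ts.MustOnes([]int64{3}, gotch.Float, gotch.CPU)
    y := x.Apply(doubler{})
    fmt.Println(y.MustSize()) // [3]
}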
// ApplyOpt forwards a tensor itself through a module if given, shallow-copies
// the tensor otherwise.
func (ts Tensor) ApplyOpt(opts ...ModuleOption) (retVal Tensor) {
func (ts *Tensor) ApplyOpt(opts ...ModuleOption) (retVal *Tensor) {
switch {
case len(opts) > 0:
@ -131,7 +131,7 @@ func WithModule(m Module) ModuleOption {
// ApplyOptT forwards a tensor itself through a module T if given, shallow-copies
// the tensor otherwise.
func (ts Tensor) ApplyOptT(train bool, opts ...ModuleTOption) (retVal Tensor) {
func (ts *Tensor) ApplyOptT(train bool, opts ...ModuleTOption) (retVal *Tensor) {
switch {
case len(opts) > 0:

File diff suppressed because it is too large

View File

@ -11,20 +11,18 @@ type COptimizer struct {
}
// Adam returns an Adam optimizer
func Adam(lr, beta1, beta2, weightDecay float64) (retVal COptimizer, err error) {
func Adam(lr, beta1, beta2, weightDecay float64) (*COptimizer, error) {
coptimizer := lib.AtoAdam(lr, beta1, beta2, weightDecay)
err = TorchErr()
if err != nil {
return retVal, err
if err := TorchErr(); err != nil {
return nil, err
}
retVal = COptimizer{coptimizer}
return retVal, nil
return &COptimizer{coptimizer}, nil
}
// RmsProp returns an RMSProp optimizer
func RmsProp(lr, alpha, eps, wd, momentum float64, centered bool) (retVal COptimizer, err error) {
func RmsProp(lr, alpha, eps, wd, momentum float64, centered bool) (*COptimizer, error) {
var centeredCInt int
switch centered {
case true:
@ -34,19 +32,15 @@ func RmsProp(lr, alpha, eps, wd, momentum float64, centered bool) (retVal COptim
}
coptimizer := lib.AtoRmsProp(lr, alpha, eps, wd, momentum, centeredCInt)
err = TorchErr()
if err != nil {
return retVal, err
if err := TorchErr(); err != nil {
return nil, err
}
retVal = COptimizer{coptimizer}
return retVal, nil
return &COptimizer{coptimizer}, nil
}
// Sgd returns an SGD optimizer
func Sgd(lr, momentum, dampening, wd float64, nesterov bool) (retVal COptimizer, err error) {
func Sgd(lr, momentum, dampening, wd float64, nesterov bool) (*COptimizer, error) {
var nesterovCInt int
switch nesterov {
case true:
@ -56,18 +50,15 @@ func Sgd(lr, momentum, dampening, wd float64, nesterov bool) (retVal COptimizer,
}
coptimizer := lib.AtoSgd(lr, momentum, dampening, wd, nesterovCInt)
err = TorchErr()
if err != nil {
return retVal, err
if err := TorchErr(); err != nil {
return nil, err
}
retVal = COptimizer{coptimizer}
return retVal, nil
return &COptimizer{coptimizer}, nil
}
// AddParameters adds parameters as a slice of tensors to the optimizer
func (co COptimizer) AddParameters(tensors []Tensor) (err error) {
func (co *COptimizer) AddParameters(tensors []Tensor) error {
var ctensors []lib.Ctensor
for _, t := range tensors {
@ -82,35 +73,35 @@ func (co COptimizer) AddParameters(tensors []Tensor) (err error) {
}
// SetLearningRate sets a learning rate for the optimizer
func (co COptimizer) SetLearningRate(lr float64) (err error) {
func (co *COptimizer) SetLearningRate(lr float64) error {
lib.AtoSetLearningRate(co.coptimizer, lr)
return TorchErr()
}
// SetMomentum sets a momentum for the optimizer
func (co COptimizer) SetMomentum(m float64) (err error) {
func (co *COptimizer) SetMomentum(m float64) error {
lib.AtoSetMomentum(co.coptimizer, m)
return TorchErr()
}
// ZeroGrad sets gradients to zero
func (co COptimizer) ZeroGrad() (err error) {
func (co *COptimizer) ZeroGrad() error {
lib.AtoZeroGrad(co.coptimizer)
return TorchErr()
}
// Step performs a single optimization step
func (co COptimizer) Step() (err error) {
func (co *COptimizer) Step() error {
lib.AtoStep(co.coptimizer)
return TorchErr()
}
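A minimal optimizer loop sketch under the new signatures. MustSetRequiresGrad, MustSum and MustBackward are assumed from the tensor APIs; note that AddParameters still takes a []Tensor of values.

package main

import (
    "log"

    "github.com/sugarme/gotch"
    ts "github.com/sugarme/gotch/tensor"
)

func main() {
    opt, err := ts.Adam(0.001, 0.9, 0.999, 0.0) // now a *COptimizer
    if err != nil {
        log.Fatal(err)
    }
    defer opt.Drop()

    w := ts.MustZeros([]int64{4}, gotch.Float, gotch.CPU).MustSetRequiresGrad(true, false)
    if err := opt.AddParameters([]ts.Tensor{*w}); err != nil {
        log.Fatal(err)
    }

    // One step: zero the gradients, compute a loss, backprop, update.
    if err := opt.ZeroGrad(); err != nil {
        log.Fatal(err)
    }
    loss := w.MustSum(gotch.Float, false)
    loss.MustBackward()
    if err := opt.Step(); err != nil {
        log.Fatal(err)
    }
}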
// Drop removes the optimizer and frees up memory.
func (co COptimizer) Drop() {
func (co *COptimizer) Drop() {
lib.AtoFree(co.coptimizer)
if err := TorchErr(); err != nil {

View File

@ -7,7 +7,7 @@ import (
)
// CrossEntropyForLogits computes the cross-entropy loss based on some logits and targets.
func (ts Tensor) CrossEntropyForLogits(targets Tensor) (retVal Tensor) {
func (ts *Tensor) CrossEntropyForLogits(targets *Tensor) (retVal *Tensor) {
weight := NewTensor()
reduction := int64(1) // Mean of loss
ignoreIndex := int64(-100)
@ -18,13 +18,13 @@ func (ts Tensor) CrossEntropyForLogits(targets Tensor) (retVal Tensor) {
// AccuracyForLogits returns the average accuracy for some given logits assuming that
// targets represent ground-truth.
func (ts Tensor) AccuracyForLogits(targets Tensor) (retVal Tensor) {
func (ts *Tensor) AccuracyForLogits(targets *Tensor) (retVal *Tensor) {
argmax := ts.MustArgmax(-1, false, true)
eq1 := argmax.MustEq1(targets, true)
return eq1.MustTotype(gotch.Float, true).MustMean(gotch.Float, true)
}
func (ts Tensor) MaxPool2DDefault(ksize int64, del bool) (retVal Tensor) {
func (ts *Tensor) MaxPool2DDefault(ksize int64, del bool) (retVal *Tensor) {
return ts.MustMaxPool2d([]int64{ksize, ksize}, []int64{ksize, ksize}, []int64{0, 0}, []int64{1, 1}, false, del)
}
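A sketch of the loss helpers on fabricated logits; MustView is assumed from the generated API.

package main

import (
    "fmt"

    ts "github.com/sugarme/gotch/tensor"
)

func main() {
    // Two samples over three classes, plus their ground-truth labels.
    logits := ts.MustOfSlice([]float64{2.0, 0.1, 0.1, 0.1, 2.0, 0.1}).MustView([]int64{2, 3}, true)
    targets := ts.TensorFrom([]int64{0, 1})

    loss := logits.CrossEntropyForLogits(targets)
    acc := logits.AccuracyForLogits(targets) // consumes logits
    fmt.Println(loss.MustSize(), acc.MustSize()) // both 0-d (scalar) tensors
}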

View File

@ -13,7 +13,7 @@ import (
// NOTE. This is temporarily patched to make it run.
// TODO. change the generator to handle []Tensor input
func (ts Tensor) Lstm(hxData []Tensor, paramsData []Tensor, hasBiases bool, numLayers int64, dropout float64, train bool, bidirectional bool, batchFirst bool) (output, h, c Tensor, err error) {
func (ts *Tensor) Lstm(hxData []Tensor, paramsData []Tensor, hasBiases bool, numLayers int64, dropout float64, train bool, bidirectional bool, batchFirst bool) (output, h, c *Tensor, err error) {
// NOTE: `atg_lstm` will create 3 consecutive Ctensors in C memory. The first
// Ctensor will have the address given by `ctensorPtr1` here.
@ -55,11 +55,11 @@ func (ts Tensor) Lstm(hxData []Tensor, paramsData []Tensor, hasBiases bool, numL
return output, h, c, err
}
return Tensor{ctensor: *ctensorPtr1}, Tensor{ctensor: *ctensorPtr2}, Tensor{ctensor: *ctensorPtr3}, nil
return &Tensor{ctensor: *ctensorPtr1}, &Tensor{ctensor: *ctensorPtr2}, &Tensor{ctensor: *ctensorPtr3}, nil
}
func (ts Tensor) MustLstm(hxData []Tensor, paramsData []Tensor, hasBiases bool, numLayers int64, dropout float64, train bool, bidirectional bool, batchFirst bool) (output, h, c Tensor) {
func (ts *Tensor) MustLstm(hxData []Tensor, paramsData []Tensor, hasBiases bool, numLayers int64, dropout float64, train bool, bidirectional bool, batchFirst bool) (output, h, c *Tensor) {
output, h, c, err := ts.Lstm(hxData, paramsData, hasBiases, numLayers, dropout, train, bidirectional, batchFirst)
if err != nil {
@ -69,7 +69,7 @@ func (ts Tensor) MustLstm(hxData []Tensor, paramsData []Tensor, hasBiases bool,
return output, h, c
}
func (ts Tensor) Gru(hx Tensor, paramsData []Tensor, hasBiases bool, numLayers int64, dropout float64, train bool, bidirectional bool, batchFirst bool) (output, h Tensor, err error) {
func (ts *Tensor) Gru(hx *Tensor, paramsData []Tensor, hasBiases bool, numLayers int64, dropout float64, train bool, bidirectional bool, batchFirst bool) (output, h *Tensor, err error) {
// NOTE: `atg_gru` will create 2 consecutive Ctensors in C memory.
// The first Ctensor will have the address given by `ctensorPtr1` here.
@ -105,11 +105,11 @@ func (ts Tensor) Gru(hx Tensor, paramsData []Tensor, hasBiases bool, numLayers i
return output, h, err
}
return Tensor{ctensor: *ctensorPtr1}, Tensor{ctensor: *ctensorPtr2}, nil
return &Tensor{ctensor: *ctensorPtr1}, &Tensor{ctensor: *ctensorPtr2}, nil
}
func (ts Tensor) MustGru(hx Tensor, paramsData []Tensor, hasBiases bool, numLayers int64, dropout float64, train bool, bidirectional bool, batchFirst bool) (output, h Tensor) {
func (ts *Tensor) MustGru(hx *Tensor, paramsData []Tensor, hasBiases bool, numLayers int64, dropout float64, train bool, bidirectional bool, batchFirst bool) (output, h *Tensor) {
output, h, err := ts.Gru(hx, paramsData, hasBiases, numLayers, dropout, train, bidirectional, batchFirst)
if err != nil {
log.Fatal(err)
@ -118,7 +118,7 @@ func (ts Tensor) MustGru(hx Tensor, paramsData []Tensor, hasBiases bool, numLaye
return output, h
}
func (ts Tensor) TopK(k int64, dim int64, largest bool, sorted bool) (ts1 Tensor, ts2 Tensor, err error) {
func (ts *Tensor) TopK(k int64, dim int64, largest bool, sorted bool) (ts1, ts2 *Tensor, err error) {
// NOTE: `lib.AtgTopk` will return 2 tensors in C memory. First tensor pointer
// is given by ctensorPtr1
@ -139,10 +139,10 @@ func (ts Tensor) TopK(k int64, dim int64, largest bool, sorted bool) (ts1 Tensor
return ts1, ts2, err
}
return Tensor{ctensor: *ctensorPtr1}, Tensor{ctensor: *ctensorPtr2}, nil
return &Tensor{ctensor: *ctensorPtr1}, &Tensor{ctensor: *ctensorPtr2}, nil
}
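TopK usage then reads as follows (illustrative values):

package main

import (
    "fmt"

    ts "github.com/sugarme/gotch/tensor"
)

func main() {
    t := ts.TensorFrom([]float64{0.3, 0.9, 0.1, 0.7})

    values, indexes := t.MustTopK(2, 0, true, true)
    fmt.Println(values.MustSize(), indexes.MustSize()) // [2] [2]
}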
func (ts Tensor) MustTopK(k int64, dim int64, largest bool, sorted bool) (ts1 Tensor, ts2 Tensor) {
func (ts *Tensor) MustTopK(k int64, dim int64, largest bool, sorted bool) (ts1, ts2 *Tensor) {
ts1, ts2, err := ts.TopK(k, dim, largest, sorted)
if err != nil {
@ -154,7 +154,7 @@ func (ts Tensor) MustTopK(k int64, dim int64, largest bool, sorted bool) (ts1 Te
// NOTE. `NLLLoss` is a version of `NllLoss` in tensor-generated
// with default weight, reduction and ignoreIndex
func (ts Tensor) NLLLoss(target Tensor, del bool) (retVal Tensor, err error) {
func (ts *Tensor) NLLLoss(target Tensor, del bool) (retVal *Tensor, err error) {
ptr := (*lib.Ctensor)(unsafe.Pointer(C.malloc(0)))
if del {
defer ts.MustDrop()
@ -169,12 +169,12 @@ func (ts Tensor) NLLLoss(target Tensor, del bool) (retVal Tensor, err error) {
return retVal, err
}
retVal = Tensor{ctensor: *ptr}
retVal = &Tensor{ctensor: *ptr}
return retVal, nil
}
func (ts Tensor) MustNLLLoss(target Tensor, del bool) (retVal Tensor) {
func (ts *Tensor) MustNLLLoss(target Tensor, del bool) (retVal *Tensor) {
retVal, err := ts.NLLLoss(target, del)
if err != nil {
log.Fatal(err)
@ -285,7 +285,7 @@ func MustBroadcastTensors(tensors []Tensor, del bool) (retVal []Tensor) {
}
// tensor *atg_chunk(tensor self, int64_t chunks, int64_t dim);
func (ts Tensor) Chunk(chunks int64, dim int64) (retVal []Tensor, err error) {
func (ts *Tensor) Chunk(chunks int64, dim int64) (retVal []Tensor, err error) {
ctensorsPtr := lib.AtgChunk(ts.ctensor, chunks, dim)
if err = TorchErr(); err != nil {
return retVal, err
@ -307,7 +307,7 @@ func (ts Tensor) Chunk(chunks int64, dim int64) (retVal []Tensor, err error) {
return retVal, nil
}
func (ts Tensor) MustChunk(chunks int64, dim int64, del bool) (retVal []Tensor) {
func (ts *Tensor) MustChunk(chunks int64, dim int64, del bool) (retVal []Tensor) {
if del {
defer ts.MustDrop()
}
@ -321,7 +321,7 @@ func (ts Tensor) MustChunk(chunks int64, dim int64, del bool) (retVal []Tensor)
}
// tensor *atg_meshgrid(tensor *tensors_data, int tensors_len);
func (ts Tensor) Meshgrid(tensors []Tensor) (retVal []Tensor, err error) {
func (ts *Tensor) Meshgrid(tensors []Tensor) (retVal []Tensor, err error) {
var ctensors []lib.Ctensor
for _, t := range tensors {
@ -348,7 +348,7 @@ func (ts Tensor) Meshgrid(tensors []Tensor) (retVal []Tensor, err error) {
return retVal, nil
}
func (ts Tensor) MustMeshgrid(tensors []Tensor, del bool) (retVal []Tensor) {
func (ts *Tensor) MustMeshgrid(tensors []Tensor, del bool) (retVal []Tensor) {
if del {
defer ts.MustDrop()
}
@ -362,7 +362,7 @@ func (ts Tensor) MustMeshgrid(tensors []Tensor, del bool) (retVal []Tensor) {
}
// tensor *atg_nonzero_numpy(tensor self);
func (ts Tensor) NonzeroNumpy() (retVal []Tensor, err error) {
func (ts *Tensor) NonzeroNumpy() (retVal []Tensor, err error) {
ctensorsPtr := lib.AtgNonzeroNumpy(ts.ctensor)
if err = TorchErr(); err != nil {
@ -384,7 +384,7 @@ func (ts Tensor) NonzeroNumpy() (retVal []Tensor, err error) {
return retVal, nil
}
func (ts Tensor) MustNonzeroNumpy(del bool) (retVal []Tensor) {
func (ts *Tensor) MustNonzeroNumpy(del bool) (retVal []Tensor) {
if del {
defer ts.MustDrop()
}
@ -403,7 +403,7 @@ func (ts Tensor) MustNonzeroNumpy(del bool) (retVal []Tensor) {
// - splitSize size of a single chunk
// - dim dimension along which to split the tensor.
// Ref. https://pytorch.org/docs/stable/generated/torch.split.html
func (ts Tensor) Split(splitSize, dim int64) (retVal []Tensor, err error) {
func (ts *Tensor) Split(splitSize, dim int64) (retVal []Tensor, err error) {
ctensorsPtr := lib.AtgSplit(ts.ctensor, splitSize, dim)
if err = TorchErr(); err != nil {
@ -430,7 +430,7 @@ func (ts Tensor) Split(splitSize, dim int64) (retVal []Tensor, err error) {
return retVal, nil
}
func (ts Tensor) MustSplit(splitSize, dim int64, del bool) (retVal []Tensor) {
func (ts *Tensor) MustSplit(splitSize, dim int64, del bool) (retVal []Tensor) {
if del {
defer ts.MustDrop()
}
@ -449,7 +449,7 @@ func (ts Tensor) MustSplit(splitSize, dim int64, del bool) (retVal []Tensor) {
// - splitSizes slice of sizes for each chunk
// - dim dimension along which to split the tensor.
// Ref. https://pytorch.org/docs/stable/generated/torch.split.html
func (ts Tensor) SplitWithSizes(splitSizes []int64, dim int64) (retVal []Tensor, err error) {
func (ts *Tensor) SplitWithSizes(splitSizes []int64, dim int64) (retVal []Tensor, err error) {
ctensorsPtr := lib.AtgSplitWithSizes(ts.ctensor, splitSizes, len(splitSizes), dim)
if err = TorchErr(); err != nil {
@ -476,7 +476,7 @@ func (ts Tensor) SplitWithSizes(splitSizes []int64, dim int64) (retVal []Tensor,
return retVal, nil
}
func (ts Tensor) MustSplitWithSizes(splitSizes []int64, dim int64, del bool) (retVal []Tensor) {
func (ts *Tensor) MustSplitWithSizes(splitSizes []int64, dim int64, del bool) (retVal []Tensor) {
if del {
defer ts.MustDrop()
}
@ -490,7 +490,7 @@ func (ts Tensor) MustSplitWithSizes(splitSizes []int64, dim int64, del bool) (re
}
// tensor *atg_unbind(tensor self, int64_t dim);
func (ts Tensor) Unbind(dim int64) (retVal []Tensor, err error) {
func (ts *Tensor) Unbind(dim int64) (retVal []Tensor, err error) {
ctensorsPtr := lib.AtgUnbind(ts.ctensor, dim)
if err = TorchErr(); err != nil {
@ -512,7 +512,7 @@ func (ts Tensor) Unbind(dim int64) (retVal []Tensor, err error) {
return retVal, nil
}
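A sketch of Split; like the other multi-tensor APIs in this file, it still returns a []Tensor of values rather than pointers.

package main

import (
    "fmt"

    "github.com/sugarme/gotch"
    ts "github.com/sugarme/gotch/tensor"
)

func main() {
    t := ts.MustZeros([]int64{6, 2}, gotch.Float, gotch.CPU)

    chunks := t.MustSplit(2, 0, false) // still a []Tensor of values
    for _, c := range chunks {
        fmt.Println(c.MustSize()) // [2 2], three times
    }
}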
func (ts Tensor) MustUnbind(dim int64, del bool) (retVal []Tensor) {
func (ts *Tensor) MustUnbind(dim int64, del bool) (retVal []Tensor) {
if del {
defer ts.MustDrop()
}

View File

@ -12,19 +12,19 @@ type Scalar struct {
}
// IntScalar creates an integer scalar
func IntScalar(v int64) Scalar {
func IntScalar(v int64) *Scalar {
cscalar := lib.AtsInt(v)
return Scalar{cscalar}
return &Scalar{cscalar}
}
// FloatScalar creates a float scalar
func FloatScalar(v float64) Scalar {
func FloatScalar(v float64) *Scalar {
cscalar := lib.AtsFloat(v)
return Scalar{cscalar}
return &Scalar{cscalar}
}
// ToInt returns an integer value
func (sc Scalar) ToInt() (retVal int64, err error) {
func (sc *Scalar) ToInt() (retVal int64, err error) {
retVal = lib.AtsToInt(sc.cscalar)
err = TorchErr()
if err != nil {
@ -35,7 +35,7 @@ func (sc Scalar) ToInt() (retVal int64, err error) {
}
// ToFloat returns a float value
func (sc Scalar) ToFloat() (retVal float64, err error) {
func (sc *Scalar) ToFloat() (retVal float64, err error) {
retVal = lib.AtsToFloat(sc.cscalar)
err = TorchErr()
if err != nil {
@ -46,7 +46,7 @@ func (sc Scalar) ToFloat() (retVal float64, err error) {
}
// ToString returns a string representation of the scalar value
func (sc Scalar) ToString() (retVal string, err error) {
func (sc *Scalar) ToString() (retVal string, err error) {
retVal = lib.AtsToString(sc.cscalar)
err = TorchErr()
if err != nil {
@ -60,12 +60,12 @@ func (sc Scalar) ToString() (retVal string, err error) {
//
// TODO: Really? after running s.Drop() and s.ToInt()
// it returns Zero.
func (sc Scalar) Drop() (err error) {
func (sc *Scalar) Drop() (err error) {
lib.AtsFree(sc.cscalar)
return TorchErr()
}
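Finally, a sketch of the Scalar API with its new pointer receivers.

package main

import (
    "fmt"
    "log"

    ts "github.com/sugarme/gotch/tensor"
)

func main() {
    s := ts.IntScalar(3) // now a *Scalar
    v, err := s.ToInt()
    if err != nil {
        log.Fatal(err)
    }
    fmt.Println(v) // 3
    s.MustDrop()
}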
func (sc Scalar) MustDrop() {
func (sc *Scalar) MustDrop() {
lib.AtsFree(sc.cscalar)
if err := TorchErr(); err != nil {
log.Fatal(err)

File diff suppressed because it is too large

File diff suppressed because it is too large