fix(tensor/module): fixed and moved BatchAccuracyForLogits to nn/sequential; chore(example): clean-up

This commit is contained in:
sugarme 2020-07-11 12:53:23 +10:00
parent 44ef7776e5
commit 8b05753eb4
29 changed files with 281 additions and 1552 deletions

View File

@ -1,87 +0,0 @@
package main
// Training various models on the CIFAR-10 dataset.
// The dataset can be downloaded from https://www.cs.toronto.edu/~kriz/cifar.html, files
// should be placed in the data/ directory.
// The resnet model reaches 95.4% accuracy.
import (
// "log"
// "os/exec"
ts ""
func main() {
dir := "../../data/cifar10"
ds := vision.CFLoadDir(dir)
fmt.Printf("TrainImages shape: %v\n", ds.TrainImages.MustSize())
fmt.Printf("TrainLabel shape: %v\n", ds.TrainLabels.MustSize())
fmt.Printf("TestImages shape: %v\n", ds.TestImages.MustSize())
fmt.Printf("TestLabel shape: %v\n", ds.TestLabels.MustSize())
fmt.Printf("Number of labels: %v\n", ds.Labels)
// cuda := gotch.CudaBuilder(0)
// device := cuda.CudaIfAvailable()
device := gotch.CPU
var si *gotch.SI
si = gotch.GetSysInfo()
fmt.Printf("Total RAM (MB):\t %8.2f\n", float64(si.TotalRam)/1024)
fmt.Printf("Used RAM (MB):\t %8.2f\n", float64(si.TotalRam-si.FreeRam)/1024)
startRAM := si.TotalRam - si.FreeRam
vs := nn.NewVarStore(device)
for epoch := 0; epoch < 150; epoch++ {
iter := ts.MustNewIter2(ds.TrainImages, ds.TrainLabels, int64(64))
for {
item, ok := iter.Next()
if !ok {
devicedData := item.Data.MustTo(vs.Device(), true)
devicedLabel := item.Label.MustTo(vs.Device(), true)
bimages := vision.Augmentation(devicedData, true, 4, 8)
si = gotch.GetSysInfo()
memUsed := (float64(si.TotalRam-si.FreeRam) - float64(startRAM)) / 1024
fmt.Printf("Epoch:\t %v\t Memory Used:\t [%8.2f MiB]\n", epoch, memUsed)
* // Print out GPU used
* nvidia := "nvidia-smi"
* cmd := exec.Command(nvidia)
* stdout, err := cmd.Output()
* if err != nil {
* log.Fatal(err.Error())
* }
* fmt.Println(string(stdout))
* */

View File

@ -10,7 +10,6 @@ package main
import (
// "os/exec"
@ -80,17 +79,6 @@ func fastResnet(p nn.Path) (retVal nn.SequentialT) {
return seq
func learningRate(epoch int) (retVal float64) {
switch {
case epoch < 50:
return 0.1
case epoch < 100:
return 0.01
return 0.001
func main() {
dir := "../../data/cifar10"
ds := vision.CFLoadDir(dir)
@ -103,50 +91,42 @@ func main() {
cuda := gotch.CudaBuilder(0)
device := cuda.CudaIfAvailable()
// device := gotch.CPU
vs := nn.NewVarStore(device)
net := fastResnet(vs.Root())
// optConfig := nn.NewSGDConfig(0.9, 0.0, 5e-4, true)
// opt, err := optConfig.Build(vs, 0.01)
// if err != nil {
// log.Fatal(err)
// }
var lossVal float64
startTime := time.Now()
var bestAccuracy float64
for epoch := 0; epoch < 350; epoch++ {
// opt.SetLR(learningRate(epoch))
for epoch := 0; epoch < 150; epoch++ {
optConfig := nn.NewSGDConfig(0.9, 0.0, 5e-4, true)
var opt nn.Optimizer
var err error
var (
opt nn.Optimizer
err error
switch {
case epoch < 150:
case epoch < 50:
opt, err = optConfig.Build(vs, 0.1)
if err != nil {
case epoch < 250:
case epoch < 100:
opt, err = optConfig.Build(vs, 0.01)
if err != nil {
case epoch >= 250:
case epoch >= 100:
opt, err = optConfig.Build(vs, 0.001)
if err != nil {
// iter := ts.MustNewIter2(ds.TrainImages, ds.TrainLabels, int64(64))
iter := ts.MustNewIter2(ds.TrainImages, ds.TrainLabels, int64(128))
iter := ts.MustNewIter2(ds.TrainImages, ds.TrainLabels, int64(64))
// iter = iter.ToDevice(device)
for {
item, ok := iter.Next()
@ -171,63 +151,14 @@ func main() {
testAcc := batchAccuracyForLogits(net, ds.TestImages, ds.TestLabels, vs.Device(), 100)
testAcc := nn.BatchAccuracyForLogits(vs, net, ds.TestImages, ds.TestLabels, vs.Device(), 512)
fmt.Printf("Epoch:\t %v\t Loss: \t %.3f \tAcc: %10.2f%%\n", epoch, lossVal, testAcc*100.0)
// fmt.Printf("Epoch: %10.0d\tLoss:%10.3f\n", epoch, lossVal)
if testAcc > bestAccuracy {
bestAccuracy = testAcc
* // Print out GPU used
* nvidia := "nvidia-smi"
* cmd := exec.Command(nvidia)
* stdout, err := cmd.Output()
* if err != nil {
* log.Fatal(err.Error())
* }
* fmt.Println(string(stdout))
* */
// testAcc := ts.BatchAccuracyForLogits(net, ds.TestImages, ds.TestLabels, vs.Device(), 512)
fmt.Printf("Best Accuracy: %10.2f%%\n", bestAccuracy*100.0)
fmt.Printf("Taken time:\t%.2f mins\n", time.Since(startTime).Minutes())
func batchAccuracyForLogits(m ts.ModuleT, xs, ys ts.Tensor, d gotch.Device, batchSize int) (retVal float64) {
var (
sumAccuracy float64 = 0.0
sampleCount float64 = 0.0
iter2 := ts.MustNewIter2(xs, ys, int64(batchSize))
for {
item, ok := iter2.Next()
if !ok {
size := float64(item.Data.MustSize()[0])
bImages := item.Data.MustTo(d, true)
bLabels := item.Label.MustTo(d, true)
logits := m.ForwardT(bImages, false)
acc := logits.AccuracyForLogits(bLabels)
sumAccuracy += acc.Values()[0] * size
sampleCount += size
return sumAccuracy / sampleCount

View File

@ -1,16 +0,0 @@
package main
import (
func main() {
var d gotch.Cuda
fmt.Printf("Cuda device count: %v\n", d.DeviceCount())
fmt.Printf("Cuda is available: %v\n", d.IsAvailable())
fmt.Printf("Cudnn is available: %v\n", d.CudnnIsAvailable())

View File

@ -1,8 +1,8 @@
// A wrapper around the linux syscall sysinfo(2).
package main
// helper to debug memory blow-up
import (
@ -34,7 +34,7 @@ var sis = &SI{}
func Get() *SI {
func CPUInfo() *SI {
// Note: uint64 is uint32 on 32 bit CPUs
@ -89,36 +89,3 @@ func Get() *SI {
return sis
// Make the "fmt" Stringer interface happy.
func (si SI) String() string {
// XXX: Is the copy of SI atomic? Not sure.
// Without an outer lock this may print junk.
return fmt.Sprintf("uptime\t\t%v\nload\t\t%2.2f %2.2f %2.2f\nprocs\t\t%d\n"+
"ram total\t%d kB\nram free\t%d kB\nram buffer\t%d kB\n"+
"swap total\t%d kB\nswap free\t%d kB",
//"high ram total\t%d kB\nhigh ram free\t%d kB\n"
si.Uptime, si.Loads[0], si.Loads[1], si.Loads[2], si.Procs,
si.TotalRam, si.FreeRam, si.BufferRam,
si.TotalSwap, si.FreeSwap,
// archaic si.TotalHighRam, si.FreeHighRam
Convert to string in a thread safe way.
uptime 279h6m21s
load 0.12 0.04 0.05
procs 143
ram total 383752 kB
ram free 254980 kB
ram buffer 7640 kB
swap total 887800 kB
swap free 879356 kB
func (si *SI) ToString() string {
return si.String()

View File

@ -0,0 +1,20 @@
package main
import (
func GPUInfo() {
// Print out GPU used
nvidia := "nvidia-smi"
cmd := exec.Command(nvidia)
stdout, err := cmd.Output()
if err != nil {

View File

@ -0,0 +1,90 @@
package main
import (
ts ""
var device string
func createTensors(samples int) []ts.Tensor {
n := int(10e6)
var data []float64
for i := 0; i < n; i++ {
data = append(data, float64(i))
var tensors []ts.Tensor
s := ts.FloatScalar(float64(0.23))
for i := 0; i < 1; i++ {
t := ts.MustOfSlice(data).MustMul1(s, true)
tensors = append(tensors, t)
return tensors
func dropTensors(tensors []ts.Tensor) {
for _, t := range tensors {
func init() {
flag.StringVar(&device, "device", "CPU", "Select CPU or GPU to use")
func main() {
// TODO: create flags to load tensors to a device (CPU, GPU) and get CPU or GPU
// info accordingly
switch device {
case "CPU":
var si *SI
si = CPUInfo()
fmt.Printf("Total RAM (MB):\t %8.2f\n", float64(si.TotalRam)/1024)
fmt.Printf("Used RAM (MB):\t %8.2f\n", float64(si.TotalRam-si.FreeRam)/1024)
startRAM := si.TotalRam - si.FreeRam
epochs := 50
for i := 0; i < epochs; i++ {
tensors := createTensors(10000)
si = CPUInfo()
fmt.Printf("Epoch %v\t Used: [%8.2f MiB]\n", i, (float64(si.TotalRam-si.FreeRam)-float64(startRAM))/1024)
case "GPU":
cuda := gotch.CudaBuilder(0)
gpu := cuda.CudaIfAvailable()
epochs := 50
for i := 0; i < epochs; i++ {
tensors := createTensors(10000)
var gpuTensors []ts.Tensor
for _, t := range tensors {
gpuTensors = append(gpuTensors, t.MustTo(gpu, true))
for _, t := range gpuTensors {
fmt.Printf("Epoch %v\n", i)
log.Fatalf("Invalid device flag (%v). It should be either CPU or GPU.", device)

View File

@ -1,53 +0,0 @@
package main
import (
func main() {
// Try to compare 2 tensor with incompatible dimensions
// and check this returns an error
dx := []int32{1, 2, 3}
dy := []int32{1, 2, 3, 4}
// dy := []int32{1, 2, 5}
xs, err := tensor.OfSlice(dx)
if err != nil {
ys, err := tensor.OfSlice(dy)
if err != nil {
fmt.Printf("xs num of dimensions: %v\n", xs.Dim())
fmt.Printf("ys num of dimensions: %v\n", ys.Dim())
xsize, err := xs.Size()
if err != nil {
ysize, err := ys.Size()
if err != nil {
fmt.Printf("xs shape: %v\n", xsize)
fmt.Printf("ys shape: %v\n", ysize)
res, err := xs.Eq1(ys)
if err != nil {

View File

@ -1,48 +0,0 @@
package main
import (
ts ""
func main() {
// mockup data
var (
n int = 20
xvals []float32
yvals []float32
epochs = 10
for i := 0; i < n; i++ {
xvals = append(xvals, float32(i))
yvals = append(yvals, float32(2*i+1))
xtrain, err := ts.NewTensorFromData(xvals, []int64{int64(n), 1})
if err != nil {
ytrain, err := ts.NewTensorFromData(yvals, []int64{int64(n), 1})
if err != nil {
ws := ts.MustZeros([]int64{1, int64(n)}, gotch.Float.CInt(), gotch.CPU.CInt())
bs := ts.MustZeros([]int64{1, int64(n)}, gotch.Float.CInt(), gotch.CPU.CInt())
for epoch := 0; epoch < epochs; epoch++ {
logit := ws.MustMatMul(xtrain).MustAdd(bs)
loss := ts.NewTensor().MustLogSoftmax(-1, gotch.Float.CInt())

View File

@ -1,17 +0,0 @@
package main
import (
func main() {
vs := nn.NewVarStore(gotch.CPU)
path := vs.Root()
l := nn.NewLinear(path, 4, 3, nn.DefaultLinearConfig())

View File

@ -13,21 +13,21 @@
- Run with `go clean -cache -testcache && go run . -model="linear"`
- Accuraccy should be about **91.68%**.
- Accuracy should be about **91.68%**.
## Neural Network (NN)
- Run with `go clean -cache -testcache && go run . -model="nn"`
- Accuraccy should be about **TODO: update%**.
- Accuracy should be about **94%**.
## Convolutional Neural Network (CNN)
- Run with `go clean -cache -testcache && go run . -model="cnn"`
- Accuraccy should be about **TODO: update%**.
- Accuracy should be about **99.3%**.

View File

@ -46,26 +46,21 @@ func (n Net) ForwardT(xs ts.Tensor, train bool) (retVal ts.Tensor) {
defer outView1.MustDrop()
outC1 := outView1.Apply(n.conv1)
// defer outC1.MustDrop()
outMP1 := outC1.MaxPool2DDefault(2, true)
defer outMP1.MustDrop()
outC2 := outMP1.Apply(n.conv2)
// defer outC2.MustDrop()
outMP2 := outC2.MaxPool2DDefault(2, true)
// defer outMP2.MustDrop()
outView2 := outMP2.MustView([]int64{-1, 1024}, true)
defer outView2.MustDrop()
outFC1 := outView2.Apply(&n.fc1)
// defer outFC1.MustDrop()
outRelu := outFC1.MustRelu(true)
defer outRelu.MustDrop()
// outRelu.Dropout_(0.5, train)
outDropout := ts.MustDropout(outRelu, 0.5, train)
defer outDropout.MustDrop()
@ -83,12 +78,14 @@ func runCNN1() {
cuda := gotch.CudaBuilder(0)
vs := nn.NewVarStore(cuda.CudaIfAvailable())
// vs := nn.NewVarStore(gotch.CPU)
net := newNet(vs.Root())
opt, err := nn.DefaultAdamConfig().Build(vs, LrCNN)
if err != nil {
var bestAccuracy float64 = 0.0
startTime := time.Now()
for epoch := 0; epoch < epochsCNN; epoch++ {
@ -102,20 +99,16 @@ func runCNN1() {
batches := samples / batchSize
batchIndex := 0
var epocLoss ts.Tensor
// var loss ts.Tensor
for i := 0; i < batches; i++ {
start := batchIndex * batchSize
size := batchSize
if samples-start < batchSize {
// size = samples - start
batchIndex += 1
// Indexing
narrowIndex := ts.NewNarrow(int64(start), int64(start+size))
// bImages := ds.TrainImages.Idx(narrowIndex)
// bLabels := ds.TrainLabels.Idx(narrowIndex)
bImages := imagesTs.Idx(narrowIndex)
bLabels := labelsTs.Idx(narrowIndex)
@ -126,7 +119,6 @@ func runCNN1() {
loss := logits.CrossEntropyForLogits(bLabels)
// loss = loss.MustSetRequiresGrad(true)
epocLoss = loss.MustShallowClone()
@ -136,112 +128,21 @@ func runCNN1() {
// logits.MustDrop()
// loss.MustDrop()
testAccuracy := batchAccuracyForLogits(net, testImages, testLabels, vs.Device(), 1024)
testAccuracy := nn.BatchAccuracyForLogits(vs, net, testImages, testLabels, vs.Device(), 1024)
fmt.Printf("Epoch: %v\t Loss: %.2f \t Test accuracy: %.2f%%\n", epoch, epocLoss.Values()[0], testAccuracy*100.0)
if testAccuracy > bestAccuracy {
bestAccuracy = testAccuracy
// fmt.Printf("Epoch:\t %v\tLoss: \t %.2f\n", epoch, epocLoss.Values()[0])
testAccuracy := ts.BatchAccuracyForLogitsIdx(net, testImages, testLabels, vs.Device(), 1024)
fmt.Printf("Test accuracy: %.2f%%\n", testAccuracy*100)
fmt.Printf("Best test accuracy: %.2f%%\n", bestAccuracy*100.0)
fmt.Printf("Taken time:\t%.2f mins\n", time.Since(startTime).Minutes())
func runCNN2() {
var ds vision.Dataset
ds = vision.LoadMNISTDir(MnistDirNN)
cuda := gotch.CudaBuilder(0)
vs := nn.NewVarStore(cuda.CudaIfAvailable())
net := newNet(vs.Root())
opt, err := nn.DefaultAdamConfig().Build(vs, LrNN)
if err != nil {
startTime := time.Now()
var lossVal float64
for epoch := 0; epoch < epochsCNN; epoch++ {
iter := ts.MustNewIter2(ds.TrainImages, ds.TrainLabels, batchCNN)
// iter.Shuffle()
for {
item, ok := iter.Next()
if !ok {
bImages := item.Data.MustTo(vs.Device(), true)
bLabels := item.Label.MustTo(vs.Device(), true)
// _ = ts.MustGradSetEnabled(true)
logits := net.ForwardT(bImages, true)
loss := logits.CrossEntropyForLogits(bLabels)
lossVal = loss.Values()[0]
// fmt.Printf("Epoch:\t %v\tLoss: \t %.2f\n", epoch, lossVal)
testAcc := batchAccuracyForLogits(net, ds.TestImages, ds.TestLabels, vs.Device(), batchCNN)
fmt.Printf("Epoch:\t %v\tLoss: \t %.2f\t Accuracy: %.2f\n", epoch, lossVal, testAcc*100.0)
testAcc := ts.BatchAccuracyForLogits(net, ds.TestImages, ds.TestLabels, vs.Device(), batchCNN)
fmt.Printf("Loss: \t %.2f\t Accuracy: %.2f\n", lossVal, testAcc*100)
fmt.Printf("Taken time:\t%.2f mins\n", time.Since(startTime).Minutes())
func batchAccuracyForLogits(m ts.ModuleT, xs, ys ts.Tensor, d gotch.Device, batchSize int) (retVal float64) {
var (
sumAccuracy float64 = 0.0
sampleCount float64 = 0.0
iter2 := ts.MustNewIter2(xs, ys, int64(batchSize))
for {
item, ok := iter2.Next()
if !ok {
size := float64(item.Data.MustSize()[0])
bImages := item.Data.MustTo(d, true)
bLabels := item.Label.MustTo(d, true)
logits := m.ForwardT(bImages, false)
acc := logits.AccuracyForLogits(bLabels)
sumAccuracy += acc.Values()[0] * size
sampleCount += size
return sumAccuracy / sampleCount

View File

@ -1,64 +0,0 @@
package main
import (
ts ""
func testOptimizer() {
var data []float64
for i := 0; i < 15; i++ {
data = append(data, float64(i))
xs, err := ts.NewTensorFromData(data, []int64{int64(len(data)), 1})
if err != nil {
ys := xs.MustMul1(ts.FloatScalar(0.42)).MustAdd1(ts.FloatScalar(1.337))
vs := nn.NewVarStore(gotch.CPU)
cfg := nn.LinearConfig{
WsInit: nn.NewConstInit(0.001),
BsInit: nn.NewConstInit(0.001),
Bias: true,
// fmt.Printf("Number of trainable variables: %v\n", vs.Len())
linear := nn.NewLinear(vs.Root(), 1, 1, cfg)
// fmt.Printf("Trainable variables at app: %v\n", vs.TrainableVariable())
loss := xs.Apply(linear).MustMseLoss(ys, ts.ReductionMean.ToInt())
initialLoss := loss.MustView([]int64{-1}).MustFloat64Value([]int64{0})
fmt.Printf("Initial Loss: %.3f\n", initialLoss)
opt, err := nn.DefaultSGDConfig().Build(vs, 1e-2)
if err != nil {
log.Fatal("Failed building SGD optimizer")
for i := 0; i < 50; i++ {
// loss = xs.Apply(linear)
loss = linear.Forward(xs)
loss = loss.MustMseLoss(ys, ts.ReductionMean.ToInt())
fmt.Printf("Loss: %.3f\n", loss.MustView([]int64{-1}).MustFloat64Value([]int64{0}))
fmt.Printf("Bs: %.3f - Bs Grad: %.3f\n", linear.Bs.MustView([]int64{-1}).MustFloat64Value([]int64{0}), linear.Bs.MustGrad().MustFloat64Value([]int64{0}))
fmt.Printf("Ws: %.3f - Ws Grad: %.3f\n", linear.Ws.MustView([]int64{-1}).MustFloat64Value([]int64{0}), linear.Ws.MustGrad().MustFloat64Value([]int64{0}))
func main() {

View File

@ -1,41 +0,0 @@
package main
import (
func main() {
s := tensor.FloatScalar(float64(1.23))
fmt.Printf("scalar value: %v\n", s)
intVal, err := s.ToInt()
if err != nil {
floatVal, err := s.ToFloat()
if err != nil {
strVal, err := s.ToString()
if err != nil {
fmt.Printf("scalar to int64 value: %v\n", intVal)
fmt.Printf("scalar to float64 value: %v\n", floatVal)
fmt.Printf("scalar to string value: %v\n", strVal)
s.Drop() // will set scalar to zero
fmt.Printf("scalar value: %v\n", s)
zeroVal, err := s.ToInt()
if err != nil {
log.Fatalf("Panic: %v\n", err)
fmt.Printf("Won't expect this val: %v\n", zeroVal)

View File

@ -1,205 +0,0 @@
package main
import (
ts ""
func main() {
// noSeq()
// noSeq2Layers()
// seqNoVarStore()
func noSeq() {
ds := vision.LoadMNISTDir("../../data/mnist")
wsInit := nn.NewKaimingUniformInit()
ws := wsInit.InitTensor([]int64{10, 784}, gotch.CPU).MustT(true)
bound := 1.0 / math.Sqrt(float64(784))
bsInit := nn.NewUniformInit(-bound, bound)
bs := bsInit.InitTensor([]int64{10}, gotch.CPU)
for i := 0; i < 2000; i++ {
mul := ds.TrainImages.MustMatMul(ws, false)
logits := mul.MustAdd(bs, true)
loss := logits.AccuracyForLogits(ds.TrainLabels)
fmt.Printf("Epoch %v\t Loss: %.3f\n", i, loss.Values()[0])
func withSeq() {
seq := nn.Seq()
vs := nn.NewVarStore(gotch.CPU)
// seq.Add(nn.NewLinear(vs.Root(), 784, 10, *nn.DefaultLinearConfig()))
seq.Add(nn.NewLinear(vs.Root(), 784, 128, *nn.DefaultLinearConfig()))
seq.Add(nn.NewLinear(vs.Root(), 128, 10, *nn.DefaultLinearConfig()))
opt, err := nn.DefaultAdamConfig().Build(vs, 1e-2)
if err != nil {
ds := vision.LoadMNISTDir("../../data/mnist")
for i := 0; i < 2000; i++ {
logits := seq.Forward(ds.TrainImages)
loss := logits.CrossEntropyForLogits(ds.TrainLabels)
testLogits := seq.Forward(ds.TestImages)
testAccuracy := testLogits.AccuracyForLogits(ds.TestLabels)
fmt.Printf("Epoch: %v \t Loss: %.3f \t Test accuracy: %.2f%%\n", i, loss.Values()[0], testAccuracy.Values()[0]*100)
func noSeq2Layers() {
ds := vision.LoadMNISTDir("../../data/mnist")
wsInit := nn.NewKaimingUniformInit()
ws1 := wsInit.InitTensor([]int64{1024, 784}, gotch.CPU).MustT(true)
ws2 := wsInit.InitTensor([]int64{10, 1024}, gotch.CPU).MustT(true)
bound1 := 1.0 / math.Sqrt(float64(784))
bsInit1 := nn.NewUniformInit(-bound1, bound1)
bs1 := bsInit1.InitTensor([]int64{1024}, gotch.CPU)
bound2 := 1.0 / math.Sqrt(float64(1024))
bsInit2 := nn.NewUniformInit(-bound2, bound2)
bs2 := bsInit2.InitTensor([]int64{10}, gotch.CPU)
for i := 0; i < 2000; i++ {
mul1 := ds.TrainImages.MustMatMul(ws1, false)
out1 := mul1.MustAdd(bs1, true)
mul2 := out1.MustMatMul(ws2, true)
logits := mul2.MustAdd(bs2, true)
loss := logits.AccuracyForLogits(ds.TrainLabels)
fmt.Printf("Epoch %v\t Loss: %.3f\n", i, loss.Values()[0])
func seqNoVarStore() {
ds := vision.LoadMNISTDir("../../data/mnist")
wsInit := nn.NewKaimingUniformInit()
ws1 := wsInit.InitTensor([]int64{1024, 784}, gotch.CPU).MustT(true)
ws2 := wsInit.InitTensor([]int64{10, 1024}, gotch.CPU).MustT(true)
bound1 := 1.0 / math.Sqrt(float64(784))
bsInit1 := nn.NewUniformInit(-bound1, bound1)
bs1 := bsInit1.InitTensor([]int64{1024}, gotch.CPU)
bound2 := 1.0 / math.Sqrt(float64(1024))
bsInit2 := nn.NewUniformInit(-bound2, bound2)
bs2 := bsInit2.InitTensor([]int64{10}, gotch.CPU)
l1 := Linear{&ws1, &bs1}
l2 := Linear{&ws2, &bs2}
seq := Seq()
// seq.Add1(l1)
// seq.Add2(l2)
for i := 0; i < 2000; i++ {
logits := seq.Forward(ds.TrainImages)
type Linear struct {
Ws *ts.Tensor
Bs *ts.Tensor
func (l Linear) Forward(xs ts.Tensor) ts.Tensor {
mul := xs.MustMatMul(*l.Ws, false)
return mul.MustAdd(*l.Bs, true)
type Sequential struct {
layers []ts.Module
l1 ts.Module
l2 ts.Module
func Seq() Sequential {
return Sequential{layers: make([]ts.Module, 0)}
// Len returns number of sub-layers embedded in this layer
func (s *Sequential) Len() (retVal int64) {
return int64(len(s.layers))
// IsEmpty returns true if this layer does not have any sub-layers.
func (s *Sequential) IsEmpty() (retVal bool) {
return len(s.layers) == 0
// Add appends a layer after all the current layers.
func (s *Sequential) Add(l ts.Module) {
s.layers = append(s.layers, l)
func (s *Sequential) Add1(l ts.Module) {
s.l1 = l
func (s *Sequential) Add2(l ts.Module) {
s.l2 = l
func (s Sequential) Forward(xs ts.Tensor) (retVal ts.Tensor) {
if s.IsEmpty() {
return xs.MustShallowClone()
// forward sequentially
outs := make([]ts.Tensor, len(s.layers))
for i := 0; i < len(s.layers); i++ {
if i == 0 {
outs[0] = s.layers[i].Forward(xs)
defer outs[0].MustDrop()
} else if i == len(s.layers)-1 {
return s.layers[i].Forward(outs[i-1])
} else {
outs[i+1] = s.layers[i].Forward(outs[i-1])
defer outs[i+1].MustDrop()
// out1 := s.l1.Forward(xs)
// defer out1.MustDrop()
// return s.l2.Forward(out1)

View File

@ -1,45 +0,0 @@
package main
import (
ts ""
func myModule(p nn.Path, dim int64) ts.Module {
x1 := p.Zeros("x1", []int64{dim})
x2 := p.Zeros("x1", []int64{dim})
return nn.NewFunc(func(xs ts.Tensor) ts.Tensor {
return xs.MustMul(x1).MustAdd(xs.MustExp().MustMul(x2))
func main() {
vs := nn.NewVarStore(gotch.CPU)
m := myModule(vs.Root(), 7)
opt, err := nn.DefaultSGDConfig().Build(vs, 1e-2)
if err != nil {
for i := 0; i < 50; i++ {
xs := ts.MustZeros([]int64{7}, gotch.Float.CInt(), gotch.CPU.CInt())
ys := ts.MustZeros([]int64{7}, gotch.Float.CInt(), gotch.CPU.CInt())
loss := m.Forward(xs).MustSub(ys).MustPow(ts.IntScalar(2)).MustSum(gotch.Float.CInt())
fmt.Printf("Loss: %v\n", loss.MustView([]int64{-1}).MustFloat64Value([]int64{0}))

View File

@ -1,39 +0,0 @@
package main
import (
func main() {
// TODO: Check the Go type of data against the tensor DType.
// E.g., if data is []int and DType is Bool,
// it still runs but gives a wrong result.
data := [][]int64{
{1, 1, 1, 2, 2, 2, 3, 3},
{1, 1, 1, 2, 2, 2, 4, 4},
shape := []int64{2, 8}
// data := []int16{1, 1, 1, 2, 2, 2, 3, 3}
// shape := []int64{1, 8}
ts, err := tensor.NewTensorFromData(data, shape)
if err != nil {
numel := uint(6)
// dst := make([]uint8, numel)
var dst = make([]int64, 6)
ts.MustCopyData(dst, numel)

View File

@ -15,8 +15,8 @@ func main() {
xy := tensor.TensorFrom([]float64{2.0})
xz := tensor.TensorFrom([]float64{3.0})
y := x.MustMul(xy)
z := x.MustMul(xz)
y := x.MustMul(xy, false)
z := x.MustMul(xz, false)
xgrad := x.MustGrad()
@ -31,14 +31,3 @@ func main() {
fmt.Printf("Previous GradMode enabled state: %v\n", isGradEnabled)
/* // Compute a second order derivative using run_backward.
* let mut x = Tensor::from(42.0).set_requires_grad(true);
* let y = &x * &x * &x + &x + &x * &x;
* x.zero_grad();
* let dy_over_dx = Tensor::run_backward(&[y], &[&x], true, true);
* assert_eq!(dy_over_dx.len(), 1);
* let dy_over_dx = &dy_over_dx[0];
* dy_over_dx.backward();
* let dy_over_dx2 = x.grad();
* assert_eq!(f64::from(&dy_over_dx2), 254.0); */

View File

@ -1,41 +0,0 @@
package main
import (
func main() {
// TODO: Check the Go type of data against the tensor DType.
// E.g., if data is []int and DType is Bool,
// it still runs but gives a wrong result.
data := [][]int64{
{1, 1, 1, 2, 2, 2, 3, 3},
{1, 1, 1, 2, 2, 2, 4, 4},
shape := []int64{2, 8}
ts, err := tensor.NewTensorFromData(data, shape)
if err != nil {
ts, err = ts.To(gotch.CPU)
if err != nil {
fmt.Printf("Tensor value BEFORE: %v\n", ts)
scalarVal := tensor.IntScalar(int64(5))
fmt.Printf("Tensor value AFTER: %v\n", ts)

View File

@ -1,60 +0,0 @@
package main
import (
func main() {
data := [][]int64{
{1, 1, 1, 2, 2, 2, 3, 3},
{1, 1, 1, 2, 2, 2, 4, 4},
shape := []int64{2, 8}
// shape := []int64{2, 2, 4}
ts, err := tensor.NewTensorFromData(data, shape)
if err != nil {
// Select
s := tensor.NewSelect(7)
// selectedTs := ts.Idx(s)
// selectedTs.Print()
// Narrow (start inclusive, end exclusive)
n := tensor.NewNarrow(0, 1)
// narrowedTs := ts.Idx(n)
// narrowedTs.Print()
// InsertNewAxis
// i := tensor.NewInsertNewAxis()
// newAxisTs := ts.Idx(i)
// newAxisTs.Print()
// IndexSelect
// idxTensor := tensor.MustOfSlice([]int64{0, 1})
// is := tensor.NewIndexSelect(idxTensor)
// isTs := ts.Idx(is)
// isTs.Print()
// Combined
var tsIndexes []tensor.TensorIndexer = []tensor.TensorIndexer{n, s}
combinedTs := ts.Idx(tsIndexes)
// Copy to index
desTs := tensor.MustZeros([]int64{5}, gotch.Float.CInt(), gotch.CPU.CInt())
srcTs := tensor.MustOnes([]int64{1}, gotch.Float.CInt(), gotch.CPU.CInt())
idx := tensor.NewNarrow(0, 3)
// NOTE: indexing operations return view on the same memory
desTs.Idx(idx).MustView([]int64{-1}, false).Copy_(srcTs)

View File

@ -1,25 +0,0 @@
package main
import (
ts ""
func main() {
tensor := ts.MustArange1(ts.IntScalar(0), ts.IntScalar(2*3), gotch.Int64, gotch.CPU).MustView([]int64{2, 3}, true)
var idxs []ts.TensorIndexer = []ts.TensorIndexer{
// ts.NewNarrow(0, tensor.MustSize()[0]),
// ts.NewNarrow(0, tensor.MustSize()[1]),
result := tensor.Idx(idxs)
fmt.Printf("Original Ts shape: %v\n", tensor.MustSize())
fmt.Printf("Result Ts shape: %v\n", result.MustSize())

View File

@ -1,32 +0,0 @@
package main
import (
func main() {
data := [][]int64{
{1, 1, 1, 2, 2, 2, 3, 3},
{1, 1, 1, 2, 2, 2, 4, 4},
shape := []int64{16}
ts, err := tensor.NewTensorFromData(data, shape)
if err != nil {
it, err := ts.Iter(reflect.Float64)
if err != nil {
for i := 0; i < int(it.Len); i++ {
v := it.Next()

View File

@ -1,74 +0,0 @@
package main
import (
// "runtime"
ts ""
func createTensors(samples int) []ts.Tensor {
n := int(10e6)
var data []float64
for i := 0; i < n; i++ {
data = append(data, float64(i))
var tensors []ts.Tensor
s := ts.FloatScalar(float64(0.23))
// for i := 0; i < samples; i++ {
for i := 0; i < 1; i++ {
t := ts.MustOfSlice(data).MustMul1(s, true)
// t1.MustDrop()
// t.MustDrop()
// t1 = ts.Tensor{}
// t = ts.Tensor{}
// runtime.GC()
// fmt.Printf("t values: %v", t.Values())
// fmt.Printf("t1 values: %v", t1.Values())
tensors = append(tensors, t)
return tensors
func dropTensors(tensors []ts.Tensor) {
for _, t := range tensors {
func main() {
var si *SI
si = Get()
fmt.Printf("Total RAM (MB):\t %8.2f\n", float64(si.TotalRam)/1024)
fmt.Printf("Used RAM (MB):\t %8.2f\n", float64(si.TotalRam-si.FreeRam)/1024)
startRAM := si.TotalRam - si.FreeRam
epochs := 50
// var m runtime.MemStats
for i := 0; i < epochs; i++ {
// runtime.ReadMemStats(&m)
// t0 := float64(m.Sys) / 1024 / 1024
tensors := createTensors(10000)
// runtime.ReadMemStats(&m)
// t1 := float64(m.Sys) / 1024 / 1024
// runtime.ReadMemStats(&m)
// t2 := float64(m.Sys) / 1024 / 1024
// fmt.Printf("Epoch: %v \t Start Mem [%.3f MiB] \t Alloc Mem [%.3f MiB] \t Free Mem [%.3f MiB]\n", i, t0, t1, t2)
si = Get()
fmt.Printf("Epoch %v\t Used: [%8.2f MiB]\n", i, (float64(si.TotalRam-si.FreeRam)-float64(startRAM))/1024)

View File

@ -1,54 +0,0 @@
package main
import (
func main() {
x := tensor.TensorFrom([]float64{2.0})
x = x.MustSetRequiresGrad(true)
xmul := tensor.TensorFrom([]float64{3.0})
xadd := tensor.TensorFrom([]float64{5.0})
x1 := x.MustMul(xmul)
x2 := x1.MustMul(xmul)
x3 := x2.MustMul(xmul)
y := x3.MustAdd(xadd)
inputs := []tensor.Tensor{x}
dy_over_dx, err := tensor.RunBackward([]tensor.Tensor{y}, inputs, true, true)
if err != nil {
fmt.Printf("dy_over_dx length: %v\n", len(dy_over_dx))
// dy_over_dx1 := dy_over_dx[0]
// err = dy_over_dx1.Backward()
// if err != nil {
// log.Fatalf("Errors:\n, %v", err)
// }
/* // Compute a second order derivative using run_backward.
* let mut x = Tensor::from(42.0).set_requires_grad(true);
* let y = &x * &x * &x + &x + &x * &x;
* x.zero_grad();
* let dy_over_dx = Tensor::run_backward(&[y], &[&x], true, true);
* assert_eq!(dy_over_dx.len(), 1);
* let dy_over_dx = &dy_over_dx[0];
* dy_over_dx.backward();
* let dy_over_dx2 = x.grad();
* assert_eq!(f64::from(&dy_over_dx2), 254.0); */

View File

@ -1,89 +0,0 @@
package main
import (
func main() {
// TODO: Check the Go type of data against the tensor DType.
// E.g., if data is []int and DType is Bool,
// it still runs but gives a wrong result.
data := [][]int64{
{1, 1, 1, 2, 2, 2, 3, 3},
{1, 1, 1, 2, 2, 2, 4, 4},
shape := []int64{2, 8}
// shape := []int64{2, 2, 4}
// dtype := gotch.Int
// ts := tensor.NewTensor()
// sliceTensor, err := ts.FOfSlice(data, dtype)
// if err != nil {
// log.Fatal(err)
// }
ts, err := tensor.NewTensorFromData(data, shape)
if err != nil {
sz, err := ts.Size2()
if err != nil {
fmt.Printf("Shape: %v\n", sz)
fmt.Printf("DType: %v\n", ts.DType())
dx := [][]float64{
{1, 1},
{1, 1},
{1, 1},
dy := [][]float64{
{1, 2, 3},
{1, 1, 1},
xs, err := tensor.NewTensorFromData(dx, []int64{3, 2})
if err != nil {
ys, err := tensor.NewTensorFromData(dy, []int64{2, 3})
if err != nil {
// CPU
startCPUTime := time.Now()
for i := 1; i < 100000; i++ {
fmt.Printf("CPU time: %v\n", time.Since(startCPUTime))
// Cuda
device := gotch.NewCuda()
startGPUTime := time.Now()
for i := 1; i < 100000; i++ {
cx, err := xs.To(device)
if err != nil {
cy, err := ys.To(device)
if err != nil {
fmt.Printf("GPU time: %v\n", time.Since(startGPUTime))

View File

@ -1,69 +0,0 @@
package main
import (
func main() {
ts, err := tensor.OfSlice([]float64{1.3, 29.7})
if err != nil {
res, err := ts.Float64Value([]int64{1})
if err != nil {
resInt64, err := ts.Int64Value([]int64{1})
if err != nil {
grad, err := ts.RequiresGrad()
if err != nil {
fmt.Printf("Requires Grad: %v\n", grad)
ele1, err := ts.DataPtr()
if err != nil {
fmt.Printf("First element address: %v\n", ele1)
fmt.Printf("Number of tensor elements: %v\n", ts.Numel())
clone := ts.MustShallowClone()
atGet := ts.MustGet(1)
atGet.Print() // 29.7
atGet = ts.MustGet(0)
atGet.Print() // 1.3
dst, err := tensor.NewTensorFromData([]int64{1, 2}, []int64{1, 2})
if err != nil {
dst = dst.MustTotype(ts.DType())
tensor.MustCopy_(dst, ts)
// The statement below will panic because `ts` has been dropped.
// ts.Print()

View File

@ -1,26 +0,0 @@
package main
import (
func main() {
vs := nn.NewVarStore(gotch.CPU)
fmt.Printf("Is VarStore emptry? %v\n ", vs.IsEmpty())
path := vs.Root()
init := nn.NewKaimingUniformInit()
init.InitTensor([]int64{1, 4}, gotch.CPU).Print()
path.NewVar("layer1", []int64{1, 10}, nn.NewKaimingUniformInit())
fmt.Printf("Is VarStore emptry? %v\n ", vs.IsEmpty())

View File

@ -3,6 +3,7 @@ package nn
// A sequential layer used to chain multiple layers and closures.
import (
ts ""
// "reflect"
@ -224,3 +225,104 @@ type ForwardTWith func(ts.Tensor, bool) ts.Tensor
func (fw ForwardTWith) ForwardT(xs ts.Tensor, train bool) ts.Tensor {
return fw(xs, train)
// BatchAccuracyForLogits calculates the average accuracy over test batches.
// NOTE: PyTorch uses `NoGradGuard`, which is a thread-local scope that
// sets a global flag checked by the backend whenever an op is done on a variable.
// The guard itself saves the current status and sets it to false in its constructor,
// and restores the saved status in its destructor. That way it is similar to a `with torch.no_grad():` block in Python.
// This does not seem to work in Go.
// There are 2 ways to get around it. One is to freeze the VarStore; the other is
// to manually set AutoGrad on the `loss` tensor, i.e., `loss = loss.MustSetRequiresGrad(true)`.
func BatchAccuracyForLogits(vs VarStore, m ts.ModuleT, xs, ys ts.Tensor, d gotch.Device, batchSize int) (retVal float64) {
var (
sumAccuracy float64 = 0.0
sampleCount float64 = 0.0
defer vs.Unfreeze()
iter2 := ts.MustNewIter2(xs, ys, int64(batchSize))
for {
item, ok := iter2.Next()
if !ok {
size := float64(item.Data.MustSize()[0])
bImages := item.Data.MustTo(d, true)
bLabels := item.Label.MustTo(d, true)
logits := m.ForwardT(bImages, false)
acc := logits.AccuracyForLogits(bLabels)
sumAccuracy += acc.Values()[0] * size
sampleCount += size
return sumAccuracy / sampleCount
// BatchAccuracyForLogitsIdx is an alternative to BatchAccuracyForLogits that
// calculates accuracy for the specified batches on the module weights. It uses
// tensor indexing instead of Iter2.
func BatchAccuracyForLogitsIdx(vs VarStore, m ts.ModuleT, xs, ys ts.Tensor, d gotch.Device, batchSize int) (retVal float64) {
var (
sumAccuracy float64 = 0.0
sampleCount float64 = 0.0
totalSize := xs.MustSize()[0]
samples := int(totalSize)
index := ts.MustRandperm(int64(totalSize), gotch.Int64, gotch.CPU)
imagesTs := xs.MustIndexSelect(0, index, false)
labelsTs := ys.MustIndexSelect(0, index, false)
batches := samples / batchSize
batchIndex := 0
defer vs.Unfreeze()
for i := 0; i < batches; i++ {
start := batchIndex * batchSize
size := batchSize
if samples-start < batchSize {
batchIndex += 1
// Indexing
narrowIndex := ts.NewNarrow(int64(start), int64(start+size))
bImages := imagesTs.Idx(narrowIndex)
bLabels := labelsTs.Idx(narrowIndex)
bImages = bImages.MustTo(d, true)
bLabels = bLabels.MustTo(d, true)
logits := m.ForwardT(bImages, true)
bAccuracy := logits.AccuracyForLogits(bLabels)
accuVal := bAccuracy.Values()[0]
bSamples := float64(xs.MustSize()[0])
sumAccuracy += accuVal * bSamples
sampleCount += bSamples
// Free up tensors on C memory
return sumAccuracy / sampleCount

View File

@ -1,126 +0,0 @@
// A wrapper around the linux syscall sysinfo(2).
package gotch
// helper to debug memory blow-up
import (
// SI is the Go-ized form of the linux sysinfo data, as filled in by
// GetSysInfo: uptime as a time.Duration, load averages as floats and the
// memory/swap figures as unsigned integers annotated in kB.
type SI struct {
Uptime time.Duration // time since boot
Loads [3]float64 // 1, 5, and 15 minute load averages, see e.g. UPTIME(1)
Procs uint64 // number of current processes
TotalRam uint64 // total usable main memory size [kB]
FreeRam uint64 // available memory size [kB]
SharedRam uint64 // amount of shared memory [kB]
BufferRam uint64 // memory used by buffers [kB]
TotalSwap uint64 // total swap space size [kB]
FreeSwap uint64 // swap space still available [kB]
TotalHighRam uint64 // total high memory size [kB]
FreeHighRam uint64 // available high memory size [kB]
// NOTE(review): the comment below says "following fields", but mu is the
// last field visible here; presumably it is meant to guard the fields above.
mu sync.Mutex // ensures atomic writes; protects the following fields
// sis is the single package-level SI value that GetSysInfo overwrites and
// returns a pointer to on every call.
var sis = &SI{}
// GetSysInfo reads the linux sysinfo data structure via syscall.Sysinfo,
// converts it into the package-level SI value and returns a pointer to it.
// It panics if the syscall returns an error.
//
// The returned pointer is the shared package variable sis, so each call
// overwrites the values seen through pointers returned by earlier calls.
// NOTE(review): no locking of sis.mu is visible in this view — confirm
// whether concurrent callers are protected.
func GetSysInfo() *SI {
// Note: uint64 is uint32 on 32 bit CPUs
type Sysinfo_t struct {
Uptime int64 // Seconds since boot
Loads [3]uint64 // 1, 5, and 15 minute load averages
Totalram uint64 // Total usable main memory size
Freeram uint64 // Available memory size
Sharedram uint64 // Amount of shared memory
Bufferram uint64 // Memory used by buffers
Totalswap uint64 // Total swap space size
Freeswap uint64 // swap space still available
Procs uint16 // Number of current processes
Pad uint16
Pad_cgo_0 [4]byte
Totalhigh uint64 // Total high memory size
Freehigh uint64 // Available high memory size
Unit uint32 // Memory unit size in bytes
X_f [0]byte
Pad_cgo_1 [4]byte // Padding to 64 bytes
// ~1kB garbage
si := &syscall.Sysinfo_t{}
// XXX is a raw syscall thread safe?
err := syscall.Sysinfo(si)
if err != nil {
panic("Commander, we have a problem. syscall.Sysinfo:" + err.Error())
// Load averages are divided by this scale to obtain float values.
// NOTE(review): presumably the kernel's fixed-point load scale (1 << 16);
// see sysinfo(2) — confirm.
scale := 65536.0 // magic
// NOTE(review): memory fields are divided by Unit*1024. If Unit is the size
// of one memory unit in bytes, this gives kB only when Unit == 1 — confirm.
unit := uint64(si.Unit) * 1024 // kB
sis.Uptime = time.Duration(si.Uptime) * time.Second
sis.Loads[0] = float64(si.Loads[0]) / scale
sis.Loads[1] = float64(si.Loads[1]) / scale
sis.Loads[2] = float64(si.Loads[2]) / scale
sis.Procs = uint64(si.Procs)
// NOTE(review): sis.SharedRam is not assigned in the lines visible here.
sis.TotalRam = uint64(si.Totalram) / unit
sis.FreeRam = uint64(si.Freeram) / unit
sis.BufferRam = uint64(si.Bufferram) / unit
sis.TotalSwap = uint64(si.Totalswap) / unit
sis.FreeSwap = uint64(si.Freeswap) / unit
sis.TotalHighRam = uint64(si.Totalhigh) / unit
sis.FreeHighRam = uint64(si.Freehigh) / unit
return sis
// String makes the "fmt" Stringer interface happy. It renders uptime, the
// three load averages, the process count, and the RAM and swap figures as a
// tab-separated, multi-line report; the high-memory fields are left out.
//
// NOTE(review): the value receiver copies the whole SI, including its
// sync.Mutex field, which `go vet` (copylocks) reports — confirm whether a
// pointer receiver is acceptable for callers.
func (si SI) String() string {
// XXX: Is the copy of SI done atomic? Not sure.
// Without an outer lock this may print a junk.
return fmt.Sprintf("uptime\t\t%v\nload\t\t%2.2f %2.2f %2.2f\nprocs\t\t%d\n"+
"ram total\t%d kB\nram free\t%d kB\nram buffer\t%d kB\n"+
"swap total\t%d kB\nswap free\t%d kB",
//"high ram total\t%d kB\nhigh ram free\t%d kB\n"
si.Uptime, si.Loads[0], si.Loads[1], si.Loads[2], si.Procs,
si.TotalRam, si.FreeRam, si.BufferRam,
si.TotalSwap, si.FreeSwap,
// archaic si.TotalHighRam, si.FreeHighRam
Convert to string in a thread safe way.
uptime 279h6m21s
load 0.12 0.04 0.05
procs 143
ram total 383752 kB
ram free 254980 kB
ram buffer 7640 kB
swap total 887800 kB
swap free 879356 kB
// ToString returns the same report as String, called through a pointer
// receiver.
// NOTE(review): the surrounding text describes this as thread safe, but no
// locking is visible in this view — confirm.
func (si *SI) ToString() string {
return si.String()

View File

@ -1,7 +1,5 @@
package tensor
import ""
// Module interface is a container with only one method `Forward`
// The following is the `module` concept from the PyTorch documentation:
@ -52,108 +50,50 @@ type ModuleT interface {
* }
* */
// BatchAccuracyForLogits calculates accuracy in batches.
//
// xs and ys are walked with an Iter2 of batchSize; each batch is moved to
// device d and passed through m.ForwardT with train=false. Every batch
// accuracy is weighted by the size of that batch's first dimension, and the
// weighted sum divided by the accumulated size is returned.
//
// TODO: It would be nice if this were a method on an object that implements
// the ModuleT interface.
func BatchAccuracyForLogits(m ModuleT, xs, ys Tensor, d gotch.Device, batchSize int) (retVal float64) {
var (
sumAccuracy float64 = 0.0
sampleCount float64 = 0.0
// Gradient tracking is switched off for the evaluation; the returned value
// is discarded.
_ = MustGradSetEnabled(false)
iter2 := MustNewIter2(xs, ys, int64(batchSize))
for {
item, ok := iter2.Next()
if !ok {
// Size of the first dimension of this batch; used as the batch weight.
size := float64(item.Data.MustSize()[0])
bImages := item.Data.MustTo(d, true)
bLabels := item.Label.MustTo(d, true)
logits := m.ForwardT(bImages, false)
acc := logits.AccuracyForLogits(bLabels)
sumAccuracy += acc.Values()[0] * size
sampleCount += size
// Gradient tracking is switched back on unconditionally.
// NOTE(review): this does not restore whatever state was active before the
// call — confirm that is acceptable.
_ = MustGradSetEnabled(true)
// NOTE(review): yields NaN if no batch was accumulated.
return sumAccuracy / sampleCount
// BatchAccuracyForLogitsIdx is an alternative to BatchAccuracyForLogits that
// calculates accuracy batch by batch on the module weights. It uses tensor
// indexing (Narrow + Idx) instead of Iter2.
//
// The rows of xs and ys are first re-ordered along dimension 0 with the same
// index tensor produced by MustRandperm, then sliced into samples/batchSize
// batches that are moved to device d and evaluated.
func BatchAccuracyForLogitsIdx(m ModuleT, xs, ys Tensor, d gotch.Device, batchSize int) (retVal float64) {
var (
sumAccuracy float64 = 0.0
sampleCount float64 = 0.0
// Switch Grad off
// NOTE(review): the guard value is discarded immediately and the matching
// "switch Grad on" call further down is commented out — confirm the guard
// has the intended effect.
_ = NewNoGradGuard()
totalSize := xs.MustSize()[0]
samples := int(totalSize)
// Same index is applied to inputs and labels so pairs stay aligned.
index := MustRandperm(int64(totalSize), gotch.Int64, gotch.CPU)
imagesTs := xs.MustIndexSelect(0, index, false)
labelsTs := ys.MustIndexSelect(0, index, false)
// Integer division: a trailing partial batch is not counted in `batches`,
// and batches is 0 when samples < batchSize.
batches := samples / batchSize
batchIndex := 0
for i := 0; i < batches; i++ {
start := batchIndex * batchSize
size := batchSize
if samples-start < batchSize {
// size = samples - start
batchIndex += 1
// Indexing
narrowIndex := NewNarrow(int64(start), int64(start+size))
bImages := imagesTs.Idx(narrowIndex)
bLabels := labelsTs.Idx(narrowIndex)
bImages = bImages.MustTo(d, true)
bLabels = bLabels.MustTo(d, true)
// NOTE(review): train=true here, whereas BatchAccuracyForLogits passes false
// for the same evaluation — confirm which is intended.
logits := m.ForwardT(bImages, true)
bAccuracy := logits.AccuracyForLogits(bLabels)
accuVal := bAccuracy.Values()[0]
// NOTE(review): the weight is the first-dimension size of the whole xs, not
// of this batch, so every batch gets the same weight and the result is the
// plain mean of per-batch accuracies — confirm this is intended.
bSamples := float64(xs.MustSize()[0])
sumAccuracy += accuVal * bSamples
sampleCount += bSamples
// Free up tensors on C memory
// logits.MustDrop()
// Switch Grad on
// _ = MustGradSetEnabled(true)
// NOTE(review): yields NaN when no batch was accumulated (batches == 0).
return sumAccuracy / sampleCount
// NOTE: this func has been moved to `nn/sequential` because `NoGradGuard`
// does not seem to work in Go and the function needs an additional VarStore
// parameter. Hence, it was moved to `nn` to avoid a cyclic import.
* // BatchAccuracyForLogits calculates accuracy in batches.
* //
* // TODO: It would be nice if it is one method an object that implements ModuleT
* // interface.
* func BatchAccuracyForLogits(m ModuleT, xs, ys Tensor, d gotch.Device, batchSize int) (retVal float64) {
* var (
* sumAccuracy float64 = 0.0
* sampleCount float64 = 0.0
* )
* _ = MustGradSetEnabled(false)
* iter2 := MustNewIter2(xs, ys, int64(batchSize))
* for {
* item, ok := iter2.Next()
* if !ok {
* break
* }
* size := float64(item.Data.MustSize()[0])
* bImages := item.Data.MustTo(d, true)
* bLabels := item.Label.MustTo(d, true)
* logits := m.ForwardT(bImages, false)
* acc := logits.AccuracyForLogits(bLabels)
* sumAccuracy += acc.Values()[0] * size
* sampleCount += size
* bImages.MustDrop()
* bLabels.MustDrop()
* acc.MustDrop()
* }
* _ = MustGradSetEnabled(true)
* return sumAccuracy / sampleCount
* }
* */
// Tensor methods for Module and ModuleT:
// ======================================