From b69d46eae4f53200fa2a500a0f5b22d56d5c28de Mon Sep 17 00:00:00 2001 From: sugarme Date: Sat, 31 Oct 2020 22:11:50 +1100 Subject: [PATCH] converted to pointer receiver at vision sub-package --- example/basic/main.go | 8 +--- example/mnist/cnn.go | 23 ++++++------ example/mnist/linear.go | 2 +- example/mnist/nn.go | 10 ++--- nn/batch-norm.go | 8 ++-- nn/conv.go | 4 +- nn/linear.go | 2 +- vision/alexnet.go | 30 +++++++-------- vision/cifar.go | 10 ++--- vision/dataset.go | 33 ++++++++-------- vision/densenet.go | 30 +++++++-------- vision/efficientnet.go | 83 +++++++++++++++++++++-------------------- vision/image.go | 58 ++++++++++++++-------------- vision/imagenet.go | 82 ++++++++++++++++++++-------------------- vision/inception.go | 57 ++++++++++++++-------------- vision/mnist.go | 15 +++----- vision/mobilenet.go | 14 +++---- vision/resnet.go | 63 +++++++++++++++---------------- vision/squeezenet.go | 34 ++++++++--------- vision/vgg.go | 42 ++++++++++----------- 20 files changed, 296 insertions(+), 312 deletions(-) diff --git a/example/basic/main.go b/example/basic/main.go index 57951e4..f58c056 100644 --- a/example/basic/main.go +++ b/example/basic/main.go @@ -1,7 +1,7 @@ package main import ( - "fmt" + // "fmt" "github.com/sugarme/gotch" ts "github.com/sugarme/gotch/tensor" @@ -13,10 +13,4 @@ func main() { tensor := ts.MustArange(ts.IntScalar(2*3*4), gotch.Int64, gotch.CPU).MustView([]int64{2, 3, 4}, true) tensor.Print() - - fmt.Printf("tensor is nil: %v\n", tensor.IsNil()) - - tensor.MustDrop() - - fmt.Printf("tensor is nil: %v\n", tensor.IsNil()) } diff --git a/example/mnist/cnn.go b/example/mnist/cnn.go index 760c79e..c8f80b5 100644 --- a/example/mnist/cnn.go +++ b/example/mnist/cnn.go @@ -22,26 +22,26 @@ const ( ) type Net struct { - conv1 nn.Conv2D - conv2 nn.Conv2D - fc1 nn.Linear - fc2 nn.Linear + conv1 *nn.Conv2D + conv2 *nn.Conv2D + fc1 *nn.Linear + fc2 *nn.Linear } -func newNet(vs nn.Path) Net { +func newNet(vs *nn.Path) *Net { conv1 := 
nn.NewConv2D(vs, 1, 32, 5, nn.DefaultConv2DConfig()) conv2 := nn.NewConv2D(vs, 32, 64, 5, nn.DefaultConv2DConfig()) fc1 := nn.NewLinear(vs, 1024, 1024, nn.DefaultLinearConfig()) fc2 := nn.NewLinear(vs, 1024, 10, nn.DefaultLinearConfig()) - return Net{ + return &Net{ conv1, conv2, fc1, fc2} } -func (n Net) ForwardT(xs ts.Tensor, train bool) (retVal ts.Tensor) { +func (n *Net) ForwardT(xs *ts.Tensor, train bool) *ts.Tensor { outView1 := xs.MustView([]int64{-1, 1, 28, 28}, false) defer outView1.MustDrop() @@ -57,20 +57,19 @@ func (n Net) ForwardT(xs ts.Tensor, train bool) (retVal ts.Tensor) { outView2 := outMP2.MustView([]int64{-1, 1024}, true) defer outView2.MustDrop() - outFC1 := outView2.Apply(&n.fc1) + outFC1 := outView2.Apply(n.fc1) outRelu := outFC1.MustRelu(true) defer outRelu.MustDrop() outDropout := ts.MustDropout(outRelu, 0.5, train) defer outDropout.MustDrop() - return outDropout.Apply(&n.fc2) - + return outDropout.Apply(n.fc2) } func runCNN1() { - var ds vision.Dataset + var ds *vision.Dataset ds = vision.LoadMNISTDir(MnistDirNN) testImages := ds.TestImages testLabels := ds.TestLabels @@ -98,7 +97,7 @@ func runCNN1() { batches := samples / batchSize batchIndex := 0 - var epocLoss ts.Tensor + var epocLoss *ts.Tensor for i := 0; i < batches; i++ { start := batchIndex * batchSize size := batchSize diff --git a/example/mnist/linear.go b/example/mnist/linear.go index 61e492a..2f19266 100644 --- a/example/mnist/linear.go +++ b/example/mnist/linear.go @@ -17,7 +17,7 @@ const ( ) func runLinear() { - var ds vision.Dataset + var ds *vision.Dataset ds = vision.LoadMNISTDir(MnistDir) device := gotch.CPU diff --git a/example/mnist/nn.go b/example/mnist/nn.go index 66b2217..e2d7625 100644 --- a/example/mnist/nn.go +++ b/example/mnist/nn.go @@ -23,21 +23,21 @@ const ( var l nn.Linear -func netInit(vs nn.Path) ts.Module { +func netInit(vs *nn.Path) ts.Module { n := nn.Seq() n.Add(nn.NewLinear(vs, ImageDimNN, HiddenNodesNN, nn.DefaultLinearConfig())) - 
n.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + n.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.MustRelu(false) })) n.Add(nn.NewLinear(vs, HiddenNodesNN, LabelNN, nn.DefaultLinearConfig())) - return &n + return n } -func train(trainX, trainY, testX, testY ts.Tensor, m ts.Module, opt nn.Optimizer, epoch int) { +func train(trainX, trainY, testX, testY *ts.Tensor, m ts.Module, opt *nn.Optimizer, epoch int) { logits := m.Forward(trainX) loss := logits.CrossEntropyForLogits(trainY) @@ -56,7 +56,7 @@ func train(trainX, trainY, testX, testY ts.Tensor, m ts.Module, opt nn.Optimizer func runNN() { - var ds vision.Dataset + var ds *vision.Dataset ds = vision.LoadMNISTDir(MnistDirNN) vs := nn.NewVarStore(gotch.CPU) net := netInit(vs.Root()) diff --git a/nn/batch-norm.go b/nn/batch-norm.go index 1b5d471..09336a8 100644 --- a/nn/batch-norm.go +++ b/nn/batch-norm.go @@ -38,7 +38,7 @@ type BatchNorm struct { } // NewBatchNorm creates a new BatchNorm layer -func NewBatchNorm(vs Path, nd uint, outDim int64, config *BatchNormConfig) *BatchNorm { +func NewBatchNorm(vs *Path, nd uint, outDim int64, config *BatchNormConfig) *BatchNorm { return &BatchNorm{ config: config, RunningMean: vs.ZerosNoTrain("running_mean", []int64{outDim}), @@ -52,7 +52,7 @@ func NewBatchNorm(vs Path, nd uint, outDim int64, config *BatchNormConfig) *Batc // // The input shape is assumed to be (N, C, L). Normalization // is performed over the first batch dimension N. -func BatchNorm1D(vs Path, outDim int64, config *BatchNormConfig) *BatchNorm { +func BatchNorm1D(vs *Path, outDim int64, config *BatchNormConfig) *BatchNorm { return NewBatchNorm(vs, 1, outDim, config) } @@ -60,7 +60,7 @@ func BatchNorm1D(vs Path, outDim int64, config *BatchNormConfig) *BatchNorm { // // The input shape is assumed to be (N, C, H, W). Normalization // is performed over the first batch dimension N. 
-func BatchNorm2D(vs Path, outDim int64, config *BatchNormConfig) *BatchNorm { +func BatchNorm2D(vs *Path, outDim int64, config *BatchNormConfig) *BatchNorm { return NewBatchNorm(vs, 2, outDim, config) } @@ -68,7 +68,7 @@ func BatchNorm2D(vs Path, outDim int64, config *BatchNormConfig) *BatchNorm { // // The input shape is assumed to be (N, C, D, H, W). Normalization // is performed over the first batch dimension N. -func BatchNorm3D(vs Path, outDim int64, config *BatchNormConfig) *BatchNorm { +func BatchNorm3D(vs *Path, outDim int64, config *BatchNormConfig) *BatchNorm { return NewBatchNorm(vs, 3, outDim, config) } diff --git a/nn/conv.go b/nn/conv.go index 9c0a103..5844fd3 100644 --- a/nn/conv.go +++ b/nn/conv.go @@ -96,7 +96,7 @@ type Conv2D struct { Config *Conv2DConfig } -func NewConv2D(vs Path, inDim, outDim int64, k int64, cfg *Conv2DConfig) *Conv2D { +func NewConv2D(vs *Path, inDim, outDim int64, k int64, cfg *Conv2DConfig) *Conv2D { var ( ws *ts.Tensor bs *ts.Tensor @@ -190,7 +190,7 @@ func buildConvConfig(ksizes []int64) interface{} { // NewConv is a generic builder to build Conv1D, Conv2D, Conv3D. It returns // an interface Conv which might need a type assertion for further use. 
-func NewConv(vs Path, inDim, outDim int64, ksizes []int64, config interface{}) Conv { +func NewConv(vs *Path, inDim, outDim int64, ksizes []int64, config interface{}) Conv { configT := reflect.TypeOf(config) var ( diff --git a/nn/linear.go b/nn/linear.go index 3150bf4..dfce112 100644 --- a/nn/linear.go +++ b/nn/linear.go @@ -37,7 +37,7 @@ type Linear struct { // inDim - input dimension (x) [input features - columns] // outDim - output dimension (y) [output features - columns] // NOTE: w will have shape{outDim, inDim}; b will have shape{outDim} -func NewLinear(vs Path, inDim, outDim int64, c *LinearConfig) *Linear { +func NewLinear(vs *Path, inDim, outDim int64, c *LinearConfig) *Linear { var bs *ts.Tensor // bs has size of output dimension diff --git a/vision/alexnet.go b/vision/alexnet.go index 556acc4..813bd61 100644 --- a/vision/alexnet.go +++ b/vision/alexnet.go @@ -8,7 +8,7 @@ import ( // AlexNet implementation // https://arxiv.org/abs/1404.5997 -func anConv2d(p nn.Path, cIn, cOut, ksize, padding, stride int64) (retVal nn.Conv2D) { +func anConv2d(p *nn.Path, cIn, cOut, ksize, padding, stride int64) *nn.Conv2D { config := nn.DefaultConv2DConfig() config.Stride = []int64{stride, stride} config.Padding = []int64{padding, padding} @@ -16,15 +16,15 @@ func anConv2d(p nn.Path, cIn, cOut, ksize, padding, stride int64) (retVal nn.Con return nn.NewConv2D(p, cIn, cOut, ksize, config) } -func anMaxPool2d(xs ts.Tensor, ksize, stride int64) (retVal ts.Tensor) { +func anMaxPool2d(xs *ts.Tensor, ksize, stride int64) *ts.Tensor { return xs.MustMaxPool2d([]int64{ksize, ksize}, []int64{stride, stride}, []int64{0, 0}, []int64{1, 1}, false, false) } -func features(p nn.Path) (retVal ts.ModuleT) { +func features(p *nn.Path) ts.ModuleT { seq := nn.SeqT() seq.Add(anConv2d(p.Sub("0"), 3, 64, 11, 2, 4)) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { tmp1 := xs.MustRelu(false) res := anMaxPool2d(tmp1, 3, 2) tmp1.MustDrop() 
@@ -33,7 +33,7 @@ func features(p nn.Path) (retVal ts.ModuleT) { seq.Add(anConv2d(p.Sub("3"), 64, 192, 5, 1, 2)) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { tmp1 := xs.MustRelu(false) res := anMaxPool2d(tmp1, 3, 2) tmp1.MustDrop() @@ -42,19 +42,19 @@ func features(p nn.Path) (retVal ts.ModuleT) { seq.Add(anConv2d(p.Sub("6"), 192, 384, 3, 1, 1)) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.MustRelu(false) })) seq.Add(anConv2d(p.Sub("8"), 384, 256, 3, 1, 1)) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.MustRelu(false) })) seq.Add(anConv2d(p.Sub("10"), 256, 256, 3, 1, 1)) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { tmp1 := xs.MustRelu(false) res := anMaxPool2d(tmp1, 3, 2) tmp1.MustDrop() @@ -64,26 +64,26 @@ func features(p nn.Path) (retVal ts.ModuleT) { return seq } -func classifier(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func classifier(p *nn.Path, nclasses int64) ts.ModuleT { seq := nn.SeqT() - seq.AddFnT(nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + seq.AddFnT(nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { return ts.MustDropout(xs, 0.5, train) })) seq.Add(nn.NewLinear(p.Sub("1"), 256*6*6, 4096, nn.DefaultLinearConfig())) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.MustRelu(false) })) - seq.AddFnT(nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + seq.AddFnT(nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { return ts.MustDropout(xs, 0.5, train) })) seq.Add(nn.NewLinear(p.Sub("4"), 4096, 4096, nn.DefaultLinearConfig())) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.MustRelu(false) })) @@ -92,12 
+92,12 @@ func classifier(p nn.Path, nclasses int64) (retVal ts.ModuleT) { return seq } -func AlexNet(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func AlexNet(p *nn.Path, nclasses int64) ts.ModuleT { seq := nn.SeqT() seq.Add(features(p.Sub("features"))) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { tmp1 := xs.MustAdaptiveAvgPool2d([]int64{6, 6}, false) res := tmp1.FlatView() tmp1.MustDrop() diff --git a/vision/cifar.go b/vision/cifar.go index 2d6cfb6..54ca5e2 100644 --- a/vision/cifar.go +++ b/vision/cifar.go @@ -24,7 +24,7 @@ const ( samplesPerFile int64 = 10000 ) -func readFile(filename string) (imagesTs ts.Tensor, labelsTs ts.Tensor) { +func readFile(filename string) (imagesTs *ts.Tensor, labelsTs *ts.Tensor) { f, err := os.Open(filename) if err != nil { log.Fatalf("readImages errors: %v\n", err) @@ -74,7 +74,7 @@ func readFile(filename string) (imagesTs ts.Tensor, labelsTs ts.Tensor) { return imagesTs, labelsTs } -func CFLoadDir(dir string) (retVal Dataset) { +func CFLoadDir(dir string) *Dataset { dirAbs, err := filepath.Abs(dir) if err != nil { @@ -96,11 +96,11 @@ func CFLoadDir(dir string) (retVal Dataset) { for _, f := range trainFiles { img, l := readFile(fmt.Sprintf("%v/%v", dirAbs, f)) - trainImages = append(trainImages, img) - trainLabels = append(trainLabels, l) + trainImages = append(trainImages, *img) + trainLabels = append(trainLabels, *l) } - return Dataset{ + return &Dataset{ TrainImages: ts.MustCat(trainImages, 0), TrainLabels: ts.MustCat(trainLabels, 0), TestImages: testImages, diff --git a/vision/dataset.go b/vision/dataset.go index 1a4d211..d690de4 100644 --- a/vision/dataset.go +++ b/vision/dataset.go @@ -12,10 +12,10 @@ import ( ) type Dataset struct { - TrainImages ts.Tensor - TrainLabels ts.Tensor - TestImages ts.Tensor - TestLabels ts.Tensor + TrainImages *ts.Tensor + TrainLabels *ts.Tensor + TestImages *ts.Tensor + TestLabels *ts.Tensor Labels int64 } @@ -23,20 +23,20 
@@ type Dataset struct { //================= // TrainIter creates an iterator of Iter type for train images and labels -func (ds Dataset) TrainIter(batchSize int64) (retVal ts.Iter2) { +func (ds *Dataset) TrainIter(batchSize int64) *ts.Iter2 { return ts.MustNewIter2(ds.TrainImages, ds.TrainLabels, batchSize) } // TestIter creates an iterator of Iter type for test images and labels -func (ds Dataset) TestIter(batchSize int64) (retVal ts.Iter2) { +func (ds *Dataset) TestIter(batchSize int64) *ts.Iter2 { return ts.MustNewIter2(ds.TestImages, ds.TestLabels, batchSize) } // RandomFlip randomly applies horizontal flips // This expects a 4 dimension NCHW tensor and returns a tensor with // an identical shape. -func RandomFlip(t ts.Tensor) (retVal ts.Tensor) { +func RandomFlip(t *ts.Tensor) *ts.Tensor { size := t.MustSize() @@ -53,7 +53,7 @@ func RandomFlip(t ts.Tensor) (retVal ts.Tensor) { outputView := output.Idx(ts.NewSelect(int64(batchIdx))) tView := t.Idx(ts.NewSelect(int64(batchIdx))) - var src ts.Tensor + var src *ts.Tensor if rand.Float64() == 1.0 { src = tView } else { @@ -72,7 +72,7 @@ func RandomFlip(t ts.Tensor) (retVal ts.Tensor) { // Pad the image using reflections and take some random crops. // This expects a 4 dimension NCHW tensor and returns a tensor with // an identical shape. -func RandomCrop(t ts.Tensor, pad int64) (retVal ts.Tensor) { +func RandomCrop(t *ts.Tensor, pad int64) *ts.Tensor { size := t.MustSize() @@ -115,7 +115,7 @@ func RandomCrop(t ts.Tensor, pad int64) (retVal ts.Tensor) { // Applies cutout: randomly remove some square areas in the original images. 
// https://arxiv.org/abs/1708.04552 -func RandomCutout(t ts.Tensor, sz int64) (retVal ts.Tensor) { +func RandomCutout(t *ts.Tensor, sz int64) *ts.Tensor { size := t.MustSize() @@ -168,11 +168,11 @@ func RandomCutout(t ts.Tensor, sz int64) (retVal ts.Tensor) { return output } -func Augmentation(t ts.Tensor, flip bool, crop int64, cutout int64) (retVal ts.Tensor) { +func Augmentation(t *ts.Tensor, flip bool, crop int64, cutout int64) *ts.Tensor { tclone := t.MustShallowClone() - var flipTs ts.Tensor + var flipTs *ts.Tensor if flip { flipTs = RandomFlip(tclone) tclone.MustDrop() @@ -180,7 +180,7 @@ func Augmentation(t ts.Tensor, flip bool, crop int64, cutout int64) (retVal ts.T flipTs = tclone } - var cropTs ts.Tensor + var cropTs *ts.Tensor if crop > 0 { cropTs = RandomCrop(flipTs, crop) flipTs.MustDrop() @@ -188,12 +188,13 @@ func Augmentation(t ts.Tensor, flip bool, crop int64, cutout int64) (retVal ts.T cropTs = flipTs } + var output *ts.Tensor if cutout > 0 { - retVal = RandomCutout(cropTs, cutout) + output = RandomCutout(cropTs, cutout) cropTs.MustDrop() } else { - retVal = cropTs + output = cropTs } - return retVal + return output } diff --git a/vision/densenet.go b/vision/densenet.go index c9ba4fe..a8eca2a 100644 --- a/vision/densenet.go +++ b/vision/densenet.go @@ -12,7 +12,7 @@ import ( ts "github.com/sugarme/gotch/tensor" ) -func dnConv2d(p nn.Path, cIn, cOut, ksize, padding, stride int64) (retVal nn.Conv2D) { +func dnConv2d(p *nn.Path, cIn, cOut, ksize, padding, stride int64) *nn.Conv2D { config := nn.DefaultConv2DConfig() config.Stride = []int64{stride, stride} config.Padding = []int64{padding, padding} @@ -21,14 +21,14 @@ func dnConv2d(p nn.Path, cIn, cOut, ksize, padding, stride int64) (retVal nn.Con return nn.NewConv2D(p, cIn, cOut, ksize, config) } -func denseLayer(p nn.Path, cIn, bnSize, growth int64) (retVal ts.ModuleT) { +func denseLayer(p *nn.Path, cIn, bnSize, growth int64) ts.ModuleT { cInter := bnSize * growth bn1 := 
nn.BatchNorm2D(p.Sub("norm1"), cIn, nn.DefaultBatchNormConfig()) conv1 := dnConv2d(p.Sub("conv1"), cIn, cInter, 1, 0, 1) bn2 := nn.BatchNorm2D(p.Sub("norm2"), cInter, nn.DefaultBatchNormConfig()) conv2 := dnConv2d(p.Sub("conv2"), cInter, growth, 3, 1, 1) - return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { ys1 := xs.ApplyT(bn1, train) ys2 := ys1.MustRelu(true) ys3 := ys2.Apply(conv1) @@ -39,14 +39,14 @@ func denseLayer(p nn.Path, cIn, bnSize, growth int64) (retVal ts.ModuleT) { ys := ys5.Apply(conv2) ys5.MustDrop() - res := ts.MustCat([]ts.Tensor{xs, ys}, 1) + res := ts.MustCat([]ts.Tensor{*xs, *ys}, 1) ys.MustDrop() return res }) } -func denseBlock(p nn.Path, cIn, bnSize, growth, nlayers int64) (retVal ts.ModuleT) { +func denseBlock(p *nn.Path, cIn, bnSize, growth, nlayers int64) ts.ModuleT { seq := nn.SeqT() for i := 0; i < int(nlayers); i++ { @@ -56,25 +56,25 @@ func denseBlock(p nn.Path, cIn, bnSize, growth, nlayers int64) (retVal ts.Module return seq } -func transition(p nn.Path, cIn, cOut int64) (retVal ts.ModuleT) { +func transition(p *nn.Path, cIn, cOut int64) ts.ModuleT { seq := nn.SeqT() seq.Add(nn.BatchNorm2D(p.Sub("norm"), cIn, nn.DefaultBatchNormConfig())) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.MustRelu(false) })) seq.Add(dnConv2d(p.Sub("conv"), cIn, cOut, 1, 0, 1)) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.AvgPool2DDefault(2, false) })) return seq } -func densenet(p nn.Path, cIn, cOut, bnSize int64, blockConfig []int64, growth int64) (retVal ts.ModuleT) { +func densenet(p *nn.Path, cIn, cOut, bnSize int64, blockConfig []int64, growth int64) ts.ModuleT { fp := p.Sub("features") seq := nn.SeqT() @@ -82,7 +82,7 @@ func densenet(p nn.Path, cIn, cOut, bnSize int64, blockConfig []int64, growth in 
seq.Add(nn.BatchNorm2D(fp.Sub("norm0"), cIn, nn.DefaultBatchNormConfig())) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { tmp := xs.MustRelu(false) return tmp.MustMaxPool2d([]int64{3, 3}, []int64{2, 2}, []int64{1, 1}, []int64{1, 1}, false, true) })) @@ -101,7 +101,7 @@ func densenet(p nn.Path, cIn, cOut, bnSize int64, blockConfig []int64, growth in seq.Add(nn.BatchNorm2D(fp.Sub("norm5"), nfeat, nn.DefaultBatchNormConfig())) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { tmp1 := xs.MustRelu(false) tmp2 := tmp1.MustAvgPool2d([]int64{7, 7}, []int64{1, 1}, []int64{0, 0}, false, true, 1, true) res := tmp2.FlatView() @@ -114,18 +114,18 @@ func densenet(p nn.Path, cIn, cOut, bnSize int64, blockConfig []int64, growth in return seq } -func DenseNet121(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func DenseNet121(p *nn.Path, nclasses int64) ts.ModuleT { return densenet(p, 64, 4, 32, []int64{6, 12, 24, 16}, nclasses) } -func DenseNet161(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func DenseNet161(p *nn.Path, nclasses int64) ts.ModuleT { return densenet(p, 96, 4, 48, []int64{6, 12, 36, 24}, nclasses) } -func DenseNet169(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func DenseNet169(p *nn.Path, nclasses int64) ts.ModuleT { return densenet(p, 64, 4, 32, []int64{6, 12, 32, 32}, nclasses) } -func DenseNet201(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func DenseNet201(p *nn.Path, nclasses int64) ts.ModuleT { return densenet(p, 64, 4, 32, []int64{6, 12, 48, 32}, nclasses) } diff --git a/vision/efficientnet.go b/vision/efficientnet.go index 0aa482a..b51c7e1 100644 --- a/vision/efficientnet.go +++ b/vision/efficientnet.go @@ -23,8 +23,8 @@ type BlockArgs struct { Stride int64 } -func ba(k, r, i, o, er int64, sr float64, s int64) (retVal BlockArgs) { - return BlockArgs{ +func ba(k, r, i, o, er int64, sr float64, s int64) *BlockArgs { + 
return &BlockArgs{ KernelSize: k, NumRepeat: r, InputFilters: i, @@ -37,13 +37,13 @@ func ba(k, r, i, o, er int64, sr float64, s int64) (retVal BlockArgs) { func blockArgs() (retVal []BlockArgs) { return []BlockArgs{ - ba(3, 1, 32, 16, 1, 0.25, 1), - ba(3, 2, 16, 24, 6, 0.25, 2), - ba(5, 2, 24, 40, 6, 0.25, 2), - ba(3, 3, 40, 80, 6, 0.25, 2), - ba(5, 3, 80, 112, 6, 0.25, 1), - ba(5, 4, 112, 192, 6, 0.25, 2), - ba(3, 1, 192, 320, 6, 0.25, 1), + *ba(3, 1, 32, 16, 1, 0.25, 1), + *ba(3, 2, 16, 24, 6, 0.25, 2), + *ba(5, 2, 24, 40, 6, 0.25, 2), + *ba(3, 3, 40, 80, 6, 0.25, 2), + *ba(5, 3, 80, 112, 6, 0.25, 1), + *ba(5, 4, 112, 192, 6, 0.25, 2), + *ba(3, 1, 192, 320, 6, 0.25, 1), } } @@ -54,12 +54,12 @@ type params struct { Dropout float64 } -func (p params) roundRepeats(repeats int64) (retVal int64) { +func (p *params) roundRepeats(repeats int64) int64 { return int64(math.Ceil(p.Depth * float64(repeats))) } -func (p params) roundFilters(filters int64) (retVal int64) { +func (p *params) roundFilters(filters int64) int64 { var divisor int64 = 8 filF := p.Width * float64(filters) filI := int64(filF + float64(divisor)/2.0) @@ -74,11 +74,11 @@ func (p params) roundFilters(filters int64) (retVal int64) { } // Conv2D with same padding -func enConv2d(vs nn.Path, i, o, k int64, c nn.Conv2DConfig, train bool) (retVal ts.ModuleT) { +func enConv2d(vs *nn.Path, i, o, k int64, c *nn.Conv2DConfig, train bool) ts.ModuleT { conv2d := nn.NewConv2D(vs, i, o, k, c) s := c.Stride - return nn.NewFunc(func(xs ts.Tensor) (res ts.Tensor) { + return nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { size := xs.MustSize() ih := size[2] iw := size[3] @@ -94,6 +94,7 @@ func enConv2d(vs nn.Path, i, o, k int64, c nn.Conv2DConfig, train bool) (retVal padW = ((ow - 1) * s[0]) + k - iw } + var res *ts.Tensor if padW > 0 || padH > 0 { zeroP2D := xs.MustZeroPad2d(padW/2, padW-padW/2, padH/2, padH-padH/2, false) res = zeroP2D.ApplyT(conv2d, train) @@ -106,8 +107,8 @@ func enConv2d(vs nn.Path, i, o, k int64, c 
nn.Conv2DConfig, train bool) (retVal }) } -func newParams(width, depth float64, res int64, dropout float64) (retVal params) { - return params{ +func newParams(width, depth float64, res int64, dropout float64) *params { + return &params{ width, depth, res, @@ -115,39 +116,39 @@ func newParams(width, depth float64, res int64, dropout float64) (retVal params) } } -func b0() (retVal params) { +func b0() *params { return newParams(1.0, 1.0, 224, 0.2) } -func b1() (retVal params) { +func b1() *params { return newParams(1.0, 1.1, 240, 0.2) } -func b2() (retVal params) { +func b2() *params { return newParams(1.1, 1.2, 260, 0.3) } -func b3() (retVal params) { +func b3() *params { return newParams(1.2, 1.4, 300, 0.3) } -func b4() (retVal params) { +func b4() *params { return newParams(1.4, 1.8, 380, 0.4) } -func b5() (retVal params) { +func b5() *params { return newParams(1.6, 2.2, 456, 0.4) } -func b6() (retVal params) { +func b6() *params { return newParams(1.8, 2.6, 528, 0.5) } -func b7() (retVal params) { +func b7() *params { return newParams(2.0, 3.1, 600, 0.5) } -func block(p nn.Path, args BlockArgs) (retVal ts.ModuleT) { +func block(p *nn.Path, args BlockArgs) ts.ModuleT { inp := args.InputFilters oup := args.InputFilters * args.ExpandRatio @@ -169,7 +170,7 @@ func block(p nn.Path, args BlockArgs) (retVal ts.ModuleT) { if args.ExpandRatio != 1 { expansion.Add(enConv2d(p.Sub("_expand_conv"), inp, oup, 1, convConfigNoBias, false)) expansion.Add(nn.BatchNorm2D(p.Sub("_bn0"), oup, bn2d)) - expansion.AddFn(nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + expansion.AddFn(nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { return xs.Swish() })) } @@ -178,7 +179,7 @@ func block(p nn.Path, args BlockArgs) (retVal ts.ModuleT) { depthwiseBn := nn.BatchNorm2D(p.Sub("_bn1"), oup, bn2d) // NOTE: args.SeRatio is optional float64.
Default = 0 - var se nn.SequentialT // se will be nil if args.SeRatio == 0 + var se *nn.SequentialT // se will be nil if args.SeRatio == 0 if args.SeRatio > 0 { var nsc int64 = 1 if (float64(inp) * args.SeRatio) > 1 { @@ -188,7 +189,7 @@ func block(p nn.Path, args BlockArgs) (retVal ts.ModuleT) { se = nn.SeqT() se.Add(enConv2d(p.Sub("_se_reduce"), oup, nsc, 1, nn.DefaultConv2DConfig(), false)) - se.AddFn(nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + se.AddFn(nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { return xs.Swish() })) @@ -199,8 +200,8 @@ func block(p nn.Path, args BlockArgs) (retVal ts.ModuleT) { projectBn := nn.BatchNorm2D(p.Sub("_bn2"), finalOup, bn2d) - return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { - var ys ts.Tensor + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { + var ys *ts.Tensor if args.ExpandRatio == 1 { ys = xs.MustShallowClone() } else { @@ -213,7 +214,7 @@ func block(p nn.Path, args BlockArgs) (retVal ts.ModuleT) { ys3 := ys2.Swish() ys2.MustDrop() - var ys4 ts.Tensor + var ys4 *ts.Tensor // NOTE: args.SeRatio is optional value. 
if args.SeRatio == 0 { ys4 = ys3 @@ -238,7 +239,7 @@ func block(p nn.Path, args BlockArgs) (retVal ts.ModuleT) { }) } -func efficientnet(p nn.Path, params params, nclasses int64) (retVal ts.ModuleT) { +func efficientnet(p *nn.Path, params *params, nclasses int64) ts.ModuleT { args := blockArgs() @@ -287,13 +288,13 @@ func efficientnet(p nn.Path, params params, nclasses int64) (retVal ts.ModuleT) classifier := nn.SeqT() - classifier.AddFnT(nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + classifier.AddFnT(nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { return ts.MustDropout(xs, 0.2, train) })) classifier.Add(nn.NewLinear(p.Sub("_fc"), outC, nclasses, nn.DefaultLinearConfig())) - return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { tmp1 := xs.ApplyT(convStem, false) tmp2 := tmp1.ApplyT(bn0, train) tmp1.MustDrop() @@ -318,34 +319,34 @@ func efficientnet(p nn.Path, params params, nclasses int64) (retVal ts.ModuleT) } -func EfficientNetB0(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func EfficientNetB0(p *nn.Path, nclasses int64) ts.ModuleT { return efficientnet(p, b0(), nclasses) } -func EfficientNetB1(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func EfficientNetB1(p *nn.Path, nclasses int64) ts.ModuleT { return efficientnet(p, b1(), nclasses) } -func EfficientNetB2(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func EfficientNetB2(p *nn.Path, nclasses int64) ts.ModuleT { return efficientnet(p, b2(), nclasses) } -func EfficientNetB3(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func EfficientNetB3(p *nn.Path, nclasses int64) ts.ModuleT { return efficientnet(p, b3(), nclasses) } -func EfficientNetB4(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func EfficientNetB4(p *nn.Path, nclasses int64) ts.ModuleT { return efficientnet(p, b4(), nclasses) } -func EfficientNetB5(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func EfficientNetB5(p *nn.Path, nclasses 
int64) ts.ModuleT { return efficientnet(p, b5(), nclasses) } -func EfficientNetB6(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func EfficientNetB6(p *nn.Path, nclasses int64) ts.ModuleT { return efficientnet(p, b6(), nclasses) } -func EfficientNetB7(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func EfficientNetB7(p *nn.Path, nclasses int64) ts.ModuleT { return efficientnet(p, b7(), nclasses) } diff --git a/vision/image.go b/vision/image.go index bd4c4a9..c818f45 100644 --- a/vision/image.go +++ b/vision/image.go @@ -14,18 +14,16 @@ import ( ) // (height, width, channel) -> (channel, height, width) -func hwcToCHW(tensor ts.Tensor) (retVal ts.Tensor) { - var err error - retVal, err = tensor.Permute([]int64{2, 0, 1}, true) +func hwcToCHW(tensor *ts.Tensor) *ts.Tensor { + retVal, err := tensor.Permute([]int64{2, 0, 1}, true) if err != nil { log.Fatalf("hwcToCHW error: %v\n", err) } return retVal } -func chwToHWC(tensor ts.Tensor) (retVal ts.Tensor) { - var err error - retVal, err = tensor.Permute([]int64{1, 2, 0}, true) +func chwToHWC(tensor *ts.Tensor) *ts.Tensor { + retVal, err := tensor.Permute([]int64{1, 2, 0}, true) if err != nil { log.Fatalf("hwcToCHW error: %v\n", err) } @@ -35,15 +33,14 @@ func chwToHWC(tensor ts.Tensor) (retVal ts.Tensor) { // Load loads an image from a file. // // On success returns a tensor of shape [channel, height, width]. -func Load(path string) (retVal ts.Tensor, err error) { - var tensor ts.Tensor - tensor, err = ts.LoadHwc(path) +func Load(path string) (*ts.Tensor, error) { + var tensor *ts.Tensor + tensor, err := ts.LoadHwc(path) if err != nil { - return retVal, err + return nil, err } - retVal = hwcToCHW(tensor) - return retVal, nil + return hwcToCHW(tensor), nil } // Save saves an image to a file. @@ -53,7 +50,7 @@ func Load(path string) (retVal ts.Tensor, err error) { // are jpg, png, tga, and bmp. // The tensor input should be of kind UInt8 with values ranging from // 0 to 255. 
-func Save(tensor ts.Tensor, path string) (err error) { +func Save(tensor *ts.Tensor, path string) error { t, err := tensor.Totype(gotch.Uint8, false) // false to keep the input tensor if err != nil { err = fmt.Errorf("Save - Tensor.Totype() error: %v\n", err) @@ -81,21 +78,19 @@ func Save(tensor ts.Tensor, path string) (err error) { // // This expects as input a tensor of shape [channel, height, width] and returns // a tensor of shape [channel, out_h, out_w]. -func Resize(t ts.Tensor, outW int64, outH int64) (retVal ts.Tensor, err error) { +func Resize(t *ts.Tensor, outW int64, outH int64) (*ts.Tensor, error) { tmpTs, err := ts.ResizeHwc(chwToHWC(t), outW, outH) if err != nil { - return retVal, err + return nil, err } - retVal = hwcToCHW(tmpTs) - - return retVal, nil + return hwcToCHW(tmpTs), nil } -func resizePreserveAspectRatioHWC(t ts.Tensor, outW int64, outH int64) (retVal ts.Tensor, err error) { +func resizePreserveAspectRatioHWC(t *ts.Tensor, outW int64, outH int64) (*ts.Tensor, error) { tsSize, err := t.Size() if err != nil { err = fmt.Errorf("resizePreserveAspectRatioHWC - ts.Size() method call err: %v\n", err) - return retVal, err + return nil, err } // TODO: check it @@ -106,7 +101,7 @@ func resizePreserveAspectRatioHWC(t ts.Tensor, outW int64, outH int64) (retVal t tmpTs, err := ts.ResizeHwc(t, outW, outH) if err != nil { err = fmt.Errorf("resizePreserveAspectRatioHWC - ts.ResizeHwc() method call err: %v\n", err) - return retVal, err + return nil, err } return hwcToCHW(tmpTs), nil } else { @@ -123,18 +118,19 @@ func resizePreserveAspectRatioHWC(t ts.Tensor, outW int64, outH int64) (retVal t tmpTs, err := ts.ResizeHwc(t, resizeW, resizeH) tensor := hwcToCHW(tmpTs) - var tensorW ts.Tensor - var tensorH ts.Tensor + var tensorW *ts.Tensor + var tensorH *ts.Tensor if resizeW == outW { tensorW = tensor } else { tensorW, err = tensor.Narrow(2, (resizeW-outW)/2, outW, true) if err != nil { err = fmt.Errorf("resizePreserveAspectRatioHWC - ts.Narrow() method call 
err: %v\n", err) - return retVal, err + return nil, err } } + var retVal *ts.Tensor if int64(resizeH) == outH { retVal = tensorW } else { @@ -153,28 +149,28 @@ func resizePreserveAspectRatioHWC(t ts.Tensor, outW int64, outH int64) (retVal t // ResizePreserveAspectRatio resizes an image, preserve the aspect ratio by taking a center crop. // // This expects as input a tensor of shape [channel, height, width] and returns -func ResizePreserveAspectRatio(t ts.Tensor, outW int64, outH int64) (retVal ts.Tensor, err error) { +func ResizePreserveAspectRatio(t *ts.Tensor, outW int64, outH int64) (*ts.Tensor, error) { return resizePreserveAspectRatioHWC(chwToHWC(t), outW, outH) } // LoadAndResize loads and resizes an image, preserve the aspect ratio by taking a center crop. -func LoadAndResize(path string, outW int64, outH int64) (retVal ts.Tensor, err error) { +func LoadAndResize(path string, outW int64, outH int64) (*ts.Tensor, error) { tensor, err := ts.LoadHwc(path) if err != nil { - return retVal, err + return nil, err } return resizePreserveAspectRatioHWC(tensor, outW, outH) } // LoadDir loads all the images in a directory. 
-func LoadDir(dir string, outW int64, outH int64) (retVal ts.Tensor, err error) { +func LoadDir(dir string, outW int64, outH int64) (*ts.Tensor, error) { var filePaths []string // "dir/filename.ext" var tensors []ts.Tensor files, err := ioutil.ReadDir(dir) if err != nil { err = fmt.Errorf("LoadDir - Read directory error: %v\n", err) - return retVal, err + return nil, err } for _, f := range files { filePaths = append(filePaths, fmt.Sprintf("%v%v", dir, f.Name())) @@ -184,9 +180,9 @@ func LoadDir(dir string, outW int64, outH int64) (retVal ts.Tensor, err error) { tensor, err := LoadAndResize(path, outW, outH) if err != nil { err = fmt.Errorf("LoadDir - LoadAndResize method call error: %v\n", err) - return retVal, err + return nil, err } - tensors = append(tensors, tensor) + tensors = append(tensors, *tensor) } return ts.Stack(tensors, 0) diff --git a/vision/imagenet.go b/vision/imagenet.go index d4c1339..d6c6e2a 100644 --- a/vision/imagenet.go +++ b/vision/imagenet.go @@ -17,71 +17,71 @@ import ( type ImageNet struct { mutex *sync.Mutex - mean ts.Tensor - std ts.Tensor + mean *ts.Tensor + std *ts.Tensor } -func NewImageNet() ImageNet { - return ImageNet{ +func NewImageNet() *ImageNet { + return &ImageNet{ mutex: &sync.Mutex{}, mean: ts.MustOfSlice([]float32{0.485, 0.456, 0.406}).MustView([]int64{3, 1, 1}, true), std: ts.MustOfSlice([]float32{0.229, 0.224, 0.225}).MustView([]int64{3, 1, 1}, true), } } -func (in ImageNet) Normalize(tensor ts.Tensor) (retVal ts.Tensor, err error) { +func (in *ImageNet) Normalize(tensor *ts.Tensor) (*ts.Tensor, error) { in.mutex.Lock() defer in.mutex.Unlock() res, err := tensor.Totype(gotch.Float, false) if err != nil { - return retVal, err + return nil, err } resDiv1, err := res.Div1(ts.FloatScalar(float64(255.0)), true) if err != nil { - return retVal, err + return nil, err } resMean, err := resDiv1.Sub(in.mean, true) if err != nil { - return retVal, err + return nil, err } resStd, err := resMean.Div(in.std, true) if err != nil { - 
return retVal, err + return nil, err } return resStd, nil } -func (in ImageNet) UnNormalize(tensor ts.Tensor) (retVal ts.Tensor, err error) { +func (in *ImageNet) UnNormalize(tensor *ts.Tensor) (*ts.Tensor, error) { in.mutex.Lock() defer in.mutex.Unlock() resMul, err := tensor.Mul(in.std, true) if err != nil { - return retVal, err + return nil, err } resAdd, err := resMul.Add(in.mean, true) if err != nil { - return retVal, err + return nil, err } resMul1, err := resAdd.Mul1(ts.FloatScalar(float64(255.0)), true) if err != nil { - return retVal, err + return nil, err } resClamp, err := resMul1.Clamp(ts.FloatScalar(float64(0.0)), ts.FloatScalar(float64(255.0)), true) if err != nil { - return retVal, err + return nil, err } res, err := resClamp.Totype(gotch.Uint8, true) if err != nil { - return retVal, err + return nil, err } return res, nil @@ -90,7 +90,7 @@ func (in ImageNet) UnNormalize(tensor ts.Tensor) (retVal ts.Tensor, err error) { // SaveImage saves a tensor image to a path. // // NOTE: This will carry out the ImageNet unnormalization. -func (in ImageNet) SaveImage(tensor ts.Tensor, path string) (err error) { +func (in *ImageNet) SaveImage(tensor *ts.Tensor, path string) error { unnormTs, err := in.UnNormalize(tensor) if err != nil { err = fmt.Errorf("ImageNet - SaveImage method call: %v", err) @@ -101,11 +101,11 @@ func (in ImageNet) SaveImage(tensor ts.Tensor, path string) (err error) { } // Load loads an image from a file and applies the ImageNet normalization. 
-func (in ImageNet) LoadImage(path string) (retVal ts.Tensor, err error) { +func (in *ImageNet) LoadImage(path string) (*ts.Tensor, error) { tensor, err := Load(path) if err != nil { err = fmt.Errorf("ImageNet - LoadImage method call: %v", err) - return retVal, err + return nil, err } return in.Normalize(tensor) @@ -114,11 +114,11 @@ func (in ImageNet) LoadImage(path string) (retVal ts.Tensor, err error) { // LoadImageAndResize loads an image from a file and resize it to the specified width and height. // // NOTE: This will apply the ImageNet normalization. -func (in ImageNet) LoadImageAndResize(path string, w, h int64) (retVal ts.Tensor, err error) { +func (in *ImageNet) LoadImageAndResize(path string, w, h int64) (*ts.Tensor, error) { tensor, err := LoadAndResize(path, w, h) if err != nil { err = fmt.Errorf("ImageNet - LoadImageAndResize method call: %v", err) - return retVal, err + return nil, err } return tensor, nil @@ -127,17 +127,17 @@ func (in ImageNet) LoadImageAndResize(path string, w, h int64) (retVal ts.Tensor // LoadImageAndResize224 loads an image from a file and resize it to 224x224. // // NOTE: This will apply the ImageNet normalization. 
-func (in ImageNet) LoadImageAndResize224(path string) (retVal ts.Tensor, err error) { +func (in *ImageNet) LoadImageAndResize224(path string) (*ts.Tensor, error) { tensor, err := in.LoadImageAndResize(path, int64(224), int64(224)) if err != nil { err = fmt.Errorf("ImageNet - LoadImageAndResize224/LoadImageAndResize method call: %v", err) - return retVal, err + return nil, err } return in.Normalize(tensor) } -func (in ImageNet) hasSuffix(path string) (retVal bool) { +func (in *ImageNet) hasSuffix(path string) bool { ext := filepath.Ext(path) @@ -149,13 +149,13 @@ func (in ImageNet) hasSuffix(path string) (retVal bool) { } } -func (in ImageNet) loadImageFromDir(dir string) (retVal ts.Tensor, err error) { +func (in *ImageNet) loadImageFromDir(dir string) (*ts.Tensor, error) { var images []ts.Tensor files, err := ioutil.ReadDir(dir) if err != nil { err = fmt.Errorf("ImageNet - loadImageFromDir method call: %v", err) - return retVal, err + return nil, err } for _, file := range files { @@ -166,15 +166,15 @@ func (in ImageNet) loadImageFromDir(dir string) (retVal ts.Tensor, err error) { img, err := in.LoadImageAndResize224(fmt.Sprintf("%v/%v", dir, file.Name())) if err != nil { err = fmt.Errorf("ImageNet - loadImageFromDir method call: %v", err) - return retVal, err + return nil, err } - images = append(images, img) + images = append(images, *img) } if len(images) == 0 { err = fmt.Errorf("There no supported image files in specified directory (%v)", dir) - return retVal, err + return nil, err } return ts.Stack(images, int64(0)) @@ -186,7 +186,7 @@ func (in ImageNet) loadImageFromDir(dir string) (retVal ts.Tensor, err error) { // In each of these datasets, there should be a subdirectory per class named // in the same way. // The ImageNet normalization is applied, image are resized to 224x224. 
-func (in ImageNet) LoadFromDir(path string) (retVal Dataset, err error) { +func (in *ImageNet) LoadFromDir(path string) (*Dataset, error) { absPath, err := filepath.Abs(path) if err != nil { @@ -203,7 +203,7 @@ func (in ImageNet) LoadFromDir(path string) (retVal Dataset, err error) { subs, err := ioutil.ReadDir(validPath) if err != nil { err := fmt.Errorf("ImageNet - LoadFromDir method call: %v\n", err) - return retVal, err + return nil, err } for _, sub := range subs { @@ -230,30 +230,30 @@ func (in ImageNet) LoadFromDir(path string) (retVal Dataset, err error) { trainTs, err := in.loadImageFromDir(trainDir) if err != nil { err := fmt.Errorf("ImageNet - LoadFromDir method call - Err at classes iterating: %v\n", err) - return retVal, err + return nil, err } ntrainTs := trainTs.MustSize()[0] - trainImages = append(trainImages, trainTs) + trainImages = append(trainImages, *trainTs) trainLabelOnes := ts.MustOnes([]int64{ntrainTs}, gotch.Int64, gotch.CPU) - trainLabels = append(trainLabels, trainLabelOnes.MustMul1(ts.IntScalar(labelIndex), true)) + trainLabels = append(trainLabels, *trainLabelOnes.MustMul1(ts.IntScalar(labelIndex), true)) // test testDir := fmt.Sprintf("%v/%v", validPath, labelDir) testTs, err := in.loadImageFromDir(testDir) if err != nil { err := fmt.Errorf("ImageNet - LoadFromDir method call - Err at classes interating: %v\n", err) - return retVal, err + return nil, err } ntestTs := testTs.MustSize()[0] - testImages = append(testImages, testTs) + testImages = append(testImages, *testTs) testLabelOnes := ts.MustOnes([]int64{ntestTs}, gotch.Int64, gotch.CPU) - testLabels = append(testLabels, testLabelOnes.MustMul1(ts.IntScalar(labelIndex), true)) + testLabels = append(testLabels, *testLabelOnes.MustMul1(ts.IntScalar(labelIndex), true)) } - return Dataset{ + return &Dataset{ TrainImages: ts.MustCat(trainImages, 0), TrainLabels: ts.MustCat(trainLabels, 0), TestImages: ts.MustCat(testImages, 0), @@ -264,7 +264,7 @@ func (in ImageNet) LoadFromDir(path 
string) (retVal Dataset, err error) { const imagenetClassCount int64 = 1000 -func (in ImageNet) ClassCount() (retVal int64) { +func (in *ImageNet) ClassCount() int64 { return imagenetClassCount } @@ -1271,7 +1271,7 @@ var imagenetClasses []string = []string{ "toilet tissue, toilet paper, bathroom tissue", } -func (in ImageNet) Classes() (retVal []string) { +func (in *ImageNet) Classes() []string { return imagenetClasses } @@ -1281,9 +1281,9 @@ type TopItem struct { } // Returns the top k classes as well as the associated scores. -func (in ImageNet) Top(input ts.Tensor, k int64) (retVal []TopItem) { +func (in *ImageNet) Top(input ts.Tensor, k int64) []TopItem { - var tensor ts.Tensor + var tensor *ts.Tensor shape := input.MustSize() switch { diff --git a/vision/inception.go b/vision/inception.go index b44e755..b646f0d 100644 --- a/vision/inception.go +++ b/vision/inception.go @@ -7,7 +7,7 @@ import ( ts "github.com/sugarme/gotch/tensor" ) -func convBn(p nn.Path, cIn, cOut, ksize, pad, stride int64) (retVal ts.ModuleT) { +func convBn(p *nn.Path, cIn, cOut, ksize, pad, stride int64) ts.ModuleT { convConfig := nn.DefaultConv2DConfig() convConfig.Stride = []int64{stride, stride} @@ -24,14 +24,14 @@ func convBn(p nn.Path, cIn, cOut, ksize, pad, stride int64) (retVal ts.ModuleT) seq.Add(nn.BatchNorm2D(p.Sub("bn"), cOut, bnConfig)) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.MustRelu(false) })) return seq } -func convBn2(p nn.Path, cIn, cOut int64, ksize []int64, pad []int64) (retVal ts.ModuleT) { +func convBn2(p *nn.Path, cIn, cOut int64, ksize []int64, pad []int64) ts.ModuleT { convConfig := nn.DefaultConv2DConfig() convConfig.Padding = pad convConfig.Bias = false @@ -41,22 +41,22 @@ func convBn2(p nn.Path, cIn, cOut int64, ksize []int64, pad []int64) (retVal ts. 
seq := nn.SeqT() - seq.Add(nn.NewConv(p.Sub("conv"), cIn, cOut, ksize, convConfig).(nn.Conv2D)) + seq.Add(nn.NewConv(p.Sub("conv"), cIn, cOut, ksize, convConfig).(*nn.Conv2D)) seq.Add(nn.BatchNorm2D(p.Sub("bn"), cOut, bnConfig)) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.MustRelu(false) })) return seq } -func inMaxPool2D(xs ts.Tensor, ksize, stride int64) (retVal ts.Tensor) { +func inMaxPool2D(xs *ts.Tensor, ksize, stride int64) *ts.Tensor { return xs.MustMaxPool2d([]int64{ksize, ksize}, []int64{stride, stride}, []int64{0, 0}, []int64{1, 1}, false, false) } -func inceptionA(p nn.Path, cIn, cPool int64) (retVal ts.ModuleT) { +func inceptionA(p *nn.Path, cIn, cPool int64) ts.ModuleT { b1 := convBn(p.Sub("branch1x1"), cIn, 64, 1, 0, 1) b21 := convBn(p.Sub("branch5x5_1"), cIn, 48, 1, 0, 1) b22 := convBn(p.Sub("branch5x5_2"), 48, 64, 5, 2, 1) @@ -65,7 +65,7 @@ func inceptionA(p nn.Path, cIn, cPool int64) (retVal ts.ModuleT) { b33 := convBn(p.Sub("branch3x3dbl_3"), 96, 96, 3, 1, 1) bpool := convBn(p.Sub("branch_pool"), cIn, cPool, 1, 0, 1) - return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { b1Ts := xs.ApplyT(b1, train) b2Tmp := xs.ApplyT(b21, train) @@ -81,19 +81,19 @@ func inceptionA(p nn.Path, cIn, cPool int64) (retVal ts.ModuleT) { bpoolTmp := xs.MustAvgPool2d([]int64{3, 3}, []int64{1, 1}, []int64{1, 1}, false, true, 9, false) bpoolTs := bpoolTmp.ApplyT(bpool, train) - res := ts.MustCat([]ts.Tensor{b1Ts, b2Ts, b3Ts, bpoolTs}, 1) + res := ts.MustCat([]ts.Tensor{*b1Ts, *b2Ts, *b3Ts, *bpoolTs}, 1) return res }) } -func inceptionB(p nn.Path, cIn int64) (retVal ts.ModuleT) { +func inceptionB(p *nn.Path, cIn int64) ts.ModuleT { b1 := convBn(p.Sub("branch3x3"), cIn, 384, 3, 0, 2) b21 := convBn(p.Sub("branch3x3dbl_1"), cIn, 64, 1, 0, 1) b22 := convBn(p.Sub("branch3x3dbl_2"), 64, 96, 3, 1, 1) b23 := 
convBn(p.Sub("branch3x3dbl_3"), 96, 96, 3, 0, 2) - return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { b1Ts := xs.ApplyT(b1, train) b2Tmp1 := xs.ApplyT(b21, train) @@ -104,13 +104,13 @@ func inceptionB(p nn.Path, cIn int64) (retVal ts.ModuleT) { bpoolTs := inMaxPool2D(xs, 3, 2) - res := ts.MustCat([]ts.Tensor{b1Ts, b2Ts, bpoolTs}, 1) + res := ts.MustCat([]ts.Tensor{*b1Ts, *b2Ts, *bpoolTs}, 1) return res }) } -func inceptionC(p nn.Path, cIn int64, c7 int64) (retVal ts.ModuleT) { +func inceptionC(p *nn.Path, cIn int64, c7 int64) ts.ModuleT { b1 := convBn(p.Sub("branch1x1"), cIn, 192, 1, 0, 1) @@ -126,7 +126,7 @@ func inceptionC(p nn.Path, cIn int64, c7 int64) (retVal ts.ModuleT) { bpool := convBn(p.Sub("branch_pool"), cIn, 192, 1, 0, 1) - return nn.NewFuncT(func(xs ts.Tensor, train bool) (res ts.Tensor) { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { b1Ts := xs.ApplyT(b1, train) b2Tmp1 := xs.ApplyT(b21, train) @@ -148,14 +148,11 @@ func inceptionC(p nn.Path, cIn int64, c7 int64) (retVal ts.ModuleT) { bpTmp1 := xs.MustAvgPool2d([]int64{3, 3}, []int64{1, 1}, []int64{1, 1}, false, true, 9, false) bpoolTs := bpTmp1.ApplyT(bpool, train) - res = ts.MustCat([]ts.Tensor{b1Ts, b2Ts, b3Ts, bpoolTs}, 1) - - return res - + return ts.MustCat([]ts.Tensor{*b1Ts, *b2Ts, *b3Ts, *bpoolTs}, 1) }) } -func inceptionD(p nn.Path, cIn int64) (retVal ts.ModuleT) { +func inceptionD(p *nn.Path, cIn int64) ts.ModuleT { b11 := convBn(p.Sub("branch3x3_1"), cIn, 192, 1, 0, 1) b12 := convBn(p.Sub("branch3x3_2"), 192, 320, 3, 0, 2) @@ -165,7 +162,7 @@ func inceptionD(p nn.Path, cIn int64) (retVal ts.ModuleT) { b23 := convBn2(p.Sub("branch7x7x3_3"), 192, 192, []int64{7, 1}, []int64{3, 0}) b24 := convBn(p.Sub("branch7x7x3_4"), 192, 192, 3, 0, 2) - return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { b1Tmp := xs.ApplyT(b11, train) 
b1Ts := b1Tmp.ApplyT(b12, train) b1Tmp.MustDrop() @@ -180,12 +177,12 @@ func inceptionD(p nn.Path, cIn int64) (retVal ts.ModuleT) { bpoolTs := inMaxPool2D(xs, 3, 2) - return ts.MustCat([]ts.Tensor{b1Ts, b2Ts, bpoolTs}, 1) + return ts.MustCat([]ts.Tensor{*b1Ts, *b2Ts, *bpoolTs}, 1) }) } -func inceptionE(p nn.Path, cIn int64) (retVal ts.ModuleT) { +func inceptionE(p *nn.Path, cIn int64) ts.ModuleT { b1 := convBn(p.Sub("branch1x1"), cIn, 320, 1, 0, 1) b21 := convBn(p.Sub("branch3x3_1"), cIn, 384, 1, 0, 1) @@ -199,37 +196,37 @@ func inceptionE(p nn.Path, cIn int64) (retVal ts.ModuleT) { bpool := convBn(p.Sub("branch_pool"), cIn, 192, 1, 0, 1) - return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { b1Ts := xs.ApplyT(b1, train) b2Tmp := xs.ApplyT(b21, train) b2aTs := b2Tmp.ApplyT(b22a, train) b2bTs := b2Tmp.ApplyT(b22b, train) - b2Ts := ts.MustCat([]ts.Tensor{b2aTs, b2bTs}, 1) + b2Ts := ts.MustCat([]ts.Tensor{*b2aTs, *b2bTs}, 1) b3Tmp1 := xs.ApplyT(b31, train) b3Tmp2 := b3Tmp1.ApplyT(b32, train) b3Tmp1.MustDrop() b3aTs := b3Tmp2.ApplyT(b33a, train) b3bTs := b3Tmp2.ApplyT(b33b, train) - b3Ts := ts.MustCat([]ts.Tensor{b3aTs, b3bTs}, 1) + b3Ts := ts.MustCat([]ts.Tensor{*b3aTs, *b3bTs}, 1) bpTmp1 := xs.MustAvgPool2d([]int64{3, 3}, []int64{1, 1}, []int64{1, 1}, false, true, 9, false) bpoolTs := bpTmp1.ApplyT(bpool, train) - return ts.MustCat([]ts.Tensor{b1Ts, b2Ts, b3Ts, bpoolTs}, 1) + return ts.MustCat([]ts.Tensor{*b1Ts, *b2Ts, *b3Ts, *bpoolTs}, 1) }) } -func InceptionV3(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func InceptionV3(p *nn.Path, nclasses int64) ts.ModuleT { seq := nn.SeqT() seq.Add(convBn(p.Sub("Conv2d_1a_3x3"), 3, 32, 3, 0, 2)) seq.Add(convBn(p.Sub("Conv2d_2a_3x3"), 32, 32, 3, 0, 1)) seq.Add(convBn(p.Sub("Conv2d_2b_3x3"), 32, 64, 3, 1, 1)) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { tmp := xs.MustRelu(false) res := 
inMaxPool2D(tmp, 3, 2) tmp.MustDrop() @@ -239,7 +236,7 @@ func InceptionV3(p nn.Path, nclasses int64) (retVal ts.ModuleT) { seq.Add(convBn(p.Sub("Conv2d_3b_1x1"), 64, 80, 1, 0, 1)) seq.Add(convBn(p.Sub("Conv2d_4a_3x3"), 80, 192, 3, 0, 1)) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { tmp := xs.MustRelu(false) res := inMaxPool2D(tmp, 3, 2) tmp.MustDrop() @@ -262,7 +259,7 @@ func InceptionV3(p nn.Path, nclasses int64) (retVal ts.ModuleT) { seq.Add(inceptionE(p.Sub("Mixed_7b"), 1280)) seq.Add(inceptionE(p.Sub("Mixed_7c"), 2048)) - seq.AddFnT(nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + seq.AddFnT(nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { tmp1 := xs.MustAdaptiveAvgPool2d([]int64{1, 1}, false) tmp2 := ts.MustDropout(tmp1, 0.5, train) tmp1.MustDrop() diff --git a/vision/mnist.go b/vision/mnist.go index a81b22a..2dbe3fd 100644 --- a/vision/mnist.go +++ b/vision/mnist.go @@ -52,7 +52,7 @@ func checkMagicNumber(f *os.File, wantNumber int) (err error) { return nil } -func readLabels(filename string) (retVal ts.Tensor) { +func readLabels(filename string) *ts.Tensor { f, err := os.Open(filename) if err != nil { @@ -82,12 +82,10 @@ func readLabels(filename string) (retVal ts.Tensor) { log.Fatal(err) } - retVal = labelsTs.MustTotype(gotch.Int64, true) - - return retVal + return labelsTs.MustTotype(gotch.Int64, true) } -func readImages(filename string) (retVal ts.Tensor) { +func readImages(filename string) *ts.Tensor { f, err := os.Open(filename) if err != nil { log.Fatalf("readImages errors: %v\n", err) @@ -125,13 +123,12 @@ func readImages(filename string) (retVal ts.Tensor) { err = fmt.Errorf("create images tensor err.") log.Fatal(err) } - retVal = imagesTs.MustView([]int64{int64(samples), int64(rows * cols)}, true).MustTotype(gotch.Float, true).MustDiv1(ts.FloatScalar(255.0), true) - return retVal + return imagesTs.MustView([]int64{int64(samples), int64(rows * cols)}, 
true).MustTotype(gotch.Float, true).MustDiv1(ts.FloatScalar(255.0), true) } // LoadMNISTDir loads all MNIST data from a given directory to Dataset -func LoadMNISTDir(dir string) (retVal Dataset) { +func LoadMNISTDir(dir string) *Dataset { const ( trainLabels = "train-labels-idx1-ubyte" trainImages = "train-images-idx3-ubyte" @@ -149,7 +146,7 @@ func LoadMNISTDir(dir string) (retVal Dataset) { testImagesTs := readImages(testImagesFile) testLabelsTs := readLabels(testLabelsFile) - return Dataset{ + return &Dataset{ TrainImages: trainImagesTs, TrainLabels: trainLabelsTs, TestImages: testImagesTs, diff --git a/vision/mobilenet.go b/vision/mobilenet.go index f433f6f..ca036a7 100644 --- a/vision/mobilenet.go +++ b/vision/mobilenet.go @@ -12,7 +12,7 @@ import ( ) // Conv2D + BatchNorm2D + ReLU6 -func cbr(p nn.Path, cIn, cOut, ks, stride, g int64) (retVal ts.ModuleT) { +func cbr(p *nn.Path, cIn, cOut, ks, stride, g int64) ts.ModuleT { config := nn.DefaultConv2DConfig() config.Stride = []int64{stride, stride} pad := (ks - 1) / 2 @@ -26,7 +26,7 @@ func cbr(p nn.Path, cIn, cOut, ks, stride, g int64) (retVal ts.ModuleT) { seq.Add(nn.BatchNorm2D(p.Sub("1"), cOut, nn.DefaultBatchNormConfig())) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { tmp := xs.MustRelu(false) res := tmp.MustClampMax(ts.FloatScalar(6.0), true) return res @@ -36,7 +36,7 @@ func cbr(p nn.Path, cIn, cOut, ks, stride, g int64) (retVal ts.ModuleT) { } // Inverted Residual block. 
-func inv(p nn.Path, cIn, cOut, stride, er int64) (retVal ts.ModuleT) { +func inv(p *nn.Path, cIn, cOut, stride, er int64) ts.ModuleT { cHidden := er * cIn seq := nn.SeqT() @@ -54,7 +54,7 @@ func inv(p nn.Path, cIn, cOut, stride, er int64) (retVal ts.ModuleT) { seq.Add(nn.BatchNorm2D(p.Sub(fmt.Sprintf("%v", id+2)), cOut, nn.DefaultBatchNormConfig())) - return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { ys := xs.ApplyT(seq, train) if stride == 1 && cIn == cOut { res := ys.MustAdd(xs, true) @@ -75,7 +75,7 @@ var invertedResidualSettings [][]int64 = [][]int64{ {6, 320, 1, 1}, } -func MobileNetV2(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func MobileNetV2(p *nn.Path, nclasses int64) ts.ModuleT { fp := p.Sub("features") cp := p.Sub("classifier") cIn := int64(32) @@ -108,13 +108,13 @@ func MobileNetV2(p nn.Path, nclasses int64) (retVal ts.ModuleT) { classifier := nn.SeqT() - classifier.AddFnT(nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + classifier.AddFnT(nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { return ts.MustDropout(xs, 0.5, train) })) classifier.Add(nn.NewLinear(cp.Sub("1"), 1280, nclasses, nn.DefaultLinearConfig())) - return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { tmp1 := xs.ApplyT(features, train) tmp2 := tmp1.MustMean1([]int64{2}, false, gotch.Float, true) diff --git a/vision/resnet.go b/vision/resnet.go index 56493db..e470d25 100644 --- a/vision/resnet.go +++ b/vision/resnet.go @@ -12,7 +12,7 @@ import ( // See "Deep Residual Learning for Image Recognition" He et al. 
2015 // https://arxiv.org/abs/1512.03385 -func conv2d(path nn.Path, cIn, cOut, ksize, padding, stride int64) (retVal nn.Conv2D) { +func conv2d(path *nn.Path, cIn, cOut, ksize, padding, stride int64) *nn.Conv2D { config := nn.DefaultConv2DConfig() config.Stride = []int64{stride, stride} config.Padding = []int64{padding, padding} @@ -21,21 +21,20 @@ func conv2d(path nn.Path, cIn, cOut, ksize, padding, stride int64) (retVal nn.Co return nn.NewConv2D(path, cIn, cOut, ksize, config) } -func downSample(path nn.Path, cIn, cOut, stride int64) (retVal ts.ModuleT) { +func downSample(path *nn.Path, cIn, cOut, stride int64) ts.ModuleT { if stride != 1 || cIn != cOut { seq := nn.SeqT() seq.Add(conv2d(path.Sub("0"), cIn, cOut, 1, 0, stride)) seq.Add(nn.BatchNorm2D(path.Sub("1"), cOut, nn.DefaultBatchNormConfig())) - retVal = seq - } else { - retVal = nn.SeqT() + + return seq } - return retVal + return nn.SeqT() } -func basicBlock(path nn.Path, cIn, cOut, stride int64) (retVal ts.ModuleT) { +func basicBlock(path *nn.Path, cIn, cOut, stride int64) ts.ModuleT { conv1 := conv2d(path.Sub("conv1"), cIn, cOut, 3, 1, stride) bn1 := nn.BatchNorm2D(path.Sub("bn1"), cOut, nn.DefaultBatchNormConfig()) @@ -43,7 +42,7 @@ func basicBlock(path nn.Path, cIn, cOut, stride int64) (retVal ts.ModuleT) { bn2 := nn.BatchNorm2D(path.Sub("bn2"), cOut, nn.DefaultBatchNormConfig()) downsample := downSample(path.Sub("downsample"), cIn, cOut, stride) - return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { c1 := xs.Apply(conv1) bn1 := c1.ApplyT(bn1, train) c1.MustDrop() @@ -61,7 +60,7 @@ func basicBlock(path nn.Path, cIn, cOut, stride int64) (retVal ts.ModuleT) { }) } -func basicLayer(path nn.Path, cIn, cOut, stride, cnt int64) (retVal ts.ModuleT) { +func basicLayer(path *nn.Path, cIn, cOut, stride, cnt int64) ts.ModuleT { layer := nn.SeqT() layer.Add(basicBlock(path.Sub("0"), cIn, cOut, stride)) @@ -73,7 +72,7 @@ func basicLayer(path 
nn.Path, cIn, cOut, stride, cnt int64) (retVal ts.ModuleT) return layer } -func resnet(path nn.Path, nclasses int64, c1, c2, c3, c4 int64) (retVal nn.FuncT) { +func resnet(path *nn.Path, nclasses int64, c1, c2, c3, c4 int64) nn.FuncT { conv1 := conv2d(path.Sub("conv1"), 3, 64, 7, 3, 2) bn1 := nn.BatchNorm2D(path.Sub("bn1"), 64, nn.DefaultBatchNormConfig()) layer1 := basicLayer(path.Sub("layer1"), 64, 64, 1, c1) @@ -86,7 +85,7 @@ func resnet(path nn.Path, nclasses int64, c1, c2, c3, c4 int64) (retVal nn.FuncT linearConfig := nn.DefaultLinearConfig() fc := nn.NewLinear(path.Sub("fc"), 512, nclasses, linearConfig) - return nn.NewFuncT(func(xs ts.Tensor, train bool) (retVal ts.Tensor) { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { c1 := xs.Apply(conv1) xs.MustDrop() bn1 := c1.ApplyT(bn1, train) @@ -105,14 +104,14 @@ func resnet(path nn.Path, nclasses int64, c1, c2, c3, c4 int64) (retVal nn.FuncT fv := avgpool.FlatView() avgpool.MustDrop() - retVal = fv.ApplyOpt(ts.WithModule(fc)) + retVal := fv.ApplyOpt(ts.WithModule(fc)) fv.MustDrop() return retVal }) } else { // No final layer - return nn.NewFuncT(func(xs ts.Tensor, train bool) (retVal ts.Tensor) { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { c1 := xs.Apply(conv1) xs.MustDrop() bn1 := c1.ApplyT(bn1, train) @@ -129,7 +128,7 @@ func resnet(path nn.Path, nclasses int64, c1, c2, c3, c4 int64) (retVal nn.FuncT l3.MustDrop() avgpool := l4.MustAdaptiveAvgPool2d([]int64{1, 1}, false) l4.MustDrop() - retVal = avgpool.FlatView() + retVal := avgpool.FlatView() avgpool.MustDrop() return retVal @@ -138,24 +137,24 @@ func resnet(path nn.Path, nclasses int64, c1, c2, c3, c4 int64) (retVal nn.FuncT } // Creates a ResNet-18 model. 
-func ResNet18(path nn.Path, numClasses int64) (retVal nn.FuncT) { +func ResNet18(path *nn.Path, numClasses int64) nn.FuncT { return resnet(path, numClasses, 2, 2, 2, 2) } -func ResNet18NoFinalLayer(path nn.Path) (retVal nn.FuncT) { +func ResNet18NoFinalLayer(path *nn.Path) nn.FuncT { return resnet(path, 0, 2, 2, 2, 2) } -func ResNet34(path nn.Path, numClasses int64) (retVal nn.FuncT) { +func ResNet34(path *nn.Path, numClasses int64) nn.FuncT { return resnet(path, numClasses, 3, 4, 6, 3) } -func ResNet34NoFinalLayer(path nn.Path) (retVal nn.FuncT) { +func ResNet34NoFinalLayer(path *nn.Path) nn.FuncT { return resnet(path, 0, 3, 4, 6, 3) } // Bottleneck versions for ResNet 50, 101, and 152. -func bottleneckBlock(path nn.Path, cIn, cOut, stride, e int64) (retVal ts.ModuleT) { +func bottleneckBlock(path *nn.Path, cIn, cOut, stride, e int64) ts.ModuleT { eDim := e * cOut conv1 := conv2d(path.Sub("conv1"), cIn, cOut, 1, 0, 1) @@ -166,7 +165,7 @@ func bottleneckBlock(path nn.Path, cIn, cOut, stride, e int64) (retVal ts.Module bn3 := nn.BatchNorm2D(path.Sub("bn3"), eDim, nn.DefaultBatchNormConfig()) downsample := downSample(path.Sub("downsample"), cIn, eDim, stride) - return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { c1 := xs.Apply(conv1) bn1 := c1.ApplyT(bn1, train) c1.MustDrop() @@ -187,7 +186,7 @@ func bottleneckBlock(path nn.Path, cIn, cOut, stride, e int64) (retVal ts.Module }) } -func bottleneckLayer(path nn.Path, cIn, cOut, stride, cnt int64) (retVal ts.ModuleT) { +func bottleneckLayer(path *nn.Path, cIn, cOut, stride, cnt int64) ts.ModuleT { layer := nn.SeqT() layer.Add(bottleneckBlock(path.Sub("0"), cIn, cOut, stride, 4)) @@ -198,7 +197,7 @@ func bottleneckLayer(path nn.Path, cIn, cOut, stride, cnt int64) (retVal ts.Modu return layer } -func bottleneckResnet(path nn.Path, nclasses int64, c1, c2, c3, c4 int64) (retVal ts.ModuleT) { +func bottleneckResnet(path *nn.Path, nclasses int64, c1, 
c2, c3, c4 int64) ts.ModuleT { conv1 := conv2d(path.Sub("conv1"), 3, 64, 7, 3, 2) bn1 := nn.BatchNorm2D(path.Sub("bn1"), 64, nn.DefaultBatchNormConfig()) layer1 := bottleneckLayer(path.Sub("layer1"), 64, 64, 1, c1) @@ -209,7 +208,7 @@ func bottleneckResnet(path nn.Path, nclasses int64, c1, c2, c3, c4 int64) (retVa if nclasses > 0 { fc := nn.NewLinear(path.Sub("fc"), 4*512, nclasses, nn.DefaultLinearConfig()) - return nn.NewFuncT(func(xs ts.Tensor, train bool) (retVal ts.Tensor) { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { c1 := xs.Apply(conv1) xs.MustDrop() bn1 := c1.ApplyT(bn1, train) @@ -228,12 +227,12 @@ func bottleneckResnet(path nn.Path, nclasses int64, c1, c2, c3, c4 int64) (retVa fv := avgpool.FlatView() avgpool.MustDrop() - retVal = fv.ApplyOpt(ts.WithModule(fc)) + retVal := fv.ApplyOpt(ts.WithModule(fc)) fv.MustDrop() return retVal }) } else { - return nn.NewFuncT(func(xs ts.Tensor, train bool) (retVal ts.Tensor) { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { c1 := xs.Apply(conv1) xs.MustDrop() bn1 := c1.ApplyT(bn1, train) @@ -250,7 +249,7 @@ func bottleneckResnet(path nn.Path, nclasses int64, c1, c2, c3, c4 int64) (retVa l3.MustDrop() avgpool := l4.MustAdaptiveAvgPool2d([]int64{1, 1}, false) l4.MustDrop() - retVal = avgpool.FlatView() + retVal := avgpool.FlatView() avgpool.MustDrop() return retVal @@ -258,26 +257,26 @@ func bottleneckResnet(path nn.Path, nclasses int64, c1, c2, c3, c4 int64) (retVa } } -func ResNet50(path nn.Path, numClasses int64) (retVal ts.ModuleT) { +func ResNet50(path *nn.Path, numClasses int64) ts.ModuleT { return bottleneckResnet(path, numClasses, 3, 4, 6, 3) } -func ResNet50NoFinalLayer(path nn.Path) (retVal ts.ModuleT) { +func ResNet50NoFinalLayer(path *nn.Path) ts.ModuleT { return bottleneckResnet(path, 0, 3, 4, 6, 3) } -func ResNet101(path nn.Path, numClasses int64) (retVal ts.ModuleT) { +func ResNet101(path *nn.Path, numClasses int64) ts.ModuleT { return bottleneckResnet(path, 
numClasses, 3, 4, 23, 3) } -func ResNet101NoFinalLayer(path nn.Path) (retVal ts.ModuleT) { +func ResNet101NoFinalLayer(path *nn.Path) ts.ModuleT { return bottleneckResnet(path, 0, 3, 4, 23, 3) } -func ResNet152(path nn.Path, numClasses int64) (retVal ts.ModuleT) { +func ResNet152(path *nn.Path, numClasses int64) ts.ModuleT { return bottleneckResnet(path, numClasses, 3, 8, 36, 3) } -func ResNet150NoFinalLayer(path nn.Path) (retVal ts.ModuleT) { +func ResNet150NoFinalLayer(path *nn.Path) ts.ModuleT { return bottleneckResnet(path, 0, 3, 8, 36, 3) } diff --git a/vision/squeezenet.go b/vision/squeezenet.go index f0a4322..de92eaf 100644 --- a/vision/squeezenet.go +++ b/vision/squeezenet.go @@ -7,11 +7,11 @@ import ( ts "github.com/sugarme/gotch/tensor" ) -func snMaxPool2D(xs ts.Tensor) (retVal ts.Tensor) { +func snMaxPool2D(xs *ts.Tensor) *ts.Tensor { return xs.MustMaxPool2d([]int64{3, 3}, []int64{2, 2}, []int64{0, 0}, []int64{1, 1}, true, false) } -func fire(p nn.Path, cIn int64, cSqueeze int64, cExp1 int64, cExp3 int64) (retVal ts.ModuleT) { +func fire(p *nn.Path, cIn int64, cSqueeze int64, cExp1 int64, cExp3 int64) ts.ModuleT { cfg3 := nn.DefaultConv2DConfig() cfg3.Padding = []int64{1, 1} @@ -21,7 +21,7 @@ func fire(p nn.Path, cIn int64, cSqueeze int64, cExp1 int64, cExp3 int64) (retVa exp3 := nn.NewConv2D(p.Sub("expand3x3"), cSqueeze, cExp3, 3, cfg3) // NOTE: train will not be used - return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { tmp1 := xs.Apply(squeeze) tmp2 := tmp1.MustRelu(true) @@ -31,11 +31,11 @@ func fire(p nn.Path, cIn int64, cSqueeze int64, cExp1 int64, cExp3 int64) (retVa exp3Tmp := tmp2.Apply(exp3) exp3Ts := exp3Tmp.MustRelu(true) - return ts.MustCat([]ts.Tensor{exp1Ts, exp3Ts}, 1) + return ts.MustCat([]ts.Tensor{*exp1Ts, *exp3Ts}, 1) }) } -func squeezenet(p nn.Path, v1_0 bool, nclasses int64) (retVal ts.ModuleT) { +func squeezenet(p *nn.Path, v1_0 bool, nclasses int64) 
ts.ModuleT { fp := p.Sub("features") cp := p.Sub("classifier") @@ -50,11 +50,11 @@ func squeezenet(p nn.Path, v1_0 bool, nclasses int64) (retVal ts.ModuleT) { if v1_0 { features.Add(nn.NewConv2D(fp.Sub("0"), 3, 96, 7, initialConvConfig)) - features.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + features.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.MustRelu(false) })) - features.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + features.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return snMaxPool2D(xs) })) @@ -64,7 +64,7 @@ func squeezenet(p nn.Path, v1_0 bool, nclasses int64) (retVal ts.ModuleT) { features.Add(fire(fp.Sub("5"), 128, 32, 128, 128)) - features.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + features.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return snMaxPool2D(xs) })) @@ -76,7 +76,7 @@ func squeezenet(p nn.Path, v1_0 bool, nclasses int64) (retVal ts.ModuleT) { features.Add(fire(fp.Sub("10"), 384, 64, 256, 256)) - features.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + features.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return snMaxPool2D(xs) })) @@ -85,11 +85,11 @@ func squeezenet(p nn.Path, v1_0 bool, nclasses int64) (retVal ts.ModuleT) { } else { features.Add(nn.NewConv2D(fp.Sub("0"), 3, 64, 3, initialConvConfig)) - features.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + features.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.MustRelu(false) })) - features.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + features.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return snMaxPool2D(xs) })) @@ -97,7 +97,7 @@ func squeezenet(p nn.Path, v1_0 bool, nclasses int64) (retVal ts.ModuleT) { features.Add(fire(fp.Sub("4"), 128, 16, 64, 64)) - features.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + features.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return snMaxPool2D(xs) })) @@ -105,7 +105,7 @@ func squeezenet(p nn.Path, v1_0 bool, nclasses int64) (retVal ts.ModuleT) { 
features.Add(fire(fp.Sub("7"), 256, 32, 128, 128)) - features.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + features.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return snMaxPool2D(xs) })) @@ -118,13 +118,13 @@ func squeezenet(p nn.Path, v1_0 bool, nclasses int64) (retVal ts.ModuleT) { features.Add(fire(fp.Sub("12"), 512, 64, 256, 256)) } - features.AddFnT(nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + features.AddFnT(nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { return ts.MustDropout(xs, 0.5, train) })) features.Add(nn.NewConv2D(cp.Sub("1"), 512, nclasses, 1, finalConvConfig)) - features.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + features.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { tmp1 := xs.MustRelu(false) tmp2 := tmp1.MustAdaptiveAvgPool2d([]int64{1, 1}, false) tmp1.MustDrop() @@ -136,10 +136,10 @@ func squeezenet(p nn.Path, v1_0 bool, nclasses int64) (retVal ts.ModuleT) { return features } -func SqueezeNetV1_0(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func SqueezeNetV1_0(p *nn.Path, nclasses int64) ts.ModuleT { return squeezenet(p, true, nclasses) } -func SqueezeNetV1_1(p nn.Path, nclasses int64) (retVal ts.ModuleT) { +func SqueezeNetV1_1(p *nn.Path, nclasses int64) ts.ModuleT { return squeezenet(p, false, nclasses) } diff --git a/vision/vgg.go b/vision/vgg.go index e2b7d62..2a96aae 100644 --- a/vision/vgg.go +++ b/vision/vgg.go @@ -11,7 +11,7 @@ import ( // NOTE: each list element contains multiple convolutions with some specified number // of features followed by a single max-pool layer. 
-func layersA() (retVal [][]int64) { +func layersA() [][]int64 { return [][]int64{ {64}, {128}, @@ -21,7 +21,7 @@ func layersA() (retVal [][]int64) { } } -func layersB() (retVal [][]int64) { +func layersB() [][]int64 { return [][]int64{ {64, 64}, {128, 128}, @@ -31,7 +31,7 @@ func layersB() (retVal [][]int64) { } } -func layersD() (retVal [][]int64) { +func layersD() [][]int64 { return [][]int64{ {64, 64}, {128, 128}, @@ -41,7 +41,7 @@ func layersD() (retVal [][]int64) { } } -func layersE() (retVal [][]int64) { +func layersE() [][]int64 { return [][]int64{ {64, 64}, {128, 128}, @@ -51,7 +51,7 @@ func layersE() (retVal [][]int64) { } } -func vggConv2d(path nn.Path, cIn, cOut int64) (retVal nn.Conv2D) { +func vggConv2d(path *nn.Path, cIn, cOut int64) *nn.Conv2D { config := nn.DefaultConv2DConfig() config.Stride = []int64{1, 1} @@ -60,7 +60,7 @@ func vggConv2d(path nn.Path, cIn, cOut int64) (retVal nn.Conv2D) { return nn.NewConv2D(path, cIn, cOut, 3, config) } -func vgg(path nn.Path, config [][]int64, nclasses int64, batchNorm bool) nn.SequentialT { +func vgg(path *nn.Path, config [][]int64, nclasses int64, batchNorm bool) *nn.SequentialT { c := path.Sub("classifier") seq := nn.SeqT() @@ -77,40 +77,40 @@ func vgg(path nn.Path, config [][]int64, nclasses int64, batchNorm bool) nn.Sequ seq.Add(nn.BatchNorm2D(f.Sub(fmt.Sprintf("%v", bnLen)), cOut, nn.DefaultBatchNormConfig())) } - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.MustRelu(false) })) cIn = cOut } // end of inner For loop - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.MaxPool2DDefault(2, false) })) } // end of outer For loop - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.FlatView() })) seq.Add(nn.NewLinear(c.Sub(fmt.Sprint("0")), 512*7*7, 4096, nn.DefaultLinearConfig())) - 
seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.MustRelu(false) })) - seq.AddFn(nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + seq.AddFn(nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { return ts.MustDropout(xs, 0.5, train) })) seq.Add(nn.NewLinear(c.Sub(fmt.Sprint("3")), 4096, 4096, nn.DefaultLinearConfig())) - seq.AddFn(nn.NewFunc(func(xs ts.Tensor) ts.Tensor { + seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor { return xs.MustRelu(false) })) - seq.AddFn(nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor { + seq.AddFn(nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor { return ts.MustDropout(xs, 0.5, train) })) @@ -119,34 +119,34 @@ func vgg(path nn.Path, config [][]int64, nclasses int64, batchNorm bool) nn.Sequ return seq } -func VGG11(path nn.Path, nclasses int64) (retVal nn.SequentialT) { +func VGG11(path *nn.Path, nclasses int64) *nn.SequentialT { return vgg(path, layersA(), nclasses, false) } -func VGG11BN(path nn.Path, nclasses int64) (retVal nn.SequentialT) { +func VGG11BN(path *nn.Path, nclasses int64) *nn.SequentialT { return vgg(path, layersA(), nclasses, true) } -func VGG13(path nn.Path, nclasses int64) (retVal nn.SequentialT) { +func VGG13(path *nn.Path, nclasses int64) *nn.SequentialT { return vgg(path, layersB(), nclasses, false) } -func VGG13BN(path nn.Path, nclasses int64) (retVal nn.SequentialT) { +func VGG13BN(path *nn.Path, nclasses int64) *nn.SequentialT { return vgg(path, layersB(), nclasses, true) } -func VGG16(path nn.Path, nclasses int64) (retVal nn.SequentialT) { +func VGG16(path *nn.Path, nclasses int64) *nn.SequentialT { return vgg(path, layersD(), nclasses, false) } -func VGG16BN(path nn.Path, nclasses int64) (retVal nn.SequentialT) { +func VGG16BN(path *nn.Path, nclasses int64) *nn.SequentialT { return vgg(path, layersD(), nclasses, true) } -func VGG19(path nn.Path, nclasses int64) (retVal nn.SequentialT) { +func VGG19(path *nn.Path, 
nclasses int64) *nn.SequentialT { return vgg(path, layersE(), nclasses, false) } -func VGG19BN(path nn.Path, nclasses int64) (retVal nn.SequentialT) { +func VGG19BN(path *nn.Path, nclasses int64) *nn.SequentialT { return vgg(path, layersE(), nclasses, true) }