From 3c115ee79f388491ec220740816a07a185a29b95 Mon Sep 17 00:00:00 2001
From: sugarme
Date: Thu, 2 Jul 2020 21:30:45 +1000
Subject: [PATCH] fix(tensor): free C memory at tensor.Values() method.
 fix(vision/resnet): free C memory

---
 example/mnist/nn.go | 12 ++++---
 tensor/tensor.go    | 10 +++---
 vision/resnet.go    | 84 ++++++++++++++++++++++++++++++++++++---------
 3 files changed, 80 insertions(+), 26 deletions(-)

diff --git a/example/mnist/nn.go b/example/mnist/nn.go
index 950d2f2..a1d2abe 100644
--- a/example/mnist/nn.go
+++ b/example/mnist/nn.go
@@ -33,7 +33,6 @@ func netInit(vs nn.Path) ts.Module {
 	}))
 
 	n.Add(nn.NewLinear(vs, HiddenNodesNN, LabelNN, *nn.DefaultLinearConfig()))
-	// n.Add(nn.NewLinear(vs, ImageDimNN, LabelNN, nn.DefaultLinearConfig()))
 
 	return &n
 }
@@ -45,11 +44,14 @@ func train(trainX, trainY, testX, testY ts.Tensor, m ts.Module, opt nn.Optimizer
 
 	opt.BackwardStep(loss)
 
-	testAccuracy := m.Forward(testX).AccuracyForLogits(testY)
-	fmt.Printf("Epoch: %v \t Loss: %.3f \t Test accuracy: %.2f%%\n", epoch, loss.Values()[0], testAccuracy.Values()[0]*100)
-
-	loss.MustDrop()
+	testLogits := m.Forward(testX)
+	testAccuracy := testLogits.AccuracyForLogits(testY)
+	accuracy := testAccuracy.Values()[0] * 100
 	testAccuracy.MustDrop()
+	lossVal := loss.Values()[0]
+	loss.MustDrop()
+
+	fmt.Printf("Epoch: %v \t Loss: %.3f \t Test accuracy: %.2f%%\n", epoch, lossVal, accuracy)
 }
 
 func runNN() {
diff --git a/tensor/tensor.go b/tensor/tensor.go
index 9312038..5161a58 100644
--- a/tensor/tensor.go
+++ b/tensor/tensor.go
@@ -1014,13 +1014,15 @@ func (r Reduction) ToInt() (retVal int) {
 
 // Values returns values of tensor in a slice of float64.
 func (ts Tensor) Values() []float64 {
-	clone := ts.MustShallowClone().MustDetach().MustView([]int64{-1}, true)
-
-	n := clone.MustSize()[0]
+	clone := ts.MustShallowClone()
+	dt := clone.MustDetach()
+	clone.MustDrop()
+	flat := dt.MustView([]int64{-1}, true)
+	n := flat.MustSize()[0]
 
 	var values []float64
 	for i := 0; i < int(n); i++ {
-		val := clone.MustFloat64Value([]int64{int64(i)})
+		val := flat.MustFloat64Value([]int64{int64(i)})
 		values = append(values, val)
 	}
 
diff --git a/vision/resnet.go b/vision/resnet.go
index bca4b2d..6c214b7 100644
--- a/vision/resnet.go
+++ b/vision/resnet.go
@@ -37,8 +37,6 @@ func downSample(path nn.Path, cIn, cOut, stride int64) (retVal ts.ModuleT) {
 
 func basicBlock(path nn.Path, cIn, cOut, stride int64) (retVal ts.ModuleT) {
 
-	// TODO: check and make sure delete middle tensors created in C memory
-	// otherwise, there will be a memory blow out!
 	conv1 := conv2d(path.Sub("conv1"), cIn, cOut, 3, 1, stride)
 	bn1 := nn.BatchNorm2D(path.Sub("bn1"), cOut, nn.DefaultBatchNormConfig())
 	conv2 := conv2d(path.Sub("conv2"), cOut, cOut, 3, 1, 1)
@@ -46,9 +44,6 @@ func basicBlock(path nn.Path, cIn, cOut, stride int64) (retVal ts.ModuleT) {
 	downsample := downSample(path.Sub("downsample"), cIn, cOut, stride)
 
 	return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor {
-		// ys := xs.Apply(conv1).ApplyT(bn1, train).MustRelu(false).Apply(conv2).ApplyT(bn2, train)
-		// downsampleLayer := xs.ApplyT(downsample, train).MustAdd(ys, true)
-		// res := downsampleLayer.MustRelu(true)
 		c1 := xs.Apply(conv1)
 		bn1 := c1.ApplyT(bn1, train)
 		c1.MustDrop()
@@ -88,8 +83,12 @@ func resnet(path nn.Path, nclasses int64, c1, c2, c3, c4 int64) (retVal nn.FuncT
 
 	if nclasses > 0 {
 		// With final layer
+		linearConfig := nn.DefaultLinearConfig()
+		fc := nn.NewLinear(path.Sub("fc"), 512, nclasses, *linearConfig)
+
 		return nn.NewFuncT(func(xs ts.Tensor, train bool) (retVal ts.Tensor) {
 			c1 := xs.Apply(conv1)
+			xs.MustDrop()
 			bn1 := c1.ApplyT(bn1, train)
 			c1.MustDrop()
 			relu := bn1.MustRelu(true)
@@ -106,12 +105,8 @@ func resnet(path nn.Path, nclasses int64, c1, c2, c3, c4 int64) (retVal nn.FuncT
 			fv := avgpool.FlatView()
 			avgpool.MustDrop()
 
-			// final layer
-			linearConfig := nn.DefaultLinearConfig()
-			fc := nn.NewLinear(path.Sub("fc"), 512, nclasses, *linearConfig)
-
 			retVal = fv.ApplyOpt(ts.WithModule(fc))
-
+			fv.MustDrop()
 			return retVal
 		})
 
@@ -125,6 +120,7 @@ func resnet(path nn.Path, nclasses int64, c1, c2, c3, c4 int64) (retVal nn.FuncT
 			relu := bn1.MustRelu(true)
 			maxpool := relu.MustMaxPool2D([]int64{3, 3}, []int64{2, 2}, []int64{1, 1}, []int64{1, 1}, false, true)
 			l1 := maxpool.ApplyT(layer1, train)
+			maxpool.MustDrop()
 			l2 := l1.ApplyT(layer2, train)
 			l1.MustDrop()
 			l3 := l2.ApplyT(layer3, train)
@@ -171,8 +167,23 @@ func bottleneckBlock(path nn.Path, cIn, cOut, stride, e int64) (retVal ts.Module
 	downsample := downSample(path.Sub("downsample"), cIn, eDim, stride)
 
 	return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor {
-		ys := xs.Apply(conv1).ApplyT(bn1, train).MustRelu(true).Apply(conv2).ApplyT(bn2, train).MustRelu(true).Apply(conv3).ApplyT(bn3, train)
-		return xs.ApplyT(downsample, train).MustAdd(ys, true).MustRelu(true)
+		c1 := xs.Apply(conv1)
+		bn1 := c1.ApplyT(bn1, train)
+		c1.MustDrop()
+		relu1 := bn1.MustRelu(true)
+		c2 := relu1.Apply(conv2)
+		relu1.MustDrop()
+		bn2 := c2.ApplyT(bn2, train)
+		relu2 := bn2.MustRelu(true)
+		c3 := relu2.Apply(conv3)
+		relu2.MustDrop()
+		bn3 := c3.ApplyT(bn3, train)
+
+		dsl := xs.ApplyT(downsample, train)
+		add := dsl.MustAdd(bn3, true)
+		bn3.MustDrop()
+		res := add.MustRelu(true)
+		return res
 	})
 }
 
@@ -180,7 +191,7 @@ func bottleneckLayer(path nn.Path, cIn, cOut, stride, cnt int64) (retVal ts.Modu
 	layer := nn.SeqT()
 	layer.Add(bottleneckBlock(path.Sub("0"), cIn, cOut, stride, 4))
 
-	for blockIndex := 0; blockIndex < int(cnt); blockIndex++ {
+	for blockIndex := 1; blockIndex < int(cnt); blockIndex++ {
 		layer.Add(bottleneckBlock(path.Sub(fmt.Sprint(blockIndex)), (cOut * 4), cOut, 1, 4))
 	}
 
@@ -198,12 +209,51 @@ func bottleneckResnet(path nn.Path, nclasses int64, c1, c2, c3, c4 int64) (retVa
 	if nclasses > 0 {
 		fc := nn.NewLinear(path.Sub("fc"), 4*512, nclasses, *nn.DefaultLinearConfig())
 
-		return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor {
-			return xs.Apply(conv1).ApplyT(bn1, train).MustRelu(true).MustMaxPool2D([]int64{3, 3}, []int64{2, 2}, []int64{1, 1}, []int64{1, 1}, false, true).ApplyT(layer1, train).ApplyT(layer2, train).ApplyT(layer3, train).ApplyT(layer4, train).MustAdaptiveAvgPool2D([]int64{1, 1}).FlatView().ApplyOpt(ts.WithModule(fc))
+		return nn.NewFuncT(func(xs ts.Tensor, train bool) (retVal ts.Tensor) {
+			c1 := xs.Apply(conv1)
+			xs.MustDrop()
+			bn1 := c1.ApplyT(bn1, train)
+			c1.MustDrop()
+			relu := bn1.MustRelu(true)
+			maxpool := relu.MustMaxPool2D([]int64{3, 3}, []int64{2, 2}, []int64{1, 1}, []int64{1, 1}, false, true)
+			l1 := maxpool.ApplyT(layer1, train)
+			l2 := l1.ApplyT(layer2, train)
+			l1.MustDrop()
+			l3 := l2.ApplyT(layer3, train)
+			l2.MustDrop()
+			l4 := l3.ApplyT(layer4, train)
+			l3.MustDrop()
+			avgpool := l4.MustAdaptiveAvgPool2D([]int64{1, 1})
+			l4.MustDrop()
+			fv := avgpool.FlatView()
+			avgpool.MustDrop()
+
+			retVal = fv.ApplyOpt(ts.WithModule(fc))
+			fv.MustDrop()
+			return retVal
 		})
 	} else {
-		return nn.NewFuncT(func(xs ts.Tensor, train bool) ts.Tensor {
-			return xs.Apply(conv1).ApplyT(bn1, train).MustRelu(true).MustMaxPool2D([]int64{3, 3}, []int64{2, 2}, []int64{1, 1}, []int64{1, 1}, false, true).ApplyT(layer1, train).ApplyT(layer2, train).ApplyT(layer3, train).ApplyT(layer4, train).MustAdaptiveAvgPool2D([]int64{1, 1}).FlatView()
+		return nn.NewFuncT(func(xs ts.Tensor, train bool) (retVal ts.Tensor) {
+			c1 := xs.Apply(conv1)
+			xs.MustDrop()
+			bn1 := c1.ApplyT(bn1, train)
+			c1.MustDrop()
+			relu := bn1.MustRelu(true)
+			maxpool := relu.MustMaxPool2D([]int64{3, 3}, []int64{2, 2}, []int64{1, 1}, []int64{1, 1}, false, true)
+			l1 := maxpool.ApplyT(layer1, train)
+			maxpool.MustDrop()
+			l2 := l1.ApplyT(layer2, train)
+			l1.MustDrop()
+			l3 := l2.ApplyT(layer3, train)
+			l2.MustDrop()
+			l4 := l3.ApplyT(layer4, train)
+			l3.MustDrop()
+			avgpool := l4.MustAdaptiveAvgPool2D([]int64{1, 1})
+			l4.MustDrop()
+			retVal = avgpool.FlatView()
+			avgpool.MustDrop()
+
+			return retVal
 		})
 	}
 }
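
Note: a rough caller-side sketch of the ownership convention this patch follows. Values() copies element data out to a plain Go slice and now frees its own intermediate clone/detach/view tensors, while the caller remains responsible for dropping every tensor it created. The constructor used below (ts.MustOfSlice) is illustrative and assumed, not taken from the patch itself.

package main

import (
	"fmt"

	ts "github.com/sugarme/gotch/tensor"
)

func main() {
	// Assumed constructor for illustration; the exact factory name may
	// differ in this revision of gotch.
	xs := ts.MustOfSlice([]float64{1, 2, 3, 4})

	// Values() flattens the tensor and copies each element into a Go
	// []float64; after this patch it also drops the intermediate tensors
	// it allocates on the C side.
	vals := xs.Values()

	// The caller still owns xs and must free its C memory explicitly.
	xs.MustDrop()

	fmt.Println(vals) // [1 2 3 4]
}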