Fixed memory leak in RandomAffine

2021-06-26 18:25:26 +10:00 · 2021-06-26 18:25:26 +10:00 · fda5adc77d
commit fda5adc77d
parent d4a77d639d
7 changed files with 238 additions and 40 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fixed double free tensor at `vision/aug/function.go` Equalize func.
 - Changed `vision/aug`: all input images must be `uint8` (Byte) dtype, and the transformed output has the same dtype (uint8), so that `Compose()` can compose any transformer options.
 - Fixed wrong result of `aug.RandomAdjustSharpness`
+- Fixed memory leak at `aug/function.genAffineGrid`

 ## [0.3.10]
 - Update installation at README.md
--- a/example/augmentation/bb-transformed.jpg
+++ b/example/augmentation/bb-transformed.jpg
--- a/example/augmentation/main.go
+++ b/example/augmentation/main.go
@ -42,14 +42,18 @@ func tOne() {
 		panic(err)
 	}

-	device := gotch.CudaIfAvailable()
-	// device := gotch.CPU
+	// device := gotch.CudaIfAvailable()
+	device := gotch.CPU
 	imgTs := img.MustTo(device, true)
+	// h := imgTs.MustSize()[1]
+	// w := imgTs.MustSize()[2]

 	// t, err := aug.Compose(aug.WithRandomAutocontrast(1.0))
 	// t, err := aug.Compose(aug.WithRandomSolarize(aug.WithSolarizeThreshold(125), aug.WithSolarizePvalue(1.0)))
-	t, err := aug.Compose(aug.WithRandomAdjustSharpness(aug.WithSharpnessPvalue(1.0), aug.WithSharpnessFactor(10)))
+	// t, err := aug.Compose(aug.WithRandomAdjustSharpness(aug.WithSharpnessPvalue(1.0), aug.WithSharpnessFactor(10)))
 	// t, err := aug.Compose(aug.WithRandRotate(0, 360))
+	// Down sampling
+	// t, err := aug.Compose(aug.WithResize(h/2, w/2)) // NOTE. WithResize just works on CPU.
 	// t, err := aug.Compose(aug.WithResize(320, 320)) // NOTE. WithResize just works on CPU.
 	// t, err := aug.Compose(aug.WithRandomPosterize(aug.WithPosterizeBits(2), aug.WithPosterizePvalue(1.0)))
 	// t, err := aug.Compose(aug.WithRandomPerspective(aug.WithPerspectiveScale(0.6), aug.WithPerspectivePvalue(1.0)))
@ -59,12 +63,13 @@ func tOne() {
 	// t, err := aug.Compose(aug.WithRandomVFlip(1.0))
 	// t, err := aug.Compose(aug.WithRandomHFlip(1.0))
 	// t, err := aug.Compose(aug.WithRandomEqualize(1.0))
-	// t, err := aug.Compose(aug.WithRandomCutout(aug.WithCutoutValue([]int64{124, 96, 255}), aug.WithCutoutScale([]float64{0.01, 0.1}), aug.WithCutoutRatio([]float64{0.5, 0.5})))
+	// t, err := aug.Compose(aug.WithRandomCutout(aug.WithCutoutValue([]int64{124, 96, 255}), aug.WithCutoutScale([]float64{0.01, 0.1}), aug.WithCutoutRatio([]float64{0.5, 0.5}), aug.WithCutoutPvalue(1.0)))
 	// t, err := aug.Compose(aug.WithCenterCrop([]int64{320, 320}))
 	// t, err := aug.Compose(aug.WithRandomAutocontrast())
 	// t, err := aug.Compose(aug.WithColorJitter(0.3, 0.3, 0.3, 0.3))
 	// t, err := aug.Compose(aug.WithGaussianBlur([]int64{5, 5}, []float64{1.0, 2.0}))
 	// t, err := aug.Compose(aug.WithRandomAffine(aug.WithAffineDegree([]int64{0, 15}), aug.WithAffineShear([]float64{0, 15})))
+	t, err := aug.Compose(aug.WithRandomAffine(aug.WithAffineDegree([]int64{0, 15}), aug.WithAffineTranslate([]float64{0.0, 0.1})))

 	out := t.Transform(imgTs)
 	fname := fmt.Sprintf("./bb-transformed.jpg")
--- a/vision/aug/affine.go
+++ b/vision/aug/affine.go
@ -139,11 +139,11 @@ type affineOption func(*affineOptions)

 func defaultAffineOptions() *affineOptions {
 	return &affineOptions{
-		degree:            []int64{-180, 180},
-		translate:         nil,
-		scale:             nil,
-		shear:             []float64{-180.0, 180.0},
-		interpolationMode: "bilinear",
+		degree:            []int64{0, 0},
+		translate:         []float64{0, 0},
+		scale:             []float64{1, 1},
+		shear:             []float64{0, 0},
+		interpolationMode: "nearest",
 		fillValue:         []float64{0.0, 0.0, 0.0},
 	}
 }
--- a/vision/aug/function.go
+++ b/vision/aug/function.go
@ -951,7 +951,7 @@ func affine(img *ts.Tensor, angle float64, translations []int64, scale float64,
 	dtype := img.DType()
 	device := img.MustDevice()
 	dim := img.MustSize()
-	theta := ts.MustOfSlice(matrix).MustTotype(dtype, true).MustTo(device, true).MustView([]int64{1, 2, 3}, true)
+	theta := ts.MustOfSlice(matrix).MustTotype(dtype, true).MustTo(device, true).MustReshape([]int64{1, 2, 3}, true)

 	// grid will be generated on the same device as theta and img
 	w := dim[len(dim)-1]
@ -1043,44 +1043,34 @@ func genAffineGrid(theta *ts.Tensor, w, h, ow, oh int64) *ts.Tensor {
 	d := 0.5
 	dtype := theta.DType()
 	device := theta.MustDevice()
-	// base_grid = torch.empty(1, oh, ow, 3, dtype=theta.dtype, device=theta.device)
-	baseGrid := ts.MustEmpty([]int64{1, oh, ow, 3}, dtype, device)

-	// x_grid = torch.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, steps=ow, device=theta.device)
+	// base_grid = torch.empty(1, oh, ow, 3, dtype=theta.dtype, device=theta.device)
+	x := ts.MustEmpty([]int64{oh, ow, 3}, dtype, device)
+
 	startX := float64(-ow)*0.5 + d
 	endX := float64(ow)*0.5 + d - 1.0
 	xGrid := ts.MustLinspace(ts.FloatScalar(startX), ts.FloatScalar(endX), []int64{ow}, dtype, device)

-	// y_grid = torch.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, steps=oh, device=theta.device).unsqueeze_(-1)
 	startY := float64(-oh)*0.5 + d
 	endY := float64(oh)*0.5 + d - 1.0
-	yGrid := ts.MustLinspace(ts.FloatScalar(startY), ts.FloatScalar(endY), []int64{oh}, dtype, device)
+	yGrid := ts.MustLinspace(ts.FloatScalar(startY), ts.FloatScalar(endY), []int64{oh}, dtype, device).MustUnsqueeze(-1, true)
+
+	oneGrid := ts.MustOnes([]int64{ow}, dtype, device)

 	// base_grid[..., 0].copy_(x_grid)
 	// base_grid[..., 1].copy_(y_grid)
 	// base_grid[..., 2].fill_(1)
-	baseDim := baseGrid.MustSize()
-	for i := 0; i < int(baseDim[1]); i++ {
-		view := baseGrid.MustSelect(0, 0, false).MustSelect(0, int64(i), true).MustSelect(1, 0, true)
-		view.Copy_(xGrid)
-		view.MustDrop()
-	}
-	for i := 0; i < int(baseDim[2]); i++ {
-		view := baseGrid.MustSelect(0, 0, false).MustSelect(1, int64(i), true).MustSelect(1, 1, true)
-		view.Copy_(yGrid)
-		view.MustDrop()
-	}
+	xview := x.MustTranspose(2, 0, false).MustSelect(0, 0, true).MustTranspose(0, 1, true)
+	xview.Copy_(xGrid)
+	xview.MustDrop()

-	for i := 0; i < int(baseDim[2]); i++ {
-		view := baseGrid.MustSelect(0, 0, false).MustSelect(1, int64(i), true).MustSelect(1, 2, true)
-		// view.Fill_(ts.FloatScalar(1.0)) // NOTE. THIS CAUSES MEMORY LEAK!!!!
-		oneTs := view.MustOnesLike(false)
-		view.Copy_(oneTs)
-		oneTs.MustDrop()
-		view.MustDrop()
-	}
+	yview := x.MustTranspose(2, 0, false).MustSelect(0, 1, true).MustTranspose(0, 1, true)
+	yview.Copy_(yGrid)
+	yview.MustDrop()

-	// rescaled_theta = theta.transpose(1, 2) / torch.tensor([0.5 * w, 0.5 * h], dtype=theta.dtype, device=theta.device)
+	oview := x.MustTranspose(2, 0, false).MustSelect(0, 2, true).MustTranspose(0, 1, true)
+	oview.Copy_(oneGrid)
+	oview.MustDrop()

 	// rescaled_theta1 = theta1.transpose(1, 2) / torch.tensor([0.5 * ow, 0.5 * oh], dtype=dtype, device=device)
 	divTs := ts.MustOfSlice([]float64{0.5 * float64(w), 0.5 * float64(h)}).MustTotype(dtype, true).MustTo(device, true)
@ -1088,9 +1078,7 @@ func genAffineGrid(theta *ts.Tensor, w, h, ow, oh int64) *ts.Tensor {
 	divTs.MustDrop()

 	// output_grid = base_grid.view(1, oh * ow, 3).bmm(rescaled_theta)
-	outputGrid := baseGrid.MustView([]int64{1, oh * ow, 3}, false).MustBmm(rescaledTheta, true).MustView([]int64{1, oh, ow, 2}, true)
-
-	baseGrid.MustDrop()
+	outputGrid := x.MustView([]int64{1, oh * ow, 3}, true).MustBmm(rescaledTheta, true).MustView([]int64{1, oh, ow, 2}, true)
 	xGrid.MustDrop()
 	yGrid.MustDrop()
 	rescaledTheta.MustDrop()
--- a/vision/aug/resize.go
+++ b/vision/aug/resize.go
@ -26,11 +26,23 @@ func (rs *ResizeModule) Forward(x *ts.Tensor) *ts.Tensor {
 		err := fmt.Errorf("Invalid dtype. Expect uint8 (Byte) dtype. Got %v\n", dtype)
 		panic(err)
 	}
-	out, err := vision.Resize(x, rs.width, rs.height)
+
+	device := x.MustDevice()
+	var xCPU *ts.Tensor
+	if device != gotch.CPU {
+		xCPU = x.MustTo(device, false)
+	} else {
+		xCPU = x.MustShallowClone()
+	}
+
+	out, err := vision.Resize(xCPU, rs.width, rs.height)
 	if err != nil {
 		log.Fatal(err)
 	}
-	return out
+
+	xCPU.MustDrop()
+
+	return out.MustTo(device, true)
 }

 func WithResize(h, w int64) Option {
@ -42,3 +54,177 @@ func WithResize(h, w int64) Option {

 // TODO.
 type RandomResizedCrop struct{}
+
+// DownSample is a stateless augmentation module. Its Forward method resizes
+// the input image tensor to half of its height and half of its width.
+type DownSample struct{}
+
+// newDownSample allocates a DownSample module.
+// The p argument is accepted but currently ignored: DownSample has no fields.
+func newDownSample(p float64) *DownSample {
+	return new(DownSample)
+}
+
+// Forward implements ts.Module for DownSample.
+//
+// It returns a new tensor holding x resized to half its height and half its
+// width, placed on the same device as x. x itself is not dropped. Dimensions
+// 1 and 2 of x are read as height and width respectively.
+//
+// NOTE. input tensor must be uint8 (Byte) dtype otherwise panic!
+func (rs *DownSample) Forward(x *ts.Tensor) *ts.Tensor {
+	dtype := x.DType()
+	if dtype != gotch.Uint8 {
+		err := fmt.Errorf("Invalid dtype. Expect uint8 (Byte) dtype. Got %v\n", dtype)
+		panic(err)
+	}
+
+	device := x.MustDevice()
+	size := x.MustSize()
+	h, w := size[1], size[2]
+
+	// The resize path is noted as CPU-only, so stage the input on CPU.
+	// The copy must target gotch.CPU; copying to `device` would leave the
+	// tensor where it already is.
+	var xCPU *ts.Tensor
+	if device != gotch.CPU {
+		xCPU = x.MustTo(gotch.CPU, false)
+	} else {
+		xCPU = x.MustShallowClone()
+	}
+
+	out, err := vision.Resize(xCPU, w/2, h/2)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	xCPU.MustDrop()
+
+	// Move the result back to where the input lives, dropping the CPU result.
+	return out.MustTo(device, true)
+}
+
+// ZoomIn is an augmentation module configured by a single value v.
+// Its Forward method compares a random draw against v to decide whether to
+// apply the transform, and also uses v when sizing the crop.
+type ZoomIn struct {
+	v float64 // v should be [0, 0.5]
+}
+
+// newZoomIn builds a ZoomIn module whose v field is set to the given value.
+// No range validation happens here.
+func newZoomIn(v float64) *ZoomIn {
+	z := new(ZoomIn)
+	z.v = v
+	return z
+}
+
+// WithZoomIn returns an Option that installs a ZoomIn module built from v
+// into the Options it is applied to.
+//
+// It panics immediately (before returning the Option) when v is below 0 or
+// above 0.5.
+func WithZoomIn(v float64) Option {
+	if v < 0 || v > 0.5 {
+		panic(fmt.Errorf("Invalid input value. Expect value in range [0, 0.5]. Got %v\n", v))
+	}
+
+	return func(o *Options) {
+		o.zoomIn = newZoomIn(v)
+	}
+}
+
+// Forward implements ts.Module for ZoomIn.
+//
+// A random value is drawn with randPvalue(). When it is below rs.v, the image
+// is cropped via fitImg to (w - v*w) x (h - v*h) and resized back to w x h;
+// otherwise a shallow clone of x is returned. The result is on the same
+// device as x, and x itself is not dropped. Dimensions 1 and 2 of x are read
+// as height and width respectively.
+//
+// NOTE. fitImg is not implemented yet, so the applied branch currently panics.
+// NOTE. input tensor must be uint8 (Byte) dtype otherwise panic!
+func (rs *ZoomIn) Forward(x *ts.Tensor) *ts.Tensor {
+	dtype := x.DType()
+	if dtype != gotch.Uint8 {
+		err := fmt.Errorf("Invalid dtype. Expect uint8 (Byte) dtype. Got %v\n", dtype)
+		panic(err)
+	}
+
+	device := x.MustDevice()
+	size := x.MustSize()
+	h, w := size[1], size[2]
+
+	var out *ts.Tensor
+	r := randPvalue()
+	switch {
+	case r < rs.v:
+		// Stage the input on CPU only when the transform is applied, so the
+		// skip branch creates no tensor that would need dropping. The copy
+		// must target gotch.CPU; copying to `device` would be a no-op move.
+		var xCPU *ts.Tensor
+		if device != gotch.CPU {
+			xCPU = x.MustTo(gotch.CPU, false)
+		} else {
+			xCPU = x.MustShallowClone()
+		}
+
+		// Multiply in float64 before converting: int64(rs.v) on its own
+		// truncates every v in [0, 0.5] to 0.
+		cropW := int64(rs.v * float64(w))
+		cropH := int64(rs.v * float64(h))
+		newW := w - cropW
+		newH := h - cropH
+		// img = PIL.ImageOps.fit(img, size=(new_w,new_h), bleed=v/2, method=Image.BILINEAR)
+		fitted := fitImg(xCPU, newW, newH)
+		xCPU.MustDrop()
+		// return img.resize((w,h), resample=Image.BILINEAR)
+		resized, err := vision.Resize(fitted, w, h)
+		if err != nil {
+			log.Fatal(err)
+		}
+
+		fitted.MustDrop()
+		out = resized
+	default:
+		out = x.MustShallowClone()
+	}
+
+	return out.MustTo(device, true)
+}
+
+// fitImg is a placeholder: it is not implemented and every call panics.
+// Presumably it is meant to mirror PIL.ImageOps.fit (see the comment at its
+// call site in ZoomIn.Forward), producing a w x h crop of x. TODO confirm.
+// TODO.
+func fitImg(x *ts.Tensor, w, h int64) *ts.Tensor {
+
+	panic("Not implemented")
+}
+
+// ZoomOut is an augmentation module configured by a single value v.
+// Its Forward method compares a random draw against v to decide whether to
+// apply the transform, and also uses v when sizing the padding.
+type ZoomOut struct {
+	v float64 // v should be [0, 0.5]
+}
+
+// newZoomOut builds a ZoomOut module whose v field is set to the given value.
+// No range validation happens here.
+func newZoomOut(v float64) *ZoomOut {
+	z := new(ZoomOut)
+	z.v = v
+	return z
+}
+
+// WithZoomOut returns an Option that installs a ZoomOut module built from v
+// into the Options it is applied to.
+//
+// It panics immediately (before returning the Option) when v is below 0 or
+// above 0.5.
+func WithZoomOut(v float64) Option {
+	if v < 0 || v > 0.5 {
+		panic(fmt.Errorf("Invalid input value. Expect value in range [0, 0.5]. Got %v\n", v))
+	}
+
+	return func(o *Options) {
+		o.zoomOut = newZoomOut(v)
+	}
+}
+
+// Forward implements ts.Module for ZoomOut.
+//
+// A random value is drawn with randPvalue(). When it is below rs.v, the image
+// is padded via padImg by v*w and v*h in total and resized back to w x h;
+// otherwise a shallow clone of x is returned. The result is on the same
+// device as x, and x itself is not dropped. Dimensions 1 and 2 of x are read
+// as height and width respectively.
+//
+// NOTE. input tensor must be uint8 (Byte) dtype otherwise panic!
+func (rs *ZoomOut) Forward(x *ts.Tensor) *ts.Tensor {
+	dtype := x.DType()
+	if dtype != gotch.Uint8 {
+		err := fmt.Errorf("Invalid dtype. Expect uint8 (Byte) dtype. Got %v\n", dtype)
+		panic(err)
+	}
+
+	device := x.MustDevice()
+	size := x.MustSize()
+	h, w := size[1], size[2]
+
+	var out *ts.Tensor
+	r := randPvalue()
+	switch {
+	case r < rs.v:
+		// Stage the input on CPU only when the transform is applied, so the
+		// skip branch creates no tensor that would need dropping. The copy
+		// must target gotch.CPU; copying to `device` would be a no-op move.
+		var xCPU *ts.Tensor
+		if device != gotch.CPU {
+			xCPU = x.MustTo(gotch.CPU, false)
+		} else {
+			xCPU = x.MustShallowClone()
+		}
+
+		// Multiply in float64 before converting: int64(rs.v) on its own
+		// truncates every v in [0, 0.5] to 0.
+		padW := int64(rs.v * float64(w))
+		padH := int64(rs.v * float64(h))
+
+		padded := padImg(xCPU, padW, padH)
+		xCPU.MustDrop()
+		// return img.resize((w,h), resample=Image.BILINEAR)
+		resized, err := vision.Resize(padded, w, h)
+		if err != nil {
+			log.Fatal(err)
+		}
+
+		padded.MustDrop()
+		out = resized
+	default:
+		out = x.MustShallowClone()
+	}
+
+	return out.MustTo(device, true)
+}
+
+// padImg returns a new tensor holding x padded by w/2 on each side of its
+// last dimension and by h/2 on each side of its second-to-last dimension.
+// x is not dropped.
+//
+// NOTE(review): the reference snippet below uses mode='reflect', while this
+// uses constant padding. TODO: confirm which is intended.
+func padImg(x *ts.Tensor, w, h int64) *ts.Tensor {
+
+	// img = np.asarray(img)
+	// img = np.pad(img, [(pad_h//2,pad_h//2), (pad_w//2,pad_w//2), (0,0)], mode='reflect')
+
+	// The pad list is ordered from the last dimension backwards:
+	// {last-left, last-right, second-last-left, second-last-right}.
+	// Callers pass image tensors whose last dimension is width, so the
+	// width padding must come first.
+	return x.MustConstantPadNd([]int64{w / 2, w / 2, h / 2, h / 2}, false)
+}
--- a/vision/aug/transform.go
+++ b/vision/aug/transform.go
@ -44,6 +44,9 @@ type Options struct {
 	randomAutocontrast    *RandomAutocontrast
 	randomAdjustSharpness *RandomAdjustSharpness
 	randomEqualize        *RandomEqualize
+	downSample            *DownSample
+	zoomIn                *ZoomIn
+	zoomOut               *ZoomOut
 	normalize             *Normalize
 }

@ -68,6 +71,9 @@ func defaultOption() *Options {
 		randomAutocontrast:    nil,
 		randomAdjustSharpness: nil,
 		randomEqualize:        nil,
+		downSample:            nil,
+		zoomIn:                nil,
+		zoomOut:               nil,
 		normalize:             nil,
 	}
 }
@ -165,6 +171,18 @@ func Compose(opts ...Option) (Transformer, error) {
 		augs.Add(augOpts.normalize)
 	}

+	if augOpts.downSample != nil {
+		augs.Add(augOpts.downSample)
+	}
+
+	if augOpts.zoomIn != nil {
+		augs.Add(augOpts.zoomIn)
+	}
+
+	if augOpts.zoomOut != nil {
+		augs.Add(augOpts.zoomOut)
+	}
+
 	return &Augment{augs}, nil
 }