diff --git a/CHANGELOG.md b/CHANGELOG.md index c5d871e..843f396 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fixed double free tensor at `vision/aug/function.go` Equalize func. - Changed `vision/aug` all input image should be `uint8` (Byte) dtype and transformed output has the same dtype (uint8) so that `Compose()` can compose any transformer options. - Fixed wrong result of `aug.RandomAdjustSharpness` +- Fixed memory leak at `aug/function.genAffineGrid` ## [0.3.10] - Update installation at README.md diff --git a/example/augmentation/bb-transformed.jpg b/example/augmentation/bb-transformed.jpg index d572ac6..7cc6f98 100644 Binary files a/example/augmentation/bb-transformed.jpg and b/example/augmentation/bb-transformed.jpg differ diff --git a/example/augmentation/main.go b/example/augmentation/main.go index b6f378c..7491e50 100644 --- a/example/augmentation/main.go +++ b/example/augmentation/main.go @@ -42,14 +42,18 @@ func tOne() { panic(err) } - device := gotch.CudaIfAvailable() - // device := gotch.CPU + // device := gotch.CudaIfAvailable() + device := gotch.CPU imgTs := img.MustTo(device, true) + // h := imgTs.MustSize()[1] + // w := imgTs.MustSize()[2] // t, err := aug.Compose(aug.WithRandomAutocontrast(1.0)) // t, err := aug.Compose(aug.WithRandomSolarize(aug.WithSolarizeThreshold(125), aug.WithSolarizePvalue(1.0))) - t, err := aug.Compose(aug.WithRandomAdjustSharpness(aug.WithSharpnessPvalue(1.0), aug.WithSharpnessFactor(10))) + // t, err := aug.Compose(aug.WithRandomAdjustSharpness(aug.WithSharpnessPvalue(1.0), aug.WithSharpnessFactor(10))) // t, err := aug.Compose(aug.WithRandRotate(0, 360)) + // Down sampling + // t, err := aug.Compose(aug.WithResize(h/2, w/2)) // NOTE. WithResize just works on CPU. // t, err := aug.Compose(aug.WithResize(320, 320)) // NOTE. WithResize just works on CPU. 
// t, err := aug.Compose(aug.WithRandomPosterize(aug.WithPosterizeBits(2), aug.WithPosterizePvalue(1.0))) // t, err := aug.Compose(aug.WithRandomPerspective(aug.WithPerspectiveScale(0.6), aug.WithPerspectivePvalue(1.0))) @@ -59,12 +63,13 @@ func tOne() { // t, err := aug.Compose(aug.WithRandomVFlip(1.0)) // t, err := aug.Compose(aug.WithRandomHFlip(1.0)) // t, err := aug.Compose(aug.WithRandomEqualize(1.0)) - // t, err := aug.Compose(aug.WithRandomCutout(aug.WithCutoutValue([]int64{124, 96, 255}), aug.WithCutoutScale([]float64{0.01, 0.1}), aug.WithCutoutRatio([]float64{0.5, 0.5}))) + // t, err := aug.Compose(aug.WithRandomCutout(aug.WithCutoutValue([]int64{124, 96, 255}), aug.WithCutoutScale([]float64{0.01, 0.1}), aug.WithCutoutRatio([]float64{0.5, 0.5}), aug.WithCutoutPvalue(1.0))) // t, err := aug.Compose(aug.WithCenterCrop([]int64{320, 320})) // t, err := aug.Compose(aug.WithRandomAutocontrast()) // t, err := aug.Compose(aug.WithColorJitter(0.3, 0.3, 0.3, 0.3)) // t, err := aug.Compose(aug.WithGaussianBlur([]int64{5, 5}, []float64{1.0, 2.0})) // t, err := aug.Compose(aug.WithRandomAffine(aug.WithAffineDegree([]int64{0, 15}), aug.WithAffineShear([]float64{0, 15}))) + t, err := aug.Compose(aug.WithRandomAffine(aug.WithAffineDegree([]int64{0, 15}), aug.WithAffineTranslate([]float64{0.0, 0.1}))) out := t.Transform(imgTs) fname := fmt.Sprintf("./bb-transformed.jpg") diff --git a/vision/aug/affine.go b/vision/aug/affine.go index 243aaec..a48b08d 100644 --- a/vision/aug/affine.go +++ b/vision/aug/affine.go @@ -139,11 +139,11 @@ type affineOption func(*affineOptions) func defaultAffineOptions() *affineOptions { return &affineOptions{ - degree: []int64{-180, 180}, - translate: nil, - scale: nil, - shear: []float64{-180.0, 180.0}, - interpolationMode: "bilinear", + degree: []int64{0, 0}, + translate: []float64{0, 0}, + scale: []float64{1, 1}, + shear: []float64{0, 0}, + interpolationMode: "nearest", fillValue: []float64{0.0, 0.0, 0.0}, } } diff --git 
a/vision/aug/function.go b/vision/aug/function.go
index c3de4ba..8302071 100644
--- a/vision/aug/function.go
+++ b/vision/aug/function.go
@@ -951,7 +951,7 @@ func affine(img *ts.Tensor, angle float64, translations []int64, scale float64,
 	dtype := img.DType()
 	device := img.MustDevice()
 	dim := img.MustSize()
-	theta := ts.MustOfSlice(matrix).MustTotype(dtype, true).MustTo(device, true).MustView([]int64{1, 2, 3}, true)
+	theta := ts.MustOfSlice(matrix).MustTotype(dtype, true).MustTo(device, true).MustReshape([]int64{1, 2, 3}, true)
 
 	// grid will be generated on the same device as theta and img
 	w := dim[len(dim)-1]
@@ -1043,44 +1043,36 @@ func genAffineGrid(theta *ts.Tensor, w, h, ow, oh int64) *ts.Tensor {
 	d := 0.5
 	dtype := theta.DType()
 	device := theta.MustDevice()
-	// base_grid = torch.empty(1, oh, ow, 3, dtype=theta.dtype, device=theta.device)
-	baseGrid := ts.MustEmpty([]int64{1, oh, ow, 3}, dtype, device)
-	// x_grid = torch.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, steps=ow, device=theta.device)
+	// base_grid = torch.empty(1, oh, ow, 3, dtype=theta.dtype, device=theta.device)
+	x := ts.MustEmpty([]int64{oh, ow, 3}, dtype, device)
+
 	startX := float64(-ow)*0.5 + d
 	endX := float64(ow)*0.5 + d - 1.0
 	xGrid := ts.MustLinspace(ts.FloatScalar(startX), ts.FloatScalar(endX), []int64{ow}, dtype, device)
 
-	// y_grid = torch.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, steps=oh, device=theta.device).unsqueeze_(-1)
 	startY := float64(-oh)*0.5 + d
 	endY := float64(oh)*0.5 + d - 1.0
-	yGrid := ts.MustLinspace(ts.FloatScalar(startY), ts.FloatScalar(endY), []int64{oh}, dtype, device)
+	yGrid := ts.MustLinspace(ts.FloatScalar(startY), ts.FloatScalar(endY), []int64{oh}, dtype, device).MustUnsqueeze(-1, true)
+
+	oneGrid := ts.MustOnes([]int64{ow}, dtype, device)
 
 	// base_grid[..., 0].copy_(x_grid)
 	// base_grid[..., 1].copy_(y_grid)
 	// base_grid[..., 2].fill_(1)
 
-	baseDim := baseGrid.MustSize()
-	for i := 0; i < int(baseDim[1]); i++ {
-		view := baseGrid.MustSelect(0, 0, false).MustSelect(0, int64(i), true).MustSelect(1, 0, true)
-		view.Copy_(xGrid)
-		view.MustDrop()
-	}
-	for i := 0; i < int(baseDim[2]); i++ {
-		view := baseGrid.MustSelect(0, 0, false).MustSelect(1, int64(i), true).MustSelect(1, 1, true)
-		view.Copy_(yGrid)
-		view.MustDrop()
-	}
+	xview := x.MustTranspose(2, 0, false).MustSelect(0, 0, true).MustTranspose(0, 1, true)
+	xview.Copy_(xGrid)
+	xview.MustDrop()
 
-	for i := 0; i < int(baseDim[2]); i++ {
-		view := baseGrid.MustSelect(0, 0, false).MustSelect(1, int64(i), true).MustSelect(1, 2, true)
-		// view.Fill_(ts.FloatScalar(1.0)) // NOTE. THIS CAUSES MEMORY LEAK!!!!
-		oneTs := view.MustOnesLike(false)
-		view.Copy_(oneTs)
-		oneTs.MustDrop()
-		view.MustDrop()
-	}
+	yview := x.MustTranspose(2, 0, false).MustSelect(0, 1, true).MustTranspose(0, 1, true)
+	yview.Copy_(yGrid)
+	yview.MustDrop()
 
-	// rescaled_theta = theta.transpose(1, 2) / torch.tensor([0.5 * w, 0.5 * h], dtype=theta.dtype, device=theta.device)
+	oview := x.MustTranspose(2, 0, false).MustSelect(0, 2, true).MustTranspose(0, 1, true)
+	oview.Copy_(oneGrid)
+	oview.MustDrop()
+	// oneGrid has been copied into x and is not used again; free it.
+	oneGrid.MustDrop()
 
 	// rescaled_theta1 = theta1.transpose(1, 2) / torch.tensor([0.5 * ow, 0.5 * oh], dtype=dtype, device=device)
 	divTs := ts.MustOfSlice([]float64{0.5 * float64(w), 0.5 * float64(h)}).MustTotype(dtype, true).MustTo(device, true)
@@ -1088,9 +1080,7 @@ func genAffineGrid(theta *ts.Tensor, w, h, ow, oh int64) *ts.Tensor {
 	divTs.MustDrop()
 
 	// output_grid = base_grid.view(1, oh * ow, 3).bmm(rescaled_theta)
-	outputGrid := baseGrid.MustView([]int64{1, oh * ow, 3}, false).MustBmm(rescaledTheta, true).MustView([]int64{1, oh, ow, 2}, true)
-
-	baseGrid.MustDrop()
+	outputGrid := x.MustView([]int64{1, oh * ow, 3}, true).MustBmm(rescaledTheta, true).MustView([]int64{1, oh, ow, 2}, true)
 	xGrid.MustDrop()
 	yGrid.MustDrop()
 	rescaledTheta.MustDrop()
diff --git a/vision/aug/resize.go b/vision/aug/resize.go
index 4da26e9..567d7fc 100644
--- a/vision/aug/resize.go
+++ b/vision/aug/resize.go
@@ -26,11 +26,23 @@
func (rs *ResizeModule) Forward(x *ts.Tensor) *ts.Tensor {
 		err := fmt.Errorf("Invalid dtype. Expect uint8 (Byte) dtype. Got %v\n", dtype)
 		panic(err)
 	}
-	out, err := vision.Resize(x, rs.width, rs.height)
+
+	device := x.MustDevice()
+	var xCPU *ts.Tensor
+	if device != gotch.CPU {
+		xCPU = x.MustTo(gotch.CPU, false)
+	} else {
+		xCPU = x.MustShallowClone()
+	}
+
+	out, err := vision.Resize(xCPU, rs.width, rs.height)
 	if err != nil {
 		log.Fatal(err)
 	}
-	return out
+
+	xCPU.MustDrop()
+
+	return out.MustTo(device, true)
 }
 
 func WithResize(h, w int64) Option {
@@ -42,3 +54,199 @@ func WithResize(h, w int64) Option {
 
 // TODO.
 type RandomResizedCrop struct{}
+
+// DownSample is a transformer that resizes its input to half of its dim-1 (height) and dim-2 (width) sizes.
+type DownSample struct{}
+
+// newDownSample creates a DownSample. The p argument is currently unused.
+func newDownSample(p float64) *DownSample {
+	return &DownSample{}
+}
+
+// Forward implements ts.Module for DownSample.
+// NOTE. input tensor must be uint8 (Byte) dtype otherwise panic!
+func (rs *DownSample) Forward(x *ts.Tensor) *ts.Tensor {
+	dtype := x.DType()
+	if dtype != gotch.Uint8 {
+		err := fmt.Errorf("Invalid dtype. Expect uint8 (Byte) dtype. Got %v\n", dtype)
+		panic(err)
+	}
+
+	device := x.MustDevice()
+	h := x.MustSize()[1]
+	w := x.MustSize()[2]
+	var xCPU *ts.Tensor
+	if device != gotch.CPU {
+		// Work on a CPU copy; the result is moved back to device on return.
+		xCPU = x.MustTo(gotch.CPU, false)
+	} else {
+		xCPU = x.MustShallowClone()
+	}
+
+	out, err := vision.Resize(xCPU, w/2, h/2)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	xCPU.MustDrop()
+	return out.MustTo(device, true)
+}
+
+// ZoomIn is a transformer that, when randPvalue() returns a value below v, fits the input
+// to a size reduced by a fraction v of its width and height and resizes it back to the input size.
+type ZoomIn struct {
+	v float64 // v should be [0, 0.5]
+}
+
+// newZoomIn creates a ZoomIn with the given value v.
+func newZoomIn(v float64) *ZoomIn {
+	return &ZoomIn{v: v}
+}
+
+// WithZoomIn sets the ZoomIn transformer option. It panics if v is outside [0, 0.5].
+func WithZoomIn(v float64) Option {
+	if v < 0 || v > 0.5 {
+		err := fmt.Errorf("Invalid input value. Expect value in range [0, 0.5]. Got %v\n", v)
+		panic(err)
+	}
+	return func(o *Options) {
+		ds := newZoomIn(v)
+		o.zoomIn = ds
+	}
+}
+
+// Forward implements ts.Module for ZoomIn.
+// NOTE. input tensor must be uint8 (Byte) dtype otherwise panic!
+func (rs *ZoomIn) Forward(x *ts.Tensor) *ts.Tensor {
+	dtype := x.DType()
+	if dtype != gotch.Uint8 {
+		err := fmt.Errorf("Invalid dtype. Expect uint8 (Byte) dtype. Got %v\n", dtype)
+		panic(err)
+	}
+
+	device := x.MustDevice()
+	h := x.MustSize()[1]
+	w := x.MustSize()[2]
+	var xCPU *ts.Tensor
+	if device != gotch.CPU {
+		// Work on a CPU copy; the result is moved back to device on return.
+		xCPU = x.MustTo(gotch.CPU, false)
+	} else {
+		xCPU = x.MustShallowClone()
+	}
+
+	var out *ts.Tensor
+	var err error
+	r := randPvalue()
+	switch {
+	case r < rs.v:
+		// Scale in float64 first: int64(rs.v) truncates any v in [0, 0.5] to 0.
+		cropW := int64(rs.v * float64(w))
+		cropH := int64(rs.v * float64(h))
+		newW := w - cropW
+		newH := h - cropH
+		// img = PIL.ImageOps.fit(img, size=(new_w,new_h), bleed=v/2, method=Image.BILINEAR)
+		fitted := fitImg(xCPU, newW, newH)
+		xCPU.MustDrop()
+		// return img.resize((w,h), resample=Image.BILINEAR)
+		out, err = vision.Resize(fitted, w, h)
+		if err != nil {
+			log.Fatal(err)
+		}
+
+		fitted.MustDrop()
+	default:
+		// The CPU copy is not used on this path; free it.
+		xCPU.MustDrop()
+		out = x.MustShallowClone()
+	}
+
+	return out.MustTo(device, true)
+}
+
+// TODO. fitImg is not implemented yet; calling it panics.
+func fitImg(x *ts.Tensor, w, h int64) *ts.Tensor {
+
+	panic("Not implemented")
+}
+
+// ZoomOut is a transformer that, when randPvalue() returns a value below v, pads the input
+// by a fraction v of its width and height and resizes the result back to the input size.
+type ZoomOut struct {
+	v float64 // v should be [0, 0.5]
+}
+
+// newZoomOut creates a ZoomOut with the given value v.
+func newZoomOut(v float64) *ZoomOut {
+	return &ZoomOut{v: v}
+}
+
+// WithZoomOut sets the ZoomOut transformer option. It panics if v is outside [0, 0.5].
+func WithZoomOut(v float64) Option {
+	if v < 0 || v > 0.5 {
+		err := fmt.Errorf("Invalid input value. Expect value in range [0, 0.5]. Got %v\n", v)
+		panic(err)
+	}
+	return func(o *Options) {
+		ds := newZoomOut(v)
+		o.zoomOut = ds
+	}
+}
+
+// Forward implements ts.Module for ZoomOut.
+// NOTE. input tensor must be uint8 (Byte) dtype otherwise panic!
+func (rs *ZoomOut) Forward(x *ts.Tensor) *ts.Tensor {
+	dtype := x.DType()
+	if dtype != gotch.Uint8 {
+		err := fmt.Errorf("Invalid dtype. Expect uint8 (Byte) dtype. Got %v\n", dtype)
+		panic(err)
+	}
+
+	device := x.MustDevice()
+	h := x.MustSize()[1]
+	w := x.MustSize()[2]
+	var xCPU *ts.Tensor
+	if device != gotch.CPU {
+		// Work on a CPU copy; the result is moved back to device on return.
+		xCPU = x.MustTo(gotch.CPU, false)
+	} else {
+		xCPU = x.MustShallowClone()
+	}
+
+	var out *ts.Tensor
+	var err error
+	r := randPvalue()
+	switch {
+	case r < rs.v:
+		// Scale in float64 first: int64(rs.v) truncates any v in [0, 0.5] to 0.
+		padW := int64(rs.v * float64(w))
+		padH := int64(rs.v * float64(h))
+
+		padded := padImg(xCPU, padW, padH)
+		xCPU.MustDrop()
+		// return img.resize((w,h), resample=Image.BILINEAR)
+		out, err = vision.Resize(padded, w, h)
+		if err != nil {
+			log.Fatal(err)
+		}
+
+		padded.MustDrop()
+	default:
+		// The CPU copy is not used on this path; free it.
+		xCPU.MustDrop()
+		out = x.MustShallowClone()
+	}
+
+	return out.MustTo(device, true)
+}
+
+// padImg returns x padded (constant padding) by w/2 on each side of its last dimension
+// and by h/2 on each side of its second-to-last dimension. x itself is not dropped.
+// TODO. the reference implementation below pads with mode='reflect', not a constant.
+func padImg(x *ts.Tensor, w, h int64) *ts.Tensor {
+
+	// img = np.asarray(img)
+	// img = np.pad(img, [(pad_h//2,pad_h//2), (pad_w//2,pad_w//2), (0,0)], mode='reflect')
+	// The pad list starts with the last dimension (width), then the one before it (height).
+	return x.MustConstantPadNd([]int64{w / 2, w / 2, h / 2, h / 2}, false)
+}
diff --git a/vision/aug/transform.go b/vision/aug/transform.go
index 2767d60..5e69cb1 100644
--- a/vision/aug/transform.go
+++ b/vision/aug/transform.go
@@ -44,6 +44,9 @@ type Options struct {
 	randomAutocontrast    *RandomAutocontrast
 	randomAdjustSharpness *RandomAdjustSharpness
 	randomEqualize        *RandomEqualize
+	downSample            *DownSample
+	zoomIn                *ZoomIn
+	zoomOut               *ZoomOut
 	normalize             *Normalize
 }
 
@@ -68,6 +71,9 @@ func defaultOption() *Options {
 		randomAutocontrast:    nil,
 		randomAdjustSharpness: nil,
 		randomEqualize:        nil,
+		downSample:            nil,
+		zoomIn:                nil,
+		zoomOut:               nil,
 		normalize:             nil,
 	}
 }
@@ -165,6 +171,18 @@ func Compose(opts ...Option) (Transformer, error) {
 		augs.Add(augOpts.normalize)
 	}
 
+	if augOpts.downSample != nil {
+		augs.Add(augOpts.downSample)
+	}
+
+	if augOpts.zoomIn != nil {
+		augs.Add(augOpts.zoomIn)
+	}
+
+	if augOpts.zoomOut != nil {
+		augs.Add(augOpts.zoomOut)
+	}
+
 	return &Augment{augs}, nil
 }
 