added image augmentation and minor fixed on ts.Lstsq

2021-05-22 21:02:22 +10:00 · 2021-05-22 21:02:22 +10:00 · 7292c3575e
commit 7292c3575e
parent fe6454c0ca
27 changed files with 3409 additions and 1 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -6,9 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ## [Unreleased]
 ## [0.3.9]
 - [#24], [#26]: fixed memory leak.
 - [#30]: fixed random panic (segmentation fault) in varstore.Save()
 - [#32]: nn.Seq Forward return nil tensor if length of layers = 1
 - [#36]: resolved image augmentation
 ## [0.3.8]
--- a/example/augmentation/README.md
+++ b/example/augmentation/README.md
@ -0,0 +1,31 @@
 # Image Augmentation Example
 This example demonstrates how to use the image augmentation functions. The API follows the original [PyTorch torchvision transforms](https://pytorch.org/vision/stable/transforms.html#) as closely as possible.
 There are 2 APIs (`aug.Compose` and `aug.OneOf`) to compose augmentation methods as shown in the example: 
 ```go
 		t, err := aug.Compose(
 			aug.WithRandomVFlip(0.5),
 			aug.WithRandomHFlip(0.5),
 			aug.WithRandomCutout(),
 			aug.OneOf(
 				0.3,
 				aug.WithColorJitter(0.3, 0.3, 0.3, 0.4),
 				aug.WithRandomGrayscale(1.0),
 			),
 			aug.OneOf(
 				0.3,
 				aug.WithGaussianBlur([]int64{5, 5}, []float64{1.0, 2.0}),
 				aug.WithRandomAffine(),
 			),
 		)
 		if err != nil {
 			panic(err)
 		}
 		out := t.Transform(imgTs)
 ```
--- a/example/augmentation/bb.png
+++ b/example/augmentation/bb.png
--- a/example/augmentation/main.go
+++ b/example/augmentation/main.go
@ -0,0 +1,69 @@
 package main
 import (
 	"fmt"
 	"github.com/sugarme/gotch"
 	"github.com/sugarme/gotch/vision"
 	"github.com/sugarme/gotch/vision/aug"
 )
 // main runs the augmentation demo: it loads ./bb.png n times, applies a
 // randomly composed augmentation pipeline to it and writes each result to
 // ./output/bb-NNN.png. Any failure aborts the program with a panic.
 func main() {
 	n := 360
 	for i := 1; i <= n; i++ {
 		img, err := vision.Load("./bb.png")
 		if err != nil {
 			panic(err)
 		}
 		device := gotch.CudaIfAvailable()
 		// device := gotch.CPU
 		// NOTE(review): the `true` flag presumably drops `img` after the move, which
 		// would explain why only imgTs is dropped below - confirm against Tensor.MustTo.
 		imgTs := img.MustTo(device, true)
 		// Alternative single-transform pipelines, kept for experimentation:
 		// t, err := aug.Compose(aug.WithResize(512, 512)) // NOTE. WithResize just works on CPU.
 		// t, err := aug.Compose(aug.WithRandRotate(0, 360), aug.WithColorJitter(0.3, 0.3, 0.3, 0.4))
 		// t, err := aug.Compose(aug.WithGaussianBlur([]int64{5, 5}, []float64{1.0, 2.0}), aug.WithRandRotate(0, 360), aug.WithColorJitter(0.3, 0.3, 0.3, 0.3))
 		// t, err := aug.Compose(aug.WithRandomCrop([]int64{320, 320}, []int64{10, 10}, true, "constant"))
 		// t, err := aug.Compose(aug.WithCenterCrop([]int64{320, 320}))
 		// t, err := aug.Compose(aug.WithRandomCutout(aug.WithCutoutValue([]int64{124, 96, 255}), aug.WithCutoutScale([]float64{0.01, 0.1}), aug.WithCutoutRatio([]float64{0.5, 0.5})))
 		// t, err := aug.Compose(aug.WithRandomPerspective(aug.WithPerspectiveScale(0.6), aug.WithPerspectivePvalue(0.8)))
 		// t, err := aug.Compose(aug.WithRandomAffine(aug.WithAffineDegree([]int64{0, 15}), aug.WithAffineShear([]float64{0, 15})))
 		// t, err := aug.Compose(aug.WithRandomGrayscale(0.5))
 		// t, err := aug.Compose(aug.WithRandomSolarize(aug.WithSolarizeThreshold(125), aug.WithSolarizePvalue(0.5)))
 		// t, err := aug.Compose(aug.WithRandomInvert(0.5))
 		// t, err := aug.Compose(aug.WithRandomPosterize(aug.WithPosterizeBits(2), aug.WithPosterizePvalue(1.0)))
 		// t, err := aug.Compose(aug.WithRandomAutocontrast())
 		// t, err := aug.Compose(aug.WithRandomAdjustSharpness(aug.WithSharpnessPvalue(0.3), aug.WithSharpnessFactor(10)))
 		// t, err := aug.Compose(aug.WithRandomEqualize(1.0))
 		// t, err := aug.Compose(aug.WithNormalize(aug.WithNormalizeMean([]float64{0.485, 0.456, 0.406}), aug.WithNormalizeStd([]float64{0.229, 0.224, 0.225})))
 		// Active pipeline: random flips and cutout, followed by two OneOf groups.
 		t, err := aug.Compose(
 			aug.WithRandomVFlip(0.5),
 			aug.WithRandomHFlip(0.5),
 			aug.WithRandomCutout(),
 			aug.OneOf(
 				0.3,
 				aug.WithColorJitter(0.3, 0.3, 0.3, 0.4),
 				aug.WithRandomGrayscale(1.0),
 			),
 			aug.OneOf(
 				0.3,
 				aug.WithGaussianBlur([]int64{5, 5}, []float64{1.0, 2.0}),
 				aug.WithRandomAffine(),
 			),
 		)
 		if err != nil {
 			panic(err)
 		}
 		out := t.Transform(imgTs)
 		fname := fmt.Sprintf("./output/bb-%03d.png", i)
 		err = vision.Save(out, fname)
 		if err != nil {
 			panic(err)
 		}
 		// Release the input and output tensors explicitly before the next iteration.
 		imgTs.MustDrop()
 		out.MustDrop()
 		fmt.Printf("%03d/%v completed.\n", i, n)
 	}
 }
--- a/example/augmentation/output/.gitignore
+++ b/example/augmentation/output/.gitignore
@ -0,0 +1,3 @@
 *
 !.gitignore
 !README.md
--- a/example/augmentation/output/README.md
+++ b/example/augmentation/output/README.md
@ -0,0 +1 @@
 Output images will be here.
--- a/tensor/patch.go
+++ b/tensor/patch.go
@ -581,7 +581,7 @@ func (ts *Tensor) Lstsq(a *Tensor, del bool) (retVal *Tensor, err error) {
 }
 func (ts *Tensor) MustLstsq(a *Tensor, del bool) (retVal *Tensor) {
-	retVal, err := ts.Lstsq(del)
+	retVal, err := ts.Lstsq(a, del)
 	if err != nil {
 		log.Fatal(err)
 	}
--- a/vision/aug/affine.go
+++ b/vision/aug/affine.go
@ -0,0 +1,185 @@
 package aug
 import (
 	"github.com/sugarme/gotch"
 	ts "github.com/sugarme/gotch/tensor"
 )
 // RandomAffine applies a random affine transformation to an image while
 // keeping its center invariant. It mirrors torchvision.transforms.RandomAffine.
 // The input tensor is expected to have [..., H, W] shape, where ... means an
 // arbitrary number of leading dimensions.
 //
 // Fields:
 // - degree: [min, max] range the rotation angle is uniformly sampled from.
 // - translate: maximum absolute fractions (a, b) for horizontal and vertical
 // translation. The horizontal shift is sampled from (-img_width*a, img_width*a)
 // and the vertical shift from (-img_height*b, img_height*b). nil disables translation.
 // - scale: [min, max] range the scaling factor is uniformly sampled from.
 // nil keeps the original scale.
 // - shear: range of shear degrees. The first two values give the x-axis shear
 // range; when 4 values are given, the last two give the y-axis shear range.
 // nil disables shear.
 // - interpolationMode: name of the interpolation mode handed to the affine helper.
 // - fillValue: pixel fill value for the area outside the transformed image.
 type RandomAffine struct {
 	degree            []int64 // degree range
 	translate         []float64
 	scale             []float64 // scale range
 	shear             []float64
 	interpolationMode string
 	fillValue         []float64
 }
 // getParams samples the random parameters of one affine transformation.
 //
 // imageSize is {width, height} of the image to be transformed.
 // It returns, in order: the rotation angle sampled from ra.degree, the
 // {dx, dy} translation in pixels ({0, 0} when ra.translate is nil), the scale
 // factor (1.0 when ra.scale is nil) and the {shearX, shearY} shear values
 // (0.0 for any axis without a configured range).
 func (ra *RandomAffine) getParams(imageSize []int64) (float64, []int64, float64, []float64) {
 	angleTs := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU)
 	angleTs.MustUniform_(float64(ra.degree[0]), float64(ra.degree[1]))
 	angle := angleTs.Float64Values()[0]
 	angleTs.MustDrop()
 	translations := []int64{0, 0}
 	if ra.translate != nil {
 		maxDX := ra.translate[0] * float64(imageSize[0])
 		maxDY := ra.translate[1] * float64(imageSize[1])
 		dx := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU)
 		dx.MustUniform_(-maxDX, maxDX)
 		tx := dx.Float64Values()[0]
 		dx.MustDrop()
 		dy := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU)
 		dy.MustUniform_(-maxDY, maxDY)
 		// Read the vertical shift from dy. Reading it from dx (already dropped)
 		// was a use-after-drop and ignored the sampled vertical value.
 		ty := dy.Float64Values()[0]
 		dy.MustDrop()
 		// The fractional part is truncated here (torchvision rounds instead).
 		translations = []int64{int64(tx), int64(ty)}
 	}
 	scale := 1.0
 	if ra.scale != nil {
 		scaleTs := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU)
 		scaleTs.MustUniform_(ra.scale[0], ra.scale[1])
 		scale = scaleTs.Float64Values()[0]
 		scaleTs.MustDrop()
 	}
 	var shearX, shearY float64
 	if ra.shear != nil {
 		shearXTs := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU)
 		shearXTs.MustUniform_(ra.shear[0], ra.shear[1])
 		shearX = shearXTs.Float64Values()[0]
 		shearXTs.MustDrop()
 		// A y-axis shear is only sampled when a second (min, max) pair is given.
 		if len(ra.shear) == 4 {
 			shearYTs := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU)
 			shearYTs.MustUniform_(ra.shear[2], ra.shear[3])
 			shearY = shearYTs.Float64Values()[0]
 			shearYTs.MustDrop()
 		}
 	}
 	return angle, translations, scale, []float64{shearX, shearY}
 }
 // Forward samples a fresh set of affine parameters for x and returns the
 // result of applying them through the affine helper.
 func (ra *RandomAffine) Forward(x *ts.Tensor) *ts.Tensor {
 	width, height := getImageSize(x)
 	size := []int64{width, height}
 	angle, translations, scale, shear := ra.getParams(size)
 	return affine(x, angle, translations, scale, shear, ra.interpolationMode, ra.fillValue)
 }
 // newRandomAffine builds a RandomAffine from the default options with the
 // given option functions applied on top, in order.
 func newRandomAffine(opts ...affineOption) *RandomAffine {
 	cfg := defaultAffineOptions()
 	for i := range opts {
 		opts[i](cfg)
 	}
 	ra := new(RandomAffine)
 	ra.degree = cfg.degree
 	ra.translate = cfg.translate
 	ra.scale = cfg.scale
 	ra.shear = cfg.shear
 	ra.interpolationMode = cfg.interpolationMode
 	ra.fillValue = cfg.fillValue
 	return ra
 }
 // affineOptions collects the configurable settings of RandomAffine. Its fields
 // are copied one-to-one into a RandomAffine by newRandomAffine.
 type affineOptions struct {
 	degree            []int64
 	translate         []float64
 	scale             []float64
 	shear             []float64
 	interpolationMode string
 	fillValue         []float64
 }
 // affineOption is a functional option that mutates an affineOptions value.
 type affineOption func(*affineOptions)
 // defaultAffineOptions returns the settings used when no option is supplied:
 // rotation in [-180, 180] degrees, no translation, no scaling, shear in
 // [-180, 180], bilinear interpolation and a zero fill value.
 // NOTE(review): torchvision applies no shear by default, whereas the default
 // here enables a full-range x-axis shear - confirm this is intended.
 func defaultAffineOptions() *affineOptions {
 	var opts affineOptions
 	opts.degree = []int64{-180, 180}
 	opts.translate = nil
 	opts.scale = nil
 	opts.shear = []float64{-180.0, 180.0}
 	opts.interpolationMode = "bilinear"
 	opts.fillValue = []float64{0.0, 0.0, 0.0}
 	return &opts
 }
 // WithAffineDegree sets the [min, max] range the rotation angle is sampled from.
 func WithAffineDegree(degree []int64) affineOption {
 	return func(cfg *affineOptions) { cfg.degree = degree }
 }
 // WithAffineTranslate sets the maximum horizontal and vertical translation
 // fractions (relative to image width and height).
 func WithAffineTranslate(translate []float64) affineOption {
 	return func(cfg *affineOptions) { cfg.translate = translate }
 }
 // WithAffineScale sets the [min, max] range the scale factor is sampled from.
 func WithAffineScale(scale []float64) affineOption {
 	return func(cfg *affineOptions) { cfg.scale = scale }
 }
 // WithAffineShear sets the shear range: 2 values for the x axis, or 4 values
 // for the x axis followed by the y axis.
 func WithAffineShear(shear []float64) affineOption {
 	return func(cfg *affineOptions) { cfg.shear = shear }
 }
 // WithAffineMode sets the interpolation mode name (the default is "bilinear").
 func WithAffineMode(mode string) affineOption {
 	return func(cfg *affineOptions) { cfg.interpolationMode = mode }
 }
 // WithAffineFillValue sets the fill value used for the area outside the
 // transformed image.
 func WithAffineFillValue(fillValue []float64) affineOption {
 	return func(cfg *affineOptions) { cfg.fillValue = fillValue }
 }
 // WithRandomAffine returns an Option that installs a RandomAffine transform.
 // The transform is built once, when WithRandomAffine is called.
 func WithRandomAffine(opts ...affineOption) Option {
 	transform := newRandomAffine(opts...)
 	install := func(target *Options) {
 		target.randomAffine = transform
 	}
 	return install
 }
--- a/vision/aug/blur.go
+++ b/vision/aug/blur.go
@ -0,0 +1,89 @@
 package aug
 import (
 	"fmt"
 	"log"
 	"github.com/sugarme/gotch"
 	ts "github.com/sugarme/gotch/tensor"
 )
 // GaussianBlur blurs an image with a Gaussian kernel whose standard deviation
 // is sampled anew on every Forward call.
 type GaussianBlur struct {
 	kernelSize []int64   // two kernel sizes; each must be positive and odd
 	sigma      []float64 // {min, max} range the standard deviation is sampled from
 }
 // newGaussianBlur validates its arguments and builds a GaussianBlur.
 //
 // ks is the kernel size as a 1- or 2-element slice of positive odd numbers;
 // a single value is used for both dimensions.
 // sig is the standard deviation as a 1- or 2-element slice of positive
 // numbers; two values form a (min, max) range and are swapped if given in
 // descending order, a single value is used as both bounds.
 // Invalid arguments terminate the program via log.Fatal.
 func newGaussianBlur(ks []int64, sig []float64) *GaussianBlur {
 	if n := len(ks); n == 0 || n > 2 {
 		log.Fatal(fmt.Errorf("Kernel size should have 1-2 elements. Got %v\n", n))
 	}
 	for i := range ks {
 		if ks[i] <= 0 || ks[i]%2 == 0 {
 			log.Fatal(fmt.Errorf("Kernel size should be an odd and positive number."))
 		}
 	}
 	if n := len(sig); n == 0 || n > 2 {
 		log.Fatal(fmt.Errorf("Sigma should have 1-2 elements. Got %v\n", n))
 	}
 	for i := range sig {
 		if sig[i] <= 0 {
 			log.Fatal(fmt.Errorf("Sigma should be a positive number."))
 		}
 	}
 	// Both slices are known to hold exactly 1 or 2 elements from here on.
 	kernelSize := ks
 	if len(ks) == 1 {
 		kernelSize = []int64{ks[0], ks[0]}
 	}
 	var sigma []float64
 	if len(sig) == 1 {
 		sigma = []float64{sig[0], sig[0]}
 	} else {
 		lo, hi := sig[0], sig[1]
 		if lo > hi {
 			lo, hi = hi, lo
 		}
 		sigma = []float64{lo, hi}
 	}
 	return &GaussianBlur{kernelSize: kernelSize, sigma: sigma}
 }
 // Forward samples one standard deviation from the configured sigma range and
 // blurs x with it, using the same value for both kernel dimensions.
 func (b *GaussianBlur) Forward(x *ts.Tensor) *ts.Tensor {
 	sample := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU)
 	sample.MustUniform_(b.sigma[0], b.sigma[1])
 	sigma := sample.Float64Values()[0]
 	sample.MustDrop()
 	sigmas := []float64{sigma, sigma}
 	return gaussianBlur(x, b.kernelSize, sigmas)
 }
 // WithGaussianBlur returns an Option that installs a GaussianBlur transform.
 // The arguments are validated when the returned Option is applied, not when
 // WithGaussianBlur is called.
 func WithGaussianBlur(ks []int64, sig []float64) Option {
 	return func(target *Options) {
 		target.gaussianBlur = newGaussianBlur(ks, sig)
 	}
 }
--- a/vision/aug/color.go
+++ b/vision/aug/color.go
@ -0,0 +1,77 @@
 package aug
 import (
 	"math/rand"
 	"time"
 	ts "github.com/sugarme/gotch/tensor"
 )
 // ColorJitter randomly changes the brightness, contrast, saturation or hue of
 // an image. Each field holds the jitter amount for the corresponding property.
 // Ref. https://github.com/pytorch/vision/blob/f1d734213af65dc06e777877d315973ba8386080/torchvision/transforms/functional_tensor.py
 type ColorJitter struct {
 	brightness float64
 	contrast   float64
 	saturation float64
 	hue        float64
 }
 // defaultColorJitter returns a ColorJitter with brightness, contrast and
 // saturation set to 1.0 and hue set to 0.0.
 func defaultColorJitter() *ColorJitter {
 	jitter := new(ColorJitter)
 	jitter.brightness = 1.0
 	jitter.contrast = 1.0
 	jitter.saturation = 1.0
 	jitter.hue = 0.0
 	return jitter
 }
 // setBrightness overrides the brightness jitter amount.
 func (c *ColorJitter) setBrightness(brightness float64) {
 	c.brightness = brightness
 }
 // setContrast overrides the contrast jitter amount.
 func (c *ColorJitter) setContrast(contrast float64) {
 	c.contrast = contrast
 }
 // setSaturation overrides the saturation jitter amount.
 func (c *ColorJitter) setSaturation(sat float64) {
 	c.saturation = sat
 }
 // setHue overrides the hue jitter amount.
 func (c *ColorJitter) setHue(hue float64) {
 	c.hue = hue
 }
 // Forward implements ts.Module. It picks exactly one of the brightness,
 // contrast, saturation or hue adjustments at random and applies it to the
 // input image tensor with a randomly drawn factor.
 func (c *ColorJitter) Forward(x *ts.Tensor) *ts.Tensor {
 	rand.Seed(time.Now().UnixNano())
 	switch choice := rand.Intn(4); choice {
 	case 0:
 		factor := randVal(getMinMax(c.brightness))
 		return adjustBrightness(x, factor)
 	case 1:
 		factor := randVal(getMinMax(c.contrast))
 		return adjustContrast(x, factor)
 	case 2:
 		factor := randVal(getMinMax(c.saturation))
 		return adjustSaturation(x, factor)
 	case 3:
 		// Hue is drawn from (0, c.hue) rather than through getMinMax.
 		factor := randVal(0, c.hue)
 		return adjustHue(x, factor)
 	}
 	panic("Shouldn't reach here.")
 }
 // WithColorJitter returns an Option that installs a ColorJitter transform
 // configured with the given brightness, contrast, saturation and hue amounts.
 func WithColorJitter(brightness, contrast, sat, hue float64) Option {
 	jitter := defaultColorJitter()
 	jitter.setBrightness(brightness)
 	jitter.setContrast(contrast)
 	jitter.setSaturation(sat)
 	jitter.setHue(hue)
 	install := func(target *Options) {
 		target.colorJitter = jitter
 	}
 	return install
 }
--- a/vision/aug/contrast.go
+++ b/vision/aug/contrast.go
@ -0,0 +1,43 @@
 package aug
 import (
 	ts "github.com/sugarme/gotch/tensor"
 )
 // RandomAutocontrast autocontrasts the pixels of the given image randomly with
 // a given probability.
 // The input tensor is expected to have [..., 1 or 3, H, W] shape, where ...
 // means an arbitrary number of leading dimensions.
 // Fields:
 // - pvalue: probability of the image being autocontrasted. Default value is 0.5.
 type RandomAutocontrast struct {
 	pvalue float64
 }
 // newRandomAutocontrast builds a RandomAutocontrast. The probability is the
 // first element of pOpt when given, 0.5 otherwise.
 func newRandomAutocontrast(pOpt ...float64) *RandomAutocontrast {
 	if len(pOpt) == 0 {
 		return &RandomAutocontrast{pvalue: 0.5}
 	}
 	return &RandomAutocontrast{pvalue: pOpt[0]}
 }
 // Forward autocontrasts x when a random draw falls below the configured
 // probability; otherwise it returns a shallow clone of x.
 func (rac *RandomAutocontrast) Forward(x *ts.Tensor) *ts.Tensor {
 	if draw := randPvalue(); draw < rac.pvalue {
 		return autocontrast(x)
 	}
 	return x.MustShallowClone()
 }
 // WithRandomAutocontrast returns an Option that installs a RandomAutocontrast
 // transform. The optional first argument is the probability (default 0.5).
 func WithRandomAutocontrast(p ...float64) Option {
 	transform := newRandomAutocontrast(p...)
 	install := func(target *Options) {
 		target.randomAutocontrast = transform
 	}
 	return install
 }
--- a/vision/aug/crop.go
+++ b/vision/aug/crop.go
@ -0,0 +1,124 @@
 package aug
 import (
 	"fmt"
 	"log"
 	// "math"
 	"github.com/sugarme/gotch"
 	ts "github.com/sugarme/gotch/tensor"
 )
 // RandomCrop crops the given image at a random location.
 type RandomCrop struct {
 	size            []int64 // desired output size as {height, width}
 	padding         []int64 // optional padding applied before cropping; nil disables it
 	paddingIfNeeded bool    // pad the image when it is smaller than size
 	paddingMode     string  // padding mode name handed to the pad helper
 }
 // newRandomCrop builds a RandomCrop from the given settings without
 // validating them.
 func newRandomCrop(size, padding []int64, paddingIfNeeded bool, paddingMode string) *RandomCrop {
 	rc := new(RandomCrop)
 	rc.size = size
 	rc.padding = padding
 	rc.paddingIfNeeded = paddingIfNeeded
 	rc.paddingMode = paddingMode
 	return rc
 }
 // params picks the crop window for x.
 // It returns the randomly drawn offset along the height, the offset along the
 // width, and the crop height and width. When x already has the requested
 // size, the whole image is returned as the window. The program is terminated
 // via log.Fatal when the requested size exceeds the image size by more than
 // one pixel.
 func (c *RandomCrop) params(x *ts.Tensor) (int64, int64, int64, int64) {
 	imgW, imgH := getImageSize(x)
 	cropH := c.size[0]
 	cropW := c.size[1]
 	if imgH+1 < cropH || imgW+1 < cropW {
 		log.Fatal(fmt.Errorf("Required crop size %v is larger then input image size %v", c.size, []int64{imgH, imgW}))
 	}
 	if imgW == cropW && imgH == cropH {
 		return 0, 0, imgH, imgW
 	}
 	rowTs := ts.MustRandint1(0, imgH-cropH+1, []int64{1}, gotch.Int64, gotch.CPU)
 	row := rowTs.Int64Values()[0]
 	rowTs.MustDrop()
 	colTs := ts.MustRandint1(0, imgW-cropW+1, []int64{1}, gotch.Int64, gotch.CPU)
 	col := colTs.Int64Values()[0]
 	colTs.MustDrop()
 	return row, col, cropH, cropW
 }
 // Forward pads x as configured and returns a randomly located crop of the
 // requested size.
 //
 // Steps: (1) apply the explicit padding, if any; (2) when paddingIfNeeded is
 // set, pad width and then height so the image is at least as large as the
 // requested size; (3) draw a crop window and crop.
 // The image size and the crop window are derived from the padded image, as in
 // torchvision; deriving them from the unpadded input ignored the padding and
 // made pad-if-needed fail the size check in params.
 func (c *RandomCrop) Forward(x *ts.Tensor) *ts.Tensor {
 	var img *ts.Tensor
 	if c.padding != nil {
 		img = pad(x, c.padding, c.paddingMode)
 	} else {
 		img = x.MustShallowClone()
 	}
 	// Measure the image after the explicit padding has been applied.
 	w, h := getImageSize(img)
 	var (
 		paddedW  *ts.Tensor
 		paddedWH *ts.Tensor
 	)
 	// pad width if needed
 	if c.paddingIfNeeded && w < c.size[1] {
 		padding := []int64{c.size[1] - w, 0}
 		paddedW = pad(img, padding, c.paddingMode)
 	} else {
 		paddedW = img.MustShallowClone()
 	}
 	img.MustDrop()
 	// pad height if needed
 	if c.paddingIfNeeded && h < c.size[0] {
 		padding := []int64{0, c.size[0] - h}
 		paddedWH = pad(paddedW, padding, c.paddingMode)
 	} else {
 		paddedWH = paddedW.MustShallowClone()
 	}
 	paddedW.MustDrop()
 	// i, j, h, w = self.get_params(img, self.size)
 	// The crop window must be drawn for the tensor that is actually cropped.
 	i, j, cropH, cropW := c.params(paddedWH)
 	out := crop(paddedWH, i, j, cropH, cropW)
 	paddedWH.MustDrop()
 	return out
 }
 // WithRandomCrop returns an Option that installs a RandomCrop transform. The
 // transform is built when the returned Option is applied.
 func WithRandomCrop(size []int64, padding []int64, paddingIfNeeded bool, paddingMode string) Option {
 	return func(target *Options) {
 		target.randomCrop = newRandomCrop(size, padding, paddingIfNeeded, paddingMode)
 	}
 }
 // CenterCrop crops the given image at the center.
 // The input tensor is expected to have [..., H, W] shape, where ... means an
 // arbitrary number of leading dimensions.
 // NOTE(review): per the torchvision contract, an image smaller than the output
 // size along any edge is padded with 0 and then center cropped - confirm that
 // the centerCrop helper does the same.
 type CenterCrop struct {
 	size []int64 // desired output size; must have exactly 2 elements
 }
 // newCenterCrop builds a CenterCrop for the given 2-element size. Any other
 // length terminates the program via log.Fatal.
 func newCenterCrop(size []int64) *CenterCrop {
 	if n := len(size); n != 2 {
 		log.Fatal(fmt.Errorf("Expected size of 2 elements. Got %v\n", n))
 	}
 	cc := new(CenterCrop)
 	cc.size = size
 	return cc
 }
 // Forward returns x center-cropped to the configured size.
 func (cc *CenterCrop) Forward(x *ts.Tensor) *ts.Tensor {
 	return centerCrop(x, cc.size)
 }
 // WithCenterCrop returns an Option that installs a CenterCrop transform. The
 // size is validated when the returned Option is applied.
 func WithCenterCrop(size []int64) Option {
 	return func(target *Options) {
 		target.centerCrop = newCenterCrop(size)
 	}
 }
--- a/vision/aug/cutout.go
+++ b/vision/aug/cutout.go
@ -0,0 +1,177 @@
 package aug
 import (
 	"fmt"
 	"log"
 	"math"
 	"github.com/sugarme/gotch"
 	ts "github.com/sugarme/gotch/tensor"
 )
 // RandomCutout randomly selects a rectangular region in an image tensor and
 // erases its pixels.
 // 'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/abs/1708.04896
 //
 // Fields:
 // - pvalue: probability that the erasing operation is performed.
 // - scale: {min, max} range of the proportion of erased area against the input image.
 // - ratio: {min, max} range of the aspect ratio of the erased area.
 // - rgbVal: erasing value for the R, G and B channels respectively.
 type RandomCutout struct {
 	pvalue float64
 	scale  []float64
 	ratio  []float64
 	rgbVal []int64 // RGB value
 }
 // cutoutOptions collects the configurable settings of RandomCutout; the
 // fields have the same meaning as those of RandomCutout.
 type cutoutOptions struct {
 	pvalue float64
 	scale  []float64
 	ratio  []float64
 	rgbVal []int64 // RGB value
 }
 // cutoutOption is a functional option that mutates a cutoutOptions value.
 type cutoutOption func(o *cutoutOptions)
 // defaultCutoutOptions returns the settings used when no option is supplied:
 // probability 0.5, area scale in [0.02, 0.33], aspect ratio in [0.3, 3.3] and
 // a black ({0, 0, 0}) erasing value.
 func defaultCutoutOptions() *cutoutOptions {
 	var opts cutoutOptions
 	opts.pvalue = 0.5
 	opts.scale = []float64{0.02, 0.33}
 	opts.ratio = []float64{0.3, 3.3}
 	opts.rgbVal = []int64{0, 0, 0}
 	return &opts
 }
 // newRandomCutout builds a RandomCutout from the given settings without
 // validating them.
 func newRandomCutout(pvalue float64, scale, ratio []float64, rgbVal []int64) *RandomCutout {
 	rc := new(RandomCutout)
 	rc.pvalue = pvalue
 	rc.scale = scale
 	rc.ratio = ratio
 	rc.rgbVal = rgbVal
 	return rc
 }
 // WithCutoutPvalue sets the probability that the cutout is performed. A value
 // outside [0, 1] terminates the program via log.Fatalf as soon as this
 // function is called.
 func WithCutoutPvalue(p float64) cutoutOption {
 	if p < 0 || p > 1 {
 		log.Fatalf("Cutout p-value must be in range from 0 to 1. Got %v\n", p)
 	}
 	return func(cfg *cutoutOptions) { cfg.pvalue = p }
 }
 // WithCutoutScale sets the {min, max} range of the erased-area proportion.
 // A slice that does not hold exactly 2 elements terminates the program via
 // log.Fatalf as soon as this function is called.
 func WithCutoutScale(scale []float64) cutoutOption {
 	if n := len(scale); n != 2 {
 		log.Fatalf("Cutout scale should be in a range of 2 elments. Got %v elements\n", n)
 	}
 	return func(cfg *cutoutOptions) { cfg.scale = scale }
 }
 // WithCutoutRatio sets the {min, max} range of the erased-area aspect ratio.
 // A slice that does not hold exactly 2 elements terminates the program via
 // log.Fatalf as soon as this function is called.
 func WithCutoutRatio(ratio []float64) cutoutOption {
 	if n := len(ratio); n != 2 {
 		log.Fatalf("Cutout ratio should be in a range of 2 elments. Got %v elements\n", n)
 	}
 	return func(cfg *cutoutOptions) { cfg.ratio = ratio }
 }
 // WithCutoutValue sets the erasing value. A single value is used for all
 // three channels; three values are used as R, G and B respectively. Any other
 // length terminates the program via log.Fatal as soon as this function is
 // called.
 func WithCutoutValue(rgb []int64) cutoutOption {
 	var fill []int64
 	if n := len(rgb); n == 1 {
 		fill = []int64{rgb[0], rgb[0], rgb[0]}
 	} else if n == 3 {
 		fill = rgb
 	} else {
 		log.Fatal(fmt.Errorf("Cutout values can be single value or 3-element (RGB) value. Got %v values.", n))
 	}
 	return func(cfg *cutoutOptions) { cfg.rgbVal = fill }
 }
 // cutoutParams samples the rectangle to erase from x.
 //
 // It makes up to 10 attempts to draw an area (from rc.scale) and an aspect
 // ratio (log-uniformly from rc.ratio) whose rectangle is strictly smaller than
 // the image in both dimensions. On success it returns the offset along the
 // height, the offset along the width, the rectangle height and width, and a
 // tensor built from rc.rgbVal. When no attempt succeeds it returns
 // (0, 0, imgH, imgW) together with a shallow clone of x.
 // The caller owns the returned tensor and must drop it.
 func (rc *RandomCutout) cutoutParams(x *ts.Tensor) (int64, int64, int64, int64, *ts.Tensor) {
 	dim := x.MustSize()
 	imgH, imgW := dim[len(dim)-2], dim[len(dim)-1]
 	area := float64(imgH * imgW)
 	// Keep a handle on the log tensor so it can be released; chaining
 	// Float64Values() directly onto MustLog leaked it on every call.
 	logRatioTs := ts.MustOfSlice(rc.ratio).MustLog(true)
 	logRatio := logRatioTs.Float64Values()
 	logRatioTs.MustDrop()
 	for i := 0; i < 10; i++ {
 		scaleTs := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU)
 		scaleTs.MustUniform_(rc.scale[0], rc.scale[1])
 		scaleVal := scaleTs.Float64Values()[0]
 		scaleTs.MustDrop()
 		eraseArea := area * scaleVal
 		ratioTs := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU)
 		ratioTs.MustUniform_(logRatio[0], logRatio[1])
 		asTs := ratioTs.MustExp(true)
 		asVal := asTs.Float64Values()[0] // aspect ratio
 		asTs.MustDrop()
 		// h = int(round(math.sqrt(erase_area * aspect_ratio)))
 		// w = int(round(math.sqrt(erase_area / aspect_ratio)))
 		h := int64(math.Round(math.Sqrt(eraseArea * asVal)))
 		w := int64(math.Round(math.Sqrt(eraseArea / asVal)))
 		// Retry when the rectangle does not fit strictly inside the image.
 		if !(h < imgH && w < imgW) {
 			continue
 		}
 		// v = torch.tensor(value)[:, None, None]
 		v := ts.MustOfSlice(rc.rgbVal).MustUnsqueeze(1, true).MustUnsqueeze(1, true)
 		// i = torch.randint(0, img_h - h + 1, size=(1, )).item()
 		iTs := ts.MustRandint1(0, imgH-h+1, []int64{1}, gotch.Int64, gotch.CPU)
 		i := iTs.Int64Values()[0]
 		iTs.MustDrop()
 		// j = torch.randint(0, img_w - w + 1, size=(1, )).item()
 		jTs := ts.MustRandint1(0, imgW-w+1, []int64{1}, gotch.Int64, gotch.CPU)
 		j := jTs.Int64Values()[0]
 		jTs.MustDrop()
 		return i, j, h, w, v
 	}
 	// No valid rectangle found: return the full image extent and the image itself.
 	img := x.MustShallowClone()
 	return 0, 0, imgH, imgW, img
 }
 // Forward erases a random rectangle of img with probability rc.pvalue and
 // returns the result; otherwise it returns a shallow clone of img.
 func (rc *RandomCutout) Forward(img *ts.Tensor) *ts.Tensor {
 	// Draw from the uniform distribution on [0, 1). A standard-normal draw
 	// (MustRandn) does not yield the configured probability: roughly half of
 	// its samples are negative and therefore below any positive pvalue.
 	randTs := ts.MustRand([]int64{1}, gotch.Float, gotch.CPU)
 	randVal := randTs.Float64Values()[0]
 	randTs.MustDrop()
 	if randVal < rc.pvalue {
 		i, j, h, w, v := rc.cutoutParams(img)
 		// The value tensor is not consumed by cutout (rc.rgbVal is passed
 		// instead), so release it right away.
 		v.MustDrop()
 		// cutoutParams reports "no valid rectangle" by returning the full image
 		// extent (successful draws are strictly smaller in both dimensions).
 		// Erasing that extent with rgbVal would wipe the whole image, so the
 		// image is returned unchanged instead.
 		dim := img.MustSize()
 		if h == dim[len(dim)-2] && w == dim[len(dim)-1] {
 			return img.MustShallowClone()
 		}
 		return cutout(img, i, j, h, w, rc.rgbVal)
 	}
 	return img.MustShallowClone()
 }
 // WithRandomCutout returns an Option that installs a RandomCutout transform.
 // The cutout options are resolved immediately; the transform itself is built
 // when the returned Option is applied.
 func WithRandomCutout(opts ...cutoutOption) Option {
 	cfg := defaultCutoutOptions()
 	for i := range opts {
 		opts[i](cfg)
 	}
 	return func(target *Options) {
 		target.randomCutout = newRandomCutout(cfg.pvalue, cfg.scale, cfg.ratio, cfg.rgbVal)
 	}
 }
--- a/vision/aug/equalize.go
+++ b/vision/aug/equalize.go
@ -0,0 +1,46 @@
 package aug
 import (
 	ts "github.com/sugarme/gotch/tensor"
 )
 // RandomEqualize equalizes the histogram of the given image randomly with a
 // given probability.
 // The input tensor is expected to have [..., 1 or 3, H, W] shape, where ...
 // means an arbitrary number of leading dimensions.
 // Fields:
 // - pvalue: probability of the image being equalized. Default value is 0.5.
 // Histogram equalization
 // Ref. https://en.wikipedia.org/wiki/Histogram_equalization
 type RandomEqualize struct {
 	pvalue float64
 }
 // newRandomEqualize builds a RandomEqualize. The probability is the first
 // element of pOpt when given, 0.5 otherwise.
 func newRandomEqualize(pOpt ...float64) *RandomEqualize {
 	if len(pOpt) == 0 {
 		return &RandomEqualize{pvalue: 0.5}
 	}
 	return &RandomEqualize{pvalue: pOpt[0]}
 }
 // Forward equalizes x when a random draw falls below the configured
 // probability; otherwise it returns a shallow clone of x.
 func (re *RandomEqualize) Forward(x *ts.Tensor) *ts.Tensor {
 	if draw := randPvalue(); draw < re.pvalue {
 		return equalize(x)
 	}
 	return x.MustShallowClone()
 }
 // WithRandomEqualize returns an Option that installs a RandomEqualize
 // transform. The optional first argument is the probability (default 0.5).
 func WithRandomEqualize(p ...float64) Option {
 	transform := newRandomEqualize(p...)
 	install := func(target *Options) {
 		target.randomEqualize = transform
 	}
 	return install
 }
--- a/vision/aug/flip.go
+++ b/vision/aug/flip.go
@ -0,0 +1,78 @@
 package aug
 import (
 	"github.com/sugarme/gotch"
 	ts "github.com/sugarme/gotch/tensor"
 )
 // RandomHorizontalFlip horizontally flips the given image randomly with a
 // given probability.
 //
 // The input tensor is expected to have [..., H, W] shape, where ... means an
 // arbitrary number of leading dimensions.
 // Fields:
 // - pvalue: probability of the image being flipped.
 type RandomHorizontalFlip struct {
 	pvalue float64
 }
 // newRandomHorizontalFlip builds a RandomHorizontalFlip with the given flip
 // probability.
 func newRandomHorizontalFlip(pvalue float64) *RandomHorizontalFlip {
 	hf := new(RandomHorizontalFlip)
 	hf.pvalue = pvalue
 	return hf
 }
 // Forward horizontally flips x with probability hf.pvalue; otherwise it
 // returns a shallow clone of x.
 func (hf *RandomHorizontalFlip) Forward(x *ts.Tensor) *ts.Tensor {
 	// Draw from the uniform distribution on [0, 1) so that pvalue is a true
 	// probability. A standard-normal draw (MustRandn) is below 0.5 about 69%
 	// of the time, which made the flip fire far more often than configured.
 	randTs := ts.MustRand([]int64{1}, gotch.Float, gotch.CPU)
 	randVal := randTs.Float64Values()[0]
 	randTs.MustDrop()
 	if randVal < hf.pvalue {
 		return hflip(x)
 	}
 	return x.MustShallowClone()
 }
 // WithRandomHFlip returns an Option that installs a RandomHorizontalFlip
 // transform with the given flip probability.
 func WithRandomHFlip(pvalue float64) Option {
 	return func(target *Options) {
 		target.randomHFlip = newRandomHorizontalFlip(pvalue)
 	}
 }
 // RandomVerticalFlip vertically flips the given image randomly with a given
 // probability.
 //
 // The input tensor is expected to have [..., H, W] shape, where ... means an
 // arbitrary number of leading dimensions.
 // Fields:
 // - pvalue: probability of the image being flipped.
 type RandomVerticalFlip struct {
 	pvalue float64
 }
 // newRandomVerticalFlip builds a RandomVerticalFlip with the given flip
 // probability.
 func newRandomVerticalFlip(pvalue float64) *RandomVerticalFlip {
 	vf := new(RandomVerticalFlip)
 	vf.pvalue = pvalue
 	return vf
 }
 // Forward vertically flips x with probability vf.pvalue; otherwise it returns
 // a shallow clone of x.
 func (vf *RandomVerticalFlip) Forward(x *ts.Tensor) *ts.Tensor {
 	// Draw from the uniform distribution on [0, 1) so that pvalue is a true
 	// probability. A standard-normal draw (MustRandn) is below 0.5 about 69%
 	// of the time, which made the flip fire far more often than configured.
 	randTs := ts.MustRand([]int64{1}, gotch.Float, gotch.CPU)
 	randVal := randTs.Float64Values()[0]
 	randTs.MustDrop()
 	if randVal < vf.pvalue {
 		return vflip(x)
 	}
 	return x.MustShallowClone()
 }
 // WithRandomVFlip returns an Option that installs a RandomVerticalFlip
 // transform with the given flip probability.
 func WithRandomVFlip(pvalue float64) Option {
 	return func(target *Options) {
 		target.randomVFlip = newRandomVerticalFlip(pvalue)
 	}
 }
--- a/vision/aug/function.go
+++ b/vision/aug/function.go
--- a/vision/aug/grayscale.go
+++ b/vision/aug/grayscale.go
@ -0,0 +1,81 @@
 package aug
 import (
 	"log"
 	ts "github.com/sugarme/gotch/tensor"
 	// "github.com/sugarme/gotch/tensor"
 )
 // Grayscale converts an image to grayscale.
 // The input tensor is expected to have [..., 3, H, W] shape, where ... means
 // an arbitrary number of leading dimensions.
 // Fields:
 // - outChan: number of channels (1 or 3) desired for the output image.
 type Grayscale struct {
 	outChan int64
 }
 // Forward returns the grayscale version of x with the configured number of
 // output channels.
 func (gs *Grayscale) Forward(x *ts.Tensor) *ts.Tensor {
 	return rgb2Gray(x, gs.outChan)
 }
 // newGrayscale builds a Grayscale. The number of output channels is the first
 // element of outChanOpt when given (it must be 1 or 3, anything else
 // terminates the program via log.Fatalf) and 3 otherwise.
 func newGrayscale(outChanOpt ...int64) *Grayscale {
 	if len(outChanOpt) == 0 {
 		return &Grayscale{3}
 	}
 	c := outChanOpt[0]
 	if c != 1 && c != 3 {
 		log.Fatalf("Out channels should be either 1 or 3. Got %v\n", c)
 	}
 	return &Grayscale{c}
 }
 // RandomGrayscale randomly converts an image to grayscale with a probability
 // of p (default 0.1).
 // The input tensor is expected to have [..., 3, H, W] shape, where ... means
 // an arbitrary number of leading dimensions.
 // Fields:
 // - pvalue: probability that the image is converted to grayscale.
 type RandomGrayscale struct {
 	pvalue float64
 }
 // newRandomGrayscale builds a RandomGrayscale. The probability is the first
 // element of pvalueOpt when given, 0.1 otherwise.
 func newRandomGrayscale(pvalueOpt ...float64) *RandomGrayscale {
 	if len(pvalueOpt) == 0 {
 		return &RandomGrayscale{pvalue: 0.1}
 	}
 	return &RandomGrayscale{pvalue: pvalueOpt[0]}
 }
 // Forward converts x to grayscale, keeping its channel count, when a random
 // draw falls below the configured probability; otherwise it returns a shallow
 // clone of x.
 func (rgs *RandomGrayscale) Forward(x *ts.Tensor) *ts.Tensor {
 	// The channel count is read before the draw, on every call.
 	chans := getImageChanNum(x)
 	if draw := randPvalue(); draw < rgs.pvalue {
 		return rgb2Gray(x, chans)
 	}
 	return x.MustShallowClone()
 }
 // WithRandomGrayscale returns an Option that installs a RandomGrayscale
 // transform. The optional first argument is the probability (default 0.1).
 func WithRandomGrayscale(pvalueOpt ...float64) Option {
 	// newRandomGrayscale applies the same 0.1 default when no value is given.
 	transform := newRandomGrayscale(pvalueOpt...)
 	install := func(target *Options) {
 		target.randomGrayscale = transform
 	}
 	return install
 }
--- a/vision/aug/invert.go
+++ b/vision/aug/invert.go
@ -0,0 +1,39 @@
 package aug
 import (
 	ts "github.com/sugarme/gotch/tensor"
 )
 // RandomInvert inverts the given image randomly with a given probability.
 type RandomInvert struct {
 	pvalue float64 // probability of the image being inverted; default is 0.5
 }
 // newRandomInvert builds a RandomInvert. The probability is the first element
 // of pOpt when given, 0.5 otherwise.
 func newRandomInvert(pOpt ...float64) *RandomInvert {
 	if len(pOpt) == 0 {
 		return &RandomInvert{pvalue: 0.5}
 	}
 	return &RandomInvert{pvalue: pOpt[0]}
 }
 // Forward inverts x when a random draw falls below the configured
 // probability; otherwise it returns a shallow clone of x.
 func (ri *RandomInvert) Forward(x *ts.Tensor) *ts.Tensor {
 	if draw := randPvalue(); draw < ri.pvalue {
 		return invert(x)
 	}
 	return x.MustShallowClone()
 }
 // WithRandomInvert returns an Option that installs a RandomInvert transform.
 // The optional first argument is the probability (default 0.5).
 func WithRandomInvert(pvalueOpt ...float64) Option {
 	transform := newRandomInvert(pvalueOpt...)
 	install := func(target *Options) {
 		target.randomInvert = transform
 	}
 	return install
 }
--- a/vision/aug/normalize.go
+++ b/vision/aug/normalize.go
@ -0,0 +1,91 @@
 package aug
 import (
 	ts "github.com/sugarme/gotch/tensor"
 )
 // Normalize normalizes a tensor image with mean and standard deviation.
 // Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n``
 // channels, this transform is meant to normalize each channel of the input, i.e.,
 // ``output[channel] = (input[channel] - mean[channel]) / std[channel]``
 // Fields:
 // - mean: sequence of means for each channel.
 // - std: sequence of standard deviations for each channel.
 // NOTE(review): per the torchvision contract this transform acts out of place
 // (it does not mutate the input tensor) - confirm against the normalize helper.
 type Normalize struct {
 	mean []float64 // should be from 0 to 1
 	std  []float64 // should be > 0 and <= 1
 }
 // normalizeOptions collects the configurable settings of Normalize.
 type normalizeOptions struct {
 	mean []float64 // per-channel means
 	std  []float64 // per-channel standard deviations
 }
 // normalizeOption is a functional option that mutates a normalizeOptions value.
 type normalizeOption func(*normalizeOptions)
 // Mean and SD can be calculated for specific dataset as follow:
 /*
 	mean = 0.0
 	meansq = 0.0
 	count = 0
 	for index, data in enumerate(train_loader):
 			mean = mean + data.sum()
 			meansq = meansq + (data**2).sum()
 			count += np.prod(data.shape)
 	total_mean = mean/count
 	total_var = (meansq/count) - (total_mean**2)
 	total_std = torch.sqrt(total_var)
 	print("mean: " + str(total_mean))
 	print("std: " + str(total_std))
 */
 // For example, the ImageNet dataset has the following RGB mean and standard deviation:
 // meanVals := []float64{0.485, 0.456, 0.406}
 // sdVals := []float64{0.229, 0.224, 0.225}
 // defaultNormalizeOptions returns the settings used when no option is
 // supplied: a mean of 0 and a standard deviation of 1 for three channels.
 func defaultNormalizeOptions() *normalizeOptions {
 	var opts normalizeOptions
 	opts.mean = []float64{0, 0, 0}
 	opts.std = []float64{1, 1, 1}
 	return &opts
 }
 // WithNormalizeStd sets the per-channel standard deviations.
 func WithNormalizeStd(std []float64) normalizeOption {
 	return func(cfg *normalizeOptions) { cfg.std = std }
 }
 // WithNormalizeMean sets the per-channel means.
 func WithNormalizeMean(mean []float64) normalizeOption {
 	return func(cfg *normalizeOptions) { cfg.mean = mean }
 }
 // newNormalize builds a Normalize from the default options with the given
 // option functions applied on top, in order.
 func newNormalize(opts ...normalizeOption) *Normalize {
 	cfg := defaultNormalizeOptions()
 	for i := range opts {
 		opts[i](cfg)
 	}
 	norm := new(Normalize)
 	norm.mean = cfg.mean
 	norm.std = cfg.std
 	return norm
 }
 // Forward returns x normalized with the configured mean and standard deviation.
 func (n *Normalize) Forward(x *ts.Tensor) *ts.Tensor {
 	return normalize(x, n.mean, n.std)
 }
 // WithNormalize returns an Option that installs a Normalize transform. The
 // transform is built once, when WithNormalize is called.
 func WithNormalize(opts ...normalizeOption) Option {
 	transform := newNormalize(opts...)
 	install := func(target *Options) {
 		target.normalize = transform
 	}
 	return install
 }
--- a/vision/aug/pad.go
+++ b/vision/aug/pad.go
@ -0,0 +1 @@
 package aug
--- a/vision/aug/perspective.go
+++ b/vision/aug/perspective.go
@ -0,0 +1,190 @@
 package aug
 import (
 	// "fmt"
 	"github.com/sugarme/gotch"
 	ts "github.com/sugarme/gotch/tensor"
 )
 // RandomPerspective performs a random perspective transformation of the given image with a given probability.
 // The image tensor is expected to have [..., H, W] shape, where ... means an arbitrary number of
 // leading dimensions.
 // Args:
 // - distortion_scale (float): argument to control the degree of distortion and ranges from 0 to 1.
 // Default is 0.5.
 // - p (float): probability of the image being transformed. Default is 0.5.
 // - interpolation (string): desired interpolation mode, named after
 // :class:`torchvision.transforms.InterpolationMode`. Default is "bilinear".
 // For tensor input, only the nearest and bilinear modes are supported.
 // - fill (sequence): pixel fill value for the area outside the transformed
 // image. Default is ``0`` for each of three channels.
 type RandomPerspective struct {
 	distortionScale   float64 // range [0, 1]
 	pvalue            float64 //  range [0, 1]
 	interpolationMode string
 	fillValue         []float64
 }
 // perspectiveOptions collects the settings that perspectiveOption functions
 // modify before a RandomPerspective is built.
 type perspectiveOptions struct {
 	distortionScale   float64 // range [0, 1]
 	pvalue            float64 //  range [0, 1]
 	interpolationMode string
 	fillValue         []float64
 }
 // defaultPerspectiveOptions returns the baseline settings: distortion scale
 // 0.5, probability 0.5, "bilinear" interpolation and a zero fill value for
 // three channels.
 func defaultPerspectiveOptions() *perspectiveOptions {
 	defaults := new(perspectiveOptions)
 	defaults.distortionScale = 0.5
 	defaults.pvalue = 0.5
 	defaults.interpolationMode = "bilinear"
 	defaults.fillValue = []float64{0.0, 0.0, 0.0}
 	return defaults
 }
 // perspectiveOption is a functional option that mutates a perspectiveOptions.
 type perspectiveOption func(*perspectiveOptions)
 // WithPerspectivePvalue returns an option that sets the probability value
 // stored on RandomPerspective.
 func WithPerspectivePvalue(p float64) perspectiveOption {
 	return func(cfg *perspectiveOptions) { cfg.pvalue = p }
 }
 // WithPerspectiveScale returns an option that sets the distortion scale.
 func WithPerspectiveScale(s float64) perspectiveOption {
 	return func(cfg *perspectiveOptions) { cfg.distortionScale = s }
 }
 // WithPerspectiveMode returns an option that sets the interpolation mode name.
 func WithPerspectiveMode(m string) perspectiveOption {
 	return func(cfg *perspectiveOptions) { cfg.interpolationMode = m }
 }
 // WithPerspectiveValue returns an option that sets the fill value used for
 // the area outside the transformed image.
 func WithPerspectiveValue(v []float64) perspectiveOption {
 	return func(cfg *perspectiveOptions) { cfg.fillValue = v }
 }
 // newRandomPerspective builds a RandomPerspective from the default options
 // with each of the given opts applied in order.
 func newRandomPerspective(opts ...perspectiveOption) *RandomPerspective {
 	cfg := defaultPerspectiveOptions()
 	for i := range opts {
 		opts[i](cfg)
 	}
 	rp := new(RandomPerspective)
 	rp.distortionScale = cfg.distortionScale
 	rp.pvalue = cfg.pvalue
 	rp.interpolationMode = cfg.interpolationMode
 	rp.fillValue = cfg.fillValue
 	return rp
 }
 // getParams draws the parameters for a random perspective transform.
 //
 // Args:
 // - w (int64): width of the image.
 // - h (int64): height of the image.
 // Returns:
 // - List containing [top-left, top-right, bottom-right, bottom-left] of the original image,
 // - List containing [top-left, top-right, bottom-right, bottom-left] of the transformed image.
 func (rp *RandomPerspective) getParams(w, h int64) ([][]int64, [][]int64) {
 	// Largest corner displacement along each axis: distortionScale times half
 	// the image size, truncated to an integer.
 	maxDx := int64(rp.distortionScale * float64(w/2))
 	maxDy := int64(rp.distortionScale * float64(h/2))
 	// draw samples a single int64 via ts.MustRandint1(low, high, ...) on the CPU
 	// and releases the temporary tensor.
 	draw := func(low, high int64) int64 {
 		sample := ts.MustRandint1(low, high, []int64{1}, gotch.Int64, gotch.CPU)
 		v := sample.Int64Values()[0]
 		sample.MustDrop()
 		return v
 	}
 	// Each corner is an (x, y) pair. The draws are made in a fixed order:
 	// top-left, top-right, bottom-right, bottom-left, x before y.
 	// Left/top coordinates are drawn with bounds (0, max+1); right/bottom
 	// coordinates with bounds (size-max-1, size).
 	tlX := draw(0, maxDx+1)
 	tlY := draw(0, maxDy+1)
 	trX := draw(w-maxDx-1, w)
 	trY := draw(0, maxDy+1)
 	brX := draw(w-maxDx-1, w)
 	brY := draw(h-maxDy-1, h)
 	blX := draw(0, maxDx+1)
 	blY := draw(h-maxDy-1, h)
 	// Corners of the untransformed image.
 	corners := [][]int64{
 		{0, 0},
 		{w - 1, 0},
 		{w - 1, h - 1},
 		{0, h - 1},
 	}
 	// Randomly displaced corners, in the same order.
 	displaced := [][]int64{
 		{tlX, tlY},
 		{trX, trY},
 		{brX, brY},
 		{blX, blY},
 	}
 	return corners, displaced
 }
 // Forward applies a random perspective transformation to x with probability
 // pvalue. When the transformation is not applied, it returns a shallow clone
 // of x, matching the other random transforms in this package.
 func (rp *RandomPerspective) Forward(x *ts.Tensor) *ts.Tensor {
 	// Honor the configured probability; without this check the transform
 	// would run on every call and pvalue would have no effect.
 	if randPvalue() >= rp.pvalue {
 		return x.MustShallowClone()
 	}
 	// The two values are forwarded positionally to getParams(w, h).
 	// NOTE(review): confirm getImageSize returns (width, height) in that order.
 	w, h := getImageSize(x)
 	startPoints, endPoints := rp.getParams(w, h)
 	return perspective(x, startPoints, endPoints, rp.interpolationMode, rp.fillValue)
 }
 // WithRandomPerspective returns an Option that installs a RandomPerspective
 // built from opts. The module is created once, when this function is called.
 func WithRandomPerspective(opts ...perspectiveOption) Option {
 	module := newRandomPerspective(opts...)
 	return func(o *Options) { o.randomPerspective = module }
 }
--- a/vision/aug/posterize.go
+++ b/vision/aug/posterize.go
@ -0,0 +1,77 @@
 package aug
 import (
 	ts "github.com/sugarme/gotch/tensor"
 )
 // RandomPosterize posterizes the image randomly with a given probability by reducing the
 // number of bits for each color channel. The image tensor should be of type uint8,
 // and it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
 // Args:
 // - bits (int): number of bits to keep for each channel (0-8). Default value is 4
 // - p (float): probability of the image being posterized. Default value is 0.5
 // Ref. https://en.wikipedia.org/wiki/Posterization
 type RandomPosterize struct {
 	pvalue float64
 	bits   uint8
 }
 // posterizeOptions collects the settings that posterizeOption functions
 // modify before a RandomPosterize is built.
 type posterizeOptions struct {
 	pvalue float64
 	bits   uint8
 }
 // posterizeOption is a functional option that mutates a posterizeOptions.
 type posterizeOption func(*posterizeOptions)
 // defaultPosterizeOptions returns the baseline settings: probability 0.5
 // and 4 bits per channel.
 func defaultPosterizeOptions() *posterizeOptions {
 	defaults := new(posterizeOptions)
 	defaults.pvalue = 0.5
 	defaults.bits = 4
 	return defaults
 }
 // WithPosterizePvalue returns an option that sets the probability of
 // posterizing.
 func WithPosterizePvalue(p float64) posterizeOption {
 	return func(cfg *posterizeOptions) { cfg.pvalue = p }
 }
 // WithPosterizeBits returns an option that sets the number of bits kept per
 // channel.
 func WithPosterizeBits(bits uint8) posterizeOption {
 	return func(cfg *posterizeOptions) { cfg.bits = bits }
 }
 // newRandomPosterize builds a RandomPosterize from the default options with
 // each of the given opts applied in order.
 func newRandomPosterize(opts ...posterizeOption) *RandomPosterize {
 	cfg := defaultPosterizeOptions()
 	for i := range opts {
 		opts[i](cfg)
 	}
 	return &RandomPosterize{pvalue: cfg.pvalue, bits: cfg.bits}
 }
 // Forward posterizes x when a freshly drawn random value is below pvalue;
 // otherwise it returns a shallow clone of x.
 func (rp *RandomPosterize) Forward(x *ts.Tensor) *ts.Tensor {
 	if randPvalue() < rp.pvalue {
 		return posterize(x, rp.bits)
 	}
 	return x.MustShallowClone()
 }
 // WithRandomPosterize returns an Option that installs a RandomPosterize built
 // from opts. The module is created once, when this function is called.
 func WithRandomPosterize(opts ...posterizeOption) Option {
 	module := newRandomPosterize(opts...)
 	return func(o *Options) { o.randomPosterize = module }
 }
--- a/vision/aug/resize.go
+++ b/vision/aug/resize.go
@ -0,0 +1,39 @@
 package aug
 import (
 	"log"
 	"github.com/sugarme/gotch"
 	ts "github.com/sugarme/gotch/tensor"
 	"github.com/sugarme/gotch/vision"
 )
 // ResizeModule resizes an image tensor to a fixed height and width.
 type ResizeModule struct {
 	height int64 // target height passed to vision.Resize
 	width  int64 // target width passed to vision.Resize
 }
 // newResizeModule returns a ResizeModule targeting height h and width w.
 func newResizeModule(h, w int64) *ResizeModule {
 	return &ResizeModule{height: h, width: w}
 }
 // Forward implements ts.Module for ResizeModule.
 // It resizes a Uint8-typed tensor derived from x with vision.Resize and
 // returns the result. A resize error terminates the program via log.Fatal.
 func (rs *ResizeModule) Forward(x *ts.Tensor) *ts.Tensor {
 	asUint8 := x.MustTotype(gotch.Uint8, false)
 	resized, err := vision.Resize(asUint8, rs.width, rs.height)
 	if err != nil {
 		log.Fatal(err)
 	}
 	// The converted tensor is only an intermediate; release it.
 	asUint8.MustDrop()
 	return resized
 }
 // WithResize returns an Option that installs a ResizeModule for height h and
 // width w. A new module is created each time the option is applied.
 func WithResize(h, w int64) Option {
 	return func(o *Options) { o.resize = newResizeModule(h, w) }
 }
 // RandomResizedCrop is a placeholder type with no fields or behavior yet.
 // TODO: implement.
 type RandomResizedCrop struct{}
--- a/vision/aug/rotate.go
+++ b/vision/aug/rotate.go
@ -0,0 +1,109 @@
 package aug
 import (
 	"fmt"
 	"log"
 	"math"
 	"math/rand"
 	"time"
 	"github.com/sugarme/gotch"
 	ts "github.com/sugarme/gotch/tensor"
 )
 // RandomRotate randomly rotates a tensor image within a specified angle range (degree).
 //
 // min and max are swapped when given in the wrong order. Both must lie in
 // [-360, 360], otherwise an error is returned. The angle is drawn uniformly
 // from [min, max) and converted to radians before calling rotImg.
 // It returns the rotated image, or the error reported by rotImg.
 func RandomRotate(img *ts.Tensor, min, max float64) (*ts.Tensor, error) {
 	if min > max {
 		min, max = max, min
 	}
 	if min < -360 || min > 360 || max < -360 || max > 360 {
 		err := fmt.Errorf("min and max should be in range from -360 to 360. Got %v and %v\n", min, max)
 		return nil, err
 	}
 	// device := img.MustDevice()
 	dtype := gotch.Double
 	rand.Seed(time.Now().UnixNano())
 	angle := min + rand.Float64()*(max-min)
 	theta := angle * (math.Pi / 180)
 	input := img.MustUnsqueeze(0, false).MustTotype(dtype, true)
 	r, err := rotImg(input, theta, dtype)
 	// input is a temporary created by this function; release it on both the
 	// success and the error path so it does not leak when rotImg fails.
 	input.MustDrop()
 	if err != nil {
 		return nil, err
 	}
 	return r.MustSqueeze(true), nil
 }
 // Rotate rotates a tensor image by angle (degree).
 //
 // angle must lie in [-360, 360], otherwise an error is returned. The angle
 // is converted to radians before calling rotImg.
 // It returns the rotated image, or the error reported by rotImg.
 func Rotate(img *ts.Tensor, angle float64) (*ts.Tensor, error) {
 	if angle < -360 || angle > 360 {
 		err := fmt.Errorf("angle must be in range (-360, 360)")
 		return nil, err
 	}
 	dtype := gotch.Double
 	theta := angle * (math.Pi / 180)
 	input := img.MustUnsqueeze(0, false).MustTotype(dtype, true)
 	r, err := rotImg(input, theta, dtype)
 	// input is a temporary created by this function; release it on both the
 	// success and the error path so it does not leak when rotImg fails.
 	input.MustDrop()
 	if err != nil {
 		return nil, err
 	}
 	return r.MustSqueeze(true), nil
 }
 // RotateModule rotates an image tensor by a fixed angle.
 type RotateModule struct {
 	angle float64 // rotation angle in degrees, passed to Rotate
 }
 // newRotate returns a RotateModule for the given angle (degree).
 func newRotate(angle float64) *RotateModule {
 	return &RotateModule{angle: angle}
 }
 // Forward implements ts.Module for RotateModule.
 // It rotates x by the configured angle; an error from Rotate terminates the
 // program via log.Fatal.
 func (r *RotateModule) Forward(x *ts.Tensor) *ts.Tensor {
 	rotated, err := Rotate(x, r.angle)
 	if err != nil {
 		log.Fatal(err)
 	}
 	return rotated
 }
 // WithRotate returns an Option that installs a RotateModule for angle
 // (degree). A new module is created each time the option is applied.
 func WithRotate(angle float64) Option {
 	return func(o *Options) { o.rotate = newRotate(angle) }
 }
 // RandRotateModule rotates an image tensor by a random angle drawn between
 // minAngle and maxAngle.
 type RandRotateModule struct {
 	minAngle float64 // lower bound in degrees, passed to RandomRotate
 	maxAngle float64 // upper bound in degrees, passed to RandomRotate
 }
 // newRandRotate returns a RandRotateModule for the angle range [min, max]
 // (degree).
 func newRandRotate(min, max float64) *RandRotateModule {
 	return &RandRotateModule{minAngle: min, maxAngle: max}
 }
 // Forward implements ts.Module for RandRotateModule.
 // It rotates x by a random angle within the configured range; an error from
 // RandomRotate terminates the program via log.Fatal.
 func (rr *RandRotateModule) Forward(x *ts.Tensor) *ts.Tensor {
 	rotated, err := RandomRotate(x, rr.minAngle, rr.maxAngle)
 	if err != nil {
 		log.Fatal(err)
 	}
 	return rotated
 }
 // WithRandRotate returns an Option that installs a RandRotateModule for the
 // given angle range (degree). A new module is created each time the option
 // is applied.
 func WithRandRotate(minAngle, maxAngle float64) Option {
 	return func(o *Options) { o.randRotate = newRandRotate(minAngle, maxAngle) }
 }
--- a/vision/aug/sharpness.go
+++ b/vision/aug/sharpness.go
@ -0,0 +1,74 @@
 package aug
 import (
 	ts "github.com/sugarme/gotch/tensor"
 )
 // RandomAdjustSharpness adjusts the sharpness of the image randomly with a given probability.
 // The image tensor is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary
 // number of leading dimensions.
 // Args:
 // - sharpness_factor (float):  How much to adjust the sharpness. Can be
 // any non negative number. 0 gives a blurred image, 1 gives the
 // original image while 2 increases the sharpness by a factor of 2. Default value is 1.0
 // - p (float): probability of the image having its sharpness adjusted. Default value is 0.5
 type RandomAdjustSharpness struct {
 	sharpnessFactor float64
 	pvalue          float64
 }
 // sharpnessOptions collects the settings that sharpnessOption functions
 // modify before a RandomAdjustSharpness is built.
 type sharpnessOptions struct {
 	sharpnessFactor float64
 	pvalue          float64
 }
 // sharpnessOption is a functional option that mutates a sharpnessOptions.
 type sharpnessOption func(*sharpnessOptions)
 // defaultSharpnessOptions returns the baseline settings: sharpness factor
 // 1.0 and probability 0.5.
 func defaultSharpnessOptions() *sharpnessOptions {
 	defaults := new(sharpnessOptions)
 	defaults.sharpnessFactor = 1.0
 	defaults.pvalue = 0.5
 	return defaults
 }
 // WithSharpnessPvalue returns an option that sets the probability of
 // adjusting sharpness.
 func WithSharpnessPvalue(p float64) sharpnessOption {
 	return func(cfg *sharpnessOptions) { cfg.pvalue = p }
 }
 // WithSharpnessFactor returns an option that sets the sharpness factor.
 func WithSharpnessFactor(f float64) sharpnessOption {
 	return func(cfg *sharpnessOptions) { cfg.sharpnessFactor = f }
 }
 // newRandomAdjustSharpness builds a RandomAdjustSharpness from the default
 // options with each of the given opts applied in order.
 func newRandomAdjustSharpness(opts ...sharpnessOption) *RandomAdjustSharpness {
 	cfg := defaultSharpnessOptions()
 	for i := range opts {
 		opts[i](cfg)
 	}
 	return &RandomAdjustSharpness{sharpnessFactor: cfg.sharpnessFactor, pvalue: cfg.pvalue}
 }
 // Forward adjusts the sharpness of x when a freshly drawn random value is
 // below pvalue; otherwise it returns a shallow clone of x.
 func (ras *RandomAdjustSharpness) Forward(x *ts.Tensor) *ts.Tensor {
 	if randPvalue() < ras.pvalue {
 		return adjustSharpness(x, ras.sharpnessFactor)
 	}
 	return x.MustShallowClone()
 }
 // WithRandomAdjustSharpness returns an Option that installs a
 // RandomAdjustSharpness built from opts. The module is created once, when
 // this function is called.
 func WithRandomAdjustSharpness(opts ...sharpnessOption) Option {
 	module := newRandomAdjustSharpness(opts...)
 	return func(o *Options) { o.randomAdjustSharpness = module }
 }
--- a/vision/aug/solarize.go
+++ b/vision/aug/solarize.go
@ -0,0 +1,79 @@
 package aug
 import (
 	ts "github.com/sugarme/gotch/tensor"
 )
 // RandomSolarize solarizes the image randomly with a given probability by inverting all pixel
 // values above a threshold. The image tensor is expected to be in [..., 1 or 3, H, W] format,
 // where ... means it can have an arbitrary number of leading dimensions.
 // Args:
 // - threshold (float): all pixels equal or above this value are inverted. Default value is 128
 // - p (float): probability of the image being solarized. Default value is 0.5
 // Ref. https://en.wikipedia.org/wiki/Solarization_(photography)
 type RandomSolarize struct {
 	threshold float64
 	pvalue    float64
 }
 // solarizeOptions collects the settings that solarizeOption functions
 // modify before a RandomSolarize is built.
 type solarizeOptions struct {
 	threshold float64
 	pvalue    float64
 }
 // solarizeOption is a functional option that mutates a solarizeOptions.
 type solarizeOption func(*solarizeOptions)
 // defaultSolarizeOptions returns the baseline settings: threshold 128 and
 // probability 0.5.
 func defaultSolarizeOptions() *solarizeOptions {
 	defaults := new(solarizeOptions)
 	defaults.threshold = 128
 	defaults.pvalue = 0.5
 	return defaults
 }
 // WithSolarizePvalue returns an option that sets the probability of
 // solarizing.
 func WithSolarizePvalue(p float64) solarizeOption {
 	return func(cfg *solarizeOptions) { cfg.pvalue = p }
 }
 // WithSolarizeThreshold returns an option that sets the solarize threshold.
 func WithSolarizeThreshold(th float64) solarizeOption {
 	return func(cfg *solarizeOptions) { cfg.threshold = th }
 }
 // newRandomSolarize builds a RandomSolarize from the default options with
 // each of the given opts applied in order.
 func newRandomSolarize(opts ...solarizeOption) *RandomSolarize {
 	cfg := defaultSolarizeOptions()
 	for i := range opts {
 		opts[i](cfg)
 	}
 	return &RandomSolarize{threshold: cfg.threshold, pvalue: cfg.pvalue}
 }
 // Forward solarizes x when a freshly drawn random value is below pvalue;
 // otherwise it returns a shallow clone of x.
 func (rs *RandomSolarize) Forward(x *ts.Tensor) *ts.Tensor {
 	if randPvalue() < rs.pvalue {
 		return solarize(x, rs.threshold)
 	}
 	return x.MustShallowClone()
 }
 // WithRandomSolarize returns an Option that installs a RandomSolarize built
 // from opts. The module is created once, when this function is called.
 func WithRandomSolarize(opts ...solarizeOption) Option {
 	module := newRandomSolarize(opts...)
 	return func(o *Options) { o.randomSolarize = module }
 }
--- a/vision/aug/transform.go
+++ b/vision/aug/transform.go
@ -0,0 +1,188 @@
 package aug
 import (
 	"math/rand"
 	"time"
 	"github.com/sugarme/gotch/nn"
 	ts "github.com/sugarme/gotch/tensor"
 )
 // Transformer is an interface that can transform an image tensor.
 type Transformer interface {
 	// Transform takes an image tensor and returns the transformed tensor.
 	Transform(x *ts.Tensor) *ts.Tensor
 }
 // Augment is a struct composed of augmentation modules that implements the Transformer interface.
 type Augment struct {
 	augments *nn.Sequential // modules applied by Transform through its Forward method
 }
 // Transform implements Transformer interface for Augment struct.
 // It forwards image through the composed sequence of augmentation modules.
 func (a *Augment) Transform(image *ts.Tensor) *ts.Tensor {
 	return a.augments.Forward(image)
 }
 // Options holds one optional module slot per augmentation method.
 // A nil field means that augmentation has not been configured; Compose only
 // adds the non-nil fields to the augmentation sequence.
 type Options struct {
 	rotate                *RotateModule
 	randRotate            *RandRotateModule
 	resize                *ResizeModule
 	colorJitter           *ColorJitter
 	gaussianBlur          *GaussianBlur
 	randomHFlip           *RandomHorizontalFlip
 	randomVFlip           *RandomVerticalFlip
 	randomCrop            *RandomCrop
 	centerCrop            *CenterCrop
 	randomCutout          *RandomCutout
 	randomPerspective     *RandomPerspective
 	randomAffine          *RandomAffine
 	randomGrayscale       *RandomGrayscale
 	randomSolarize        *RandomSolarize
 	randomPosterize       *RandomPosterize
 	randomInvert          *RandomInvert
 	randomAutocontrast    *RandomAutocontrast
 	randomAdjustSharpness *RandomAdjustSharpness
 	randomEqualize        *RandomEqualize
 	normalize             *Normalize
 }
 // defaultOption returns an Options with every augmentation slot left nil,
 // i.e. no augmentation configured.
 func defaultOption() *Options {
 	// The zero value of Options already has all pointer fields set to nil.
 	return &Options{}
 }
 // Option is a functional option that configures one augmentation slot of Options.
 type Option func(o *Options)
 // Compose creates a new Augment struct by adding augmentation methods.
 //
 // Each non-nil opt is applied to a fresh Options; nil options are skipped.
 // Every configured module is then added to an nn.Sequential in the fixed
 // order listed below, regardless of the order in which opts were given.
 // Options has a single slot per augmentation type, so when several opts set
 // the same slot, the last one applied wins.
 // The returned error is always nil.
 func Compose(opts ...Option) (Transformer, error) {
 	augOpts := defaultOption()
 	for _, opt := range opts {
 		// Skip nil options.
 		if opt != nil {
 			opt(augOpts)
 		}
 	}
 	var augs *nn.Sequential = nn.Seq()
 	// Each slot is checked against nil on its concrete pointer type before
 	// being added.
 	if augOpts.rotate != nil {
 		augs.Add(augOpts.rotate)
 	}
 	if augOpts.randRotate != nil {
 		augs.Add(augOpts.randRotate)
 	}
 	if augOpts.resize != nil {
 		augs.Add(augOpts.resize)
 	}
 	if augOpts.colorJitter != nil {
 		augs.Add(augOpts.colorJitter)
 	}
 	if augOpts.gaussianBlur != nil {
 		augs.Add(augOpts.gaussianBlur)
 	}
 	if augOpts.randomHFlip != nil {
 		augs.Add(augOpts.randomHFlip)
 	}
 	if augOpts.randomVFlip != nil {
 		augs.Add(augOpts.randomVFlip)
 	}
 	if augOpts.randomCrop != nil {
 		augs.Add(augOpts.randomCrop)
 	}
 	if augOpts.centerCrop != nil {
 		augs.Add(augOpts.centerCrop)
 	}
 	if augOpts.randomCutout != nil {
 		augs.Add(augOpts.randomCutout)
 	}
 	if augOpts.randomPerspective != nil {
 		augs.Add(augOpts.randomPerspective)
 	}
 	if augOpts.randomAffine != nil {
 		augs.Add(augOpts.randomAffine)
 	}
 	if augOpts.randomGrayscale != nil {
 		augs.Add(augOpts.randomGrayscale)
 	}
 	if augOpts.randomSolarize != nil {
 		augs.Add(augOpts.randomSolarize)
 	}
 	if augOpts.randomPosterize != nil {
 		augs.Add(augOpts.randomPosterize)
 	}
 	if augOpts.randomInvert != nil {
 		augs.Add(augOpts.randomInvert)
 	}
 	if augOpts.randomAutocontrast != nil {
 		augs.Add(augOpts.randomAutocontrast)
 	}
 	if augOpts.randomAdjustSharpness != nil {
 		augs.Add(augOpts.randomAdjustSharpness)
 	}
 	if augOpts.randomEqualize != nil {
 		augs.Add(augOpts.randomEqualize)
 	}
 	if augOpts.normalize != nil {
 		augs.Add(augOpts.normalize)
 	}
 	return &Augment{augs}, nil
 }
 // OneOf randomly returns one option from the list of options with a specific
 // p value.
 //
 // It returns nil when tfOpts is empty or when the drawn random value is not
 // below pvalue; otherwise it returns one of tfOpts picked at random.
 // The choice is made once, when OneOf is called, not each time the returned
 // Option is applied.
 func OneOf(pvalue float64, tfOpts ...Option) Option {
 	count := len(tfOpts)
 	if count < 1 {
 		return nil
 	}
 	if randPvalue() >= pvalue {
 		return nil
 	}
 	rand.Seed(time.Now().UnixNano())
 	return tfOpts[rand.Intn(count)]
 }