diff --git a/CHANGELOG.md b/CHANGELOG.md index ace57f7..12e7217 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,9 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] + +## [0.3.9] - [#24], [#26]: fixed memory leak. - [#30]: fixed varstore.Save() randomly panic - segmentfault - [#32]: nn.Seq Forward return nil tensor if length of layers = 1 +- [#36]: resolved image augmentation ## [0.3.8] diff --git a/example/augmentation/README.md b/example/augmentation/README.md new file mode 100644 index 0000000..f20d32e --- /dev/null +++ b/example/augmentation/README.md @@ -0,0 +1,31 @@ +# Image Augmentation Example + +This example demonstrates how to use image augmentation functions. It is implemented as similar as possible to [original Pytorch vision/transform](https://pytorch.org/vision/stable/transforms.html#). + +There are 2 APIs (`aug.Compose` and `aug.OneOf`) to compose augmentation methods as shown in the example: + +```go + t, err := aug.Compose( + aug.WithRandomVFlip(0.5), + aug.WithRandomHFlip(0.5), + aug.WithRandomCutout(), + aug.OneOf( + 0.3, + aug.WithColorJitter(0.3, 0.3, 0.3, 0.4), + aug.WithRandomGrayscale(1.0), + ), + aug.OneOf( + 0.3, + aug.WithGaussianBlur([]int64{5, 5}, []float64{1.0, 2.0}), + aug.WithRandomAffine(), + ), + ) + if err != nil { + panic(err) + } + + out := t.Transform(imgTs) +``` + + + diff --git a/example/augmentation/bb.png b/example/augmentation/bb.png new file mode 100644 index 0000000..6b13541 Binary files /dev/null and b/example/augmentation/bb.png differ diff --git a/example/augmentation/main.go b/example/augmentation/main.go new file mode 100644 index 0000000..c50ec1d --- /dev/null +++ b/example/augmentation/main.go @@ -0,0 +1,69 @@ +package main + +import ( + "fmt" + + "github.com/sugarme/gotch" + "github.com/sugarme/gotch/vision" + "github.com/sugarme/gotch/vision/aug" +) + +func main() { + 
n := 360 + for i := 1; i <= n; i++ { + img, err := vision.Load("./bb.png") + if err != nil { + panic(err) + } + + device := gotch.CudaIfAvailable() + // device := gotch.CPU + imgTs := img.MustTo(device, true) + // t, err := aug.Compose(aug.WithResize(512, 512)) // NOTE. WithResize just works on CPU. + // t, err := aug.Compose(aug.WithRandRotate(0, 360), aug.WithColorJitter(0.3, 0.3, 0.3, 0.4)) + // t, err := aug.Compose(aug.WithGaussianBlur([]int64{5, 5}, []float64{1.0, 2.0}), aug.WithRandRotate(0, 360), aug.WithColorJitter(0.3, 0.3, 0.3, 0.3)) + // t, err := aug.Compose(aug.WithRandomCrop([]int64{320, 320}, []int64{10, 10}, true, "constant")) + // t, err := aug.Compose(aug.WithCenterCrop([]int64{320, 320})) + // t, err := aug.Compose(aug.WithRandomCutout(aug.WithCutoutValue([]int64{124, 96, 255}), aug.WithCutoutScale([]float64{0.01, 0.1}), aug.WithCutoutRatio([]float64{0.5, 0.5}))) + // t, err := aug.Compose(aug.WithRandomPerspective(aug.WithPerspectiveScale(0.6), aug.WithPerspectivePvalue(0.8))) + // t, err := aug.Compose(aug.WithRandomAffine(aug.WithAffineDegree([]int64{0, 15}), aug.WithAffineShear([]float64{0, 15}))) + // t, err := aug.Compose(aug.WithRandomGrayscale(0.5)) + // t, err := aug.Compose(aug.WithRandomSolarize(aug.WithSolarizeThreshold(125), aug.WithSolarizePvalue(0.5))) + // t, err := aug.Compose(aug.WithRandomInvert(0.5)) + // t, err := aug.Compose(aug.WithRandomPosterize(aug.WithPosterizeBits(2), aug.WithPosterizePvalue(1.0))) + // t, err := aug.Compose(aug.WithRandomAutocontrast()) + // t, err := aug.Compose(aug.WithRandomAdjustSharpness(aug.WithSharpnessPvalue(0.3), aug.WithSharpnessFactor(10))) + // t, err := aug.Compose(aug.WithRandomEqualize(1.0)) + // t, err := aug.Compose(aug.WithNormalize(aug.WithNormalizeMean([]float64{0.485, 0.456, 0.406}), aug.WithNormalizeStd([]float64{0.229, 0.224, 0.225}))) + + t, err := aug.Compose( + aug.WithRandomVFlip(0.5), + aug.WithRandomHFlip(0.5), + aug.WithRandomCutout(), + aug.OneOf( + 0.3, + 
aug.WithColorJitter(0.3, 0.3, 0.3, 0.4), + aug.WithRandomGrayscale(1.0), + ), + aug.OneOf( + 0.3, + aug.WithGaussianBlur([]int64{5, 5}, []float64{1.0, 2.0}), + aug.WithRandomAffine(), + ), + ) + if err != nil { + panic(err) + } + + out := t.Transform(imgTs) + fname := fmt.Sprintf("./output/bb-%03d.png", i) + err = vision.Save(out, fname) + if err != nil { + panic(err) + } + imgTs.MustDrop() + out.MustDrop() + + fmt.Printf("%03d/%v completed.\n", i, n) + } +} diff --git a/example/augmentation/output/.gitignore b/example/augmentation/output/.gitignore new file mode 100644 index 0000000..7c9d611 --- /dev/null +++ b/example/augmentation/output/.gitignore @@ -0,0 +1,3 @@ +* +!.gitignore +!README.md diff --git a/example/augmentation/output/README.md b/example/augmentation/output/README.md new file mode 100644 index 0000000..b11fa64 --- /dev/null +++ b/example/augmentation/output/README.md @@ -0,0 +1 @@ +Output images will be here. diff --git a/tensor/patch.go b/tensor/patch.go index 13db623..855e372 100644 --- a/tensor/patch.go +++ b/tensor/patch.go @@ -581,7 +581,7 @@ func (ts *Tensor) Lstsq(a *Tensor, del bool) (retVal *Tensor, err error) { } func (ts *Tensor) MustLstsq(a *Tensor, del bool) (retVal *Tensor) { - retVal, err := ts.Lstsq(del) + retVal, err := ts.Lstsq(a, del) if err != nil { log.Fatal(err) } diff --git a/vision/aug/affine.go b/vision/aug/affine.go new file mode 100644 index 0000000..c8944c7 --- /dev/null +++ b/vision/aug/affine.go @@ -0,0 +1,185 @@ +package aug + +import ( + "github.com/sugarme/gotch" + ts "github.com/sugarme/gotch/tensor" +) + +// RandomAffine is transformation of the image keeping center invariant. +// If the image is torch Tensor, it is expected +// to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. +// Args: +// - degrees (sequence or number): Range of degrees to select from. +// If degrees is a number instead of sequence like (min, max), the range of degrees +// will be (-degrees, +degrees). 
Set to 0 to deactivate rotations. +// - translate (tuple, optional): tuple of maximum absolute fraction for horizontal +// and vertical translations. For example translate=(a, b), then horizontal shift +// is randomly sampled in the range -img_width * a < dx < img_width * a and vertical shift is +// randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default. +// - scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is +// randomly sampled from the range a <= scale <= b. Will keep original scale by default. +// - shear (sequence or number, optional): Range of degrees to select from. +// If shear is a number, a shear parallel to the x axis in the range (-shear, +shear) +// will be applied. Else if shear is a sequence of 2 values a shear parallel to the x axis in the +// range (shear[0], shear[1]) will be applied. Else if shear is a sequence of 4 values, +// a x-axis shear in (shear[0], shear[1]) and y-axis shear in (shear[2], shear[3]) will be applied. +// Will not apply shear by default. +// - interpolation (InterpolationMode): Desired interpolation enum defined by +// :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. +// If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. +// For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. +// - fill (sequence or number): Pixel fill value for the area outside the transformed +// image. Default is ``0``. If given a number, the value is used for all bands respectively. +// Please use the ``interpolation`` parameter instead. +// .. 
_filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters +type RandomAffine struct { + degree []int64 // degree range + translate []float64 + scale []float64 // scale range + shear []float64 + interpolationMode string + fillValue []float64 +} + +func (ra *RandomAffine) getParams(imageSize []int64) (float64, []int64, float64, []float64) { + angleTs := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU) + angleTs.MustUniform_(float64(ra.degree[0]), float64(ra.degree[1])) + angle := angleTs.Float64Values()[0] + angleTs.MustDrop() + + var translations []int64 = []int64{0, 0} + if ra.translate != nil { + maxDX := ra.translate[0] * float64(imageSize[0]) + maxDY := ra.translate[1] * float64(imageSize[1]) + dx := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU) + dx.MustUniform_(-maxDX, maxDX) + tx := dx.Float64Values()[0] + dx.MustDrop() + + dy := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU) + dy.MustUniform_(-maxDY, maxDY) + ty := dx.Float64Values()[0] + dy.MustDrop() + + translations = []int64{int64(tx), int64(ty)} // should we use math.Round here??? 
+ } + + scale := 1.0 + if ra.scale != nil { + scaleTs := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU) + scaleTs.MustUniform_(ra.scale[0], ra.scale[1]) + scale = scaleTs.Float64Values()[0] + scaleTs.MustDrop() + } + + var ( + shearX, shearY float64 = 0.0, 0.0 + ) + if ra.shear != nil { + shearXTs := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU) + shearXTs.MustUniform_(ra.shear[0], ra.shear[1]) + shearX = shearXTs.Float64Values()[0] + shearXTs.MustDrop() + + if len(ra.shear) == 4 { + shearYTs := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU) + shearYTs.MustUniform_(ra.shear[2], ra.shear[3]) + shearY = shearYTs.Float64Values()[0] + shearYTs.MustDrop() + } + } + + var shear []float64 = []float64{shearX, shearY} + + return angle, translations, scale, shear +} + +func (ra *RandomAffine) Forward(x *ts.Tensor) *ts.Tensor { + w, h := getImageSize(x) + angle, translations, scale, shear := ra.getParams([]int64{w, h}) + + out := affine(x, angle, translations, scale, shear, ra.interpolationMode, ra.fillValue) + + return out +} + +func newRandomAffine(opts ...affineOption) *RandomAffine { + p := defaultAffineOptions() + for _, o := range opts { + o(p) + } + + return &RandomAffine{ + degree: p.degree, + translate: p.translate, + scale: p.scale, + shear: p.shear, + interpolationMode: p.interpolationMode, + fillValue: p.fillValue, + } +} + +type affineOptions struct { + degree []int64 + translate []float64 + scale []float64 + shear []float64 + interpolationMode string + fillValue []float64 +} + +type affineOption func(*affineOptions) + +func defaultAffineOptions() *affineOptions { + return &affineOptions{ + degree: []int64{-180, 180}, + translate: nil, + scale: nil, + shear: []float64{-180.0, 180.0}, + interpolationMode: "bilinear", + fillValue: []float64{0.0, 0.0, 0.0}, + } +} + +func WithAffineDegree(degree []int64) affineOption { + return func(o *affineOptions) { + o.degree = degree + } +} + +func WithAffineTranslate(translate []float64) affineOption { + return func(o 
*affineOptions) { + o.translate = translate + } +} + +func WithAffineScale(scale []float64) affineOption { + return func(o *affineOptions) { + o.scale = scale + } +} + +func WithAffineShear(shear []float64) affineOption { + return func(o *affineOptions) { + o.shear = shear + } +} + +func WithAffineMode(mode string) affineOption { + return func(o *affineOptions) { + o.interpolationMode = mode + } +} + +func WithAffineFillValue(fillValue []float64) affineOption { + return func(o *affineOptions) { + o.fillValue = fillValue + } +} + +func WithRandomAffine(opts ...affineOption) Option { + ra := newRandomAffine(opts...) + return func(o *Options) { + o.randomAffine = ra + } +} diff --git a/vision/aug/blur.go b/vision/aug/blur.go new file mode 100644 index 0000000..ea78524 --- /dev/null +++ b/vision/aug/blur.go @@ -0,0 +1,89 @@ +package aug + +import ( + "fmt" + "log" + + "github.com/sugarme/gotch" + ts "github.com/sugarme/gotch/tensor" +) + +type GaussianBlur struct { + kernelSize []int64 // >= 0 && ks%2 != 0 + sigma []float64 // [0.1, 2.0] range(min, max) +} + +// ks : kernal size. Can be 1-2 element slice +// sigma: minimal and maximal standard deviation that can be chosen for blurring kernel +// range (min, max). Can be 1-2 element slice +func newGaussianBlur(ks []int64, sig []float64) *GaussianBlur { + if len(ks) == 0 || len(ks) > 2 { + err := fmt.Errorf("Kernel size should have 1-2 elements. Got %v\n", len(ks)) + log.Fatal(err) + } + for _, size := range ks { + if size <= 0 || size%2 == 0 { + err := fmt.Errorf("Kernel size should be an odd and positive number.") + log.Fatal(err) + } + } + + if len(sig) == 0 || len(sig) > 2 { + err := fmt.Errorf("Sigma should have 1-2 elements. 
Got %v\n", len(sig)) + log.Fatal(err) + } + + for _, s := range sig { + if s <= 0 { + err := fmt.Errorf("Sigma should be a positive number.") + log.Fatal(err) + } + } + + var kernelSize []int64 + switch len(ks) { + case 1: + kernelSize = []int64{ks[0], ks[0]} + case 2: + kernelSize = ks + default: + panic("Shouldn't reach here.") + } + + var sigma []float64 + switch len(sig) { + case 1: + sigma = []float64{sig[0], sig[0]} + case 2: + min := sig[0] + max := sig[1] + if min > max { + min = sig[1] + max = sig[0] + } + sigma = []float64{min, max} + default: + panic("Shouldn't reach here.") + } + + return &GaussianBlur{ + kernelSize: kernelSize, + sigma: sigma, + } +} + +func (b *GaussianBlur) Forward(x *ts.Tensor) *ts.Tensor { + sigmaTs := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU) + sigmaTs.MustUniform_(b.sigma[0], b.sigma[1]) + sigmaVal := sigmaTs.Float64Values()[0] + sigmaTs.MustDrop() + + return gaussianBlur(x, b.kernelSize, []float64{sigmaVal, sigmaVal}) +} + +func WithGaussianBlur(ks []int64, sig []float64) Option { + return func(o *Options) { + gb := newGaussianBlur(ks, sig) + o.gaussianBlur = gb + } +} diff --git a/vision/aug/color.go b/vision/aug/color.go new file mode 100644 index 0000000..18b5b0a --- /dev/null +++ b/vision/aug/color.go @@ -0,0 +1,77 @@ +package aug + +import ( + "math/rand" + "time" + + ts "github.com/sugarme/gotch/tensor" +) + +// Ref. 
https://github.com/pytorch/vision/blob/f1d734213af65dc06e777877d315973ba8386080/torchvision/transforms/functional_tensor.py + +type ColorJitter struct { + brightness float64 + contrast float64 + saturation float64 + hue float64 +} + +func defaultColorJitter() *ColorJitter { + return &ColorJitter{ + brightness: 1.0, + contrast: 1.0, + saturation: 1.0, + hue: 0.0, + } +} + +func (c *ColorJitter) setBrightness(brightness float64) { + c.brightness = brightness +} + +func (c *ColorJitter) setContrast(contrast float64) { + c.contrast = contrast +} + +func (c *ColorJitter) setSaturation(sat float64) { + c.saturation = sat +} + +func (c *ColorJitter) setHue(hue float64) { + c.hue = hue +} + +// Forward implement ts.Module by randomly picking one of brightness, contrast, +// staturation or hue function to transform input image tensor. +func (c *ColorJitter) Forward(x *ts.Tensor) *ts.Tensor { + rand.Seed(time.Now().UnixNano()) + idx := rand.Intn(4) + switch idx { + case 0: + v := randVal(getMinMax(c.brightness)) + return adjustBrightness(x, v) + case 1: + v := randVal(getMinMax(c.contrast)) + return adjustContrast(x, v) + case 2: + v := randVal(getMinMax(c.saturation)) + return adjustSaturation(x, v) + case 3: + v := randVal(0, c.hue) + return adjustHue(x, v) + default: + panic("Shouldn't reach here.") + } +} + +func WithColorJitter(brightness, contrast, sat, hue float64) Option { + c := defaultColorJitter() + c.setBrightness(brightness) + c.setContrast(contrast) + c.setSaturation(sat) + c.setHue(hue) + + return func(o *Options) { + o.colorJitter = c + } +} diff --git a/vision/aug/contrast.go b/vision/aug/contrast.go new file mode 100644 index 0000000..7b30d6c --- /dev/null +++ b/vision/aug/contrast.go @@ -0,0 +1,43 @@ +package aug + +import ( + ts "github.com/sugarme/gotch/tensor" +) + +// RandomAutocontrast autocontrasts the pixels of the given image randomly with a given probability. 
+// If the image is torch Tensor, it is expected +// to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions. +// Args: +// - p (float): probability of the image being autocontrasted. Default value is 0.5 +type RandomAutocontrast struct { + pvalue float64 +} + +func newRandomAutocontrast(pOpt ...float64) *RandomAutocontrast { + p := 0.5 + if len(pOpt) > 0 { + p = pOpt[0] + } + + return &RandomAutocontrast{p} +} + +func (rac *RandomAutocontrast) Forward(x *ts.Tensor) *ts.Tensor { + r := randPvalue() + var out *ts.Tensor + switch { + case r < rac.pvalue: + out = autocontrast(x) + default: + out = x.MustShallowClone() + } + + return out +} + +func WithRandomAutocontrast(p ...float64) Option { + rac := newRandomAutocontrast(p...) + return func(o *Options) { + o.randomAutocontrast = rac + } +} diff --git a/vision/aug/crop.go b/vision/aug/crop.go new file mode 100644 index 0000000..886f040 --- /dev/null +++ b/vision/aug/crop.go @@ -0,0 +1,124 @@ +package aug + +import ( + "fmt" + "log" + // "math" + + "github.com/sugarme/gotch" + ts "github.com/sugarme/gotch/tensor" +) + +type RandomCrop struct { + size []int64 + padding []int64 + paddingIfNeeded bool + paddingMode string +} + +func newRandomCrop(size, padding []int64, paddingIfNeeded bool, paddingMode string) *RandomCrop { + return &RandomCrop{ + size: size, + padding: padding, + paddingIfNeeded: paddingIfNeeded, + paddingMode: paddingMode, + } +} + +// get parameters for crop +func (c *RandomCrop) params(x *ts.Tensor) (int64, int64, int64, int64) { + w, h := getImageSize(x) + th, tw := c.size[0], c.size[1] + if h+1 < th || w+1 < tw { + err := fmt.Errorf("Required crop size %v is larger then input image size %v", c.size, []int64{h, w}) + log.Fatal(err) + } + + if w == tw && h == th { + return 0, 0, h, w + } + + iTs := ts.MustRandint1(0, h-th+1, []int64{1}, gotch.Int64, gotch.CPU) + i := iTs.Int64Values()[0] + iTs.MustDrop() + + jTs := ts.MustRandint1(0, w-tw+1, []int64{1}, 
gotch.Int64, gotch.CPU) + j := jTs.Int64Values()[0] + jTs.MustDrop() + + return i, j, th, tw +} + +func (c *RandomCrop) Forward(x *ts.Tensor) *ts.Tensor { + var img *ts.Tensor + if c.padding != nil { + img = pad(x, c.padding, c.paddingMode) + } else { + img = x.MustShallowClone() + } + + w, h := getImageSize(x) + + var ( + paddedW *ts.Tensor + paddedWH *ts.Tensor + ) + // pad width if needed + if c.paddingIfNeeded && w < c.size[1] { + padding := []int64{c.size[1] - w, 0} + paddedW = pad(img, padding, c.paddingMode) + } else { + paddedW = img.MustShallowClone() + } + img.MustDrop() + + // pad height if needed + if c.paddingIfNeeded && h < c.size[0] { + padding := []int64{0, c.size[0] - h} + paddedWH = pad(paddedW, padding, c.paddingMode) + } else { + paddedWH = paddedW.MustShallowClone() + } + + paddedW.MustDrop() + + // i, j, h, w = self.get_params(img, self.size) + i, j, h, w := c.params(x) + out := crop(paddedWH, i, j, h, w) + paddedWH.MustDrop() + return out +} + +func WithRandomCrop(size []int64, padding []int64, paddingIfNeeded bool, paddingMode string) Option { + return func(o *Options) { + c := newRandomCrop(size, padding, paddingIfNeeded, paddingMode) + o.randomCrop = c + } +} + +// CenterCrop crops the given image at the center. +// If the image is torch Tensor, it is expected +// to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. +// If image size is smaller than output size along any edge, image is padded with 0 and then center cropped. +type CenterCrop struct { + size []int64 +} + +func newCenterCrop(size []int64) *CenterCrop { + if len(size) != 2 { + err := fmt.Errorf("Expected size of 2 elements. 
Got %v\n", len(size)) + log.Fatal(err) + } + return &CenterCrop{size} +} + +func (cc *CenterCrop) Forward(x *ts.Tensor) *ts.Tensor { + return centerCrop(x, cc.size) +} + +func WithCenterCrop(size []int64) Option { + return func(o *Options) { + cc := newCenterCrop(size) + o.centerCrop = cc + } +} diff --git a/vision/aug/cutout.go b/vision/aug/cutout.go new file mode 100644 index 0000000..c0446b2 --- /dev/null +++ b/vision/aug/cutout.go @@ -0,0 +1,177 @@ +package aug + +import ( + "fmt" + "log" + "math" + + "github.com/sugarme/gotch" + ts "github.com/sugarme/gotch/tensor" +) + +// Randomly selects a rectangle region in an torch Tensor image and erases its pixels. +// This transform does not support PIL Image. +// 'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/abs/1708.04896 +// +// Args: +// p: probability that the random erasing operation will be performed. +// scale: range of proportion of erased area against input image. +// ratio: range of aspect ratio of erased area. +// value: erasing value. Default is 0. If a single int, it is used to +// erase all pixels. If a tuple of length 3, it is used to erase +// R, G, B channels respectively. +// If a str of 'random', erasing each pixel with random values. 
+type RandomCutout struct { + pvalue float64 + scale []float64 + ratio []float64 + rgbVal []int64 // RGB value +} + +type cutoutOptions struct { + pvalue float64 + scale []float64 + ratio []float64 + rgbVal []int64 // RGB value +} + +type cutoutOption func(o *cutoutOptions) + +func defaultCutoutOptions() *cutoutOptions { + return &cutoutOptions{ + pvalue: 0.5, + scale: []float64{0.02, 0.33}, + ratio: []float64{0.3, 3.3}, + rgbVal: []int64{0, 0, 0}, + } +} + +func newRandomCutout(pvalue float64, scale, ratio []float64, rgbVal []int64) *RandomCutout { + return &RandomCutout{ + pvalue: pvalue, + scale: scale, + ratio: ratio, + rgbVal: rgbVal, + } +} + +func WithCutoutPvalue(p float64) cutoutOption { + if p < 0 || p > 1 { + log.Fatalf("Cutout p-value must be in range from 0 to 1. Got %v\n", p) + } + return func(o *cutoutOptions) { + o.pvalue = p + } +} + +func WithCutoutScale(scale []float64) cutoutOption { + if len(scale) != 2 { + log.Fatalf("Cutout scale should be in a range of 2 elments. Got %v elements\n", len(scale)) + } + return func(o *cutoutOptions) { + o.scale = scale + } +} + +func WithCutoutRatio(ratio []float64) cutoutOption { + if len(ratio) != 2 { + log.Fatalf("Cutout ratio should be in a range of 2 elments. Got %v elements\n", len(ratio)) + } + return func(o *cutoutOptions) { + o.ratio = ratio + } +} + +func WithCutoutValue(rgb []int64) cutoutOption { + var rgbVal []int64 + switch len(rgb) { + case 1: + rgbVal = []int64{rgb[0], rgb[0], rgb[0]} + case 3: + rgbVal = rgb + default: + err := fmt.Errorf("Cutout values can be single value or 3-element (RGB) value. 
Got %v values.", len(rgb)) + log.Fatal(err) + } + return func(o *cutoutOptions) { + o.rgbVal = rgbVal + } +} + +func (rc *RandomCutout) cutoutParams(x *ts.Tensor) (int64, int64, int64, int64, *ts.Tensor) { + dim := x.MustSize() + + imgH, imgW := dim[len(dim)-2], dim[len(dim)-1] + area := float64(imgH * imgW) + logRatio := ts.MustOfSlice(rc.ratio).MustLog(true).Float64Values() + + for i := 0; i < 10; i++ { + scaleTs := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU) + scaleTs.MustUniform_(rc.scale[0], rc.scale[1]) + scaleVal := scaleTs.Float64Values()[0] + scaleTs.MustDrop() + eraseArea := area * scaleVal + + ratioTs := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU) + ratioTs.MustUniform_(logRatio[0], logRatio[1]) + asTs := ratioTs.MustExp(true) + asVal := asTs.Float64Values()[0] // aspect ratio + asTs.MustDrop() + + // h = int(round(math.sqrt(erase_area * aspect_ratio))) + // w = int(round(math.sqrt(erase_area / aspect_ratio))) + h := int64(math.Round(math.Sqrt(eraseArea * asVal))) + w := int64(math.Round(math.Sqrt(eraseArea / asVal))) + if !(h < imgH && w < imgW) { + continue + } + + // v = torch.tensor(value)[:, None, None] + v := ts.MustOfSlice(rc.rgbVal).MustUnsqueeze(1, true).MustUnsqueeze(1, true) + + // i = torch.randint(0, img_h - h + 1, size=(1, )).item() + iTs := ts.MustRandint1(0, imgH-h+1, []int64{1}, gotch.Int64, gotch.CPU) + i := iTs.Int64Values()[0] + iTs.MustDrop() + // j = torch.randint(0, img_w - w + 1, size=(1, )).item() + jTs := ts.MustRandint1(0, imgW-w+1, []int64{1}, gotch.Int64, gotch.CPU) + j := jTs.Int64Values()[0] + jTs.MustDrop() + return i, j, h, w, v + } + + // return original image + img := x.MustShallowClone() + return 0, 0, imgH, imgW, img +} + +func (rc *RandomCutout) Forward(img *ts.Tensor) *ts.Tensor { + randTs := ts.MustRandn([]int64{1}, gotch.Float, gotch.CPU) + randVal := randTs.Float64Values()[0] + randTs.MustDrop() + + switch randVal < rc.pvalue { + case true: + x, y, h, w, v := rc.cutoutParams(img) + out := cutout(img, x, 
y, h, w, rc.rgbVal) + v.MustDrop() + return out + case false: + out := img.MustShallowClone() + return out + } + + panic("Shouldn't reach here") +} + +func WithRandomCutout(opts ...cutoutOption) Option { + params := defaultCutoutOptions() + for _, o := range opts { + o(params) + } + + return func(o *Options) { + rc := newRandomCutout(params.pvalue, params.scale, params.ratio, params.rgbVal) + o.randomCutout = rc + } +} diff --git a/vision/aug/equalize.go b/vision/aug/equalize.go new file mode 100644 index 0000000..10d65a2 --- /dev/null +++ b/vision/aug/equalize.go @@ -0,0 +1,46 @@ +package aug + +import ( + ts "github.com/sugarme/gotch/tensor" +) + +// RandomEqualize equalizes the histogram of the given image randomly with a given probability. +// If the image is torch Tensor, it is expected +// to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions. +// Args: +// - p (float): probability of the image being equalized. Default value is 0.5 +// Histogram equalization +// Ref. https://en.wikipedia.org/wiki/Histogram_equalization +type RandomEqualize struct { + pvalue float64 +} + +func newRandomEqualize(pOpt ...float64) *RandomEqualize { + p := 0.5 + if len(pOpt) > 0 { + p = pOpt[0] + } + + return &RandomEqualize{p} +} + +func (re *RandomEqualize) Forward(x *ts.Tensor) *ts.Tensor { + r := randPvalue() + + var out *ts.Tensor + switch { + case r < re.pvalue: + out = equalize(x) + default: + out = x.MustShallowClone() + } + + return out +} + +func WithRandomEqualize(p ...float64) Option { + re := newRandomEqualize(p...) + return func(o *Options) { + o.randomEqualize = re + } +} diff --git a/vision/aug/flip.go b/vision/aug/flip.go new file mode 100644 index 0000000..503b5cf --- /dev/null +++ b/vision/aug/flip.go @@ -0,0 +1,78 @@ +package aug + +import ( + "github.com/sugarme/gotch" + ts "github.com/sugarme/gotch/tensor" +) + +// RandomHorizontalFlip horizontally flips the given image randomly with a given probability. 
+// +// If the image is torch Tensor, it is expected to have [..., H, W] shape, +// where ... means an arbitrary number of leading dimensions +// Args: +// p (float): probability of the image being flipped. Default value is 0.5 +type RandomHorizontalFlip struct { + pvalue float64 +} + +func newRandomHorizontalFlip(pvalue float64) *RandomHorizontalFlip { + return &RandomHorizontalFlip{ + pvalue: pvalue, + } +} + +func (hf *RandomHorizontalFlip) Forward(x *ts.Tensor) *ts.Tensor { + randTs := ts.MustRandn([]int64{1}, gotch.Float, gotch.CPU) + randVal := randTs.Float64Values()[0] + randTs.MustDrop() + switch { + case randVal < hf.pvalue: + return hflip(x) + default: + out := x.MustShallowClone() + return out + } +} + +func WithRandomHFlip(pvalue float64) Option { + return func(o *Options) { + hf := newRandomHorizontalFlip(pvalue) + o.randomHFlip = hf + } +} + +// RandomVerticalFlip vertically flips the given image randomly with a given probability. +// +// If the image is torch Tensor, it is expected to have [..., H, W] shape, +// where ... means an arbitrary number of leading dimensions +// Args: +// p (float): probability of the image being flipped. 
Default value is 0.5 +type RandomVerticalFlip struct { + pvalue float64 +} + +func newRandomVerticalFlip(pvalue float64) *RandomVerticalFlip { + return &RandomVerticalFlip{ + pvalue: pvalue, + } +} + +func (vf *RandomVerticalFlip) Forward(x *ts.Tensor) *ts.Tensor { + randTs := ts.MustRandn([]int64{1}, gotch.Float, gotch.CPU) + randVal := randTs.Float64Values()[0] + randTs.MustDrop() + switch { + case randVal < vf.pvalue: + return vflip(x) + default: + out := x.MustShallowClone() + return out + } +} + +func WithRandomVFlip(pvalue float64) Option { + return func(o *Options) { + vf := newRandomVerticalFlip(pvalue) + o.randomVFlip = vf + } +} diff --git a/vision/aug/function.go b/vision/aug/function.go new file mode 100644 index 0000000..8464bd9 --- /dev/null +++ b/vision/aug/function.go @@ -0,0 +1,1514 @@ +package aug + +import ( + "fmt" + "log" + "math" + "math/rand" + "time" + + "github.com/sugarme/gotch" + ts "github.com/sugarme/gotch/tensor" +) + +func gaussianKernel1D(ks int64, sigma float64, dtype gotch.DType, device gotch.Device) *ts.Tensor { + ksHalf := (ks - 1) / 2 + x := ts.MustLinspace(ts.IntScalar(-ksHalf), ts.IntScalar(ksHalf), []int64{ks}, dtype, device) + + // pdf = torch.exp(-0.5 * (x / sigma).pow(2)) + pdf := x.MustDiv1(ts.FloatScalar(sigma), true).MustPow(ts.IntScalar(2), true).MustMul1(ts.FloatScalar(0.5), true).MustExp(true) + // kernel1d = pdf / pdf.sum() + pdfSum := pdf.MustSum(dtype, false) + kernel1d := pdf.MustDiv(pdfSum, true) + pdfSum.MustDrop() + + return kernel1d +} + +func gaussianKernel2D(ks []int64, sigma []float64, dtype gotch.DType, device gotch.Device) *ts.Tensor { + kernel1dX := gaussianKernel1D(ks[0], sigma[0], dtype, device) + kernel1dY := gaussianKernel1D(ks[1], sigma[1], dtype, device) + + // dimX := kernel1dX.MustSize() + kernel1dX.MustUnsqueeze_(0) // kernel1d_x[None, :] + dimY := kernel1dY.MustSize() + kernel1dY.MustUnsqueeze_(int64(len(dimY))) // kernel1d_y[:, None] + + kernel2d := kernel1dY.MustMm(kernel1dX, true) + 
kernel1dX.MustDrop() + return kernel2d +} + +func containsDType(dtype gotch.DType, dtypes []gotch.DType) bool { + for _, dt := range dtypes { + if dtype == dt { + return true + } + } + + return false +} + +func castSqueezeIn(x *ts.Tensor, reqDtypes []gotch.DType) (*ts.Tensor, bool, bool, gotch.DType) { + needSqueeze := false + xdim := x.MustSize() + var img *ts.Tensor + if len(xdim) < 4 { + img = x.MustUnsqueeze(0, false) + needSqueeze = true + } else { + img = x.MustShallowClone() + } + outDtype := x.DType() + needCast := false + if !containsDType(outDtype, reqDtypes) { + needCast = true + reqDType := reqDtypes[0] + img1 := img.MustTotype(reqDType, true) + return img1, needCast, needSqueeze, outDtype + } + return img, needCast, needSqueeze, outDtype +} + +func castSqueezeOut(x *ts.Tensor, needCast, needSqueeze bool, outDType gotch.DType) *ts.Tensor { + var ( + squeezeTs, castTs *ts.Tensor + ) + switch needSqueeze { + case true: + squeezeTs = x.MustSqueeze1(0, false) + case false: + squeezeTs = x.MustShallowClone() + } + + switch needCast { + case true: + // it is better to round before cast + if containsDType(outDType, []gotch.DType{gotch.Uint8, gotch.Int8, gotch.Int16, gotch.Int, gotch.Int64}) { + roundTs := squeezeTs.MustRound(true) + castTs = roundTs.MustTotype(outDType, true) + } else { + castTs = squeezeTs.MustTotype(outDType, true) + } + case false: + castTs = squeezeTs.MustShallowClone() + squeezeTs.MustDrop() + } + + return castTs +} + +func gaussianBlur(x *ts.Tensor, ks []int64, sigma []float64) *ts.Tensor { + dtype := gotch.Float + if x.DType() == gotch.Float || x.DType() == gotch.Double { + dtype = x.DType() + } + device := x.MustDevice() + + assertImageTensor(x) + + kernel := gaussianKernel2D(ks, sigma, dtype, device) + xdim := x.MustSize() + kdim := kernel.MustSize() + + // kernel = kernel.expand(img.shape[-3], 1, kernel.shape[0], kernel.shape[1]) + kexpand := kernel.MustExpand([]int64{xdim[len(xdim)-3], 1, kdim[0], kdim[1]}, true, true) + kdtype := 
kexpand.DType() + img, needCast, needSqueeze, outDType := castSqueezeIn(x, []gotch.DType{kdtype}) + + // padding = (left, right, top, bottom) + // padding = [kernel_size[0] // 2, kernel_size[0] // 2, kernel_size[1] // 2, kernel_size[1] // 2] + left := ks[0] / 2 + right := ks[0] / 2 + top := ks[1] / 2 + bottom := ks[1] / 2 + padding := []int64{left, right, top, bottom} + + // F.pad() + // https://github.com/pytorch/pytorch/blob/71f4c5c1f436258adc303b710efb3f41b2d50c4e/torch/nn/functional.py#L4070 + // img = torch_pad(img, padding, mode="reflect") + imgPad := img.MustReflectionPad2d(padding, true) // deleted img + + imgPadDim := imgPad.MustSize() + // img = conv2d(img, kernel, groups=img.shape[-3]) + // ref. https://github.com/pytorch/pytorch/blob/6060684609ebf66120db5af004b4cdafc5cccbdb/torch/nn/functional.py#L71 + imgConv2d := ts.MustConv2d(imgPad, kexpand, ts.NewTensor(), []int64{1}, []int64{0}, []int64{1}, imgPadDim[len(imgPadDim)-3]) + imgPad.MustDrop() + + // img = _cast_squeeze_out(img, need_cast, need_squeeze, out_dtype) + out := castSqueezeOut(imgConv2d, needCast, needSqueeze, outDType) + imgConv2d.MustDrop() + + return out +} + +func isTorchImage(x *ts.Tensor) bool { + return x.Dim() >= 2 +} + +func assertImageTensor(x *ts.Tensor) { + if !isTorchImage(x) { + err := fmt.Errorf("Input tensor is not a torch image.") + log.Fatal(err) + } +} + +func imageChanNum(x *ts.Tensor) int64 { + ndim := x.Dim() + + switch { + case ndim == 2: + return 1 + case ndim > 2: + return x.MustSize()[0] + default: + err := fmt.Errorf("imageChanNum - Input should be 2 or more. 
Got %v", ndim) + log.Fatal(err) + return 0 + } +} + +func contains(item int64, list []int64) bool { + for _, i := range list { + if item == i { + return true + } + } + + return false +} + +func assertChannels(x *ts.Tensor, permitted []int64) { + c := imageChanNum(x) + if !contains(c, permitted) { + err := fmt.Errorf("Input image tensor permitted channels are %+v, but found %v", permitted, c) + log.Fatal(err) + } +} + +func blend(img1, img2 *ts.Tensor, ratio float64) *ts.Tensor { + dtype := img1.DType() + // bound := 1.0 + // if dtype == gotch.Double || dtype == gotch.Float { + // bound = 255.0 + // } + bound := 255.0 + + // (ratio * img1 + (1.0 - ratio) * img2).clamp(0, bound).to(img1.dtype) + i1 := img1.MustMul1(ts.FloatScalar(ratio), false) + i2 := img2.MustMul1(ts.FloatScalar(1.0-ratio), false) + sumTs := i1.MustAdd(i2, true) + i2.MustDrop() + out := sumTs.MustClamp(ts.FloatScalar(0), ts.FloatScalar(bound), true).MustTotype(dtype, true) + return out +} + +// brightness should be in range 0.25 - 1.25 for visible view +func adjustBrightness(x *ts.Tensor, brightness float64) *ts.Tensor { + if brightness < 0 { + err := fmt.Errorf("adjustBrightness - brightness factor (%v) is not non-negative.", brightness) + log.Fatal(err) + } + + assertImageTensor(x) + assertChannels(x, []int64{1, 3}) + + zeros := x.MustZerosLike(false) + out := blend(x, zeros, brightness) + zeros.MustDrop() + + return out +} + +// randVal generates a value from uniform values from 0 to x +func randVal(from, to float64) float64 { + v := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU) + v.MustUniform_(from, to) + randVal := v.Float64Values()[0] + v.MustDrop() + return randVal +} + +func getMinMax(x float64) (float64, float64) { + from := 0.0 + if 1-x > 0 { + from = 1 - x + } + to := 1 + x + + return from, to +} + +func rgb2Gray(x *ts.Tensor, outChanOpt ...int64) *ts.Tensor { + var outChannels int64 = 1 + if len(outChanOpt) > 0 { + outChannels = outChanOpt[0] + } + + ndim := x.Dim() + if ndim < 3 { 
+ err := fmt.Errorf("Input image tensor should have at least 3 dimensions, but found %v", ndim) + log.Fatal(err) + } + + assertChannels(x, []int64{3}) + if !contains(outChannels, []int64{1, 3}) { + err := fmt.Errorf("Number of output channels should be either 1 or 3") + log.Fatal(err) + } + + rgbTs := x.MustUnbind(-3, false) + r := &rgbTs[0] + g := &rgbTs[1] + b := &rgbTs[2] + + // This implementation closely follows the TF one: + // https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/ops/image_ops_impl.py#L2105-L2138 + // l_img = (0.2989 * r + 0.587 * g + 0.114 * b).to(img.dtype) + rmul := r.MustMul1(ts.FloatScalar(0.2989), true) + gmul := g.MustMul1(ts.FloatScalar(0.587), true) + bmul := b.MustMul1(ts.FloatScalar(0.114), true) + addTs := rmul.MustAdd(gmul, true).MustAdd(bmul, true) + gmul.MustDrop() + bmul.MustDrop() + lImg := addTs.MustTotype(x.DType(), true).MustUnsqueeze(-3, true) + + if outChannels == 3 { + return lImg.MustExpand(x.MustSize(), true, true) + } + + return lImg +} + +func adjustContrast(x *ts.Tensor, contrast float64) *ts.Tensor { + if contrast < 0 { + err := fmt.Errorf("adjustContrast - contrast factor (%v) is not non-negative.", contrast) + log.Fatal(err) + } + + assertImageTensor(x) + assertChannels(x, []int64{3}) + + grayTs := rgb2Gray(x).MustTotype(x.DType(), true) + + mean := grayTs.MustMean1([]int64{-3, -2, -1}, true, gotch.Float, true).MustTotype(x.DType(), true) + out := blend(x, mean, contrast) + mean.MustDrop() + + return out +} + +func adjustSaturation(x *ts.Tensor, sat float64) *ts.Tensor { + if sat < 0 { + err := fmt.Errorf("adjustSaturation - saturation factor (%v) is not non-negative.", sat) + log.Fatal(err) + } + assertImageTensor(x) + assertChannels(x, []int64{3}) + grayTs := rgb2Gray(x).MustTotype(x.DType(), true) + out := blend(x, grayTs, sat) + grayTs.MustDrop() + + return out +} + +func rgb2HSV(x *ts.Tensor) *ts.Tensor { + rgbTs := x.MustUnbind(-3, false) + r := &rgbTs[0] + g := &rgbTs[1] + b := &rgbTs[2] 
+ + // # Implementation is based on https://github.com/python-pillow/Pillow/blob/4174d4267616897df3746d315d5a2d0f82c656ee/ + // # src/libImaging/Convert.c#L330 + // maxc = torch.max(img, dim=-3).values + // minc = torch.min(img, dim=-3).values + maxC := x.MustAmax([]int64{-3}, false, false) + minC := x.MustAmin([]int64{-3}, false, false) + + // # The algorithm erases S and H channel where `maxc = minc`. This avoids NaN + // # from happening in the results, because + // # + S channel has division by `maxc`, which is zero only if `maxc = minc` + // # + H channel has division by `(maxc - minc)`. + // # + // # Instead of overwriting NaN afterwards, we just prevent it from occuring so + // # we don't need to deal with it in case we save the NaN in a buffer in + // # backprop, if it is ever supported, but it doesn't hurt to do so. + // eqc = maxc == minc + eqC := maxC.MustEq1(minC, false) + + // cr = maxc - minc + cr := maxC.MustSub(minC, false) + + // # Since `eqc => cr = 0`, replacing denominator with 1 when `eqc` is fine. + ones := maxC.MustOnesLike(false) + + // s = cr / torch.where(eqc, ones, maxc) + condMaxC := ones.MustWhere1(eqC, maxC, false) + s := cr.MustDiv(condMaxC, false) + + // # Note that `eqc => maxc = minc = r = g = b`. So the following calculation + // # of `h` would reduce to `bc - gc + 2 + rc - bc + 4 + rc - bc = 6` so it + // # would not matter what values `rc`, `gc`, and `bc` have here, and thus + // # replacing denominator with 1 when `eqc` is fine. 
+ // cr_divisor = torch.where(eqc, ones, cr) + // rc = (maxc - r) / cr_divisor + // gc = (maxc - g) / cr_divisor + // bc = (maxc - b) / cr_divisor + crDivisor := ones.MustWhere1(eqC, cr, true) // delete ones + rc := maxC.MustSub(r, false).MustDiv(crDivisor, true) + gc := maxC.MustSub(g, false).MustDiv(crDivisor, true) + bc := maxC.MustSub(b, false).MustDiv(crDivisor, true) + + // hr = (maxc == r) * (bc - gc) + rSub := bc.MustSub(gc, false) + hr := maxC.MustEq1(r, false).MustMul(rSub, true) + rSub.MustDrop() + + // hg = ((maxc == g) & (maxc != r)) * (2.0 + rc - bc) + maxcCond1 := maxC.MustNotEqual1(r, false) + hgMul := rc.MustSub(bc, false).MustAdd1(ts.FloatScalar(2.0), true) + hg := maxC.MustEq1(g, false).MustLogicalAnd(maxcCond1, true).MustMul(hgMul, true) + maxcCond1.MustDrop() + hgMul.MustDrop() + + // hb = ((maxc != g) & (maxc != r)) * (4.0 + gc - rc) + maxcCond2 := maxC.MustNotEqual1(r, false) + hbMul := gc.MustSub(rc, false).MustAdd1(ts.FloatScalar(4.0), true) + hb := maxC.MustNotEqual1(g, false).MustLogicalAnd(maxcCond2, true).MustMul(hbMul, true) + maxcCond2.MustDrop() + hbMul.MustDrop() + + // h = (hr + hg + hb) + h1 := hr.MustAdd(hg, false).MustAdd(hb, true) + + // h = torch.fmod((h / 6.0 + 1.0), 1.0) + h2 := h1.MustDiv1(ts.FloatScalar(6.0), true).MustAdd1(ts.FloatScalar(1.0), true) // delete h1 + h3 := h2.MustFmod(ts.FloatScalar(1.0), true) // delete h2 + + // torch.stack((h, s, maxc), dim=-3) + out := ts.MustStack([]ts.Tensor{*h3, *s, *maxC}, -3) + + // Delete intermediate tensors + r.MustDrop() + g.MustDrop() + b.MustDrop() + h3.MustDrop() + maxC.MustDrop() + minC.MustDrop() + eqC.MustDrop() + s.MustDrop() + condMaxC.MustDrop() + cr.MustDrop() + crDivisor.MustDrop() + rc.MustDrop() + gc.MustDrop() + bc.MustDrop() + hr.MustDrop() + hg.MustDrop() + hb.MustDrop() + + return out +} + +func hsv2RGB(x *ts.Tensor) *ts.Tensor { + hsvTs := x.MustUnbind(-3, false) + h := &hsvTs[0] + s := &hsvTs[1] + v := &hsvTs[2] + + i := h.MustMul1(ts.FloatScalar(6.0), 
false).MustFloor(true)
	// f = (h * 6.0) - i
	// NOTE: fixed typo — the scalar was 0.6, which breaks HSV->RGB conversion;
	// the fractional part must come from h*6.0 to match i = floor(h*6.0) above.
	f := h.MustMul1(ts.FloatScalar(6.0), false).MustSub(i, true)

	// p = torch.clamp((v * (1.0 - s)), 0.0, 1.0)
	x1 := s.MustMul1(ts.FloatScalar(-1), false).MustAdd1(ts.FloatScalar(1.0), true)
	p := v.MustMul(x1, false).MustClamp(ts.FloatScalar(0.0), ts.FloatScalar(1.0), true)
	x1.MustDrop()

	// q = torch.clamp((v * (1.0 - s * f)), 0.0, 1.0)
	x2 := s.MustMul(f, false).MustMul1(ts.FloatScalar(-1), true).MustAdd1(ts.FloatScalar(1.0), true)
	q := v.MustMul(x2, false).MustClamp(ts.FloatScalar(0.0), ts.FloatScalar(1.0), true)
	x2.MustDrop()

	//t = torch.clamp((v * (1.0 - s * (1.0 - f))), 0.0, 1.0)
	// step1. s * (1.0 - f)
	sub1 := f.MustMul1(ts.FloatScalar(-1), false).MustAdd1(ts.FloatScalar(1.0), true).MustMul(s, true)
	// step 2: v *(1.0 - step1)
	x3 := sub1.MustMul1(ts.FloatScalar(-1), true).MustAdd1(ts.FloatScalar(1.0), true).MustMul(v, true) // deleted sub1
	t := x3.MustClamp(ts.FloatScalar(0.0), ts.FloatScalar(1.0), true) // deleted x3

	//i = i % 6
	iremainder := i.MustRemainder(ts.FloatScalar(6), true) // delete i
	// torch.arange(6, device=i.device).view(-1, 1, 1)
	x4 := ts.MustArange(ts.FloatScalar(6), gotch.Float, iremainder.MustDevice()).MustView([]int64{-1, 1, 1}, true)
	mask := iremainder.MustUnsqueeze(-3, true).MustEq1(x4, true).MustTotype(x.DType(), true) // delete iremainder
	x4.MustDrop()

	// a1 = torch.stack((v, q, p, p, t, v), dim=-3)
	// a2 = torch.stack((t, v, v, q, p, p), dim=-3)
	// a3 = torch.stack((p, p, t, v, v, q), dim=-3)
	// a4 = torch.stack((a1, a2, a3), dim=-4)
	a1 := ts.MustStack([]ts.Tensor{*v, *q, *p, *p, *t, *v}, -3)
	a2 := ts.MustStack([]ts.Tensor{*t, *v, *v, *q, *p, *p}, -3)
	a3 := ts.MustStack([]ts.Tensor{*p, *p, *t, *v, *v, *q}, -3)
	a4 := ts.MustStack([]ts.Tensor{*a1, *a2, *a3}, -4)

	out := ts.MustEinsum("...ijk, ...xijk -> ...xjk", []ts.Tensor{*mask, *a4})

	// Delete intermediate tensors
	h.MustDrop()
	s.MustDrop()
	v.MustDrop()
	f.MustDrop()
	p.MustDrop()
	
q.MustDrop() + t.MustDrop() + + a1.MustDrop() + a2.MustDrop() + a3.MustDrop() + a4.MustDrop() + mask.MustDrop() + + return out +} + +// ref. https://en.wikipedia.org/wiki/HSL_and_HSV +func adjustHue(x *ts.Tensor, hue float64) *ts.Tensor { + if hue < -0.5 || hue > 0.5 { + err := fmt.Errorf("hue factor (%v) is not in [-0.5, 0.5]", hue) + log.Fatal(err) + } + assertImageTensor(x) + assertChannels(x, []int64{1, 3}) + + if c := imageChanNum(x); c == 1 { + out := x.MustShallowClone() + return out + } + + imgFl := x.MustTotype(gotch.Float, false).MustDiv1(ts.FloatScalar(255.0), true) + hsvImg := rgb2HSV(imgFl) + + hsvTs := hsvImg.MustUnbind(-3, true) + h := &hsvTs[0] + s := &hsvTs[1] + v := &hsvTs[2] + hAdj := h.MustAdd1(ts.FloatScalar(hue), false).MustRemainder(ts.FloatScalar(1.0), true) + + hsvAdj := ts.MustStack([]ts.Tensor{*hAdj, *s, *v}, -3) + + imgHueAdj := hsv2RGB(hsvAdj) + + out := imgHueAdj.MustMul1(ts.FloatScalar(255.0), true) + + imgFl.MustDrop() + h.MustDrop() + s.MustDrop() + v.MustDrop() + hAdj.MustDrop() + hsvAdj.MustDrop() + + return out +} + +func adjustGamma(x *ts.Tensor, gamma float64, gainOpt ...int64) *ts.Tensor { + // var gain int64 = 1 + // if len(gainOpt) > 0 { + // gain = gainOpt[0] + // } + // TODO + return x +} + +func RGB2HSV(x *ts.Tensor) *ts.Tensor { + return rgb2HSV(x) +} + +func HSV2RGB(x *ts.Tensor) *ts.Tensor { + return hsv2RGB(x) +} + +func pad(x *ts.Tensor, padding []int64, paddingMode string) *ts.Tensor { + switch paddingMode { + case "reflection": + return x.MustReflectionPad2d(padding, false) + case "constant": + return x.MustConstantPadNd(padding, false) + case "replicate": + return x.MustReplicationPad2d(padding, false) + case "circular": + // TODO: + // ref: https://github.com/pytorch/pytorch/blob/71f4c5c1f436258adc303b710efb3f41b2d50c4e/torch/nn/functional.py#L4493 + log.Fatal("Unsupported circular padding.") + default: + log.Fatalf("Unrecognized padding mode %q\n", paddingMode) + } + return nil +} + +func getImageSize(x 
*ts.Tensor) (width, height int64) { + assertImageTensor(x) + dim := x.MustSize() + return dim[len(dim)-1], dim[len(dim)-2] +} + +func makeSlice(from, to int64) []int64 { + n := from - to + var out []int64 = make([]int64, n) + for i := 0; i < int(n); i++ { + out[i] = from + int64(i) + } + return out +} + +func crop(x *ts.Tensor, top, left, height, width int64) *ts.Tensor { + // return img[..., top:top + height, left:left + width] + dim := x.MustSize() + c := dim[0] + + var chans []ts.Tensor = make([]ts.Tensor, c) + hNar := ts.NewNarrow(top, top+height) + wNar := ts.NewNarrow(left, left+width) + for i := 0; i < int(c); i++ { + cx := x.Idx(ts.NewSelect(int64(i))) + x1 := cx.Idx(hNar) + cx.MustDrop() + x1T := x1.MustT(true) + x2 := x1T.Idx(wNar) + x1T.MustDrop() + out := x2.MustT(true) + chans[i] = *out + } + + cropTs := ts.MustStack(chans, 0) + for i := range chans { + chans[i].MustDrop() + } + return cropTs +} + +// Crops the given image at the center. +// If the image is torch Tensor, it is expected +// to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. +// If image size is smaller than output size along any edge, image is padded with 0 and then center cropped. 
func centerCrop(x *ts.Tensor, size []int64) *ts.Tensor {
	// size is (height, width) of the output crop.
	imgW, imgH := getImageSize(x)
	cropH, cropW := size[0], size[1]

	var paddedImg *ts.Tensor

	// If the requested crop is larger than the image on any edge, zero-pad
	// first so the crop window fits (mirrors torchvision's center_crop).
	if cropW > imgW || cropH > imgH {
		// (crop_width - image_width) // 2 if crop_width > image_width else 0,
		// (crop_width - image_width + 1) // 2 if crop_width > image_width else 0,
		var left, top, right, bottom int64 = 0, 0, 0, 0
		if cropW > imgW {
			left = (cropW - imgW) / 2
			right = (cropW - imgW + 1) / 2
		}
		// (crop_height - image_height) // 2 if crop_height > image_height else 0,
		// (crop_height - image_height + 1) // 2 if crop_height > image_height else 0,
		if cropH > imgH {
			top = (cropH - imgH) / 2
			bottom = (cropH - imgH + 1) / 2
		}

		// floatX := x.MustTotype(gotch.Float, false)
		// paddedImg = pad(floatX, []int64{left, right, top, bottom}, "reflection")
		// floatX.MustDrop()

		paddedImg = pad(x, []int64{left, right, top, bottom}, "constant")
		imgW, imgH = getImageSize(paddedImg)
		// Padded image already has exactly the target size: nothing to crop.
		if cropW == imgW && cropH == imgH {
			return paddedImg
		}
	} else {
		// No padding needed; clone so the unified MustDrop below is safe.
		paddedImg = x.MustShallowClone()
	}

	// cropTop := int64(math.Floor(float64(imgH-cropH) / 2.0))
	// cropLeft := int64(math.Floor(float64(imgW-cropW) / 2.0))
	cropTop := (imgH - cropH) / 2
	cropLeft := (imgW - cropW) / 2

	out := crop(paddedImg, cropTop, cropLeft, cropH, cropW)
	paddedImg.MustDrop()

	return out
}

// cutout erases the input Tensor Image with given value
//
// Args:
// img (Tensor Image): Tensor image of size (C, H, W) to be erased
// i (int): i in (i,j) i.e coordinates of the upper left corner.
// j (int): j in (i,j) i.e coordinates of the upper left corner.
// h (int): Height of the erased region.
// w (int): Width of the erased region.
// v: Erasing value.
+func cutout(x *ts.Tensor, top, left, height, width int64, rgbVal []int64) *ts.Tensor { + output := x.MustZerosLike(false) + output.Copy_(x) + dim := output.MustSize() + for i := 0; i < int(dim[0]); i++ { + cIdx := ts.NewSelect(int64(i)) + hNar := ts.NewNarrow(top, top+height) + wNar := ts.NewNarrow(left, left+width) + srcIdx := []ts.TensorIndexer{cIdx, hNar, wNar} + view := output.Idx(srcIdx) + oneTs := view.MustOnesLike(false) + vTs := oneTs.MustMul1(ts.IntScalar(rgbVal[i]), true) + view.Copy_(vTs) + vTs.MustDrop() + view.MustDrop() + } + + // output.Print() + return output +} + +func hflip(x *ts.Tensor) *ts.Tensor { + assertImageTensor(x) + return x.MustFlip([]int64{-1}, false) +} + +func vflip(x *ts.Tensor) *ts.Tensor { + assertImageTensor(x) + return x.MustFlip([]int64{-2}, false) +} + +// Ref. https://stackoverflow.com/questions/64197754 +// Ref. https://pytorch.org/tutorials/intermediate/spatial_transformer_tutorial.html +func getRotMat(theta float64) (*ts.Tensor, error) { + grid := []float64{math.Cos(theta), -1 * (math.Sin(theta)), 0, math.Sin(theta), math.Cos(theta), 0} + t, err := ts.NewTensorFromData(grid, []int64{2, 3}) + if err != nil { + return nil, err + } + + return t, nil +} + +func rotImg(x *ts.Tensor, theta float64, dtype gotch.DType) (*ts.Tensor, error) { + rotMat, err := getRotMat(theta) + if err != nil { + return nil, err + } + + size := x.MustSize() + mat := rotMat.MustUnsqueeze(0, true).MustTotype(dtype, true).MustRepeat([]int64{size[0], 1, 1}, true) + grid := ts.MustAffineGridGenerator(mat, size, true).MustTo(x.MustDevice(), true) + mat.MustDrop() + + out, err := ts.GridSampler(x, grid, 1, 1, true) + if err != nil { + return nil, err + } + grid.MustDrop() + return out, nil +} + +func applyGridTransform(x, gridInput *ts.Tensor, mode string, fillValue []float64) *ts.Tensor { + dtype := gridInput.DType() + img, needCast, needSqueeze, outDtype := castSqueezeIn(x, []gotch.DType{dtype}) + + imgDim := img.MustSize() + gridDim := 
gridInput.MustSize() + var grid *ts.Tensor + if imgDim[0] > 1 { + // Apply same grid to a batch of images + // grid = grid.expand(img.shape[0], grid.shape[1], grid.shape[2], grid.shape[3]) + grid = gridInput.MustExpand([]int64{imgDim[0], gridDim[1], gridDim[2], gridDim[3]}, true, false) + } else { + grid = gridInput.MustShallowClone() + } + + // Append a dummy mask for customized fill colors, should be faster than grid_sample() twice + // dummy = torch.ones((img.shape[0], 1, img.shape[2], img.shape[3]), dtype=img.dtype, device=img.device) + // img = torch.cat((img, dummy), dim=1) + dummy := ts.MustOnes([]int64{img.MustSize()[0], 1, img.MustSize()[2], img.MustSize()[3]}, img.DType(), img.MustDevice()) + imgCat := ts.MustCat([]ts.Tensor{*img, *dummy}, 1) + dummy.MustDrop() + img.MustDrop() + + // imgSample := gridSample(imgCat, grid, mode, "zeros", false) + var ( + modeInt int64 = 0 + paddingMode int64 = 0 + ) + + imgSample := ts.MustGridSampler(imgCat, grid, modeInt, paddingMode, false) + imgCat.MustDrop() + grid.MustDrop() + + // TODO. 
+ // Fill with required color + // mask = img[:, -1:, :, :] # N * 1 * H * W + // img = img[:, :-1, :, :] # N * C * H * W + // mask = mask.expand_as(img) + // len_fill = len(fill) if isinstance(fill, (tuple, list)) else 1 + // fill_img = torch.tensor(fill, dtype=img.dtype, device=img.device).view(1, len_fill, 1, 1).expand_as(img) + // if mode == 'nearest': + // mask = mask < 0.5 + // img[mask] = fill_img[mask] + // else: # 'bilinear' + // img = img * mask + (1.0 - mask) * fill_img + image := imgSample.MustNarrow(0, 0, 1, false).MustNarrow(1, 0, 3, true) + mask := imgSample.MustNarrow(0, 0, 1, false).MustNarrow(1, -1, 1, true).MustExpandAs(image, true) + imgSample.MustDrop() + fillImg := ts.MustOfSlice(fillValue).MustTotype(image.DType(), true).MustTo(image.MustDevice(), true).MustView([]int64{1, 3, 1, 1}, true).MustExpandAs(image, true) + + // img = img * mask + (1.0 - mask) * fill_img + addTs := mask.MustMul1(ts.FloatScalar(-1), false).MustAdd1(ts.FloatScalar(1.0), true).MustMul(fillImg, true) + imgOut := image.MustMul(mask, true).MustAdd(addTs, true) + addTs.MustDrop() + image.MustDrop() + mask.MustDrop() + fillImg.MustDrop() + + // out := castSqueezeOut(imgSample, needCast, needSqueeze, outDtype) + out := castSqueezeOut(imgOut, needCast, needSqueeze, outDtype) + imgOut.MustDrop() + + return out +} + +// Helper function to get the coefficients (a, b, c, d, e, f, g, h) for the perspective transforms. +// +// In Perspective Transform each pixel (x, y) in the original image gets transformed as, +// (x, y) -> ( (ax + by + c) / (gx + hy + 1), (dx + ey + f) / (gx + hy + 1) ) +// Args: +// - startpoints (list of list of ints): List containing four lists of two integers corresponding to four corners +// ``[top-left, top-right, bottom-right, bottom-left]`` of the original image. +// - endpoints (list of list of ints): List containing four lists of two integers corresponding to four corners +// ``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image. 
+// Returns: +// - octuple (a, b, c, d, e, f, g, h) for transforming each pixel. +func perspectiveCoeff(startPoints, endPoints [][]int64) []float64 { + size := int64(2 * len(startPoints)) + aMat := ts.MustZeros([]int64{size, 8}, gotch.Float, gotch.CPU) + for i := 0; i < len(startPoints); i++ { + p1 := endPoints[i] + p2 := startPoints[i] + // a_matrix[2 * i, :] = torch.tensor([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]]) + val1 := ts.MustOfSlice([]int64{p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]}) + // a_matrix[2 * i + 1, :] = torch.tensor([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]]) + val2 := ts.MustOfSlice([]int64{0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]}) + + idx1 := ts.NewSelect(int64(2 * i)) + aMatView1 := aMat.Idx(idx1) + aMatView1.Copy_(val1) + val1.MustDrop() + + idx2 := ts.NewSelect(int64(2*i + 1)) + aMatView2 := aMat.Idx(idx2) + aMatView2.Copy_(val2) + val2.MustDrop() + } + + var startData []int64 + for _, p := range startPoints { + startData = append(startData, p[0], p[1]) + } + + // bMat := ts.MustOfSlice(startPoints).MustTotype(gotch.Float, true).MustView([]int64{8}, true) + bMat := ts.MustOfSlice(startData).MustTotype(gotch.Float, true).MustView([]int64{8}, true) + + res := bMat.MustLstsq(aMat, true) + + aMat.MustDrop() + outputTs := res.MustSqueeze1(1, true) + output := outputTs.Float64Values() + outputTs.MustDrop() + + return output +} + +func perspectiveGrid(coef []float64, ow, oh int64, dtype gotch.DType, device gotch.Device) *ts.Tensor { + // https://github.com/python-pillow/Pillow/blob/4634eafe3c695a014267eefdce830b4a825beed7/ + // src/libImaging/Geometry.c#L394 + // x_out = (coeffs[0] * x + coeffs[1] * y + coeffs[2]) / (coeffs[6] * x + coeffs[7] * y + 1) + // y_out = (coeffs[3] * x + coeffs[4] * y + coeffs[5]) / (coeffs[6] * x + coeffs[7] * y + 1) + + theta1 := ts.MustOfSlice([]float64{ + coef[0], + coef[1], + coef[2], + coef[3], + coef[4], + coef[5], + }).MustTotype(dtype, 
true).MustTo(device, true).MustView([]int64{1, 2, 3}, true)

	// Denominator row of the projective transform is (g, h, 1).
	// NOTE: fixed bug — this used `coef[1.0]` (i.e. coef[1]) where the
	// constant 1.0 is required.
	theta2 := ts.MustOfSlice([]float64{
		coef[6],
		coef[7],
		1.0,
		coef[6],
		coef[7],
		1.0,
	}).MustTotype(dtype, true).MustTo(device, true).MustView([]int64{1, 2, 3}, true)

	d := 0.5

	baseGrid := ts.MustEmpty([]int64{1, oh, ow, 3}, dtype, device)

	// x_grid = torch.linspace(d, ow * 1.0 + d - 1.0, steps=ow, device=device)
	endX := float64(ow) + d - 1.0
	xGrid := ts.MustLinspace(ts.FloatScalar(d), ts.FloatScalar(endX), []int64{ow}, dtype, device)

	// y_grid = torch.linspace(d, oh * 1.0 + d - 1.0, steps=oh, device=device).unsqueeze_(-1)
	endY := float64(oh) + d - 1.0
	yGrid := ts.MustLinspace(ts.FloatScalar(d), ts.FloatScalar(endY), []int64{oh}, dtype, device)

	// base_grid[..., 0].copy_(x_grid)
	// base_grid[..., 1].copy_(y_grid)
	// base_grid[..., 2].fill_(1)
	baseDim := baseGrid.MustSize()
	for i := 0; i < int(baseDim[1]); i++ {
		view := baseGrid.MustSelect(0, 0, false).MustSelect(0, int64(i), true).MustSelect(1, 0, true)
		view.Copy_(xGrid)
		view.MustDrop()
	}
	for i := 0; i < int(baseDim[2]); i++ {
		view := baseGrid.MustSelect(0, 0, false).MustSelect(1, int64(i), true).MustSelect(1, 1, true)
		view.Copy_(yGrid)
		view.MustDrop()
	}

	// NOTE: fixed leak — xGrid/yGrid were never dropped anywhere in this
	// function; their values have been copied into baseGrid above, so they
	// can be released now.
	xGrid.MustDrop()
	yGrid.MustDrop()

	for i := 0; i < int(baseDim[2]); i++ {
		view := baseGrid.MustSelect(0, 0, false).MustSelect(1, int64(i), true).MustSelect(1, 2, true)
		// view.Fill_(ts.FloatScalar(1.0)) // NOTE. THIS CAUSES MEMORY LEAK!!!
+ oneTs := view.MustOnesLike(false) + view.Copy_(oneTs) + oneTs.MustDrop() + view.MustDrop() + } + + // rescaled_theta1 = theta1.transpose(1, 2) / torch.tensor([0.5 * ow, 0.5 * oh], dtype=dtype, device=device) + divTs := ts.MustOfSlice([]float64{0.5 * float64(ow), 0.5 * float64(oh)}).MustTotype(dtype, true).MustTo(device, true) + rescaledTheta1 := theta1.MustTranspose(1, 2, true).MustDiv(divTs, true) + divTs.MustDrop() + outputGrid1 := baseGrid.MustView([]int64{1, oh * ow, 3}, false).MustBmm(rescaledTheta1, true) + + // output_grid2 = base_grid.view(1, oh * ow, 3).bmm(theta2.transpose(1, 2)) + rescaledTheta2 := theta2.MustTranspose(1, 2, true) + outputGrid2 := baseGrid.MustView([]int64{1, oh * ow, 3}, false).MustBmm(rescaledTheta2, true) + + rescaledTheta1.MustDrop() + rescaledTheta2.MustDrop() + + outputGrid := outputGrid1.MustDiv(outputGrid2, true).MustSub1(ts.FloatScalar(1.0), true).MustView([]int64{1, oh, ow, 2}, true) + outputGrid2.MustDrop() + + baseGrid.MustDrop() + + return outputGrid +} + +func perspective(x *ts.Tensor, startPoints, endPoints [][]int64, mode string, fillValue []float64) *ts.Tensor { + coef := perspectiveCoeff(startPoints, endPoints) + + assertImageTensor(x) + // assertGridTransformInputs(x, nil, mode, fillValue, []string{"nearest", "bilinear"}, coef) + + dim := x.MustSize() + ow, oh := dim[len(dim)-1], dim[len(dim)-2] + device := x.MustDevice() + grid := perspectiveGrid(coef, ow, oh, gotch.Float, device) + + output := applyGridTransform(x, grid, mode, fillValue) + grid.MustDrop() + + return output +} + +// Apply affine transformation on the image keeping image center invariant. +// +//If the image is torch Tensor, it is expected +// to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. +// Args: +// - img (Tensor): image to transform. +// - angle (number): rotation angle in degrees between -180 and 180, clockwise direction. 
+// - translate (sequence of integers): horizontal and vertical translations (post-rotation translation) +// - scale (float): overall scale +// - shear (float or sequence): shear angle value in degrees between -180 to 180, clockwise direction. +// If a sequence is specified, the first value corresponds to a shear parallel to the x axis, while +// the second value corresponds to a shear parallel to the y axis. +// - interpolation (InterpolationMode): Desired interpolation enum defined by +// :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. +// If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. +// - fill (sequence or number, optional): Pixel fill value for the area outside the transformed +// image. If given a number, the value is used for all bands respectively. +func affine(img *ts.Tensor, angle float64, translations []int64, scale float64, shear []float64, interpolationMode string, fillValue []float64) *ts.Tensor { + + var translateF []float64 + for _, v := range translations { + translateF = append(translateF, float64(v)) + } + + matrix := getInverseAffineMatrix([]float64{0.0, 0.0}, angle, translateF, scale, shear) + + dtype := gotch.Float + device := img.MustDevice() + dim := img.MustSize() + theta := ts.MustOfSlice(matrix).MustTotype(dtype, true).MustTo(device, true).MustView([]int64{1, 2, 3}, true) + + // grid will be generated on the same device as theta and img + w := dim[len(dim)-1] + h := dim[len(dim)-2] + ow := w + oh := h + + // grid = _gen_affine_grid(theta, w=shape[-1], h=shape[-2], ow=shape[-1], oh=shape[-2]) + grid := genAffineGrid(theta, w, h, ow, oh) + // grid := ts.MustEmpty([]int64{1, 512, 512, 2}, dtype, device) + + out := applyGridTransform(img, grid, interpolationMode, fillValue) + + grid.MustDrop() + theta.MustDrop() + + return out +} + +// Helper method to compute inverse matrix for affine transformation +// +// As it is explained in 
PIL.Image.rotate +// We need compute INVERSE of affine transformation matrix: M = T * C * RSS * C^-1 +// where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1] +// C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1] +// RSS is rotation with scale and shear matrix +// RSS(a, s, (sx, sy)) = +// = R(a) * S(s) * SHy(sy) * SHx(sx) +// = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(x)/cos(y) - sin(a)), 0 ] +// [ s*sin(a + sy)/cos(sy), s*(-sin(a - sy)*tan(x)/cos(y) + cos(a)), 0 ] +// [ 0 , 0 , 1 ] +// +// where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears: +// SHx(s) = [1, -tan(s)] and SHy(s) = [1 , 0] +// [0, 1 ] [-tan(s), 1] +// +// Thus, the inverse is M^-1 = C * RSS^-1 * C^-1 * T^-1 +func getInverseAffineMatrix(center []float64, angle float64, translate []float64, scale float64, shear []float64) []float64 { + + // convert to randiants + rot := angle * math.Pi / 180 + sx := shear[0] * math.Pi / 180 + sy := shear[1] * math.Pi / 180 + + cx, cy := center[0], center[1] + tx, ty := translate[0], translate[1] + + // RSS without scaling + // a = math.cos(rot - sy) / math.cos(sy) + a := math.Cos(rot-sy) / math.Cos(sy) + // b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot) + b := -math.Cos(rot-sy)*math.Tan(sx)/math.Cos(sy) - math.Sin(rot) + // c = math.sin(rot - sy) / math.cos(sy) + c := math.Sin(rot-sy) / math.Cos(sy) + // d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot) + d := -math.Sin(rot-sy)*math.Tan(sx)/math.Cos(sy) + math.Cos(rot) + + // Inverted rotation matrix with scale and shear + // det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1 + // matrix = [d, -b, 0.0, -c, a, 0.0] + var matrix []float64 = []float64{d, -b, 0.0, -c, a, 0.0} + // matrix = [x / scale for x in matrix] + var mat []float64 + for _, v := range matrix { + mat = append(mat, v/scale) + } + + // Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1 + // 
matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty) + mat[2] += mat[0]*(-cx-tx) + mat[1]*(-cy-ty) + // matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty) + mat[5] += mat[3]*(-cx-tx) + mat[4]*(-cy-ty) + + // Apply center translation: C * RSS^-1 * C^-1 * T^-1 + // matrix[2] += cx + mat[2] += cx + // matrix[5] += cy + mat[5] += cy + + return mat +} + +// https://github.com/pytorch/pytorch/blob/74b65c32be68b15dc7c9e8bb62459efbfbde33d8/aten/src/ATen/native/ +// AffineGridGenerator.cpp#L18 +// Difference with AffineGridGenerator is that: +// 1) we normalize grid values after applying theta +// 2) we can normalize by other image size, such that it covers "extend" option like in PIL.Image.rotate +func genAffineGrid(theta *ts.Tensor, w, h, ow, oh int64) *ts.Tensor { + d := 0.5 + dtype := theta.DType() + device := theta.MustDevice() + // base_grid = torch.empty(1, oh, ow, 3, dtype=theta.dtype, device=theta.device) + baseGrid := ts.MustEmpty([]int64{1, oh, ow, 3}, dtype, device) + + // x_grid = torch.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, steps=ow, device=theta.device) + startX := float64(-ow)*0.5 + d + endX := float64(ow)*0.5 + d - 1.0 + xGrid := ts.MustLinspace(ts.FloatScalar(startX), ts.FloatScalar(endX), []int64{ow}, dtype, device) + + // y_grid = torch.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, steps=oh, device=theta.device).unsqueeze_(-1) + startY := float64(-oh)*0.5 + d + endY := float64(oh)*0.5 + d - 1.0 + yGrid := ts.MustLinspace(ts.FloatScalar(startY), ts.FloatScalar(endY), []int64{oh}, dtype, device) + + // base_grid[..., 0].copy_(x_grid) + // base_grid[..., 1].copy_(y_grid) + // base_grid[..., 2].fill_(1) + baseDim := baseGrid.MustSize() + for i := 0; i < int(baseDim[1]); i++ { + view := baseGrid.MustSelect(0, 0, false).MustSelect(0, int64(i), true).MustSelect(1, 0, true) + view.Copy_(xGrid) + view.MustDrop() + } + for i := 0; i < int(baseDim[2]); i++ { + view := baseGrid.MustSelect(0, 0, false).MustSelect(1, int64(i), true).MustSelect(1, 1, 
true) + view.Copy_(yGrid) + view.MustDrop() + } + + for i := 0; i < int(baseDim[2]); i++ { + view := baseGrid.MustSelect(0, 0, false).MustSelect(1, int64(i), true).MustSelect(1, 2, true) + // view.Fill_(ts.FloatScalar(1.0)) // NOTE. THIS CAUSES MEMORY LEAK!!!! + oneTs := view.MustOnesLike(false) + view.Copy_(oneTs) + oneTs.MustDrop() + view.MustDrop() + } + + // rescaled_theta = theta.transpose(1, 2) / torch.tensor([0.5 * w, 0.5 * h], dtype=theta.dtype, device=theta.device) + + // rescaled_theta1 = theta1.transpose(1, 2) / torch.tensor([0.5 * ow, 0.5 * oh], dtype=dtype, device=device) + divTs := ts.MustOfSlice([]float64{0.5 * float64(w), 0.5 * float64(h)}).MustTotype(dtype, true).MustTo(device, true) + rescaledTheta := theta.MustTranspose(1, 2, false).MustDiv(divTs, true) + divTs.MustDrop() + + // output_grid = base_grid.view(1, oh * ow, 3).bmm(rescaled_theta) + outputGrid := baseGrid.MustView([]int64{1, oh * ow, 3}, false).MustBmm(rescaledTheta, true).MustView([]int64{1, oh, ow, 2}, true) + + baseGrid.MustDrop() + xGrid.MustDrop() + yGrid.MustDrop() + rescaledTheta.MustDrop() + + return outputGrid +} + +// randPvalue generates a random propability value [0, 1] +func randPvalue() float64 { + rand.Seed(time.Now().UnixNano()) + var min, max float64 = 0.0, 1.0 + + r := min + rand.Float64()*(max-min) + return r +} + +func getImageChanNum(x *ts.Tensor) int64 { + dim := x.MustSize() + switch { + case len(dim) == 2: + return 1 + case len(dim) > 2: + return dim[len(dim)-3] + default: + log.Fatalf("Input image tensor should have dim of 2 or more. Got %v\n", len(dim)) + } + + log.Fatalf("Input image tensor should have dim of 2 or more. Got %v\n", len(dim)) + return -1 +} + +// solarize solarizes an RGB/grayscale image by inverting all pixel values above a threshold. +// Args: +// - img (Tensor): Image to have its colors inverted. +// If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, +// where ... 
means it can have an arbitrary number of leading dimensions. +// - threshold (float): All pixels equal or above this value are inverted. +func solarize(img *ts.Tensor, threshold float64) *ts.Tensor { + assertImageTensor(img) + + dim := img.MustSize() + if len(dim) < 3 { + log.Fatalf("Input image tensor should have at least 3 dimensions. Got %v", len(dim)) + } + + assertChannels(img, []int64{1, 3}) + + invertedImg := invert(img) + + // return torch.where(img >= threshold, inverted_img, img) + conditionTs := img.MustGe(ts.FloatScalar(threshold), false) + + out := img.MustWhere1(conditionTs, invertedImg, false) + + invertedImg.MustDrop() + conditionTs.MustDrop() + + return out +} + +// invert inverts image tensor. +func invert(img *ts.Tensor) *ts.Tensor { + assertImageTensor(img) + + dim := img.MustSize() + if len(dim) < 3 { + log.Fatalf("Input image tensor should have at least 3 dimensions. Got %v", len(dim)) + } + + assertChannels(img, []int64{1, 3}) + + var bound int64 = 255 + // return bound - img + out := img.MustMul1(ts.IntScalar(-1), false).MustAdd1(ts.IntScalar(bound), true) + return out +} + +func posterize(img *ts.Tensor, bits uint8) *ts.Tensor { + assertImageTensor(img) + + dim := img.MustSize() + + if len(dim) < 3 { + log.Fatalf("Input image tensor should have at least 3 dimensions. Got %v\n", len(dim)) + } + + dtype := img.DType() + if dtype != gotch.Uint8 { + log.Fatalf("Only dtype uint8 image tensors are supported. Got %v", dtype) + } + + assertChannels(img, []int64{1, 3}) + + // mask = -int(2**(8 - bits)) # JIT-friendly for: ~(2 ** (8 - bits) - 1) + // or mask := -int64(1<<(uint8(8) - bits)) + mask := -int64(math.Exp2(float64(uint8(8) - bits))) + + out := img.MustBitwiseAnd(ts.IntScalar(mask), false) + return out +} + +func autocontrast(img *ts.Tensor) *ts.Tensor { + assertImageTensor(img) + + dim := img.MustSize() + + if len(dim) < 3 { + log.Fatalf("Input image tensor should have at least 3 dimensions. 
Got %v\n", len(dim)) + } + + var bound int64 = 255 + dtype := gotch.Float + + // minimum = img.amin(dim=(-2, -1), keepdim=True).to(dtype) + minTs := img.MustAmin([]int64{-2, -1}, true, false).MustTotype(dtype, true) + // maximum = img.amax(dim=(-2, -1), keepdim=True).to(dtype) + maxTs := img.MustAmax([]int64{-2, -1}, true, false).MustTotype(dtype, true) + + // eq_idxs = torch.where(minimum == maximum)[0] + // NOTE. Eq(minTs, maxTs) give [n, c, 1, 1] or [channels, 1, 1] + eqIdx := minTs.MustEq1(maxTs, false).MustSqueeze1(-1, true).MustSqueeze1(-1, true).MustTotype(gotch.Int64, true) + + // minimum[eq_idxs] = 0 + minTsView := minTs.MustIndexSelect(0, eqIdx, false) + zerosTs := minTsView.MustZerosLike(false) + minTsView.Copy_(zerosTs) + zerosTs.MustDrop() + minTsView.MustDrop() + + // maximum[eq_idxs] = bound + maxTsView := maxTs.MustIndexSelect(0, eqIdx, false) + boundTs := maxTsView.MustOnesLike(false).MustMul1(ts.IntScalar(bound), true) + maxTsView.Copy_(boundTs) + boundTs.MustDrop() + maxTsView.MustDrop() + + // scale = bound / (maximum - minimum) + scale := maxTs.MustSub(minTs, false).MustPow(ts.IntScalar(-1), true).MustMul1(ts.IntScalar(bound), true) + // + // return ((img - minimum) * scale).clamp(0, bound).to(img.dtype) + out := img.MustSub(minTs, false).MustMul(scale, true).MustClamp(ts.IntScalar(0), ts.IntScalar(bound), true).MustTotype(dtype, true) + + minTs.MustDrop() + maxTs.MustDrop() + eqIdx.MustDrop() + scale.MustDrop() + + return out +} + +func adjustSharpness(img *ts.Tensor, factor float64) *ts.Tensor { + if factor < 0 { + log.Fatalf("Sharpness factor should not be negative. 
Got %v", factor)
+	}
+
+	assertImageTensor(img)
+	assertChannels(img, []int64{1, 3})
+
+	dim := img.MustSize()
+
+	var out *ts.Tensor
+	if (dim[len(dim)-1]) <= 2 || (dim[len(dim)-2] <= 2) {
+		out = img.MustShallowClone()
+		return out
+	}
+
+	// return _blend(img, _blurred_degenerate_image(img), sharpness_factor)
+	img1 := blurredDegenerateImage(img)
+	out = blend(img, img1, factor)
+
+	img1.MustDrop()
+	return out
+}
+
+// blurredDegenerateImage applies a fixed 3x3 smoothing kernel (center weight 5,
+// neighbours 1, normalized) via depthwise conv2d; border pixels keep the input values.
+func blurredDegenerateImage(img *ts.Tensor) *ts.Tensor {
+	dtype := gotch.Float
+	device := img.MustDevice()
+	dim := img.MustSize()
+
+	// kernel = torch.ones((3, 3), dtype=dtype, device=img.device)
+	kernel := ts.MustOnes([]int64{3, 3}, dtype, device)
+
+	// kernel[1, 1] = 5.0 - Center kernel value
+	kernelView := kernel.MustNarrow(0, 1, 1, false).MustNarrow(1, 1, 1, true) // kernel[1:2, 1:2]: the single center element (was Narrow(1,1,2): a 3x2 block)
+	centerVal := kernelView.MustOnesLike(false).MustMul1(ts.FloatScalar(5.0), true)
+	kernelView.Copy_(centerVal) // center kernel value
+	centerVal.MustDrop()
+	kernelView.MustDrop()
+
+	// kernel /= kernel.sum()
+	kernelSum := kernel.MustSum(dtype, false)
+	kernelS := kernel.MustDiv(kernelSum, true) // del=true frees kernel (was never dropped: leak)
+	kernelSum.MustDrop()
+	// kernel = kernel.expand(img.shape[-3], 1, kernel.shape[0], kernel.shape[1])
+	kdim := kernelS.MustSize()
+	kdtype := kernelS.DType()
+	kernelExp := kernelS.MustExpand([]int64{dim[len(dim)-3], 1, kdim[0], kdim[1]}, true, false)
+
+	// result_tmp, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(img, [kernel.dtype, ])
+	resTmp, needCast, needSqueeze, outDtype := castSqueezeIn(img, []gotch.DType{kdtype})
+
+	// result_tmp = conv2d(result_tmp, kernel, groups=result_tmp.shape[-3])
+	stride := []int64{1, 1}
+	padding := []int64{0, 0}
+	dilation := []int64{1, 1}
+	resTmpDim := resTmp.MustSize()
+	group := resTmpDim[len(resTmpDim)-3]
+	resTmp1 := ts.MustConv2d(resTmp, kernelExp, ts.NewTensor(), stride, padding, dilation, group)
+
+	// result_tmp = _cast_squeeze_out(result_tmp, need_cast, need_squeeze, out_dtype)
+	resTmp2 := castSqueezeOut(resTmp1, needCast, needSqueeze, outDtype)
+
+	// result = img.clone()
+	out := img.MustShallowClone() // NOTE(review): Python uses img.clone(); if ShallowClone shares storage, the Copy_ below also mutates img - TODO confirm/deep-copy
+
+	// result[..., 1:-1, 1:-1] = result_tmp
+	hDim := int64(len(dim) - 2) // second last dim
+	wDim := int64(len(dim) - 1) // last dim
+	outView := out.MustNarrow(hDim, 1, dim[len(dim)-2]-2, false).MustNarrow(wDim, 1, dim[len(dim)-1]-2, true)
+
+	outView.Copy_(resTmp2)
+
+	outView.MustDrop()
+	kernelS.MustDrop()
+	kernelExp.MustDrop()
+	resTmp.MustDrop()
+	resTmp1.MustDrop()
+	resTmp2.MustDrop()
+
+	return out
+}
+
+// equalize equalizes the histogram of a uint8 image tensor ([C,H,W] or [B,C,H,W]).
+func equalize(img *ts.Tensor) *ts.Tensor {
+	assertImageTensor(img)
+
+	shape := img.MustSize()
+	ndim := len(shape)
+	dtype := img.DType()
+
+	if ndim < 3 || ndim > 4 {
+		log.Fatalf("Input image should have 3 or 4 dimensions. Got %v", ndim)
+	}
+
+	if dtype != gotch.Uint8 {
+		log.Fatalf("Only dtype uint8 image tensors are supported. Got %v", dtype)
+	}
+
+	assertChannels(img, []int64{1, 3})
+
+	// single image
+	if ndim == 3 {
+		out := equalizeSingleImage(img)
+		return out
+	}
+
+	// batched images
+	var images []ts.Tensor
+	for i := 0; i < int(shape[0]); i++ {
+		x := img.MustSelect(0, int64(i), false)
+		o := equalizeSingleImage(x)
+		images = append(images, *o)
+		x.MustDrop()
+	}
+
+	out := ts.MustStack(images, 0)
+
+	for _, x := range images {
+		x.MustDrop()
+	}
+
+	return out
+}
+
+// equalizeSingleImage equalizes one [C,H,W] image channel-by-channel.
+func equalizeSingleImage(img *ts.Tensor) *ts.Tensor {
+	dim := img.MustSize()
+	var scaledChans []ts.Tensor
+	for i := 0; i < int(dim[0]); i++ {
+		cTs := img.MustSelect(0, int64(i), false)
+		scaledChan := scaleChannel(cTs)
+		cTs.MustDrop()
+		scaledChans = append(scaledChans, *scaledChan)
+	}
+
+	out := ts.MustStack(scaledChans, 0)
+
+	for _, x := range scaledChans {
+		x.MustDrop()
+	}
+
+	return out
+}
+
+func scaleChannel(imgChan *ts.Tensor) *ts.Tensor {
+	// # TODO: we should expect bincount to always be faster than histc, but this
+	// # isn't always the case. Once
+	// # https://github.com/pytorch/pytorch/issues/53194 is fixed, remove the if
+	// # block and only use bincount.
+	// if img_chan.is_cuda:
+	//     hist = torch.histc(img_chan.to(torch.float32), bins=256, min=0, max=255)
+	// else:
+	//     hist = torch.bincount(img_chan.view(-1), minlength=256)
+
+	// hist = torch.histc(img_chan.to(torch.float32), bins=256, min=0, max=255)
+	fImg := imgChan.MustTotype(gotch.Float, false) // histc is float-only; convert as the Python reference does
+	hist := fImg.MustHistc(256, true)              // NOTE(review): no min/max args here - defaults to data range, not [0,255]; TODO confirm vs torchvision
+	// nonzero_hist = hist[hist != 0] (values of the non-empty bins, not their indices)
+	nonZeroIdx := hist.MustNonzero(false).MustView([]int64{-1}, true)
+	nonZeroHist := hist.MustIndexSelect(0, nonZeroIdx, false) // was: MustNonzero alone, which summed bin *indices* instead of counts
+	nonZeroIdx.MustDrop()
+	// step = torch.div(nonzero_hist[:-1].sum(), 255, rounding_mode='floor')
+	nonZeroHistSum := nonZeroHist.MustNarrow(0, 0, nonZeroHist.MustSize()[0]-1, true).MustSum(gotch.Int64, true)
+	step := nonZeroHistSum.MustDiv1(ts.IntScalar(255), true)
+	stepVal := step.Int64Values()[0]
+	nonZeroHistSum.MustDrop()
+
+	// if step == 0:
+	//   return img_chan
+	if stepVal == 0 {
+		hist.MustDrop() // free locals before the early return (avoid leaking on constant channels)
+		step.MustDrop()
+		return imgChan.MustShallowClone()
+	}
+
+	// lut = torch.div(torch.cumsum(hist, 0) + torch.div(step, 2, rounding_mode='floor'),step, rounding_mode='floor')
+	dtype := gotch.Float
+	halfStep := step.MustDiv1(ts.IntScalar(2), false)
+	histCumSum := hist.MustCumsum(0, dtype, false)
+	histStep := histCumSum.MustAdd(halfStep, false)
+	halfStep.MustDrop()
+	lut := histStep.MustDiv(step, true) // deleted histStep
+
+	// lut = torch.nn.functional.pad(lut, [1, 0])[:-1].clamp(0, 255)
+	lut1 := lut.MustConstantPadNd([]int64{1, 0}, true) // deleted lut
+	lut1Dim := lut1.MustSize()
+
+	lut2 := lut1.MustNarrow(0, 0, lut1Dim[0]-1, true).MustClamp(ts.IntScalar(0), ts.IntScalar(255), true) // deleted lut1
+	// return lut[img_chan.to(torch.int64)].to(torch.uint8)
+	// NOTE: multi-dimensional tensor indexing isn't supported yet, so we do it row by row
+	// channel[h, w]
+	h := imgChan.MustSize()[0]
+	// w := imgChan.MustSize()[1]
+	var xs []ts.Tensor
+	for i := 0; i < int(h); i++ {
+		idx := imgChan.MustSelect(0, int64(i), false).MustTotype(gotch.Int64, true)
+		x := lut2.MustIndexSelect(0, idx, false).MustTotype(gotch.Uint8, true)
+		xs = append(xs, *x)
+		idx.MustDrop()
+	}
+	out := ts.MustStack(xs, 0)
+
+	// delete intermediate tensors
+	for _, x := range xs {
+		x.MustDrop()
+	}
+	histCumSum.MustDrop() // was halfStep.MustDrop(): halfStep is already dropped above (double-free); histCumSum was the one leaking
+	lut2.MustDrop()
+	hist.MustDrop()
+	step.MustDrop()
+
+	return out
+}
+
+// Normalize a float tensor image with mean and standard deviation.
+//
+// Args:
+// - tensor (Tensor): Float tensor image of size (C, H, W) or (B, C, H, W) to be normalized.
+// - mean (sequence): Sequence of means for each channel.
+// - std (sequence): Sequence of standard deviations for each channel.
+// Returns:
+// - Tensor: Normalized Tensor image.
+func normalize(img *ts.Tensor, mean, std []float64) *ts.Tensor {
+	for _, v := range std {
+		if v == 0 {
+			log.Fatalf("One of std (%v) is zero. This is invalid as it leads to division by zero.", std)
+		}
+	}
+
+	assertImageTensor(img)
+
+	dim := img.MustSize()
+	// dtype := img.DType()
+	device := img.MustDevice()
+	if len(dim) < 3 {
+		log.Fatalf("Expected tensor to be a tensor image of size (..., C, H, W). Got tensor.size() =%v", dim)
+	}
+
+	meanTs := ts.MustOfSlice(mean).MustTotype(gotch.Float, true).MustTo(device, true)
+	stdTs := ts.MustOfSlice(std).MustTotype(gotch.Float, true).MustTo(device, true)
+
+	var mTs, sTs *ts.Tensor
+	meanSize := meanTs.MustSize()
+	stdSize := stdTs.MustSize()
+	switch len(meanSize) {
+	case 1:
+		mTs = meanTs.MustView([]int64{-1, 1, 1}, true)
+	case 3:
+		mTs = meanTs.MustShallowClone()
+		meanTs.MustDrop()
+	default:
+		log.Fatalf("mean must be 1 or 3 elements. Got %v\n", len(mean))
+	}
+
+	switch len(stdSize) {
+	case 1:
+		sTs = stdTs.MustView([]int64{-1, 1, 1}, true)
+	case 3:
+		sTs = stdTs.MustShallowClone()
+		stdTs.MustDrop()
+	default:
+		log.Fatalf("std must be 1 or 3 elements. Got %v\n", len(std))
+	}
+
+	// out := img.MustSub(mTs, false).MustDiv(sTs, true)
+	x := img.MustDiv1(ts.FloatScalar(255.0), false)
+	out := x.MustSub(mTs, false).MustDiv(sTs, true).MustMul1(ts.IntScalar(255), true)
+	x.MustDrop()
+
+	mTs.MustDrop()
+	sTs.MustDrop()
+
+	return out
+}
diff --git a/vision/aug/grayscale.go b/vision/aug/grayscale.go
new file mode 100644
index 0000000..9d0ef2c
--- /dev/null
+++ b/vision/aug/grayscale.go
@@ -0,0 +1,81 @@
+package aug
+
+import (
+	"log"
+
+	ts "github.com/sugarme/gotch/tensor"
+	// "github.com/sugarme/gotch/tensor"
+)
+
+// GrayScale converts image to grayscale.
+// If the image is torch Tensor, it is expected
+// to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions
+// Args:
+// - num_output_channels (int): (1 or 3) number of channels desired for output image
+type Grayscale struct {
+	outChan int64
+}
+
+func (gs *Grayscale) Forward(x *ts.Tensor) *ts.Tensor {
+	out := rgb2Gray(x, gs.outChan)
+	return out
+}
+
+func newGrayscale(outChanOpt ...int64) *Grayscale {
+	var outChan int64 = 3
+	if len(outChanOpt) > 0 {
+		c := outChanOpt[0]
+		switch c {
+		case 1:
+			outChan = 1
+		case 3:
+			outChan = 3
+		default:
+			log.Fatalf("Out channels should be either 1 or 3. Got %v\n", c)
+		}
+	}
+	return &Grayscale{outChan}
+}
+
+// RandomGrayscale randomly converts image to grayscale with a probability of p (default 0.1).
+// If the image is torch Tensor, it is expected
+// to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions
+// Args:
+// - p (float): probability that image should be converted to grayscale.
+type RandomGrayscale struct { + pvalue float64 +} + +func newRandomGrayscale(pvalueOpt ...float64) *RandomGrayscale { + pvalue := 0.1 + if len(pvalueOpt) > 0 { + pvalue = pvalueOpt[0] + } + return &RandomGrayscale{pvalue} +} + +func (rgs *RandomGrayscale) Forward(x *ts.Tensor) *ts.Tensor { + c := getImageChanNum(x) + r := randPvalue() + var out *ts.Tensor + switch { + case r < rgs.pvalue: + out = rgb2Gray(x, c) + default: + out = x.MustShallowClone() + } + + return out +} + +func WithRandomGrayscale(pvalueOpt ...float64) Option { + var p float64 = 0.1 + if len(pvalueOpt) > 0 { + p = pvalueOpt[0] + } + + rgs := newRandomGrayscale(p) + return func(o *Options) { + o.randomGrayscale = rgs + } +} diff --git a/vision/aug/invert.go b/vision/aug/invert.go new file mode 100644 index 0000000..ae23584 --- /dev/null +++ b/vision/aug/invert.go @@ -0,0 +1,39 @@ +package aug + +import ( + ts "github.com/sugarme/gotch/tensor" +) + +type RandomInvert struct { + pvalue float64 +} + +func newRandomInvert(pOpt ...float64) *RandomInvert { + p := 0.5 + if len(pOpt) > 0 { + p = pOpt[0] + } + return &RandomInvert{p} +} + +func (ri *RandomInvert) Forward(x *ts.Tensor) *ts.Tensor { + r := randPvalue() + + var out *ts.Tensor + switch { + case r < ri.pvalue: + out = invert(x) + default: + out = x.MustShallowClone() + } + + return out +} + +func WithRandomInvert(pvalueOpt ...float64) Option { + ri := newRandomInvert(pvalueOpt...) + + return func(o *Options) { + o.randomInvert = ri + } +} diff --git a/vision/aug/normalize.go b/vision/aug/normalize.go new file mode 100644 index 0000000..eb1f513 --- /dev/null +++ b/vision/aug/normalize.go @@ -0,0 +1,91 @@ +package aug + +import ( + ts "github.com/sugarme/gotch/tensor" +) + +// Normalize normalizes a tensor image with mean and standard deviation. 
+// Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n`` +// channels, this transform will normalize each channel of the input +// ``torch.*Tensor`` i.e., +// ``output[channel] = (input[channel] - mean[channel]) / std[channel]`` +// .. note:: +// This transform acts out of place, i.e., it does not mutate the input tensor. +// Args: +// - mean (sequence): Sequence of means for each channel. +// - std (sequence): Sequence of standard deviations for each channel. +type Normalize struct { + mean []float64 // should be from 0 to 1 + std []float64 // should be > 0 and <= 1 +} + +type normalizeOptions struct { + mean []float64 + std []float64 +} + +type normalizeOption func(*normalizeOptions) + +// Mean and SD can be calculated for specific dataset as follow: +/* + mean = 0.0 + meansq = 0.0 + count = 0 + + for index, data in enumerate(train_loader): + mean = data.sum() + meansq = meansq + (data**2).sum() + count += np.prod(data.shape) + + total_mean = mean/count + total_var = (meansq/count) - (total_mean**2) + total_std = torch.sqrt(total_var) + print("mean: " + str(total_mean)) + print("std: " + str(total_std)) +*/ + +// For example. 
ImageNet dataset has RGB mean and standard error: +// meanVals := []float64{0.485, 0.456, 0.406} +// sdVals := []float64{0.229, 0.224, 0.225} +func defaultNormalizeOptions() *normalizeOptions { + return &normalizeOptions{ + mean: []float64{0, 0, 0}, + std: []float64{1, 1, 1}, + } +} + +func WithNormalizeStd(std []float64) normalizeOption { + return func(o *normalizeOptions) { + o.std = std + } +} + +func WithNormalizeMean(mean []float64) normalizeOption { + return func(o *normalizeOptions) { + o.mean = mean + } +} + +func newNormalize(opts ...normalizeOption) *Normalize { + p := defaultNormalizeOptions() + for _, o := range opts { + o(p) + } + + return &Normalize{ + mean: p.mean, + std: p.std, + } +} + +func (n *Normalize) Forward(x *ts.Tensor) *ts.Tensor { + out := normalize(x, n.mean, n.std) + return out +} + +func WithNormalize(opts ...normalizeOption) Option { + n := newNormalize(opts...) + return func(o *Options) { + o.normalize = n + } +} diff --git a/vision/aug/pad.go b/vision/aug/pad.go new file mode 100644 index 0000000..6674395 --- /dev/null +++ b/vision/aug/pad.go @@ -0,0 +1 @@ +package aug diff --git a/vision/aug/perspective.go b/vision/aug/perspective.go new file mode 100644 index 0000000..69ab194 --- /dev/null +++ b/vision/aug/perspective.go @@ -0,0 +1,190 @@ +package aug + +import ( + // "fmt" + + "github.com/sugarme/gotch" + ts "github.com/sugarme/gotch/tensor" +) + +// RandomPerspective performs a random perspective transformation of the given image with a given probability. +// If the image is torch Tensor, it is expected +// to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. +// Args: +// distortion_scale (float): argument to control the degree of distortion and ranges from 0 to 1. +// Default is 0.5. +// p (float): probability of the image being transformed. Default is 0.5. +// interpolation (InterpolationMode): Desired interpolation enum defined by +// :class:`torchvision.transforms.InterpolationMode`. 
Default is ``InterpolationMode.BILINEAR``. +// If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. +// For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. +// fill (sequence or number): Pixel fill value for the area outside the transformed +// image. Default is ``0``. If given a number, the value is used for all bands respectively. +type RandomPerspective struct { + distortionScale float64 // range [0, 1] + pvalue float64 // range [0, 1] + interpolationMode string + fillValue []float64 +} + +type perspectiveOptions struct { + distortionScale float64 // range [0, 1] + pvalue float64 // range [0, 1] + interpolationMode string + fillValue []float64 +} + +func defaultPerspectiveOptions() *perspectiveOptions { + return &perspectiveOptions{ + distortionScale: 0.5, + pvalue: 0.5, + interpolationMode: "bilinear", + fillValue: []float64{0.0, 0.0, 0.0}, + } +} + +type perspectiveOption func(*perspectiveOptions) + +func WithPerspectivePvalue(p float64) perspectiveOption { + return func(o *perspectiveOptions) { + o.pvalue = p + } +} + +func WithPerspectiveScale(s float64) perspectiveOption { + return func(o *perspectiveOptions) { + o.distortionScale = s + } +} + +func WithPerspectiveMode(m string) perspectiveOption { + return func(o *perspectiveOptions) { + o.interpolationMode = m + } +} + +func WithPerspectiveValue(v []float64) perspectiveOption { + return func(o *perspectiveOptions) { + o.fillValue = v + } +} + +func newRandomPerspective(opts ...perspectiveOption) *RandomPerspective { + params := defaultPerspectiveOptions() + for _, opt := range opts { + opt(params) + } + + return &RandomPerspective{ + distortionScale: params.distortionScale, + pvalue: params.pvalue, + interpolationMode: params.interpolationMode, + fillValue: params.fillValue, + } +} + +// Get parameters for ``perspective`` for a random perspective transform. +// +// Args: +// - width (int): width of the image. 
+// - height (int): height of the image. +// Returns: +// - List containing [top-left, top-right, bottom-right, bottom-left] of the original image, +// - List containing [top-left, top-right, bottom-right, bottom-left] of the transformed image. +func (rp *RandomPerspective) getParams(w, h int64) ([][]int64, [][]int64) { + halfH := h / 2 + halfW := w / 2 + + var ( + topLeft []int64 + topRight []int64 + bottomRight []int64 + bottomLeft []int64 + ) + + // topleft = [ + // int(torch.randint(0, int(distortion_scale * half_width) + 1, size=(1, )).item()), + // int(torch.randint(0, int(distortion_scale * half_height) + 1, size=(1, )).item()) + // ] + tlVal1 := int64(rp.distortionScale*float64(halfW)) + 1 + tlTs1 := ts.MustRandint1(0, tlVal1, []int64{1}, gotch.Int64, gotch.CPU) + tl1 := tlTs1.Int64Values()[0] + tlTs1.MustDrop() + tlVal2 := int64(rp.distortionScale*float64(halfH)) + 1 + tlTs2 := ts.MustRandint1(0, tlVal2, []int64{1}, gotch.Int64, gotch.CPU) + tl2 := tlTs2.Int64Values()[0] + tlTs2.MustDrop() + topLeft = []int64{tl1, tl2} + + // topright = [ + // int(torch.randint(width - int(distortion_scale * half_width) - 1, width, size=(1, )).item()), + // int(torch.randint(0, int(distortion_scale * half_height) + 1, size=(1, )).item()) + // ] + trVal1 := w - int64(rp.distortionScale*float64(halfW)) - 1 + trTs1 := ts.MustRandint1(trVal1, w, []int64{1}, gotch.Int64, gotch.CPU) + tr1 := trTs1.Int64Values()[0] + trTs1.MustDrop() + trVal2 := int64(rp.distortionScale*float64(halfH)) + 1 + trTs2 := ts.MustRandint1(0, trVal2, []int64{1}, gotch.Int64, gotch.CPU) + tr2 := trTs2.Int64Values()[0] + trTs2.MustDrop() + topRight = []int64{tr1, tr2} + + // botright = [ + // int(torch.randint(width - int(distortion_scale * half_width) - 1, width, size=(1, )).item()), + // int(torch.randint(height - int(distortion_scale * half_height) - 1, height, size=(1, )).item()) + // ] + brVal1 := w - int64(rp.distortionScale*float64(halfW)) - 1 + brTs1 := ts.MustRandint1(brVal1, w, []int64{1}, 
gotch.Int64, gotch.CPU) + br1 := brTs1.Int64Values()[0] + brTs1.MustDrop() + brVal2 := h - int64(rp.distortionScale*float64(halfH)) - 1 + brTs2 := ts.MustRandint1(brVal2, h, []int64{1}, gotch.Int64, gotch.CPU) + br2 := brTs2.Int64Values()[0] + brTs2.MustDrop() + bottomRight = []int64{br1, br2} + + // botleft = [ + // int(torch.randint(0, int(distortion_scale * half_width) + 1, size=(1, )).item()), + // int(torch.randint(height - int(distortion_scale * half_height) - 1, height, size=(1, )).item()) + // ] + blVal1 := int64(rp.distortionScale*float64(halfW)) + 1 + blTs1 := ts.MustRandint1(0, blVal1, []int64{1}, gotch.Int64, gotch.CPU) + bl1 := blTs1.Int64Values()[0] + blTs1.MustDrop() + blVal2 := h - int64(rp.distortionScale*float64(halfH)) - 1 + blTs2 := ts.MustRandint1(blVal2, h, []int64{1}, gotch.Int64, gotch.CPU) + bl2 := blTs2.Int64Values()[0] + blTs2.MustDrop() + bottomLeft = []int64{bl1, bl2} + + startPoints := [][]int64{ + {0, 0}, + {w - 1, 0}, + {w - 1, h - 1}, + {0, h - 1}, + } + + endPoints := [][]int64{ + topLeft, + topRight, + bottomRight, + bottomLeft, + } + + return startPoints, endPoints +} + +func (rp *RandomPerspective) Forward(x *ts.Tensor) *ts.Tensor { + height, width := getImageSize(x) + startPoints, endPoints := rp.getParams(height, width) + out := perspective(x, startPoints, endPoints, rp.interpolationMode, rp.fillValue) + return out +} + +func WithRandomPerspective(opts ...perspectiveOption) Option { + rp := newRandomPerspective(opts...) + return func(o *Options) { + o.randomPerspective = rp + } +} diff --git a/vision/aug/posterize.go b/vision/aug/posterize.go new file mode 100644 index 0000000..9a0b24a --- /dev/null +++ b/vision/aug/posterize.go @@ -0,0 +1,77 @@ +package aug + +import ( + ts "github.com/sugarme/gotch/tensor" +) + +// RandomPosterize posterizes the image randomly with a given probability by reducing the +// number of bits for each color channel. 
If the image is torch Tensor, it should be of type torch.uint8, +// and it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions. +// Args: +// - bits (int): number of bits to keep for each channel (0-8) +// - p (float): probability of the image being color inverted. Default value is 0.5 +// Ref. https://en.wikipedia.org/wiki/Posterization +type RandomPosterize struct { + pvalue float64 + bits uint8 +} + +type posterizeOptions struct { + pvalue float64 + bits uint8 +} + +type posterizeOption func(*posterizeOptions) + +func defaultPosterizeOptions() *posterizeOptions { + return &posterizeOptions{ + pvalue: 0.5, + bits: 4, + } +} + +func WithPosterizePvalue(p float64) posterizeOption { + return func(o *posterizeOptions) { + o.pvalue = p + } +} + +func WithPosterizeBits(bits uint8) posterizeOption { + return func(o *posterizeOptions) { + o.bits = bits + } +} + +func newRandomPosterize(opts ...posterizeOption) *RandomPosterize { + p := defaultPosterizeOptions() + for _, o := range opts { + o(p) + } + + return &RandomPosterize{ + pvalue: p.pvalue, + bits: p.bits, + } +} + +func (rp *RandomPosterize) Forward(x *ts.Tensor) *ts.Tensor { + + r := randPvalue() + var out *ts.Tensor + switch { + case r < rp.pvalue: + out = posterize(x, rp.bits) + default: + out = x.MustShallowClone() + } + + return out +} + +func WithRandomPosterize(opts ...posterizeOption) Option { + rp := newRandomPosterize(opts...) 
+ + return func(o *Options) { + o.randomPosterize = rp + } +} diff --git a/vision/aug/resize.go b/vision/aug/resize.go new file mode 100644 index 0000000..84af126 --- /dev/null +++ b/vision/aug/resize.go @@ -0,0 +1,39 @@ +package aug + +import ( + "log" + + "github.com/sugarme/gotch" + ts "github.com/sugarme/gotch/tensor" + "github.com/sugarme/gotch/vision" +) + +type ResizeModule struct { + height int64 + width int64 +} + +func newResizeModule(h, w int64) *ResizeModule { + return &ResizeModule{h, w} +} + +// Forward implements ts.Module for RandRotateModule +func (rs *ResizeModule) Forward(x *ts.Tensor) *ts.Tensor { + imgTs := x.MustTotype(gotch.Uint8, false) + out, err := vision.Resize(imgTs, rs.width, rs.height) + if err != nil { + log.Fatal(err) + } + imgTs.MustDrop() + return out +} + +func WithResize(h, w int64) Option { + return func(o *Options) { + rs := newResizeModule(h, w) + o.resize = rs + } +} + +// TODO. +type RandomResizedCrop struct{} diff --git a/vision/aug/rotate.go b/vision/aug/rotate.go new file mode 100644 index 0000000..a5afe8b --- /dev/null +++ b/vision/aug/rotate.go @@ -0,0 +1,109 @@ +package aug + +import ( + "fmt" + "log" + "math" + "math/rand" + "time" + + "github.com/sugarme/gotch" + ts "github.com/sugarme/gotch/tensor" +) + +// RandomRotate randomly rotates a tensor image within a specifed angle range (degree). +func RandomRotate(img *ts.Tensor, min, max float64) (*ts.Tensor, error) { + if min > max { + tmp := min + min = max + max = tmp + } + if min < -360 || min > 360 || max < -360 || max > 360 { + err := fmt.Errorf("min and max should be in range from -360 to 360. 
Got %v and %v\n", min, max) + return nil, err + } + // device := img.MustDevice() + dtype := gotch.Double + rand.Seed(time.Now().UnixNano()) + angle := min + rand.Float64()*(max-min) + + theta := float64(angle) * (math.Pi / 180) + input := img.MustUnsqueeze(0, false).MustTotype(dtype, true) + r, err := rotImg(input, theta, dtype) + if err != nil { + return nil, err + } + input.MustDrop() + rotatedImg := r.MustSqueeze(true) + return rotatedImg, nil +} + +func Rotate(img *ts.Tensor, angle float64) (*ts.Tensor, error) { + if angle < -360 || angle > 360 { + err := fmt.Errorf("angle must be in range (-360, 360)") + return nil, err + } + dtype := gotch.Double + theta := float64(angle) * (math.Pi / 180) + input := img.MustUnsqueeze(0, false).MustTotype(dtype, true) + r, err := rotImg(input, theta, dtype) + if err != nil { + return nil, err + } + input.MustDrop() + rotatedImg := r.MustSqueeze(true) + return rotatedImg, nil +} + +// RotateModule +type RotateModule struct { + angle float64 +} + +func newRotate(angle float64) *RotateModule { + return &RotateModule{angle} +} + +// Forward implements ts.Module for RotateModule +func (r *RotateModule) Forward(x *ts.Tensor) *ts.Tensor { + out, err := Rotate(x, r.angle) + if err != nil { + log.Fatal(err) + } + + return out +} + +func WithRotate(angle float64) Option { + return func(o *Options) { + r := newRotate(angle) + o.rotate = r + } +} + +// RandomRotateModule +type RandRotateModule struct { + minAngle float64 + maxAngle float64 +} + +func newRandRotate(min, max float64) *RandRotateModule { + return &RandRotateModule{min, max} +} + +// Forward implements ts.Module for RandRotateModule +func (rr *RandRotateModule) Forward(x *ts.Tensor) *ts.Tensor { + out, err := RandomRotate(x, rr.minAngle, rr.maxAngle) + if err != nil { + log.Fatal(err) + } + + return out +} + +func WithRandRotate(minAngle, maxAngle float64) Option { + return func(o *Options) { + r := newRandRotate(minAngle, maxAngle) + o.randRotate = r + } +} diff --git 
a/vision/aug/sharpness.go b/vision/aug/sharpness.go new file mode 100644 index 0000000..d38fad5 --- /dev/null +++ b/vision/aug/sharpness.go @@ -0,0 +1,74 @@ +package aug + +import ( + ts "github.com/sugarme/gotch/tensor" +) + +// Adjust the sharpness of the image randomly with a given probability. If the image is torch Tensor, +// it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions. +// Args: +// sharpness_factor (float): How much to adjust the sharpness. Can be +// any non negative number. 0 gives a blurred image, 1 gives the +// original image while 2 increases the sharpness by a factor of 2. +// p (float): probability of the image being color inverted. Default value is 0.5 +type RandomAdjustSharpness struct { + sharpnessFactor float64 + pvalue float64 +} + +type sharpnessOptions struct { + sharpnessFactor float64 + pvalue float64 +} + +type sharpnessOption func(*sharpnessOptions) + +func defaultSharpnessOptions() *sharpnessOptions { + return &sharpnessOptions{ + sharpnessFactor: 1.0, + pvalue: 0.5, + } +} + +func WithSharpnessPvalue(p float64) sharpnessOption { + return func(o *sharpnessOptions) { + o.pvalue = p + } +} + +func WithSharpnessFactor(f float64) sharpnessOption { + return func(o *sharpnessOptions) { + o.sharpnessFactor = f + } +} + +func newRandomAdjustSharpness(opts ...sharpnessOption) *RandomAdjustSharpness { + p := defaultSharpnessOptions() + for _, o := range opts { + o(p) + } + return &RandomAdjustSharpness{ + sharpnessFactor: p.sharpnessFactor, + pvalue: p.pvalue, + } +} + +func (ras *RandomAdjustSharpness) Forward(x *ts.Tensor) *ts.Tensor { + r := randPvalue() + var out *ts.Tensor + switch { + case r < ras.pvalue: + out = adjustSharpness(x, ras.sharpnessFactor) + default: + out = x.MustShallowClone() + } + + return out +} + +func WithRandomAdjustSharpness(opts ...sharpnessOption) Option { + ras := newRandomAdjustSharpness(opts...) 
+ return func(o *Options) { + o.randomAdjustSharpness = ras + } +} diff --git a/vision/aug/solarize.go b/vision/aug/solarize.go new file mode 100644 index 0000000..729ea6f --- /dev/null +++ b/vision/aug/solarize.go @@ -0,0 +1,79 @@ +package aug + +import ( + ts "github.com/sugarme/gotch/tensor" +) + +// RandomSolarize solarizes the image randomly with a given probability by inverting all pixel +// values above a threshold. If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format, +// where ... means it can have an arbitrary number of leading dimensions. +// If img is PIL Image, it is expected to be in mode "L" or "RGB". +// Args: +// - threshold (float): all pixels equal or above this value are inverted. +// - p (float): probability of the image being color inverted. Default value is 0.5 +// Ref. https://en.wikipedia.org/wiki/Solarization_(photography) +type RandomSolarize struct { + threshold float64 + pvalue float64 +} + +type solarizeOptions struct { + threshold float64 + pvalue float64 +} + +type solarizeOption func(*solarizeOptions) + +func defaultSolarizeOptions() *solarizeOptions { + return &solarizeOptions{ + threshold: 128, + pvalue: 0.5, + } +} + +func WithSolarizePvalue(p float64) solarizeOption { + return func(o *solarizeOptions) { + o.pvalue = p + } +} + +func WithSolarizeThreshold(th float64) solarizeOption { + return func(o *solarizeOptions) { + o.threshold = th + } +} + +func newRandomSolarize(opts ...solarizeOption) *RandomSolarize { + params := defaultSolarizeOptions() + + for _, o := range opts { + o(params) + } + + return &RandomSolarize{ + threshold: params.threshold, + pvalue: params.pvalue, + } +} + +func (rs *RandomSolarize) Forward(x *ts.Tensor) *ts.Tensor { + r := randPvalue() + + var out *ts.Tensor + switch { + case r < rs.pvalue: + out = solarize(x, rs.threshold) + default: + out = x.MustShallowClone() + } + + return out +} + +func WithRandomSolarize(opts ...solarizeOption) Option { + rs := newRandomSolarize(opts...) 
+ + return func(o *Options) { + o.randomSolarize = rs + } +} diff --git a/vision/aug/transform.go b/vision/aug/transform.go new file mode 100644 index 0000000..2767d60 --- /dev/null +++ b/vision/aug/transform.go @@ -0,0 +1,188 @@ +package aug + +import ( + "math/rand" + "time" + + "github.com/sugarme/gotch/nn" + ts "github.com/sugarme/gotch/tensor" +) + +// Transformer is an interface that can transform an image tensor. +type Transformer interface { + Transform(x *ts.Tensor) *ts.Tensor +} + +// Augment is a struct composes of augmentation functions to implement Transformer interface. +type Augment struct { + augments *nn.Sequential +} + +// Transform implements Transformer interface for Augment struct. +func (a *Augment) Transform(image *ts.Tensor) *ts.Tensor { + out := a.augments.Forward(image) + return out +} + +type Options struct { + rotate *RotateModule + randRotate *RandRotateModule + resize *ResizeModule + colorJitter *ColorJitter + gaussianBlur *GaussianBlur + randomHFlip *RandomHorizontalFlip + randomVFlip *RandomVerticalFlip + randomCrop *RandomCrop + centerCrop *CenterCrop + randomCutout *RandomCutout + randomPerspective *RandomPerspective + randomAffine *RandomAffine + randomGrayscale *RandomGrayscale + randomSolarize *RandomSolarize + randomPosterize *RandomPosterize + randomInvert *RandomInvert + randomAutocontrast *RandomAutocontrast + randomAdjustSharpness *RandomAdjustSharpness + randomEqualize *RandomEqualize + normalize *Normalize +} + +func defaultOption() *Options { + return &Options{ + rotate: nil, + randRotate: nil, + resize: nil, + colorJitter: nil, + gaussianBlur: nil, + randomHFlip: nil, + randomVFlip: nil, + randomCrop: nil, + centerCrop: nil, + randomCutout: nil, + randomPerspective: nil, + randomAffine: nil, + randomGrayscale: nil, + randomSolarize: nil, + randomPosterize: nil, + randomInvert: nil, + randomAutocontrast: nil, + randomAdjustSharpness: nil, + randomEqualize: nil, + normalize: nil, + } +} + +type Option func(o *Options) + 
+// Compose creates a new Augment struct by adding augmentation methods. +func Compose(opts ...Option) (Transformer, error) { + augOpts := defaultOption() + for _, opt := range opts { + if opt != nil { + opt(augOpts) + } + } + + var augs *nn.Sequential = nn.Seq() + + if augOpts.rotate != nil { + augs.Add(augOpts.rotate) + } + + if augOpts.randRotate != nil { + augs.Add(augOpts.randRotate) + } + + if augOpts.resize != nil { + augs.Add(augOpts.resize) + } + + if augOpts.colorJitter != nil { + augs.Add(augOpts.colorJitter) + } + + if augOpts.gaussianBlur != nil { + augs.Add(augOpts.gaussianBlur) + } + + if augOpts.randomHFlip != nil { + augs.Add(augOpts.randomHFlip) + } + + if augOpts.randomVFlip != nil { + augs.Add(augOpts.randomVFlip) + } + + if augOpts.randomCrop != nil { + augs.Add(augOpts.randomCrop) + } + + if augOpts.centerCrop != nil { + augs.Add(augOpts.centerCrop) + } + + if augOpts.randomCutout != nil { + augs.Add(augOpts.randomCutout) + } + + if augOpts.randomPerspective != nil { + augs.Add(augOpts.randomPerspective) + } + + if augOpts.randomAffine != nil { + augs.Add(augOpts.randomAffine) + } + + if augOpts.randomGrayscale != nil { + augs.Add(augOpts.randomGrayscale) + } + + if augOpts.randomSolarize != nil { + augs.Add(augOpts.randomSolarize) + } + + if augOpts.randomPosterize != nil { + augs.Add(augOpts.randomPosterize) + } + + if augOpts.randomInvert != nil { + augs.Add(augOpts.randomInvert) + } + + if augOpts.randomAutocontrast != nil { + augs.Add(augOpts.randomAutocontrast) + } + + if augOpts.randomAdjustSharpness != nil { + augs.Add(augOpts.randomAdjustSharpness) + } + + if augOpts.randomEqualize != nil { + augs.Add(augOpts.randomEqualize) + } + + if augOpts.normalize != nil { + augs.Add(augOpts.normalize) + } + + return &Augment{augs}, nil +} + +// OneOf randomly return one transformer from list of transformers +// with a specific p value. 
+func OneOf(pvalue float64, tfOpts ...Option) Option { + tfsNum := len(tfOpts) + if tfsNum < 1 { + return nil + } + + randP := randPvalue() + if randP >= pvalue { + return nil + } + + rand.Seed(time.Now().UnixNano()) + idx := rand.Intn(tfsNum) + + return tfOpts[idx] +}