Merge pull request #43 from sugarme/aug

changed vision/aug input/output tensor dtype to be uint8
This commit is contained in:
Sugarme 2021-06-22 00:04:34 +10:00 committed by GitHub
commit 121908de21
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 261 additions and 79 deletions

View File

@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fixed incorrect message mismatched tensor shape at `nn.Varstore.Load`
- Fixed incorrect y -> x at `vision/aug/affine.go` getParam func
- Fixed double free tensor at `vision/aug/function.go` Equalize func.
- Changed `vision/aug`: all input images must be `uint8` (Byte) dtype, and the transformed output has the same dtype (`uint8`), so that `Compose()` can chain any combination of transformer options.
## [0.3.10]
- Update installation at README.md

Binary file not shown.

After

Width:  |  Height:  |  Size: 264 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 265 KiB

View File

@ -2,13 +2,80 @@ package main
import (
"fmt"
"log"
"github.com/sugarme/gotch"
"github.com/sugarme/gotch/vision"
"github.com/sugarme/gotch/vision/aug"
// ts "github.com/sugarme/gotch/tensor"
)
// main is the entry point of the augmentation example. It currently runs the
// single-transform demo (tOne); the uint8 <-> float round-trip demo is kept
// below as a commented-out alternative.
func main() {
// roundTrip()
tOne()
}
// roundTrip loads ./bb.png, converts it uint8 -> float -> uint8 with
// aug.Byte2FloatImage and aug.Float2ByteImage, prints each intermediate tensor
// with the `%i` verb and saves the final tensor to ./bimg.png.
//
// It panics if the image cannot be loaded and exits via log.Fatal if the
// result cannot be saved.
func roundTrip() {
	img, err := vision.Load("./bb.png")
	if err != nil {
		panic(err)
	}
	fmt.Printf("%i", img)

	fimg := aug.Byte2FloatImage(img)
	fmt.Printf("%i", fimg)

	bimg := aug.Float2ByteImage(fimg)
	fmt.Printf("%i", bimg)

	err = vision.Save(bimg, "./bimg.png")
	if err != nil {
		log.Fatal(err)
	}

	// The conversion helpers do not consume their input, so every tensor
	// created above has to be released explicitly.
	img.MustDrop()
	fimg.MustDrop()
	bimg.MustDrop()
}
// tOne loads ./bb.png, applies one composed augmentation to it on the CPU and
// writes the result to ./bb-transformed.png.
//
// The commented-out Compose calls below are alternative transformer options
// that can be swapped in for the active one. It panics if loading the image,
// building the transformer or saving the output fails.
func tOne() {
	img, err := vision.Load("./bb.png")
	if err != nil {
		panic(err)
	}

	// device := gotch.CudaIfAvailable()
	device := gotch.CPU
	imgTs := img.MustTo(device, true)

	t, err := aug.Compose(aug.WithRandomSolarize(aug.WithSolarizeThreshold(125), aug.WithSolarizePvalue(1.0)))
	// t, err := aug.Compose(aug.WithRandomAdjustSharpness(aug.WithSharpnessPvalue(1.0), aug.WithSharpnessFactor(10)))
	// t, err := aug.Compose(aug.WithRandRotate(0, 360))
	// t, err := aug.Compose(aug.WithResize(320, 320)) // NOTE. WithResize just works on CPU.
	// t, err := aug.Compose(aug.WithRandomPosterize(aug.WithPosterizeBits(2), aug.WithPosterizePvalue(1.0)))
	// t, err := aug.Compose(aug.WithRandomPerspective(aug.WithPerspectiveScale(0.6), aug.WithPerspectivePvalue(1.0)))
	// t, err := aug.Compose(aug.WithNormalize(aug.WithNormalizeMean([]float64{0.485, 0.456, 0.406}), aug.WithNormalizeStd([]float64{0.229, 0.224, 0.225})))
	// t, err := aug.Compose(aug.WithRandomInvert(1.0))
	// t, err := aug.Compose(aug.WithRandomGrayscale(1.0))
	// t, err := aug.Compose(aug.WithRandomVFlip(1.0))
	// t, err := aug.Compose(aug.WithRandomHFlip(1.0))
	// t, err := aug.Compose(aug.WithRandomEqualize(1.0))
	// t, err := aug.Compose(aug.WithRandomCutout(aug.WithCutoutValue([]int64{124, 96, 255}), aug.WithCutoutScale([]float64{0.01, 0.1}), aug.WithCutoutRatio([]float64{0.5, 0.5})))
	// t, err := aug.Compose(aug.WithCenterCrop([]int64{320, 320}))
	// t, err := aug.Compose(aug.WithRandomAutocontrast())
	// t, err := aug.Compose(aug.WithColorJitter(0.3, 0.3, 0.3, 0.3))
	// t, err := aug.Compose(aug.WithGaussianBlur([]int64{5, 5}, []float64{1.0, 2.0}))
	// t, err := aug.Compose(aug.WithRandomAffine(aug.WithAffineDegree([]int64{0, 15}), aug.WithAffineShear([]float64{0, 15})))
	// Check the Compose error before using t: it was previously overwritten
	// by the Save call below without ever being inspected.
	if err != nil {
		panic(err)
	}

	out := t.Transform(imgTs)

	fname := "./bb-transformed.png"
	err = vision.Save(out, fname)
	if err != nil {
		panic(err)
	}

	imgTs.MustDrop()
	out.MustDrop()
}
func tMany() {
n := 360
for i := 1; i <= n; i++ {
img, err := vision.Load("./bb.png")
@ -19,22 +86,6 @@ func main() {
// device := gotch.CudaIfAvailable()
device := gotch.CPU
imgTs := img.MustTo(device, true)
// t, err := aug.Compose(aug.WithResize(512, 512)) // NOTE. WithResize just works on CPU.
// t, err := aug.Compose(aug.WithRandRotate(0, 360), aug.WithColorJitter(0.3, 0.3, 0.3, 0.4))
// t, err := aug.Compose(aug.WithGaussianBlur([]int64{5, 5}, []float64{1.0, 2.0}), aug.WithRandRotate(0, 360), aug.WithColorJitter(0.3, 0.3, 0.3, 0.3))
// t, err := aug.Compose(aug.WithRandomCrop([]int64{320, 320}, []int64{10, 10}, true, "constant"))
// t, err := aug.Compose(aug.WithCenterCrop([]int64{320, 320}))
// t, err := aug.Compose(aug.WithRandomCutout(aug.WithCutoutValue([]int64{124, 96, 255}), aug.WithCutoutScale([]float64{0.01, 0.1}), aug.WithCutoutRatio([]float64{0.5, 0.5})))
// t, err := aug.Compose(aug.WithRandomPerspective(aug.WithPerspectiveScale(0.6), aug.WithPerspectivePvalue(0.8)))
// t, err := aug.Compose(aug.WithRandomAffine(aug.WithAffineDegree([]int64{0, 15}), aug.WithAffineShear([]float64{0, 15})))
// t, err := aug.Compose(aug.WithRandomGrayscale(0.5))
// t, err := aug.Compose(aug.WithRandomSolarize(aug.WithSolarizeThreshold(125), aug.WithSolarizePvalue(0.5)))
// t, err := aug.Compose(aug.WithRandomInvert(0.5))
// t, err := aug.Compose(aug.WithRandomPosterize(aug.WithPosterizeBits(2), aug.WithPosterizePvalue(1.0)))
// t, err := aug.Compose(aug.WithRandomAutocontrast())
// t, err := aug.Compose(aug.WithRandomAdjustSharpness(aug.WithSharpnessPvalue(0.3), aug.WithSharpnessFactor(10)))
// t, err := aug.Compose(aug.WithRandomEqualize(1.0))
// t, err := aug.Compose(aug.WithNormalize(aug.WithNormalizeMean([]float64{0.485, 0.456, 0.406}), aug.WithNormalizeStd([]float64{0.229, 0.224, 0.225})))
t, err := aug.Compose(
aug.WithResize(200, 200),
@ -67,4 +118,5 @@ func main() {
fmt.Printf("%03d/%v completed.\n", i, n)
}
}

View File

@ -95,12 +95,19 @@ func (ra *RandomAffine) getParams(imageSize []int64) (float64, []int64, float64,
}
func (ra *RandomAffine) Forward(x *ts.Tensor) *ts.Tensor {
w, h := getImageSize(x)
assertImageTensor(x)
fx := Byte2FloatImage(x)
w, h := getImageSize(fx)
angle, translations, scale, shear := ra.getParams([]int64{w, h})
out := affine(x, angle, translations, scale, shear, ra.interpolationMode, ra.fillValue)
out := affine(fx, angle, translations, scale, shear, ra.interpolationMode, ra.fillValue)
return out
bx := Float2ByteImage(out)
fx.MustDrop()
out.MustDrop()
return bx
}
func newRandomAffine(opts ...affineOption) *RandomAffine {

View File

@ -73,12 +73,20 @@ func newGaussianBlur(ks []int64, sig []float64) *GaussianBlur {
}
func (b *GaussianBlur) Forward(x *ts.Tensor) *ts.Tensor {
assertImageTensor(x)
fx := Byte2FloatImage(x)
sigmaTs := ts.MustEmpty([]int64{1}, gotch.Float, gotch.CPU)
sigmaTs.MustUniform_(b.sigma[0], b.sigma[1])
sigmaVal := sigmaTs.Float64Values()[0]
sigmaTs.MustDrop()
return gaussianBlur(x, b.kernelSize, []float64{sigmaVal, sigmaVal})
out := gaussianBlur(fx, b.kernelSize, []float64{sigmaVal, sigmaVal})
bx := Float2ByteImage(out)
fx.MustDrop()
out.MustDrop()
return bx
}
func WithGaussianBlur(ks []int64, sig []float64) Option {

View File

@ -44,24 +44,33 @@ func (c *ColorJitter) setHue(hue float64) {
// Forward implements ts.Module by randomly picking one of the brightness, contrast,
// saturation or hue functions to transform the input image tensor.
func (c *ColorJitter) Forward(x *ts.Tensor) *ts.Tensor {
fx := Byte2FloatImage(x)
var out *ts.Tensor
rand.Seed(time.Now().UnixNano())
idx := rand.Intn(4)
switch idx {
case 0:
v := randVal(getMinMax(c.brightness))
return adjustBrightness(x, v)
out = adjustBrightness(fx, v)
case 1:
v := randVal(getMinMax(c.contrast))
return adjustContrast(x, v)
out = adjustContrast(fx, v)
case 2:
v := randVal(getMinMax(c.saturation))
return adjustSaturation(x, v)
out = adjustSaturation(fx, v)
case 3:
v := randVal(0, c.hue)
return adjustHue(x, v)
out = adjustHue(fx, v)
default:
panic("Shouldn't reach here.")
}
bx := Float2ByteImage(out)
fx.MustDrop()
out.MustDrop()
return bx
}
func WithColorJitter(brightness, contrast, sat, hue float64) Option {

View File

@ -23,16 +23,22 @@ func newRandomAutocontrast(pOpt ...float64) *RandomAutocontrast {
}
func (rac *RandomAutocontrast) Forward(x *ts.Tensor) *ts.Tensor {
fx := Byte2FloatImage(x)
r := randPvalue()
var out *ts.Tensor
switch {
case r < rac.pvalue:
out = autocontrast(x)
out = autocontrast(fx)
default:
out = x.MustShallowClone()
out = fx.MustShallowClone()
}
return out
bx := Float2ByteImage(out)
fx.MustDrop()
out.MustDrop()
return bx
}
func WithRandomAutocontrast(p ...float64) Option {

View File

@ -50,14 +50,16 @@ func (c *RandomCrop) params(x *ts.Tensor) (int64, int64, int64, int64) {
}
func (c *RandomCrop) Forward(x *ts.Tensor) *ts.Tensor {
fx := Byte2FloatImage(x)
var img *ts.Tensor
if c.padding != nil {
img = pad(x, c.padding, c.paddingMode)
img = pad(fx, c.padding, c.paddingMode)
} else {
img = x.MustShallowClone()
img = fx.MustShallowClone()
}
w, h := getImageSize(x)
w, h := getImageSize(fx)
var (
paddedW *ts.Tensor
@ -86,7 +88,11 @@ func (c *RandomCrop) Forward(x *ts.Tensor) *ts.Tensor {
i, j, h, w := c.params(x)
out := crop(paddedWH, i, j, h, w)
paddedWH.MustDrop()
return out
bx := Float2ByteImage(out)
fx.MustDrop()
out.MustDrop()
return bx
}
func WithRandomCrop(size []int64, padding []int64, paddingIfNeeded bool, paddingMode string) Option {

View File

@ -146,22 +146,27 @@ func (rc *RandomCutout) cutoutParams(x *ts.Tensor) (int64, int64, int64, int64,
}
func (rc *RandomCutout) Forward(img *ts.Tensor) *ts.Tensor {
fx := Byte2FloatImage(img)
randTs := ts.MustRandn([]int64{1}, gotch.Float, gotch.CPU)
randVal := randTs.Float64Values()[0]
randTs.MustDrop()
var out *ts.Tensor
switch randVal < rc.pvalue {
case true:
x, y, h, w, v := rc.cutoutParams(img)
out := cutout(img, x, y, h, w, rc.rgbVal)
x, y, h, w, v := rc.cutoutParams(fx)
out = cutout(fx, x, y, h, w, rc.rgbVal)
v.MustDrop()
return out
case false:
out := img.MustShallowClone()
return out
out = fx.MustShallowClone()
}
panic("Shouldn't reach here")
bx := Float2ByteImage(out)
fx.MustDrop()
out.MustDrop()
return bx
}
func WithRandomCutout(opts ...cutoutOption) Option {

View File

@ -24,9 +24,9 @@ func newRandomEqualize(pOpt ...float64) *RandomEqualize {
return &RandomEqualize{p}
}
// NOTE. input image MUST be uint8 dtype otherwise panic!
func (re *RandomEqualize) Forward(x *ts.Tensor) *ts.Tensor {
r := randPvalue()
var out *ts.Tensor
switch {
case r < re.pvalue:

View File

@ -22,16 +22,24 @@ func newRandomHorizontalFlip(pvalue float64) *RandomHorizontalFlip {
}
func (hf *RandomHorizontalFlip) Forward(x *ts.Tensor) *ts.Tensor {
fx := Byte2FloatImage(x)
randTs := ts.MustRandn([]int64{1}, gotch.Float, gotch.CPU)
randVal := randTs.Float64Values()[0]
randTs.MustDrop()
var out *ts.Tensor
switch {
case randVal < hf.pvalue:
return hflip(x)
out = hflip(fx)
default:
out := x.MustShallowClone()
return out
out = fx.MustShallowClone()
}
bx := Float2ByteImage(out)
fx.MustDrop()
out.MustDrop()
return bx
}
func WithRandomHFlip(pvalue float64) Option {
@ -58,16 +66,25 @@ func newRandomVerticalFlip(pvalue float64) *RandomVerticalFlip {
}
func (vf *RandomVerticalFlip) Forward(x *ts.Tensor) *ts.Tensor {
fx := Byte2FloatImage(x)
randTs := ts.MustRandn([]int64{1}, gotch.Float, gotch.CPU)
randVal := randTs.Float64Values()[0]
randTs.MustDrop()
var out *ts.Tensor
switch {
case randVal < vf.pvalue:
return vflip(x)
out = vflip(fx)
default:
out := x.MustShallowClone()
return out
out = fx.MustShallowClone()
}
bx := Float2ByteImage(out)
fx.MustDrop()
out.MustDrop()
return bx
}
func WithRandomVFlip(pvalue float64) Option {

View File

@ -99,7 +99,8 @@ func castSqueezeOut(x *ts.Tensor, needCast, needSqueeze bool, outDType gotch.DTy
}
func gaussianBlur(x *ts.Tensor, ks []int64, sigma []float64) *ts.Tensor {
dtype := gotch.Float
// dtype := gotch.Float
dtype := x.DType()
if x.DType() == gotch.Float || x.DType() == gotch.Double {
dtype = x.DType()
}
@ -1200,8 +1201,9 @@ func autocontrast(img *ts.Tensor) *ts.Tensor {
log.Fatalf("Input image tensor should have at least 3 dimensions. Got %v\n", len(dim))
}
var bound int64 = 255
dtype := gotch.Float
// NOTE. image tensor expected to be float dtype [0,1]
var bound float64 = 1.0
dtype := img.DType()
// minimum = img.amin(dim=(-2, -1), keepdim=True).to(dtype)
minTs := img.MustAmin([]int64{-2, -1}, true, false).MustTotype(dtype, true)
@ -1221,16 +1223,16 @@ func autocontrast(img *ts.Tensor) *ts.Tensor {
// maximum[eq_idxs] = bound
maxTsView := maxTs.MustIndexSelect(0, eqIdx, false)
boundTs := maxTsView.MustOnesLike(false).MustMul1(ts.IntScalar(bound), true)
boundTs := maxTsView.MustOnesLike(false).MustMul1(ts.FloatScalar(bound), true)
maxTsView.Copy_(boundTs)
boundTs.MustDrop()
maxTsView.MustDrop()
// scale = bound / (maximum - minimum)
scale := maxTs.MustSub(minTs, false).MustPow(ts.IntScalar(-1), true).MustMul1(ts.IntScalar(bound), true)
scale := maxTs.MustSub(minTs, false).MustPow(ts.IntScalar(-1), true).MustMul1(ts.FloatScalar(bound), true)
//
// return ((img - minimum) * scale).clamp(0, bound).to(img.dtype)
out := img.MustSub(minTs, false).MustMul(scale, true).MustClamp(ts.IntScalar(0), ts.IntScalar(bound), true).MustTotype(dtype, true)
out := img.MustSub(minTs, false).MustMul(scale, true).MustClamp(ts.IntScalar(0), ts.FloatScalar(bound), true).MustTotype(dtype, true)
minTs.MustDrop()
maxTs.MustDrop()
@ -1393,7 +1395,7 @@ func scaleChannel(imgChan *ts.Tensor) *ts.Tensor {
// hist = torch.bincount(img_chan.view(-1), minlength=256)
// hist = torch.histc(img_chan.to(torch.float32), bins=256, min=0, max=255)
hist := imgChan.MustHistc(256, false)
hist := imgChan.MustTotype(gotch.Float, false).MustHistc(256, true)
// nonzero_hist = hist[hist != 0]
nonZeroHist := hist.MustNonzero(false) // [n, 1]
@ -1500,13 +1502,34 @@ func normalize(img *ts.Tensor, mean, std []float64) *ts.Tensor {
log.Fatalf("std must be 1 or 3 elements. Got %v\n", len(std))
}
// out := img.MustSub(mTs, false).MustDiv(sTs, true)
x := img.MustDiv1(ts.FloatScalar(255.0), false)
out := x.MustSub(mTs, false).MustDiv(sTs, true).MustMul1(ts.IntScalar(255), true)
x.MustDrop()
out := img.MustSub(mTs, false).MustDiv(sTs, true)
mTs.MustDrop()
sTs.MustDrop()
return out
}
// Byte2FloatImage converts a uint8 dtype image tensor to a float image tensor
// by dividing every element by 255.
//
// The input tensor is left untouched (it is not dropped); the caller owns the
// returned tensor. It panics if the input is not uint8 dtype.
func Byte2FloatImage(x *ts.Tensor) *ts.Tensor {
	if got := x.DType(); got != gotch.Uint8 {
		panic(fmt.Errorf("Input tensor is not uint8 dtype (%v)", got))
	}

	divisor := ts.FloatScalar(255.0)
	return x.MustDiv1(divisor, false)
}
// Float2ByteImage converts a float dtype image tensor to a uint8 dtype image
// tensor.
//
// Every element is multiplied by 255 and then clamped to [0, 255] before the
// cast, so values outside the expected [0, 1] range saturate instead of
// overflowing the uint8 conversion. Inputs already within [0, 1] produce the
// same result as an unclamped conversion.
//
// The input tensor is left untouched (it is not dropped); the caller owns the
// returned tensor. It panics if the input is not gotch.Float dtype.
func Float2ByteImage(x *ts.Tensor) *ts.Tensor {
	dtype := x.DType()
	if dtype != gotch.Float {
		err := fmt.Errorf("Input tensor is not float dtype (%v)", dtype)
		panic(err)
	}

	scaled := x.MustMul1(ts.IntScalar(255), false)
	// Saturate before casting: a float outside [0, 255] has no valid uint8
	// representation.
	clamped := scaled.MustClamp(ts.FloatScalar(0), ts.FloatScalar(255), true)

	return clamped.MustTotype(gotch.Uint8, true)
}

View File

@ -17,8 +17,15 @@ type Grayscale struct {
}
func (gs *Grayscale) Forward(x *ts.Tensor) *ts.Tensor {
out := rgb2Gray(x, gs.outChan)
return out
fx := Byte2FloatImage(x)
out := rgb2Gray(fx, gs.outChan)
bx := Float2ByteImage(out)
fx.MustDrop()
out.MustDrop()
return bx
}
func newGrayscale(outChanOpt ...int64) *Grayscale {

View File

@ -17,17 +17,22 @@ func newRandomInvert(pOpt ...float64) *RandomInvert {
}
func (ri *RandomInvert) Forward(x *ts.Tensor) *ts.Tensor {
r := randPvalue()
fx := Byte2FloatImage(x)
r := randPvalue()
var out *ts.Tensor
switch {
case r < ri.pvalue:
out = invert(x)
out = invert(fx)
default:
out = x.MustShallowClone()
out = fx.MustShallowClone()
}
return out
bx := Float2ByteImage(out)
fx.MustDrop()
out.MustDrop()
return bx
}
func WithRandomInvert(pvalueOpt ...float64) Option {

View File

@ -79,8 +79,15 @@ func newNormalize(opts ...normalizeOption) *Normalize {
}
func (n *Normalize) Forward(x *ts.Tensor) *ts.Tensor {
out := normalize(x, n.mean, n.std)
return out
fx := Byte2FloatImage(x)
out := normalize(fx, n.mean, n.std)
bx := Float2ByteImage(out)
fx.MustDrop()
out.MustDrop()
return bx
}
func WithNormalize(opts ...normalizeOption) Option {

View File

@ -176,10 +176,17 @@ func (rp *RandomPerspective) getParams(w, h int64) ([][]int64, [][]int64) {
}
func (rp *RandomPerspective) Forward(x *ts.Tensor) *ts.Tensor {
height, width := getImageSize(x)
fx := Byte2FloatImage(x)
height, width := getImageSize(fx)
startPoints, endPoints := rp.getParams(height, width)
out := perspective(x, startPoints, endPoints, rp.interpolationMode, rp.fillValue)
return out
out := perspective(fx, startPoints, endPoints, rp.interpolationMode, rp.fillValue)
bx := Float2ByteImage(out)
fx.MustDrop()
out.MustDrop()
return bx
}
func WithRandomPerspective(opts ...perspectiveOption) Option {

View File

@ -54,8 +54,8 @@ func newRandomPosterize(opts ...posterizeOption) *RandomPosterize {
}
}
// NOTE. Input image must be uint8 dtype otherwise panic!
func (rp *RandomPosterize) Forward(x *ts.Tensor) *ts.Tensor {
r := randPvalue()
var out *ts.Tensor
switch {

View File

@ -1,6 +1,7 @@
package aug
import (
"fmt"
"log"
"github.com/sugarme/gotch"
@ -18,13 +19,17 @@ func newResizeModule(h, w int64) *ResizeModule {
}
// Forward implements ts.Module for ResizeModule
// NOTE. input tensor must be uint8 (Byte) dtype otherwise panic!
func (rs *ResizeModule) Forward(x *ts.Tensor) *ts.Tensor {
imgTs := x.MustTotype(gotch.Uint8, false)
out, err := vision.Resize(imgTs, rs.width, rs.height)
dtype := x.DType()
if dtype != gotch.Uint8 {
err := fmt.Errorf("Invalid dtype. Expect uint8 (Byte) dtype. Got %v\n", dtype)
panic(err)
}
out, err := vision.Resize(x, rs.width, rs.height)
if err != nil {
log.Fatal(err)
}
imgTs.MustDrop()
return out
}

View File

@ -66,12 +66,18 @@ func newRotate(angle float64) *RotateModule {
// Forward implements ts.Module for RotateModule
func (r *RotateModule) Forward(x *ts.Tensor) *ts.Tensor {
out, err := Rotate(x, r.angle)
fx := Byte2FloatImage(x)
out, err := Rotate(fx, r.angle)
if err != nil {
log.Fatal(err)
}
return out
bx := Float2ByteImage(out)
fx.MustDrop()
out.MustDrop()
return bx
}
func WithRotate(angle float64) Option {

View File

@ -54,16 +54,22 @@ func newRandomAdjustSharpness(opts ...sharpnessOption) *RandomAdjustSharpness {
}
func (ras *RandomAdjustSharpness) Forward(x *ts.Tensor) *ts.Tensor {
fx := Byte2FloatImage(x)
r := randPvalue()
var out *ts.Tensor
switch {
case r < ras.pvalue:
out = adjustSharpness(x, ras.sharpnessFactor)
out = adjustSharpness(fx, ras.sharpnessFactor)
default:
out = x.MustShallowClone()
out = fx.MustShallowClone()
}
return out
bx := Float2ByteImage(out)
fx.MustDrop()
out.MustDrop()
return bx
}
func WithRandomAdjustSharpness(opts ...sharpnessOption) Option {

View File

@ -57,17 +57,22 @@ func newRandomSolarize(opts ...solarizeOption) *RandomSolarize {
}
func (rs *RandomSolarize) Forward(x *ts.Tensor) *ts.Tensor {
r := randPvalue()
fx := Byte2FloatImage(x)
r := randPvalue()
var out *ts.Tensor
switch {
case r < rs.pvalue:
out = solarize(x, rs.threshold)
out = solarize(fx, rs.threshold)
default:
out = x.MustShallowClone()
out = fx.MustShallowClone()
}
return out
bx := Float2ByteImage(out)
fx.MustDrop()
out.MustDrop()
return bx
}
func WithRandomSolarize(opts ...solarizeOption) Option {