From 830f9ad9df6dce9c879914a3a0f0786a15b81162 Mon Sep 17 00:00:00 2001 From: sugarme Date: Tue, 16 Jun 2020 13:39:02 +1000 Subject: [PATCH] feat(nn/data): data interator Iter2 --- example/mnist/linear.go | 60 +++++++++---- libtch/c-generated-sample.go | 9 ++ nn/data.go | 138 ++++++++++++++++++++++++++++++ nn/init.go | 25 +++--- nn/varstore.go | 37 +++++--- tensor/index.go | 4 +- tensor/tensor-generated-sample.go | 43 ++++++++-- vision/dataset.go | 15 +++- 8 files changed, 278 insertions(+), 53 deletions(-) create mode 100644 nn/data.go diff --git a/example/mnist/linear.go b/example/mnist/linear.go index dd1d25d..5bcd0f9 100644 --- a/example/mnist/linear.go +++ b/example/mnist/linear.go @@ -13,17 +13,18 @@ const ( Label int64 = 10 MnistDir string = "../../data/mnist" - epochs = 200 + epochs = 100 + batchSize = 256 ) func runLinear() { var ds vision.Dataset ds = vision.LoadMNISTDir(MnistDir) - fmt.Printf("Train image size: %v\n", ds.TrainImages.MustSize()) - fmt.Printf("Train label size: %v\n", ds.TrainLabels.MustSize()) - fmt.Printf("Test image size: %v\n", ds.TestImages.MustSize()) - fmt.Printf("Test label size: %v\n", ds.TestLabels.MustSize()) + // fmt.Printf("Train image size: %v\n", ds.TrainImages.MustSize()) + // fmt.Printf("Train label size: %v\n", ds.TrainLabels.MustSize()) + // fmt.Printf("Test image size: %v\n", ds.TestImages.MustSize()) + // fmt.Printf("Test label size: %v\n", ds.TestLabels.MustSize()) device := (gotch.CPU).CInt() dtype := (gotch.Float).CInt() @@ -32,22 +33,51 @@ func runLinear() { bs := ts.MustZeros([]int64{Label}, dtype, device).MustSetRequiresGrad(true) for epoch := 0; epoch < epochs; epoch++ { - logits := ds.TrainImages.MustMm(ws).MustAdd(bs) - loss := logits.MustLogSoftmax(-1, dtype).MustNllLoss(ds.TrainLabels) - ws.ZeroGrad() - bs.ZeroGrad() - loss.Backward() + var loss ts.Tensor + trainIter := ds.TrainIter(batchSize) + trainIter.Shuffle().ToDevice(gotch.CPU) + // item a pair of images and labels as 2 tensors + for { + batch, ok := trainIter.Next() + if !ok { + break + } - ts.NoGrad(func() { - ws.MustAdd_(ws.MustGrad().MustMul1(ts.FloatScalar(-1.0))) - bs.MustAdd_(bs.MustGrad().MustMul1(ts.FloatScalar(-1.0))) - }) + logits := batch.Images.MustMm(ws).MustAdd(bs) + loss = logits.MustLogSoftmax(-1, dtype).MustNllLoss(batch.Labels) + ws.ZeroGrad() + bs.ZeroGrad() + loss.Backward() + + ts.NoGrad(func() { + ws.MustAdd_(ws.MustGrad().MustMul1(ts.FloatScalar(-1.0))) + bs.MustAdd_(bs.MustGrad().MustMul1(ts.FloatScalar(-1.0))) + }) + } + + /* + * logits := ds.TrainImages.MustMm(ws).MustAdd(bs) + * loss := logits.MustLogSoftmax(-1, dtype).MustNllLoss(ds.TrainLabels) + * + * ws.ZeroGrad() + * bs.ZeroGrad() + * loss.Backward() + * + * ts.NoGrad(func() { + * ws.MustAdd_(ws.MustGrad().MustMul1(ts.FloatScalar(-1.0))) + * bs.MustAdd_(bs.MustGrad().MustMul1(ts.FloatScalar(-1.0))) + * }) + * loss.Print() + * */ + + // bs.MustGrad().Print() testLogits := ds.TestImages.MustMm(ws).MustAdd(bs) testAccuracy := testLogits.MustArgmax(-1, false).MustEq1(ds.TestLabels).MustTotype(gotch.Float).MustMean(gotch.Float.CInt()).MustView([]int64{-1}).MustFloat64Value([]int64{0}) - fmt.Printf("Epoch: %v - Train loss: %v - Test accuracy: %v\n", epoch, loss.MustView([]int64{-1}).MustFloat64Value([]int64{0}), testAccuracy*100) + fmt.Printf("Epoch: %v - Test accuracy: %v\n", epoch, testAccuracy*100) + // fmt.Printf("Epoch: %v - Train loss: %v - Test accuracy: %v\n", epoch, loss.MustView([]int64{-1}).MustFloat64Value([]int64{0}), testAccuracy*100) } } diff --git a/libtch/c-generated-sample.go b/libtch/c-generated-sample.go index 44b35ad..a4ac85e 100644 --- a/libtch/c-generated-sample.go +++ b/libtch/c-generated-sample.go @@ -233,3 +233,12 @@ func AtgView(ptr *Ctensor, self Ctensor, sizeData []int64, sizeLen int) { func AtgDiv1(ptr *Ctensor, self Ctensor, other Cscalar) { C.atg_div1(ptr, self, other) } + +// void atg_randperm(tensor *, int64_t n, int options_kind, int options_device); +func AtgRandperm(ptr *Ctensor, n int64, optionKind int32, optionDevice int32) { + cn := *(*C.int64_t)(unsafe.Pointer(&n)) + coptionKind := *(*C.int)(unsafe.Pointer(&optionKind)) + coptionDevice := *(*C.int)(unsafe.Pointer(&optionDevice)) + + C.atg_randperm(ptr, cn, coptionKind, coptionDevice) +} diff --git a/nn/data.go b/nn/data.go new file mode 100644 index 0000000..49af887 --- /dev/null +++ b/nn/data.go @@ -0,0 +1,138 @@ +package nn + +import ( + "fmt" + "log" + + "github.com/sugarme/gotch" + ts "github.com/sugarme/gotch/tensor" +) + +// Iter2 is an iterator over a pair of tensors which have the same first dimension +// size. +// The typical use case is to iterate over batches. Each batch is a pair +// containing a (potentially random) slice of each of the two input +// tensors. +type Iter2 struct { + xs ts.Tensor + ys ts.Tensor + batchIndex int64 + batchSize int64 + totalSize int64 + device gotch.Device + returnSmallLastBatch bool +} + +// NewIter2 returns a new iterator. +// +// This takes as input two tensors which first dimension must match. The +// returned iterator can be used to range over mini-batches of data of +// specified size. +// An error is returned if `xs` and `ys` have different first dimension +// sizes. +// +// # Arguments +// +// * `xs` - the features to be used by the model. +// * `ys` - the targets that the model attempts to predict. +// * `batch_size` - the size of batches to be returned. +func NewIter2(xs, ys ts.Tensor, batchSize int64) (retVal Iter2, err error) { + + totalSize := xs.MustSize()[0] + if ys.MustSize()[0] != totalSize { + err = fmt.Errorf("Different dimension for the two inputs: %v - %v", xs.MustSize(), ys.MustSize()) + return retVal, err + } + + retVal = Iter2{ + xs: xs.MustShallowClone(), + ys: ys.MustShallowClone(), + batchIndex: 0, + batchSize: batchSize, + totalSize: totalSize, + returnSmallLastBatch: false, + } + + return retVal, nil +} + +// MustNewIter2 returns a new iterator. +// +// This takes as input two tensors which first dimension must match. The +// returned iterator can be used to range over mini-batches of data of +// specified size. +// Panics if `xs` and `ys` have different first dimension sizes. +// +// # Arguments +// +// * `xs` - the features to be used by the model. +// * `ys` - the targets that the model attempts to predict. +// * `batch_size` - the size of batches to be returned. +func MustNewIter2(xs, ys ts.Tensor, batchSize int64) (retVal Iter2) { + retVal, err := NewIter2(xs, ys, batchSize) + + if err != nil { + log.Fatal(err) + } + + return retVal +} + +// Shuffle shuffles the dataset. +// +// The iterator would still run over the whole dataset but the order in +// which elements are grouped in mini-batches is randomized. +func (it Iter2) Shuffle() (retVal Iter2) { + index := ts.MustRandperm(it.totalSize, gotch.Int64, gotch.CPU) + + it.xs = it.xs.MustIndexSelect(0, index) + it.ys = it.ys.MustIndexSelect(0, index) + return it +} + +// ToDevice transfers the mini-batches to a specified device. +func (it Iter2) ToDevice(device gotch.Device) (retVal Iter2) { + it.device = device + return it +} + +// ReturnSmallLastBatch when set, returns the last batch even if smaller than the batch size. +func (it Iter2) ReturnSmallLastBatch() (retVal Iter2) { + it.returnSmallLastBatch = true + return it +} + +type Iter2Item struct { + Images ts.Tensor + Labels ts.Tensor +} + +// Next implements iterator for Iter2 +func (it *Iter2) Next() (item Iter2Item, ok bool) { + start := it.batchIndex * it.batchSize + size := it.batchSize + if it.totalSize-start < it.batchSize { + size = it.totalSize - start + } + + if (size <= 0) || (!it.returnSmallLastBatch && size < it.batchSize) { + // err = fmt.Errorf("Last small batch error") + return item, false + } else { + it.batchIndex += 1 + + // Indexing + narrowIndex := ts.NewNarrow(start, start+size) + + // ts1 := it.xs.Idx(narrowIndex).MustTo(it.device) + // ts2 := it.ys.Idx(narrowIndex).MustTo(it.device) + + ts1 := it.xs.Idx(narrowIndex) + ts2 := it.ys.Idx(narrowIndex) + + return Iter2Item{ + Images: ts1, + Labels: ts2, + }, true + } +} diff --git a/nn/init.go b/nn/init.go index 8d28e74..c52e1be 100644 --- a/nn/init.go +++ b/nn/init.go @@ -30,12 +30,12 @@ func NewConstInit(v float64) constInit { func (c constInit) InitTensor(dims []int64, device gotch.Device) (retVal ts.Tensor) { var err error - kind := gotch.DType2CInt(gotch.Float) + kind := gotch.Float.CInt() switch { case c.value == 0.0: - retVal = ts.Zeros(dims, kind, device.CInt()) + retVal = ts.MustZeros(dims, kind, device.CInt()) case c.value == 1.0: - retVal = ts.Ones(dims, kind, device.CInt()) + retVal = ts.MustOnes(dims, kind, device.CInt()) default: data := make([]float64, ts.FlattenDim(dims)) for i := range data { @@ -57,7 +57,7 @@ func (c constInit) Set(tensor ts.Tensor) { log.Fatalf("constInit - Set method call error: %v\n", err) } - ts.Fill_(scalarVal) + tensor.Fill_(scalarVal) } // randnInit : @@ -125,9 +125,9 @@ func NewUniformInit(lo, up float64) uniformInit { func (u uniformInit) InitTensor(dims []int64, device gotch.Device) (retVal ts.Tensor) { var err error - kind := gotch.DType2CInt(gotch.Float) - tmpTs := ts.Zeros(dims, kind, device.CInt()) - retVal, err = tmpTs.Uniform_(u.lo, u.up) + kind := gotch.Float.CInt() + retVal = ts.MustZeros(dims, kind, device.CInt()) + retVal.Uniform_(u.lo, u.up) if err != nil { log.Fatalf("uniformInit - InitTensor method call error: %v\n", err) } @@ -150,13 +150,10 @@ func NewKaimingUniformInit() kaimingUniformInit { func (k kaimingUniformInit) InitTensor(dims []int64, device gotch.Device) (retVal ts.Tensor) { fanIn := factorial(uint64(len(dims) - 1)) bound := math.Sqrt(1.0 / float64(fanIn)) - var err error - kind := gotch.DType2CInt(gotch.Float) - tmpTs := ts.Zeros(dims, kind, device.CInt()) - retVal, err = tmpTs.Uniform_(-bound, bound) - if err != nil { - log.Fatalf("uniformInit - InitTensor method call error: %v\n", err) - } + kind := gotch.Float.CInt() + retVal = ts.MustZeros(dims, kind, device.CInt()) + retVal.Uniform_(-bound, bound) + return retVal } diff --git a/nn/varstore.go b/nn/varstore.go index 2620dde..4c51ba8 100644 --- a/nn/varstore.go +++ b/nn/varstore.go @@ -160,7 +160,9 @@ func (vs *VarStore) Load(filepath string) (err error) { return err } - retValErr, err := ts.NoGrad(ts.Copy_(currTs, namedTs.Tensor)) + retValErr, err := ts.NoGrad(func() { + ts.Copy_(currTs, namedTs.Tensor) + }) if err != nil { return err } @@ -205,7 +207,9 @@ func (vs *VarStore) LoadPartial(filepath string) (retVal []string, err error) { } // It's matched. Now, copy in-place the loaded tensor value to var-store - retValErr, err := ts.NoGrad(ts.Copy_(currTs, namedTs.Tensor)) + retValErr, err := ts.NoGrad(func() { + ts.Copy_(currTs, namedTs.Tensor) + }) if err != nil { return nil, err } @@ -274,7 +278,9 @@ func (vs *VarStore) Copy(src VarStore) (err error) { if err != nil { return err } - retValErr, err := ts.NoGrad(ts.Copy_(v, srcDevTs)) + retValErr, err := ts.NoGrad(func() { + ts.Copy_(v, srcDevTs) + }) if err != nil { return err } @@ -520,7 +526,7 @@ func (p *Path) Uniform(name string, dims []int64, lo, up float64) (retVal ts.Ten // will be tracked. // The variable uses a float tensor initialized randomly using a // uniform distribution which bounds follow Kaiming initialization. -func (p *Path) Uniform(name string, dims []int64) (retVal ts.Tensor) { +func (p *Path) KaimingUniform(name string, dims []int64) (retVal ts.Tensor) { // TODO: implement it // self.var(name, dims, Init::KaimingUniform) @@ -542,12 +548,14 @@ func (p *Path) VarCopy(name string, t ts.Tensor) (retVal ts.Tensor) { } v := p.Zeros(name, size) - retValErr, err := ts.NoGrad(ts.Copy_(v, t)) + retValErr, err := ts.NoGrad(func() { + ts.Copy_(v, t) + }) if err != nil { - return err + log.Fatal(err) } if retValErr != nil { - return retValErr.(error) + log.Fatal(retValErr) } return v @@ -555,14 +563,13 @@ func (p *Path) VarCopy(name string, t ts.Tensor) (retVal ts.Tensor) { // Get gets the tensor corresponding to a given name if present. func (p *Path) Get(name string) (retVal ts.Tensor, err error) { - path := p.path(name) p.varstore.variables.mutex.Lock() defer p.varstore.variables.mutex.Unlock() - v, ok := p.varstore.variables.NamedVariables[path] + v, ok := p.varstore.variables.NamedVariables[name] if !ok { - err = fmt.Errorf("Path - Get method call error: Cannot find variable for name: %v\n", path) + err = fmt.Errorf("Path - Get method call error: Cannot find variable for name: %v\n", name) return retVal, err } @@ -577,7 +584,7 @@ func (p *Path) Entry(name string) (retVal Entry) { return Entry{ name: name, variables: p.varstore.variables, - path: &p, + path: *p, } } @@ -608,12 +615,14 @@ func (e *Entry) OrVarCopy(tensor ts.Tensor) (retVal ts.Tensor) { } v := e.OrZeros(size) - retValErr, err := ts.NoGrad(ts.Copy_(v, tensor)) + retValErr, err := ts.NoGrad(func() { + ts.Copy_(v, tensor) + }) if err != nil { - return err + log.Fatal(err) } if retValErr != nil { - return retValErr.(error) + log.Fatal(retValErr) } return v diff --git a/tensor/index.go b/tensor/index.go index dba2b8c..5007623 100644 --- a/tensor/index.go +++ b/tensor/index.go @@ -248,7 +248,7 @@ func (ts Tensor) indexer(indexSpec []TensorIndexer) (retVal Tensor, err error) { // `spec` is a function type implements `TensorIndexer` for _, spec := range indexSpec { - fmt.Printf("spec type: %v\n", reflect.TypeOf(spec).Name()) + // fmt.Printf("spec type: %v\n", reflect.TypeOf(spec).Name()) switch reflect.TypeOf(spec).Name() { case "InsertNewAxis": @@ -291,8 +291,6 @@ func (ts Tensor) indexer(indexSpec []TensorIndexer) (retVal Tensor, err error) { nextIdx = currIdx + 1 } // end of switch - currTensor.Print() - currTensor = nextTensor currIdx = nextIdx } diff --git a/tensor/tensor-generated-sample.go b/tensor/tensor-generated-sample.go index 429db20..1ae247e 100644 --- a/tensor/tensor-generated-sample.go +++ b/tensor/tensor-generated-sample.go @@ -8,11 +8,11 @@ import ( "log" "unsafe" - gt "github.com/sugarme/gotch" + "github.com/sugarme/gotch" lib "github.com/sugarme/gotch/libtch" ) -func (ts Tensor) To(device gt.Device) (retVal Tensor, err error) { +func (ts Tensor) To(device gotch.Device) (retVal Tensor, err error) { // TODO: how to get pointer to CUDA memory??? // C.cuMemAlloc((*C.ulonglong)(cudaPtr), 1) // 0 byte is invalid @@ -28,7 +28,7 @@ func (ts Tensor) To(device gt.Device) (retVal Tensor, err error) { return Tensor{ctensor: *ptr}, nil } -func (ts Tensor) MustTo(device gt.Device) (retVal Tensor) { +func (ts Tensor) MustTo(device gotch.Device) (retVal Tensor) { var err error retVal, err = ts.To(device) if err != nil { @@ -271,10 +271,10 @@ func (ts Tensor) MustAddG(other Tensor) { } // Totype casts type of tensor to a new tensor with specified DType -func (ts Tensor) Totype(dtype gt.DType) (retVal Tensor, err error) { +func (ts Tensor) Totype(dtype gotch.DType) (retVal Tensor, err error) { ptr := (*lib.Ctensor)(unsafe.Pointer(C.malloc(0))) defer C.free(unsafe.Pointer(ptr)) - cint, err := gt.DType2CInt(dtype) + cint, err := gotch.DType2CInt(dtype) if err != nil { return retVal, err } @@ -291,7 +291,7 @@ func (ts Tensor) Totype(dtype gt.DType) (retVal Tensor, err error) { // Totype casts type of tensor to a new tensor with specified DType. It will // panic if error -func (ts Tensor) MustTotype(dtype gt.DType) (retVal Tensor) { +func (ts Tensor) MustTotype(dtype gotch.DType) (retVal Tensor) { retVal, err := ts.Totype(dtype) if err != nil { log.Fatal(err) @@ -361,6 +361,14 @@ func (ts Tensor) IndexSelect(dim int64, index Tensor) (retVal Tensor, err error) return retVal, nil } +func (ts Tensor) MustIndexSelect(dim int64, index Tensor) (retVal Tensor) { + retVal, err := ts.IndexSelect(dim, index) + if err != nil { + log.Fatal(err) + } + + return retVal +} func Zeros(size []int64, optionsKind, optionsDevice int32) (retVal Tensor, err error) { ptr := (*lib.Ctensor)(unsafe.Pointer(C.malloc(0))) @@ -697,3 +705,26 @@ func (ts Tensor) MustDiv1(other Scalar) (retVal Tensor) { return retVal } + +func Randperm(n int64, optionKind gotch.DType, optionDevice gotch.Device) (retVal Tensor, err error) { + ptr := (*lib.Ctensor)(unsafe.Pointer(C.malloc(0))) + defer C.free(unsafe.Pointer(ptr)) + + lib.AtgRandperm(ptr, n, optionKind.CInt(), optionDevice.CInt()) + if err = TorchErr(); err != nil { + return retVal, err + } + + retVal = Tensor{ctensor: *ptr} + + return retVal, nil +} + +func MustRandperm(n int64, optionKind gotch.DType, optionDevice gotch.Device) (retVal Tensor) { + retVal, err := Randperm(n, optionKind, optionDevice) + if err != nil { + log.Fatal(err) + } + + return retVal +} diff --git a/vision/dataset.go b/vision/dataset.go index f483870..7ccd8b6 100644 --- a/vision/dataset.go +++ b/vision/dataset.go @@ -3,6 +3,7 @@ package vision // A simple dataset structure shared by various computer vision datasets. import ( + "github.com/sugarme/gotch/nn" ts "github.com/sugarme/gotch/tensor" ) @@ -14,4 +15,16 @@ type Dataset struct { Labels int64 } -// TODO: implement methods +// Dataset Methods: +//================= + +// TrainIter creates an iterator of Iter type for train images and labels +func (ds Dataset) TrainIter(batchSize int64) (retVal nn.Iter2) { + return nn.MustNewIter2(ds.TrainImages, ds.TrainLabels, batchSize) + +} + +// TestIter creates an iterator of Iter type for test images and labels +func (ds Dataset) TestIter(batchSize int64) (retVal nn.Iter2) { + return nn.MustNewIter2(ds.TestImages, ds.TestLabels, batchSize) +}