// gotch/nn/varstore.go

package nn

import (
	"fmt"
	"log"
	"reflect"
	"strings"
	"sync"

	"github.com/sugarme/gotch"
	ts "github.com/sugarme/gotch/tensor"
)

// SEP is a separator to separate path elements in the tensor names.
const SEP = "."

// Variables represents a collection of tensors.
//
// NOTE: When the variable store is frozen, `trainable` is still set to true,
// but the tensor is no longer set to require gradients.
type Variables struct {
	mutex              *sync.Mutex
	NamedVariables     map[string]*ts.Tensor
	TrainableVariables []ts.Tensor
}

// VarStore is used to store variables used by one or multiple layers.
// It specifies a SINGLE device where all variables are stored.
type VarStore struct {
	device gotch.Device
	Vars   Variables
}

// Path is a variable store with an associated path for variable naming.
type Path struct {
	path     []string
	varstore *VarStore
}

// Entry holds an entry corresponding to a given name in Path.
type Entry struct {
	name      string
	variables *Variables // corresponds to a MutexGuard in the Rust original
	path      *Path
}

// NewVarStore creates a new variable store located on the specified device.
func NewVarStore(device gotch.Device) *VarStore {
	variables := Variables{
		mutex:              &sync.Mutex{},
		NamedVariables:     make(map[string]*ts.Tensor),
		TrainableVariables: make([]ts.Tensor, 0),
	}

	return &VarStore{
		device: device,
		Vars:   variables,
	}
}
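
// A minimal usage sketch (hedged: assumes this package is imported as `nn`
// alongside `gotch`; the variable name and shape are illustrative only):
//
//	vs := nn.NewVarStore(gotch.CPU)
//	root := vs.Root()
//	w := root.NewVar("weight", []int64{784, 10}, nn.NewKaimingUniformInit())
//	_ = w // use the trainable tensor in a model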

// NOTE:
// To get (initiate) a path, call vs.Root()

// VarStore methods:
// =================

// Device returns the device for this var-store.
func (vs *VarStore) Device() gotch.Device {
	return vs.device
}

// Len returns the number of tensors currently stored on this var-store.
func (vs *VarStore) Len() (retVal int) {
	vs.Vars.mutex.Lock()
	defer vs.Vars.mutex.Unlock()
	retVal = len(vs.Vars.NamedVariables)

	return retVal
}

// IsEmpty returns true if no tensors are currently stored on this var-store.
func (vs *VarStore) IsEmpty() (retVal bool) {
	vs.Vars.mutex.Lock()
	defer vs.Vars.mutex.Unlock()
	retVal = (len(vs.Vars.NamedVariables) == 0)

	return retVal
}

// TrainableVariables returns all trainable variables for this var-store.
func (vs *VarStore) TrainableVariables() (retVal []ts.Tensor) {
	vs.Vars.mutex.Lock()
	defer vs.Vars.mutex.Unlock()

	// Return shallow clones only; assigning the internal slice first and then
	// appending clones to it would return every variable twice.
	retVal = make([]ts.Tensor, 0, len(vs.Vars.TrainableVariables))
	for _, t := range vs.Vars.TrainableVariables {
		retVal = append(retVal, *t.MustShallowClone())
	}

	return retVal
}

// Variables returns all variables and their names in a map[variable_name]Tensor.
func (vs *VarStore) Variables() (retVal map[string]ts.Tensor) {
	vs.Vars.mutex.Lock()
	defer vs.Vars.mutex.Unlock()

	retVal = make(map[string]ts.Tensor, len(vs.Vars.NamedVariables))
	for k, v := range vs.Vars.NamedVariables {
		retVal[k] = *v.MustShallowClone()
	}

	return retVal
}

// Root gets the root path for this var-store.
//
// NOTE: Variables are named and organized using paths. This function returns
// the top-level path for the var-store; sub-paths for naming variables can be
// derived from it via `Sub`.
func (vs *VarStore) Root() *Path {
	return &Path{
		path:     []string{},
		varstore: vs,
	}
}
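
// Path naming sketch (hedged: the names shown are illustrative; SEP is "."):
//
//	vs := nn.NewVarStore(gotch.CPU)
//	block := vs.Root().Sub("encoder").Sub("layer1")
//	w := block.Zeros("weight", []int64{4, 4})
//	// The variable is stored under the name "encoder.layer1.weight".
//	_ = w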

// Save saves the var-store variable values to a file.
//
// NOTE: Weight values for all the tensors currently stored in the
// var-store get saved in the given file.
func (vs *VarStore) Save(filepath string) error {
	vs.Vars.mutex.Lock()
	defer vs.Vars.mutex.Unlock()

	// Convert map to []NamedTensor
	var namedTensors []ts.NamedTensor
	for k, v := range vs.Vars.NamedVariables {
		namedTensors = append(namedTensors, ts.NamedTensor{
			Name:   k,
			Tensor: v,
		})
	}

	return ts.SaveMultiNew(namedTensors, filepath)
}
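
// Save usage sketch (hedged: "model.gt" is a hypothetical file name):
//
//	if err := vs.Save("model.gt"); err != nil {
//		log.Fatal(err)
//	}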

// Load loads the var-store variable values from a file.
//
// NOTE: Weight values for all the tensors currently stored in the
// var-store get loaded from the given file. Note that the set of
// variables stored in the var-store is not changed, only the values
// for these tensors are modified.
// It returns an error if a variable in the var-store cannot be found
// among the loaded tensors.
func (vs *VarStore) Load(filepath string) error {
	namedTensors, err := ts.LoadMultiWithDevice(filepath, vs.device)
	if err != nil {
		return err
	}

	namedTensorsMap := make(map[string]ts.Tensor, len(namedTensors))
	for _, namedTensor := range namedTensors {
		namedTensorsMap[namedTensor.Name] = *namedTensor.Tensor
	}

	// Match and in-place copy values (update) from the newly loaded tensors
	// to existing named tensors if the name matches. Return an error otherwise.
	vs.Vars.mutex.Lock()
	defer vs.Vars.mutex.Unlock()

	for tsName := range vs.Vars.NamedVariables {
		// missing variable
		currTs, ok := namedTensorsMap[tsName]
		if !ok {
			return fmt.Errorf("Cannot find tensor with name: %v in the loaded file.\n", tsName)
		}

		// mismatched shape
		sourceShape := currTs.MustSize()
		destShape := vs.Vars.NamedVariables[tsName].MustSize()
		if !reflect.DeepEqual(sourceShape, destShape) {
			return fmt.Errorf("Mismatched shape error for variable name: %v - At store: %v - At source: %v\n", tsName, destShape, sourceShape)
		}

		ts.NoGrad(func() {
			vs.Vars.NamedVariables[tsName].Copy_(&currTs)
		})
	}

	return nil
}
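
// Load usage sketch (hedged: assumes a file previously written by Save, and
// that the model has been rebuilt on this store so the variable names match):
//
//	vs := nn.NewVarStore(gotch.CPU)
//	// ... build the model on vs.Root() ...
//	if err := vs.Load("model.gt"); err != nil {
//		log.Fatal(err)
//	}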

// LoadPartial loads the var-store variable values from a file if it exists.
//
// Weight values for the tensors currently stored in both the var-store and
// the given file get loaded from the given file. If a variable in the
// var-store is not present in the given file, it is skipped and its values
// are not updated. This method should be used if pre-trained weights for only
// parts of the model are available.
// Note that the set of variables stored in the var-store is not changed, only
// the values for these tensors are modified.
//
// It returns a slice containing the names of the missing variables.
func (vs *VarStore) LoadPartial(filepath string) (retVal []string, err error) {
	namedTensors, err := ts.LoadMultiWithDevice(filepath, vs.device)
	if err != nil {
		return nil, err
	}

	namedTensorsMap := make(map[string]*ts.Tensor, len(namedTensors))
	for _, namedTensor := range namedTensors {
		namedTensorsMap[namedTensor.Name] = namedTensor.Tensor
	}

	var missingVariables []string

	// Match and in-place copy values (update) from the newly loaded tensors
	// to existing named tensors if the name matches. Record the name as
	// missing otherwise.
	vs.Vars.mutex.Lock()
	defer vs.Vars.mutex.Unlock()

	for tsName := range vs.Vars.NamedVariables {
		// missing variable
		currTs, ok := namedTensorsMap[tsName]
		if !ok {
			missingVariables = append(missingVariables, tsName)
			continue // without this, currTs would be nil below
		}

		// mismatched shape
		sourceShape := currTs.MustSize()
		destShape := vs.Vars.NamedVariables[tsName].MustSize()
		if !reflect.DeepEqual(sourceShape, destShape) {
			fmt.Printf("Mismatched shape for variable name: %v - At store: %v - At source: %v\n", tsName, destShape, sourceShape)
			missingVariables = append(missingVariables, tsName)
			continue
		}

		ts.NoGrad(func() {
			vs.Vars.NamedVariables[tsName].Copy_(currTs)
		})
	}

	return missingVariables, nil
}
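
// LoadPartial usage sketch (hedged: "pretrained.gt" is a hypothetical file):
//
//	missing, err := vs.LoadPartial("pretrained.gt")
//	if err != nil {
//		log.Fatal(err)
//	}
//	for _, name := range missing {
//		fmt.Printf("not loaded from file: %v\n", name)
//	}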

// Freeze freezes a var-store.
//
// Gradients for the variables in this store are not tracked anymore.
func (vs *VarStore) Freeze() {
	vs.Vars.mutex.Lock()
	defer vs.Vars.mutex.Unlock()

	for _, v := range vs.Vars.TrainableVariables {
		_, err := v.SetRequiresGrad(false, false)
		if err != nil {
			log.Fatalf("Freeze() Error: %v\n", err)
		}
	}
}

// Unfreeze unfreezes a var-store.
//
// Gradients for the variables in this store are tracked again.
func (vs *VarStore) Unfreeze() {
	vs.Vars.mutex.Lock()
	defer vs.Vars.mutex.Unlock()

	for _, v := range vs.Vars.TrainableVariables {
		_, err := v.SetRequiresGrad(true, false)
		if err != nil {
			log.Fatalf("Unfreeze() Error: %v\n", err)
		}
	}
}
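
// Freeze/Unfreeze sketch (hedged: the surrounding inference/training steps
// are elided):
//
//	vs.Freeze()   // run forward passes without tracking gradients
//	// ... inference ...
//	vs.Unfreeze() // resume training; gradients are tracked again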

// Copy copies variable values from a source var-store to this var-store.
//
// All the variables in this var-store have to exist with the same
// name in the source var-store, otherwise an error is returned.
func (vs *VarStore) Copy(src VarStore) (err error) {
	vs.Vars.mutex.Lock()
	defer vs.Vars.mutex.Unlock()
	src.Vars.mutex.Lock()
	defer src.Vars.mutex.Unlock()

	srcNamedVariables := src.Vars.NamedVariables
	device := vs.device

	for k := range vs.Vars.NamedVariables {
		if _, ok := srcNamedVariables[k]; !ok {
			return fmt.Errorf("VarStore copy error: cannot find %v in the source var-store.\n", k)
		}
	}

	for k, v := range vs.Vars.NamedVariables {
		srcTs := srcNamedVariables[k]
		srcDevTs, err := srcTs.To(device, false)
		if err != nil {
			return err
		}
		ts.NoGrad(func() {
			ts.Copy_(v, srcDevTs)
		})
	}

	return nil
}
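
// Copy usage sketch (hedged: assumes gotch.CudaIfAvailable() for device
// selection; both stores must hold the same variable names, e.g. by building
// the same model on each):
//
//	cpuVS := nn.NewVarStore(gotch.CPU)
//	gpuVS := nn.NewVarStore(gotch.CudaIfAvailable())
//	// ... build the same model on both stores ...
//	if err := gpuVS.Copy(*cpuVS); err != nil {
//		log.Fatal(err)
//	}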

// Path methods:
// =============

// Sub gets a sub-path of the given path.
func (p *Path) Sub(str string) *Path {
	if strings.Contains(str, SEP) {
		log.Fatalf("Sub name cannot contain %v (%v)\n", SEP, str)
	}

	// Copy the parent path elements so that appending here can never share
	// (and later clobber) the parent's backing array.
	path := make([]string, len(p.path), len(p.path)+1)
	copy(path, p.path)
	path = append(path, str)

	return &Path{
		path:     path,
		varstore: p.varstore,
	}
}

// Device gets the device where the var-store variables are stored.
func (p *Path) Device() gotch.Device {
	return p.varstore.device
}

// NOTE: cannot be named `path` as that would collide with the `path` field.
func (p *Path) getpath(name string) (retVal string) {
	if strings.Contains(name, SEP) {
		log.Fatalf("Variable name cannot contain %v (%v)\n", SEP, name)
	}

	if len(p.path) == 0 {
		return name
	}

	return fmt.Sprintf("%v%v%v", strings.Join(p.path, SEP), SEP, name)
}

func (p *Path) add(name string, newTs *ts.Tensor, trainable bool) (retVal *ts.Tensor) {
	path := p.getpath(name)

	p.varstore.Vars.mutex.Lock()
	defer p.varstore.Vars.mutex.Unlock()

	// Disambiguate the name if it is already taken.
	if _, ok := p.varstore.Vars.NamedVariables[path]; ok {
		path = fmt.Sprintf("%v__%v", path, len(p.varstore.Vars.NamedVariables))
	}

	var (
		tensor *ts.Tensor
		err    error
	)
	if trainable {
		tensor, err = newTs.MustShallowClone().SetRequiresGrad(true, false)
		if err != nil {
			log.Fatalf("Path 'add' method error: %v\n", err)
		}
		p.varstore.Vars.TrainableVariables = append(p.varstore.Vars.TrainableVariables, *tensor)
	} else {
		tensor = newTs.MustShallowClone()
	}

	p.varstore.Vars.NamedVariables[path] = tensor

	return tensor
}

// getOrAddWithLock assumes the caller synchronizes access to `variables`.
func (p *Path) getOrAddWithLock(name string, tensor *ts.Tensor, trainable bool, variables *Variables) (retVal *ts.Tensor) {
	path := p.getpath(name)

	// if found, return it
	if v, ok := variables.NamedVariables[path]; ok {
		return v
	}

	// not found, add it
	var err error
	var ttensor *ts.Tensor
	if trainable {
		ttensor, err = tensor.SetRequiresGrad(true, false)
		if err != nil {
			log.Fatalf("Path - call method 'getOrAddWithLock' error: %v\n", err)
		}

		// NOTE: `variables` must be a pointer here; appending through a value
		// copy would silently drop the new trainable variable.
		variables.TrainableVariables = append(variables.TrainableVariables, *ttensor)
	} else {
		ttensor = tensor
	}

	variables.NamedVariables[path] = ttensor

	return ttensor
}

// ZerosNoTrain creates a new variable initialized with zeros.
//
// The new variable is named according to the name parameter and
// has the specified shape. The variable will not be trainable, so
// gradients will not be tracked.
// The variable uses a float tensor initialized with zeros.
func (p *Path) ZerosNoTrain(name string, dims []int64) (retVal *ts.Tensor) {
	device := p.Device()
	z, err := ts.Zeros(dims, gotch.Float, device)
	if err != nil {
		log.Fatalf("Path - 'ZerosNoTrain' method call error: %v\n", err)
	}

	return p.add(name, z, false)
}

// OnesNoTrain creates a new variable initialized with ones.
//
// The new variable is named according to the name parameter and
// has the specified shape. The variable will not be trainable, so
// gradients will not be tracked.
// The variable uses a float tensor initialized with ones.
func (p *Path) OnesNoTrain(name string, dims []int64) (retVal *ts.Tensor) {
	device := p.Device()
	z, err := ts.Ones(dims, gotch.Float, device)
	if err != nil {
		log.Fatalf("Path - 'OnesNoTrain' method call error: %v\n", err)
	}

	return p.add(name, z, false)
}

// NewVar creates a new variable.
//
// The new variable is named according to the name parameter and
// has the specified shape. The variable is trainable; its gradient
// will be tracked.
// The variable uses a float tensor initialized as per the
// related argument.
func (p *Path) NewVar(name string, dims []int64, ini Init) (retVal *ts.Tensor) {
	v := ini.InitTensor(dims, p.varstore.device)

	return p.add(name, v, true)
}
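
// NewVar usage sketch (hedged: shapes and names are illustrative):
//
//	root := vs.Root()
//	w := root.NewVar("weight", []int64{256, 128}, nn.NewUniformInit(-0.1, 0.1))
//	b := root.NewVar("bias", []int64{256}, nn.NewConstInit(0.0))
//	_, _ = w, b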

// Zeros creates a new variable initialized with zeros.
//
// The new variable is named according to the name parameter and
// has the specified shape. The variable is trainable; its gradient
// will be tracked.
// The variable uses a float tensor initialized with zeros.
func (p *Path) Zeros(name string, dims []int64) (retVal *ts.Tensor) {
	return p.NewVar(name, dims, NewConstInit(0.0))
}

// Ones creates a new variable initialized with ones.
//
// The new variable is named according to the name parameter and
// has the specified shape. The variable is trainable; its gradient
// will be tracked.
// The variable uses a float tensor initialized with ones.
func (p *Path) Ones(name string, dims []int64) (retVal *ts.Tensor) {
	return p.NewVar(name, dims, NewConstInit(1.0))
}

// RandnStandard creates a new variable initialized randomly with a normal distribution.
//
// The new variable is named according to the name parameter and
// has the specified shape. The variable is trainable; its gradient
// will be tracked.
// The variable uses a float tensor initialized randomly using a
// standard normal distribution.
func (p *Path) RandnStandard(name string, dims []int64) (retVal *ts.Tensor) {
	return p.NewVar(name, dims, NewRandnInit(0.0, 1.0))
}

// Randn creates a new variable initialized randomly with a normal distribution.
//
// The new variable is named according to the name parameter and
// has the specified shape. The variable is trainable; its gradient
// will be tracked.
// The variable uses a float tensor initialized randomly using a
// normal distribution with the specified mean and standard deviation.
func (p *Path) Randn(name string, dims []int64, mean float64, stdev float64) (retVal *ts.Tensor) {
	return p.NewVar(name, dims, NewRandnInit(mean, stdev))
}

// Uniform creates a new variable initialized randomly with a uniform distribution.
//
// The new variable is named according to the name parameter and
// has the specified shape. The variable is trainable; its gradient
// will be tracked.
// The variable uses a float tensor initialized randomly using a
// uniform distribution between the specified bounds.
func (p *Path) Uniform(name string, dims []int64, lo, up float64) (retVal *ts.Tensor) {
	return p.NewVar(name, dims, NewUniformInit(lo, up))
}

// KaimingUniform creates a new variable initialized randomly with Kaiming uniform.
//
// The new variable is named according to the name parameter and
// has the specified shape. The variable is trainable; its gradient
// will be tracked.
// The variable uses a float tensor initialized randomly using a
// uniform distribution whose bounds follow Kaiming initialization.
func (p *Path) KaimingUniform(name string, dims []int64) (retVal *ts.Tensor) {
	return p.NewVar(name, dims, NewKaimingUniformInit())
}
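
// Initializer helpers sketch (hedged: all of these wrap NewVar with a
// built-in Init; names and shapes are illustrative):
//
//	p := vs.Root().Sub("layer1")
//	_ = p.Zeros("bias", []int64{64})
//	_ = p.Randn("weight", []int64{64, 32}, 0.0, 0.02)
//	_ = p.KaimingUniform("kernel", []int64{64, 32, 3, 3})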

// VarCopy creates a new variable initialized by copying an existing tensor.
//
// The new variable is named according to the name parameter and
// has the shape of the given tensor. The variable is trainable; its
// gradient will be tracked.
// The variable uses a float tensor initialized by copying the
// given tensor.
func (p *Path) VarCopy(name string, t *ts.Tensor) (retVal *ts.Tensor) {
	size, err := t.Size()
	if err != nil {
		log.Fatalf("Path - VarCopy method call error: %v\n", err)
	}
	v := p.Zeros(name, size)

	ts.NoGrad(func() {
		ts.Copy_(v, t)
	})

	return v
}
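
// VarCopy usage sketch (hedged: `pretrainedW` stands in for any existing
// tensor whose values should seed the new variable):
//
//	pretrainedW := ts.MustOnes([]int64{10, 10}, gotch.Float, gotch.CPU)
//	w := vs.Root().VarCopy("weight", pretrainedW)
//	_ = w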

// Get gets the tensor corresponding to a given name (the full, path-qualified
// variable name as stored in the var-store), if present.
func (p *Path) Get(name string) (retVal *ts.Tensor, err error) {
	p.varstore.Vars.mutex.Lock()
	defer p.varstore.Vars.mutex.Unlock()

	v, ok := p.varstore.Vars.NamedVariables[name]
	if !ok {
		err = fmt.Errorf("Path - Get method call error: cannot find variable for name: %v\n", name)
		return nil, err
	}

	return v.ShallowClone()
}
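
// Get usage sketch (hedged: the name must match the stored, path-qualified
// variable name, e.g. one created via Sub paths earlier):
//
//	w, err := vs.Root().Get("encoder.layer1.weight")
//	if err != nil {
//		log.Fatal(err)
//	}
//	_ = w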

// Entry gets the entry corresponding to a given name for in-place manipulation.
func (p *Path) Entry(name string) *Entry {
	p.varstore.Vars.mutex.Lock()
	defer p.varstore.Vars.mutex.Unlock()

	return &Entry{
		name:      name,
		variables: &p.varstore.Vars,
		path:      p,
	}
}
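
// Entry usage sketch (hedged: creates "embedding" on first use and returns
// the existing tensor on subsequent uses; the shape is illustrative):
//
//	e := vs.Root().Entry("embedding")
//	emb := e.OrVar([]int64{1000, 64}, nn.NewRandnInit(0.0, 1.0))
//	_ = emb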

// Entry methods:
// ==============

// OrVar returns the existing tensor for this entry if present, otherwise it
// creates a new variable.
//
// If this entry name matches the name of a variable stored in the
// var-store, the corresponding tensor is returned. Otherwise a new
// variable is added to the var-store with the entry name and is
// initialized according to the init parameter.
func (e *Entry) OrVar(dims []int64, init Init) (retVal *ts.Tensor) {
	v := init.InitTensor(dims, e.path.varstore.device)

	return e.path.getOrAddWithLock(e.name, v, true, e.variables)
}

// OrVarCopy returns the existing tensor for this entry if present, otherwise
// it creates a new variable initialized by copying the given tensor.
func (e *Entry) OrVarCopy(tensor *ts.Tensor) (retVal *ts.Tensor) {
	size, err := tensor.Size()
	if err != nil {
		log.Fatalf("Entry - OrVarCopy method call error: %v\n", err)
	}
	v := e.OrZeros(size)

	ts.NoGrad(func() {
		ts.Copy_(v, tensor)
	})

	return v
}

// OrKaimingUniform returns the existing tensor for this entry if present,
// otherwise it creates a new Kaiming-uniform-initialized variable.
func (e *Entry) OrKaimingUniform(dims []int64) (retVal *ts.Tensor) {
	return e.OrVar(dims, NewKaimingUniformInit())
}

// OrOnes returns the existing tensor for this entry if present, otherwise it
// creates a new variable initialized with ones.
func (e *Entry) OrOnes(dims []int64) (retVal *ts.Tensor) {
	return e.OrVar(dims, NewConstInit(1.0))
}

// OrOnesNoTrain returns the existing tensor for this entry if present,
// otherwise it creates a new non-trainable variable initialized with ones.
func (e *Entry) OrOnesNoTrain(dims []int64) (retVal *ts.Tensor) {
	o := ts.MustOnes(dims, gotch.Float, e.path.Device())

	// trainable must be false here: this is the "NoTrain" variant.
	return e.path.getOrAddWithLock(e.name, o, false, e.variables)
}

// OrRandn returns the existing tensor for this entry if present, otherwise it
// creates a new variable initialized with the given normal distribution.
func (e *Entry) OrRandn(dims []int64, mean, stdev float64) (retVal *ts.Tensor) {
	return e.OrVar(dims, NewRandnInit(mean, stdev))
}

// OrRandnStandard returns the existing tensor for this entry if present,
// otherwise it creates a new variable initialized with the standard normal
// distribution.
func (e *Entry) OrRandnStandard(dims []int64) (retVal *ts.Tensor) {
	return e.OrVar(dims, NewRandnInit(0.0, 1.0))
}

// OrUniform returns the existing tensor for this entry if present, otherwise
// it creates a new variable initialized with the given uniform distribution.
func (e *Entry) OrUniform(dims []int64, lo, up float64) (retVal *ts.Tensor) {
	return e.OrVar(dims, NewUniformInit(lo, up))
}

// OrZeros returns the existing tensor for this entry if present, otherwise it
// creates a new variable initialized with zeros.
func (e *Entry) OrZeros(dims []int64) (retVal *ts.Tensor) {
	return e.OrVar(dims, NewConstInit(0.0))
}

// OrZerosNoTrain returns the existing tensor for this entry if present,
// otherwise it creates a new non-trainable variable initialized with zeros.
func (e *Entry) OrZerosNoTrain(dims []int64) (retVal *ts.Tensor) {
	z := ts.MustZeros(dims, gotch.Float, e.path.Device())

	// trainable must be false here: this is the "NoTrain" variant.
	return e.path.getOrAddWithLock(e.name, z, false, e.variables)
}

// TODO: can we implement the `Div` operator in Go?
// NOTE: in the Rust original, `Rhs` (right-hand side) is a generic type
// parameter; if not given, it defaults to the `Self` type.
/*
 * impl<'a, T> Div<T> for &'a mut Path<'a>
 * where
 *     T: std::string::ToString,
 * {
 *     type Output = Path<'a>;
 *
 *     fn div(self, rhs: T) -> Self::Output {
 *         self.sub(rhs.to_string())
 *     }
 * }
 *
 * impl<'a, T> Div<T> for &'a Path<'a>
 * where
 *     T: std::string::ToString,
 * {
 *     type Output = Path<'a>;
 *
 *     fn div(self, rhs: T) -> Self::Output {
 *         self.sub(rhs.to_string())
 *     }
 * }
 */