Fixed runner
This commit is contained in:
parent
972b9b9b67
commit
bf5a706ecc
@ -26,7 +26,7 @@ type Layer struct {
|
||||
ExpType int `db:"mdl.exp_type" json:"exp_type"`
|
||||
}
|
||||
|
||||
func (x *Layer) ShapeToSize() error {
|
||||
func (x *Layer) ShapeToSize() {
|
||||
v := x.GetShape()
|
||||
switch x.LayerType {
|
||||
case LAYER_INPUT:
|
||||
@ -35,9 +35,7 @@ func (x *Layer) ShapeToSize() error {
|
||||
x.Shape = fmt.Sprintf("(%d)", v[0])
|
||||
default:
|
||||
x.Shape = "ERROR"
|
||||
// DO nothing
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func ShapeToString(args ...int) string {
|
||||
|
@ -339,7 +339,7 @@ func generateCvsExpandExp(c BasePack, run_path string, model_id string, offset i
|
||||
return
|
||||
}
|
||||
|
||||
func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string, load_prev bool) (accuracy float64, err error) {
|
||||
func trainDefinitionExpandExp(c BasePack, model *BaseModel, def Definition, load_prev bool) (accuracy float64, err error) {
|
||||
accuracy = 0
|
||||
|
||||
l := c.GetLogger()
|
||||
@ -354,7 +354,7 @@ func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string
|
||||
}
|
||||
|
||||
// status = 2 (INIT) 3 (TRAINING)
|
||||
heads, err := GetDbMultitple[ExpHead](c.GetDb(), "exp_model_head where def_id=$1 and (status = 2 or status = 3)", definition_id)
|
||||
heads, err := GetDbMultitple[ExpHead](c.GetDb(), "exp_model_head where def_id=$1 and (status = 2 or status = 3)", def.Id)
|
||||
if err != nil {
|
||||
return
|
||||
} else if len(heads) == 0 {
|
||||
@ -373,62 +373,49 @@ func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string
|
||||
return
|
||||
}
|
||||
|
||||
layers, err := c.GetDb().Query("select layer_type, shape, exp_type from model_definition_layer where def_id=$1 order by layer_order asc;", definition_id)
|
||||
layers, err := def.GetLayers(c.GetDb(), " order by layer_order asc;")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer layers.Close()
|
||||
|
||||
type layerrow struct {
|
||||
LayerType int
|
||||
Shape string
|
||||
ExpType int
|
||||
LayerNum int
|
||||
}
|
||||
|
||||
got := []layerrow{}
|
||||
var got []*Layer
|
||||
|
||||
i := 1
|
||||
var last *layerrow = nil
|
||||
var last *Layer = nil
|
||||
got_2 := false
|
||||
|
||||
var first *layerrow = nil
|
||||
var first *Layer = nil
|
||||
|
||||
for layers.Next() {
|
||||
var row = layerrow{}
|
||||
if err = layers.Scan(&row.LayerType, &row.Shape, &row.ExpType); err != nil {
|
||||
return
|
||||
}
|
||||
for _, layer := range layers {
|
||||
layer.ShapeToSize()
|
||||
|
||||
// Keep track of the first layer so we can keep the size of the image
|
||||
if first == nil {
|
||||
first = &row
|
||||
first = layer
|
||||
}
|
||||
|
||||
row.LayerNum = i
|
||||
row.Shape = shapeToSize(row.Shape)
|
||||
if row.ExpType == 2 {
|
||||
if layer.ExpType == 2 {
|
||||
if !got_2 {
|
||||
got = append(got, *last)
|
||||
got = append(got, last)
|
||||
got_2 = true
|
||||
}
|
||||
got = append(got, row)
|
||||
got = append(got, layer)
|
||||
}
|
||||
last = &row
|
||||
last = layer
|
||||
i += 1
|
||||
}
|
||||
|
||||
got = append(got, layerrow{
|
||||
got = append(got, &Layer{
|
||||
LayerType: LAYER_DENSE,
|
||||
Shape: fmt.Sprintf("%d", exp.End-exp.Start+1),
|
||||
ExpType: 2,
|
||||
LayerNum: i,
|
||||
LayerOrder: len(got),
|
||||
})
|
||||
|
||||
l.Info("Got layers", "layers", got)
|
||||
|
||||
// Generate run folder
|
||||
run_path := path.Join("/tmp", model.Id+"-defs-"+definition_id+"-retrain")
|
||||
run_path := path.Join("/tmp", model.Id+"-defs-"+def.Id+"-retrain")
|
||||
|
||||
err = os.MkdirAll(run_path, os.ModePerm)
|
||||
if err != nil {
|
||||
@ -459,7 +446,7 @@ func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string
|
||||
}
|
||||
|
||||
// Copy result around
|
||||
result_path := path.Join("savedData", model.Id, "defs", definition_id)
|
||||
result_path := path.Join("savedData", model.Id, "defs", def.Id)
|
||||
|
||||
if err = tmpl.Execute(f, AnyMap{
|
||||
"Layers": got,
|
||||
@ -515,7 +502,7 @@ func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string
|
||||
return
|
||||
}
|
||||
|
||||
func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load_prev bool) (accuracy float64, err error) {
|
||||
func trainDefinitionExp(c BasePack, model *BaseModel, def Definition, load_prev bool) (accuracy float64, err error) {
|
||||
accuracy = 0
|
||||
l := c.GetLogger()
|
||||
db := c.GetDb()
|
||||
@ -531,7 +518,7 @@ func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load
|
||||
}
|
||||
|
||||
// status = 2 (INIT) 3 (TRAINING)
|
||||
heads, err := GetDbMultitple[ExpHead](db, "exp_model_head where def_id=$1 and (status = 2 or status = 3)", definition_id)
|
||||
heads, err := GetDbMultitple[ExpHead](db, "exp_model_head where def_id=$1 and (status = 2 or status = 3)", def.Id)
|
||||
if err != nil {
|
||||
return
|
||||
} else if len(heads) == 0 {
|
||||
@ -549,42 +536,24 @@ func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load
|
||||
return
|
||||
}
|
||||
|
||||
layers, err := db.Query("select layer_type, shape, exp_type from model_definition_layer where def_id=$1 order by layer_order asc;", definition_id)
|
||||
layers, err := def.GetLayers(db, " order by layer_order asc;")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer layers.Close()
|
||||
|
||||
type layerrow struct {
|
||||
LayerType int
|
||||
Shape string
|
||||
ExpType int
|
||||
LayerNum int
|
||||
for _, layer := range layers {
|
||||
layer.ShapeToSize()
|
||||
}
|
||||
|
||||
got := []layerrow{}
|
||||
i := 1
|
||||
|
||||
for layers.Next() {
|
||||
var row = layerrow{}
|
||||
if err = layers.Scan(&row.LayerType, &row.Shape, &row.ExpType); err != nil {
|
||||
return
|
||||
}
|
||||
row.LayerNum = i
|
||||
row.Shape = shapeToSize(row.Shape)
|
||||
got = append(got, row)
|
||||
i += 1
|
||||
}
|
||||
|
||||
got = append(got, layerrow{
|
||||
layers = append(layers, &Layer{
|
||||
LayerType: LAYER_DENSE,
|
||||
Shape: fmt.Sprintf("%d", exp.End-exp.Start+1),
|
||||
ExpType: 2,
|
||||
LayerNum: i,
|
||||
LayerOrder: len(layers),
|
||||
})
|
||||
|
||||
// Generate run folder
|
||||
run_path := path.Join("/tmp", model.Id+"-defs-"+definition_id)
|
||||
run_path := path.Join("/tmp", model.Id+"-defs-"+def.Id)
|
||||
|
||||
err = os.MkdirAll(run_path, os.ModePerm)
|
||||
if err != nil {
|
||||
@ -611,11 +580,11 @@ func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load
|
||||
}
|
||||
|
||||
// Copy result around
|
||||
result_path := path.Join("savedData", model.Id, "defs", definition_id)
|
||||
result_path := path.Join("savedData", model.Id, "defs", def.Id)
|
||||
|
||||
if err = tmpl.Execute(f, AnyMap{
|
||||
"Layers": got,
|
||||
"Size": got[0].Shape,
|
||||
"Layers": layers,
|
||||
"Size": layers[0].Shape,
|
||||
"DataDir": path.Join(getDir(), "savedData", model.Id, "data"),
|
||||
"HeadId": exp.Id,
|
||||
"RunPath": run_path,
|
||||
@ -683,21 +652,6 @@ func remove[T interface{}](lst []T, i int) []T {
|
||||
return append(lst[:i], lst[i+1:]...)
|
||||
}
|
||||
|
||||
type TrainModelRow struct {
|
||||
id string
|
||||
target_accuracy int
|
||||
epoch int
|
||||
acuracy float64
|
||||
}
|
||||
|
||||
type TraingModelRowDefinitions []TrainModelRow
|
||||
|
||||
func (nf TraingModelRowDefinitions) Len() int { return len(nf) }
|
||||
func (nf TraingModelRowDefinitions) Swap(i, j int) { nf[i], nf[j] = nf[j], nf[i] }
|
||||
func (nf TraingModelRowDefinitions) Less(i, j int) bool {
|
||||
return nf[i].acuracy < nf[j].acuracy
|
||||
}
|
||||
|
||||
type ToRemoveList []int
|
||||
|
||||
func (nf ToRemoveList) Len() int { return len(nf) }
|
||||
@ -886,33 +840,18 @@ func trainModel(c BasePack, model *BaseModel) (err error) {
|
||||
return
|
||||
}
|
||||
|
||||
type TrainModelRowUsable struct {
|
||||
Id string
|
||||
TargetAccuracy int `db:"target_accuracy"`
|
||||
Epoch int
|
||||
Acuracy float64 `db:"0"`
|
||||
}
|
||||
|
||||
type TrainModelRowUsables []*TrainModelRowUsable
|
||||
|
||||
func (nf TrainModelRowUsables) Len() int { return len(nf) }
|
||||
func (nf TrainModelRowUsables) Swap(i, j int) { nf[i], nf[j] = nf[j], nf[i] }
|
||||
func (nf TrainModelRowUsables) Less(i, j int) bool {
|
||||
return nf[i].Acuracy < nf[j].Acuracy
|
||||
}
|
||||
|
||||
func trainModelExp(c BasePack, model *BaseModel) (err error) {
|
||||
l := c.GetLogger()
|
||||
db := c.GetDb()
|
||||
|
||||
var definitions TrainModelRowUsables
|
||||
|
||||
definitions, err = GetDbMultitple[TrainModelRowUsable](db, "model_definition where status=$1 and model_id=$2", MODEL_DEFINITION_STATUS_INIT, model.Id)
|
||||
defs_, err := model.GetDefinitions(db, " and status=$2;", MODEL_DEFINITION_STATUS_INIT)
|
||||
if err != nil {
|
||||
l.Error("Failed to get definitions")
|
||||
return
|
||||
}
|
||||
if len(definitions) == 0 {
|
||||
var defs SortByAccuracyDefinitions = defs_
|
||||
|
||||
if len(defs) == 0 {
|
||||
l.Error("No Definitions defined!")
|
||||
return errors.New("No Definitions found")
|
||||
}
|
||||
@ -922,9 +861,9 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
|
||||
|
||||
for {
|
||||
var toRemove ToRemoveList = []int{}
|
||||
for i, def := range definitions {
|
||||
for i, def := range defs {
|
||||
ModelDefinitionUpdateStatus(c, def.Id, MODEL_DEFINITION_STATUS_TRAINING)
|
||||
accuracy, err := trainDefinitionExp(c, model, def.Id, !firstRound)
|
||||
accuracy, err := trainDefinitionExp(c, model, *def, !firstRound)
|
||||
if err != nil {
|
||||
l.Error("Failed to train definition!Err:", "err", err)
|
||||
ModelDefinitionUpdateStatus(c, def.Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
|
||||
@ -933,10 +872,10 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
|
||||
}
|
||||
def.Epoch += EPOCH_PER_RUN
|
||||
accuracy = accuracy * 100
|
||||
def.Acuracy = float64(accuracy)
|
||||
def.Accuracy = float64(accuracy)
|
||||
|
||||
definitions[i].Epoch += EPOCH_PER_RUN
|
||||
definitions[i].Acuracy = accuracy
|
||||
defs[i].Epoch += EPOCH_PER_RUN
|
||||
defs[i].Accuracy = accuracy
|
||||
|
||||
if accuracy >= float64(def.TargetAccuracy) {
|
||||
l.Info("Found a definition that reaches target_accuracy!")
|
||||
@ -986,10 +925,10 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
|
||||
l.Info("Round done", "toRemove", toRemove)
|
||||
|
||||
for _, n := range toRemove {
|
||||
definitions = remove(definitions, n)
|
||||
defs = remove(defs, n)
|
||||
}
|
||||
|
||||
len_def := len(definitions)
|
||||
len_def := len(defs)
|
||||
|
||||
if len_def == 0 {
|
||||
break
|
||||
@ -997,14 +936,14 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
|
||||
continue
|
||||
}
|
||||
|
||||
sort.Sort(sort.Reverse(definitions))
|
||||
acc := definitions[0].Acuracy - 20.0
|
||||
sort.Sort(sort.Reverse(defs))
|
||||
acc := defs[0].Accuracy - 20.0
|
||||
|
||||
l.Info("Training models, Highest acc", "acc", definitions[0].Acuracy, "mod_acc", acc)
|
||||
l.Info("Training models, Highest acc", "acc", defs[0].Accuracy, "mod_acc", acc)
|
||||
|
||||
toRemove = []int{}
|
||||
for i, def := range definitions {
|
||||
if def.Acuracy < acc {
|
||||
for i, def := range defs {
|
||||
if def.Accuracy < acc {
|
||||
toRemove = append(toRemove, i)
|
||||
}
|
||||
}
|
||||
@ -1014,8 +953,8 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
|
||||
sort.Sort(sort.Reverse(toRemove))
|
||||
for _, n := range toRemove {
|
||||
l.Warn("Removing definition not fast enough learning", "n", n)
|
||||
ModelDefinitionUpdateStatus(c, definitions[n].Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
|
||||
definitions = remove(definitions, n)
|
||||
ModelDefinitionUpdateStatus(c, defs[n].Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
|
||||
defs = remove(defs, n)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1030,6 +969,12 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
|
||||
return err
|
||||
}
|
||||
|
||||
err = model.UpdateStatus(db, FAILED_TRAINING)
|
||||
if err != nil {
|
||||
l.Error("All definitions failed to train! And Failed to set model status")
|
||||
return err
|
||||
}
|
||||
|
||||
l.Error("All definitions failed to train!")
|
||||
return err
|
||||
} else if err != nil {
|
||||
@ -1485,31 +1430,31 @@ func trainExpandable(c *Context, model *BaseModel) {
|
||||
ResetClasses(c, model)
|
||||
}
|
||||
|
||||
var definitions TrainModelRowUsables
|
||||
|
||||
definitions, err = GetDbMultitple[TrainModelRowUsable](c, "model_definition where status=$1 and model_id=$2", MODEL_DEFINITION_STATUS_READY, model.Id)
|
||||
defs_, err := model.GetDefinitions(c, " and status=$2", MODEL_DEFINITION_STATUS_READY)
|
||||
if err != nil {
|
||||
failed("Failed to get definitions")
|
||||
return
|
||||
}
|
||||
if len(definitions) != 1 {
|
||||
var defs SortByAccuracyDefinitions = defs_
|
||||
|
||||
if len(defs) != 1 {
|
||||
failed("There should only be one definition available!")
|
||||
return
|
||||
}
|
||||
|
||||
firstRound := true
|
||||
def := definitions[0]
|
||||
def := defs[0]
|
||||
epoch := 0
|
||||
|
||||
for {
|
||||
acc, err := trainDefinitionExp(c, model, def.Id, !firstRound)
|
||||
acc, err := trainDefinitionExp(c, model, *def, !firstRound)
|
||||
if err != nil {
|
||||
failed("Failed to train definition!")
|
||||
return
|
||||
}
|
||||
epoch += EPOCH_PER_RUN
|
||||
|
||||
if float64(acc*100) >= float64(def.Acuracy) {
|
||||
if float64(acc*100) >= float64(def.Accuracy) {
|
||||
c.Logger.Info("Found a definition that reaches target_accuracy!")
|
||||
|
||||
_, err = c.Db.Exec("update exp_model_head set status=$1 where def_id=$2 and status=$3;", MODEL_HEAD_STATUS_READY, def.Id, MODEL_HEAD_STATUS_TRAINING)
|
||||
@ -1614,22 +1559,18 @@ func RunTaskRetrain(b BasePack, task Task) (err error) {
|
||||
|
||||
task.UpdateStatusLog(b, TASK_RUNNING, "Model retraining")
|
||||
|
||||
var defData struct {
|
||||
Id string `db:"md.id"`
|
||||
TargetAcuuracy float64 `db:"md.target_accuracy"`
|
||||
}
|
||||
|
||||
err = GetDBOnce(db, &defData, "models as m inner join model_definition as md on m.id = md.model_id where m.id=$1;", task.ModelId)
|
||||
defs, err := model.GetDefinitions(db, "")
|
||||
if err != nil {
|
||||
failed()
|
||||
return
|
||||
}
|
||||
def := *defs[0]
|
||||
|
||||
failed = func() {
|
||||
ResetClasses(b, model)
|
||||
ModelUpdateStatus(b, model.Id, READY_RETRAIN_FAILED)
|
||||
task.UpdateStatusLog(b, TASK_FAILED_RUNNING, "Model failed retraining")
|
||||
_, err_ := db.Exec("delete from exp_model_head where def_id=$1 and status in (2,3)", defData.Id)
|
||||
_, err_ := db.Exec("delete from exp_model_head where def_id=$1 and status in (2,3)", def.Id)
|
||||
if err_ != nil {
|
||||
panic(err_)
|
||||
}
|
||||
@ -1640,21 +1581,21 @@ func RunTaskRetrain(b BasePack, task Task) (err error) {
|
||||
var epocs = 0
|
||||
// TODO make max epochs come from db
|
||||
// TODO re increase the target accuracy
|
||||
for acc*100 < defData.TargetAcuuracy-5 && epocs < 10 {
|
||||
for acc*100 < float64(def.TargetAccuracy)-5 && epocs < 10 {
|
||||
// This is something I have to check
|
||||
acc, err = trainDefinitionExpandExp(b, model, defData.Id, epocs > 0)
|
||||
acc, err = trainDefinitionExpandExp(b, model, def, epocs > 0)
|
||||
if err != nil {
|
||||
failed()
|
||||
return
|
||||
}
|
||||
|
||||
l.Info("Retrained model", "accuracy", acc, "target", defData.TargetAcuuracy)
|
||||
l.Info("Retrained model", "accuracy", acc, "target", def.TargetAccuracy)
|
||||
|
||||
epocs += 1
|
||||
}
|
||||
|
||||
if acc*100 < defData.TargetAcuuracy {
|
||||
l.Error("Model never achived targetd accuracy", "acc", acc*100, "target", defData.TargetAcuuracy)
|
||||
if acc*100 < float64(def.TargetAccuracy)-5 {
|
||||
l.Error("Model never achived targetd accuracy", "acc", acc*100, "target", def.TargetAccuracy)
|
||||
failed()
|
||||
return
|
||||
}
|
||||
@ -1675,7 +1616,7 @@ func RunTaskRetrain(b BasePack, task Task) (err error) {
|
||||
return
|
||||
}
|
||||
|
||||
_, err = db.Exec("update exp_model_head set status=$1 where status=$2 and model_id=$3", MODEL_HEAD_STATUS_READY, MODEL_HEAD_STATUS_TRAINING, model.Id)
|
||||
_, err = db.Exec("update exp_model_head set status=$1 where status=$2 and def_id=$3", MODEL_HEAD_STATUS_READY, MODEL_HEAD_STATUS_TRAINING, def.Id)
|
||||
if err != nil {
|
||||
l.Error("Error while updating the classes", "error", err)
|
||||
failed()
|
||||
|
@ -215,7 +215,7 @@
|
||||
</div>
|
||||
{:else if m.status == -3 || m.status == -4}
|
||||
<BaseModelInfo model={m} />
|
||||
<form on:submit={resetModel}>
|
||||
<form on:submit|preventDefault={resetModel}>
|
||||
Failed Prepare for training.<br />
|
||||
<div class="spacer"></div>
|
||||
<MessageSimple bind:this={resetMessages} />
|
||||
|
Loading…
Reference in New Issue
Block a user