From bf5a706ecc04c43e6ebe491512d1a27e54ba35e0 Mon Sep 17 00:00:00 2001
From: Andre Henriques
Date: Thu, 9 May 2024 11:14:52 +0100
Subject: [PATCH] fixed runner

---
 logic/db_types/layer.go                     |   4 +-
 logic/models/train/train.go                 | 207 +++++++-------------
 webpage/src/routes/models/edit/+page.svelte |   2 +-
 3 files changed, 76 insertions(+), 137 deletions(-)

diff --git a/logic/db_types/layer.go b/logic/db_types/layer.go
index 5e00ad3..c8e12cd 100644
--- a/logic/db_types/layer.go
+++ b/logic/db_types/layer.go
@@ -26,7 +26,7 @@ type Layer struct {
 	ExpType   int    `db:"mdl.exp_type" json:"exp_type"`
 }
 
-func (x *Layer) ShapeToSize() error {
+func (x *Layer) ShapeToSize() {
 	v := x.GetShape()
 	switch x.LayerType {
 	case LAYER_INPUT:
@@ -35,9 +35,7 @@ func (x *Layer) ShapeToSize() error {
 		x.Shape = fmt.Sprintf("(%d)", v[0])
 	default:
 		x.Shape = "ERROR"
-		// DO nothing
 	}
-	return nil
 }
 
 func ShapeToString(args ...int) string {
diff --git a/logic/models/train/train.go b/logic/models/train/train.go
index cba1a22..80ed5b9 100644
--- a/logic/models/train/train.go
+++ b/logic/models/train/train.go
@@ -339,7 +339,7 @@ func generateCvsExpandExp(c BasePack, run_path string, model_id string, offset i
 	return
 }
 
-func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string, load_prev bool) (accuracy float64, err error) {
+func trainDefinitionExpandExp(c BasePack, model *BaseModel, def Definition, load_prev bool) (accuracy float64, err error) {
 	accuracy = 0
 
 	l := c.GetLogger()
@@ -354,7 +354,7 @@
 	}
 
 	// status = 2 (INIT) 3 (TRAINING)
-	heads, err := GetDbMultitple[ExpHead](c.GetDb(), "exp_model_head where def_id=$1 and (status = 2 or status = 3)", definition_id)
+	heads, err := GetDbMultitple[ExpHead](c.GetDb(), "exp_model_head where def_id=$1 and (status = 2 or status = 3)", def.Id)
 	if err != nil {
 		return
 	} else if len(heads) == 0 {
@@ -373,62 +373,49 @@
 		return
 	}
 
-	layers, err := c.GetDb().Query("select layer_type, shape, exp_type from model_definition_layer where def_id=$1 order by layer_order asc;", definition_id)
+	layers, err := def.GetLayers(c.GetDb(), " order by layer_order asc;")
 	if err != nil {
 		return
 	}
-	defer layers.Close()
-	type layerrow struct {
-		LayerType int
-		Shape     string
-		ExpType   int
-		LayerNum  int
-	}
-
-	got := []layerrow{}
+	var got []*Layer
 	i := 1
 
-	var last *layerrow = nil
+	var last *Layer = nil
 	got_2 := false
-	var first *layerrow = nil
+	var first *Layer = nil
 
-	for layers.Next() {
-		var row = layerrow{}
-		if err = layers.Scan(&row.LayerType, &row.Shape, &row.ExpType); err != nil {
-			return
-		}
+	for _, layer := range layers {
+		layer.ShapeToSize()
 
 		// Keep track of the first layer so we can keep the size of the image
 		if first == nil {
-			first = &row
+			first = layer
 		}
 
-		row.LayerNum = i
-		row.Shape = shapeToSize(row.Shape)
-		if row.ExpType == 2 {
+		if layer.ExpType == 2 {
 			if !got_2 {
-				got = append(got, *last)
+				got = append(got, last)
 				got_2 = true
 			}
-			got = append(got, row)
+			got = append(got, layer)
 		}
-		last = &row
+		last = layer
 		i += 1
 	}
 
-	got = append(got, layerrow{
-		LayerType: LAYER_DENSE,
-		Shape:     fmt.Sprintf("%d", exp.End-exp.Start+1),
-		ExpType:   2,
-		LayerNum:  i,
+	got = append(got, &Layer{
+		LayerType:  LAYER_DENSE,
+		Shape:      fmt.Sprintf("%d", exp.End-exp.Start+1),
+		ExpType:    2,
+		LayerOrder: len(got),
 	})
 
 	l.Info("Got layers", "layers", got)
 
 	// Generate run folder
-	run_path := path.Join("/tmp", model.Id+"-defs-"+definition_id+"-retrain")
model.Id+"-defs-"+definition_id+"-retrain") + run_path := path.Join("/tmp", model.Id+"-defs-"+def.Id+"-retrain") err = os.MkdirAll(run_path, os.ModePerm) if err != nil { @@ -459,7 +446,7 @@ func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string } // Copy result around - result_path := path.Join("savedData", model.Id, "defs", definition_id) + result_path := path.Join("savedData", model.Id, "defs", def.Id) if err = tmpl.Execute(f, AnyMap{ "Layers": got, @@ -515,7 +502,7 @@ func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string return } -func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load_prev bool) (accuracy float64, err error) { +func trainDefinitionExp(c BasePack, model *BaseModel, def Definition, load_prev bool) (accuracy float64, err error) { accuracy = 0 l := c.GetLogger() db := c.GetDb() @@ -531,7 +518,7 @@ func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load } // status = 2 (INIT) 3 (TRAINING) - heads, err := GetDbMultitple[ExpHead](db, "exp_model_head where def_id=$1 and (status = 2 or status = 3)", definition_id) + heads, err := GetDbMultitple[ExpHead](db, "exp_model_head where def_id=$1 and (status = 2 or status = 3)", def.Id) if err != nil { return } else if len(heads) == 0 { @@ -549,42 +536,24 @@ func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load return } - layers, err := db.Query("select layer_type, shape, exp_type from model_definition_layer where def_id=$1 order by layer_order asc;", definition_id) + layers, err := def.GetLayers(db, " order by layer_order asc;") if err != nil { return } - defer layers.Close() - type layerrow struct { - LayerType int - Shape string - ExpType int - LayerNum int + for _, layer := range layers { + layer.ShapeToSize() } - got := []layerrow{} - i := 1 - - for layers.Next() { - var row = layerrow{} - if err = layers.Scan(&row.LayerType, &row.Shape, &row.ExpType); err != nil { - return - } - row.LayerNum = i - row.Shape = shapeToSize(row.Shape) - got = append(got, row) - i += 1 - } - - got = append(got, layerrow{ - LayerType: LAYER_DENSE, - Shape: fmt.Sprintf("%d", exp.End-exp.Start+1), - ExpType: 2, - LayerNum: i, + layers = append(layers, &Layer{ + LayerType: LAYER_DENSE, + Shape: fmt.Sprintf("%d", exp.End-exp.Start+1), + ExpType: 2, + LayerOrder: len(layers), }) // Generate run folder - run_path := path.Join("/tmp", model.Id+"-defs-"+definition_id) + run_path := path.Join("/tmp", model.Id+"-defs-"+def.Id) err = os.MkdirAll(run_path, os.ModePerm) if err != nil { @@ -611,11 +580,11 @@ func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load } // Copy result around - result_path := path.Join("savedData", model.Id, "defs", definition_id) + result_path := path.Join("savedData", model.Id, "defs", def.Id) if err = tmpl.Execute(f, AnyMap{ - "Layers": got, - "Size": got[0].Shape, + "Layers": layers, + "Size": layers[0].Shape, "DataDir": path.Join(getDir(), "savedData", model.Id, "data"), "HeadId": exp.Id, "RunPath": run_path, @@ -683,21 +652,6 @@ func remove[T interface{}](lst []T, i int) []T { return append(lst[:i], lst[i+1:]...) 
 }
 
-type TrainModelRow struct {
-	id              string
-	target_accuracy int
-	epoch           int
-	acuracy         float64
-}
-
-type TraingModelRowDefinitions []TrainModelRow
-
-func (nf TraingModelRowDefinitions) Len() int { return len(nf) }
-func (nf TraingModelRowDefinitions) Swap(i, j int) { nf[i], nf[j] = nf[j], nf[i] }
-func (nf TraingModelRowDefinitions) Less(i, j int) bool {
-	return nf[i].acuracy < nf[j].acuracy
-}
-
 type ToRemoveList []int
 
 func (nf ToRemoveList) Len() int { return len(nf) }
@@ -886,33 +840,18 @@ func trainModel(c BasePack, model *BaseModel) (err error) {
 	return
 }
 
-type TrainModelRowUsable struct {
-	Id             string
-	TargetAccuracy int     `db:"target_accuracy"`
-	Epoch          int
-	Acuracy        float64 `db:"0"`
-}
-
-type TrainModelRowUsables []*TrainModelRowUsable
-
-func (nf TrainModelRowUsables) Len() int { return len(nf) }
-func (nf TrainModelRowUsables) Swap(i, j int) { nf[i], nf[j] = nf[j], nf[i] }
-func (nf TrainModelRowUsables) Less(i, j int) bool {
-	return nf[i].Acuracy < nf[j].Acuracy
-}
-
 func trainModelExp(c BasePack, model *BaseModel) (err error) {
 	l := c.GetLogger()
 	db := c.GetDb()
 
-	var definitions TrainModelRowUsables
-
-	definitions, err = GetDbMultitple[TrainModelRowUsable](db, "model_definition where status=$1 and model_id=$2", MODEL_DEFINITION_STATUS_INIT, model.Id)
+	defs_, err := model.GetDefinitions(db, " and status=$2;", MODEL_DEFINITION_STATUS_INIT)
 	if err != nil {
 		l.Error("Failed to get definitions")
 		return
 	}
 
-	if len(definitions) == 0 {
+	var defs SortByAccuracyDefinitions = defs_
+
+	if len(defs) == 0 {
 		l.Error("No Definitions defined!")
 		return errors.New("No Definitions found")
 	}
@@ -922,9 +861,9 @@
 
 	for {
 		var toRemove ToRemoveList = []int{}
-		for i, def := range definitions {
+		for i, def := range defs {
 			ModelDefinitionUpdateStatus(c, def.Id, MODEL_DEFINITION_STATUS_TRAINING)
-			accuracy, err := trainDefinitionExp(c, model, def.Id, !firstRound)
+			accuracy, err := trainDefinitionExp(c, model, *def, !firstRound)
 			if err != nil {
 				l.Error("Failed to train definition!Err:", "err", err)
 				ModelDefinitionUpdateStatus(c, def.Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
@@ -933,10 +872,10 @@
 			}
 			def.Epoch += EPOCH_PER_RUN
 			accuracy = accuracy * 100
-			def.Acuracy = float64(accuracy)
+			def.Accuracy = float64(accuracy)
 
-			definitions[i].Epoch += EPOCH_PER_RUN
-			definitions[i].Acuracy = accuracy
+			defs[i].Epoch += EPOCH_PER_RUN
+			defs[i].Accuracy = accuracy
 
 			if accuracy >= float64(def.TargetAccuracy) {
 				l.Info("Found a definition that reaches target_accuracy!")
@@ -986,10 +925,10 @@
 		l.Info("Round done", "toRemove", toRemove)
 
 		for _, n := range toRemove {
-			definitions = remove(definitions, n)
+			defs = remove(defs, n)
 		}
 
-		len_def := len(definitions)
+		len_def := len(defs)
 
 		if len_def == 0 {
 			break
@@ -997,14 +936,14 @@
 			continue
 		}
 
-		sort.Sort(sort.Reverse(definitions))
-		acc := definitions[0].Acuracy - 20.0
+		sort.Sort(sort.Reverse(defs))
+		acc := defs[0].Accuracy - 20.0
 
-		l.Info("Training models, Highest acc", "acc", definitions[0].Acuracy, "mod_acc", acc)
+		l.Info("Training models, Highest acc", "acc", defs[0].Accuracy, "mod_acc", acc)
 
 		toRemove = []int{}
-		for i, def := range definitions {
-			if def.Acuracy < acc {
+		for i, def := range defs {
+			if def.Accuracy < acc {
 				toRemove = append(toRemove, i)
 			}
 		}
@@ -1014,8 +953,8 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
 		sort.Sort(sort.Reverse(toRemove))
 		for _, n := range toRemove {
 			l.Warn("Removing definition not fast enough learning", "n", n)
-			ModelDefinitionUpdateStatus(c, definitions[n].Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
-			definitions = remove(definitions, n)
+			ModelDefinitionUpdateStatus(c, defs[n].Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
+			defs = remove(defs, n)
 		}
 	}
 
@@ -1030,6 +969,12 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
 			return err
 		}
 
+		err = model.UpdateStatus(db, FAILED_TRAINING)
+		if err != nil {
+			l.Error("All definitions failed to train! And Failed to set model status")
+			return err
+		}
+
 		l.Error("All definitions failed to train!")
 		return err
 	} else if err != nil {
@@ -1485,31 +1430,31 @@ func trainExpandable(c *Context, model *BaseModel) {
 		ResetClasses(c, model)
 	}
 
-	var definitions TrainModelRowUsables
-
-	definitions, err = GetDbMultitple[TrainModelRowUsable](c, "model_definition where status=$1 and model_id=$2", MODEL_DEFINITION_STATUS_READY, model.Id)
+	defs_, err := model.GetDefinitions(c, " and status=$2", MODEL_DEFINITION_STATUS_READY)
 	if err != nil {
 		failed("Failed to get definitions")
 		return
 	}
 
-	if len(definitions) != 1 {
+	var defs SortByAccuracyDefinitions = defs_
+
+	if len(defs) != 1 {
 		failed("There should only be one definition available!")
 		return
 	}
 
 	firstRound := true
-	def := definitions[0]
+	def := defs[0]
 	epoch := 0
 
 	for {
-		acc, err := trainDefinitionExp(c, model, def.Id, !firstRound)
+		acc, err := trainDefinitionExp(c, model, *def, !firstRound)
 		if err != nil {
 			failed("Failed to train definition!")
 			return
 		}
 		epoch += EPOCH_PER_RUN
 
-		if float64(acc*100) >= float64(def.Acuracy) {
+		if float64(acc*100) >= float64(def.Accuracy) {
 			c.Logger.Info("Found a definition that reaches target_accuracy!")
 			_, err = c.Db.Exec("update exp_model_head set status=$1 where def_id=$2 and status=$3;", MODEL_HEAD_STATUS_READY, def.Id, MODEL_HEAD_STATUS_TRAINING)
@@ -1614,22 +1559,18 @@ func RunTaskRetrain(b BasePack, task Task) (err error) {
 
 	task.UpdateStatusLog(b, TASK_RUNNING, "Model retraining")
 
-	var defData struct {
-		Id             string  `db:"md.id"`
-		TargetAcuuracy float64 `db:"md.target_accuracy"`
-	}
-
-	err = GetDBOnce(db, &defData, "models as m inner join model_definition as md on m.id = md.model_id where m.id=$1;", task.ModelId)
+	defs, err := model.GetDefinitions(db, "")
 	if err != nil {
 		failed()
 		return
 	}
+	def := *defs[0]
 
 	failed = func() {
 		ResetClasses(b, model)
 		ModelUpdateStatus(b, model.Id, READY_RETRAIN_FAILED)
 		task.UpdateStatusLog(b, TASK_FAILED_RUNNING, "Model failed retraining")
-		_, err_ := db.Exec("delete from exp_model_head where def_id=$1 and status in (2,3)", defData.Id)
+		_, err_ := db.Exec("delete from exp_model_head where def_id=$1 and status in (2,3)", def.Id)
 		if err_ != nil {
 			panic(err_)
 		}
@@ -1640,21 +1581,21 @@
 	var epocs = 0
 	// TODO make max epochs come from db
 	// TODO re increase the target accuracy
-	for acc*100 < defData.TargetAcuuracy-5 && epocs < 10 {
+	for acc*100 < float64(def.TargetAccuracy)-5 && epocs < 10 {
 		// This is something I have to check
-		acc, err = trainDefinitionExpandExp(b, model, defData.Id, epocs > 0)
+		acc, err = trainDefinitionExpandExp(b, model, def, epocs > 0)
 		if err != nil {
 			failed()
 			return
 		}
 
-		l.Info("Retrained model", "accuracy", acc, "target", defData.TargetAcuuracy)
+		l.Info("Retrained model", "accuracy", acc, "target", def.TargetAccuracy)
 		epocs += 1
 	}
 
-	if acc*100 < defData.TargetAcuuracy {
-		l.Error("Model never achived targetd accuracy", "acc", acc*100, "target", defData.TargetAcuuracy)
accuracy", "acc", acc*100, "target", defData.TargetAcuuracy) + if acc*100 < float64(def.TargetAccuracy)-5 { + l.Error("Model never achived targetd accuracy", "acc", acc*100, "target", def.TargetAccuracy) failed() return } @@ -1675,7 +1616,7 @@ func RunTaskRetrain(b BasePack, task Task) (err error) { return } - _, err = db.Exec("update exp_model_head set status=$1 where status=$2 and model_id=$3", MODEL_HEAD_STATUS_READY, MODEL_HEAD_STATUS_TRAINING, model.Id) + _, err = db.Exec("update exp_model_head set status=$1 where status=$2 and def_id=$3", MODEL_HEAD_STATUS_READY, MODEL_HEAD_STATUS_TRAINING, def.Id) if err != nil { l.Error("Error while updating the classes", "error", err) failed() diff --git a/webpage/src/routes/models/edit/+page.svelte b/webpage/src/routes/models/edit/+page.svelte index d3af0c9..f3a2d2f 100644 --- a/webpage/src/routes/models/edit/+page.svelte +++ b/webpage/src/routes/models/edit/+page.svelte @@ -215,7 +215,7 @@ {:else if m.status == -3 || m.status == -4} -
+ Failed Prepare for training.
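
A minimal, self-contained Go sketch of the call pattern this patch moves to (the types and values below are simplified stand-ins, not the repo's real Layer or training code): ShapeToSize() now mutates the layer in place and returns nothing, and the extra dense head is appended as a *Layer with LayerOrder set from the slice length.

package main

import "fmt"

const LAYER_DENSE = 1 // stand-in constant; the real value lives in logic/db_types

type Layer struct {
	LayerType  int
	Shape      string
	ExpType    int
	LayerOrder int
}

// ShapeToSize mirrors the new signature: it rewrites Shape in place, no error returned.
func (x *Layer) ShapeToSize() {
	x.Shape = "(" + x.Shape + ")"
}

func main() {
	layers := []*Layer{
		{LayerType: LAYER_DENSE, Shape: "10", ExpType: 1, LayerOrder: 0},
	}

	// No error to check any more; the method just updates each layer.
	for _, layer := range layers {
		layer.ShapeToSize()
	}

	// Append the expandable head the way trainDefinitionExp now does.
	layers = append(layers, &Layer{
		LayerType:  LAYER_DENSE,
		Shape:      fmt.Sprintf("%d", 3), // 3 stands in for exp.End-exp.Start+1
		ExpType:    2,
		LayerOrder: len(layers),
	})

	fmt.Println(len(layers), layers[0].Shape) // prints: 2 (10)
}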