fix the simple on the default one

This commit is contained in:
2024-05-09 01:23:43 +01:00
parent a0aed71b3c
commit 972b9b9b67
3 changed files with 77 additions and 92 deletions

View File

@@ -161,40 +161,23 @@ func generateCvsExp(c BasePack, run_path string, model_id string, doPanic bool)
return
}
func trainDefinition(c BasePack, model *BaseModel, definition_id string, load_prev bool) (accuracy float64, err error) {
func trainDefinition(c BasePack, model *BaseModel, def Definition, load_prev bool) (accuracy float64, err error) {
l := c.GetLogger()
db := c.GetDb()
l.Warn("About to start training definition")
accuracy = 0
layers, err := db.Query("select layer_type, shape from model_definition_layer where def_id=$1 order by layer_order asc;", definition_id)
layers, err := def.GetLayers(c.GetDb(), " order by layer_order asc;")
if err != nil {
return
}
defer layers.Close()
type layerrow struct {
LayerType int
Shape string
LayerNum int
}
got := []layerrow{}
i := 1
for layers.Next() {
var row = layerrow{}
if err = layers.Scan(&row.LayerType, &row.Shape); err != nil {
return
}
row.Shape = shapeToSize(row.Shape)
row.LayerNum = 1
got = append(got, row)
i = i + 1
for _, layer := range layers {
layer.ShapeToSize()
}
// Generate run folder
run_path := path.Join("/tmp", model.Id, "defs", definition_id)
run_path := path.Join("/tmp", model.Id, "defs", def.Id)
err = os.MkdirAll(run_path, os.ModePerm)
if err != nil {
@@ -219,17 +202,17 @@ func trainDefinition(c BasePack, model *BaseModel, definition_id string, load_pr
}
// Copy result around
result_path := path.Join("savedData", model.Id, "defs", definition_id)
result_path := path.Join("savedData", model.Id, "defs", def.Id)
if err = tmpl.Execute(f, AnyMap{
"Layers": got,
"Size": got[0].Shape,
"Layers": layers,
"Size": layers[0].Shape,
"DataDir": path.Join(getDir(), "savedData", model.Id, "data"),
"RunPath": run_path,
"ColorMode": model.ImageMode,
"Model": model,
"EPOCH_PER_RUN": EPOCH_PER_RUN,
"DefId": definition_id,
"DefId": def.Id,
"LoadPrev": load_prev,
"LastModelRunPath": path.Join(getDir(), result_path, "model.keras"),
"SaveModelPath": path.Join(getDir(), result_path),
@@ -727,30 +710,16 @@ func trainModel(c BasePack, model *BaseModel) (err error) {
db := c.GetDb()
l := c.GetLogger()
definitionsRows, err := db.Query("select id, target_accuracy, epoch from model_definition where status=$1 and model_id=$2", MODEL_DEFINITION_STATUS_INIT, model.Id)
defs_, err := model.GetDefinitions(db, "and md.status=$2", MODEL_DEFINITION_STATUS_INIT)
if err != nil {
l.Error("Failed to train Model! Err:")
l.Error(err)
l.Error("Failed to train Model!", "err", err)
ModelUpdateStatus(c, model.Id, int(FAILED_TRAINING))
return
}
defer definitionsRows.Close()
var definitions TraingModelRowDefinitions = []TrainModelRow{}
var defs SortByAccuracyDefinitions = defs_
for definitionsRows.Next() {
var rowv TrainModelRow
rowv.acuracy = 0
if err = definitionsRows.Scan(&rowv.id, &rowv.target_accuracy, &rowv.epoch); err != nil {
l.Error("Failed to train Model Could not read definition from db!Err:")
l.Error(err)
ModelUpdateStatus(c, model.Id, int(FAILED_TRAINING))
return
}
definitions = append(definitions, rowv)
}
if len(definitions) == 0 {
if len(defs) == 0 {
l.Error("No Definitions defined!")
ModelUpdateStatus(c, model.Id, int(FAILED_TRAINING))
return
@@ -761,32 +730,29 @@ func trainModel(c BasePack, model *BaseModel) (err error) {
for {
var toRemove ToRemoveList = []int{}
for i, def := range definitions {
ModelDefinitionUpdateStatus(c, def.id, MODEL_DEFINITION_STATUS_TRAINING)
accuracy, err := trainDefinition(c, model, def.id, !firstRound)
for i, def := range defs {
ModelDefinitionUpdateStatus(c, def.Id, MODEL_DEFINITION_STATUS_TRAINING)
accuracy, err := trainDefinition(c, model, *def, !firstRound)
if err != nil {
l.Error("Failed to train definition!Err:", "err", err)
ModelDefinitionUpdateStatus(c, def.id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
ModelDefinitionUpdateStatus(c, def.Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
toRemove = append(toRemove, i)
continue
}
def.epoch += EPOCH_PER_RUN
def.Epoch += EPOCH_PER_RUN
accuracy = accuracy * 100
def.acuracy = float64(accuracy)
def.Accuracy = float64(accuracy)
definitions[i].epoch += EPOCH_PER_RUN
definitions[i].acuracy = accuracy
if accuracy >= float64(def.target_accuracy) {
if accuracy >= float64(def.TargetAccuracy) {
l.Info("Found a definition that reaches target_accuracy!")
_, err = db.Exec("update model_definition set accuracy=$1, status=$2, epoch=$3 where id=$4", accuracy, MODEL_DEFINITION_STATUS_TRANIED, def.epoch, def.id)
_, err = db.Exec("update model_definition set accuracy=$1, status=$2, epoch=$3 where id=$4", accuracy, MODEL_DEFINITION_STATUS_TRANIED, def.Epoch, def.Id)
if err != nil {
l.Error("Failed to train definition!Err:\n", "err", err)
ModelUpdateStatus(c, model.Id, int(FAILED_TRAINING))
return err
}
_, err = db.Exec("update model_definition set status=$1 where id!=$2 and model_id=$3 and status!=$4", MODEL_DEFINITION_STATUS_CANCELD_TRAINING, def.id, model.Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
_, err = db.Exec("update model_definition set status=$1 where id!=$2 and model_id=$3 and status!=$4", MODEL_DEFINITION_STATUS_CANCELD_TRAINING, def.Id, model.Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
if err != nil {
l.Error("Failed to train definition!Err:\n", "err", err)
ModelUpdateStatus(c, model.Id, int(FAILED_TRAINING))
@@ -797,14 +763,14 @@ func trainModel(c BasePack, model *BaseModel) (err error) {
break
}
if def.epoch > MAX_EPOCH {
fmt.Printf("Failed to train definition! Accuracy less %f < %d\n", accuracy, def.target_accuracy)
ModelDefinitionUpdateStatus(c, def.id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
if def.Epoch > MAX_EPOCH {
fmt.Printf("Failed to train definition! Accuracy less %f < %d\n", accuracy, def.TargetAccuracy)
ModelDefinitionUpdateStatus(c, def.Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
toRemove = append(toRemove, i)
continue
}
_, err = db.Exec("update model_definition set accuracy=$1, epoch=$2, status=$3 where id=$4", accuracy, def.epoch, MODEL_DEFINITION_STATUS_PAUSED_TRAINING, def.id)
_, err = db.Exec("update model_definition set accuracy=$1, epoch=$2, status=$3 where id=$4", accuracy, def.Epoch, MODEL_DEFINITION_STATUS_PAUSED_TRAINING, def.Id)
if err != nil {
l.Error("Failed to train definition!Err:\n", "err", err)
ModelUpdateStatus(c, model.Id, int(FAILED_TRAINING))
@@ -822,28 +788,26 @@ func trainModel(c BasePack, model *BaseModel) (err error) {
l.Info("Round done", "toRemove", toRemove)
for _, n := range toRemove {
definitions = remove(definitions, n)
defs = remove(defs, n)
}
len_def := len(definitions)
len_def := len(defs)
if len_def == 0 {
break
}
if len_def == 1 {
} else if len_def == 1 {
continue
}
sort.Sort(sort.Reverse(definitions))
sort.Sort(sort.Reverse(defs))
acc := definitions[0].acuracy - 20.0
acc := defs[0].Accuracy - 20.0
l.Info("Training models, Highest acc", "acc", definitions[0].acuracy, "mod_acc", acc)
l.Info("Training models, Highest acc", "acc", defs[0].Accuracy, "mod_acc", acc)
toRemove = []int{}
for i, def := range definitions {
if def.acuracy < acc {
for i, def := range defs {
if def.Accuracy < acc {
toRemove = append(toRemove, i)
}
}
@@ -853,8 +817,8 @@ func trainModel(c BasePack, model *BaseModel) (err error) {
sort.Sort(sort.Reverse(toRemove))
for _, n := range toRemove {
l.Warn("Removing definition not fast enough learning", "n", n)
ModelDefinitionUpdateStatus(c, definitions[n].id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
definitions = remove(definitions, n)
ModelDefinitionUpdateStatus(c, defs[n].Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
defs = remove(defs, n)
}
}