runner-go #102

Merged
andr3 merged 9 commits from runner-go into main 2024-05-10 02:13:02 +01:00
3 changed files with 76 additions and 137 deletions
Showing only changes of commit bf5a706ecc

View File

@@ -26,7 +26,7 @@ type Layer struct {
 	ExpType int `db:"mdl.exp_type" json:"exp_type"`
 }
 
-func (x *Layer) ShapeToSize() error {
+func (x *Layer) ShapeToSize() {
 	v := x.GetShape()
 	switch x.LayerType {
 	case LAYER_INPUT:
@@ -35,9 +35,7 @@ func (x *Layer) ShapeToSize() error {
 		x.Shape = fmt.Sprintf("(%d)", v[0])
 	default:
 		x.Shape = "ERROR"
-		// DO nothing
 	}
-	return nil
 }
 
 func ShapeToString(args ...int) string {

View File

@@ -339,7 +339,7 @@ func generateCvsExpandExp(c BasePack, run_path string, model_id string, offset i
 	return
 }
 
-func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string, load_prev bool) (accuracy float64, err error) {
+func trainDefinitionExpandExp(c BasePack, model *BaseModel, def Definition, load_prev bool) (accuracy float64, err error) {
 	accuracy = 0
 
 	l := c.GetLogger()
@@ -354,7 +354,7 @@ func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string
 	}
 
 	// status = 2 (INIT) 3 (TRAINING)
-	heads, err := GetDbMultitple[ExpHead](c.GetDb(), "exp_model_head where def_id=$1 and (status = 2 or status = 3)", definition_id)
+	heads, err := GetDbMultitple[ExpHead](c.GetDb(), "exp_model_head where def_id=$1 and (status = 2 or status = 3)", def.Id)
 	if err != nil {
 		return
 	} else if len(heads) == 0 {
@@ -373,62 +373,49 @@ func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string
 		return
 	}
 
-	layers, err := c.GetDb().Query("select layer_type, shape, exp_type from model_definition_layer where def_id=$1 order by layer_order asc;", definition_id)
+	layers, err := def.GetLayers(c.GetDb(), " order by layer_order asc;")
 	if err != nil {
 		return
 	}
-	defer layers.Close()
 
-	type layerrow struct {
-		LayerType int
-		Shape     string
-		ExpType   int
-		LayerNum  int
-	}
-
-	got := []layerrow{}
+	var got []*Layer
 	i := 1
 
-	var last *layerrow = nil
+	var last *Layer = nil
 	got_2 := false
-	var first *layerrow = nil
+	var first *Layer = nil
 
-	for layers.Next() {
-		var row = layerrow{}
-		if err = layers.Scan(&row.LayerType, &row.Shape, &row.ExpType); err != nil {
-			return
-		}
+	for _, layer := range layers {
+		layer.ShapeToSize()
 
 		// Keep track of the first layer so we can keep the size of the image
 		if first == nil {
-			first = &row
+			first = layer
 		}
 
-		row.LayerNum = i
-		row.Shape = shapeToSize(row.Shape)
-
-		if row.ExpType == 2 {
+		if layer.ExpType == 2 {
 			if !got_2 {
-				got = append(got, *last)
+				got = append(got, last)
 				got_2 = true
 			}
-			got = append(got, row)
+			got = append(got, layer)
 		}
-		last = &row
+		last = layer
 		i += 1
 	}
 
-	got = append(got, layerrow{
+	got = append(got, &Layer{
 		LayerType: LAYER_DENSE,
 		Shape: fmt.Sprintf("%d", exp.End-exp.Start+1),
 		ExpType: 2,
-		LayerNum: i,
+		LayerOrder: len(got),
 	})
 
 	l.Info("Got layers", "layers", got)
 
 	// Generate run folder
-	run_path := path.Join("/tmp", model.Id+"-defs-"+definition_id+"-retrain")
+	run_path := path.Join("/tmp", model.Id+"-defs-"+def.Id+"-retrain")
 	err = os.MkdirAll(run_path, os.ModePerm)
 	if err != nil {
@@ -459,7 +446,7 @@ func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string
 	}
 
 	// Copy result around
-	result_path := path.Join("savedData", model.Id, "defs", definition_id)
+	result_path := path.Join("savedData", model.Id, "defs", def.Id)
 
 	if err = tmpl.Execute(f, AnyMap{
 		"Layers": got,
@@ -515,7 +502,7 @@ func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string
 	return
 }
 
-func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load_prev bool) (accuracy float64, err error) {
+func trainDefinitionExp(c BasePack, model *BaseModel, def Definition, load_prev bool) (accuracy float64, err error) {
 	accuracy = 0
 	l := c.GetLogger()
 	db := c.GetDb()
@@ -531,7 +518,7 @@ func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load
 	}
 
 	// status = 2 (INIT) 3 (TRAINING)
-	heads, err := GetDbMultitple[ExpHead](db, "exp_model_head where def_id=$1 and (status = 2 or status = 3)", definition_id)
+	heads, err := GetDbMultitple[ExpHead](db, "exp_model_head where def_id=$1 and (status = 2 or status = 3)", def.Id)
 	if err != nil {
 		return
 	} else if len(heads) == 0 {
@@ -549,42 +536,24 @@ func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load
 		return
 	}
 
-	layers, err := db.Query("select layer_type, shape, exp_type from model_definition_layer where def_id=$1 order by layer_order asc;", definition_id)
+	layers, err := def.GetLayers(db, " order by layer_order asc;")
 	if err != nil {
 		return
 	}
-	defer layers.Close()
 
-	type layerrow struct {
-		LayerType int
-		Shape     string
-		ExpType   int
-		LayerNum  int
+	for _, layer := range layers {
+		layer.ShapeToSize()
 	}
 
-	got := []layerrow{}
-	i := 1
-
-	for layers.Next() {
-		var row = layerrow{}
-		if err = layers.Scan(&row.LayerType, &row.Shape, &row.ExpType); err != nil {
-			return
-		}
-		row.LayerNum = i
-		row.Shape = shapeToSize(row.Shape)
-		got = append(got, row)
-		i += 1
-	}
-
-	got = append(got, layerrow{
-		LayerType: LAYER_DENSE,
-		Shape: fmt.Sprintf("%d", exp.End-exp.Start+1),
-		ExpType: 2,
-		LayerNum: i,
+	layers = append(layers, &Layer{
+		LayerType: LAYER_DENSE,
+		Shape: fmt.Sprintf("%d", exp.End-exp.Start+1),
+		ExpType: 2,
+		LayerOrder: len(layers),
 	})
 
 	// Generate run folder
-	run_path := path.Join("/tmp", model.Id+"-defs-"+definition_id)
+	run_path := path.Join("/tmp", model.Id+"-defs-"+def.Id)
 	err = os.MkdirAll(run_path, os.ModePerm)
 	if err != nil {
@@ -611,11 +580,11 @@ func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load
 	}
 
 	// Copy result around
-	result_path := path.Join("savedData", model.Id, "defs", definition_id)
+	result_path := path.Join("savedData", model.Id, "defs", def.Id)
 
 	if err = tmpl.Execute(f, AnyMap{
-		"Layers": got,
-		"Size": got[0].Shape,
+		"Layers": layers,
+		"Size": layers[0].Shape,
 		"DataDir": path.Join(getDir(), "savedData", model.Id, "data"),
 		"HeadId": exp.Id,
 		"RunPath": run_path,
@@ -683,21 +652,6 @@ func remove[T interface{}](lst []T, i int) []T {
 	return append(lst[:i], lst[i+1:]...)
 }
 
-type TrainModelRow struct {
-	id              string
-	target_accuracy int
-	epoch           int
-	acuracy         float64
-}
-
-type TraingModelRowDefinitions []TrainModelRow
-
-func (nf TraingModelRowDefinitions) Len() int { return len(nf) }
-func (nf TraingModelRowDefinitions) Swap(i, j int) { nf[i], nf[j] = nf[j], nf[i] }
-func (nf TraingModelRowDefinitions) Less(i, j int) bool {
-	return nf[i].acuracy < nf[j].acuracy
-}
-
 type ToRemoveList []int
 
 func (nf ToRemoveList) Len() int { return len(nf) }
@@ -886,33 +840,18 @@ func trainModel(c BasePack, model *BaseModel) (err error) {
 	return
 }
 
-type TrainModelRowUsable struct {
-	Id             string
-	TargetAccuracy int `db:"target_accuracy"`
-	Epoch          int
-	Acuracy        float64 `db:"0"`
-}
-
-type TrainModelRowUsables []*TrainModelRowUsable
-
-func (nf TrainModelRowUsables) Len() int { return len(nf) }
-func (nf TrainModelRowUsables) Swap(i, j int) { nf[i], nf[j] = nf[j], nf[i] }
-func (nf TrainModelRowUsables) Less(i, j int) bool {
-	return nf[i].Acuracy < nf[j].Acuracy
-}
-
 func trainModelExp(c BasePack, model *BaseModel) (err error) {
 	l := c.GetLogger()
 	db := c.GetDb()
 
-	var definitions TrainModelRowUsables
-	definitions, err = GetDbMultitple[TrainModelRowUsable](db, "model_definition where status=$1 and model_id=$2", MODEL_DEFINITION_STATUS_INIT, model.Id)
+	defs_, err := model.GetDefinitions(db, " and status=$2;", MODEL_DEFINITION_STATUS_INIT)
 	if err != nil {
 		l.Error("Failed to get definitions")
 		return
 	}
 
-	if len(definitions) == 0 {
+	var defs SortByAccuracyDefinitions = defs_
+
+	if len(defs) == 0 {
 		l.Error("No Definitions defined!")
 		return errors.New("No Definitions found")
 	}
@@ -922,9 +861,9 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
 	for {
 		var toRemove ToRemoveList = []int{}
-		for i, def := range definitions {
+		for i, def := range defs {
 			ModelDefinitionUpdateStatus(c, def.Id, MODEL_DEFINITION_STATUS_TRAINING)
 
-			accuracy, err := trainDefinitionExp(c, model, def.Id, !firstRound)
+			accuracy, err := trainDefinitionExp(c, model, *def, !firstRound)
 			if err != nil {
 				l.Error("Failed to train definition!Err:", "err", err)
 				ModelDefinitionUpdateStatus(c, def.Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
@@ -933,10 +872,10 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
 			}
 
 			def.Epoch += EPOCH_PER_RUN
 			accuracy = accuracy * 100
-			def.Acuracy = float64(accuracy)
-			definitions[i].Epoch += EPOCH_PER_RUN
-			definitions[i].Acuracy = accuracy
+			def.Accuracy = float64(accuracy)
+			defs[i].Epoch += EPOCH_PER_RUN
+			defs[i].Accuracy = accuracy
 
 			if accuracy >= float64(def.TargetAccuracy) {
 				l.Info("Found a definition that reaches target_accuracy!")
@@ -986,10 +925,10 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
 		l.Info("Round done", "toRemove", toRemove)
 
 		for _, n := range toRemove {
-			definitions = remove(definitions, n)
+			defs = remove(defs, n)
 		}
 
-		len_def := len(definitions)
+		len_def := len(defs)
 
 		if len_def == 0 {
 			break
@@ -997,14 +936,14 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
 			continue
 		}
 
-		sort.Sort(sort.Reverse(definitions))
-		acc := definitions[0].Acuracy - 20.0
+		sort.Sort(sort.Reverse(defs))
+		acc := defs[0].Accuracy - 20.0
 
-		l.Info("Training models, Highest acc", "acc", definitions[0].Acuracy, "mod_acc", acc)
+		l.Info("Training models, Highest acc", "acc", defs[0].Accuracy, "mod_acc", acc)
 
 		toRemove = []int{}
-		for i, def := range definitions {
-			if def.Acuracy < acc {
+		for i, def := range defs {
+			if def.Accuracy < acc {
 				toRemove = append(toRemove, i)
 			}
 		}
@@ -1014,8 +953,8 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
 		sort.Sort(sort.Reverse(toRemove))
 		for _, n := range toRemove {
 			l.Warn("Removing definition not fast enough learning", "n", n)
-			ModelDefinitionUpdateStatus(c, definitions[n].Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
-			definitions = remove(definitions, n)
+			ModelDefinitionUpdateStatus(c, defs[n].Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
+			defs = remove(defs, n)
 		}
 	}
@@ -1030,6 +969,12 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
 			return err
 		}
 
+		err = model.UpdateStatus(db, FAILED_TRAINING)
+		if err != nil {
+			l.Error("All definitions failed to train! And Failed to set model status")
+			return err
+		}
+
 		l.Error("All definitions failed to train!")
 		return err
 	} else if err != nil {
@@ -1485,31 +1430,31 @@ func trainExpandable(c *Context, model *BaseModel) {
 		ResetClasses(c, model)
 	}
 
-	var definitions TrainModelRowUsables
-	definitions, err = GetDbMultitple[TrainModelRowUsable](c, "model_definition where status=$1 and model_id=$2", MODEL_DEFINITION_STATUS_READY, model.Id)
+	defs_, err := model.GetDefinitions(c, " and status=$2", MODEL_DEFINITION_STATUS_READY)
 	if err != nil {
 		failed("Failed to get definitions")
 		return
 	}
 
-	if len(definitions) != 1 {
+	var defs SortByAccuracyDefinitions = defs_
+	if len(defs) != 1 {
 		failed("There should only be one definition available!")
 		return
 	}
 
 	firstRound := true
-	def := definitions[0]
+	def := defs[0]
 	epoch := 0
 
 	for {
-		acc, err := trainDefinitionExp(c, model, def.Id, !firstRound)
+		acc, err := trainDefinitionExp(c, model, *def, !firstRound)
 		if err != nil {
 			failed("Failed to train definition!")
 			return
 		}
 
 		epoch += EPOCH_PER_RUN
 
-		if float64(acc*100) >= float64(def.Acuracy) {
+		if float64(acc*100) >= float64(def.Accuracy) {
 			c.Logger.Info("Found a definition that reaches target_accuracy!")
 			_, err = c.Db.Exec("update exp_model_head set status=$1 where def_id=$2 and status=$3;", MODEL_HEAD_STATUS_READY, def.Id, MODEL_HEAD_STATUS_TRAINING)
@@ -1614,22 +1559,18 @@ func RunTaskRetrain(b BasePack, task Task) (err error) {
 	task.UpdateStatusLog(b, TASK_RUNNING, "Model retraining")
 
-	var defData struct {
-		Id             string  `db:"md.id"`
-		TargetAcuuracy float64 `db:"md.target_accuracy"`
-	}
-
-	err = GetDBOnce(db, &defData, "models as m inner join model_definition as md on m.id = md.model_id where m.id=$1;", task.ModelId)
+	defs, err := model.GetDefinitions(db, "")
 	if err != nil {
 		failed()
 		return
 	}
+	def := *defs[0]
 
 	failed = func() {
 		ResetClasses(b, model)
 		ModelUpdateStatus(b, model.Id, READY_RETRAIN_FAILED)
 		task.UpdateStatusLog(b, TASK_FAILED_RUNNING, "Model failed retraining")
-		_, err_ := db.Exec("delete from exp_model_head where def_id=$1 and status in (2,3)", defData.Id)
+		_, err_ := db.Exec("delete from exp_model_head where def_id=$1 and status in (2,3)", def.Id)
 		if err_ != nil {
 			panic(err_)
 		}
@@ -1640,21 +1581,21 @@ func RunTaskRetrain(b BasePack, task Task) (err error) {
 	var epocs = 0
 	// TODO make max epochs come from db
 	// TODO re increase the target accuracy
-	for acc*100 < defData.TargetAcuuracy-5 && epocs < 10 {
+	for acc*100 < float64(def.TargetAccuracy)-5 && epocs < 10 {
 		// This is something I have to check
-		acc, err = trainDefinitionExpandExp(b, model, defData.Id, epocs > 0)
+		acc, err = trainDefinitionExpandExp(b, model, def, epocs > 0)
 		if err != nil {
 			failed()
 			return
 		}
 
-		l.Info("Retrained model", "accuracy", acc, "target", defData.TargetAcuuracy)
+		l.Info("Retrained model", "accuracy", acc, "target", def.TargetAccuracy)
 		epocs += 1
 	}
 
-	if acc*100 < defData.TargetAcuuracy {
-		l.Error("Model never achived targetd accuracy", "acc", acc*100, "target", defData.TargetAcuuracy)
+	if acc*100 < float64(def.TargetAccuracy)-5 {
+		l.Error("Model never achived targetd accuracy", "acc", acc*100, "target", def.TargetAccuracy)
 		failed()
 		return
 	}
@@ -1675,7 +1616,7 @@ func RunTaskRetrain(b BasePack, task Task) (err error) {
 		return
 	}
 
-	_, err = db.Exec("update exp_model_head set status=$1 where status=$2 and model_id=$3", MODEL_HEAD_STATUS_READY, MODEL_HEAD_STATUS_TRAINING, model.Id)
+	_, err = db.Exec("update exp_model_head set status=$1 where status=$2 and def_id=$3", MODEL_HEAD_STATUS_READY, MODEL_HEAD_STATUS_TRAINING, def.Id)
 	if err != nil {
 		l.Error("Error while updating the classes", "error", err)
 		failed()

View File

@@ -215,7 +215,7 @@
 			</div>
 		{:else if m.status == -3 || m.status == -4}
 			<BaseModelInfo model={m} />
-			<form on:submit={resetModel}>
+			<form on:submit|preventDefault={resetModel}>
 				Failed Prepare for training.<br />
 				<div class="spacer"></div>
 				<MessageSimple bind:this={resetMessages} />