runner-go #102

Merged
andr3 merged 9 commits from runner-go into main 2024-05-10 02:13:02 +01:00
3 changed files with 76 additions and 137 deletions
Showing only changes of commit bf5a706ecc

View File

@ -26,7 +26,7 @@ type Layer struct {
ExpType int `db:"mdl.exp_type" json:"exp_type"`
}
func (x *Layer) ShapeToSize() error {
func (x *Layer) ShapeToSize() {
v := x.GetShape()
switch x.LayerType {
case LAYER_INPUT:
@ -35,9 +35,7 @@ func (x *Layer) ShapeToSize() error {
x.Shape = fmt.Sprintf("(%d)", v[0])
default:
x.Shape = "ERROR"
// DO nothing
}
return nil
}
func ShapeToString(args ...int) string {
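
A quick note on the signature change above: ShapeToSize no longer returns an error, it just rewrites x.Shape in place, so call sites reduce to a plain loop. A minimal sketch of the assumed usage, mirroring the call sites later in this diff:

	// Assumed call pattern after this change (see the training code below):
	// the method mutates layer.Shape directly and has nothing left to report.
	for _, layer := range layers {
		layer.ShapeToSize()
	}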

View File

@ -339,7 +339,7 @@ func generateCvsExpandExp(c BasePack, run_path string, model_id string, offset i
return
}
func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string, load_prev bool) (accuracy float64, err error) {
func trainDefinitionExpandExp(c BasePack, model *BaseModel, def Definition, load_prev bool) (accuracy float64, err error) {
accuracy = 0
l := c.GetLogger()
@ -354,7 +354,7 @@ func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string
}
// status = 2 (INIT) 3 (TRAINING)
heads, err := GetDbMultitple[ExpHead](c.GetDb(), "exp_model_head where def_id=$1 and (status = 2 or status = 3)", definition_id)
heads, err := GetDbMultitple[ExpHead](c.GetDb(), "exp_model_head where def_id=$1 and (status = 2 or status = 3)", def.Id)
if err != nil {
return
} else if len(heads) == 0 {
@ -373,62 +373,49 @@ func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string
return
}
layers, err := c.GetDb().Query("select layer_type, shape, exp_type from model_definition_layer where def_id=$1 order by layer_order asc;", definition_id)
layers, err := def.GetLayers(c.GetDb(), " order by layer_order asc;")
if err != nil {
return
}
defer layers.Close()
type layerrow struct {
LayerType int
Shape string
ExpType int
LayerNum int
}
got := []layerrow{}
var got []*Layer
i := 1
var last *layerrow = nil
var last *Layer = nil
got_2 := false
var first *layerrow = nil
var first *Layer = nil
for layers.Next() {
var row = layerrow{}
if err = layers.Scan(&row.LayerType, &row.Shape, &row.ExpType); err != nil {
return
}
for _, layer := range layers {
layer.ShapeToSize()
// Keep track of the first layer so we can keep the size of the image
if first == nil {
first = &row
first = layer
}
row.LayerNum = i
row.Shape = shapeToSize(row.Shape)
if row.ExpType == 2 {
if layer.ExpType == 2 {
if !got_2 {
got = append(got, *last)
got = append(got, last)
got_2 = true
}
got = append(got, row)
got = append(got, layer)
}
last = &row
last = layer
i += 1
}
got = append(got, layerrow{
LayerType: LAYER_DENSE,
Shape: fmt.Sprintf("%d", exp.End-exp.Start+1),
ExpType: 2,
LayerNum: i,
got = append(got, &Layer{
LayerType: LAYER_DENSE,
Shape: fmt.Sprintf("%d", exp.End-exp.Start+1),
ExpType: 2,
LayerOrder: len(got),
})
l.Info("Got layers", "layers", got)
// Generate run folder
run_path := path.Join("/tmp", model.Id+"-defs-"+definition_id+"-retrain")
run_path := path.Join("/tmp", model.Id+"-defs-"+def.Id+"-retrain")
err = os.MkdirAll(run_path, os.ModePerm)
if err != nil {
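
For context, the GetLayers helper this hunk leans on is presumably a thin wrapper in the style of the other repo helpers. A rough, hypothetical sketch only; the parameter type and query text are guesses, not the real implementation:

	// Hypothetical sketch. Assumes GetDbMultitple returns ([]*Layer, error)
	// and that the db parameter is whatever interface GetDbMultitple accepts
	// (Db is a placeholder name here).
	func (d Definition) GetLayers(db Db, extra string) ([]*Layer, error) {
		return GetDbMultitple[Layer](db, "model_definition_layer as mdl where mdl.def_id=$1"+extra, d.Id)
	}

Whatever its exact shape, it has to return []*Layer, since the new code both ranges over the result and appends a final dense layer to it.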
@ -459,7 +446,7 @@ func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string
}
// Copy result around
result_path := path.Join("savedData", model.Id, "defs", definition_id)
result_path := path.Join("savedData", model.Id, "defs", def.Id)
if err = tmpl.Execute(f, AnyMap{
"Layers": got,
@ -515,7 +502,7 @@ func trainDefinitionExpandExp(c BasePack, model *BaseModel, definition_id string
return
}
func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load_prev bool) (accuracy float64, err error) {
func trainDefinitionExp(c BasePack, model *BaseModel, def Definition, load_prev bool) (accuracy float64, err error) {
accuracy = 0
l := c.GetLogger()
db := c.GetDb()
@ -531,7 +518,7 @@ func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load
}
// status = 2 (INIT) 3 (TRAINING)
heads, err := GetDbMultitple[ExpHead](db, "exp_model_head where def_id=$1 and (status = 2 or status = 3)", definition_id)
heads, err := GetDbMultitple[ExpHead](db, "exp_model_head where def_id=$1 and (status = 2 or status = 3)", def.Id)
if err != nil {
return
} else if len(heads) == 0 {
@ -549,42 +536,24 @@ func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load
return
}
layers, err := db.Query("select layer_type, shape, exp_type from model_definition_layer where def_id=$1 order by layer_order asc;", definition_id)
layers, err := def.GetLayers(db, " order by layer_order asc;")
if err != nil {
return
}
defer layers.Close()
type layerrow struct {
LayerType int
Shape string
ExpType int
LayerNum int
for _, layer := range layers {
layer.ShapeToSize()
}
got := []layerrow{}
i := 1
for layers.Next() {
var row = layerrow{}
if err = layers.Scan(&row.LayerType, &row.Shape, &row.ExpType); err != nil {
return
}
row.LayerNum = i
row.Shape = shapeToSize(row.Shape)
got = append(got, row)
i += 1
}
got = append(got, layerrow{
LayerType: LAYER_DENSE,
Shape: fmt.Sprintf("%d", exp.End-exp.Start+1),
ExpType: 2,
LayerNum: i,
layers = append(layers, &Layer{
LayerType: LAYER_DENSE,
Shape: fmt.Sprintf("%d", exp.End-exp.Start+1),
ExpType: 2,
LayerOrder: len(layers),
})
// Generate run folder
run_path := path.Join("/tmp", model.Id+"-defs-"+definition_id)
run_path := path.Join("/tmp", model.Id+"-defs-"+def.Id)
err = os.MkdirAll(run_path, os.ModePerm)
if err != nil {
@ -611,11 +580,11 @@ func trainDefinitionExp(c BasePack, model *BaseModel, definition_id string, load
}
// Copy result around
result_path := path.Join("savedData", model.Id, "defs", definition_id)
result_path := path.Join("savedData", model.Id, "defs", def.Id)
if err = tmpl.Execute(f, AnyMap{
"Layers": got,
"Size": got[0].Shape,
"Layers": layers,
"Size": layers[0].Shape,
"DataDir": path.Join(getDir(), "savedData", model.Id, "data"),
"HeadId": exp.Id,
"RunPath": run_path,
@ -683,21 +652,6 @@ func remove[T interface{}](lst []T, i int) []T {
return append(lst[:i], lst[i+1:]...)
}
type TrainModelRow struct {
id string
target_accuracy int
epoch int
acuracy float64
}
type TraingModelRowDefinitions []TrainModelRow
func (nf TraingModelRowDefinitions) Len() int { return len(nf) }
func (nf TraingModelRowDefinitions) Swap(i, j int) { nf[i], nf[j] = nf[j], nf[i] }
func (nf TraingModelRowDefinitions) Less(i, j int) bool {
return nf[i].acuracy < nf[j].acuracy
}
type ToRemoveList []int
func (nf ToRemoveList) Len() int { return len(nf) }
@ -886,33 +840,18 @@ func trainModel(c BasePack, model *BaseModel) (err error) {
return
}
type TrainModelRowUsable struct {
Id string
TargetAccuracy int `db:"target_accuracy"`
Epoch int
Acuracy float64 `db:"0"`
}
type TrainModelRowUsables []*TrainModelRowUsable
func (nf TrainModelRowUsables) Len() int { return len(nf) }
func (nf TrainModelRowUsables) Swap(i, j int) { nf[i], nf[j] = nf[j], nf[i] }
func (nf TrainModelRowUsables) Less(i, j int) bool {
return nf[i].Acuracy < nf[j].Acuracy
}
func trainModelExp(c BasePack, model *BaseModel) (err error) {
l := c.GetLogger()
db := c.GetDb()
var definitions TrainModelRowUsables
definitions, err = GetDbMultitple[TrainModelRowUsable](db, "model_definition where status=$1 and model_id=$2", MODEL_DEFINITION_STATUS_INIT, model.Id)
defs_, err := model.GetDefinitions(db, " and status=$2;", MODEL_DEFINITION_STATUS_INIT)
if err != nil {
l.Error("Failed to get definitions")
return
}
if len(definitions) == 0 {
var defs SortByAccuracyDefinitions = defs_
if len(defs) == 0 {
l.Error("No Definitions defined!")
return errors.New("No Definitions found")
}
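
The var defs SortByAccuracyDefinitions = defs_ conversion above implies that type is a named []*Definition implementing sort.Interface, much like the removed TrainModelRowUsables wrapper. A hypothetical sketch of what it presumably looks like elsewhere in the repo:

	// Hypothetical sketch (the real declaration lives elsewhere): same
	// sort.Interface pattern as the removed wrapper types, but over the
	// shared Definition model so Epoch/Accuracy updates land on one struct.
	type SortByAccuracyDefinitions []*Definition

	func (nf SortByAccuracyDefinitions) Len() int      { return len(nf) }
	func (nf SortByAccuracyDefinitions) Swap(i, j int) { nf[i], nf[j] = nf[j], nf[i] }
	func (nf SortByAccuracyDefinitions) Less(i, j int) bool {
		return nf[i].Accuracy < nf[j].Accuracy
	}

That would be why sort.Sort(sort.Reverse(defs)) and defs[0].Accuracy keep working unchanged further down.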
@ -922,9 +861,9 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
for {
var toRemove ToRemoveList = []int{}
for i, def := range definitions {
for i, def := range defs {
ModelDefinitionUpdateStatus(c, def.Id, MODEL_DEFINITION_STATUS_TRAINING)
accuracy, err := trainDefinitionExp(c, model, def.Id, !firstRound)
accuracy, err := trainDefinitionExp(c, model, *def, !firstRound)
if err != nil {
l.Error("Failed to train definition!Err:", "err", err)
ModelDefinitionUpdateStatus(c, def.Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
@ -933,10 +872,10 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
}
def.Epoch += EPOCH_PER_RUN
accuracy = accuracy * 100
def.Acuracy = float64(accuracy)
def.Accuracy = float64(accuracy)
definitions[i].Epoch += EPOCH_PER_RUN
definitions[i].Acuracy = accuracy
defs[i].Epoch += EPOCH_PER_RUN
defs[i].Accuracy = accuracy
if accuracy >= float64(def.TargetAccuracy) {
l.Info("Found a definition that reaches target_accuracy!")
@ -986,10 +925,10 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
l.Info("Round done", "toRemove", toRemove)
for _, n := range toRemove {
definitions = remove(definitions, n)
defs = remove(defs, n)
}
len_def := len(definitions)
len_def := len(defs)
if len_def == 0 {
break
@ -997,14 +936,14 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
continue
}
sort.Sort(sort.Reverse(definitions))
acc := definitions[0].Acuracy - 20.0
sort.Sort(sort.Reverse(defs))
acc := defs[0].Accuracy - 20.0
l.Info("Training models, Highest acc", "acc", definitions[0].Acuracy, "mod_acc", acc)
l.Info("Training models, Highest acc", "acc", defs[0].Accuracy, "mod_acc", acc)
toRemove = []int{}
for i, def := range definitions {
if def.Acuracy < acc {
for i, def := range defs {
if def.Accuracy < acc {
toRemove = append(toRemove, i)
}
}
@ -1014,8 +953,8 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
sort.Sort(sort.Reverse(toRemove))
for _, n := range toRemove {
l.Warn("Removing definition not fast enough learning", "n", n)
ModelDefinitionUpdateStatus(c, definitions[n].Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
definitions = remove(definitions, n)
ModelDefinitionUpdateStatus(c, defs[n].Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
defs = remove(defs, n)
}
}
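
One detail worth spelling out in the removal loop above: remove shifts every element after the removed index one slot to the left, so the indices in toRemove have to be processed from highest to lowest, which is what the sort.Sort(sort.Reverse(toRemove)) guarantees. A small self-contained illustration:

	package main

	import (
		"fmt"
		"sort"
	)

	// Illustration only (not repo code): removing by ascending index would let
	// earlier removals shift the later indices onto the wrong elements.
	func main() {
		lst := []int{10, 20, 30, 40}
		toRemove := []int{0, 2}
		sort.Sort(sort.Reverse(sort.IntSlice(toRemove))) // process 2 first, then 0
		for _, n := range toRemove {
			lst = append(lst[:n], lst[n+1:]...) // same trick as the remove[T] helper
		}
		fmt.Println(lst) // [20 40]
	}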
@ -1030,6 +969,12 @@ func trainModelExp(c BasePack, model *BaseModel) (err error) {
return err
}
err = model.UpdateStatus(db, FAILED_TRAINING)
if err != nil {
l.Error("All definitions failed to train! And Failed to set model status")
return err
}
l.Error("All definitions failed to train!")
return err
} else if err != nil {
@ -1485,31 +1430,31 @@ func trainExpandable(c *Context, model *BaseModel) {
ResetClasses(c, model)
}
var definitions TrainModelRowUsables
definitions, err = GetDbMultitple[TrainModelRowUsable](c, "model_definition where status=$1 and model_id=$2", MODEL_DEFINITION_STATUS_READY, model.Id)
defs_, err := model.GetDefinitions(c, " and status=$2", MODEL_DEFINITION_STATUS_READY)
if err != nil {
failed("Failed to get definitions")
return
}
if len(definitions) != 1 {
var defs SortByAccuracyDefinitions = defs_
if len(defs) != 1 {
failed("There should only be one definition available!")
return
}
firstRound := true
def := definitions[0]
def := defs[0]
epoch := 0
for {
acc, err := trainDefinitionExp(c, model, def.Id, !firstRound)
acc, err := trainDefinitionExp(c, model, *def, !firstRound)
if err != nil {
failed("Failed to train definition!")
return
}
epoch += EPOCH_PER_RUN
if float64(acc*100) >= float64(def.Acuracy) {
if float64(acc*100) >= float64(def.Accuracy) {
c.Logger.Info("Found a definition that reaches target_accuracy!")
_, err = c.Db.Exec("update exp_model_head set status=$1 where def_id=$2 and status=$3;", MODEL_HEAD_STATUS_READY, def.Id, MODEL_HEAD_STATUS_TRAINING)
@ -1614,22 +1559,18 @@ func RunTaskRetrain(b BasePack, task Task) (err error) {
task.UpdateStatusLog(b, TASK_RUNNING, "Model retraining")
var defData struct {
Id string `db:"md.id"`
TargetAcuuracy float64 `db:"md.target_accuracy"`
}
err = GetDBOnce(db, &defData, "models as m inner join model_definition as md on m.id = md.model_id where m.id=$1;", task.ModelId)
defs, err := model.GetDefinitions(db, "")
if err != nil {
failed()
return
}
def := *defs[0]
failed = func() {
ResetClasses(b, model)
ModelUpdateStatus(b, model.Id, READY_RETRAIN_FAILED)
task.UpdateStatusLog(b, TASK_FAILED_RUNNING, "Model failed retraining")
_, err_ := db.Exec("delete from exp_model_head where def_id=$1 and status in (2,3)", defData.Id)
_, err_ := db.Exec("delete from exp_model_head where def_id=$1 and status in (2,3)", def.Id)
if err_ != nil {
panic(err_)
}
@ -1640,21 +1581,21 @@ func RunTaskRetrain(b BasePack, task Task) (err error) {
var epocs = 0
// TODO make max epochs come from db
// TODO re increase the target accuracy
for acc*100 < defData.TargetAcuuracy-5 && epocs < 10 {
for acc*100 < float64(def.TargetAccuracy)-5 && epocs < 10 {
// This is something I have to check
acc, err = trainDefinitionExpandExp(b, model, defData.Id, epocs > 0)
acc, err = trainDefinitionExpandExp(b, model, def, epocs > 0)
if err != nil {
failed()
return
}
l.Info("Retrained model", "accuracy", acc, "target", defData.TargetAcuuracy)
l.Info("Retrained model", "accuracy", acc, "target", def.TargetAccuracy)
epocs += 1
}
if acc*100 < defData.TargetAcuuracy {
l.Error("Model never achived targetd accuracy", "acc", acc*100, "target", defData.TargetAcuuracy)
if acc*100 < float64(def.TargetAccuracy)-5 {
l.Error("Model never achived targetd accuracy", "acc", acc*100, "target", def.TargetAccuracy)
failed()
return
}
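
For clarity on the 5-point tolerance above, with illustrative numbers only: at a target accuracy of 95, both the retrain loop and the follow-up check now use the same floor of 90 (target minus 5), so a model that lands at 92 passes:

	package main

	import "fmt"

	// Illustrative values, not repo data: the loop runs while acc*100 is
	// below target-5 (and under 10 epochs), and the final check uses the
	// same threshold.
	func main() {
		target := 95.0 // float64(def.TargetAccuracy)
		acc := 0.92    // accuracy returned by training, in the 0..1 range
		fmt.Println(acc*100 < target-5) // false: 92 >= 90, so the model passes
	}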
@ -1675,7 +1616,7 @@ func RunTaskRetrain(b BasePack, task Task) (err error) {
return
}
_, err = db.Exec("update exp_model_head set status=$1 where status=$2 and model_id=$3", MODEL_HEAD_STATUS_READY, MODEL_HEAD_STATUS_TRAINING, model.Id)
_, err = db.Exec("update exp_model_head set status=$1 where status=$2 and def_id=$3", MODEL_HEAD_STATUS_READY, MODEL_HEAD_STATUS_TRAINING, def.Id)
if err != nil {
l.Error("Error while updating the classes", "error", err)
failed()

View File

@ -215,7 +215,7 @@
</div>
{:else if m.status == -3 || m.status == -4}
<BaseModelInfo model={m} />
<form on:submit={resetModel}>
<form on:submit|preventDefault={resetModel}>
Failed Prepare for training.<br />
<div class="spacer"></div>
<MessageSimple bind:this={resetMessages} />