Fixed the model not training and running forever

2024-04-19 22:03:14 +01:00
parent 2fa7680d0b
commit 29b69deaf6
4 changed files with 52 additions and 56 deletions


@@ -1185,7 +1185,14 @@ func splitModel(c BasePack, model *BaseModel) (err error) {
count := -1
for layers.Next() {
var layerrow layerrow
if err = layers.Scan(&layerrow.ExpType); err != nil {
return
}
count += 1
if layerrow.ExpType == 2 {
break
}
}
if count == -1 {
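A minimal standalone sketch of the split-point scan this hunk introduces, using a plain slice instead of the scanned DB rows. It assumes (as the break suggests, but the diff does not state) that ExpType 2 marks the first expandable layer, so count ends up as that layer's index, or -1 when there are no layers at all:

package main

import "fmt"

func splitIndex(expTypes []int) int {
	count := -1
	for _, t := range expTypes {
		count++
		if t == 2 {
			break
		}
	}
	return count
}

func main() {
	// Two non-expandable layers, then the first expandable one at index 2.
	fmt.Println(splitIndex([]int{1, 1, 2, 2})) // prints 2
}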
@@ -1294,63 +1301,32 @@ func generateDefinition(c BasePack, model *BaseModel, target_accuracy int, numbe
order++
}
if complexity == 0 {
err = MakeLayer(db, def_id, order, LAYER_FLATTEN, "")
loop := max(int((math.Log(float64(model.Width)) / math.Log(float64(10)))), 1)
for i := 0; i < loop; i++ {
err = MakeLayer(db, def_id, order, LAYER_SIMPLE_BLOCK, "")
order++
if err != nil {
failed()
return
}
order++
}
loop := int(math.Log2(float64(number_of_classes)))
for i := 0; i < loop; i++ {
err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
order++
if err != nil {
ModelUpdateStatus(c, model.Id, FAILED_PREPARING_TRAINING)
return
}
}
} else if complexity == 1 || complexity == 2 {
loop := int((math.Log(float64(model.Width)) / math.Log(float64(10))))
if loop == 0 {
loop = 1
}
for i := 0; i < loop; i++ {
err = MakeLayer(db, def_id, order, LAYER_SIMPLE_BLOCK, "")
order++
if err != nil {
failed()
return
}
}
err = MakeLayer(db, def_id, order, LAYER_FLATTEN, "")
if err != nil {
failed()
return
}
order++
loop = int((math.Log(float64(number_of_classes)) / math.Log(float64(10))) / 2)
if loop == 0 {
loop = 1
}
for i := 0; i < loop; i++ {
err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
order++
if err != nil {
failed()
return
}
}
} else {
log.Error("Unkown complexity", "complexity", complexity)
err = MakeLayer(db, def_id, order, LAYER_FLATTEN, "")
if err != nil {
failed()
return
}
order++
loop = max(int((math.Log(float64(number_of_classes))/math.Log(float64(10)))/2), 1)
for i := 0; i < loop; i++ {
err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
order++
if err != nil {
failed()
return
}
}
err = ModelDefinitionUpdateStatus(c, def_id, MODEL_DEFINITION_STATUS_INIT)
if err != nil {
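The layer counts in this hunk come from two log10 formulas: the number of LAYER_SIMPLE_BLOCK layers grows with log10 of the model width, and the number of LAYER_DENSE layers with half of log10 of the class count, each clamped to at least 1. A quick standalone check of those formulas with made-up inputs (the real values come from the model row), assuming the Go 1.21 built-in max used in the diff:

package main

import (
	"fmt"
	"math"
)

func main() {
	width := 500          // example model.Width
	numberOfClasses := 62 // example number_of_classes

	// At least one LAYER_SIMPLE_BLOCK, growing with log10(width).
	blocks := max(int(math.Log(float64(width))/math.Log(10)), 1)

	// At least one LAYER_DENSE, growing with log10(classes)/2.
	dense := max(int(math.Log(float64(numberOfClasses))/math.Log(10)/2), 1)

	fmt.Println(blocks, dense) // 2 1 for these inputs
}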
@@ -1486,10 +1462,10 @@ func generateExpandableDefinition(c BasePack, model *BaseModel, target_accuracy
log.Info("Size of the dense layers", "loop", loop)
// loop = max(loop, 3)
loop = max(loop, 3)
for i := 0; i < loop; i++ {
err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
err = MakeLayerExpandable(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)*2), 2)
order++
if err != nil {
failed()
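Compared to the removed MakeLayer call, the expandable head doubles each dense size and tapers it down per layer. A rough illustration of the "<units>,1" shape strings that end up in MakeLayerExpandable, assuming 10 classes and the loop clamped to 3 as in the hunk:

package main

import "fmt"

func main() {
	numberOfClasses := 10
	loop := 3 // max(loop, 3) in the real code

	for i := 0; i < loop; i++ {
		// Same shape string passed to MakeLayerExpandable above.
		fmt.Printf("%d,1\n", numberOfClasses*(loop-i)*2)
	}
	// Prints: 60,1 then 40,1 then 20,1
}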
@@ -1712,10 +1688,22 @@ func RunTaskRetrain(b BasePack, task Task) (err error) {
return
}
failed = func() {
ResetClasses(b, model)
ModelUpdateStatus(b, model.Id, READY_RETRAIN_FAILED)
task.UpdateStatusLog(b, TASK_FAILED_RUNNING, "Model failed retraining")
_, err_ := db.Exec("delete from exp_model_head where def_id=$1 and status in (2,3)", defData.Id)
if err_ != nil {
panic(err_)
}
l.Error("Failed to retrain", "err", err)
}
var acc float64 = 0
var epocs = 0
// TODO make max epochs come from db
for acc*100 < defData.TargetAcuuracy && epocs < 20 {
// TODO re increase the target accuracy
for acc*100 < defData.TargetAcuuracy-5 && epocs < 10 {
// This is something I have to check
acc, err = trainDefinitionExpandExp(b, model, defData.Id, epocs > 0)
if err != nil {
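The tightened loop bounds above are the part of this commit that addresses the "running forever" half of the message: retraining now stops once accuracy is within 5 points of the target, or after 10 passes instead of 20. A minimal sketch of that stopping rule, with a fake trainOnce standing in for trainDefinitionExpandExp and made-up numbers:

package main

import "fmt"

func main() {
	targetAccuracy := 95.0 // e.g. defData.TargetAcuuracy, in percent
	acc := 0.0             // fraction in [0,1] returned by training
	epochs := 0

	// Fake training step: pretend each pass improves accuracy a little.
	trainOnce := func(prev float64) float64 { return prev + 0.12 }

	for acc*100 < targetAccuracy-5 && epochs < 10 {
		acc = trainOnce(acc)
		epochs++
	}
	fmt.Println(acc, epochs) // terminates after at most 10 passes
}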