Fixed the model not training and running forever
@@ -1185,7 +1185,14 @@ func splitModel(c BasePack, model *BaseModel) (err error) {
    count := -1

    for layers.Next() {
        var layerrow layerrow
        if err = layers.Scan(&layerrow.ExpType); err != nil {
            return
        }
        count += 1
        if layerrow.ExpType == 2 {
            break
        }
    }

    if count == -1 {
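The hunk above counts layer rows until it reaches one whose ExpType is 2, which marks the split point. A minimal standalone sketch of that row-scanning pattern using database/sql; this is not code from the commit, and the table and column names are assumptions for illustration only:

package sketch

import "database/sql"

// countUntilSplit scans layer rows in order and counts them until a row with
// exp_type == 2 is seen, mirroring the loop in splitModel above.
// The table and column names here are hypothetical.
func countUntilSplit(db *sql.DB, defID int) (count int, err error) {
    rows, err := db.Query("select exp_type from layers where def_id=$1 order by layer_order", defID)
    if err != nil {
        return -1, err
    }
    defer rows.Close()

    count = -1
    for rows.Next() {
        var expType int
        if err = rows.Scan(&expType); err != nil {
            return -1, err
        }
        count += 1
        if expType == 2 {
            break
        }
    }
    return count, rows.Err()
}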
@@ -1294,63 +1301,32 @@ func generateDefinition(c BasePack, model *BaseModel, target_accuracy int, numbe
            order++
        }

        if complexity == 0 {
            err = MakeLayer(db, def_id, order, LAYER_FLATTEN, "")
            loop := max(int((math.Log(float64(model.Width)) / math.Log(float64(10)))), 1)
            for i := 0; i < loop; i++ {
                err = MakeLayer(db, def_id, order, LAYER_SIMPLE_BLOCK, "")
                order++
                if err != nil {
                    failed()
                    return
                }
                order++
            }

            loop := int(math.Log2(float64(number_of_classes)))
            for i := 0; i < loop; i++ {
                err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
                order++
                if err != nil {
                    ModelUpdateStatus(c, model.Id, FAILED_PREPARING_TRAINING)
                    return
                }
            }

        } else if complexity == 1 || complexity == 2 {

            loop := int((math.Log(float64(model.Width)) / math.Log(float64(10))))
            if loop == 0 {
                loop = 1
            }
            for i := 0; i < loop; i++ {
                err = MakeLayer(db, def_id, order, LAYER_SIMPLE_BLOCK, "")
                order++
                if err != nil {
                    failed()
                    return
                }
            }

            err = MakeLayer(db, def_id, order, LAYER_FLATTEN, "")
            if err != nil {
                failed()
                return
            }
            order++

            loop = int((math.Log(float64(number_of_classes)) / math.Log(float64(10))) / 2)
            if loop == 0 {
                loop = 1
            }
            for i := 0; i < loop; i++ {
                err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
                order++
                if err != nil {
                    failed()
                    return
                }
            }
        } else {
            log.Error("Unkown complexity", "complexity", complexity)
            err = MakeLayer(db, def_id, order, LAYER_FLATTEN, "")
            if err != nil {
                failed()
                return
            }
            order++

            loop = max(int((math.Log(float64(number_of_classes))/math.Log(float64(10)))/2), 1)
            for i := 0; i < loop; i++ {
                err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
                order++
                if err != nil {
                    failed()
                    return
                }
            }

        err = ModelDefinitionUpdateStatus(c, def_id, MODEL_DEFINITION_STATUS_INIT)
        if err != nil {
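All of the layer counts in this hunk come from logarithms of the input width and the class count. As a concrete check of what those formulas evaluate to, here is a standalone sketch (not part of this commit) that assumes a width of 28 and 10 classes; the inputs are illustrative only:

package sketch

import (
    "fmt"
    "math"
)

// sizingExample evaluates the sizing formulas used in generateDefinition for
// assumed inputs: width 28 and 10 classes (illustrative values only).
func sizingExample() {
    width, classes := 28.0, 10.0

    // Simple-block count: max(int(log10(width)), 1) = max(int(1.447), 1) = 1
    blocks := max(int(math.Log(width)/math.Log(10)), 1)
    fmt.Println("simple blocks:", blocks)

    // Dense-layer count: int(log2(classes)) = int(3.32) = 3
    dense := int(math.Log2(classes))
    for i := 0; i < dense; i++ {
        // Widths step down toward the class count: 30, 20, 10
        fmt.Printf("dense layer %d: %d,1\n", i, int(classes)*(dense-i))
    }
}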
@@ -1486,10 +1462,10 @@ func generateExpandableDefinition(c BasePack, model *BaseModel, target_accuracy

    log.Info("Size of the dense layers", "loop", loop)

    // loop = max(loop, 3)
    loop = max(loop, 3)

    for i := 0; i < loop; i++ {
        err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
        err = MakeLayerExpandable(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)*2), 2)
        order++
        if err != nil {
            failed()
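For scale, with an assumed 10 classes and loop held at 3 by the max(loop, 3) floor, the plain MakeLayer call above would create dense layers of 30, 20 and 10 units, while the MakeLayerExpandable call (width doubled, plus a final argument of 2) creates 60, 40 and 20. The class count here is an example value, not taken from this commit.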
@@ -1712,10 +1688,22 @@ func RunTaskRetrain(b BasePack, task Task) (err error) {
        return
    }

    failed = func() {
        ResetClasses(b, model)
        ModelUpdateStatus(b, model.Id, READY_RETRAIN_FAILED)
        task.UpdateStatusLog(b, TASK_FAILED_RUNNING, "Model failed retraining")
        _, err_ := db.Exec("delete from exp_model_head where def_id=$1 and status in (2,3)", defData.Id)
        if err_ != nil {
            panic(err_)
        }
        l.Error("Failed to retrain", "err", err)
    }

    var acc float64 = 0
    var epocs = 0
    // TODO make max epochs come from db
    for acc*100 < defData.TargetAcuuracy && epocs < 20 {
    // TODO re increase the target accuracy
    for acc*100 < defData.TargetAcuuracy-5 && epocs < 10 {
        // This is something I have to check
        acc, err = trainDefinitionExpandExp(b, model, defData.Id, epocs > 0)
        if err != nil {
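The bounded loop above is where the "running forever" part of the commit title is addressed: retraining stops once accuracy is within 5 points of the target or after a fixed number of epochs. A standalone sketch of that shape, not code from this commit, with a hypothetical train callback standing in for trainDefinitionExpandExp:

package sketch

// trainFunc stands in for trainDefinitionExpandExp: it takes whether to
// resume from an existing head and returns accuracy in the range [0, 1].
type trainFunc func(resume bool) (float64, error)

// retrainUntil keeps training until accuracy reaches target-5 (in percent)
// or the epoch cap is hit, mirroring the bounded loop in RunTaskRetrain.
func retrainUntil(train trainFunc, targetAccuracy float64, maxEpochs int) (float64, error) {
    var acc float64
    for epochs := 0; acc*100 < targetAccuracy-5 && epochs < maxEpochs; epochs++ {
        var err error
        // Resume from the previous state after the first pass (epochs > 0).
        acc, err = train(epochs > 0)
        if err != nil {
            return acc, err
        }
    }
    return acc, nil
}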