Fixed the model failing to train and running forever

Andre Henriques 2024-04-19 22:03:14 +01:00
parent 2fa7680d0b
commit 29b69deaf6
4 changed files with 52 additions and 56 deletions

View File

@@ -8,6 +8,7 @@ import (
. "git.andr3h3nriqu3s.com/andr3/fyp/logic/db_types"
. "git.andr3h3nriqu3s.com/andr3/fyp/logic/tasks/utils"
"github.com/charmbracelet/log"
tf "github.com/galeone/tensorflow/tensorflow/go"
"github.com/galeone/tensorflow/tensorflow/go/op"
tg "github.com/galeone/tfgo"
@@ -19,6 +20,7 @@ func ReadPNG(scope *op.Scope, imagePath string, channels int64) *image.Image {
contents := op.ReadFile(scope.SubScope("ReadFile"), op.Const(scope.SubScope("filename"), imagePath))
output := op.DecodePng(scope.SubScope("DecodePng"), contents, op.DecodePngChannels(channels))
output = op.ExpandDims(scope.SubScope("ExpandDims"), output, op.Const(scope.SubScope("axis"), []int32{0}))
output = op.ExpandDims(scope.SubScope("Stack"), output, op.Const(scope.SubScope("axis"), []int32{1}))
image := &image.Image{
Tensor: tg.NewTensor(scope, output)}
return image.Scale(0, 255)
@@ -29,6 +31,7 @@ func ReadJPG(scope *op.Scope, imagePath string, channels int64) *image.Image {
contents := op.ReadFile(scope.SubScope("ReadFile"), op.Const(scope.SubScope("filename"), imagePath))
output := op.DecodePng(scope.SubScope("DecodeJpeg"), contents, op.DecodePngChannels(channels))
output = op.ExpandDims(scope.SubScope("ExpandDims"), output, op.Const(scope.SubScope("axis"), []int32{0}))
output = op.ExpandDims(scope.SubScope("Stack"), output, op.Const(scope.SubScope("axis"), []int32{1}))
image := &image.Image{
Tensor: tg.NewTensor(scope, output)}
return image.Scale(0, 255)
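
For context on the ReadPNG/ReadJPG hunks: the decode op produces an [H, W, C] tensor, the first ExpandDims adds a leading axis and the second one (scoped "Stack") inserts another at position 1, so the image reaches the model as [1, 1, H, W, C]. A minimal shape-bookkeeping sketch, with an assumed 28×28 grayscale image and a helper name that is not in the repo:

package main

import "fmt"

// expandDims inserts a length-1 dimension at the given axis of a shape,
// mirroring what op.ExpandDims does to the tensor's shape.
func expandDims(shape []int64, axis int) []int64 {
    out := append([]int64{}, shape[:axis]...)
    out = append(out, 1)
    return append(out, shape[axis:]...)
}

func main() {
    shape := []int64{28, 28, 1}  // decoded image: [H, W, C]
    shape = expandDims(shape, 0) // first ExpandDims: [1, H, W, C]
    shape = expandDims(shape, 1) // "Stack" ExpandDims: [1, 1, H, W, C]
    fmt.Println(shape)           // [1 1 28 28 1]
}
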
@@ -49,6 +52,8 @@ func runModelNormal(base BasePack, model *BaseModel, def_id string, inputImage *
var vmax float32 = 0.0
var predictions = results[0].Value().([][]float32)[0]
log.Info("preds", "preds", predictions)
for i, v := range predictions {
if v > vmax {
order = i
@@ -62,10 +67,13 @@ func runModelExp(base BasePack, model *BaseModel, def_id string, inputImage *
}
func runModelExp(base BasePack, model *BaseModel, def_id string, inputImage *tf.Tensor) (order int, confidence float32, err error) {
log := base.GetLogger()
err = nil
order = 0
log.Info("Running base")
base_model := tg.LoadModel(path.Join("savedData", model.Id, "defs", def_id, "base", "model"), []string{"serve"}, nil)
//results := base_model.Exec([]tf.Output{
@@ -86,7 +94,7 @@ func runModelExp(base BasePack, model *BaseModel, def_id string, inputImage *tf.
return
}
base.GetLogger().Info("test", "count", len(heads))
log.Info("Running heads", "heads", heads)
var vmax float32 = 0.0

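The prediction handling in runModelNormal and runModelExp reduces to an argmax over the output vector, keeping the index as the predicted class and the value as the confidence. A self-contained sketch of that pattern, mirroring the vmax/order loop above (the helper name is illustrative):

package main

import "fmt"

// argMax returns the index of the largest probability and the probability
// itself, i.e. the predicted class and its confidence.
func argMax(predictions []float32) (order int, confidence float32) {
    for i, v := range predictions {
        if v > confidence {
            confidence = v
            order = i
        }
    }
    return order, confidence
}

func main() {
    order, confidence := argMax([]float32{0.1, 0.7, 0.2})
    fmt.Println(order, confidence) // 1 0.7
}
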
View File

@@ -1185,7 +1185,14 @@ func splitModel(c BasePack, model *BaseModel) (err error) {
count := -1
for layers.Next() {
var layerrow layerrow
if err = layers.Scan(&layerrow.ExpType); err != nil {
return
}
count += 1
if layerrow.ExpType == 2 {
break
}
}
if count == -1 {
@@ -1294,63 +1301,32 @@ func generateDefinition(c BasePack, model *BaseModel, target_accuracy int, numbe
order++
}
if complexity == 0 {
err = MakeLayer(db, def_id, order, LAYER_FLATTEN, "")
loop := max(int((math.Log(float64(model.Width)) / math.Log(float64(10)))), 1)
for i := 0; i < loop; i++ {
err = MakeLayer(db, def_id, order, LAYER_SIMPLE_BLOCK, "")
order++
if err != nil {
failed()
return
}
order++
}
loop := int(math.Log2(float64(number_of_classes)))
for i := 0; i < loop; i++ {
err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
order++
if err != nil {
ModelUpdateStatus(c, model.Id, FAILED_PREPARING_TRAINING)
return
}
}
} else if complexity == 1 || complexity == 2 {
loop := int((math.Log(float64(model.Width)) / math.Log(float64(10))))
if loop == 0 {
loop = 1
}
for i := 0; i < loop; i++ {
err = MakeLayer(db, def_id, order, LAYER_SIMPLE_BLOCK, "")
order++
if err != nil {
failed()
return
}
}
err = MakeLayer(db, def_id, order, LAYER_FLATTEN, "")
if err != nil {
failed()
return
}
order++
loop = int((math.Log(float64(number_of_classes)) / math.Log(float64(10))) / 2)
if loop == 0 {
loop = 1
}
for i := 0; i < loop; i++ {
err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
order++
if err != nil {
failed()
return
}
}
} else {
log.Error("Unkown complexity", "complexity", complexity)
err = MakeLayer(db, def_id, order, LAYER_FLATTEN, "")
if err != nil {
failed()
return
}
order++
loop = max(int((math.Log(float64(number_of_classes))/math.Log(float64(10)))/2), 1)
for i := 0; i < loop; i++ {
err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
order++
if err != nil {
failed()
return
}
}
err = ModelDefinitionUpdateStatus(c, def_id, MODEL_DEFINITION_STATUS_INIT)
if err != nil {
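
To make the sizing heuristic in generateDefinition concrete: the number of LAYER_SIMPLE_BLOCK layers is max(floor(log10(width)), 1), the number of LAYER_DENSE layers is max(floor(log10(classes)/2), 1), and each dense layer gets classes*(loop-i) units. A worked example under assumed inputs (a 128 px wide model with 10 classes; variable names are illustrative):

package main

import (
    "fmt"
    "math"
)

func main() {
    // Assumed inputs, purely illustrative.
    width := 128.0
    classes := 10

    // LAYER_SIMPLE_BLOCK count: max(floor(log10(width)), 1).
    blocks := max(int(math.Log(width)/math.Log(10)), 1) // log10(128) ≈ 2.1 -> 2

    // LAYER_DENSE count: max(floor(log10(classes)/2), 1).
    dense := max(int((math.Log(float64(classes))/math.Log(10))/2), 1) // 0.5 -> 0 -> clamped to 1

    fmt.Println("simple blocks:", blocks) // 2
    for i := 0; i < dense; i++ {
        fmt.Println("dense layer", i, "units:", classes*(dense-i)) // 10
    }
}
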
@@ -1486,10 +1462,10 @@ func generateExpandableDefinition(c BasePack, model *BaseModel, target_accuracy
log.Info("Size of the dense layers", "loop", loop)
// loop = max(loop, 3)
loop = max(loop, 3)
for i := 0; i < loop; i++ {
err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
err = MakeLayerExpandable(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)*2), 2)
order++
if err != nil {
failed()
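
The same numbers applied to the generateExpandableDefinition hunk: with 10 classes and the minimum of 3 dense layers now enforced by max(loop, 3), MakeLayerExpandable creates heads of 60, 40 and 20 units (classes*(loop-i)*2), twice as wide as the old MakeLayer call. A quick check with assumed values:

package main

import "fmt"

func main() {
    classes, loop := 10, 3 // loop is clamped to at least 3 above; values assumed
    for i := 0; i < loop; i++ {
        fmt.Println(classes * (loop - i) * 2) // 60, 40, 20
    }
}
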
@@ -1712,10 +1688,22 @@ func RunTaskRetrain(b BasePack, task Task) (err error) {
return
}
failed = func() {
ResetClasses(b, model)
ModelUpdateStatus(b, model.Id, READY_RETRAIN_FAILED)
task.UpdateStatusLog(b, TASK_FAILED_RUNNING, "Model failed retraining")
_, err_ := db.Exec("delete from exp_model_head where def_id=$1 and status in (2,3)", defData.Id)
if err_ != nil {
panic(err_)
}
l.Error("Failed to retrain", "err", err)
}
var acc float64 = 0
var epocs = 0
// TODO make max epochs come from db
for acc*100 < defData.TargetAcuuracy && epocs < 20 {
// TODO: re-increase the target accuracy
for acc*100 < defData.TargetAcuuracy-5 && epocs < 10 {
// This is something I have to check
acc, err = trainDefinitionExpandExp(b, model, defData.Id, epocs > 0)
if err != nil {

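The retrain loop's new exit condition is easier to see with numbers: with an assumed target accuracy of 95%, retraining now continues only while measured accuracy is below 90% (target minus 5 points) and for at most 10 epochs, instead of chasing the full target for up to 20 epochs. A tiny check of that threshold (values are made up):

package main

import "fmt"

func main() {
    target := 95.0 // assumed TargetAcuuracy value, in percent
    for _, acc := range []float64{0.85, 0.89, 0.90, 0.93} {
        // RunTaskRetrain keeps looping only while this is true
        // (and additionally stops after 10 epochs).
        fmt.Println(acc, acc*100 < target-5)
    }
}
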
View File

@@ -9,9 +9,9 @@ import requests
class NotifyServerCallback(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, log, *args, **kwargs):
{{ if .HeadId }}
requests.get(f'{{ .Host }}/api/model/head/epoch/update?epoch={epoch + 1}&accuracy={log["accuracy"]}&head_id={{.HeadId}}')
requests.get(f'{{ .Host }}/api/model/head/epoch/update?epoch={epoch + 1}&accuracy={log["val_accuracy"]}&head_id={{.HeadId}}')
{{ else }}
requests.get(f'{{ .Host }}/api/model/epoch/update?model_id={{.Model.Id}}&epoch={epoch + 1}&accuracy={log["accuracy"]}&definition={{.DefId}}')
requests.get(f'{{ .Host }}/api/model/epoch/update?model_id={{.Model.Id}}&epoch={epoch + 1}&accuracy={log["val_accuracy"]}&definition={{.DefId}}')
{{end}}

View File

@@ -10,7 +10,7 @@ import numpy as np
class NotifyServerCallback(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, log, *args, **kwargs):
requests.get(f'{{ .Host }}/api/model/head/epoch/update?epoch={epoch + 1}&accuracy={log["accuracy"]}&head_id={{.HeadId}}')
requests.get(f'{{ .Host }}/api/model/head/epoch/update?epoch={epoch + 1}&accuracy={log["val_accuracy"]}&head_id={{.HeadId}}')
DATA_DIR = "{{ .DataDir }}"