Fixed the model not training and running forever

parent 2fa7680d0b
commit 29b69deaf6
@@ -8,6 +8,7 @@ import (
 	. "git.andr3h3nriqu3s.com/andr3/fyp/logic/db_types"
 	. "git.andr3h3nriqu3s.com/andr3/fyp/logic/tasks/utils"
 
+	"github.com/charmbracelet/log"
 	tf "github.com/galeone/tensorflow/tensorflow/go"
 	"github.com/galeone/tensorflow/tensorflow/go/op"
 	tg "github.com/galeone/tfgo"
@@ -19,6 +20,7 @@ func ReadPNG(scope *op.Scope, imagePath string, channels int64) *image.Image {
 	contents := op.ReadFile(scope.SubScope("ReadFile"), op.Const(scope.SubScope("filename"), imagePath))
 	output := op.DecodePng(scope.SubScope("DecodePng"), contents, op.DecodePngChannels(channels))
 	output = op.ExpandDims(scope.SubScope("ExpandDims"), output, op.Const(scope.SubScope("axis"), []int32{0}))
+	output = op.ExpandDims(scope.SubScope("Stack"), output, op.Const(scope.SubScope("axis"), []int32{1}))
 	image := &image.Image{
 		Tensor: tg.NewTensor(scope, output)}
 	return image.Scale(0, 255)
@@ -29,6 +31,7 @@ func ReadJPG(scope *op.Scope, imagePath string, channels int64) *image.Image {
 	contents := op.ReadFile(scope.SubScope("ReadFile"), op.Const(scope.SubScope("filename"), imagePath))
 	output := op.DecodePng(scope.SubScope("DecodeJpeg"), contents, op.DecodePngChannels(channels))
 	output = op.ExpandDims(scope.SubScope("ExpandDims"), output, op.Const(scope.SubScope("axis"), []int32{0}))
+	output = op.ExpandDims(scope.SubScope("Stack"), output, op.Const(scope.SubScope("axis"), []int32{1}))
 	image := &image.Image{
 		Tensor: tg.NewTensor(scope, output)}
 	return image.Scale(0, 255)
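Note: a minimal sketch of the shape bookkeeping behind the ExpandDims calls added to ReadPNG and ReadJPG above, assuming DecodePng yields an [H, W, C] tensor; the concrete sizes are placeholders, not values from this commit.

package main

import "fmt"

// expandDims mimics op.ExpandDims for static shapes: it inserts a 1 at axis.
func expandDims(shape []int, axis int) []int {
	out := append([]int{}, shape[:axis]...)
	out = append(out, 1)
	return append(out, shape[axis:]...)
}

func main() {
	shape := []int{28, 28, 1}    // decoded image: [H, W, C] (placeholder sizes)
	shape = expandDims(shape, 0) // existing axis-0 expand: [1, H, W, C]
	shape = expandDims(shape, 1) // added "Stack" expand:   [1, 1, H, W, C]
	fmt.Println(shape)           // [1 1 28 28 1]
}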
@@ -49,6 +52,8 @@ func runModelNormal(base BasePack, model *BaseModel, def_id string, inputImage *
 	var vmax float32 = 0.0
 	var predictions = results[0].Value().([][]float32)[0]
 
+	log.Info("preds", "preds", predictions)
+
 	for i, v := range predictions {
 		if v > vmax {
 			order = i
@@ -62,10 +67,13 @@ func runModelNormal(base BasePack, model *BaseModel, def_id string, inputImage *
 }
 
 func runModelExp(base BasePack, model *BaseModel, def_id string, inputImage *tf.Tensor) (order int, confidence float32, err error) {
+	log := base.GetLogger()
 
 	err = nil
 	order = 0
 
+	log.Info("Running base")
+
 	base_model := tg.LoadModel(path.Join("savedData", model.Id, "defs", def_id, "base", "model"), []string{"serve"}, nil)
 
 	//results := base_model.Exec([]tf.Output{
@@ -86,7 +94,7 @@ func runModelExp(base BasePack, model *BaseModel, def_id string, inputImage *tf.
 		return
 	}
 
-	base.GetLogger().Info("test", "count", len(heads))
+	log.Info("Running heads", "heads", heads)
 
 	var vmax float32 = 0.0
 
@@ -1185,7 +1185,14 @@ func splitModel(c BasePack, model *BaseModel) (err error) {
 	count := -1
 
 	for layers.Next() {
+		var layerrow layerrow
+		if err = layers.Scan(&layerrow.ExpType); err != nil {
+			return
+		}
 		count += 1
+		if layerrow.ExpType == 2 {
+			break
+		}
 	}
 
 	if count == -1 {
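Note: a standalone sketch of the scan pattern introduced in splitModel above, assuming a query that returns the exp_type column one row at a time; the function name and query shape are illustrative, not part of this commit.

package main

import "database/sql"

// findSplitIndex mirrors the loop above: it walks the layer rows in order and
// returns the zero-based index of the first row whose exp_type equals 2.
// If no row matches it returns the index of the last row, and -1 when the
// result set is empty (the caller above treats -1 as "nothing to split").
func findSplitIndex(layers *sql.Rows) (int, error) {
	count := -1
	for layers.Next() {
		var expType int
		if err := layers.Scan(&expType); err != nil {
			return count, err
		}
		count += 1
		if expType == 2 {
			break
		}
	}
	return count, layers.Err()
}

// No demo main: wiring up a real *sql.Rows needs a live database connection.
func main() {}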
@@ -1294,63 +1301,32 @@ func generateDefinition(c BasePack, model *BaseModel, target_accuracy int, numbe
 		order++
 	}
 
-	if complexity == 0 {
-		err = MakeLayer(db, def_id, order, LAYER_FLATTEN, "")
+	loop := max(int((math.Log(float64(model.Width)) / math.Log(float64(10)))), 1)
+	for i := 0; i < loop; i++ {
+		err = MakeLayer(db, def_id, order, LAYER_SIMPLE_BLOCK, "")
+		order++
 		if err != nil {
 			failed()
 			return
 		}
-		order++
+	}
 
-		loop := int(math.Log2(float64(number_of_classes)))
-		for i := 0; i < loop; i++ {
-			err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
-			order++
-			if err != nil {
-				ModelUpdateStatus(c, model.Id, FAILED_PREPARING_TRAINING)
-				return
-			}
-		}
-
-	} else if complexity == 1 || complexity == 2 {
-
-		loop := int((math.Log(float64(model.Width)) / math.Log(float64(10))))
-		if loop == 0 {
-			loop = 1
-		}
-		for i := 0; i < loop; i++ {
-			err = MakeLayer(db, def_id, order, LAYER_SIMPLE_BLOCK, "")
-			order++
-			if err != nil {
-				failed()
-				return
-			}
-		}
-
-		err = MakeLayer(db, def_id, order, LAYER_FLATTEN, "")
-		if err != nil {
-			failed()
-			return
-		}
-		order++
-
-		loop = int((math.Log(float64(number_of_classes)) / math.Log(float64(10))) / 2)
-		if loop == 0 {
-			loop = 1
-		}
-		for i := 0; i < loop; i++ {
-			err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
-			order++
-			if err != nil {
-				failed()
-				return
-			}
-		}
-	} else {
-		log.Error("Unkown complexity", "complexity", complexity)
+	err = MakeLayer(db, def_id, order, LAYER_FLATTEN, "")
+	if err != nil {
 		failed()
 		return
 	}
+	order++
+
+	loop = max(int((math.Log(float64(number_of_classes))/math.Log(float64(10)))/2), 1)
+	for i := 0; i < loop; i++ {
+		err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
+		order++
+		if err != nil {
+			failed()
+			return
+		}
+	}
 
 	err = ModelDefinitionUpdateStatus(c, def_id, MODEL_DEFINITION_STATUS_INIT)
 	if err != nil {
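Note: a worked example of the layer-count arithmetic in the rewritten generateDefinition block above; the width and class count are illustrative, and the explicit clamps stand in for the max(..., 1) calls used in the hunk.

package main

import (
	"fmt"
	"math"
)

func main() {
	// Convolution blocks: max(floor(log10(width)), 1).
	width := 400.0
	blocks := int(math.Log(width) / math.Log(10)) // floor(log10(400)) = 2
	if blocks < 1 {
		blocks = 1
	}

	// Dense layers: max(floor(log10(classes) / 2), 1).
	classes := 10.0
	dense := int((math.Log(classes) / math.Log(10)) / 2) // floor(0.5) = 0
	if dense < 1 {
		dense = 1 // the clamp guarantees at least one dense layer
	}

	fmt.Println(blocks, dense) // 2 1
}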
@@ -1486,10 +1462,10 @@ func generateExpandableDefinition(c BasePack, model *BaseModel, target_accuracy
 
 	log.Info("Size of the dense layers", "loop", loop)
 
-	// loop = max(loop, 3)
+	loop = max(loop, 3)
 
 	for i := 0; i < loop; i++ {
-		err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
+		err = MakeLayerExpandable(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)*2), 2)
 		order++
 		if err != nil {
 			failed()
@@ -1712,10 +1688,22 @@ func RunTaskRetrain(b BasePack, task Task) (err error) {
 		return
 	}
 
+	failed = func() {
+		ResetClasses(b, model)
+		ModelUpdateStatus(b, model.Id, READY_RETRAIN_FAILED)
+		task.UpdateStatusLog(b, TASK_FAILED_RUNNING, "Model failed retraining")
+		_, err_ := db.Exec("delete from exp_model_head where def_id=$1 and status in (2,3)", defData.Id)
+		if err_ != nil {
+			panic(err_)
+		}
+		l.Error("Failed to retrain", "err", err)
+	}
+
 	var acc float64 = 0
 	var epocs = 0
 	// TODO make max epochs come from db
-	for acc*100 < defData.TargetAcuuracy && epocs < 20 {
+	// TODO re increase the target accuracy
+	for acc*100 < defData.TargetAcuuracy-5 && epocs < 10 {
 		// This is something I have to check
 		acc, err = trainDefinitionExpandExp(b, model, defData.Id, epocs > 0)
 		if err != nil {
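Note: a small sketch of the new stopping rule in RunTaskRetrain above (keep training while accuracy is more than five points under the target and fewer than ten passes have run); the target value and the accuracy sequence are made up for illustration, and the slice stands in for trainDefinitionExpandExp.

package main

import "fmt"

func main() {
	target := 95.0 // stand-in for defData.TargetAcuuracy
	accs := []float64{0.72, 0.86, 0.91, 0.93}

	acc, epocs := 0.0, 0
	for acc*100 < target-5 && epocs < 10 {
		acc = accs[epocs] // each pass would call trainDefinitionExpandExp here
		epocs++
	}
	fmt.Println(acc, epocs) // 0.91 3: the loop exits once acc*100 >= target-5
}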
@@ -9,9 +9,9 @@ import requests
 class NotifyServerCallback(tf.keras.callbacks.Callback):
     def on_epoch_end(self, epoch, log, *args, **kwargs):
 {{ if .HeadId }}
-        requests.get(f'{{ .Host }}/api/model/head/epoch/update?epoch={epoch + 1}&accuracy={log["accuracy"]}&head_id={{.HeadId}}')
+        requests.get(f'{{ .Host }}/api/model/head/epoch/update?epoch={epoch + 1}&accuracy={log["val_accuracy"]}&head_id={{.HeadId}}')
 {{ else }}
-        requests.get(f'{{ .Host }}/api/model/epoch/update?model_id={{.Model.Id}}&epoch={epoch + 1}&accuracy={log["accuracy"]}&definition={{.DefId}}')
+        requests.get(f'{{ .Host }}/api/model/epoch/update?model_id={{.Model.Id}}&epoch={epoch + 1}&accuracy={log["val_accuracy"]}&definition={{.DefId}}')
 {{end}}
 
 
@@ -10,7 +10,7 @@ import numpy as np
 
 class NotifyServerCallback(tf.keras.callbacks.Callback):
     def on_epoch_end(self, epoch, log, *args, **kwargs):
-        requests.get(f'{{ .Host }}/api/model/head/epoch/update?epoch={epoch + 1}&accuracy={log["accuracy"]}&head_id={{.HeadId}}')
+        requests.get(f'{{ .Host }}/api/model/head/epoch/update?epoch={epoch + 1}&accuracy={log["val_accuracy"]}&head_id={{.HeadId}}')
 
 
 DATA_DIR = "{{ .DataDir }}"