Fixed the model failing to train and running forever
This commit is contained in:
		
							parent
							
								
									2fa7680d0b
								
							
						
					
					
						commit
						29b69deaf6
					
				| @ -8,6 +8,7 @@ import ( | |||||||
| 	. "git.andr3h3nriqu3s.com/andr3/fyp/logic/db_types" | 	. "git.andr3h3nriqu3s.com/andr3/fyp/logic/db_types" | ||||||
| 	. "git.andr3h3nriqu3s.com/andr3/fyp/logic/tasks/utils" | 	. "git.andr3h3nriqu3s.com/andr3/fyp/logic/tasks/utils" | ||||||
| 
 | 
 | ||||||
|  | 	"github.com/charmbracelet/log" | ||||||
| 	tf "github.com/galeone/tensorflow/tensorflow/go" | 	tf "github.com/galeone/tensorflow/tensorflow/go" | ||||||
| 	"github.com/galeone/tensorflow/tensorflow/go/op" | 	"github.com/galeone/tensorflow/tensorflow/go/op" | ||||||
| 	tg "github.com/galeone/tfgo" | 	tg "github.com/galeone/tfgo" | ||||||
| @ -19,6 +20,7 @@ func ReadPNG(scope *op.Scope, imagePath string, channels int64) *image.Image { | |||||||
| 	contents := op.ReadFile(scope.SubScope("ReadFile"), op.Const(scope.SubScope("filename"), imagePath)) | 	contents := op.ReadFile(scope.SubScope("ReadFile"), op.Const(scope.SubScope("filename"), imagePath)) | ||||||
| 	output := op.DecodePng(scope.SubScope("DecodePng"), contents, op.DecodePngChannels(channels)) | 	output := op.DecodePng(scope.SubScope("DecodePng"), contents, op.DecodePngChannels(channels)) | ||||||
| 	output = op.ExpandDims(scope.SubScope("ExpandDims"), output, op.Const(scope.SubScope("axis"), []int32{0})) | 	output = op.ExpandDims(scope.SubScope("ExpandDims"), output, op.Const(scope.SubScope("axis"), []int32{0})) | ||||||
|  | 	output = op.ExpandDims(scope.SubScope("Stack"), output, op.Const(scope.SubScope("axis"), []int32{1})) | ||||||
| 	image := &image.Image{ | 	image := &image.Image{ | ||||||
| 		Tensor: tg.NewTensor(scope, output)} | 		Tensor: tg.NewTensor(scope, output)} | ||||||
| 	return image.Scale(0, 255) | 	return image.Scale(0, 255) | ||||||
| @ -29,6 +31,7 @@ func ReadJPG(scope *op.Scope, imagePath string, channels int64) *image.Image { | |||||||
| 	contents := op.ReadFile(scope.SubScope("ReadFile"), op.Const(scope.SubScope("filename"), imagePath)) | 	contents := op.ReadFile(scope.SubScope("ReadFile"), op.Const(scope.SubScope("filename"), imagePath)) | ||||||
| 	output := op.DecodePng(scope.SubScope("DecodeJpeg"), contents, op.DecodePngChannels(channels)) | 	output := op.DecodePng(scope.SubScope("DecodeJpeg"), contents, op.DecodePngChannels(channels)) | ||||||
| 	output = op.ExpandDims(scope.SubScope("ExpandDims"), output, op.Const(scope.SubScope("axis"), []int32{0})) | 	output = op.ExpandDims(scope.SubScope("ExpandDims"), output, op.Const(scope.SubScope("axis"), []int32{0})) | ||||||
|  | 	output = op.ExpandDims(scope.SubScope("Stack"), output, op.Const(scope.SubScope("axis"), []int32{1})) | ||||||
| 	image := &image.Image{ | 	image := &image.Image{ | ||||||
| 		Tensor: tg.NewTensor(scope, output)} | 		Tensor: tg.NewTensor(scope, output)} | ||||||
| 	return image.Scale(0, 255) | 	return image.Scale(0, 255) | ||||||
| @ -49,6 +52,8 @@ func runModelNormal(base BasePack, model *BaseModel, def_id string, inputImage * | |||||||
| 	var vmax float32 = 0.0 | 	var vmax float32 = 0.0 | ||||||
| 	var predictions = results[0].Value().([][]float32)[0] | 	var predictions = results[0].Value().([][]float32)[0] | ||||||
| 
 | 
 | ||||||
|  | 	log.Info("preds", "preds", predictions) | ||||||
|  | 
 | ||||||
| 	for i, v := range predictions { | 	for i, v := range predictions { | ||||||
| 		if v > vmax { | 		if v > vmax { | ||||||
| 			order = i | 			order = i | ||||||
| @ -62,10 +67,13 @@ func runModelNormal(base BasePack, model *BaseModel, def_id string, inputImage * | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func runModelExp(base BasePack, model *BaseModel, def_id string, inputImage *tf.Tensor) (order int, confidence float32, err error) { | func runModelExp(base BasePack, model *BaseModel, def_id string, inputImage *tf.Tensor) (order int, confidence float32, err error) { | ||||||
|  | 	log := base.GetLogger() | ||||||
| 
 | 
 | ||||||
| 	err = nil | 	err = nil | ||||||
| 	order = 0 | 	order = 0 | ||||||
| 
 | 
 | ||||||
|  | 	log.Info("Running base") | ||||||
|  | 
 | ||||||
| 	base_model := tg.LoadModel(path.Join("savedData", model.Id, "defs", def_id, "base", "model"), []string{"serve"}, nil) | 	base_model := tg.LoadModel(path.Join("savedData", model.Id, "defs", def_id, "base", "model"), []string{"serve"}, nil) | ||||||
| 
 | 
 | ||||||
| 	//results := base_model.Exec([]tf.Output{
 | 	//results := base_model.Exec([]tf.Output{
 | ||||||
| @ -86,7 +94,7 @@ func runModelExp(base BasePack, model *BaseModel, def_id string, inputImage *tf. | |||||||
| 		return | 		return | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	base.GetLogger().Info("test", "count", len(heads)) | 	log.Info("Running heads", "heads", heads) | ||||||
| 
 | 
 | ||||||
| 	var vmax float32 = 0.0 | 	var vmax float32 = 0.0 | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1185,7 +1185,14 @@ func splitModel(c BasePack, model *BaseModel) (err error) { | |||||||
| 	count := -1 | 	count := -1 | ||||||
| 
 | 
 | ||||||
| 	for layers.Next() { | 	for layers.Next() { | ||||||
|  | 		var layerrow layerrow | ||||||
|  | 		if err = layers.Scan(&layerrow.ExpType); err != nil { | ||||||
|  | 			return | ||||||
|  | 		} | ||||||
| 		count += 1 | 		count += 1 | ||||||
|  | 		if layerrow.ExpType == 2 { | ||||||
|  | 			break | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if count == -1 { | 	if count == -1 { | ||||||
| @ -1294,63 +1301,32 @@ func generateDefinition(c BasePack, model *BaseModel, target_accuracy int, numbe | |||||||
| 		order++ | 		order++ | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if complexity == 0 { | 	loop := max(int((math.Log(float64(model.Width)) / math.Log(float64(10)))), 1) | ||||||
| 		err = MakeLayer(db, def_id, order, LAYER_FLATTEN, "") | 	for i := 0; i < loop; i++ { | ||||||
|  | 		err = MakeLayer(db, def_id, order, LAYER_SIMPLE_BLOCK, "") | ||||||
|  | 		order++ | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			failed() | 			failed() | ||||||
| 			return | 			return | ||||||
| 		} | 		} | ||||||
| 		order++ | 	} | ||||||
| 
 | 
 | ||||||
| 		loop := int(math.Log2(float64(number_of_classes))) | 	err = MakeLayer(db, def_id, order, LAYER_FLATTEN, "") | ||||||
| 		for i := 0; i < loop; i++ { | 	if err != nil { | ||||||
| 			err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i))) |  | ||||||
| 			order++ |  | ||||||
| 			if err != nil { |  | ||||||
| 				ModelUpdateStatus(c, model.Id, FAILED_PREPARING_TRAINING) |  | ||||||
| 				return |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
| 
 |  | ||||||
| 	} else if complexity == 1 || complexity == 2 { |  | ||||||
| 
 |  | ||||||
| 		loop := int((math.Log(float64(model.Width)) / math.Log(float64(10)))) |  | ||||||
| 		if loop == 0 { |  | ||||||
| 			loop = 1 |  | ||||||
| 		} |  | ||||||
| 		for i := 0; i < loop; i++ { |  | ||||||
| 			err = MakeLayer(db, def_id, order, LAYER_SIMPLE_BLOCK, "") |  | ||||||
| 			order++ |  | ||||||
| 			if err != nil { |  | ||||||
| 				failed() |  | ||||||
| 				return |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
| 
 |  | ||||||
| 		err = MakeLayer(db, def_id, order, LAYER_FLATTEN, "") |  | ||||||
| 		if err != nil { |  | ||||||
| 			failed() |  | ||||||
| 			return |  | ||||||
| 		} |  | ||||||
| 		order++ |  | ||||||
| 
 |  | ||||||
| 		loop = int((math.Log(float64(number_of_classes)) / math.Log(float64(10))) / 2) |  | ||||||
| 		if loop == 0 { |  | ||||||
| 			loop = 1 |  | ||||||
| 		} |  | ||||||
| 		for i := 0; i < loop; i++ { |  | ||||||
| 			err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i))) |  | ||||||
| 			order++ |  | ||||||
| 			if err != nil { |  | ||||||
| 				failed() |  | ||||||
| 				return |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
| 	} else { |  | ||||||
| 		log.Error("Unkown complexity", "complexity", complexity) |  | ||||||
| 		failed() | 		failed() | ||||||
| 		return | 		return | ||||||
| 	} | 	} | ||||||
|  | 	order++ | ||||||
|  | 
 | ||||||
|  | 	loop = max(int((math.Log(float64(number_of_classes))/math.Log(float64(10)))/2), 1) | ||||||
|  | 	for i := 0; i < loop; i++ { | ||||||
|  | 		err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i))) | ||||||
|  | 		order++ | ||||||
|  | 		if err != nil { | ||||||
|  | 			failed() | ||||||
|  | 			return | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	err = ModelDefinitionUpdateStatus(c, def_id, MODEL_DEFINITION_STATUS_INIT) | 	err = ModelDefinitionUpdateStatus(c, def_id, MODEL_DEFINITION_STATUS_INIT) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| @ -1486,10 +1462,10 @@ func generateExpandableDefinition(c BasePack, model *BaseModel, target_accuracy | |||||||
| 
 | 
 | ||||||
| 	log.Info("Size of the dense layers", "loop", loop) | 	log.Info("Size of the dense layers", "loop", loop) | ||||||
| 
 | 
 | ||||||
| 	// loop = max(loop, 3)
 | 	loop = max(loop, 3) | ||||||
| 
 | 
 | ||||||
| 	for i := 0; i < loop; i++ { | 	for i := 0; i < loop; i++ { | ||||||
| 		err = MakeLayer(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i))) | 		err = MakeLayerExpandable(db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)*2), 2) | ||||||
| 		order++ | 		order++ | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			failed() | 			failed() | ||||||
| @ -1712,10 +1688,22 @@ func RunTaskRetrain(b BasePack, task Task) (err error) { | |||||||
| 		return | 		return | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	failed = func() { | ||||||
|  | 		ResetClasses(b, model) | ||||||
|  | 		ModelUpdateStatus(b, model.Id, READY_RETRAIN_FAILED) | ||||||
|  | 		task.UpdateStatusLog(b, TASK_FAILED_RUNNING, "Model failed retraining") | ||||||
|  | 		_, err_ := db.Exec("delete from exp_model_head where def_id=$1 and status in (2,3)", defData.Id) | ||||||
|  | 		if err_ != nil { | ||||||
|  | 			panic(err_) | ||||||
|  | 		} | ||||||
|  | 		l.Error("Failed to retrain", "err", err) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	var acc float64 = 0 | 	var acc float64 = 0 | ||||||
| 	var epocs = 0 | 	var epocs = 0 | ||||||
| 	// TODO make max epochs come from db
 | 	// TODO make max epochs come from db
 | ||||||
| 	for acc*100 < defData.TargetAcuuracy && epocs < 20 { | 	// TODO re increase the target accuracy
 | ||||||
|  | 	for acc*100 < defData.TargetAcuuracy-5 && epocs < 10 { | ||||||
| 		// This is something I have to check
 | 		// This is something I have to check
 | ||||||
| 		acc, err = trainDefinitionExpandExp(b, model, defData.Id, epocs > 0) | 		acc, err = trainDefinitionExpandExp(b, model, defData.Id, epocs > 0) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
|  | |||||||
| @ -9,9 +9,9 @@ import requests | |||||||
| class NotifyServerCallback(tf.keras.callbacks.Callback): | class NotifyServerCallback(tf.keras.callbacks.Callback): | ||||||
|     def on_epoch_end(self, epoch, log, *args, **kwargs): |     def on_epoch_end(self, epoch, log, *args, **kwargs): | ||||||
|         {{ if .HeadId }} |         {{ if .HeadId }} | ||||||
|         requests.get(f'{{ .Host }}/api/model/head/epoch/update?epoch={epoch + 1}&accuracy={log["accuracy"]}&head_id={{.HeadId}}') |         requests.get(f'{{ .Host }}/api/model/head/epoch/update?epoch={epoch + 1}&accuracy={log["val_accuracy"]}&head_id={{.HeadId}}') | ||||||
|         {{ else }} |         {{ else }} | ||||||
|         requests.get(f'{{ .Host }}/api/model/epoch/update?model_id={{.Model.Id}}&epoch={epoch + 1}&accuracy={log["accuracy"]}&definition={{.DefId}}') |         requests.get(f'{{ .Host }}/api/model/epoch/update?model_id={{.Model.Id}}&epoch={epoch + 1}&accuracy={log["val_accuracy"]}&definition={{.DefId}}') | ||||||
|         {{end}} |         {{end}} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -10,7 +10,7 @@ import numpy as np | |||||||
| 
 | 
 | ||||||
| class NotifyServerCallback(tf.keras.callbacks.Callback): | class NotifyServerCallback(tf.keras.callbacks.Callback): | ||||||
|     def on_epoch_end(self, epoch, log, *args, **kwargs): |     def on_epoch_end(self, epoch, log, *args, **kwargs): | ||||||
|         requests.get(f'{{ .Host }}/api/model/head/epoch/update?epoch={epoch + 1}&accuracy={log["accuracy"]}&head_id={{.HeadId}}') |         requests.get(f'{{ .Host }}/api/model/head/epoch/update?epoch={epoch + 1}&accuracy={log["val_accuracy"]}&head_id={{.HeadId}}') | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| DATA_DIR = "{{ .DataDir }}" | DATA_DIR = "{{ .DataDir }}" | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user