feat: closes #40

2023-10-19 10:44:13 +01:00 · 2023-10-19 10:44:13 +01:00 · 2c3539b81a
commit 2c3539b81a
parent f163e25fba
4 changed files with 184 additions and 105 deletions
--- a/logic/models/train/train.go
+++ b/logic/models/train/train.go
@ -17,6 +17,9 @@ import (
 	. "git.andr3h3nriqu3s.com/andr3/fyp/logic/utils"
 )
 // Training budget shared by the training loop and the generated Python script.
 const (
 	// EPOCH_PER_RUN is the number of epochs a single training run performs;
 	// it is handed to the Python template as the epochs argument of model.fit.
 	EPOCH_PER_RUN = 20
 	// MAX_EPOCH is the accumulated epoch count past which a definition that
 	// has not reached its target accuracy is marked as failed.
 	MAX_EPOCH = 100
 )
 func MakeDefenition(db *sql.DB, model_id string, target_accuracy int) (id string, err error) {
 	id = ""
 	rows, err := db.Query("insert into model_definition (model_id, target_accuracy) values ($1, $2) returning id;", model_id, target_accuracy)
@ -34,6 +37,7 @@ func MakeDefenition(db *sql.DB, model_id string, target_accuracy int) (id string
 type ModelDefinitionStatus int
 const (
 	MODEL_DEFINITION_STATUS_CANCELD_TRAINING                       = -4
 	MODEL_DEFINITION_STATUS_FAILED_TRAINING                        = -3
 	MODEL_DEFINITION_STATUS_PRE_INIT         ModelDefinitionStatus = 1
 	MODEL_DEFINITION_STATUS_INIT                                   = 2
@ -104,7 +108,8 @@ func generateCvs(c *Context, run_path string, model_id string) (count int, err e
 	return
 }
-func trainDefinition(c *Context, model *BaseModel, definition_id string) (accuracy float64, err error) {
+func trainDefinition(c *Context, model *BaseModel, definition_id string, load_prev bool) (accuracy float64, err error) {
 	c.Logger.Warn("About to start training definition")
 	accuracy = 0
 	layers, err := c.Db.Query("select layer_type, shape from model_definition_layer where def_id=$1 order by layer_order asc;", definition_id)
 	if err != nil {
@ -153,6 +158,9 @@ func trainDefinition(c *Context, model *BaseModel, definition_id string) (accura
 		return
 	}
 	// Copy result around
 	result_path := path.Join("savedData", model.Id, "defs", definition_id)
 	if err = tmpl.Execute(f, AnyMap{
 		"Layers":           got,
 		"Size":             got[0].Shape,
@ -160,7 +168,10 @@ func trainDefinition(c *Context, model *BaseModel, definition_id string) (accura
 		"RunPath":          run_path,
 		"ColorMode":        model.ImageMode,
 		"Model":            model,
 		"EPOCH_PER_RUN":    EPOCH_PER_RUN,
 		"DefId":            definition_id,
 		"LoadPrev":         load_prev,
 		"LastModelRunPath": path.Join(getDir(), result_path, "model.keras"),
 	}); err != nil {
 		return
 	}
@ -172,9 +183,6 @@ func trainDefinition(c *Context, model *BaseModel, definition_id string) (accura
 		return
 	}
 	// Copy result around
 	result_path := path.Join("savedData", model.Id, "defs", definition_id)
 	if err = os.MkdirAll(result_path, os.ModePerm); err != nil {
 		return
 	}
@ -183,6 +191,10 @@ func trainDefinition(c *Context, model *BaseModel, definition_id string) (accura
 		return
 	}
 	if err = exec.Command("cp", "-r", path.Join(run_path, "model.keras"), path.Join(result_path, "model.keras")).Run(); err != nil {
 		return
 	}
 	accuracy_file, err := os.Open(path.Join(run_path, "accuracy.val"))
 	if err != nil {
 		return
@ -194,7 +206,7 @@ func trainDefinition(c *Context, model *BaseModel, definition_id string) (accura
 		return
 	}
-	fmt.Println(string(accuracy_file_bytes))
+	c.Logger.Info("Model finished training!", "accuracy", accuracy)
 	accuracy, err = strconv.ParseFloat(string(accuracy_file_bytes), 64)
 	if err != nil {
@ -205,8 +217,25 @@ func trainDefinition(c *Context, model *BaseModel, definition_id string) (accura
 	return
 }
 // remove returns lst without the element at index i.
 //
 // When an element is removed the result is a freshly allocated slice, so the
 // caller's slice and its backing array are left untouched. When i is out of
 // range (negative or >= len(lst)) there is nothing to remove and lst is
 // returned unchanged instead of being emptied.
 func remove[T any](lst []T, i int) []T {
 	if i < 0 || i >= len(lst) {
 		return lst
 	}
 	// Build into new storage: append(lst[:i], lst[i+1:]...) would shift the
 	// tail inside the caller's backing array and corrupt any other slice
 	// that still refers to it.
 	out := make([]T, 0, len(lst)-1)
 	out = append(out, lst[:i]...)
 	return append(out, lst[i+1:]...)
 }
 func trainModel(c *Context, model *BaseModel) {
-	definitionsRows, err := c.Db.Query("select id, target_accuracy from model_definition where status=$1 and model_id=$2", MODEL_DEFINITION_STATUS_INIT, model.Id)
+	definitionsRows, err := c.Db.Query("select id, target_accuracy, epoch from model_definition where status=$1 and model_id=$2", MODEL_DEFINITION_STATUS_INIT, model.Id)
 	if err != nil {
 		c.Logger.Error("Failed to trainModel!Err:")
 		c.Logger.Error(err)
@ -218,13 +247,14 @@ func trainModel(c *Context, model *BaseModel) {
 	type row struct {
 		id              string
 		target_accuracy int
 		epoch           int
 	}
 	definitions := []row{}
 	for definitionsRows.Next() {
 		var rowv row
-		if err = definitionsRows.Scan(&rowv.id, &rowv.target_accuracy); err != nil {
+		if err = definitionsRows.Scan(&rowv.id, &rowv.target_accuracy, &rowv.epoch); err != nil {
 			c.Logger.Error("Failed to train Model Could not read definition from db!Err:")
 			c.Logger.Error(err)
 			ModelUpdateStatus(c, model.Id, FAILED_TRAINING)
@ -239,30 +269,58 @@ func trainModel(c *Context, model *BaseModel) {
 		return
 	}
-	for _, def := range definitions {
+	toTrain := len(definitions)
 	firstRound := true
 	var newDefinitions = []row{}
 	copy(newDefinitions, definitions)
 	for {
 		for i, def := range definitions {
 			ModelDefinitionUpdateStatus(c, def.id, MODEL_DEFINITION_STATUS_TRAINING)
-		accuracy, err := trainDefinition(c, model, def.id)
+			accuracy, err := trainDefinition(c, model, def.id, !firstRound)
 			if err != nil {
-			c.Logger.Error("Failed to train definition!Err:")
+				c.Logger.Error("Failed to train definition!Err:", "err", err)
 			c.Logger.Error(err)
 				ModelDefinitionUpdateStatus(c, def.id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
 				toTrain = toTrain - 1
 				newDefinitions = remove(newDefinitions, i)
 				continue
 			}
 			def.epoch += EPOCH_PER_RUN
 			int_accuracy := int(accuracy * 100)
-		if int_accuracy < def.target_accuracy {
+			if int_accuracy >= def.target_accuracy {
 				c.Logger.Info("Found a definition that reaches target_accuracy!")
 				_, err = c.Db.Exec("update model_definition set accuracy=$1, status=$2, epoch=$3 where id=$4", int_accuracy, MODEL_DEFINITION_STATUS_TRANIED, def.epoch, def.id)
 				if err != nil {
 					c.Logger.Error("Failed to train definition!Err:\n", "err", err)
 					ModelUpdateStatus(c, model.Id, FAILED_TRAINING)
 					return
 				}
 				_, err = c.Db.Exec("update model_definition set status=$1 where id!=$2 and model_id=$3 and status!=$4", MODEL_DEFINITION_STATUS_CANCELD_TRAINING, def.id, model.Id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
 				if err != nil {
 					c.Logger.Error("Failed to train definition!Err:\n", "err", err)
 					ModelUpdateStatus(c, model.Id, FAILED_TRAINING)
 					return
 				}
 				toTrain = 0
 				break
 			}
 			if def.epoch > MAX_EPOCH {
 				fmt.Printf("Failed to train definition! Accuracy less %d < %d\n", int_accuracy, def.target_accuracy)
 				ModelDefinitionUpdateStatus(c, def.id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
 				toTrain = toTrain - 1
 				newDefinitions = remove(newDefinitions, i)
 				continue
 			}
-		_, err = c.Db.Exec("update model_definition set accuracy=$1, status=$2 where id=$3", int_accuracy, MODEL_DEFINITION_STATUS_TRANIED, def.id)
+		}
-		if err != nil {
+		copy(definitions, newDefinitions)
-			fmt.Printf("Failed to train definition!Err:\n")
+		firstRound = false
-			fmt.Println(err)
+		if toTrain == 0 {
-			ModelUpdateStatus(c, model.Id, FAILED_TRAINING)
+			break
 			return
 		}
 	}
--- a/sql/models.sql
+++ b/sql/models.sql
@ -40,7 +40,6 @@ create table if not exists model_data_point (
    status_message text
 );
 -- drop table if exists model_definition; 
 -- drop table if exists model_definition; 
 create table if not exists model_definition (
    id uuid primary key default gen_random_uuid(),
--- a/views/models/edit.html
+++ b/views/models/edit.html
@ -434,19 +434,36 @@
            {{/* TODO improve this */}}
            Training the model...<br/>
            {{/* TODO Add progress status on definitions */}}
            <table>
                <thead>
                    <tr>
                        <th>
                            Status
                        </th>
                        <th>
                            EpochProgress
                        </th>
                        <th>
                            Accuracy
                        </th>
                    </tr>
                </thead>
                <tbody>
                    {{ range .Defs}}
-                <div>
+                        <tr>
-                    <div>
+                            <td>
                                {{.Status}}
-                    </div>
+                            </td>
-                    <div>
+                            <td>
                                {{.EpochProgress}}
-                    </div>
+                            </td>
-                    <div>
+                            <td>
                                {{.Accuracy}}
-                    </div>
+                            </td>
-                </div>
+                        </tr>
                    {{ end }}
                </tbody>
            </table>
            {{/* TODO Add ability to stop training */}}
        </div>
      {{/* Model Ready */}}
--- a/views/py/python_model_template.py
+++ b/views/py/python_model_template.py
@ -93,6 +93,10 @@ val_ds = list_ds.take(val_size)
 dataset = prepare_dataset(train_ds)
 dataset_validation   = prepare_dataset(val_ds)
 {{ if .LoadPrev }}
 model = tf.keras.saving.load_model('{{.LastModelRunPath}}')
 {{ else }}
 model = keras.Sequential([
    {{- range .Layers }}
    {{- if eq .LayerType 1}}
@ -106,13 +110,14 @@ model = keras.Sequential([
    {{- end }}
    {{- end }}
 ])
 {{ end }}
 model.compile(
    loss=losses.SparseCategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'])
-his = model.fit(dataset, validation_data= dataset_validation, epochs=50, callbacks=[NotifyServerCallback()])
+his = model.fit(dataset, validation_data= dataset_validation, epochs={{.EPOCH_PER_RUN}}, callbacks=[NotifyServerCallback()])
 acc = his.history["accuracy"]
@ -120,6 +125,6 @@ f = open("accuracy.val", "w")
 f.write(str(acc[-1]))
 f.close()
 tf.saved_model.save(model, "model")
-# model.save("model.keras", save_format="tf")
+tf.saved_model.save(model, "model")
 model.save("model.keras")