956 lines
		
	
	
		
			29 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			956 lines
		
	
	
		
			29 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package tasks
 | |
| 
 | |
| import (
 | |
| 	"os"
 | |
| 	"path"
 | |
| 	"sync"
 | |
| 	"time"
 | |
| 
 | |
| 	. "git.andr3h3nriqu3s.com/andr3/fyp/logic/db_types"
 | |
| 	. "git.andr3h3nriqu3s.com/andr3/fyp/logic/models/train"
 | |
| 	. "git.andr3h3nriqu3s.com/andr3/fyp/logic/tasks/utils"
 | |
| 	. "git.andr3h3nriqu3s.com/andr3/fyp/logic/utils"
 | |
| 	"github.com/charmbracelet/log"
 | |
| )
 | |
| 
 | |
| func verifyRunner(c *Context, dat *JustId) (runner *Runner, e *Error) {
 | |
| 	runner, err := GetRunner(c, dat.Id)
 | |
| 	if err == NotFoundError {
 | |
| 		e = c.JsonBadRequest("Could not find runner, please register runner first")
 | |
| 		return
 | |
| 	} else if err != nil {
 | |
| 		e = c.E500M("Failed to get information about the runner", err)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	if runner.Token != *c.Token {
 | |
| 		return nil, c.SendJSONStatus(401, "Only runners can use this funcion")
 | |
| 	}
 | |
| 
 | |
| 	return
 | |
| }
 | |
| 
 | |
| type VerifyTask struct {
 | |
| 	Id     string `json:"id" validate:"required"`
 | |
| 	TaskId string `json:"taskId" validate:"required"`
 | |
| }
 | |
| 
 | |
| type RunnerTrainDef struct {
 | |
| 	Id     string `json:"id" validate:"required"`
 | |
| 	TaskId string `json:"taskId" validate:"required"`
 | |
| 	DefId  string `json:"defId" validate:"required"`
 | |
| }
 | |
| 
 | |
| func verifyTask(x *Handle, c *Context, dat *VerifyTask) (task *Task, error *Error) {
 | |
| 	mutex := x.DataMap["runners_mutex"].(*sync.Mutex)
 | |
| 	mutex.Lock()
 | |
| 	defer mutex.Unlock()
 | |
| 
 | |
| 	var runners map[string]interface{} = x.DataMap["runners"].(map[string]interface{})
 | |
| 	if runners[dat.Id] == nil {
 | |
| 		return nil, c.JsonBadRequest("Runner not active")
 | |
| 	}
 | |
| 
 | |
| 	var runner_data map[string]interface{} = runners[dat.Id].(map[string]interface{})
 | |
| 
 | |
| 	if runner_data["task"] == nil {
 | |
| 		return nil, c.SendJSONStatus(404, "No active task")
 | |
| 	}
 | |
| 
 | |
| 	return runner_data["task"].(*Task), nil
 | |
| }
 | |
| 
 | |
| func clearRunnerTask(x *Handle, runner_id string) {
 | |
| 	mutex := x.DataMap["runners_mutex"].(*sync.Mutex)
 | |
| 	mutex.Lock()
 | |
| 	defer mutex.Unlock()
 | |
| 
 | |
| 	var runners map[string]interface{} = x.DataMap["runners"].(map[string]interface{})
 | |
| 	var runner_data map[string]interface{} = runners[runner_id].(map[string]interface{})
 | |
| 	runner_data["task"] = nil
 | |
| 	runners[runner_id] = runner_data
 | |
| 	x.DataMap["runners"] = runners
 | |
| }
 | |
| 
 | |
| func handleRemoteRunner(x *Handle) {
 | |
| 
 | |
| 	type RegisterRunner struct {
 | |
| 		Token string     `json:"token" validate:"required"`
 | |
| 		Type  RunnerType `json:"type" validate:"required"`
 | |
| 	}
 | |
| 	PostAuthJson(x, "/tasks/runner/register", User_Normal, func(c *Context, dat *RegisterRunner) *Error {
 | |
| 		if *c.Token != dat.Token {
 | |
| 			// TODO do admin
 | |
| 			return c.E500M("Please make sure that the token is the same that is being registered", nil)
 | |
| 		}
 | |
| 
 | |
| 		c.Logger.Info("test", "dat", dat)
 | |
| 
 | |
| 		var runner Runner
 | |
| 		err := GetDBOnce(c, &runner, "remote_runner as ru where token=$1", dat.Token)
 | |
| 		if err != NotFoundError && err != nil {
 | |
| 			return c.E500M("Failed to get information remote runners", err)
 | |
| 		}
 | |
| 		if err != NotFoundError {
 | |
| 			return c.JsonBadRequest("Token is already registered by a runner")
 | |
| 		}
 | |
| 
 | |
| 		// TODO get id from token passed by when doing admin
 | |
| 		var userId = c.User.Id
 | |
| 
 | |
| 		var new_runner = struct {
 | |
| 			Type   RunnerType
 | |
| 			UserId string `db:"user_id"`
 | |
| 			Token  string
 | |
| 		}{
 | |
| 			Type:   dat.Type,
 | |
| 			Token:  dat.Token,
 | |
| 			UserId: userId,
 | |
| 		}
 | |
| 
 | |
| 		id, err := InsertReturnId(c, &new_runner, "remote_runner", "id")
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to create remote runner", err)
 | |
| 		}
 | |
| 
 | |
| 		return c.SendJSON(struct {
 | |
| 			Id string `json:"id"`
 | |
| 		}{
 | |
| 			Id: id,
 | |
| 		})
 | |
| 	})
 | |
| 
 | |
| 	// TODO remove runner
 | |
| 
 | |
| 	PostAuthJson(x, "/tasks/runner/init", User_Normal, func(c *Context, dat *JustId) *Error {
 | |
| 		runner, error := verifyRunner(c, dat)
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		mutex := x.DataMap["runners_mutex"].(*sync.Mutex)
 | |
| 		mutex.Lock()
 | |
| 		defer mutex.Unlock()
 | |
| 
 | |
| 		var runners map[string]interface{} = x.DataMap["runners"].(map[string]interface{})
 | |
| 		if runners[dat.Id] != nil {
 | |
| 			c.Logger.Info("Logger trying to register but already registerd")
 | |
| 			c.ShowMessage = false
 | |
| 			return c.SendJSON("Ok")
 | |
| 		}
 | |
| 
 | |
| 		var new_runner = map[string]interface{}{}
 | |
| 		new_runner["last_time_check"] = time.Now()
 | |
| 		new_runner["runner_info"] = runner
 | |
| 
 | |
| 		runners[dat.Id] = new_runner
 | |
| 
 | |
| 		x.DataMap["runners"] = runners
 | |
| 
 | |
| 		return c.SendJSON("Ok")
 | |
| 	})
 | |
| 
 | |
| 	PostAuthJson(x, "/tasks/runner/active", User_Normal, func(c *Context, dat *JustId) *Error {
 | |
| 		_, error := verifyRunner(c, dat)
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		mutex := x.DataMap["runners_mutex"].(*sync.Mutex)
 | |
| 		mutex.Lock()
 | |
| 		defer mutex.Unlock()
 | |
| 
 | |
| 		var runners map[string]interface{} = x.DataMap["runners"].(map[string]interface{})
 | |
| 		if runners[dat.Id] == nil {
 | |
| 			return c.JsonBadRequest("Runner not active")
 | |
| 		}
 | |
| 
 | |
| 		var runner_data map[string]interface{} = runners[dat.Id].(map[string]interface{})
 | |
| 
 | |
| 		if runner_data["task"] == nil {
 | |
| 			c.ShowMessage = false
 | |
| 			return c.SendJSONStatus(404, "No active task")
 | |
| 		}
 | |
| 
 | |
| 		c.ShowMessage = false
 | |
| 		// This should be a task obj
 | |
| 		return c.SendJSON(runner_data["task"])
 | |
| 	})
 | |
| 
 | |
| 	PostAuthJson(x, "/tasks/runner/ready", User_Normal, func(c *Context, dat *VerifyTask) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, dat)
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		err := task.UpdateStatus(c, TASK_RUNNING, "Task Running on Runner")
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to set task status", err)
 | |
| 		}
 | |
| 
 | |
| 		return c.SendJSON("Ok")
 | |
| 	})
 | |
| 
 | |
| 	type TaskFail struct {
 | |
| 		Id     string `json:"id" validate:"required"`
 | |
| 		TaskId string `json:"taskId" validate:"required"`
 | |
| 		Reason string `json:"reason" validate:"required"`
 | |
| 	}
 | |
| 	PostAuthJson(x, "/tasks/runner/fail", User_Normal, func(c *Context, dat *TaskFail) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, &VerifyTask{Id: dat.Id, TaskId: dat.TaskId})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		err := task.UpdateStatus(c, TASK_FAILED_RUNNING, dat.Reason)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to set task status", err)
 | |
| 		}
 | |
| 
 | |
| 		// Do extra clean up on tasks
 | |
| 		switch task.TaskType {
 | |
| 		case int(TASK_TYPE_TRAINING):
 | |
| 			CleanUpFailed(c, task)
 | |
| 		case int(TASK_TYPE_RETRAINING):
 | |
| 			CleanUpFailedRetrain(c, task)
 | |
| 		case int(TASK_TYPE_CLASSIFICATION):
 | |
| 			// DO nothing
 | |
| 		default:
 | |
| 			panic("Do not know how to handle this")
 | |
| 		}
 | |
| 
 | |
| 		mutex := x.DataMap["runners_mutex"].(*sync.Mutex)
 | |
| 		mutex.Lock()
 | |
| 		defer mutex.Unlock()
 | |
| 
 | |
| 		var runners map[string]interface{} = x.DataMap["runners"].(map[string]interface{})
 | |
| 		var runner_data map[string]interface{} = runners[dat.Id].(map[string]interface{})
 | |
| 		runner_data["task"] = nil
 | |
| 
 | |
| 		runners[dat.Id] = runner_data
 | |
| 		x.DataMap["runners"] = runners
 | |
| 
 | |
| 		return c.SendJSON("Ok")
 | |
| 	})
 | |
| 
 | |
| 	PostAuthJson(x, "/tasks/runner/defs", User_Normal, func(c *Context, dat *VerifyTask) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, dat)
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		var status DefinitionStatus
 | |
| 		switch task.TaskType {
 | |
| 		case int(TASK_TYPE_TRAINING):
 | |
| 			status = DEFINITION_STATUS_INIT
 | |
| 		case int(TASK_TYPE_RETRAINING):
 | |
| 			fallthrough
 | |
| 		case int(TASK_TYPE_CLASSIFICATION):
 | |
| 			status = DEFINITION_STATUS_READY
 | |
| 		default:
 | |
| 			c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the definitions")
 | |
| 		}
 | |
| 
 | |
| 		model, err := GetBaseModel(c, *task.ModelId)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to get model information", err)
 | |
| 		}
 | |
| 
 | |
| 		defs, err := model.GetDefinitions(c, "and md.status=$2", status)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to get the model definitions", err)
 | |
| 		}
 | |
| 
 | |
| 		return c.SendJSON(defs)
 | |
| 	})
 | |
| 
 | |
| 	PostAuthJson(x, "/tasks/runner/classes", User_Normal, func(c *Context, dat *VerifyTask) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, dat)
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		model, err := GetBaseModel(c, *task.ModelId)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to get model information", err)
 | |
| 		}
 | |
| 
 | |
| 		switch task.TaskType {
 | |
| 		case int(TASK_TYPE_TRAINING):
 | |
| 			classes, err := model.GetClasses(c, "and status in ($2, $3) order by mc.class_order asc", CLASS_STATUS_TO_TRAIN, CLASS_STATUS_TRAINING)
 | |
| 			if err != nil {
 | |
| 				return c.E500M("Failed to get the model classes", err)
 | |
| 			}
 | |
| 			return c.SendJSON(classes)
 | |
| 		case int(TASK_TYPE_RETRAINING):
 | |
| 			classes, err := model.GetClasses(c, "and status=$2 order by mc.class_order asc", CLASS_STATUS_TRAINING)
 | |
| 			if err != nil {
 | |
| 				return c.E500M("Failed to get the model classes", err)
 | |
| 			}
 | |
| 			return c.SendJSON(classes)
 | |
| 		case int(TASK_TYPE_CLASSIFICATION):
 | |
| 			classes, err := model.GetClasses(c, "and status=$2 order by mc.class_order asc", CLASS_STATUS_TRAINED)
 | |
| 			if err != nil {
 | |
| 				return c.E500M("Failed to get the model classes", err)
 | |
| 			}
 | |
| 			return c.SendJSON(classes)
 | |
| 		default:
 | |
| 			c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the definitions")
 | |
| 		}
 | |
| 
 | |
| 	})
 | |
| 
 | |
| 	type RunnerTrainDefStatus struct {
 | |
| 		Id     string           `json:"id" validate:"required"`
 | |
| 		TaskId string           `json:"taskId" validate:"required"`
 | |
| 		DefId  string           `json:"defId" validate:"required"`
 | |
| 		Status DefinitionStatus `json:"status" validate:"required"`
 | |
| 	}
 | |
| 	PostAuthJson(x, "/tasks/runner/train/def/status", User_Normal, func(c *Context, dat *RunnerTrainDefStatus) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, &VerifyTask{Id: dat.Id, TaskId: dat.TaskId})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		if task.TaskType != int(TASK_TYPE_TRAINING) {
 | |
| 			c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the definitions")
 | |
| 		}
 | |
| 
 | |
| 		def, err := GetDefinition(c, dat.DefId)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to get definition information", err)
 | |
| 		}
 | |
| 
 | |
| 		err = def.UpdateStatus(c, dat.Status)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to update model status", err)
 | |
| 		}
 | |
| 
 | |
| 		return c.SendJSON("Ok")
 | |
| 	})
 | |
| 
 | |
| 	type RunnerTrainDefHeadStatus struct {
 | |
| 		Id     string          `json:"id" validate:"required"`
 | |
| 		TaskId string          `json:"taskId" validate:"required"`
 | |
| 		DefId  string          `json:"defId" validate:"required"`
 | |
| 		Status ModelHeadStatus `json:"status" validate:"required"`
 | |
| 	}
 | |
| 	PostAuthJson(x, "/tasks/runner/train/def/head/status", User_Normal, func(c *Context, dat *RunnerTrainDefHeadStatus) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, &VerifyTask{Id: dat.Id, TaskId: dat.TaskId})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		if task.TaskType != int(TASK_TYPE_TRAINING) {
 | |
| 			c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the definitions")
 | |
| 		}
 | |
| 
 | |
| 		def, err := GetDefinition(c, dat.DefId)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to get definition information", err)
 | |
| 		}
 | |
| 
 | |
| 		_, err = c.Exec("update exp_model_head set status=$1 where def_id=$2;", dat.Status, def.Id)
 | |
| 		if err != nil {
 | |
| 			log.Error("Failed to train definition!")
 | |
| 			return c.E500M("Failed to train definition", err)
 | |
| 		}
 | |
| 
 | |
| 		return c.SendJSON("Ok")
 | |
| 	})
 | |
| 
 | |
| 	type RunnerRetrainDefHeadStatus struct {
 | |
| 		Id     string          `json:"id" validate:"required"`
 | |
| 		TaskId string          `json:"taskId" validate:"required"`
 | |
| 		HeadId string          `json:"defId" validate:"required"`
 | |
| 		Status ModelHeadStatus `json:"status" validate:"required"`
 | |
| 	}
 | |
| 	PostAuthJson(x, "/tasks/runner/retrain/def/head/status", User_Normal, func(c *Context, dat *RunnerRetrainDefHeadStatus) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, &VerifyTask{Id: dat.Id, TaskId: dat.TaskId})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		if task.TaskType != int(TASK_TYPE_RETRAINING) {
 | |
| 			c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the definitions")
 | |
| 		}
 | |
| 
 | |
| 		if err := UpdateStatus(c.GetDb(), "exp_model_head", dat.HeadId, MODEL_DEFINITION_STATUS_TRAINING); err != nil {
 | |
| 			return c.E500M("Failed to update head status", err)
 | |
| 		}
 | |
| 
 | |
| 		return c.SendJSON("Ok")
 | |
| 	})
 | |
| 	PostAuthJson(x, "/tasks/runner/train/def/layers", User_Normal, func(c *Context, dat *RunnerTrainDef) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, &VerifyTask{Id: dat.Id, TaskId: dat.TaskId})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		switch task.TaskType {
 | |
| 		case int(TASK_TYPE_TRAINING):
 | |
| 			// Do nothing
 | |
| 		case int(TASK_TYPE_RETRAINING):
 | |
| 			// Do nothing
 | |
| 		default:
 | |
| 			c.Logger.Error("Task not is not the right type to get the layers", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the layers")
 | |
| 		}
 | |
| 
 | |
| 		def, err := GetDefinition(c, dat.DefId)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to get definition information", err)
 | |
| 		}
 | |
| 
 | |
| 		layers, err := def.GetLayers(c, " order by layer_order asc")
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to get layers", err)
 | |
| 		}
 | |
| 
 | |
| 		return c.SendJSON(layers)
 | |
| 	})
 | |
| 
 | |
| 	PostAuthJson(x, "/tasks/runner/train/datapoints", User_Normal, func(c *Context, dat *VerifyTask) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, dat)
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		switch task.TaskType {
 | |
| 		case int(TASK_TYPE_TRAINING):
 | |
| 			// DO nothing
 | |
| 		case int(TASK_TYPE_RETRAINING):
 | |
| 			// DO nothing
 | |
| 		default:
 | |
| 			c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the definitions")
 | |
| 		}
 | |
| 
 | |
| 		model, err := GetBaseModel(c, *task.ModelId)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to get model information", err)
 | |
| 		}
 | |
| 
 | |
| 		training_points, err := model.DataPoints(c, DATA_POINT_MODE_TRAINING)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to get the model classes", err)
 | |
| 		}
 | |
| 		testing_points, err := model.DataPoints(c, DATA_POINT_MODE_TRAINING)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to get the model classes", err)
 | |
| 		}
 | |
| 
 | |
| 		return c.SendJSON(struct {
 | |
| 			Testing  []DataPoint `json:"testing"`
 | |
| 			Training []DataPoint `json:"training"`
 | |
| 		}{
 | |
| 			Testing:  testing_points,
 | |
| 			Training: training_points,
 | |
| 		})
 | |
| 	})
 | |
| 
 | |
| 	type RunnerTrainDefEpoch struct {
 | |
| 		Id       string  `json:"id" validate:"required"`
 | |
| 		TaskId   string  `json:"taskId" validate:"required"`
 | |
| 		DefId    string  `json:"defId" validate:"required"`
 | |
| 		Epoch    int     `json:"epoch" validate:"required"`
 | |
| 		Accuracy float64 `json:"accuracy" validate:"required"`
 | |
| 	}
 | |
| 	PostAuthJson(x, "/tasks/runner/train/epoch", User_Normal, func(c *Context, dat *RunnerTrainDefEpoch) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, &VerifyTask{
 | |
| 			Id:     dat.Id,
 | |
| 			TaskId: dat.TaskId,
 | |
| 		})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		if task.TaskType != int(TASK_TYPE_TRAINING) {
 | |
| 			c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the definitions")
 | |
| 		}
 | |
| 
 | |
| 		def, err := GetDefinition(c, dat.DefId)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to get definition information", err)
 | |
| 		}
 | |
| 
 | |
| 		err = def.UpdateAfterEpoch(c, dat.Accuracy, dat.Epoch)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to update model", err)
 | |
| 		}
 | |
| 
 | |
| 		return c.SendJSON("Ok")
 | |
| 	})
 | |
| 
 | |
| 	PostAuthJson(x, "/tasks/runner/train/mark-failed", User_Normal, func(c *Context, dat *VerifyTask) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, &VerifyTask{
 | |
| 			Id:     dat.Id,
 | |
| 			TaskId: dat.TaskId,
 | |
| 		})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		if task.TaskType != int(TASK_TYPE_TRAINING) {
 | |
| 			c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the definitions")
 | |
| 		}
 | |
| 
 | |
| 		_, err := c.Exec(
 | |
| 			"update model_definition set status=$1 "+
 | |
| 				"where model_id=$2 and status in ($3, $4)",
 | |
| 			MODEL_DEFINITION_STATUS_CANCELD_TRAINING,
 | |
| 			task.ModelId,
 | |
| 			MODEL_DEFINITION_STATUS_TRAINING,
 | |
| 			MODEL_DEFINITION_STATUS_PAUSED_TRAINING,
 | |
| 		)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to mark definition as failed", err)
 | |
| 		}
 | |
| 
 | |
| 		return c.SendJSON("Ok")
 | |
| 	})
 | |
| 
 | |
| 	PostAuthJson(x, "/tasks/runner/model", User_Normal, func(c *Context, dat *VerifyTask) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, dat)
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		switch task.TaskType {
 | |
| 		case int(TASK_TYPE_TRAINING):
 | |
| 			//DO NOTHING
 | |
| 		case int(TASK_TYPE_RETRAINING):
 | |
| 			//DO NOTHING
 | |
| 		case int(TASK_TYPE_CLASSIFICATION):
 | |
| 			//DO NOTHING
 | |
| 		default:
 | |
| 			c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the definitions")
 | |
| 		}
 | |
| 
 | |
| 		model, err := GetBaseModel(c, *task.ModelId)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to get model information", err)
 | |
| 		}
 | |
| 
 | |
| 		return c.SendJSON(model)
 | |
| 	})
 | |
| 
 | |
| 	PostAuthJson(x, "/tasks/runner/heads", User_Normal, func(c *Context, dat *RunnerTrainDef) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, &VerifyTask{Id: dat.Id, TaskId: dat.TaskId})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		type ExpHead struct {
 | |
| 			Id    string `json:"id"`
 | |
| 			Start int    `db:"range_start" json:"start"`
 | |
| 			End   int    `db:"range_end" json:"end"`
 | |
| 		}
 | |
| 
 | |
| 		switch task.TaskType {
 | |
| 		case int(TASK_TYPE_TRAINING):
 | |
| 			fallthrough
 | |
| 		case int(TASK_TYPE_RETRAINING):
 | |
| 			// status = 2 (INIT) 3 (TRAINING)
 | |
| 			heads, err := GetDbMultitple[ExpHead](c, "exp_model_head where def_id=$1 and status in (2,3)", dat.DefId)
 | |
| 			if err != nil {
 | |
| 				return c.E500M("Failed getting active heads", err)
 | |
| 			}
 | |
| 			return c.SendJSON(heads)
 | |
| 		case int(TASK_TYPE_CLASSIFICATION):
 | |
| 			heads, err := GetDbMultitple[ExpHead](c, "exp_model_head where def_id=$1", dat.DefId)
 | |
| 			if err != nil {
 | |
| 				return c.E500M("Failed getting active heads", err)
 | |
| 			}
 | |
| 			return c.SendJSON(heads)
 | |
| 		default:
 | |
| 			c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the definitions")
 | |
| 		}
 | |
| 	})
 | |
| 
 | |
| 	PostAuthJson(x, "/tasks/runner/train_exp/class/status/train", User_Normal, func(c *Context, dat *VerifyTask) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, dat)
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		if task.TaskType != int(TASK_TYPE_TRAINING) {
 | |
| 			c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the definitions")
 | |
| 		}
 | |
| 
 | |
| 		model, err := GetBaseModel(c, *task.ModelId)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to get model", err)
 | |
| 		}
 | |
| 
 | |
| 		err = SetModelClassStatus(c, CLASS_STATUS_TRAINING, "model_id=$1 and status=$2;", model.Id, CLASS_STATUS_TO_TRAIN)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed update status", err)
 | |
| 		}
 | |
| 
 | |
| 		return c.SendJSON("Ok")
 | |
| 	})
 | |
| 
 | |
| 	PostAuthJson(x, "/tasks/runner/train/done", User_Normal, func(c *Context, dat *VerifyTask) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, dat)
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		if task.TaskType != int(TASK_TYPE_TRAINING) {
 | |
| 			c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the definitions")
 | |
| 		}
 | |
| 
 | |
| 		model, err := GetBaseModel(c, *task.ModelId)
 | |
| 		if err != nil {
 | |
| 			c.Logger.Error("Failed to get model", "err", err)
 | |
| 			return c.E500M("Failed to get mode", err)
 | |
| 		}
 | |
| 
 | |
| 		var def Definition
 | |
| 		err = GetDBOnce(c, &def, "model_definition as md where model_id=$1 and status=$2 order by accuracy desc limit 1;", task.ModelId, DEFINITION_STATUS_TRANIED)
 | |
| 		if err == NotFoundError {
 | |
| 			// TODO Make the Model status have a message
 | |
| 			c.Logger.Error("All definitions failed to train!")
 | |
| 			model.UpdateStatus(c, FAILED_TRAINING)
 | |
| 			task.UpdateStatusLog(c, TASK_FAILED_RUNNING, "All definition failed to train!")
 | |
| 			return c.SendJSON("Ok")
 | |
| 		} else if err != nil {
 | |
| 			model.UpdateStatus(c, FAILED_TRAINING)
 | |
| 			task.UpdateStatusLog(c, TASK_FAILED_RUNNING, "Failed to get model definition")
 | |
| 			return c.E500M("Failed to get model definition", err)
 | |
| 		}
 | |
| 
 | |
| 		if err = def.UpdateStatus(c, DEFINITION_STATUS_READY); err != nil {
 | |
| 			model.UpdateStatus(c, FAILED_TRAINING)
 | |
| 			task.UpdateStatusLog(c, TASK_FAILED_RUNNING, "Failed to update model definition")
 | |
| 			return c.E500M("Failed to update model definition", err)
 | |
| 		}
 | |
| 
 | |
| 		to_delete, err := c.Query("select id from model_definition where status != $1 and model_id=$2", MODEL_DEFINITION_STATUS_READY, model.Id)
 | |
| 		if err != nil {
 | |
| 			model.UpdateStatus(c, FAILED_TRAINING)
 | |
| 			task.UpdateStatusLog(c, TASK_FAILED_RUNNING, "Failed to delete unsed definitions")
 | |
| 			return c.E500M("Failed to delete unsed definitions", err)
 | |
| 		}
 | |
| 		defer to_delete.Close()
 | |
| 
 | |
| 		for to_delete.Next() {
 | |
| 			var id string
 | |
| 			if err = to_delete.Scan(&id); err != nil {
 | |
| 				model.UpdateStatus(c, FAILED_TRAINING)
 | |
| 				task.UpdateStatusLog(c, TASK_FAILED_RUNNING, "Failed to delete unsed definitions")
 | |
| 				return c.E500M("Failed to delete unsed definitions", err)
 | |
| 			}
 | |
| 			os.RemoveAll(path.Join("savedData", model.Id, "defs", id))
 | |
| 		}
 | |
| 
 | |
| 		// TODO Check if returning also works here
 | |
| 		if _, err = c.Exec("delete from model_definition where status!=$1 and model_id=$2;", MODEL_DEFINITION_STATUS_READY, model.Id); err != nil {
 | |
| 			model.UpdateStatus(c, FAILED_TRAINING)
 | |
| 			task.UpdateStatusLog(c, TASK_FAILED_RUNNING, "Failed to delete unsed definitions")
 | |
| 			return c.E500M("Failed to delete unsed definitions", err)
 | |
| 		}
 | |
| 
 | |
| 		// Set the class status to trained
 | |
| 		err = SetModelClassStatus(c, CLASS_STATUS_TRAINED, "model_id=$1;", model.Id)
 | |
| 		if err != nil {
 | |
| 			c.Logger.Error("Failed to set class status")
 | |
| 			return c.E500M("Failed to set class status", err)
 | |
| 		}
 | |
| 
 | |
| 		if err = model.UpdateStatus(c, READY); err != nil {
 | |
| 			model.UpdateStatus(c, FAILED_TRAINING)
 | |
| 			task.UpdateStatusLog(c, TASK_FAILED_RUNNING, "Failed to delete unsed definitions")
 | |
| 			return c.E500M("Failed to update status of model", err)
 | |
| 		}
 | |
| 
 | |
| 		task.UpdateStatusLog(c, TASK_DONE, "Model finished training")
 | |
| 
 | |
| 		clearRunnerTask(x, dat.Id)
 | |
| 		return c.SendJSON("Ok")
 | |
| 	})
 | |
| 
 | |
| 	type RunnerClassDone struct {
 | |
| 		Id     string `json:"id" validate:"required"`
 | |
| 		TaskId string `json:"taskId" validate:"required"`
 | |
| 		Result string `json:"result" validate:"required"`
 | |
| 	}
 | |
| 	PostAuthJson(x, "/tasks/runner/class/done", User_Normal, func(c *Context, dat *RunnerClassDone) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, &VerifyTask{
 | |
| 			Id:     dat.Id,
 | |
| 			TaskId: dat.TaskId,
 | |
| 		})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		if task.TaskType != int(TASK_TYPE_CLASSIFICATION) {
 | |
| 			c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the definitions")
 | |
| 		}
 | |
| 
 | |
| 		err := task.SetResultText(c, dat.Result)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to update the task", err)
 | |
| 		}
 | |
| 
 | |
| 		err = task.UpdateStatus(c, TASK_DONE, "Task completed")
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to update task", err)
 | |
| 		}
 | |
| 
 | |
| 		mutex := x.DataMap["runners_mutex"].(*sync.Mutex)
 | |
| 		mutex.Lock()
 | |
| 		defer mutex.Unlock()
 | |
| 
 | |
| 		var runners map[string]interface{} = x.DataMap["runners"].(map[string]interface{})
 | |
| 		var runner_data map[string]interface{} = runners[dat.Id].(map[string]interface{})
 | |
| 		runner_data["task"] = nil
 | |
| 		runners[dat.Id] = runner_data
 | |
| 		x.DataMap["runners"] = runners
 | |
| 
 | |
| 		return c.SendJSON("Ok")
 | |
| 	})
 | |
| 
 | |
| 	PostAuthJson(x, "/tasks/runner/train_exp/done", User_Normal, func(c *Context, dat *VerifyTask) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, dat)
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		if task.TaskType != int(TASK_TYPE_TRAINING) {
 | |
| 			c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the definitions")
 | |
| 		}
 | |
| 
 | |
| 		model, err := GetBaseModel(c, *task.ModelId)
 | |
| 		if err != nil {
 | |
| 			c.Logger.Error("Failed to get model", "err", err)
 | |
| 			return c.E500M("Failed to get mode", err)
 | |
| 		}
 | |
| 
 | |
| 		// TODO add check the to the model
 | |
| 
 | |
| 		var def Definition
 | |
| 		err = GetDBOnce(c, &def, "model_definition as md where model_id=$1 and status=$2 order by accuracy desc limit 1;", task.ModelId, DEFINITION_STATUS_TRANIED)
 | |
| 		if err == NotFoundError {
 | |
| 			// TODO Make the Model status have a message
 | |
| 			c.Logger.Error("All definitions failed to train!")
 | |
| 			model.UpdateStatus(c, FAILED_TRAINING)
 | |
| 			task.UpdateStatusLog(c, TASK_FAILED_RUNNING, "All definition failed to train!")
 | |
| 			clearRunnerTask(x, dat.Id)
 | |
| 			return c.SendJSON("Ok")
 | |
| 		} else if err != nil {
 | |
| 			model.UpdateStatus(c, FAILED_TRAINING)
 | |
| 			task.UpdateStatusLog(c, TASK_FAILED_RUNNING, "Failed to get model definition")
 | |
| 			return c.E500M("Failed to get model definition", err)
 | |
| 		}
 | |
| 
 | |
| 		if err = def.UpdateStatus(c, DEFINITION_STATUS_READY); err != nil {
 | |
| 			model.UpdateStatus(c, FAILED_TRAINING)
 | |
| 			task.UpdateStatusLog(c, TASK_FAILED_RUNNING, "Failed to update model definition")
 | |
| 			return c.E500M("Failed to update model definition", err)
 | |
| 		}
 | |
| 
 | |
| 		to_delete, err := GetDbMultitple[JustId](c, "model_definition where status!=$1 and model_id=$2", MODEL_DEFINITION_STATUS_READY, model.Id)
 | |
| 		if err != nil {
 | |
| 			c.GetLogger().Error("Failed to select model_definition to delete")
 | |
| 			return c.E500M("Failed to select model definition to delete", err)
 | |
| 		}
 | |
| 
 | |
| 		for _, d := range to_delete {
 | |
| 			os.RemoveAll(path.Join("savedData", model.Id, "defs", d.Id))
 | |
| 		}
 | |
| 
 | |
| 		// TODO Check if returning also works here
 | |
| 		if _, err = c.Exec("delete from model_definition where status!=$1 and model_id=$2;", MODEL_DEFINITION_STATUS_READY, model.Id); err != nil {
 | |
| 			model.UpdateStatus(c, FAILED_TRAINING)
 | |
| 			task.UpdateStatusLog(c, TASK_FAILED_RUNNING, "Failed to delete unsed definitions")
 | |
| 			return c.E500M("Failed to delete unsed definitions", err)
 | |
| 		}
 | |
| 
 | |
| 		if err = SplitModel(c, model); err != nil {
 | |
| 			err = SetModelClassStatus(c, CLASS_STATUS_TO_TRAIN, "model_id=$1 and status=$2;", model.Id, CLASS_STATUS_TRAINING)
 | |
| 			if err != nil {
 | |
| 				c.Logger.Error("Failed to split the model! And Failed to set class status")
 | |
| 				return c.E500M("Failed to split the model", err)
 | |
| 			}
 | |
| 
 | |
| 			c.Logger.Error("Failed to split the model")
 | |
| 			return c.E500M("Failed to split the model", err)
 | |
| 		}
 | |
| 
 | |
| 		// Set the class status to trained
 | |
| 		err = SetModelClassStatus(c, CLASS_STATUS_TRAINED, "model_id=$1 and status=$2;", model.Id, CLASS_STATUS_TRAINING)
 | |
| 		if err != nil {
 | |
| 			c.Logger.Error("Failed to set class status")
 | |
| 			return c.E500M("Failed to set class status", err)
 | |
| 		}
 | |
| 
 | |
| 		c.Logger.Warn("Removing base model for", "model", model.Id, "def", def.Id)
 | |
| 		os.RemoveAll(path.Join("savedData", model.Id, "defs", def.Id, "model"))
 | |
| 		os.RemoveAll(path.Join("savedData", model.Id, "defs", def.Id, "model.keras"))
 | |
| 
 | |
| 		if err = model.UpdateStatus(c, READY); err != nil {
 | |
| 			model.UpdateStatus(c, FAILED_TRAINING)
 | |
| 			task.UpdateStatusLog(c, TASK_FAILED_RUNNING, "Failed to delete unsed definitions")
 | |
| 			return c.E500M("Failed to update status of model", err)
 | |
| 		}
 | |
| 
 | |
| 		task.UpdateStatusLog(c, TASK_DONE, "Model finished training")
 | |
| 
 | |
| 		mutex := x.DataMap["runners_mutex"].(*sync.Mutex)
 | |
| 		mutex.Lock()
 | |
| 		defer mutex.Unlock()
 | |
| 
 | |
| 		var runners map[string]interface{} = x.DataMap["runners"].(map[string]interface{})
 | |
| 		var runner_data map[string]interface{} = runners[dat.Id].(map[string]interface{})
 | |
| 		runner_data["task"] = nil
 | |
| 		runners[dat.Id] = runner_data
 | |
| 		x.DataMap["runners"] = runners
 | |
| 
 | |
| 		return c.SendJSON("Ok")
 | |
| 	})
 | |
| 
 | |
| 	PostAuthJson(x, "/tasks/runner/retrain/done", User_Normal, func(c *Context, dat *VerifyTask) *Error {
 | |
| 		_, error := verifyRunner(c, &JustId{Id: dat.Id})
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		task, error := verifyTask(x, c, dat)
 | |
| 		if error != nil {
 | |
| 			return error
 | |
| 		}
 | |
| 
 | |
| 		if task.TaskType != int(TASK_TYPE_RETRAINING) {
 | |
| 			c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
 | |
| 			return c.JsonBadRequest("Task is not the right type go get the definitions")
 | |
| 		}
 | |
| 
 | |
| 		model, err := GetBaseModel(c, *task.ModelId)
 | |
| 		if err != nil {
 | |
| 			c.Logger.Error("Failed to get model", "err", err)
 | |
| 			return c.E500M("Failed to get mode", err)
 | |
| 		}
 | |
| 
 | |
| 		err = SetModelClassStatus(c, CLASS_STATUS_TRAINED, "model_id=$1 and status=$2;", model.Id, CLASS_STATUS_TRAINING)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to set class status", err)
 | |
| 		}
 | |
| 
 | |
| 		_, err = c.Exec("update exp_model_head set status=$1 where status=$2 and model_id=$3", MODEL_HEAD_STATUS_READY, MODEL_HEAD_STATUS_TRAINING, model.Id)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to set head status", err)
 | |
| 		}
 | |
| 
 | |
| 		err = model.UpdateStatus(c, READY)
 | |
| 		if err != nil {
 | |
| 			return c.E500M("Failed to set class status", err)
 | |
| 		}
 | |
| 
 | |
| 		task.UpdateStatusLog(c, TASK_DONE, "Model finished training")
 | |
| 		clearRunnerTask(x, dat.Id)
 | |
| 
 | |
| 		return c.SendJSON("Ok")
 | |
| 	})
 | |
| 
 | |
| }
 |