more worker on go-runner
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
package tasks
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -383,4 +385,149 @@ func handleRemoteRunner(x *Handle) {
|
||||
Training: training_points,
|
||||
})
|
||||
})
|
||||
|
||||
// RunnerTrainDefEpoch is the JSON body a runner posts to report the result of
// one training epoch for a model definition.
//
// Epoch and Accuracy deliberately carry no `required` rule. Assuming the
// `validate` tags are evaluated with go-playground/validator semantics (TODO
// confirm), `required` on a numeric field rejects the zero value, so a report
// for epoch 0 or with an accuracy of exactly 0 would be refused even though
// the field was sent. The trade-off is that an omitted epoch/accuracy now
// decodes as 0 instead of failing validation.
type RunnerTrainDefEpoch struct {
	// Id is forwarded to verifyRunner and verifyTask by the handler.
	Id string `json:"id" validate:"required"`
	// TaskId is forwarded to verifyTask together with Id.
	TaskId string `json:"taskId" validate:"required"`
	// DefId selects the definition that is loaded with GetDefinition.
	DefId string `json:"defId" validate:"required"`
	// Epoch is passed to Definition.UpdateAfterEpoch; zero is allowed.
	Epoch int `json:"epoch"`
	// Accuracy is passed to Definition.UpdateAfterEpoch; zero is allowed.
	Accuracy float64 `json:"accuracy"`
}
|
||||
// POST /tasks/runner/train/epoch
//
// Records the outcome of one training epoch for a definition. The request is
// rejected when verifyRunner or verifyTask return an error, or when the task
// is not a training task; otherwise the definition named by DefId is loaded
// and updated through UpdateAfterEpoch.
//
// NOTE(review): this route starts with "/tasks/" while the mark-failed and
// done routes registered next to it start with "/task/" — confirm which
// prefix the runner actually calls.
PostAuthJson(x, "/tasks/runner/train/epoch", User_Normal, func(c *Context, dat *RunnerTrainDefEpoch) *Error {
	// Only the error of the runner check matters here.
	if _, runnerErr := verifyRunner(c, &JustId{Id: dat.Id}); runnerErr != nil {
		return runnerErr
	}

	task, taskErr := verifyTask(x, c, &VerifyTask{Id: dat.Id, TaskId: dat.TaskId})
	if taskErr != nil {
		return taskErr
	}

	// NOTE(review): the wording below talks about "getting the definitions";
	// it looks copied from another handler. Left untouched because it is
	// emitted at runtime.
	if task.TaskType != int(TASK_TYPE_TRAINING) {
		c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
		return c.JsonBadRequest("Task is not the right type go get the definitions")
	}

	// NOTE(review): nothing visible here ties dat.DefId to the verified task —
	// confirm GetDefinition/UpdateAfterEpoch do not need that check.
	def, err := GetDefinition(c, dat.DefId)
	if err != nil {
		return c.E500M("Failed to get definition information", err)
	}

	if err = def.UpdateAfterEpoch(c, dat.Accuracy, dat.Epoch); err != nil {
		return c.E500M("Failed to update model", err)
	}

	return c.SendJSON("Ok")
})
|
||||
|
||||
PostAuthJson(x, "/task/runner/train/mark-failed", User_Normal, func(c *Context, dat *VerifyTask) *Error {
|
||||
_, error := verifyRunner(c, &JustId{Id: dat.Id})
|
||||
if error != nil {
|
||||
return error
|
||||
}
|
||||
|
||||
task, error := verifyTask(x, c, &VerifyTask{
|
||||
Id: dat.Id,
|
||||
TaskId: dat.TaskId,
|
||||
})
|
||||
if error != nil {
|
||||
return error
|
||||
}
|
||||
|
||||
if task.TaskType != int(TASK_TYPE_TRAINING) {
|
||||
c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
|
||||
return c.JsonBadRequest("Task is not the right type go get the definitions")
|
||||
}
|
||||
|
||||
_, err := c.Exec(
|
||||
"update model_definition set status=$1 "+
|
||||
"where model_id=$2 and status in ($3, $4)",
|
||||
MODEL_DEFINITION_STATUS_CANCELD_TRAINING,
|
||||
task.ModelId,
|
||||
MODEL_DEFINITION_STATUS_TRAINING,
|
||||
MODEL_DEFINITION_STATUS_PAUSED_TRAINING,
|
||||
)
|
||||
if err != nil {
|
||||
return c.E500M("Failed to mark definition as failed", err)
|
||||
}
|
||||
|
||||
return c.SendJSON("Ok")
|
||||
})
|
||||
|
||||
// POST /task/runner/train/done
//
// Finalises a training task reported by a runner:
//  1. verifies the runner and the task, and that the task is a training task;
//  2. loads the task's model;
//  3. picks the definition with status DEFINITION_STATUS_TRANIED and the
//     highest accuracy and moves it to DEFINITION_STATUS_READY;
//  4. removes the on-disk data and the database rows of every definition of
//     the model whose status is not MODEL_DEFINITION_STATUS_READY;
//  5. marks the model READY.
//
// When no trained definition exists the model is marked FAILED_TRAINING, the
// task TASK_FAILED_RUNNING, and "Ok" is still returned to the runner. Any
// other failure after the model was loaded marks model and task as failed and
// answers with a 500.
PostAuthJson(x, "/task/runner/train/done", User_Normal, func(c *Context, dat *VerifyTask) *Error {
	if _, runnerErr := verifyRunner(c, &JustId{Id: dat.Id}); runnerErr != nil {
		return runnerErr
	}

	task, taskErr := verifyTask(x, c, dat)
	if taskErr != nil {
		return taskErr
	}

	if task.TaskType != int(TASK_TYPE_TRAINING) {
		c.Logger.Error("Task not is not the right type to get the definitions", "task type", task.TaskType)
		return c.JsonBadRequest("Task is not the right type go get the definitions")
	}

	// ModelId is a pointer; dereferencing it unchecked would panic the handler
	// for a task that has no model attached.
	if task.ModelId == nil {
		c.Logger.Error("Training task has no model id", "task type", task.TaskType)
		return c.JsonBadRequest("Task does not have a model associated")
	}

	model, err := GetBaseModel(c, *task.ModelId)
	if err != nil {
		c.Logger.Error("Failed to get model", "err", err)
		return c.E500M("Failed to get mode", err)
	}

	// fail marks both the model and the task as failed and builds the 500
	// response; msg is used for the task log and the response alike.
	fail := func(msg string, cause error) *Error {
		model.UpdateStatus(c, FAILED_TRAINING)
		task.UpdateStatusLog(c, TASK_FAILED_RUNNING, msg)
		return c.E500M(msg, cause)
	}

	var def Definition
	err = GetDBOnce(c, &def, "from model_definition as md where model_id=$1 and status=$2 order by accuracy desc limit 1;", task.ModelId, DEFINITION_STATUS_TRANIED)
	if err == NotFoundError {
		// TODO Make the Model status have a message
		c.Logger.Error("All definitions failed to train!")
		model.UpdateStatus(c, FAILED_TRAINING)
		task.UpdateStatusLog(c, TASK_FAILED_RUNNING, "All definition failed to train!")
		return c.SendJSON("Ok")
	}
	if err != nil {
		return fail("Failed to get model definition", err)
	}

	if err = def.UpdateStatus(c, DEFINITION_STATUS_READY); err != nil {
		return fail("Failed to update model definition", err)
	}

	staleDefs, err := c.Query("select id from model_definition where status != $1 and model_id=$2", MODEL_DEFINITION_STATUS_READY, model.Id)
	if err != nil {
		return fail("Failed to delete unsed definitions", err)
	}
	defer staleDefs.Close()

	for staleDefs.Next() {
		var id string
		if err = staleDefs.Scan(&id); err != nil {
			return fail("Failed to delete unsed definitions", err)
		}
		// Removing the files stays best-effort, but a failure is no longer
		// silent so leftover directories can be traced.
		if rmErr := os.RemoveAll(path.Join("savedData", model.Id, "defs", id)); rmErr != nil {
			c.Logger.Error("Failed to remove definition files", "definition", id, "err", rmErr)
		}
	}
	// Next() also returns false when iteration breaks half-way; without this
	// check a partial listing would be treated as complete.
	if err = staleDefs.Err(); err != nil {
		return fail("Failed to delete unsed definitions", err)
	}

	// TODO Check if returning also works here
	if _, err = c.Exec("delete from model_definition where status!=$1 and model_id=$2;", MODEL_DEFINITION_STATUS_READY, model.Id); err != nil {
		return fail("Failed to delete unsed definitions", err)
	}

	model.UpdateStatus(c, READY)

	return c.SendJSON("Ok")
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user