@@ -10,6 +10,7 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"sort"
|
||||
"strconv"
|
||||
"text/template"
|
||||
|
||||
@@ -43,6 +44,7 @@ const (
|
||||
MODEL_DEFINITION_STATUS_PRE_INIT ModelDefinitionStatus = 1
|
||||
MODEL_DEFINITION_STATUS_INIT = 2
|
||||
MODEL_DEFINITION_STATUS_TRAINING = 3
|
||||
MODEL_DEFINITION_STATUS_PAUSED_TRAINING = 6
|
||||
MODEL_DEFINITION_STATUS_TRANIED = 4
|
||||
MODEL_DEFINITION_STATUS_READY = 5
|
||||
)
|
||||
@@ -50,10 +52,10 @@ const (
|
||||
type LayerType int
|
||||
|
||||
const (
|
||||
LAYER_INPUT LayerType = 1
|
||||
LAYER_DENSE = 2
|
||||
LAYER_FLATTEN = 3
|
||||
LAYER_SIMPLE_BLOCK = 4
|
||||
LAYER_INPUT LayerType = 1
|
||||
LAYER_DENSE = 2
|
||||
LAYER_FLATTEN = 3
|
||||
LAYER_SIMPLE_BLOCK = 4
|
||||
)
|
||||
|
||||
func ModelDefinitionUpdateStatus(c *Context, id string, status ModelDefinitionStatus) (err error) {
|
||||
@@ -142,6 +144,7 @@ func trainDefinition(c *Context, model *BaseModel, definition_id string, load_pr
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer os.RemoveAll(run_path)
|
||||
|
||||
_, err = generateCvs(c, run_path, model.Id)
|
||||
if err != nil {
|
||||
@@ -174,29 +177,24 @@ func trainDefinition(c *Context, model *BaseModel, definition_id string, load_pr
|
||||
"DefId": definition_id,
|
||||
"LoadPrev": load_prev,
|
||||
"LastModelRunPath": path.Join(getDir(), result_path, "model.keras"),
|
||||
"SaveModelPath": path.Join(getDir(), result_path),
|
||||
}); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Run the command
|
||||
out, err := exec.Command("bash", "-c", fmt.Sprintf("cd %s && python run.py", run_path)).Output()
|
||||
out, err := exec.Command("bash", "-c", fmt.Sprintf("cd %s && python run.py", run_path)).CombinedOutput()
|
||||
if err != nil {
|
||||
c.Logger.Debug(string(out))
|
||||
return
|
||||
}
|
||||
|
||||
c.Logger.Info("Python finished running")
|
||||
|
||||
if err = os.MkdirAll(result_path, os.ModePerm); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if err = exec.Command("cp", "-r", path.Join(run_path, "model"), path.Join(result_path, "model")).Run(); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if err = exec.Command("cp", "-r", path.Join(run_path, "model.keras"), path.Join(result_path, "model.keras")).Run(); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
accuracy_file, err := os.Open(path.Join(run_path, "accuracy.val"))
|
||||
if err != nil {
|
||||
return
|
||||
@@ -214,8 +212,6 @@ func trainDefinition(c *Context, model *BaseModel, definition_id string, load_pr
|
||||
}
|
||||
|
||||
c.Logger.Info("Model finished training!", "accuracy", accuracy)
|
||||
|
||||
os.RemoveAll(run_path)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -236,6 +232,29 @@ func remove[T interface{}](lst []T, i int) []T {
|
||||
return append(lst[:i], lst[i+1:]...)
|
||||
}
|
||||
|
||||
// TrainModelRow is the in-memory record kept for one model definition while
// it is being trained.
type TrainModelRow struct {
	// id is the id of the model_definition row.
	id string
	// target_accuracy and epoch are loaded from the model_definition row.
	// target_accuracy is compared against acuracy, so both use the same scale.
	target_accuracy, epoch int
	// acuracy is the accuracy recorded for the most recent training run.
	acuracy float64
}
|
||||
|
||||
type TraingModelRowDefinitions []TrainModelRow
|
||||
|
||||
func (nf TraingModelRowDefinitions) Len() int { return len(nf) }
|
||||
func (nf TraingModelRowDefinitions) Swap(i, j int) { nf[i], nf[j] = nf[j], nf[i] }
|
||||
func (nf TraingModelRowDefinitions) Less(i, j int) bool {
|
||||
return nf[i].acuracy < nf[j].acuracy
|
||||
}
|
||||
|
||||
// ToRemoveList is a list of slice indexes that implements sort.Interface,
// ordering the indexes ascending.
//
// sort.Reverse only wraps a value; to actually get the indexes from highest
// to lowest call sort.Sort(sort.Reverse(list)).
type ToRemoveList []int

// Len returns the number of indexes in the list.
func (list ToRemoveList) Len() int {
	return len(list)
}

// Swap exchanges the indexes stored at positions a and b.
func (list ToRemoveList) Swap(a, b int) {
	list[b], list[a] = list[a], list[b]
}

// Less reports whether the index at position a is smaller than the one at b.
func (list ToRemoveList) Less(a, b int) bool {
	return list[b] > list[a]
}
|
||||
|
||||
func trainModel(c *Context, model *BaseModel) {
|
||||
definitionsRows, err := c.Db.Query("select id, target_accuracy, epoch from model_definition where status=$1 and model_id=$2", MODEL_DEFINITION_STATUS_INIT, model.Id)
|
||||
if err != nil {
|
||||
@@ -246,16 +265,11 @@ func trainModel(c *Context, model *BaseModel) {
|
||||
}
|
||||
defer definitionsRows.Close()
|
||||
|
||||
type row struct {
|
||||
id string
|
||||
target_accuracy int
|
||||
epoch int
|
||||
}
|
||||
|
||||
definitions := []row{}
|
||||
var definitions TraingModelRowDefinitions = []TrainModelRow{}
|
||||
|
||||
for definitionsRows.Next() {
|
||||
var rowv row
|
||||
var rowv TrainModelRow
|
||||
rowv.acuracy = 0
|
||||
if err = definitionsRows.Scan(&rowv.id, &rowv.target_accuracy, &rowv.epoch); err != nil {
|
||||
c.Logger.Error("Failed to train Model Could not read definition from db!Err:")
|
||||
c.Logger.Error(err)
|
||||
@@ -271,23 +285,23 @@ func trainModel(c *Context, model *BaseModel) {
|
||||
return
|
||||
}
|
||||
|
||||
toTrain := len(definitions)
|
||||
firstRound := true
|
||||
var newDefinitions = []row{}
|
||||
copy(newDefinitions, definitions)
|
||||
finished := false
|
||||
|
||||
for {
|
||||
var toRemove ToRemoveList = []int{}
|
||||
for i, def := range definitions {
|
||||
ModelDefinitionUpdateStatus(c, def.id, MODEL_DEFINITION_STATUS_TRAINING)
|
||||
accuracy, err := trainDefinition(c, model, def.id, !firstRound)
|
||||
if err != nil {
|
||||
c.Logger.Error("Failed to train definition!Err:", "err", err)
|
||||
ModelDefinitionUpdateStatus(c, def.id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
|
||||
toTrain = toTrain - 1
|
||||
newDefinitions = remove(newDefinitions, i)
|
||||
toRemove = append(toRemove, i)
|
||||
continue
|
||||
}
|
||||
def.epoch += EPOCH_PER_RUN
|
||||
accuracy = accuracy * 100
|
||||
def.acuracy = accuracy
|
||||
|
||||
if accuracy >= float64(def.target_accuracy) {
|
||||
c.Logger.Info("Found a definition that reaches target_accuracy!")
|
||||
@@ -305,30 +319,68 @@ func trainModel(c *Context, model *BaseModel) {
|
||||
return
|
||||
}
|
||||
|
||||
toTrain = 0
|
||||
finished = true
|
||||
break
|
||||
}
|
||||
|
||||
if def.epoch > MAX_EPOCH {
|
||||
fmt.Printf("Failed to train definition! Accuracy less %f < %d\n", accuracy, def.target_accuracy)
|
||||
ModelDefinitionUpdateStatus(c, def.id, MODEL_DEFINITION_STATUS_FAILED_TRAINING)
|
||||
toTrain = toTrain - 1
|
||||
newDefinitions = remove(newDefinitions, i)
|
||||
toRemove = append(toRemove, i)
|
||||
continue
|
||||
}
|
||||
|
||||
_, err = c.Db.Exec("update model_definition set accuracy=$1, epoch=$2 where id=$3", accuracy, def.epoch, def.id)
|
||||
if err != nil {
|
||||
c.Logger.Error("Failed to train definition!Err:\n", "err", err)
|
||||
ModelUpdateStatus(c, model.Id, FAILED_TRAINING)
|
||||
return
|
||||
}
|
||||
_, err = c.Db.Exec("update model_definition set accuracy=$1, epoch=$2, status=$3 where id=$4", accuracy, def.epoch, MODEL_DEFINITION_STATUS_PAUSED_TRAINING, def.id)
|
||||
if err != nil {
|
||||
c.Logger.Error("Failed to train definition!Err:\n", "err", err)
|
||||
ModelUpdateStatus(c, model.Id, FAILED_TRAINING)
|
||||
return
|
||||
}
|
||||
}
|
||||
copy(definitions, newDefinitions)
|
||||
|
||||
firstRound = false
|
||||
if toTrain == 0 {
|
||||
if finished {
|
||||
break
|
||||
}
|
||||
|
||||
sort.Reverse(toRemove)
|
||||
|
||||
c.Logger.Info("Round done", "toRemove", toRemove)
|
||||
|
||||
for _, n := range toRemove {
|
||||
definitions = remove(definitions, n)
|
||||
}
|
||||
|
||||
len_def := len(definitions)
|
||||
|
||||
if len_def == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
if len_def == 1 {
|
||||
continue
|
||||
}
|
||||
|
||||
sort.Sort(definitions)
|
||||
|
||||
acc := definitions[0].acuracy - 20
|
||||
|
||||
c.Logger.Info("Training models, Highest acc", "acc", acc)
|
||||
|
||||
toRemove = []int{}
|
||||
for i, def := range definitions {
|
||||
if def.acuracy < acc {
|
||||
toRemove = append(toRemove, i)
|
||||
}
|
||||
}
|
||||
|
||||
c.Logger.Info("Removing due to accuracy", "toRemove", toRemove)
|
||||
|
||||
sort.Reverse(toRemove)
|
||||
for _, n := range toRemove {
|
||||
c.Logger.Warn("Removing definition not fast enough learning", "n", n)
|
||||
definitions = remove(definitions, n)
|
||||
}
|
||||
}
|
||||
|
||||
rows, err := c.Db.Query("select id from model_definition where model_id=$1 and status=$2 order by accuracy desc limit 1;", model.Id, MODEL_DEFINITION_STATUS_TRANIED)
|
||||
@@ -437,14 +489,26 @@ func generateDefinition(c *Context, model *BaseModel, target_accuracy int, numbe
|
||||
return failed()
|
||||
}
|
||||
|
||||
order := 1;
|
||||
order := 1
|
||||
|
||||
// Note the shape for now is not used
|
||||
err = MakeLayer(c.Db, def_id, order, LAYER_INPUT, fmt.Sprintf("%d,%d,1", model.Width, model.Height))
|
||||
if err != nil {
|
||||
return failed()
|
||||
// Note the shape of the first layer defines the import size
|
||||
if complexity == 2 {
|
||||
// Note the shape for now is not used
|
||||
width := int(math.Pow(2, math.Floor(math.Log(float64(model.Width))/math.Log(2.0))))
|
||||
height := int(math.Pow(2, math.Floor(math.Log(float64(model.Height))/math.Log(2.0))))
|
||||
c.Logger.Warn("Complexity 2 creating model with smaller size", "width", width, "height", height)
|
||||
err = MakeLayer(c.Db, def_id, order, LAYER_INPUT, fmt.Sprintf("%d,%d,1", width, height))
|
||||
if err != nil {
|
||||
return failed()
|
||||
}
|
||||
order++
|
||||
} else {
|
||||
err = MakeLayer(c.Db, def_id, order, LAYER_INPUT, fmt.Sprintf("%d,%d,1", model.Width, model.Height))
|
||||
if err != nil {
|
||||
return failed()
|
||||
}
|
||||
order++
|
||||
}
|
||||
order++;
|
||||
|
||||
if complexity == 0 {
|
||||
|
||||
@@ -452,12 +516,12 @@ func generateDefinition(c *Context, model *BaseModel, target_accuracy int, numbe
|
||||
if err != nil {
|
||||
return failed()
|
||||
}
|
||||
order++;
|
||||
order++
|
||||
|
||||
loop := int(math.Log2(float64(number_of_classes)))
|
||||
for i := 0; i < loop; i++ {
|
||||
err = MakeLayer(c.Db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
|
||||
order++;
|
||||
order++
|
||||
if err != nil {
|
||||
ModelUpdateStatus(c, model.Id, FAILED_PREPARING_TRAINING)
|
||||
// TODO improve this response
|
||||
@@ -465,17 +529,17 @@ func generateDefinition(c *Context, model *BaseModel, target_accuracy int, numbe
|
||||
}
|
||||
}
|
||||
|
||||
} else if (complexity == 1) {
|
||||
} else if complexity == 1 {
|
||||
|
||||
loop := int((math.Log(float64(model.Width))/math.Log(float64(10))))
|
||||
if loop == 0 {
|
||||
loop = 1;
|
||||
}
|
||||
loop := int((math.Log(float64(model.Width)) / math.Log(float64(10))))
|
||||
if loop == 0 {
|
||||
loop = 1
|
||||
}
|
||||
for i := 0; i < loop; i++ {
|
||||
err = MakeLayer(c.Db, def_id, order, LAYER_SIMPLE_BLOCK, "")
|
||||
order++;
|
||||
order++
|
||||
if err != nil {
|
||||
return failed();
|
||||
return failed()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -483,17 +547,49 @@ func generateDefinition(c *Context, model *BaseModel, target_accuracy int, numbe
|
||||
if err != nil {
|
||||
return failed()
|
||||
}
|
||||
order++;
|
||||
order++
|
||||
|
||||
loop = int((math.Log(float64(number_of_classes))/math.Log(float64(10)))/2)
|
||||
if loop == 0 {
|
||||
loop = 1;
|
||||
}
|
||||
loop = int((math.Log(float64(number_of_classes)) / math.Log(float64(10))) / 2)
|
||||
if loop == 0 {
|
||||
loop = 1
|
||||
}
|
||||
for i := 0; i < loop; i++ {
|
||||
err = MakeLayer(c.Db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
|
||||
order++;
|
||||
order++
|
||||
if err != nil {
|
||||
return failed();
|
||||
return failed()
|
||||
}
|
||||
}
|
||||
|
||||
} else if complexity == 2 {
|
||||
|
||||
loop := int((math.Log(float64(model.Width)) / math.Log(float64(10))))
|
||||
if loop == 0 {
|
||||
loop = 1
|
||||
}
|
||||
for i := 0; i < loop; i++ {
|
||||
err = MakeLayer(c.Db, def_id, order, LAYER_SIMPLE_BLOCK, "")
|
||||
order++
|
||||
if err != nil {
|
||||
return failed()
|
||||
}
|
||||
}
|
||||
|
||||
err = MakeLayer(c.Db, def_id, order, LAYER_FLATTEN, "")
|
||||
if err != nil {
|
||||
return failed()
|
||||
}
|
||||
order++
|
||||
|
||||
loop = int((math.Log(float64(number_of_classes)) / math.Log(float64(10))) / 2)
|
||||
if loop == 0 {
|
||||
loop = 1
|
||||
}
|
||||
for i := 0; i < loop; i++ {
|
||||
err = MakeLayer(c.Db, def_id, order, LAYER_DENSE, fmt.Sprintf("%d,1", number_of_classes*(loop-i)))
|
||||
order++
|
||||
if err != nil {
|
||||
return failed()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -523,19 +619,26 @@ func generateDefinitions(c *Context, model *BaseModel, target_accuracy int, numb
|
||||
return c.Error500(err)
|
||||
}
|
||||
|
||||
if (number_of_models == 1) {
|
||||
if (model.Width < 100 && model.Height < 100 && len(cls) < 30) {
|
||||
generateDefinition(c, model, target_accuracy, len(cls), 0)
|
||||
} else {
|
||||
generateDefinition(c, model, target_accuracy, len(cls), 1)
|
||||
}
|
||||
} else {
|
||||
// TODO handle increasing the complexity
|
||||
for i := 0; i < number_of_models; i++ {
|
||||
generateDefinition(c, model, target_accuracy, len(cls), 0)
|
||||
}
|
||||
}
|
||||
cls_len := len(cls)
|
||||
|
||||
if number_of_models == 1 {
|
||||
if model.Width < 100 && model.Height < 100 && cls_len < 30 {
|
||||
generateDefinition(c, model, target_accuracy, cls_len, 0)
|
||||
} else if model.Width > 100 && model.Height > 100 {
|
||||
generateDefinition(c, model, target_accuracy, cls_len, 2)
|
||||
} else {
|
||||
generateDefinition(c, model, target_accuracy, cls_len, 1)
|
||||
}
|
||||
} else if number_of_models == 3 {
|
||||
for i := 0; i < number_of_models; i++ {
|
||||
generateDefinition(c, model, target_accuracy, cls_len, i)
|
||||
}
|
||||
} else {
|
||||
// TODO handle increasing the complexity
|
||||
for i := 0; i < number_of_models; i++ {
|
||||
generateDefinition(c, model, target_accuracy, cls_len, 0)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -624,14 +727,14 @@ func handleTrain(handle *Handle) {
|
||||
|
||||
f := r.URL.Query()
|
||||
|
||||
accuracy := 0.0
|
||||
accuracy := 0.0
|
||||
|
||||
if !CheckId(f, "model_id") || !CheckId(f, "definition") || CheckEmpty(f, "epoch") || !CheckFloat64(f, "accuracy", &accuracy){
|
||||
if !CheckId(f, "model_id") || !CheckId(f, "definition") || CheckEmpty(f, "epoch") || !CheckFloat64(f, "accuracy", &accuracy) {
|
||||
c.Logger.Warn("Invalid: model_id or definition or epoch or accuracy")
|
||||
return c.UnsafeErrorCode(nil, 400, nil)
|
||||
}
|
||||
|
||||
accuracy = accuracy * 100
|
||||
accuracy = accuracy * 100
|
||||
|
||||
model_id := f.Get("model_id")
|
||||
def_id := f.Get("definition")
|
||||
@@ -665,7 +768,7 @@ func handleTrain(handle *Handle) {
|
||||
return c.UnsafeErrorCode(nil, 400, nil)
|
||||
}
|
||||
|
||||
c.Logger.Info("Updated model_definition!", "model", model_id, "progress", epoch, "accuracy", accuracy)
|
||||
c.Logger.Info("Updated model_definition!", "model", model_id, "progress", epoch, "accuracy", accuracy)
|
||||
|
||||
_, err = c.Db.Exec("update model_definition set epoch_progress=$1, accuracy=$2 where id=$3", epoch, accuracy, def_id)
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user