feat: improved visibility of dataset and improved speed for expand import zip file

This commit is contained in:
2024-04-09 17:28:59 +01:00
parent 00369640a8
commit 143ad3b02b
14 changed files with 727 additions and 464 deletions

View File

@@ -309,58 +309,69 @@ func processZipFileExpand(c *Context, model *BaseModel) {
ids[name] = id
}
back_channel := make(chan int, c.Handle.Config.NumberOfWorkers)
file_chans := make([]chan *zip.File, c.Handle.Config.NumberOfWorkers)
for i := 0; i < c.Handle.Config.NumberOfWorkers; i++ {
file_chans[i] = make(chan *zip.File, 2)
go fileProcessor(c, model, reader, ids, base_path, i, file_chans[i], back_channel)
}
clean_up_channels := func() {
for i := 0; i < c.Handle.Config.NumberOfWorkers; i++ {
close(file_chans[i])
}
for i := 0; i < c.Handle.Config.NumberOfWorkers - 1; i++ {
_ = <- back_channel
}
close(back_channel)
}
first_round := true
channel_to_send := 0
// Parelalize this
for _, file := range reader.Reader.File {
// Skip if dir
if file.Name[len(file.Name)-1] == '/' {
continue
}
data, err := reader.Open(file.Name)
if err != nil {
failed(fmt.Sprintf("Could not open file in zip %s\n", file.Name))
return
}
defer data.Close()
file_data, err := io.ReadAll(data)
if err != nil {
failed(fmt.Sprintf("Could not read file file in zip %s\n", file.Name))
return
}
file_chans[channel_to_send] <- file
// TODO check if the file is a valid photo that matched the defined photo on the database
parts := strings.Split(file.Name, "/")
if first_round {
channel_to_send += 1
if c.Handle.Config.NumberOfWorkers == channel_to_send {
first_round = false
}
}
// Can not do else if because need to handle the case where the value changes in
// previous if
if !first_round {
new_id, ok := <- back_channel
if !ok {
c.Logger.Fatal("Something is very wrong please check as this line should be unreachable")
}
mode := model_classes.DATA_POINT_MODE_TRAINING
if parts[0] == "testing" {
mode = model_classes.DATA_POINT_MODE_TESTING
}
if new_id < 0 {
c.Logger.Error("Worker failed", "worker id", -(new_id + 1))
clean_up_channels()
failed("One of the workers failed due to db error")
return
}
data_point_id, err := model_classes.AddDataPoint(c.Db, ids[parts[1]], "id://", mode)
if err != nil {
failed(fmt.Sprintf("Failed to add data point for %s\n", model.Id))
return
}
channel_to_send = new_id
}
file_path := path.Join(base_path, data_point_id+"."+model.Format)
f, err := os.Create(file_path)
if err != nil {
failed(fmt.Sprintf("Could not create file %s\n", file_path))
return
}
defer f.Close()
f.Write(file_data)
if !testImgForModel(c, model, file_path) {
c.Logger.Errorf("Image did not have valid format for model %s (in zip: %s)!", file_path, file.Name)
c.Logger.Warn("Not failling updating data point to status -1")
message := "Image did not have valid format for the model"
if err = model_classes.UpdateDataPointStatus(c.Db, data_point_id, -1, &message); err != nil {
failed(fmt.Sprintf("Failed to update data point status"))
return
}
}
}
clean_up_channels()
c.Logger.Info("Added data to model", "id", model.Id)
ModelUpdateStatus(c, model.Id, READY)
}