# fyp/views/py/python_model_template.py

import tensorflow as tf
import random
import pandas as pd
from tensorflow import keras
from tensorflow.data import AUTOTUNE
from keras import layers, losses, optimizers

# Directory holding the image files that are listed and split into training
# and validation sets below; the quoted placeholder is filled in when this
# template is rendered.
DATA_DIR = "{{ .DataDir }}"
# Target size handed to tf.image.resize in decode_image; the placeholder is
# filled in when this template is rendered.
image_size = ({{ .Size }})

# Build the image-id -> class-index lookup table used to label each file.
df = pd.read_csv("{{ .RunPath }}/train.csv", dtype=str)
# Drop incomplete rows as a unit. Calling dropna() on each column separately
# can remove different rows from each one, which shifts the ids against the
# indexes (or yields key/value tensors of different lengths).
labelled_rows = df.dropna(subset=['Id', 'Index'])
keys = tf.constant(labelled_rows['Id'])
values = tf.constant([int(index) for index in labelled_rows['Index']])

# Ids that are not present in train.csv resolve to the default value -1.
table = tf.lookup.StaticHashTable(
    initializer=tf.lookup.KeyValueTensorInitializer(
        keys=keys,
        values=values,
    ),
    default_value=tf.constant(-1),
    name="Indexes"
)

# DATA_DIR followed by a "/" separator.
DATA_DIR_PREPARE = f"{DATA_DIR}/"

#based on https://www.tensorflow.org/tutorials/load_data/images
def pathToLabel(path):
  """Map an image file path to its class index from the lookup table.

  Args:
    path: string tensor with the path of one image file (assumed to be a
      scalar, as produced by iterating the file-listing dataset).

  Returns:
    One-element tensor with the index stored in ``table`` for the file's
    id (its file name without the .jpg/.png extension), or the table's
    default value (-1) when the id is unknown.
  """
  # Take the last path component instead of regex-stripping the directory:
  # the directory string is not written as a regex and may contain
  # metacharacters, and an unanchored pattern can match anywhere.
  file_name = tf.strings.split(path, "/")[-1]
  # Escape the dot and anchor at the end so that only a real ".jpg"/".png"
  # extension is removed. The unescaped, unanchored patterns also removed
  # sequences such as "_jpg" or "1png" from the middle of an id.
  image_id = tf.strings.regex_replace(file_name, r"\.(jpg|png)$", "")
  return table.lookup(tf.strings.as_string([image_id]))

def decode_image(img, channels=1):
  """Decode encoded image bytes and resize the result to ``image_size``.

  Args:
    img: string tensor with the raw contents of an image file.
    channels: number of colour channels to decode. Defaults to 1
      (grayscale), the value that was previously hard-coded.

  Returns:
    The decoded image resized with tf.image.resize to ``image_size``.
  """
  # TODO: choose the channel count based on whether the images are grayscale.
  img = tf.io.decode_png(img, channels=channels)

  return tf.image.resize(img, image_size)

def process_path(path):
  """Load one training example from a file path.

  Returns an ``(image, label)`` pair: the file's decoded, resized image
  and the class index looked up for the path.
  """
  raw_bytes = tf.io.read_file(path)
  return decode_image(raw_bytes), pathToLabel(path)

def configure_for_performance(ds: tf.data.Dataset) -> tf.data.Dataset:
  """Shuffle, batch and prefetch ``ds``.

  Uses a shuffle buffer of 1000 elements, batches of the module-level
  ``batch_size`` and an AUTOTUNE prefetch. ds.cache() is currently not
  applied.
  """
  shuffled = ds.shuffle(buffer_size=1000)
  batched = shuffled.batch(batch_size)
  return batched.prefetch(AUTOTUNE)

def prepare_dataset(ds: tf.data.Dataset) -> tf.data.Dataset:
  """Turn a dataset of file paths into batches of (image, label) pairs."""
  examples = ds.map(process_path, num_parallel_calls=AUTOTUNE)
  return configure_for_performance(examples)

# A fresh seed per run, used for the shuffle that precedes the split.
seed = random.randint(0, 100000000)

batch_size = 100

# Read all the files from the directory
list_ds = tf.data.Dataset.list_files(f'{DATA_DIR}/*', shuffle=False)

image_count = len(list_ds)

# Shuffle exactly once. With the default reshuffle_each_iteration=True the
# order changes on every epoch, so the skip/take split below would select
# different files each time and validation files would leak into training.
list_ds = list_ds.shuffle(
    image_count, seed=seed, reshuffle_each_iteration=False)

# Hold out 30% of the files for validation.
val_size = int(image_count * 0.3)

train_ds = list_ds.skip(val_size)
val_ds = list_ds.take(val_size)

dataset = prepare_dataset(train_ds)
dataset_validation   = prepare_dataset(val_ds)

# Assemble the network from the layer list supplied to the template:
#   LayerType 1 -> Rescaling layer (factor 1/255)
#   LayerType 2 -> Dense layer of the given shape with sigmoid activation
#   LayerType 3 -> Flatten layer
# Any other layer type emits the bare token ERROR, presumably so that the
# generated script fails instead of training an unintended model.
model = keras.Sequential([
    {{- range .Layers }}
    {{- if eq .LayerType 1}}
    layers.Rescaling(1./255),
    {{- else if eq .LayerType 2 }}
    layers.Dense({{ .Shape }}, activation="sigmoid"),
    {{- else if eq .LayerType 3}}
    layers.Flatten(),
    {{- else }}
    ERROR
    {{- end }}
    {{- end }}
])

# Sparse categorical cross-entropy takes the integer class indexes returned
# by the lookup table directly as labels.
model.compile(
    loss=losses.SparseCategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'])

his = model.fit(dataset, validation_data= dataset_validation, epochs=50)

acc = his.history["accuracy"]

# Persist the training accuracy of the final epoch. The context manager
# closes the file even if the write raises.
with open("accuracy.val", "w") as f:
  f.write(str(acc[-1]))

tf.saved_model.save(model, "model")

# model.save("model.keras", save_format="tf")