# Environment setup (notebook shell commands). scikeras is installed three times
# with different extras; the output of these three commands is discarded.
# NOTE(review): the three installs look redundant — confirm which extra is actually needed.
!pip install scikeras[tensorflow] > /dev/null 2>&1     # gpu compute platform
!pip install scikeras[tensorflow-cpu] > /dev/null 2>&1
!pip install scikeras > /dev/null 2>&1

# Force a clean reinstall of scikit-learn pinned to 1.5.2.
# NOTE(review): presumably pinned for scikeras compatibility — confirm. pip reports
# that the pin conflicts with umap-learn, which requires scikit-learn>=1.6.
!pip uninstall -y scikit-learn
!pip install scikit-learn==1.5.2

Found existing installation: scikit-learn 1.5.2
Uninstalling scikit-learn-1.5.2:
  Successfully uninstalled scikit-learn-1.5.2
Collecting scikit-learn==1.5.2
  Using cached scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Requirement already satisfied: numpy>=1.19.5 in /usr/local/lib/python3.11/dist-packages (from scikit-learn==1.5.2) (2.0.2)
Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn==1.5.2) (1.15.3)
Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn==1.5.2) (1.5.1)
Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn==1.5.2) (3.6.0)
Using cached scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.3 MB)
Installing collected packages: scikit-learn
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
umap-learn 0.5.9.post2 requires scikit-learn>=1.6, but you have scikit-learn 1.5.2 which is incompatible.
Successfully installed scikit-learn-1.5.2


import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pprint as pp # for nicely formatting complex data structures
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam, SGD
from sklearn.model_selection import RandomizedSearchCV

from scikeras.wrappers import KerasClassifier


# Seed both NumPy and TensorFlow so the random subsample and the training runs are repeatable
tf.random.set_seed(221)
np.random.seed(100)


# Fetch Fashion-MNIST: the training split serves as the sampling pool, the test split is kept whole
fashion_mnist = tf.keras.datasets.fashion_mnist
(sample_images, sample_labels), (test_images, test_labels) = fashion_mnist.load_data()


# Draw 30,000 distinct row indices from the pool.
# replace=False samples without replacement, so no image can be picked twice
# (NumPy's default, replace=True, would allow duplicates).
pool_size = len(sample_images)
indices = np.random.choice(pool_size, size=30000, replace=False)

# Select the matching images and labels with the same index array so they stay aligned
train_images, train_labels = sample_images[indices], sample_labels[indices]

# train_images and train_labels now hold the 30,000 sampled examples
print("train_images shape:", train_images.shape)
print("train_labels shape:", train_labels.shape)

train_images shape: (30000, 28, 28)
train_labels shape: (30000,)


# Preview a single sample: the first training image, titled with its numeric label
first_image, first_label = train_images[0], train_labels[0]
plt.figure(figsize=(3, 3))
plt.imshow(first_image, cmap='gray')
plt.title(f'Label: {first_label}')
plt.axis('off')  # no ticks or frame around the picture
plt.show()


# Preview the first nine samples in a 3x3 grid, each titled with its label
plt.figure(figsize=(5, 5))
for position, (image, label) in enumerate(zip(train_images[:9], train_labels[:9]), start=1):
    plt.subplot(3, 3, position)
    plt.imshow(image, cmap='gray')
    plt.title(f'Label: {label}')
    plt.axis('off')
plt.show()


# Rescale pixel intensities from 0..255 to 0..1 and store them as float32
X_train = train_images.astype(np.float32) / 255.0
X_test = test_images.astype(np.float32) / 255.0


# One-hot encode the labels over the 10 classes,
# e.g. a label of 3 becomes [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
NUM_CLASSES = 10
y_train = tf.keras.utils.to_categorical(train_labels, num_classes=NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(test_labels, num_classes=NUM_CLASSES)


# Hold out everything after the first 20,000 samples for validation.
# The validation slices are taken first because X_train / y_train are rebound right after.
TRAIN_SIZE = 20000
X_val, y_val = X_train[TRAIN_SIZE:], y_train[TRAIN_SIZE:]
X_train, y_train = X_train[:TRAIN_SIZE], y_train[:TRAIN_SIZE]


# Baseline fully connected classifier for 28x28 grayscale images.
# The input shape is declared with an explicit Input object instead of an
# `input_shape` argument on Flatten, which Keras warns against for Sequential models.
model = Sequential([
    tf.keras.Input(shape=(28, 28)),   # one 28x28 image per sample
    Flatten(),                        # 28x28 image -> 784-element vector
    Dense(256, activation='relu'),    # first hidden layer
    Dense(128, activation='relu'),    # second, narrower hidden layer
    Dense(10, activation='softmax'),  # one probability per class
])

/usr/local/lib/python3.11/dist-packages/keras/src/layers/reshaping/flatten.py:37: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)


# Render the layer graph, annotated with each layer's tensor shapes
tf.keras.utils.plot_model(model, show_shapes=True, dpi=66)


# model.compile(...) sets up the learning process before you start training with model.fit().
# Adam is selected by name, the loss is categorical cross-entropy (the labels are
# one-hot encoded), and accuracy is reported alongside the loss.
model.compile(optimizer='Adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


# Print the layer-by-layer summary with output shapes and parameter counts
model.summary()

Model: "sequential"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ flatten (Flatten)               │ (None, 784)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 256)            │       200,960 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 128)            │        32,896 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_2 (Dense)                 │ (None, 10)             │         1,290 │
└─────────────────────────────────┴────────────────────────┴───────────────┘

 Total params: 235,146 (918.54 KB)

 Trainable params: 235,146 (918.54 KB)

 Non-trainable params: 0 (0.00 B)


# Train the baseline for 10 epochs in mini-batches of 64; the validation split is
# scored after every epoch. The returned object's `.history` dict holds the
# per-epoch metrics used by the plots below.
model_history = model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_val, y_val))
# Reset Keras' global state now that training has finished
tf.keras.backend.clear_session()

Epoch 1/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 10s 18ms/step - accuracy: 0.7084 - loss: 0.8310 - val_accuracy: 0.8375 - val_loss: 0.4496
Epoch 2/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - accuracy: 0.8352 - loss: 0.4541 - val_accuracy: 0.8409 - val_loss: 0.4280
Epoch 3/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8555 - loss: 0.3920 - val_accuracy: 0.8576 - val_loss: 0.3927
Epoch 4/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8709 - loss: 0.3489 - val_accuracy: 0.8633 - val_loss: 0.3787
Epoch 5/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.8796 - loss: 0.3237 - val_accuracy: 0.8659 - val_loss: 0.3787
Epoch 6/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 4ms/step - accuracy: 0.8857 - loss: 0.2993 - val_accuracy: 0.8704 - val_loss: 0.3727
Epoch 7/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8938 - loss: 0.2784 - val_accuracy: 0.8699 - val_loss: 0.3672
Epoch 8/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.9003 - loss: 0.2619 - val_accuracy: 0.8740 - val_loss: 0.3664
Epoch 9/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.9053 - loss: 0.2470 - val_accuracy: 0.8682 - val_loss: 0.3866
Epoch 10/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.9142 - loss: 0.2343 - val_accuracy: 0.8801 - val_loss: 0.3480

# Reset Keras' global state again.
# NOTE(review): looks redundant with the tf.keras.backend.clear_session() call made
# right after training — confirm before removing.
from keras.backend import clear_session
clear_session()

# For plotting: per-epoch training accuracy recorded by fit(), displayed as the cell output
model_history.history['accuracy']


# Side-by-side learning curves for the baseline run: accuracy on the left, loss on the right
curves = model_history.history
plt.figure(figsize=(12, 5))

for panel, (metric, title_word) in enumerate((('accuracy', 'Accuracy'), ('loss', 'Loss')), start=1):
    plt.subplot(1, 2, panel)
    # Training curve first, then the matching validation curve ('val_' + metric)
    plt.plot(curves[metric], label=f'Train {title_word}')
    plt.plot(curves[f'val_{metric}'], label=f'Validation {title_word}')
    plt.title(f'Model {title_word}')
    plt.ylabel(title_word)
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')

plt.show()


def plot_training_history(model_history):
    """Plot training vs. validation accuracy and loss side by side.

    Args:
        model_history: Object whose ``history`` attribute maps the keys
            'accuracy', 'val_accuracy', 'loss' and 'val_loss' to per-epoch
            value sequences (as returned by ``model.fit``).
    """
    metrics = model_history.history
    panels = (
        # (history key, word used in title/labels, legend position)
        ('accuracy', 'Accuracy', 'lower right'),
        ('loss', 'Loss', 'upper right'),
    )

    plt.figure(figsize=(12, 5))

    for column, (key, word, legend_loc) in enumerate(panels, start=1):
        plt.subplot(1, 2, column)
        plt.plot(metrics[key], label=f'Training {word}')
        plt.plot(metrics['val_' + key], label=f'Validation {word}')
        plt.title(f'Model {word}')
        plt.xlabel('Epoch')
        plt.ylabel(word)
        plt.legend(loc=legend_loc)

    plt.show()


# Compare the last-epoch training accuracy with the accuracy on the untouched test split
final_train_acc = model_history.history['accuracy'][-1]
print('Train accuracy:', final_train_acc)

# evaluate() returns the loss followed by the compiled metrics (accuracy only here)
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=False)
print('Test accuracy:', test_acc)

Train accuracy: 0.913349986076355
Test accuracy: 0.8676000237464905


# Experiment with different learning rates to find the optimal one.
# Every rate gets a newly constructed copy of the baseline network, so the runs
# differ only in the Adam learning rate.
learning_rates = [0.001, 0.02, 0.1]
model_histories = {}  # learning rate -> History returned by fit()

for lr in learning_rates:
    print(f"Training model with learning rate: {lr}")

    # Declare the input with an explicit Input object; passing `input_shape`
    # to Flatten makes Keras emit a UserWarning for Sequential models.
    model = Sequential([
        tf.keras.Input(shape=(28, 28)),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax'),
    ])

    # Compile the model with the learning rate under test
    model.compile(optimizer=Adam(learning_rate=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Train with the same epochs/batch size as the baseline so only the rate varies
    model_history = model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_val, y_val))

    # Keep the per-epoch metrics for the comparison and plots that follow
    model_histories[lr] = model_history

    # Reset Keras' global state before building the next model
    tf.keras.backend.clear_session()

Training model with learning rate: 0.001
Epoch 1/10

/usr/local/lib/python3.11/dist-packages/keras/src/layers/reshaping/flatten.py:37: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)

313/313 ━━━━━━━━━━━━━━━━━━━━ 5s 9ms/step - accuracy: 0.7163 - loss: 0.8057 - val_accuracy: 0.8311 - val_loss: 0.4566
Epoch 2/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8372 - loss: 0.4487 - val_accuracy: 0.8443 - val_loss: 0.4224
Epoch 3/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8562 - loss: 0.3878 - val_accuracy: 0.8633 - val_loss: 0.3839
Epoch 4/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8706 - loss: 0.3497 - val_accuracy: 0.8709 - val_loss: 0.3692
Epoch 5/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8787 - loss: 0.3224 - val_accuracy: 0.8752 - val_loss: 0.3617
Epoch 6/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - accuracy: 0.8884 - loss: 0.2974 - val_accuracy: 0.8779 - val_loss: 0.3574
Epoch 7/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.8957 - loss: 0.2790 - val_accuracy: 0.8801 - val_loss: 0.3495
Epoch 8/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.9028 - loss: 0.2607 - val_accuracy: 0.8800 - val_loss: 0.3512
Epoch 9/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.9080 - loss: 0.2512 - val_accuracy: 0.8816 - val_loss: 0.3489
Epoch 10/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.9128 - loss: 0.2335 - val_accuracy: 0.8836 - val_loss: 0.3452
Training model with learning rate: 0.02
Epoch 1/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 3s 6ms/step - accuracy: 0.6305 - loss: 1.6214 - val_accuracy: 0.8147 - val_loss: 0.5256
Epoch 2/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7970 - loss: 0.5665 - val_accuracy: 0.8309 - val_loss: 0.4992
Epoch 3/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8265 - loss: 0.4996 - val_accuracy: 0.8342 - val_loss: 0.4761
Epoch 4/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.8309 - loss: 0.4728 - val_accuracy: 0.7984 - val_loss: 0.5818
Epoch 5/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 4ms/step - accuracy: 0.8300 - loss: 0.4841 - val_accuracy: 0.8309 - val_loss: 0.4898
Epoch 6/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8419 - loss: 0.4455 - val_accuracy: 0.8406 - val_loss: 0.4813
Epoch 7/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.8395 - loss: 0.4550 - val_accuracy: 0.8043 - val_loss: 0.5573
Epoch 8/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8370 - loss: 0.4590 - val_accuracy: 0.8380 - val_loss: 0.4970
Epoch 9/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 4ms/step - accuracy: 0.8414 - loss: 0.4388 - val_accuracy: 0.8243 - val_loss: 0.5658
Epoch 10/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 4ms/step - accuracy: 0.8421 - loss: 0.4436 - val_accuracy: 0.8414 - val_loss: 0.4809
Training model with learning rate: 0.1
Epoch 1/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 3s 6ms/step - accuracy: 0.4187 - loss: 19.3952 - val_accuracy: 0.4133 - val_loss: 1.5945
Epoch 2/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 4ms/step - accuracy: 0.4656 - loss: 1.3649 - val_accuracy: 0.5738 - val_loss: 1.1986
Epoch 3/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.4474 - loss: 1.4342 - val_accuracy: 0.5404 - val_loss: 1.1808
Epoch 4/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.4621 - loss: 1.3402 - val_accuracy: 0.5282 - val_loss: 1.2398
Epoch 5/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.4259 - loss: 1.4996 - val_accuracy: 0.4551 - val_loss: 1.2825
Epoch 6/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.4352 - loss: 1.3020 - val_accuracy: 0.4289 - val_loss: 1.3803
Epoch 7/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.4289 - loss: 1.3770 - val_accuracy: 0.4509 - val_loss: 1.2908
Epoch 8/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.4499 - loss: 1.2989 - val_accuracy: 0.4467 - val_loss: 1.2884
Epoch 9/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.4519 - loss: 1.2821 - val_accuracy: 0.4305 - val_loss: 1.3381
Epoch 10/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.3949 - loss: 1.9914 - val_accuracy: 0.4139 - val_loss: 1.2921


# Pick the learning rate whose final-epoch validation accuracy is highest.
# 'val_accuracy' is compared (not 'accuracy') so that the selection and the printed
# "Validation accuracy" message refer to the same metric, and so the choice is
# not biased towards runs that merely fit the training data best.
# The running best is called `best_val_acc` to avoid shadowing the builtin `max`.
best_val_acc = float('-inf')
max_key = None
for key, history in model_histories.items():
    final_val_acc = history.history['val_accuracy'][-1]
    print(key, final_val_acc)
    if final_val_acc > best_val_acc:
        best_val_acc = final_val_acc
        max_key = key

if max_key is None:
    # Nothing was trained, so there is no best run to report or plot
    raise ValueError("model_histories is empty; run the learning-rate experiments first")

print("Validation accuracy for the best model is: ", best_val_acc)

0.001 0.9132500290870667
0.02 0.8453500270843506
0.1 0.3953999876976013
Validation accuracy for the best model is:  0.9132500290870667


# Plot the accuracy/loss curves of the learning-rate run selected as best (max_key)
plot_training_history(model_histories[max_key])


# Define different batch sizes to experiment with
batch_sizes = [64, 128, 256]
model_histories = {}  # batch size -> History returned by fit()

# Ensure clear separation in the output for each batch size experiment
print("\nStarting batch size experiments...\n" + "-"*50)

# Train one newly constructed copy of the baseline network per batch size
for batch_size in batch_sizes:
    print(f"\nTraining model with batch size: {batch_size}")

    # Declare the input with an explicit Input object; passing `input_shape`
    # to Flatten makes Keras emit a UserWarning for Sequential models.
    model = Sequential([
        tf.keras.Input(shape=(28, 28)),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax'),
    ])

    # Same optimizer, loss and metric for every run so only the batch size varies
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Fit the model with the batch size under test
    model_history = model.fit(X_train, y_train,
                              batch_size=batch_size,
                              epochs=10,
                              validation_data=(X_val, y_val))

    # Keep the per-epoch metrics for the comparison and plots that follow
    model_histories[batch_size] = model_history

    # Reset Keras' global state before building the next model
    tf.keras.backend.clear_session()

Starting batch size experiments...
--------------------------------------------------

Training model with batch size: 64
Epoch 1/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 8ms/step - accuracy: 0.7090 - loss: 0.8417 - val_accuracy: 0.8372 - val_loss: 0.4454
Epoch 2/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8377 - loss: 0.4497 - val_accuracy: 0.8459 - val_loss: 0.4117
Epoch 3/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.8548 - loss: 0.3882 - val_accuracy: 0.8602 - val_loss: 0.3811
Epoch 4/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.8697 - loss: 0.3507 - val_accuracy: 0.8693 - val_loss: 0.3705
Epoch 5/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8777 - loss: 0.3224 - val_accuracy: 0.8689 - val_loss: 0.3729
Epoch 6/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8891 - loss: 0.3007 - val_accuracy: 0.8758 - val_loss: 0.3633
Epoch 7/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 4ms/step - accuracy: 0.8977 - loss: 0.2802 - val_accuracy: 0.8771 - val_loss: 0.3581
Epoch 8/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.9022 - loss: 0.2663 - val_accuracy: 0.8785 - val_loss: 0.3524
Epoch 9/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.9027 - loss: 0.2560 - val_accuracy: 0.8854 - val_loss: 0.3441
Epoch 10/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.9107 - loss: 0.2368 - val_accuracy: 0.8849 - val_loss: 0.3502

Training model with batch size: 128
Epoch 1/10
157/157 ━━━━━━━━━━━━━━━━━━━━ 4s 10ms/step - accuracy: 0.6681 - loss: 0.9627 - val_accuracy: 0.8347 - val_loss: 0.4811
Epoch 2/10
157/157 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8354 - loss: 0.4642 - val_accuracy: 0.8495 - val_loss: 0.4428
Epoch 3/10
157/157 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8549 - loss: 0.4044 - val_accuracy: 0.8517 - val_loss: 0.4180
Epoch 4/10
157/157 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8705 - loss: 0.3661 - val_accuracy: 0.8609 - val_loss: 0.3929
Epoch 5/10
157/157 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8790 - loss: 0.3386 - val_accuracy: 0.8696 - val_loss: 0.3704
Epoch 6/10
157/157 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8892 - loss: 0.3167 - val_accuracy: 0.8741 - val_loss: 0.3583
Epoch 7/10
157/157 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8937 - loss: 0.2953 - val_accuracy: 0.8738 - val_loss: 0.3563
Epoch 8/10
157/157 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.9002 - loss: 0.2789 - val_accuracy: 0.8787 - val_loss: 0.3463
Epoch 9/10
157/157 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.9033 - loss: 0.2657 - val_accuracy: 0.8801 - val_loss: 0.3489
Epoch 10/10
157/157 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.9073 - loss: 0.2530 - val_accuracy: 0.8767 - val_loss: 0.3649

Training model with batch size: 256
Epoch 1/10
79/79 ━━━━━━━━━━━━━━━━━━━━ 4s 22ms/step - accuracy: 0.6551 - loss: 1.0585 - val_accuracy: 0.8250 - val_loss: 0.5056
Epoch 2/10
79/79 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - accuracy: 0.8269 - loss: 0.4931 - val_accuracy: 0.8492 - val_loss: 0.4396
Epoch 3/10
79/79 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.8507 - loss: 0.4225 - val_accuracy: 0.8596 - val_loss: 0.4106
Epoch 4/10
79/79 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - accuracy: 0.8638 - loss: 0.3876 - val_accuracy: 0.8635 - val_loss: 0.3892
Epoch 5/10
79/79 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - accuracy: 0.8707 - loss: 0.3626 - val_accuracy: 0.8678 - val_loss: 0.3786
Epoch 6/10
79/79 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - accuracy: 0.8796 - loss: 0.3366 - val_accuracy: 0.8727 - val_loss: 0.3647
Epoch 7/10
79/79 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.8897 - loss: 0.3146 - val_accuracy: 0.8744 - val_loss: 0.3607
Epoch 8/10
79/79 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.8926 - loss: 0.2985 - val_accuracy: 0.8743 - val_loss: 0.3561
Epoch 9/10
79/79 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - accuracy: 0.8965 - loss: 0.2854 - val_accuracy: 0.8745 - val_loss: 0.3590
Epoch 10/10
79/79 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.9010 - loss: 0.2737 - val_accuracy: 0.8746 - val_loss: 0.3602


# Pick the batch size whose final-epoch validation accuracy is highest.
# 'val_accuracy' is compared (not 'accuracy') so that the selection and the printed
# "Validation accuracy" message refer to the same metric, and so the choice is
# not biased towards runs that merely fit the training data best.
# The running best is called `best_val_acc` to avoid shadowing the builtin `max`.
best_val_acc = float('-inf')
max_key = None
for key, history in model_histories.items():
    final_val_acc = history.history['val_accuracy'][-1]
    print(key, final_val_acc)
    if final_val_acc > best_val_acc:
        best_val_acc = final_val_acc
        max_key = key

if max_key is None:
    # Nothing was trained, so there is no best run to report or plot
    raise ValueError("model_histories is empty; run the batch size experiments first")

print("Validation accuracy for the best model is: ", best_val_acc)

64 0.9122999906539917
128 0.9097999930381775
256 0.9027000069618225
Validation accuracy for the best model is:  0.9122999906539917


# Plot the accuracy/loss curves of the batch-size run selected as best (max_key)
plot_training_history(model_histories[max_key])


def create_model(num_units, dropout_rate, learning_rate):
    """Build and compile a two-hidden-layer dropout classifier for 28x28 inputs.

    Args:
        num_units: Width of each of the two hidden Dense layers.
        dropout_rate: Rate passed to the Dropout layer that follows each hidden layer.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled Sequential model with a 10-way softmax output, using
        categorical cross-entropy as the loss and reporting accuracy.
    """
    model = Sequential([
        # Explicit Input object; passing `input_shape` to Flatten makes Keras
        # emit a UserWarning for Sequential models.
        tf.keras.Input(shape=(28, 28)),
        Flatten(),
        Dense(num_units, activation='relu'),
        Dropout(dropout_rate),
        Dense(num_units, activation='relu'),  # Reusing num_units for simplicity
        Dropout(dropout_rate),
        Dense(10, activation='softmax'),
    ])

    # Build the optimizer explicitly: with optimizer='Adam' the learning_rate
    # argument would be ignored and every candidate would train at Adam's default rate.
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


# Candidate values for the randomized search (2 x 2 x 2 = 8 possible combinations)
param_grid = {
    'num_units': [128, 256],               # Width of each hidden layer
    'dropout_rate': [0.2, 0.3],            # Dropout rate after each hidden layer
    'learning_rate': [0.001, 0.01]        # Learning rate handed to create_model
}


# scikit-learn compatible wrapper around create_model, trained for 15 epochs with batch size 64.
# Hyperparameters to be tuned need to be added as arguments to KerasClassifier from scikeras (https://adriangb.com/scikeras/stable/migration.html#default-arguments-in-build-fn-model)
# The num_units / dropout_rate / learning_rate values given here are starting values;
# the search replaces them with the combination it is currently evaluating.
model = KerasClassifier(model=create_model,
                  epochs = 15,
                  batch_size=64,
                  num_units = 128,
                  dropout_rate = 0.2,
                  learning_rate = 0.01,
                  verbose=True)


# Randomized (not exhaustive) search: only n_iter of the possible combinations are tried
grid = RandomizedSearchCV(
    estimator=model, # The wrapped Keras model to tune
    param_distributions=param_grid, # Hyperparameter values to sample from
    n_iter=3,   # Number of random combinations to evaluate
    cv=5, # 5-fold cross-validation per combination
    verbose=2, # Log one line per finished fit
    n_jobs=1  # Run the fits sequentially on a single core
)


# Run the search on the training split: 3 candidates x 5 folds = 15 fits
grid_result = grid.fit(X_train, y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Epoch 1/15

/usr/local/lib/python3.11/dist-packages/keras/src/layers/reshaping/flatten.py:37: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)

250/250 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - accuracy: 0.6572 - loss: 0.9639
Epoch 2/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8194 - loss: 0.5072
Epoch 3/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8335 - loss: 0.4520
Epoch 4/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8499 - loss: 0.4071
Epoch 5/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8546 - loss: 0.3866
Epoch 6/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8647 - loss: 0.3670
Epoch 7/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8735 - loss: 0.3465
Epoch 8/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8762 - loss: 0.3401
Epoch 9/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8782 - loss: 0.3279
Epoch 10/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8834 - loss: 0.3134
Epoch 11/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8843 - loss: 0.3037
Epoch 12/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8863 - loss: 0.2919
Epoch 13/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8883 - loss: 0.2912
Epoch 14/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8968 - loss: 0.2750
Epoch 15/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8953 - loss: 0.2778
63/63 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step
[CV] END dropout_rate=0.2, learning_rate=0.01, num_units=256; total time=  14.5s
Epoch 1/15

/usr/local/lib/python3.11/dist-packages/keras/src/layers/reshaping/flatten.py:37: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)

250/250 ━━━━━━━━━━━━━━━━━━━━ 3s 3ms/step - accuracy: 0.6586 - loss: 0.9627
Epoch 2/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8172 - loss: 0.5165
Epoch 3/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8418 - loss: 0.4465
Epoch 4/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8493 - loss: 0.4106
Epoch 5/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8595 - loss: 0.3819
Epoch 6/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8691 - loss: 0.3563
Epoch 7/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8735 - loss: 0.3465
Epoch 8/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8742 - loss: 0.3370
Epoch 9/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8807 - loss: 0.3249
Epoch 10/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8862 - loss: 0.3071
Epoch 11/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8904 - loss: 0.2994
Epoch 12/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8931 - loss: 0.2888
Epoch 13/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8926 - loss: 0.2823
Epoch 14/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8970 - loss: 0.2810
Epoch 15/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.9008 - loss: 0.2681
63/63 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
[CV] END dropout_rate=0.2, learning_rate=0.01, num_units=256; total time=  12.9s
Epoch 1/15

/usr/local/lib/python3.11/dist-packages/keras/src/layers/reshaping/flatten.py:37: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)

250/250 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - accuracy: 0.6531 - loss: 0.9726
Epoch 2/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8182 - loss: 0.5144
Epoch 3/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8390 - loss: 0.4467
Epoch 4/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8510 - loss: 0.4064
Epoch 5/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8585 - loss: 0.3813
Epoch 6/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8686 - loss: 0.3620
Epoch 7/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8722 - loss: 0.3429
Epoch 8/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8757 - loss: 0.3266
Epoch 9/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8871 - loss: 0.3088
Epoch 10/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8819 - loss: 0.3155
Epoch 11/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8879 - loss: 0.2962
Epoch 12/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8920 - loss: 0.2835
Epoch 13/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8911 - loss: 0.2848
Epoch 14/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8954 - loss: 0.2718
Epoch 15/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8980 - loss: 0.2689
63/63 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step
[CV] END dropout_rate=0.2, learning_rate=0.01, num_units=256; total time=  14.5s

/usr/local/lib/python3.11/dist-packages/keras/src/layers/reshaping/flatten.py:37: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)

Epoch 1/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - accuracy: 0.6586 - loss: 0.9534
Epoch 2/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 2s 2ms/step - accuracy: 0.8179 - loss: 0.5037
Epoch 3/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8432 - loss: 0.4358
Epoch 4/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8539 - loss: 0.3946
Epoch 5/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8661 - loss: 0.3731
Epoch 6/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8754 - loss: 0.3451
Epoch 7/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8750 - loss: 0.3324
Epoch 8/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8823 - loss: 0.3264
Epoch 9/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8902 - loss: 0.3014
Epoch 10/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8854 - loss: 0.3022
Epoch 11/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8951 - loss: 0.2892
Epoch 12/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8964 - loss: 0.2829
Epoch 13/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8947 - loss: 0.2748
Epoch 14/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.9000 - loss: 0.2640
Epoch 15/15
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.9028 - loss: 0.2636
63/63 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
[CV] END dropout_rate=0.2, learning_rate=0.01, num_units=256; total time=  15.1s
Epoch 1/15

/usr/local/lib/python3.11/dist-packages/keras/src/layers/reshaping/flatten.py:37: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)


# Best mean cross-validated score found by the search, and the hyperparameters that produced it
print(grid_result.best_score_)
print(grid_result.best_params_)

0.873
{'num_units': 256, 'learning_rate': 0.01, 'dropout_rate': 0.2}


# Score the search's refitted best model on the held-out test split
# (RandomizedSearchCV refits the best candidate by default).
test_accuracy = grid.score(X_test, y_test)
# Bare expression: the notebook displays the value as the cell output
test_accuracy

157/157 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step

0.8679

Layer	Input Units	Output Neurons	Parameters
Flatten	28×28	784	0
Dense1	784	256	784×256 + 256 = 200,960
Dense2	256	128	256×128 + 128 = 32,896
Dense3	128	10	128×10 + 10 = 1,290
Total	—	—	235,146

Task	Output Type	Last-layer Activation	Loss Function	Metric(s)
Regression	Numerical	Linear	mean_squared_error (MSE), mean_absolute_error (MAE)	Same as loss
Binary Classification	Binary	Sigmoid	binary_crossentropy	Accuracy, precision, recall, sensitivity, TPR, FPR, ROC, AUC
Classification: Single Label, Multiple Classes	Categorical	Softmax	categorical_crossentropy	Accuracy, confusion matrix
Classification: Multiple Labels, Multiple Classes	Categorical	Sigmoid	binary_crossentropy	Accuracy, precision, recall, sensitivity, TPR, FPR, ROC, AUC

Practical: Deep Learning Models with Fashion-MNIST

Let's get started

Build a model

Sequential neural network

Tune learning rate

Batch sizes (Optional Part)

Hyperparameter Optimization