In [ ]:
Copied!
%load_ext autoreload
%autoreload 2
%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload
In [ ]:
Copied!
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
Classification Homepage¶
Datasets¶
circle | exclusive_or | gaussian | spiral |
---|---|---|---|
Features¶
- Start, pause, resume and reset training
- Set max epochs
- Adjust Learning rate (+ Optimizer)
- Adjust activation (overall except last layer)
- First layer activation option also
- Add regularization (applied to all weights; L1 or L2)
- Adjusting regularization rate
- Choose datasets - circle, exclusive_or, gaussian, spiral
- Ratio of training to test data
- Noise level
- Choose Features - $X_1, X_2, X_1^2, X_2^2, X_1X_2, \sin(X_1), \sin(X_2)$
- Choose architecture - number of hidden layers, number of neurons in each layer
- Monitor/Plot learning curves
- Monitor/Plot decision boundary
- Option to show test data
- Option to discretize output
TODO: Add type hints
Custom TfPlayDataset
¶
In [ ]:
Copied!
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Registry of available input features: each entry maps a feature name to a
# callable that takes the raw two-column array ``X`` (column 0 is X1,
# column 1 is X2) and returns the corresponding feature column.
FEATURE_STORE = {
    'X1': lambda X: X[:, 0],
    'X2': lambda X: X[:, 1],
    'X1_sq': lambda X: np.square(X[:, 0]),
    'X2_sq': lambda X: np.square(X[:, 1]),
    'X1X2': lambda X: np.multiply(X[:, 0], X[:, 1]),
    'sin(X1)': lambda X: np.sin(X[:, 0]),
    'sin(X2)': lambda X: np.sin(X[:, 1]),
}
class TfPlayDataset:
    """Two-dimensional labelled dataset with a train/test split.

    The raw coordinates ``X1``/``X2``, any additional features looked up in
    ``FEATURE_STORE`` and the label are stored in one DataFrame. Its rows are
    split into two subsets exposed as the dicts ``self.train`` and
    ``self.test``, each with the keys:

    * ``'data'``     -- the raw (never scaled) ``X1``/``X2`` columns,
    * ``'features'`` -- the selected feature columns (standardised when
      ``scale`` is true),
    * ``'label'``    -- a single-column DataFrame holding the labels.

    When ``scale`` is true the fitted ``StandardScaler`` is kept as
    ``self.scaler``; otherwise that attribute is not set.
    """

    def __init__(self, X: np.ndarray, y: np.ndarray, features=None,
                 scale=True, train_test_ratio=0.5, random_state=None):
        """Build the feature table and split it into train and test subsets.

        Args:
            X: Array with exactly two columns (X1, X2).
            y: Labels, one per row of ``X``.
            features: Names of the features to expose (keys of
                ``FEATURE_STORE``). Defaults to ``['X1', 'X2']``.
            scale: Standardise the feature columns, fitting the scaler on
                the training rows only.
            train_test_ratio: Ratio of training rows to test rows; the test
                fraction is ``1 / (1 + train_test_ratio)``.
            random_state: Seed forwarded to ``train_test_split`` so the
                split can be reproduced.
        """
        assert X.shape[1] == 2, "X must have exactly two columns"
        assert X.shape[0] == y.shape[0], "X and y must have the same number of rows"

        # A fresh list per instance: avoids sharing a mutable default and
        # guarantees list-style column selection in ``.loc`` below.
        features = ['X1', 'X2'] if features is None else list(features)
        self.features = features
        self.__data = self._make_dataframe(X, y)

        train_idx, test_idx = train_test_split(
            self.__data.index,
            test_size=1 / (1 + train_test_ratio),
            random_state=random_state,
        )

        # Snapshot the raw coordinates BEFORE scaling overwrites the columns.
        self.train = {'data': self.__data.loc[train_idx, ['X1', 'X2']].copy()}
        self.test = {'data': self.__data.loc[test_idx, ['X1', 'X2']].copy()}

        if scale:
            # Fit on the training rows only, then apply the same transform
            # to the test rows.
            self.scaler = StandardScaler()
            self.__data.loc[train_idx, features] = self.scaler.fit_transform(
                self.__data.loc[train_idx, features])
            self.__data.loc[test_idx, features] = self.scaler.transform(
                self.__data.loc[test_idx, features])

        for subset, idx in ((self.train, train_idx), (self.test, test_idx)):
            subset['features'] = self.__data.loc[idx, features]
            subset['label'] = self.__data.loc[idx, ['label']]

    def _make_dataframe(self, X, y):
        """Return a DataFrame with X1, X2, the extra features and the label."""
        df = pd.DataFrame(X, columns=['X1', 'X2'])
        # X1/X2 are already present as the raw columns; only derive the rest.
        extra = [f for f in self.features if f not in ['X1', 'X2']]
        df = pd.concat([df, self._featurize(X, extra)], axis=1)
        df['label'] = y
        return df

    @staticmethod
    def _featurize(X, features):
        """Compute the named ``FEATURE_STORE`` features of ``X`` as a DataFrame."""
        return pd.DataFrame({feat: FEATURE_STORE[feat](X) for feat in features})

    def plot(self, subset='train', ax=None):
        """Scatter the raw points of a subset, coloured by label.

        Args:
            subset: Name of the subset attribute to draw (``'train'`` or
                ``'test'``).
            ax: Axes to draw on; a new figure is created when omitted.

        Returns:
            The axes that were drawn on.
        """
        if ax is None:
            _, ax = plt.subplots()
        subset_dict = getattr(self, subset)
        X = subset_dict['data'].values
        y = subset_dict['label'].values.flatten()
        ax.scatter(X[y == 0, 0], X[y == 0, 1], color='red', label='0')
        ax.scatter(X[y == 1, 0], X[y == 1, 1], color='blue', label='1')
        ax.set(xlabel='X1', ylabel='X2')
        # Attach the legend to the axes that was drawn on; the pyplot-level
        # call targets the *current* axes, which may be a different subplot.
        ax.legend()
        return ax
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Registry of available input features: each entry maps a feature name to a
# callable that takes the raw two-column array ``X`` (column 0 is X1,
# column 1 is X2) and returns the corresponding feature column.
FEATURE_STORE = {
    'X1': lambda X: X[:, 0],
    'X2': lambda X: X[:, 1],
    'X1_sq': lambda X: np.square(X[:, 0]),
    'X2_sq': lambda X: np.square(X[:, 1]),
    'X1X2': lambda X: np.multiply(X[:, 0], X[:, 1]),
    'sin(X1)': lambda X: np.sin(X[:, 0]),
    'sin(X2)': lambda X: np.sin(X[:, 1]),
}
class TfPlayDataset:
    """Two-dimensional labelled dataset with a train/test split.

    The raw coordinates ``X1``/``X2``, any additional features looked up in
    ``FEATURE_STORE`` and the label are stored in one DataFrame. Its rows are
    split into two subsets exposed as the dicts ``self.train`` and
    ``self.test``, each with the keys:

    * ``'data'``     -- the raw (never scaled) ``X1``/``X2`` columns,
    * ``'features'`` -- the selected feature columns (standardised when
      ``scale`` is true),
    * ``'label'``    -- a single-column DataFrame holding the labels.

    When ``scale`` is true the fitted ``StandardScaler`` is kept as
    ``self.scaler``; otherwise that attribute is not set.
    """

    def __init__(self, X: np.ndarray, y: np.ndarray, features=None,
                 scale=True, train_test_ratio=0.5, random_state=None):
        """Build the feature table and split it into train and test subsets.

        Args:
            X: Array with exactly two columns (X1, X2).
            y: Labels, one per row of ``X``.
            features: Names of the features to expose (keys of
                ``FEATURE_STORE``). Defaults to ``['X1', 'X2']``.
            scale: Standardise the feature columns, fitting the scaler on
                the training rows only.
            train_test_ratio: Ratio of training rows to test rows; the test
                fraction is ``1 / (1 + train_test_ratio)``.
            random_state: Seed forwarded to ``train_test_split`` so the
                split can be reproduced.
        """
        assert X.shape[1] == 2, "X must have exactly two columns"
        assert X.shape[0] == y.shape[0], "X and y must have the same number of rows"

        # A fresh list per instance: avoids sharing a mutable default and
        # guarantees list-style column selection in ``.loc`` below.
        features = ['X1', 'X2'] if features is None else list(features)
        self.features = features
        self.__data = self._make_dataframe(X, y)

        train_idx, test_idx = train_test_split(
            self.__data.index,
            test_size=1 / (1 + train_test_ratio),
            random_state=random_state,
        )

        # Snapshot the raw coordinates BEFORE scaling overwrites the columns.
        self.train = {'data': self.__data.loc[train_idx, ['X1', 'X2']].copy()}
        self.test = {'data': self.__data.loc[test_idx, ['X1', 'X2']].copy()}

        if scale:
            # Fit on the training rows only, then apply the same transform
            # to the test rows.
            self.scaler = StandardScaler()
            self.__data.loc[train_idx, features] = self.scaler.fit_transform(
                self.__data.loc[train_idx, features])
            self.__data.loc[test_idx, features] = self.scaler.transform(
                self.__data.loc[test_idx, features])

        for subset, idx in ((self.train, train_idx), (self.test, test_idx)):
            subset['features'] = self.__data.loc[idx, features]
            subset['label'] = self.__data.loc[idx, ['label']]

    def _make_dataframe(self, X, y):
        """Return a DataFrame with X1, X2, the extra features and the label."""
        df = pd.DataFrame(X, columns=['X1', 'X2'])
        # X1/X2 are already present as the raw columns; only derive the rest.
        extra = [f for f in self.features if f not in ['X1', 'X2']]
        df = pd.concat([df, self._featurize(X, extra)], axis=1)
        df['label'] = y
        return df

    @staticmethod
    def _featurize(X, features):
        """Compute the named ``FEATURE_STORE`` features of ``X`` as a DataFrame."""
        return pd.DataFrame({feat: FEATURE_STORE[feat](X) for feat in features})

    def plot(self, subset='train', ax=None):
        """Scatter the raw points of a subset, coloured by label.

        Args:
            subset: Name of the subset attribute to draw (``'train'`` or
                ``'test'``).
            ax: Axes to draw on; a new figure is created when omitted.

        Returns:
            The axes that were drawn on.
        """
        if ax is None:
            _, ax = plt.subplots()
        subset_dict = getattr(self, subset)
        X = subset_dict['data'].values
        y = subset_dict['label'].values.flatten()
        ax.scatter(X[y == 0, 0], X[y == 0, 1], color='red', label='0')
        ax.scatter(X[y == 1, 0], X[y == 1, 1], color='blue', label='1')
        ax.set(xlabel='X1', ylabel='X2')
        # Attach the legend to the axes that was drawn on; the pyplot-level
        # call targets the *current* axes, which may be a different subplot.
        ax.legend()
        return ax
In [ ]:
Copied!
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from src.tfplayground import TfPlayDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from src.tfplayground import TfPlayDataset
In [ ]:
Copied!
from tensorflow.keras import layers, losses, optimizers, regularizers
from src.visualize import plot_learning_curve, plot2d_decision_function, plot_confusion_matrix
import sklearn.datasets as skdata
from tensorflow.keras import layers, losses, optimizers, regularizers
from src.visualize import plot_learning_curve, plot2d_decision_function, plot_confusion_matrix
import sklearn.datasets as skdata
TensorflowPlayground
¶
In [ ]:
Copied!
from tensorflow.keras import layers, losses, optimizers, regularizers
from src.visualize import plot_learning_curve, plot2d_decision_function, plot_confusion_matrix
import sklearn.datasets as skdata
import tensorflow as tf
class TensorflowPlayground:
    """Binary classifier playground on a generated 2-D dataset.

    On construction the dataset is generated and wrapped in a
    ``TfPlayDataset`` (``self.data``), and a dense Keras network is built
    and compiled (``self.model``). Only the ``'circle'`` dataset is
    implemented.
    """

    def __init__(self,
                 dataset: str,
                 train_test_ratio=0.5,
                 noise=0,
                 features=None,
                 scale=True,
                 n_samples=1000,
                 neurons=(4, ),
                 learning_rate=0.03,
                 activation='tanh',
                 regularization=None,
                 regularization_rate=0,
                 random_state=None):
        """Store the configuration, then build the data and the model.

        Args:
            dataset: Dataset name; must be ``'circle'``.
            train_test_ratio: Forwarded to ``TfPlayDataset``.
            noise: Noise level forwarded to ``make_circles``.
            features: Feature names fed to the network. Defaults to
                ``['X1', 'X2']``.
            scale: Whether ``TfPlayDataset`` standardises the features.
            n_samples: Number of points to generate.
            neurons: Units per hidden layer, one entry per layer.
            learning_rate: Learning rate of the Adam optimizer.
            activation: Activation of every hidden layer.
            regularization: ``'L1'``, ``'L2'`` or anything else for none.
            regularization_rate: Factor passed to the chosen regularizer.
            random_state: Seed forwarded to data generation and splitting.

        Raises:
            NotImplementedError: If ``dataset`` is not ``'circle'``.
        """
        if dataset != 'circle':
            raise NotImplementedError("Only 'circle' dataset is implemented!")
        self.dataset = dataset
        self.train_test_ratio = train_test_ratio
        self.noise = noise
        # Fresh list per instance instead of a shared mutable default.
        self.features = ['X1', 'X2'] if features is None else list(features)
        self.scale = scale
        self.n_samples = n_samples
        self.neurons = neurons
        self.learning_rate = learning_rate
        self.activation = activation
        self.regularization = regularization
        self.regularization_rate = regularization_rate
        self.random_state = random_state
        # Make dataset
        self.make_data()
        # Make model
        self.make_model()

    def make_data(self):
        """Generate the configured dataset and store it as ``self.data``.

        Raises:
            NotImplementedError: If ``self.dataset`` is not ``'circle'``
                (e.g. the attribute was changed after construction).
        """
        if self.dataset != 'circle':
            raise NotImplementedError("Only 'circle' dataset is implemented!")
        X, y = self._make_circle_X_y(self.noise, self.n_samples, self.random_state)
        self.data = TfPlayDataset(X, y, self.features, self.scale,
                                  self.train_test_ratio, self.random_state)

    @staticmethod
    def _make_circle_X_y(noise=0, n_samples=1000, random_state=None):
        """Return ``(X, y)`` from ``sklearn.datasets.make_circles``."""
        return skdata.make_circles(n_samples=n_samples, noise=noise, random_state=random_state)

    @staticmethod
    def _make_model(input_shape, neurons, learning_rate,
                    activation, regularization=None, regularization_rate=0):
        """Build and compile a dense network ending in one sigmoid unit.

        Args:
            input_shape: Number of input features (int) or a shape tuple.
            neurons: Units per hidden layer, one entry per layer.
            learning_rate: Learning rate of the Adam optimizer.
            activation: Activation of every hidden layer.
            regularization: ``'L1'`` or ``'L2'`` to regularize the hidden
                layers' kernels; any other value disables regularization.
            regularization_rate: Factor passed to the chosen regularizer.

        Returns:
            The compiled model (binary cross-entropy loss, Adam optimizer).
        """
        if regularization == 'L1':
            reg = regularizers.l1(regularization_rate)
        elif regularization == 'L2':
            reg = regularizers.l2(regularization_rate)
        else:
            reg = None

        # ``make_model`` passes a plain feature count; ``Input`` is
        # documented to take a shape tuple, so normalise it here.
        if isinstance(input_shape, int):
            input_shape = (input_shape,)

        model = tf.keras.models.Sequential()
        # Input layer
        model.add(layers.Input(shape=input_shape))
        # Hidden layers
        for units in neurons:
            model.add(layers.Dense(units, activation=activation, kernel_regularizer=reg))
        # Output layer
        model.add(layers.Dense(1, activation='sigmoid'))
        # Compile model
        model.compile(loss=losses.binary_crossentropy,
                      optimizer=optimizers.Adam(learning_rate=learning_rate))
        return model

    def make_model(self):
        """Build ``self.model`` from the stored configuration."""
        num_feats = len(self.features)
        self.model = self._make_model(num_feats, self.neurons, self.learning_rate,
                                      self.activation, self.regularization,
                                      self.regularization_rate)

    def train(self, epochs=10, batch_size=None):
        """Fit the model on the training features and labels."""
        self.model.fit(self.data.train['features'], self.data.train['label'],
                       epochs=epochs, batch_size=batch_size)

    def predict(self, X):
        """Predict for raw two-column points ``X``.

        The configured features are derived from ``X`` and, when scaling is
        enabled, transformed with the scaler fitted on the training data
        before being passed to the model.
        """
        X_feat = self.data._featurize(X, self.features)
        if self.scale:
            X_feat = self.data.scaler.transform(X_feat)
        return self.model.predict(X_feat)

    def plot_learning_curve(self):
        """Plot the history recorded on the model by its last ``fit`` call."""
        return plot_learning_curve(self.model.history.history)

    def plot_decision_function(self, ax=None):
        """Plot the model's decision function over the raw training points.

        Returns whatever ``plot2d_decision_function`` returns.
        """
        subset_dict = self.data.train
        cp = plot2d_decision_function(self.predict, subset_dict['data'].values, ax=ax)
        return cp

    def plot_confusion_matrix(self, subset='test'):
        """Plot the confusion matrix for ``subset`` (``'train'`` or ``'test'``).

        Predictions are rounded to the nearest integer before comparison
        with the true labels.
        """
        subset_dict = getattr(self.data, subset)
        y_true = subset_dict['label']
        y_pred = self.predict(subset_dict['data'].values).round()
        return plot_confusion_matrix(y_true, y_pred)
from tensorflow.keras import layers, losses, optimizers, regularizers
from src.visualize import plot_learning_curve, plot2d_decision_function, plot_confusion_matrix
import sklearn.datasets as skdata
import tensorflow as tf
class TensorflowPlayground:
    """Binary classifier playground on a generated 2-D dataset.

    On construction the dataset is generated and wrapped in a
    ``TfPlayDataset`` (``self.data``), and a dense Keras network is built
    and compiled (``self.model``). Only the ``'circle'`` dataset is
    implemented.
    """

    def __init__(self,
                 dataset: str,
                 train_test_ratio=0.5,
                 noise=0,
                 features=None,
                 scale=True,
                 n_samples=1000,
                 neurons=(4, ),
                 learning_rate=0.03,
                 activation='tanh',
                 regularization=None,
                 regularization_rate=0,
                 random_state=None):
        """Store the configuration, then build the data and the model.

        Args:
            dataset: Dataset name; must be ``'circle'``.
            train_test_ratio: Forwarded to ``TfPlayDataset``.
            noise: Noise level forwarded to ``make_circles``.
            features: Feature names fed to the network. Defaults to
                ``['X1', 'X2']``.
            scale: Whether ``TfPlayDataset`` standardises the features.
            n_samples: Number of points to generate.
            neurons: Units per hidden layer, one entry per layer.
            learning_rate: Learning rate of the Adam optimizer.
            activation: Activation of every hidden layer.
            regularization: ``'L1'``, ``'L2'`` or anything else for none.
            regularization_rate: Factor passed to the chosen regularizer.
            random_state: Seed forwarded to data generation and splitting.

        Raises:
            NotImplementedError: If ``dataset`` is not ``'circle'``.
        """
        if dataset != 'circle':
            raise NotImplementedError("Only 'circle' dataset is implemented!")
        self.dataset = dataset
        self.train_test_ratio = train_test_ratio
        self.noise = noise
        # Fresh list per instance instead of a shared mutable default.
        self.features = ['X1', 'X2'] if features is None else list(features)
        self.scale = scale
        self.n_samples = n_samples
        self.neurons = neurons
        self.learning_rate = learning_rate
        self.activation = activation
        self.regularization = regularization
        self.regularization_rate = regularization_rate
        self.random_state = random_state
        # Make dataset
        self.make_data()
        # Make model
        self.make_model()

    def make_data(self):
        """Generate the configured dataset and store it as ``self.data``.

        Raises:
            NotImplementedError: If ``self.dataset`` is not ``'circle'``
                (e.g. the attribute was changed after construction).
        """
        if self.dataset != 'circle':
            raise NotImplementedError("Only 'circle' dataset is implemented!")
        X, y = self._make_circle_X_y(self.noise, self.n_samples, self.random_state)
        self.data = TfPlayDataset(X, y, self.features, self.scale,
                                  self.train_test_ratio, self.random_state)

    @staticmethod
    def _make_circle_X_y(noise=0, n_samples=1000, random_state=None):
        """Return ``(X, y)`` from ``sklearn.datasets.make_circles``."""
        return skdata.make_circles(n_samples=n_samples, noise=noise, random_state=random_state)

    @staticmethod
    def _make_model(input_shape, neurons, learning_rate,
                    activation, regularization=None, regularization_rate=0):
        """Build and compile a dense network ending in one sigmoid unit.

        Args:
            input_shape: Number of input features (int) or a shape tuple.
            neurons: Units per hidden layer, one entry per layer.
            learning_rate: Learning rate of the Adam optimizer.
            activation: Activation of every hidden layer.
            regularization: ``'L1'`` or ``'L2'`` to regularize the hidden
                layers' kernels; any other value disables regularization.
            regularization_rate: Factor passed to the chosen regularizer.

        Returns:
            The compiled model (binary cross-entropy loss, Adam optimizer).
        """
        if regularization == 'L1':
            reg = regularizers.l1(regularization_rate)
        elif regularization == 'L2':
            reg = regularizers.l2(regularization_rate)
        else:
            reg = None

        # ``make_model`` passes a plain feature count; ``Input`` is
        # documented to take a shape tuple, so normalise it here.
        if isinstance(input_shape, int):
            input_shape = (input_shape,)

        model = tf.keras.models.Sequential()
        # Input layer
        model.add(layers.Input(shape=input_shape))
        # Hidden layers
        for units in neurons:
            model.add(layers.Dense(units, activation=activation, kernel_regularizer=reg))
        # Output layer
        model.add(layers.Dense(1, activation='sigmoid'))
        # Compile model
        model.compile(loss=losses.binary_crossentropy,
                      optimizer=optimizers.Adam(learning_rate=learning_rate))
        return model

    def make_model(self):
        """Build ``self.model`` from the stored configuration."""
        num_feats = len(self.features)
        self.model = self._make_model(num_feats, self.neurons, self.learning_rate,
                                      self.activation, self.regularization,
                                      self.regularization_rate)

    def train(self, epochs=10, batch_size=None):
        """Fit the model on the training features and labels."""
        self.model.fit(self.data.train['features'], self.data.train['label'],
                       epochs=epochs, batch_size=batch_size)

    def predict(self, X):
        """Predict for raw two-column points ``X``.

        The configured features are derived from ``X`` and, when scaling is
        enabled, transformed with the scaler fitted on the training data
        before being passed to the model.
        """
        X_feat = self.data._featurize(X, self.features)
        if self.scale:
            X_feat = self.data.scaler.transform(X_feat)
        return self.model.predict(X_feat)

    def plot_learning_curve(self):
        """Plot the history recorded on the model by its last ``fit`` call."""
        return plot_learning_curve(self.model.history.history)

    def plot_decision_function(self, ax=None):
        """Plot the model's decision function over the raw training points.

        Returns whatever ``plot2d_decision_function`` returns.
        """
        subset_dict = self.data.train
        cp = plot2d_decision_function(self.predict, subset_dict['data'].values, ax=ax)
        return cp

    def plot_confusion_matrix(self, subset='test'):
        """Plot the confusion matrix for ``subset`` (``'train'`` or ``'test'``).

        Predictions are rounded to the nearest integer before comparison
        with the true labels.
        """
        subset_dict = getattr(self.data, subset)
        y_true = subset_dict['label']
        y_pred = self.predict(subset_dict['data'].values).round()
        return plot_confusion_matrix(y_true, y_pred)
Setup your playground!¶
In [ ]:
Copied!
# Build a playground on the 'circle' dataset: raw X1/X2 features, two hidden
# layers (8 and 4 units) with sigmoid activation, learning rate 0.1, scaled inputs.
tfp = TensorflowPlayground('circle', features=['X1', 'X2'], neurons=[8, 4], activation='sigmoid', learning_rate=0.1, scale=True)
tfp = TensorflowPlayground('circle', features=['X1', 'X2'], neurons=[8, 4], activation='sigmoid', learning_rate=0.1, scale=True)
Plot the data!¶
In [ ]:
Copied!
# Scatter-plot the dataset; with no arguments this draws the 'train' subset on a new figure.
tfp.data.plot()
tfp.data.plot()
Out[ ]:
<AxesSubplot:xlabel='X1', ylabel='X2'>
Train the model!¶
In [ ]:
Copied!
# Fit the model on the training subset for 15 epochs with batches of 30 samples.
tfp.train(epochs=15, batch_size=30)
tfp.train(epochs=15, batch_size=30)
Epoch 1/15 12/12 [==============================] - 1s 3ms/step - loss: 0.7062 Epoch 2/15 12/12 [==============================] - 0s 3ms/step - loss: 0.6944 Epoch 3/15 12/12 [==============================] - 0s 3ms/step - loss: 0.7057 Epoch 4/15 12/12 [==============================] - 0s 3ms/step - loss: 0.6963 Epoch 5/15 12/12 [==============================] - 0s 3ms/step - loss: 0.6937 Epoch 6/15 12/12 [==============================] - 0s 3ms/step - loss: 0.6931 Epoch 7/15 12/12 [==============================] - 0s 3ms/step - loss: 0.6909 Epoch 8/15 12/12 [==============================] - 0s 3ms/step - loss: 0.6891 Epoch 9/15 12/12 [==============================] - 0s 3ms/step - loss: 0.6931 Epoch 10/15 12/12 [==============================] - 0s 3ms/step - loss: 0.6960 Epoch 11/15 12/12 [==============================] - 0s 5ms/step - loss: 0.6851 Epoch 12/15 12/12 [==============================] - 0s 3ms/step - loss: 0.6787 Epoch 13/15 12/12 [==============================] - 0s 3ms/step - loss: 0.6704 Epoch 14/15 12/12 [==============================] - 0s 3ms/step - loss: 0.6470 Epoch 15/15 12/12 [==============================] - 0s 3ms/step - loss: 0.6076
Plot the learning curve!¶
In [ ]:
Copied!
# Plot the training history recorded on the model by the fit above.
tfp.plot_learning_curve()
tfp.plot_learning_curve()
Out[ ]:
(<Figure size 432x288 with 1 Axes>, array([<AxesSubplot:title={'center':'loss'}>], dtype=object))
Plot the predictions!¶
In [ ]:
Copied!
# One panel per subset: draw the decision function first, then overlay that
# subset's points on the same axes, and add a single shared colorbar.
fig, axn = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
for subset, ax in zip(('train', 'test'), axn):
    cp = tfp.plot_decision_function(ax=ax)
    tfp.data.plot(subset=subset, ax=cp.axes)
    ax.set_title(subset, fontdict={'weight': 'bold', 'size': 20})
fig.colorbar(cp)
# One panel per subset: draw the decision function first, then overlay that
# subset's points on the same axes, and add a single shared colorbar.
fig, axn = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
for subset, ax in zip(('train', 'test'), axn):
    cp = tfp.plot_decision_function(ax=ax)
    tfp.data.plot(subset=subset, ax=cp.axes)
    ax.set_title(subset, fontdict={'weight': 'bold', 'size': 20})
fig.colorbar(cp)
No handles with labels found to put in legend.
Out[ ]:
<matplotlib.colorbar.Colorbar at 0x1e35d18ed88>
Analyze the performance!¶
In [ ]:
Copied!
# Confusion matrix on the 'test' subset (the method's default).
# NOTE(review): the trailing ';' presumably suppresses the notebook's echo of the return value.
tfp.plot_confusion_matrix();
tfp.plot_confusion_matrix();