Tpot: Add deep learning features to pipeline

Created on 23 Nov 2018 · 4 Comments · Source: EpistasisLab/tpot

The layer before the last layer of a DNN holds important features extracted from the original input features. So, if you add these extracted features to the auto_ml pipeline, it will enhance the overall performance of the pipeline.

This is similar to what https://github.com/ClimbsRocks/auto_ml does.

enhancement

Most helpful comment

If you know what you want you can write your own custom transformer like this:

# adding keras model
import numpy as np
import pandas as pd

from tpot import TPOTClassifier
from sklearn.base import TransformerMixin, BaseEstimator
from tpot.config import classifier_config_dict_light
from sklearn.datasets import make_classification

from tensorflow.keras import backend as K
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
import copy

class CustomKerasFeatures(TransformerMixin, BaseEstimator):
    """Transformer that appends learned Keras embeddings to the input features.

    A small fully connected network (Dense 32 -> Dense 20 -> Dense 2 softmax)
    is trained on the labels during ``fit``. ``transform`` then returns the
    original columns of ``X`` followed by the activations of the second-to-last
    layer (the 20-unit "embedding" layer).

    Parameters
    ----------
    input_shape : int
        Number of input columns the network is built for. Incoming data is
        truncated or zero-padded to this width by ``_prepare_X``.
    epochs : int, default=1000
        Number of training epochs passed to ``model.fit``.
    batch_size : int, default=32
        Batch size passed to ``model.fit``.

    Notes
    -----
    The output layer has two units and the labels are one-hot encoded, so this
    presumably only supports two-class targets -- confirm before using it on
    multi-class problems.
    """

    def __init__(self, input_shape, epochs=1000, batch_size=32):
        model = Sequential()
        # Size the input layer from ``input_shape`` (it used to be hard-coded
        # to 100) so the network always matches the width that ``_prepare_X``
        # produces; otherwise any other ``input_shape`` fails at fit time.
        model.add(Dense(32, activation='relu', input_dim=input_shape))
        model.add(Dense(20, activation='relu')) # embedding size
        model.add(Dense(2, activation='softmax'))
        model.compile(optimizer='rmsprop',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        self.model = model
        self.epochs = epochs
        self.batch_size = batch_size
        self.input_shape = input_shape

    def _prepare_X(self, X):
        """Return ``X`` with exactly ``self.input_shape`` columns.

        Extra columns on the right are dropped; missing columns are filled
        with zeros. Earlier pipeline steps may change the number of columns,
        while the network input width is fixed at construction time.
        """
        if X.shape[1] >= self.input_shape:
            X = X[:, :self.input_shape]
        else:
            # Zero-pad on the right up to the width the network expects.
            result = np.zeros((X.shape[0], self.input_shape))
            result[:X.shape[0], :X.shape[1]] = X
            X = result
        return X

    def fit(self, X, y=None):
        """Train the network on ``X`` and the one-hot encoded labels ``y``.

        ``y`` is required despite its ``None`` default (kept for scikit-learn
        signature compatibility): ``y.copy()`` is called unconditionally.

        Returns
        -------
        self
        """
        X_ = self._prepare_X(X.copy())
        y_ohe = keras.utils.to_categorical(y.copy())
        self.model.fit(X_, y_ohe, epochs=self.epochs, batch_size=self.batch_size, verbose=0)
        return self

    def transform(self, X):
        """Return ``X`` with the embedding-layer activations appended as columns.

        The embedding is computed on the padded/truncated copy of ``X``, but
        the original, unmodified ``X`` is what gets stacked in front of it.
        """
        X_ = self._prepare_X(X.copy())
        # get embedding or the 2nd last layer
        get_embedding = K.function([self.model.layers[0].input],
                                   [self.model.layers[-2].output])
        return np.hstack([X, get_embedding([X_])[0]])

# Synthetic demo data generated with make_classification's default settings.
X, y = make_classification()

# using TPOT config
# Work on a private copy of the light classifier config, then register the
# custom transformer together with the hyperparameter values to search over.
config = copy.deepcopy(classifier_config_dict_light)
config.update({
    "__main__.CustomKerasFeatures": {
        "input_shape": [100],
        "epochs": [10],
    },
})

# The template string lists CustomKerasFeatures as the first pipeline step.
tpot = TPOTClassifier(
    config_dict=config,
    verbosity=3,
    generations=5,
    population_size=2,
    early_stop=2,
    max_time_mins=5,
    template='CustomKerasFeatures-Selector-Transformer-Classifier',
)
tpot.fit(X, y)

All 4 comments

Or just add sklearn.neural_network.MLPRegressor to the standard regressor config for now?

If you know what you want you can write your own custom transformer like this:

# adding keras model
import numpy as np
import pandas as pd

from tpot import TPOTClassifier
from sklearn.base import TransformerMixin, BaseEstimator
from tpot.config import classifier_config_dict_light
from sklearn.datasets import make_classification

from tensorflow.keras import backend as K
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
import copy

class CustomKerasFeatures(TransformerMixin, BaseEstimator):
    """Transformer that appends learned Keras embeddings to the input features.

    A small fully connected network (Dense 32 -> Dense 20 -> Dense 2 softmax)
    is trained on the labels during ``fit``. ``transform`` then returns the
    original columns of ``X`` followed by the activations of the second-to-last
    layer (the 20-unit "embedding" layer).

    Parameters
    ----------
    input_shape : int
        Number of input columns the network is built for. Incoming data is
        truncated or zero-padded to this width by ``_prepare_X``.
    epochs : int, default=1000
        Number of training epochs passed to ``model.fit``.
    batch_size : int, default=32
        Batch size passed to ``model.fit``.

    Notes
    -----
    The output layer has two units and the labels are one-hot encoded, so this
    presumably only supports two-class targets -- confirm before using it on
    multi-class problems.
    """

    def __init__(self, input_shape, epochs=1000, batch_size=32):
        model = Sequential()
        # Size the input layer from ``input_shape`` (it used to be hard-coded
        # to 100) so the network always matches the width that ``_prepare_X``
        # produces; otherwise any other ``input_shape`` fails at fit time.
        model.add(Dense(32, activation='relu', input_dim=input_shape))
        model.add(Dense(20, activation='relu')) # embedding size
        model.add(Dense(2, activation='softmax'))
        model.compile(optimizer='rmsprop',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        self.model = model
        self.epochs = epochs
        self.batch_size = batch_size
        self.input_shape = input_shape

    def _prepare_X(self, X):
        """Return ``X`` with exactly ``self.input_shape`` columns.

        Extra columns on the right are dropped; missing columns are filled
        with zeros. Earlier pipeline steps may change the number of columns,
        while the network input width is fixed at construction time.
        """
        if X.shape[1] >= self.input_shape:
            X = X[:, :self.input_shape]
        else:
            # Zero-pad on the right up to the width the network expects.
            result = np.zeros((X.shape[0], self.input_shape))
            result[:X.shape[0], :X.shape[1]] = X
            X = result
        return X

    def fit(self, X, y=None):
        """Train the network on ``X`` and the one-hot encoded labels ``y``.

        ``y`` is required despite its ``None`` default (kept for scikit-learn
        signature compatibility): ``y.copy()`` is called unconditionally.

        Returns
        -------
        self
        """
        X_ = self._prepare_X(X.copy())
        y_ohe = keras.utils.to_categorical(y.copy())
        self.model.fit(X_, y_ohe, epochs=self.epochs, batch_size=self.batch_size, verbose=0)
        return self

    def transform(self, X):
        """Return ``X`` with the embedding-layer activations appended as columns.

        The embedding is computed on the padded/truncated copy of ``X``, but
        the original, unmodified ``X`` is what gets stacked in front of it.
        """
        X_ = self._prepare_X(X.copy())
        # get embedding or the 2nd last layer
        get_embedding = K.function([self.model.layers[0].input],
                                   [self.model.layers[-2].output])
        return np.hstack([X, get_embedding([X_])[0]])

# Synthetic demo data generated with make_classification's default settings.
X, y = make_classification()

# using TPOT config
# Work on a private copy of the light classifier config, then register the
# custom transformer together with the hyperparameter values to search over.
config = copy.deepcopy(classifier_config_dict_light)
config.update({
    "__main__.CustomKerasFeatures": {
        "input_shape": [100],
        "epochs": [10],
    },
})

# The template string lists CustomKerasFeatures as the first pipeline step.
tpot = TPOTClassifier(
    config_dict=config,
    verbosity=3,
    generations=5,
    population_size=2,
    early_stop=2,
    max_time_mins=5,
    template='CustomKerasFeatures-Selector-Transformer-Classifier',
)
tpot.fit(X, y)

I'm happy to submit a PR with something like this if there is interest. IMO, you'll need parameters to initialise the model dynamically (can be done trivially with some broad assumptions) - if we don't want the annoying input_shape hack, we can initialise the model when the fit method is called.

In terms of the transform method, and specifically the np.hstack: personally, I think this should be avoided. From a design perspective it will suffer from the same challenges as implementing the text one that I demoed on #507 - namely, we should be using the built-in scikit-learn ColumnTransformer rather than an np.hstack.

I think it's worth opening up the discussion; though in the meantime people can just hack the pipelines with their own custom transformers :) (though I do realise it's not as user friendly).

We can take inspiration from https://github.com/uber/ludwig/ as well, which has a focus on automating pipelines specifically around controlling the embedding and stacking embeddings on top of each other...

@chappers Thank you for sharing this demo and information. It looks interesting to me, so a PR is welcome. We need to test its performance later.

Was this page helpful?
0 / 5 - 0 ratings