What have you tried so far?
I just know that it should select the activations of the top-k units, only update the weights of those top-k units, and use a mask. I don't know how to implement the mask in Theano.
You can try something like this:
import numpy as np
import theano.tensor as T
from keras.layers.core import MaskedLayer

class KMaxPooling(MaskedLayer):
    def __init__(self, pooling_size):
        super(KMaxPooling, self).__init__()
        self.pooling_size = pooling_size
        self.input = T.tensor3()

    def get_output_mask(self, train=False):
        return None

    def get_output(self, train=False):
        data = self.get_input(train)
        mask = self.get_input_mask(train)
        # if no mask is provided, treat every timestep as valid
        if mask is None:
            mask = T.sum(T.ones_like(data), axis=-1)
        mask = mask.dimshuffle(0, 1, "x")
        # set masked positions to -inf so they can never end up in the top k
        masked_data = T.switch(T.eq(mask, 0), -np.inf, data)
        # take the k largest activations along the time axis, preserving their original order
        result = masked_data[T.arange(masked_data.shape[0]).dimshuffle(0, "x", "x"),
                             T.sort(T.argsort(masked_data, axis=1)[:, -self.pooling_size:, :], axis=1),
                             T.arange(masked_data.shape[2]).dimshuffle("x", "x", 0)]
        return result

    def get_config(self):
        return {"name": self.__class__.__name__, "pooling_size": self.pooling_size}
Theano takes care of correct gradient propagation when you do array indexing; there is no need to worry about it.
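A minimal standalone sketch of that point (separate from the layer above, variable names are just for illustration):

import theano
import theano.tensor as T

x = T.matrix("x")
# indices of the 2 largest values in each row, kept in their original order
idx = T.sort(T.argsort(x, axis=1)[:, -2:], axis=1)
y = x[T.arange(x.shape[0]).dimshuffle(0, "x"), idx].sum()
# the gradient flows only to the selected entries: 1 where a value was picked, 0 elsewhere
g = theano.grad(y, x)
f = theano.function([x], g)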
Did this ever make it into the main branch of Keras?
Is there a tensorflow backend implementation of the above code?
Not the most generic, well-engineered or thoroughly-tested solution, but this seems to do the trick for me:
from keras.engine import Layer, InputSpec
from keras.layers import Flatten
import tensorflow as tf

class KMaxPooling(Layer):
    """
    K-max pooling layer that extracts the k-highest activations from a sequence (2nd dimension).
    TensorFlow backend.
    """
    def __init__(self, k=1, **kwargs):
        super().__init__(**kwargs)
        self.input_spec = InputSpec(ndim=3)
        self.k = k

    def compute_output_shape(self, input_shape):
        return (input_shape[0], (input_shape[2] * self.k))

    def call(self, inputs):
        # swap last two dimensions since top_k will be applied along the last dimension
        shifted_input = tf.transpose(inputs, [0, 2, 1])
        # extract top_k, returns two tensors [values, indices]
        top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0]
        # return flattened output
        return Flatten()(top_k)
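To illustrate the expected dimensions, a hedged usage sketch with the functional API (the surrounding layers and sizes are made up, not tested): the layer takes a 3D tensor (batch, steps, features) and returns a flattened (batch, features * k) tensor.

from keras.models import Model
from keras.layers import Input, Embedding, Conv1D, Dense

inp = Input(shape=(100,))                                       # 100 token ids per sample
x = Embedding(input_dim=10000, output_dim=64)(inp)              # (batch, 100, 64)
x = Conv1D(filters=128, kernel_size=3, activation="relu")(x)    # (batch, 98, 128)
x = KMaxPooling(k=5)(x)                                         # (batch, 5 * 128) after the internal Flatten
out = Dense(1, activation="sigmoid")(x)
model = Model(inp, out)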
@arbackus could you please help figure out the right dimensions?
https://stackoverflow.com/questions/45891148/k-maxpooling-layer-in-keras
@arbackus this solution works, but is very slow. In my case adding this more than doubles the entire network training time. A faster solution would be practical.
If the backend is TF, how do I use the KMaxPooling layer? For example:
conv1 = Conv2D(32, (3, 3), activation="relu", padding="same")(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
I replaced MaxPooling2D with KMaxPooling, but it doesn't work. How should KMaxPooling be used here?
@anttttti
Is this still an issue? Can you try with the latest TF version? Thanks
Check out this custom Keras layer. It is not thoroughly tested, but it works fine for me. Let me know what you think. P.S. TF 2.1.0
import tensorflow as tf
from tensorflow.keras import layers

class KMaxPooling(layers.Layer):
    """
    K-max pooling layer that extracts the k-highest activations from a sequence (2nd dimension).
    TensorFlow backend.
    """
    def __init__(self, k=1, axis=1, **kwargs):
        super(KMaxPooling, self).__init__(**kwargs)
        self.input_spec = layers.InputSpec(ndim=3)
        self.k = k
        assert axis in [1, 2], ('expected dimensions (samples, filters, convolved_values), '
                                'cannot fold along samples dimension or axis not in list [1, 2]')
        self.axis = axis
        # need to switch the axis with the last element
        # to perform transpose for top k elements since top_k works on the last axis
        self.transpose_perm = [0, 1, 2]  # default
        self.transpose_perm[self.axis] = 2
        self.transpose_perm[2] = self.axis

    def compute_output_shape(self, input_shape):
        input_shape_list = list(input_shape)
        input_shape_list[self.axis] = self.k
        return tuple(input_shape_list)

    def call(self, x):
        # swap sequence dimension to get top k elements along axis=1
        transposed_for_topk = tf.transpose(x, perm=self.transpose_perm)
        # extract top_k, returns two tensors [values, indices]
        top_k_vals, top_k_indices = tf.math.top_k(transposed_for_topk,
                                                  k=self.k, sorted=True,
                                                  name=None)
        # maintain the order of values as in the paper: sort the indices
        sorted_top_k_ind = tf.sort(top_k_indices)
        flatten_seq = tf.reshape(transposed_for_topk, (-1,))
        shape_seq = tf.shape(transposed_for_topk)
        len_seq = tf.shape(flatten_seq)[0]
        indices_seq = tf.range(len_seq)
        indices_seq = tf.reshape(indices_seq, shape_seq)
        indices_gather = tf.gather(indices_seq, 0, axis=-1)
        indices_sum = tf.expand_dims(indices_gather, axis=-1)
        sorted_top_k_ind += indices_sum
        k_max_out = tf.gather(flatten_seq, sorted_top_k_ind)
        # transpose back to the original layout; the sequence dimension now has only k elements,
        # so the output has the original shape but with k as its axis_1 size
        transposed_back = tf.transpose(k_max_out, perm=self.transpose_perm)
        return transposed_back
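A rough usage sketch, e.g. after a Conv1D in place of a MaxPooling1D (reusing the tf and layers imports above; the surrounding layers and sizes are just placeholders, untested):

inp = layers.Input(shape=(100,))                                      # 100 token ids per sample
x = layers.Embedding(input_dim=10000, output_dim=64)(inp)             # (batch, 100, 64)
x = layers.Conv1D(filters=128, kernel_size=3, activation="relu")(x)   # (batch, 98, 128)
x = KMaxPooling(k=5, axis=1)(x)                                       # (batch, 5, 128): 5 largest per filter, order preserved
x = layers.Flatten()(x)
out = layers.Dense(1, activation="sigmoid")(x)
model = tf.keras.Model(inp, out)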
@makaveli10 How do I use it instead of MaxPooling?