Dear all,
Would it please be possible to provide a simple example of a HybridBlock wrapper around an nd.array object? I need to create a matrix in numpy (it would be very time consuming to create it from scratch in nd.array - it's basically BSpline definitions which already exist in Python) and then transfer it to nd.array. Something like
class ndarray_wrap(HybridBlock):
    def __init__(self, const_numpy_array, **kwards):
        HybridBlock.__init__(self, **kwards)
        # Some operation that takes the constant const_numpy_array and
        # transforms it into a layer with no differentiation
        self.constant_layer = ...

    def hybrid_forward(self, F, x):
        return self.constant_layer
such that it can be used in combination with other HybridBlocks and eventually hybridize the whole network?
I would like better constant support for symbols (and consequently, HybridBlocks) as well.
A workaround you can use for now is the following. In the block that needs them, make your constants parameters: name each constant parameter something like 'constant_x' (a naming pattern the usual initializers won't match, so they never overwrite it), assign it a custom initializer that sets it to the right value, and wrap its use in a BlockGrad operation so that no optimizer will ever touch it. A sketch follows below.
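A minimal sketch of that workaround, assuming a hypothetical ConstInit initializer and BlockWithConstant block (the names are illustrative, not an existing API):

import mxnet as mx
from mxnet import nd
from mxnet.gluon import HybridBlock

class ConstInit(mx.initializer.Initializer):
    """Hypothetical initializer that writes a fixed NDArray into the parameter."""
    def __init__(self, value, **kwargs):
        super(ConstInit, self).__init__(**kwargs)
        self.value = value

    def _init_weight(self, name, arr):
        arr[:] = self.value

class BlockWithConstant(HybridBlock):
    def __init__(self, const_ndarray, **kwargs):
        super(BlockWithConstant, self).__init__(**kwargs)
        with self.name_scope():
            # 'constant_b': the 'constant_' prefix keeps pattern-based
            # initializers from touching it; grad_req='null' means the
            # optimizer never updates it
            self.constant_b = self.params.get(
                'constant_b',
                shape=const_ndarray.shape,
                init=ConstInit(const_ndarray),
                grad_req='null')

    def hybrid_forward(self, F, x, constant_b):
        # BlockGrad makes sure no gradient ever flows into the constant
        return F.broadcast_add(x, F.BlockGrad(constant_b))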
Thanks @jmacglashan, based on your suggestions I tried the following solution:
I am trying to create a custom convolutional layer as a HybridBlock. I create a custom initializer, CustomInit, and then inside the HybridBlock a custom parameter self.Bijkl that is initialized with CustomInit. The full example is here:
Essentials:
import mxnet as mx
from mxnet import nd, gluon
from mxnet.gluon import HybridBlock

@mx.init.register
class CustomInit(mx.initializer.Initializer):
    def __init__(self,
                 kernel_size=3,
                 kernel_effective_size=5,
                 **kwards):
        mx.initializer.Initializer.__init__(self, **kwards)
        # A trivial constant tensor
        self.Bijkl = nd.random_uniform(
            shape=[kernel_size, kernel_size,
                   kernel_effective_size, kernel_effective_size])

    def _init_weight(self, name, arr):
        arr[:] = self.Bijkl
class Conv2DS(HybridBlock):
    def __init__(self, nfilters, nchannels=0, kernel_size=3,
                 kernel_effective_size=5, use_bias=True, padding=(0, 0),
                 **kwards):
        HybridBlock.__init__(self, **kwards)

        self.nchannels = nchannels
        self.nfilters = nfilters
        self.kernel_size = kernel_size
        self.kernel_eff = kernel_effective_size
        self.use_bias = use_bias
        self.pad = padding

        with self.name_scope():
            self.weight = self.params.get(
                'weight', allow_deferred_init=True,
                shape=(nfilters, nchannels, kernel_size, kernel_size))

            # This is the custom tensor I need to create, a constant.
            self.Bijkl = self.params.get(
                'bijkl', allow_deferred_init=True,
                init=CustomInit(self.kernel_size, self.kernel_eff),
                grad_req='null',
                shape=(kernel_size, kernel_size,
                       kernel_effective_size, kernel_effective_size))

            if self.use_bias:
                self.bias = self.params.get(
                    'bias', allow_deferred_init=True,
                    init=mx.init.Zero(),
                    shape=(self.nfilters,))

    def hybrid_forward(self, F, _x):
        # These finalize deferring the input shape
        # --------------------------------------------------------------------------------
        # self.weight.shape = (self.nfilters, _x.shape[1], self.kernel_size, self.kernel_size)
        # self.weight._finish_deferred_init()
        # --------------------------------------------------------------------------------
        weight = F.sum(F.dot(self.weight.data(), self.Bijkl.data()), axis=[2, 3])

        if self.use_bias:
            conv = F.Convolution(data=_x,
                                 weight=weight,
                                 bias=self.bias.data(),
                                 num_filter=self.nfilters,
                                 kernel=[self.kernel_eff, self.kernel_eff],
                                 pad=self.pad)
        else:
            conv = F.Convolution(data=_x,
                                 weight=weight,
                                 no_bias=True,
                                 num_filter=self.nfilters,
                                 kernel=[self.kernel_eff, self.kernel_eff],
                                 pad=self.pad)
        return conv
I can initialize my layer:
nchannels = 8
nfilters = 16
dim = 128
mynet = Conv2DS(nfilters, kernel_size=5, kernel_effective_size= 15, use_bias=False)
mynet.initialize(mx.init.Xavier(),ctx=mx.gpu())
but when I run a single pass I get the following error:
xx = nd.random_uniform(shape = [4,nchannels,dim,dim],ctx=mx.gpu())
temp = mynet (xx)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-5-27f92d21b3eb> in <module>()
----> 1 temp = mynet (xx)
/home/dia021/anaconda2/lib/python2.7/site-packages/mxnet/gluon/block.pyc in __call__(self, *args)
358 def __call__(self, *args):
359 """Calls forward. Only accepts positional arguments."""
--> 360 return self.forward(*args)
361
362 def forward(self, *args):
/home/dia021/anaconda2/lib/python2.7/site-packages/mxnet/gluon/block.pyc in forward(self, x, *args)
568 params = {i: j.data(ctx) for i, j in self._reg_params.items()}
569 except DeferredInitializationError:
--> 570 self._finish_deferred_init(self._active, x, *args)
571
572 if self._active:
/home/dia021/anaconda2/lib/python2.7/site-packages/mxnet/gluon/block.pyc in _finish_deferred_init(self, hybrid, *args)
458
459 def _finish_deferred_init(self, hybrid, *args):
--> 460 self.infer_shape(*args)
461 if hybrid:
462 for is_arg, i in self._cached_op_args:
/home/dia021/anaconda2/lib/python2.7/site-packages/mxnet/gluon/block.pyc in infer_shape(self, *args)
519 def infer_shape(self, *args):
520 """Infers shape of Parameters from inputs."""
--> 521 self._infer_attrs('infer_shape', 'shape', *args)
522
523 def infer_type(self, *args):
/home/dia021/anaconda2/lib/python2.7/site-packages/mxnet/gluon/block.pyc in _infer_attrs(self, infer_fn, attr, *args)
507 def _infer_attrs(self, infer_fn, attr, *args):
508 """Generic infer attributes."""
--> 509 inputs, out = self._get_graph(*args)
510 args, _ = _flatten(args)
511 arg_attrs, _, aux_attrs = getattr(out, infer_fn)(
/home/dia021/anaconda2/lib/python2.7/site-packages/mxnet/gluon/block.pyc in _get_graph(self, *args)
426 params = {i: j.var() for i, j in self._reg_params.items()}
427 with self.name_scope():
--> 428 out = self.hybrid_forward(symbol, *grouped_inputs, **params) # pylint: disable=no-value-for-parameter
429 out, self._out_format = _flatten(out)
430
TypeError: hybrid_forward() got an unexpected keyword argument 'Bijkl'
I think the problem is the variable name, self.Bijkl? So I guess my question is: is there currently a simple way to wrap a constant tensor of shape [n1, n2, n3, n4] inside a HybridBlock? I have a solution for Block, but I want to take advantage of HybridBlock performance.
Thank you very much for your time.
It is surprising there isn't already support for constants (see #8428). It is possible to implement a constant CustomOp in Python, but it requires you to serialize the data as string arguments passed to the CustomOpProp implementation.
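For reference, a rough sketch of such a constant CustomOp, assuming the constant is small enough to round-trip through JSON as a string argument (the operator name and class names here are illustrative):

import json
import mxnet as mx
import numpy as np

class ConstantOp(mx.operator.CustomOp):
    def __init__(self, value):
        super(ConstantOp, self).__init__()
        self.value = value

    def forward(self, is_train, req, in_data, out_data, aux):
        # copy the stored constant into the single output
        self.assign(out_data[0], req[0],
                    mx.nd.array(self.value, ctx=out_data[0].context))

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        pass  # a constant has no inputs, so nothing to propagate

@mx.operator.register('my_constant')
class ConstantProp(mx.operator.CustomOpProp):
    def __init__(self, value):
        super(ConstantProp, self).__init__(need_top_grad=False)
        # all arguments arrive as strings, hence the JSON round-trip
        self.value = np.array(json.loads(value))

    def list_arguments(self):
        return []

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        return [], [self.value.shape], []

    def create_operator(self, ctx, shapes, dtypes):
        return ConstantOp(self.value)

# usage: a symbol with no inputs that always produces the constant
const_sym = mx.sym.Custom(op_type='my_constant',
                          value=json.dumps(np.eye(3).tolist()))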
@feevos I haven't tried your code yet, but I think the issue is that your hybrid_forward does not have **kwargs or an argument named after the parameter.
MXNet hybrid blocks push parameters as inputs to the hybrid_forward method (I believe because this is how it resolves passing them in as variable symbols when it compiles a graph). So you should add that argument and read the "constant" parameter from the function arguments.
For example, consider the hybrid_forward definition of the Dense block in Mxnet:
def hybrid_forward(self, F, x, weight, bias=None):
    if bias is None:
        act = F.FullyConnected(x, weight, no_bias=True, num_hidden=self._units,
                               name='fwd')
    else:
        act = F.FullyConnected(x, weight, bias, num_hidden=self._units,
                               name='fwd')
    if self.act is not None:
        act = self.act(act)
    return act
Note that the method receives weight and bias as arguments. These are defined as parameters inside the Block's ParameterDict and the forward operation of the HybridBlock will automatically push all parameters to hybrid_forward.
So you should change your code to be:
def hybrid_forward(self, F, _x, Bijkl):
And then you don't need to pull it from the parameter dict in the hybrid forward, just use the arg.
Thanks @jmacglashan, you are right. On the MXNet discuss forum I was given a solution that does exactly what you describe:
class CustomConv(HybridBlock):
    def __init__(self, const_ndarray, use_bias=True, **kwargs):
        super(CustomConv, self).__init__(**kwargs)
        self.use_bias = use_bias

        with self.name_scope():
            self.weight = self.params.get('weight',
                                          shape=(100, 100, 3, 3),
                                          allow_deferred_init=True)
            self.bijkl = self.params.get(
                'bijkl',
                shape=const_ndarray.shape,
                init=mx.init.Constant(const_ndarray.asnumpy().tolist()),
                differentiable=False)
            if self.use_bias:
                self.bias = self.params.get(
                    'bias',
                    allow_deferred_init=True,
                    init=mx.init.Zero(),
                    shape=(100,))

    def hybrid_forward(self, F, x, weight, bijkl, bias=None):
        proj_weight = F.sum(F.dot(weight, bijkl), axis=[2, 3])
        if self.use_bias:
            return F.Convolution(data=x, weight=proj_weight, bias=bias,
                                 num_filter=100, kernel=(5, 5))
        else:
            return F.Convolution(data=x, weight=proj_weight, no_bias=True,
                                 num_filter=100, kernel=(5, 5))
The only issue still left is that I cannot (yet) infer the shape of some dimensions during the first run. Getting there...
@feevos FYI there is self.params.get_constant() now.
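For later readers, a minimal sketch of how that looks, assuming an MXNet version where ParameterDict.get_constant is available (block and variable names are illustrative):

import mxnet as mx
from mxnet import nd
from mxnet.gluon import HybridBlock

class ConstWrap(HybridBlock):
    def __init__(self, const_ndarray, **kwargs):
        super(ConstWrap, self).__init__(**kwargs)
        with self.name_scope():
            # registers a non-differentiable parameter initialized
            # from the given NDArray
            self.bijkl = self.params.get_constant('bijkl', const_ndarray)

    def hybrid_forward(self, F, x, bijkl):
        # the constant is pushed into hybrid_forward like any other parameter
        return F.dot(x, bijkl)

net = ConstWrap(nd.random_uniform(shape=(4, 4)))
net.initialize()
net.hybridize()
out = net(nd.ones((2, 4)))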