I'm trying to output all outputs and gradients for this small computation graph. If I use a Group symbol I can get all the outputs (in c_exec.outputs) but then the backward pass wants the gradient for each of those nodes which defeats the point of having it calculate them. It seems to only calculate the gradient for arguments and not any of the intermediate outputs.
Can you tell me how to achieve this? If I call net.get_internals() it shows the outputs that I want but then I cannot bind to it to calculate them.
import mxnet as mx
import math

# Logistic unit: net = sigmoid(w0*x0 + w1*x1 + w2), built from scalar symbols.
w0 = mx.symbol.Variable('w0')
x0 = mx.symbol.Variable('x0')
w1 = mx.symbol.Variable('w1')
x1 = mx.symbol.Variable('x1')
w2 = mx.symbol.Variable('w2')
# math.e, not cmath.e: the exponent base is a plain real constant,
# so the complex-math module is not needed.
net = 1/(1 + (mx.symbol.pow(math.e, -1*(w0*x0 + w1*x1 + w2))))
args = {'w0': mx.nd.ones(1) * 2.0,
        'x0': mx.nd.ones(1) * -1.0,
        'w1': mx.nd.ones(1) * -3.0,
        'x1': mx.nd.ones(1) * -2.0,
        'w2': mx.nd.ones(1) * -3.0}
data_shape = (1,)
focus = net
c_exec = focus.simple_bind(ctx=mx.cpu(),
                           w0=data_shape,
                           x0=data_shape,
                           w1=data_shape,
                           x1=data_shape,
                           w2=data_shape,
                           grad_req='write')
c_exec.copy_params_from(arg_params=args)
c_exec.forward()
# Seed the backward pass with d(net)/d(net) = 1.
c_exec.backward(out_grads=mx.nd.ones(1)*1)
# BUG FIX: c_exec.arg_arrays and c_exec.grad_arrays are ordered by
# focus.list_arguments(), NOT by the (arbitrary) iteration order of the
# `args` dict -- zipping with `args` scrambled the name->value pairing
# (the original printout showed w0=-3.00 even though w0 was set to 2.0).
arg_names = focus.list_arguments()
args_dict = dict(zip(arg_names, [o.asnumpy()[0] for o in c_exec.arg_arrays]))
outputs_dict = dict(zip(focus.list_outputs(), [o.asnumpy()[0] for o in c_exec.outputs]))
grads_dict = dict(zip(arg_names, [o.asnumpy()[0] for o in c_exec.grad_arrays]))
# print(...) with a single argument and dict.items() work in both Py2 and Py3.
print("Args : " + str(["%s=%.2f" % (n, o) for n, o in args_dict.items()]))
print("Outputs: " + str(["%s=%.2f" % (n, o) for n, o in outputs_dict.items()]))
print("Grads : " + str(["%s=%.2f" % (n, o) for n, o in grads_dict.items()]))
Args : ['x0=2.00', 'x1=-1.00', 'w2=-3.00', 'w1=-2.00', 'w0=-3.00']
Outputs: ['_divscalar0_output=0.73']
Grads : ['x0=-0.20', 'x1=0.39', 'w2=-0.39', 'w1=-0.59', 'w0=0.20']
Add a BlockGrad layer on top of the internal nodes.
So I added a BlockGrad but now c_exec.grad_arrays just contains all 0 values. Do you have an example of how to use it?
import mxnet as mx
import math
import numpy as np

# Same logistic unit, but with BlockGrad wrapped around the pre-activation.
w0 = mx.symbol.Variable('w0')
x0 = mx.symbol.Variable('x0')
w1 = mx.symbol.Variable('w1')
x1 = mx.symbol.Variable('x1')
w2 = mx.symbol.Variable('w2')
# NOTE: BlockGrad on the MAIN path stops gradient flow at `a`, so backward()
# propagates nothing to w0/x0/w1/x1/w2 -- that is exactly why grad_arrays
# comes back all zeros here. BlockGrad is meant for the EXTRA outputs you
# add via get_internals()/Group (so they need no out_grad), not for a node
# the loss gradient must flow through.
a = mx.symbol.BlockGrad(-1*(w0*x0 + w1*x1 + w2))
# math.e, not cmath.e: the base is a plain real constant.
net = 1/(1 + (mx.symbol.pow(math.e, a)))
args = {'w0': mx.nd.ones(1) * 2.0,
        'x0': mx.nd.ones(1) * -1.0,
        'w1': mx.nd.ones(1) * -3.0,
        'x1': mx.nd.ones(1) * -2.0,
        'w2': mx.nd.ones(1) * -3.0}
data_shape = (1,)
focus = net
c_exec = focus.simple_bind(ctx=mx.cpu(),
                           w0=data_shape,
                           x0=data_shape,
                           w1=data_shape,
                           x1=data_shape,
                           w2=data_shape,
                           grad_req='write')
c_exec.copy_params_from(arg_params=args)
c_exec.forward()
# Seed the backward pass with d(net)/d(net) = 1.
c_exec.backward(out_grads=mx.nd.ones(1)*1)
# BUG FIX: arg_arrays / grad_arrays follow focus.list_arguments() order,
# not the `args` dict's iteration order; zipping with `args` mislabels
# every printed value.
arg_names = focus.list_arguments()
args_dict = dict(zip(arg_names, [o.asnumpy()[0] for o in c_exec.arg_arrays]))
outputs_dict = dict(zip(focus.list_outputs(), [o.asnumpy()[0] for o in c_exec.outputs]))
grads_dict = dict(zip(arg_names, [o.asnumpy()[0] for o in c_exec.grad_arrays]))
# print(...) with a single argument and dict.items() work in both Py2 and Py3.
print("Args : " + str(["%s=%.2f" % (n, o) for n, o in args_dict.items()]))
print("Outputs: " + str(["%s=%.2f" % (n, o) for n, o in outputs_dict.items()]))
print("Grads : " + str(["%s=%.2f" % (n, o) for n, o in grads_dict.items()]))
Args : ['x0=2.00', 'x1=-1.00', 'w2=-3.00', 'w1=-2.00', 'w0=-3.00']
Outputs: ['_divscalar2_output=0.73']
Grads : ['x0=0.00', 'x1=-0.00', 'w2=0.00', 'w1=0.00', 'w0=-0.00']
Add it on top of the internal nodes from get_internals()
Can you make an example? How do I get the internal nodes from get_internals()?
# Debug helper (quoted from mxnet example/autoencoder/solver.py): expose
# every internal node of `sym` as an extra output so forward() returns all
# intermediate values, while BlockGrad keeps backward() from requiring an
# out_grad for (or propagating gradients into) those extra outputs.
# NOTE(review): `debug`, `sym`, `args`, and `output_names` are defined by the
# surrounding solver code, not shown in this snippet -- presumably `args` is
# the dict of argument names and `output_names` the original symbol's
# outputs; verify against the caller.
if debug:
    sym = sym.get_internals()  # symbol whose outputs are ALL internal nodes
    blob_names = sym.list_outputs()
    sym_group = []
    for i in range(len(blob_names)):
        if blob_names[i] not in args:  # skip plain input/weight variables
            x = sym[i]
            if blob_names[i] not in output_names:
                # Wrap non-final outputs so backward() needs no gradient for them.
                x = mx.symbol.BlockGrad(x, name=blob_names[i])
            sym_group.append(x)
    sym = mx.symbol.Group(sym_group)  # one symbol carrying every output
Fine tuning models is such a common use case that it should become a first class API instead of a debug code snippet.
Most helpful comment
https://github.com/dmlc/mxnet/blob/223fd62bbcfd2ec03720533c37ddb01893f0be25/example/autoencoder/solver.py#L64
Fine tuning models is such a common use case that it should become a first class API instead of a debug code snippet.