With the following script, MXNet core dumps on `y.backward()`. My build is master with CUDA off and MKL-DNN on. When I rebuilt with MKL-DNN off, the script no longer core dumped.
def test_rnn():
    """Reproduce the abort seen in ``y.backward()`` for ``npx.rnn``.

    Runs one forward and one backward pass of ``npx.rnn`` for each of the
    modes 'rnn_relu', 'rnn_tanh' and 'gru' on a (INT_OVERFLOW, 4, 4) input,
    then prints the gradient of the initial state.

    NOTE(review): assumes ``mx`` is ``mxnet`` and ``np`` / ``npx`` are
    ``mxnet.np`` / ``mxnet.npx``; the imports are not part of the snippet.
    """
    INT_OVERFLOW = 2**10

    def batch_check(x, modes, params):
        """Run forward + backward of ``npx.rnn`` for each (mode, parameters) pair."""
        for m, p in zip(modes, params):
            state = np.random.normal(0, 1, (1, 4, 1))
            # Attach gradient buffers to every input once, before recording.
            x.attach_grad()
            state.attach_grad()
            p.attach_grad()
            with mx.autograd.record():
                y = npx.rnn(data=x, parameters=p, mode=m,
                            state=state, state_size=1, num_layers=1)
            assert y.shape == (INT_OVERFLOW, 4, 1)
            assert type(y[0]).__name__ == 'ndarray'
            # This is the call on which the report says the process aborts.
            y.backward()
            print(state.grad)

    data = np.random.normal(0, 1, (INT_OVERFLOW, 4, 4))
    modes = ['rnn_relu', 'rnn_tanh', 'gru']
    # One flat parameter vector per mode: 7 values for the two plain RNN
    # modes and 21 for GRU (presumably 3 gates x 7 -- TODO confirm).
    params = [np.random.normal(0, 1, (7,)),
              np.random.normal(0, 1, (7,)),
              np.random.normal(0, 1, (21,))]
    batch_check(data, modes, params)
This triggers one of two possible error messages:
Sometimes it's:
ubuntu@ip-172-31-38-169:~/incubator-mxnet$ python rnn.py
[22:40:24] ../src/storage/storage.cc:198: Using Pooled (Naive) StorageManager for CPU
corrupted size vs. prev_size
Aborted (core dumped)
Other times:
ubuntu@ip-172-31-38-169:~/incubator-mxnet$ python rnn.py
[21:57:52] ../src/storage/storage.cc:198: Using Pooled (Naive) StorageManager for CPU
malloc_consolidate(): invalid chunk size
Aborted (core dumped)
GDB backtrace:
when the error is
ubuntu@ip-172-31-38-169:~/incubator-mxnet$ python rnn.py
[22:40:24] ../src/storage/storage.cc:198: Using Pooled (Naive) StorageManager for CPU
corrupted size vs. prev_size
Aborted (core dumped)
corrupted size vs. prev_size
Thread 21 "python" received signal SIGABRT, Aborted.
[Switching to Thread 0x7fff692dd700 (LWP 78491)]
__GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
51 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1 0x00007ffff78058b1 in __GI_abort () at abort.c:79
#2 0x00007ffff784e907 in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x7ffff797bdfa "%s\n")
at ../sysdeps/posix/libc_fatal.c:181
#3 0x00007ffff785597a in malloc_printerr (str=str@entry=0x7ffff7979efd "corrupted size vs. prev_size")
at malloc.c:5350
#4 0x00007ffff7855b7c in malloc_consolidate (av=av@entry=0x7fff4c000020) at malloc.c:4456
#5 0x00007ffff7859848 in _int_malloc (av=av@entry=0x7fff4c000020, bytes=bytes@entry=5120) at malloc.c:3703
#6 0x00007ffff785a55b in _int_memalign (av=0x7fff4c000020, alignment=64, bytes=<optimized out>)
at malloc.c:4694
#7 0x00007ffff786002a in _mid_memalign (address=<optimized out>, bytes=5016, alignment=<optimized out>)
at malloc.c:3314
#8 __posix_memalign (memptr=0x7fff692da9c0, alignment=<optimized out>, size=5016) at malloc.c:5369
#9 0x00007fffe64adad2 in dnnl::impl::malloc (size=5016, alignment=64)
at ../3rdparty/mkldnn/src/common/utils.cpp:99
#10 0x00007fffe63e4ff8 in dnnl::impl::c_compatible::operator new (sz=5016)
at ../3rdparty/mkldnn/src/common/nstl.hpp:40
#11 0x00007fffe69cd594 in dnnl::impl::cpu::jit_uni_reorder_t::pd_t::create (reorder_pd=0x7fff692daef8,
engine=0x555557207880, attr=0x7fff4c086fc0, src_engine=0x555557207880, src_md=0x7fff692daf20,
dst_engine=0x555557207880, dst_md=0x7fff692db1e0) at ../3rdparty/mkldnn/src/cpu/jit_uni_reorder.cpp:1086
#12 0x00007fffe69c5e76 in dnnl::impl::cpu::jit_uni_reorder_create (reorder_pd=0x7fff692daef8,
engine=0x555557207880, attr=0x7fff4c086fc0, src_engine=0x555557207880, src_md=0x7fff692daf20,
dst_engine=0x555557207880, dst_md=0x7fff692db1e0) at ../3rdparty/mkldnn/src/cpu/jit_uni_reorder.cpp:1247
#13 0x00007fffe64a1e8f in dnnl_reorder_primitive_desc_create (reorder_pd=0x7fff692daef8,
---Type <return> to continue, or q <return> to quit---
src_md=0x7fff692daf20, src_engine=0x555557207880, dst_md=0x7fff692db1e0, dst_engine=0x555557207880,
attr=0x7fff4c086fc0) at ../3rdparty/mkldnn/src/common/reorder.cpp:73
#14 0x00007fffdb0a263c in dnnl::reorder::primitive_desc::primitive_desc (this=0x7fff692db4e0, src=...,
dst=..., attr=...) at ../3rdparty/mkldnn/include/dnnl.hpp:3166
#15 0x00007fffdb0a274e in dnnl::reorder::reorder (this=0x7fff692db540, src=..., dst=..., attr=...)
at ../3rdparty/mkldnn/include/dnnl.hpp:3217
#16 0x00007fffdbfe3213 in mxnet::op::MKLDNNMemoryReorder (src=..., dst=...)
at ../src/operator/nn/mkldnn/mkldnn_rnn.cc:399
#17 0x00007fffdbfda681 in mxnet::op::MKLDNNRnnBackward::SetNativeWeightsGrads (this=0x7fff4c077390)
at ../src/operator/nn/mkldnn/mkldnn_rnn.cc:867
#18 0x00007fffdbfe34f3 in mxnet::op::RegisterMKLDNNRnn<mxnet::op::MKLDNNRnnBackward> (rnn=...)
at ../src/operator/nn/mkldnn/mkldnn_rnn.cc:991
#19 0x00007fffdbfde69f in mxnet::op::MKLDNNRnnOp::Backward (this=0x55555731fbf0, ctx=...,
inputs=std::vector of length 5, capacity 5 = {...}, req=std::vector of length 3, capacity 3 = {...},
outputs=std::vector of length 3, capacity 3 = {...}) at ../src/operator/nn/mkldnn/mkldnn_rnn.cc:1203
#20 0x00007fffe31da173 in mxnet::op::RNNStatefulGradComputeExCPU (state_ptr=..., ctx=...,
inputs=std::vector of length 5, capacity 5 = {...}, req=std::vector of length 3, capacity 3 = {...},
outputs=std::vector of length 3, capacity 3 = {...}) at ../src/operator/rnn.cc:284
#21 0x00007fffdac7f26b in std::_Function_handler<void (mxnet::OpStatePtr const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&), void (*)(mxnet::OpStatePtr const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&)>::_M_invoke(std::_Any_data const&, mxnet::OpStatePtr const&, mxnet::OpC---Type <return> to continue, or q <return> to quit---
ontext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&) (__functor=..., __args#0=..., __args#1=..., __args#2=std::vector of length 5, capacity 5 = {...},
__args#3=std::vector of length 3, capacity 3 = {...},
__args#4=std::vector of length 3, capacity 3 = {...}) at /usr/include/c++/7/bits/std_function.h:316
#22 0x00007fffdadcd088 in std::function<void (mxnet::OpStatePtr const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&)>::operator()(mxnet::OpStatePtr const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&) const (this=0x555557295ce8, __args#0=..., __args#1=...,
__args#2=std::vector of length 5, capacity 5 = {...},
__args#3=std::vector of length 3, capacity 3 = {...},
__args#4=std::vector of length 3, capacity 3 = {...}) at /usr/include/c++/7/bits/std_function.h:706
#23 0x00007fffdae42905 in mxnet::imperative::PushOperator(mxnet::OpStatePtr const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, mxnet::DispatchMode)::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#1}::operator()(mxnet::RunContext, mxnet::engine::CallbackOnComplete) const (__closure=0x555557295be0, rctx=..., on_complete=...)
at ../src/imperative/./imperative_utils.h:758
#24 0x00007fffdae42b02 in mxnet::imperative::PushOperator(mxnet::OpStatePtr const&, nnvm::Op const*, nnvm::Nod---Type <return> to continue, or q <return> to quit---
eAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, mxnet::DispatchMode)::{lambda(mxnet::RunContext)#2}::operator()(mxnet::RunContext) const (
__closure=0x555557295be0, rctx=...) at ../src/imperative/./imperative_utils.h:772
#25 0x00007fffdae48f9e in std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushOperator(mxnet::OpStatePtr const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, mxnet::DispatchMode)::{lambda(mxnet::RunContext)#2}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&) (__functor=..., __args#0=...)
at /usr/include/c++/7/bits/std_function.h:316
#26 0x00007fffdad9f5a2 in std::function<void (mxnet::RunContext)>::operator()(mxnet::RunContext) const (
this=0x555557194200, __args#0=...) at /usr/include/c++/7/bits/std_function.h:706
#27 0x00007fffdada8260 in mxnet::engine::ThreadedEngine::BulkFlush()::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#1}::operator()(mxnet::RunContext, mxnet::engine::CallbackOnComplete) const (
__closure=0x5555572dab50, ctx=..., on_complete=...) at ../src/engine/./threaded_engine.h:537
#28 0x00007fffdadac835 in std::_Function_handler<void (mxnet::RunContext, mxnet::engine::CallbackOnComplete), mxnet::engine::ThreadedEngine::BulkFlush()::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&, mxnet::engine::CallbackOnComplete&&) (__functor=...,
---Type <return> to continue, or q <return> to quit---
__args#0=..., __args#1=...) at /usr/include/c++/7/bits/std_function.h:316
#29 0x00007fffdada03d0 in std::function<void (mxnet::RunContext, mxnet::engine::CallbackOnComplete)>::operator()(mxnet::RunContext, mxnet::engine::CallbackOnComplete) const (this=0x55555731c630, __args#0=...,
__args#1=...) at /usr/include/c++/7/bits/std_function.h:706
#30 0x00007fffdadb4fd4 in mxnet::engine::ThreadedEngine::ExecuteOprBlock (this=0x555557319ac0, run_ctx=...,
opr_block=0x55555731e1b8) at ../src/engine/./threaded_engine.h:381
#31 0x00007fffdadb8964 in mxnet::engine::ThreadedEnginePerDevice::CPUWorker<(dmlc::ConcurrentQueueType)0> (
this=0x555557319ac0, ctx=..., block=0x5555563ad5c0,
ready_event=std::shared_ptr<dmlc::ManualEvent> (use count 2, weak count 0) = {...})
at ../src/engine/threaded_engine_perdevice.cc:304
#32 0x00007fffdadb62f4 in mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#1}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}::operator()(dmlc::ManualEvent) const (__closure=0x55555731bf40,
ready_event=std::shared_ptr<dmlc::ManualEvent> (use count 2, weak count 0) = {...})
at ../src/engine/threaded_engine_perdevice.cc:120
#33 0x00007fffdadbecc0 in std::_Function_handler<void (std::shared_ptr<dmlc::ManualEvent>), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#1}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data const&, std::shared_ptr<dmlc::ManualEvent>&&) (__functor=..., __args#0=...) at /usr/include/c++/7/bits/std_function.h:316
#34 0x00007fffdadbd927 in std::function<void (std::shared_ptr<dmlc::ManualEvent>)>::operator()(std::shared_ptr<dmlc::ManualEvent>) const (this=0x55555724b428, __args#0=std::shared_ptr<dmlc::ManualEvent> (empty) = {...})
at /usr/include/c++/7/bits/std_function.h:706
#35 0x00007fffdadbb605 in std::__invoke_impl<void, std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> >(std::__invoke_other, std::function<void (std::shared_ptr<dmlc::ManualEvent---Type <return> to continue, or q <return> to quit---
>)>&&, std::shared_ptr<dmlc::ManualEvent>&&) (__f=...) at /usr/include/c++/7/bits/invoke.h:60
#36 0x00007fffdadb7643 in std::__invoke<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> >(std::function<void (std::shared_ptr<dmlc::ManualEvent>)>&&, std::shared_ptr<dmlc::ManualEvent>&&) (__fn=...) at /usr/include/c++/7/bits/invoke.h:95
#37 0x00007fffdadc4fc9 in std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > >::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (
this=0x55555724b418) at /usr/include/c++/7/thread:234
#38 0x00007fffdadc4f31 in std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > >::operator()() (this=0x55555724b418)
at /usr/include/c++/7/thread:243
#39 0x00007fffdadc4ed0 in std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > >::_M_run() (this=0x55555724b410)
at /usr/include/c++/7/thread:186
#40 0x00007fffd3e196df in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#41 0x00007ffff7bbd6db in start_thread (arg=0x7fff692dd700) at pthread_create.c:463
#42 0x00007ffff78e6a3f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
```
GDB backtrace:
when the error is
ubuntu@ip-172-31-38-169:~/incubator-mxnet$ python rnn.py
[21:57:52] ../src/storage/storage.cc:198: Using Pooled (Naive) StorageManager for CPU
malloc_consolidate(): invalid chunk size
Aborted (core dumped)
The backtrace is identical
[New Thread 0x7fff13ffe880 (LWP 78359)]
malloc_consolidate(): invalid chunk size
Thread 21 "python" received signal SIGABRT, Aborted.
[Switching to Thread 0x7fff68adc700 (LWP 78344)]
__GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
51 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1 0x00007ffff78058b1 in __GI_abort () at abort.c:79
#2 0x00007ffff784e907 in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x7ffff797bdfa "%s\n")
at ../sysdeps/posix/libc_fatal.c:181
#3 0x00007ffff785597a in malloc_printerr (
str=str@entry=0x7ffff797d650 "malloc_consolidate(): invalid chunk size") at malloc.c:5350
#4 0x00007ffff7855c1e in malloc_consolidate (av=av@entry=0x7fff58000020) at malloc.c:4441
#5 0x00007ffff7859848 in _int_malloc (av=av@entry=0x7fff58000020, bytes=bytes@entry=5120) at malloc.c:3703
#6 0x00007ffff785a55b in _int_memalign (av=0x7fff58000020, alignment=64, bytes=<optimized out>)
at malloc.c:4694
#7 0x00007ffff786002a in _mid_memalign (address=<optimized out>, bytes=5016, alignment=<optimized out>)
at malloc.c:3314
#8 __posix_memalign (memptr=0x7fff68ad99c0, alignment=<optimized out>, size=5016) at malloc.c:5369
#9 0x00007fffe64adad2 in dnnl::impl::malloc (size=5016, alignment=64)
at ../3rdparty/mkldnn/src/common/utils.cpp:99
#10 0x00007fffe63e4ff8 in dnnl::impl::c_compatible::operator new (sz=5016)
at ../3rdparty/mkldnn/src/common/nstl.hpp:40
#11 0x00007fffe69cd594 in dnnl::impl::cpu::jit_uni_reorder_t::pd_t::create (reorder_pd=0x7fff68ad9ef8,
engine=0x555557207540, attr=0x7fff58086f80, src_engine=0x555557207540, src_md=0x7fff68ad9f20,
dst_engine=0x555557207540, dst_md=0x7fff68ada1e0) at ../3rdparty/mkldnn/src/cpu/jit_uni_reorder.cpp:1086
#12 0x00007fffe69c5e76 in dnnl::impl::cpu::jit_uni_reorder_create (reorder_pd=0x7fff68ad9ef8,
engine=0x555557207540, attr=0x7fff58086f80, src_engine=0x555557207540, src_md=0x7fff68ad9f20,
dst_engine=0x555557207540, dst_md=0x7fff68ada1e0) at ../3rdparty/mkldnn/src/cpu/jit_uni_reorder.cpp:1247
#13 0x00007fffe64a1e8f in dnnl_reorder_primitive_desc_create (reorder_pd=0x7fff68ad9ef8,
---Type <return> to continue, or q <return> to quit---
src_md=0x7fff68ad9f20, src_engine=0x555557207540, dst_md=0x7fff68ada1e0, dst_engine=0x555557207540,
attr=0x7fff58086f80) at ../3rdparty/mkldnn/src/common/reorder.cpp:73
#14 0x00007fffdb0a263c in dnnl::reorder::primitive_desc::primitive_desc (this=0x7fff68ada4e0, src=...,
dst=..., attr=...) at ../3rdparty/mkldnn/include/dnnl.hpp:3166
#15 0x00007fffdb0a274e in dnnl::reorder::reorder (this=0x7fff68ada540, src=..., dst=..., attr=...)
at ../3rdparty/mkldnn/include/dnnl.hpp:3217
#16 0x00007fffdbfe3213 in mxnet::op::MKLDNNMemoryReorder (src=..., dst=...)
at ../src/operator/nn/mkldnn/mkldnn_rnn.cc:399
#17 0x00007fffdbfda681 in mxnet::op::MKLDNNRnnBackward::SetNativeWeightsGrads (this=0x7fff58077340)
at ../src/operator/nn/mkldnn/mkldnn_rnn.cc:867
#18 0x00007fffdbfe34f3 in mxnet::op::RegisterMKLDNNRnn<mxnet::op::MKLDNNRnnBackward> (rnn=...)
at ../src/operator/nn/mkldnn/mkldnn_rnn.cc:991
#19 0x00007fffdbfde69f in mxnet::op::MKLDNNRnnOp::Backward (this=0x55555731fbc0, ctx=...,
inputs=std::vector of length 5, capacity 5 = {...}, req=std::vector of length 3, capacity 3 = {...},
outputs=std::vector of length 3, capacity 3 = {...}) at ../src/operator/nn/mkldnn/mkldnn_rnn.cc:1203
#20 0x00007fffe31da173 in mxnet::op::RNNStatefulGradComputeExCPU (state_ptr=..., ctx=...,
inputs=std::vector of length 5, capacity 5 = {...}, req=std::vector of length 3, capacity 3 = {...},
outputs=std::vector of length 3, capacity 3 = {...}) at ../src/operator/rnn.cc:284
#21 0x00007fffdac7f26b in std::_Function_handler<void (mxnet::OpStatePtr const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&), void (*)(mxnet::OpStatePtr const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&)>::_M_invoke(std::_Any_data const&, mxnet::OpStatePtr const&, mxnet::OpC---Type <return> to continue, or q <return> to quit---
ontext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&) (__functor=..., __args#0=..., __args#1=..., __args#2=std::vector of length 5, capacity 5 = {...},
__args#3=std::vector of length 3, capacity 3 = {...},
__args#4=std::vector of length 3, capacity 3 = {...}) at /usr/include/c++/7/bits/std_function.h:316
#22 0x00007fffdadcd088 in std::function<void (mxnet::OpStatePtr const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&)>::operator()(mxnet::OpStatePtr const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&) const (this=0x555556a60738, __args#0=..., __args#1=...,
__args#2=std::vector of length 5, capacity 5 = {...},
__args#3=std::vector of length 3, capacity 3 = {...},
__args#4=std::vector of length 3, capacity 3 = {...}) at /usr/include/c++/7/bits/std_function.h:706
#23 0x00007fffdae42905 in mxnet::imperative::PushOperator(mxnet::OpStatePtr const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, mxnet::DispatchMode)::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#1}::operator()(mxnet::RunContext, mxnet::engine::CallbackOnComplete) const (__closure=0x555556a60630, rctx=..., on_complete=...)
at ../src/imperative/./imperative_utils.h:758
#24 0x00007fffdae42b02 in mxnet::imperative::PushOperator(mxnet::OpStatePtr const&, nnvm::Op const*, nnvm::Nod---Type <return> to continue, or q <return> to quit---
eAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, mxnet::DispatchMode)::{lambda(mxnet::RunContext)#2}::operator()(mxnet::RunContext) const (
__closure=0x555556a60630, rctx=...) at ../src/imperative/./imperative_utils.h:772
#25 0x00007fffdae48f9e in std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushOperator(mxnet::OpStatePtr const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, mxnet::DispatchMode)::{lambda(mxnet::RunContext)#2}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&) (__functor=..., __args#0=...)
at /usr/include/c++/7/bits/std_function.h:316
#26 0x00007fffdad9f5a2 in std::function<void (mxnet::RunContext)>::operator()(mxnet::RunContext) const (
this=0x5555570658d0, __args#0=...) at /usr/include/c++/7/bits/std_function.h:706
#27 0x00007fffdada8260 in mxnet::engine::ThreadedEngine::BulkFlush()::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#1}::operator()(mxnet::RunContext, mxnet::engine::CallbackOnComplete) const (
__closure=0x5555572e61a0, ctx=..., on_complete=...) at ../src/engine/./threaded_engine.h:537
#28 0x00007fffdadac835 in std::_Function_handler<void (mxnet::RunContext, mxnet::engine::CallbackOnComplete), mxnet::engine::ThreadedEngine::BulkFlush()::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&, mxnet::engine::CallbackOnComplete&&) (__functor=...,
---Type <return> to continue, or q <return> to quit---
__args#0=..., __args#1=...) at /usr/include/c++/7/bits/std_function.h:316
#29 0x00007fffdada03d0 in std::function<void (mxnet::RunContext, mxnet::engine::CallbackOnComplete)>::operator()(mxnet::RunContext, mxnet::engine::CallbackOnComplete) const (this=0x55555731c630, __args#0=...,
__args#1=...) at /usr/include/c++/7/bits/std_function.h:706
#30 0x00007fffdadb4fd4 in mxnet::engine::ThreadedEngine::ExecuteOprBlock (this=0x555557319780, run_ctx=...,
opr_block=0x55555731e1b8) at ../src/engine/./threaded_engine.h:381
#31 0x00007fffdadb8964 in mxnet::engine::ThreadedEnginePerDevice::CPUWorker<(dmlc::ConcurrentQueueType)0> (
this=0x555557319780, ctx=..., block=0x5555563ad5c0,
ready_event=std::shared_ptr<dmlc::ManualEvent> (use count 2, weak count 0) = {...})
at ../src/engine/threaded_engine_perdevice.cc:304
#32 0x00007fffdadb62f4 in mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#1}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}::operator()(dmlc::ManualEvent) const (__closure=0x55555731bc00,
ready_event=std::shared_ptr<dmlc::ManualEvent> (use count 2, weak count 0) = {...})
at ../src/engine/threaded_engine_perdevice.cc:120
#33 0x00007fffdadbecc0 in std::_Function_handler<void (std::shared_ptr<dmlc::ManualEvent>), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#1}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data const&, std::shared_ptr<dmlc::ManualEvent>&&) (__functor=..., __args#0=...) at /usr/include/c++/7/bits/std_function.h:316
#34 0x00007fffdadbd927 in std::function<void (std::shared_ptr<dmlc::ManualEvent>)>::operator()(std::shared_ptr<dmlc::ManualEvent>) const (this=0x5555571f1258, __args#0=std::shared_ptr<dmlc::ManualEvent> (empty) = {...})
at /usr/include/c++/7/bits/std_function.h:706
#35 0x00007fffdadbb605 in std::__invoke_impl<void, std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> >(std::__invoke_other, std::function<void (std::shared_ptr<dmlc::ManualEvent---Type <return> to continue, or q <return> to quit---
>)>&&, std::shared_ptr<dmlc::ManualEvent>&&) (__f=...) at /usr/include/c++/7/bits/invoke.h:60
#36 0x00007fffdadb7643 in std::__invoke<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> >(std::function<void (std::shared_ptr<dmlc::ManualEvent>)>&&, std::shared_ptr<dmlc::ManualEvent>&&) (__fn=...) at /usr/include/c++/7/bits/invoke.h:95
#37 0x00007fffdadc4fc9 in std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > >::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (
this=0x5555571f1248) at /usr/include/c++/7/thread:234
#38 0x00007fffdadc4f31 in std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > >::operator()() (this=0x5555571f1248)
at /usr/include/c++/7/thread:243
#39 0x00007fffdadc4ed0 in std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > >::_M_run() (this=0x5555571f1240)
at /usr/include/c++/7/thread:186
#40 0x00007fffd3e196df in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#41 0x00007ffff7bbd6db in start_thread (arg=0x7fff68adc700) at pthread_create.c:463
#42 0x00007ffff78e6a3f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
@TaoLv @pengzhao-intel
@TaoLv can you please take a look?
@TaoLv can you please take a look?
Yes, team is looking into this issue. @anko-intel
This issue will be fixed with the oneDNN v1.7 release (planned for October 23). We have a partial workaround for this, and if this is urgent we can open a pull request.
@Zha0q1 @pengzhao-intel @szha The fix is already merged in all branches. Can we close this issue?
closing as @bgawrych mentioned.
Most helpful comment
Yes, team is looking into this issue. @anko-intel