TensorRT: demo/BERT doesn't work on a Titan Xp Nvidia GPU, failing with the error "cuda failure 209"

Created on 22 Aug 2019  ·  1 Comment  ·  Source: NVIDIA/TensorRT

demo/BERT works well on a GTX 2080 GPU, but crashes on a Titan Xp. The backtrace is as follows:

(gdb) bt full
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
        set = {__val = {0, 1649481172316, 51539607584, 1, 4294967680, 1024, 1024, 140030812475818, 2097314, 4, 1, 8592, 5, 34817, 0, 1024}}
        pid = <optimized out>
        tid = <optimized out>
        ret = <optimized out>
#1  0x00007f5b76d1f801 in __GI_abort () at abort.c:79
        save_stage = 1
        act = {__sigaction_handler = {sa_handler = 0x7f5976d80000, sa_sigaction = 0x7f5976d80000}, sa_mask = {__val = {1432254016, 0, 
              140030812537859, 140030816073568, 140737474456976, 10, 140030812517195, 140030618316384, 140030615750539, 1, 140030618316384, 0, 
              140030816073568, 140030816055968, 140030812477565, 140027740815360}}, sa_flags = 1799564896, 
          sa_restorer = 0x7f5b6b432e60 <std::cout>}
        sigs = {__val = {32, 0 <repeats 15 times>}}
        __cnt = <optimized out>
        __set = <optimized out>
        __cnt = <optimized out>
        __set = <optimized out>
#2  0x00007f5b0fc1e22f in bert::launchTransQkv(CUstream_st*, int, int, int, int, float const*, float*) ()
   from /data/nfsdata/TensorRT/demo/BERT/build/libbert_plugins.so
No symbol table info available.
#3  0x00007f5b0fc23c2b in int bert::qkvToCtx<float>(cublasContext*&, int, int, int, int, float, float const*, float*, float*, float*, float*, CUstream_st*, int const*) () from /data/nfsdata/TensorRT/demo/BERT/build/libbert_plugins.so
No symbol table info available.
#4  0x00007f5b0fc1ff04 in bert::QKVToContextPlugin::enqueue(int, void const* const*, void**, void*, CUstream_st*) ()
   from /data/nfsdata/TensorRT/demo/BERT/build/libbert_plugins.so
No symbol table info available.
#5  0x00007f5b6ceb1e80 in nvinfer1::cudnn::selectTactic(nvinfer1::rt::EngineBuildContext const&, nvinfer1::rt::Layer&, nvinfer1::builder::Node*)
    () from /usr/lib/x86_64-linux-gnu/libnvinfer.so.5
No symbol table info available.
#6  0x00007f5b6ce63df9 in nvinfer1::builder::buildSingleLayer(nvinfer1::rt::EngineBuildContext&, nvinfer1::builder::Node&, std::unordered_map<std::string, std::unique_ptr<nvinfer1::rt::Region, std::default_delete<nvinfer1::rt::Region> >, std::hash<std::string>, std::equal_to<std::string>, std::allocator<std::pair<std::string const, std::unique_ptr<nvinfer1::rt::Region, std::default_delete<nvinfer1::rt::Region> > > > > const&, std::unordered_map<std::string, std::vector<float, std::allocator<float> >, std::hash<std::string>, std::equal_to<std::string>, std::allocator<std::pair<std::string const, std::vector<float, std::allocator<float> > > > >*, bool) () from /usr/lib/x86_64-linux-gnu/libnvinfer.so.5
No symbol table info available.
#7  0x00007f5b6ce66980 in nvinfer1::builder::EngineTacticSupply::getBestTactic(nvinfer1::builder::Node&, nvinfer1::query::Ports<nvinfer1::RegionFormatL> const&, bool) () from /usr/lib/x86_64-linux-gnu/libnvinfer.so.5
No symbol table info available.
#8  0x00007f5b6ce3ba36 in ?? () from /usr/lib/x86_64-linux-gnu/libnvinfer.so.5
No symbol table info available.
#9  0x00007f5b6ce4018a in nvinfer1::builder::chooseFormatsAndTactics(nvinfer1::builder::Graph&, nvinfer1::builder::TacticSupply&, std::unordered_map<std::string, std::vector<float, std::allocator<float> >, std::hash<std::string>, std::equal_to<std::string>, std::allocator<std::pair<std::string const, std::vector<float, std::allocator<float> > > > >*, bool) () from /usr/lib/x86_64-linux-gnu/libnvinfer.so.5
No symbol table info available.
#10 0x00007f5b6ce68834 in ?? () from /usr/lib/x86_64-linux-gnu/libnvinfer.so.5
No symbol table info available.
#11 0x00007f5b6ce6d303 in nvinfer1::builder::buildEngine(nvinfer1::CudaEngineBuildConfig&, nvinfer1::rt::HardwareContext const&, nvinfer1::Network const&) () from /usr/lib/x86_64-linux-gnu/libnvinfer.so.5
No symbol table info available.
#12 0x00007f5b6ce577cd in nvinfer1::builder::Builder::buildCudaEngine(nvinfer1::INetworkDefinition&) ()
   from /usr/lib/x86_64-linux-gnu/libnvinfer.so.5
No symbol table info available.
#13 0x00007f5b75980023 in void pybind11::cpp_function::initialize<pybind11::cpp_function::initialize<nvinfer1::ICudaEngine*, nvinfer1::IBuilder, nvinfer1::INetworkDefinition&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::arg, char const*>(nvinfer1::ICudaEngine* (nvinfer1::IBuilder::*)(nvinfer1::INetworkDefinition&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::arg const&, char const* const&)::{lambda(nvinfer1::IBuilder*, nvinfer1::INetworkDefinition&)#1}, nvinfer1::ICudaEngine*, nvinfer1::IBuilder*, nvinfer1::INetworkDefinition&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::arg, char const*>(pybind11::cpp_function::initialize<nvinfer1::ICudaEngine*, nvinfer1::IBuilder, nvinfer1::INetworkDefinition&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::arg, char const*>(nvinfer1::ICudaEngine* (nvinfer1::IBuilder::*)(nvinfer1::INetworkDefinition&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::arg const&, char const* const&)::{lambda(nvinfer1::IBuilder*, nvinfer1::INetworkDefinition&)#1}&&, nvinfer1::ICudaEngine* (*)(nvinfer1::IBuilder*, nvinfer1::INetworkDefinition&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::arg const&, char const* const&)::{lambda(pybind11::detail::function_call&)#3}::_FUN(pybind11::detail::function_call) ()
   from /usr/lib/python3.6/dist-packages/tensorrt/tensorrt.so
No symbol table info available.
#14 0x00007f5b758cc08a in pybind11::cpp_function::dispatcher(_object*, _object*, _object*) ()
   from /usr/lib/python3.6/dist-packages/tensorrt/tensorrt.so
Samples hardware question

Most helpful comment

It looks like the BERT sample is set up to compile only for SM 7.0 and 7.5. I suggest adjusting CMAKE_CUDA_FLAGS in CMakeLists.txt.

Update: the sample uses fp16 intrinsics, and thus will not work on a Titan Xp.

>All comments

It looks like the BERT sample is set up to compile only for SM 7.0 and 7.5. I suggest adjusting CMAKE_CUDA_FLAGS in CMakeLists.txt.

Update: the sample uses fp16 intrinsics, and thus will not work on a Titan Xp.

Was this page helpful?
0 / 5 - 0 ratings