The nvcc compiler fails on a minimal example when I compile with VS 2017 and CUDA 10.2 on Windows 10.
cmake -G"Visual Studio 15 2017 Win64" ..
-- Selecting Windows SDK version 10.0.17763.0 to target Windows 10.0.18363.
-- The CXX compiler identification is MSVC 19.16.27039.0
-- The CUDA compiler identification is NVIDIA 10.2.89
-- Check for working CXX compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe
-- Check for working CXX compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe -- works
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Check for working CUDA compiler: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2/bin/nvcc.exe
-- Check for working CUDA compiler: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2/bin/nvcc.exe -- works
-- Detecting CUDA compiler ABI info
-- Detecting CUDA compiler ABI info - done
-- Found PythonInterp: C:/Users/user/miniconda3/envs/pybind11_conda/python.exe (found suitable version "3.8.2", minimum required is "3.6")
-- Found PythonLibs: C:/Users/user/miniconda3/envs/pybind11_conda/libs/python38.lib (found suitable version "3.8.2", minimum required is "3.6")
-- Found PythonInterp: C:/Users/user/miniconda3/envs/pybind11_conda/python.exe (found version "3.8.2")
-- Found PythonLibs: C:/Users/user/miniconda3/envs/pybind11_conda/libs/Python38.lib
-- Configuring done
-- Generating done
-- Build files have been written to: C:/Users/user/project/simple_cuda/build_conda
(pybind11_conda) PS C:\Users\user\project\simple_cuda\build_conda> cmake --build .
Microsoft (R) Build Engine version 15.9.21+g9802d43bc3 for .NET Framework
Copyright (C) Microsoft Corporation. All rights reserved.
Checking Build System
Building Custom Rule C:/Users/user/project/simple_cuda/CMakeLists.txt
Compiling CUDA source file ..\module.cu...
C:\Users\user\project\simple_cuda\build_conda>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin\nvcc.exe" -gencode=arch=compute_30,code=\"sm
_30,compute_30\" --use-local-env -ccbin "C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC\Tools\MSVC\14.16.27023\bin\HostX86\x64" -x cu
-IC:\Users\user\miniconda3\envs\pybind11_conda\include -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\include" --keep-dir x64\Debug -ma
xrregcount=0 --machine 64 --compile -cudart static -Xcompiler="/EHsc -Zi -Ob0" -g -D_WINDOWS -D"CMAKE_INTDIR=\"Debug\"" -Dbinding_EXPORTS -D"CMAKE_INTD
IR=\"Debug\"" -Dbinding_EXPORTS -D_WINDLL -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /Fdbinding.dir\Debug\vc141.pdb /FS /Zi /RTC1 /MDd /GR" -o binding.dir\
Debug\module.obj "C:\Users\user\project\simple_cuda\module.cu"
c:\users\user\miniconda3\envs\pybind11_conda\include\pybind11\cast.h(1495): error : expression must be a pointer to a complete object type [C:\Users\simeo
n\project\simple_cuda\build_conda\binding.vcxproj]
1 error detected in the compilation of "C:/Users/user/AppData/Local/Temp/tmpxft_000017ec_00000000-5_module.cpp4.ii".
module.cu
C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\Common7\IDE\VC\VCTargets\BuildCustomizations\CUDA 10.2.targets(764,9): error MSB3721: The com
mand ""C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin\nvcc.exe" -gencode=arch=compute_30,code=\"sm_30,compute_30\" --use-local-env -ccbin "C:\
Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC\Tools\MSVC\14.16.27023\bin\HostX86\x64" -x cu -IC:\Users\user\miniconda3\envs\pybind11_con
da\include -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\include" --keep-dir x64\Debug -maxrregcount=0 --machine 64 --compile -cudart sta
tic -Xcompiler="/EHsc -Zi -Ob0" -g -D_WINDOWS -D"CMAKE_INTDIR=\"Debug\"" -Dbinding_EXPORTS -D"CMAKE_INTDIR=\"Debug\"" -Dbinding_EXPORTS -D_WINDLL -D_MBCS
-Xcompiler "/EHsc /W3 /nologo /Od /Fdbinding.dir\Debug\vc141.pdb /FS /Zi /RTC1 /MDd /GR" -o binding.dir\Debug\module.obj "C:\Users\user\project\simple_cud
a\module.cu"" exited with code 2. [C:\Users\user\project\simple_cuda\build_conda\binding.vcxproj]
module.cu:
#include <pybind11/pybind11.h>
// Returns the sum of the two integer operands.
int add(int i, int j) {
    const int sum = i + j;
    return sum;
}
// Defines the pybind11 module `binding` and registers `add` on it under the
// Python-visible name "add", together with its help text.
PYBIND11_MODULE(binding, mod) {
    mod.def("add", &add, "A function which adds two numbers");
}
CMakeLists.txt:
# Minimal build for the `binding` library, compiled from module.cu as CUDA.
cmake_minimum_required(VERSION 3.14)
project(test LANGUAGES CXX CUDA)

# Locate Python (>= 3.6) and pybind11. The legacy PythonInterp/PythonLibs
# modules are kept on purpose so the PYTHON_* variables used below stay defined.
find_package(PythonInterp 3.6 REQUIRED)
find_package(PythonLibs 3.6 REQUIRED)
find_package(pybind11 REQUIRED)

add_library(binding SHARED module.cu)

# Header search paths are scoped to this target instead of the whole directory.
# pybind11's own include directories are listed explicitly so the build does not
# depend on pybind11 being installed inside Python's include directory.
target_include_directories(binding
  PRIVATE
    ${PYTHON_INCLUDE_DIRS}
    ${pybind11_INCLUDE_DIRS}
)

# Emit the library with a ".pyd" file suffix.
set_target_properties(binding PROPERTIES SUFFIX ".pyd")

# PRIVATE: these libraries are needed to build `binding` only; the keyword form
# also avoids the legacy keyword-less signature of target_link_libraries.
target_link_libraries(binding
  PRIVATE
    ${PYTHON_LIBRARIES}
    cudart
)
I found a solution at this issue: https://github.com/pytorch/pytorch/issues/11004
I replaced the line `explicit operator type&() { return *(this->value); }` (cast.h:1495) with `explicit operator type&() { return *(static_cast<type *>(this->value)); }`. This works fine for me, but I don't know whether it is a good solution.
We are facing the exact same issue and can confirm the workaround fixes it.
@SimeonEhrig would you want to PR? Otherwise I could.
We are facing the exact same issue and can confirm the workaround fixes it.
@SimeonEhrig would you want to PR? Otherwise I could.
I opened a PR (#2240). Thanks for verifying my solution.
Fixed in #2240.
@SimeonEhrig independent of the successful work-around, can you please export CUDAFLAGS="--keep" and attach the created intermediate files here? The Nvidia Compiler team would like those intermediate files (e.g. .cpp1.ii and .cpp4.ii file extension) to be able to fix this.
@ax3l
I'm not able to run with the CUDA flag --keep using CMake on Windows.
I tried $Env:CUDAFLAGS="--keep" and cmake -G"Visual Studio 15 2017 Win64" -DCMAKE_CUDA_FLAGS="--keep" .. in different PowerShell sessions, but I get the same error:
(pybind11_cuda_test) PS C:\Users\simeon\projects\test_pybind2\build> cmake -G"Visual Studio 15 2017 Win64" -DCMAKE_CUDA_FLAGS="--keep" ..
-- Selecting Windows SDK version 10.0.17763.0 to target Windows 10.0.18363.
-- The CXX compiler identification is MSVC 19.16.27039.0
-- The CUDA compiler identification is NVIDIA 10.2.89
-- Check for working CXX compiler: D:/MS/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe
-- Check for working CXX compiler: D:/MS/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe -- works
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Check for working CUDA compiler: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2/bin/nvcc.exe
-- Check for working CUDA compiler: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2/bin/nvcc.exe -- broken
CMake Error at C:/Users/simeon/miniconda3/envs/pybind11_cuda_test/Library/share/cmake-3.16/Modules/CMakeTestCUDACompiler.cmake:46 (message):
The CUDA compiler
"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2/bin/nvcc.exe"
is not able to compile a simple test program.
It fails with the following output:
Change Dir: C:/Users/simeon/projects/test_pybind2/build/CMakeFiles/CMakeTmp
Run Build Command(s):D:/MS/Microsoft Visual Studio/2017/Enterprise/MSBuild/15.0/Bin/MSBuild.exe cmTC_b301a.vcxproj /p:Configuration=Debug /p:Platform=x64 /p:VisualStudioVersion=15.0 /v:m && Microsoft (R)-Build-Engine, Version 15.9.21+g9802d43bc3 für .NET Framework
Copyright (C) Microsoft Corporation. Alle Rechte vorbehalten.
Compiling CUDA source file main.cu...
C:\Users\simeon\projects\test_pybind2\build\CMakeFiles\CMakeTmp>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin\nvcc.exe" -gencode=arch=compute_30,code=\"sm_30,compute_30\" --use-local-env -ccbin "D:\MS\Microsoft Visual Studio\2017\Enterprise\VC\Tools\MSVC\14.16.27023\bin\HostX86\x64" -x cu -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\include" --keep-dir x64\Debug -maxrregcount=0 --machine 64 --compile -cudart static --keep -Xcompiler="-Zi -Ob0" -g -D"CMAKE_INTDIR=\"Debug\"" -D"CMAKE_INTDIR=\"Debug\"" -D_MBCS -Xcompiler "/EHsc /W1 /nologo /Od /FdcmTC_b301a.dir\Debug\vc141.pdb /FS /Zi /RTC1 /MDd " -o cmTC_b301a.dir\Debug\main.obj "C:\Users\simeon\projects\test_pybind2\build\CMakeFiles\CMakeTmp\main.cu"
main.cu
nvcc fatal : Failed to create the host compiler response file 'x64/Debug/main.cpp1.ii.res'
D:\MS\Microsoft Visual Studio\2017\Enterprise\Common7\IDE\VC\VCTargets\BuildCustomizations\CUDA 10.2.targets(764,9): error MSB3721: Der Befehl ""C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin\nvcc.exe" -gencode=arch=compute_30,code=\"sm_30,compute_30\" --use-local-env -ccbin "D:\MS\Microsoft Visual Studio\2017\Enterprise\VC\Tools\MSVC\14.16.27023\bin\HostX86\x64" -x cu -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\include" --keep-dir x64\Debug -maxrregcount=0 --machine 64 --compile -cudart static --keep -Xcompiler="-Zi -Ob0" -g -D"CMAKE_INTDIR=\"Debug\"" -D"CMAKE_INTDIR=\"Debug\"" -D_MBCS -Xcompiler "/EHsc /W1 /nologo /Od /FdcmTC_b301a.dir\Debug\vc141.pdb /FS /Zi /RTC1 /MDd " -o cmTC_b301a.dir\Debug\main.obj "C:\Users\simeon\projects\test_pybind2\build\CMakeFiles\CMakeTmp\main.cu"" wurde mit Code 1 beendet. [C:\Users\simeon\projects\test_pybind2\build\CMakeFiles\CMakeTmp\cmTC_b301a.vcxproj]
CMake will not be able to correctly generate this project.
Call Stack (most recent call first):
CMakeLists.txt:2 (project)
-- Configuring incomplete, errors occurred!
Latest CUDA release: CUDA Toolkit 11.1 Update 1
Got feedback: the NVCC fix will be released in an _upcoming_ CUDA release.
Most helpful comment
Latest CUDA release: CUDA Toolkit 11.1 Update 1
Got feedback: the NVCC fix will be released in an _upcoming_ CUDA release.