Converting dtype raises an internal assertion error on CUDA.
File "/home/ubuntu/.local/share/virtualenvs/factory-H3jTBYbX/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "/home/ubuntu/.local/share/virtualenvs/factory-H3jTBYbX/lib/python3.6/site-packages/kornia/losses/focal.py", line 86, in forward
self.gamma.to(input.dtype))
RuntimeError: iter.device(arg).is_cuda() INTERNAL ASSERT FAILED at /pytorch/aten/src/ATen/native/cuda/Loops.cuh:197, please report a bug to PyTorch.
Steps to reproduce the behavior:
The torch.pow call at line 85 of focal.py, which passes self.gamma.to(input.dtype) as the exponent, triggers the error.
from typing import Optional
import torch
import torch.nn as nn
import torch.nn.functional as F
from kornia.utils import one_hot
# based on:
# https://github.com/zhezh/focalloss/blob/master/focalloss.py
class FocalLoss(nn.Module):
    r"""Criterion that computes Focal loss.

    According to [1], the Focal loss is computed as follows:

    .. math::

        \text{FL}(p_t) = -\alpha_t (1 - p_t)^{\gamma} \, \text{log}(p_t)

    where:

       - :math:`p_t` is the model's estimated probability for each class.

    Arguments:
        alpha (float): Weighting factor :math:`\alpha \in [0, 1]`.
        gamma (float): Focusing parameter :math:`\gamma >= 0`.
        reduction (Optional[str]): Specifies the reduction to apply to the
          output: 'none' | 'mean' | 'sum'. 'none': no reduction will be
          applied, 'mean': the sum of the output will be divided by the number
          of elements in the output, 'sum': the output will be summed.
          Default: 'none'.

    Shape:
        - Input: :math:`(N, C, H, W)` where C = number of classes.
        - Target: :math:`(N, H, W)` where each value is
          :math:`0 ≤ targets[i] ≤ C−1`.

    Examples:
        >>> import kornia
        >>> N = 5  # num_classes
        >>> args = {"alpha": 0.5, "gamma": 2.0, "reduction": 'mean'}
        >>> loss = kornia.losses.FocalLoss(**args)
        >>> input = torch.randn(1, N, 3, 5, requires_grad=True)
        >>> target = torch.empty(1, 3, 5, dtype=torch.long).random_(N)
        >>> output = loss(input, target)
        >>> output.backward()

    References:
        [1] https://arxiv.org/abs/1708.02002
    """

    def __init__(self, alpha: float, gamma: Optional[float] = 2.0,
                 reduction: Optional[str] = 'none') -> None:
        super(FocalLoss, self).__init__()
        self.alpha: float = alpha
        self.gamma: torch.Tensor = torch.tensor(gamma)
        self.reduction: Optional[str] = reduction
        self.eps: float = 1e-6

    def forward(  # type: ignore
            self,
            input: torch.Tensor,
            target: torch.Tensor) -> torch.Tensor:
        if not torch.is_tensor(input):
            raise TypeError("Input type is not a torch.Tensor. Got {}"
                            .format(type(input)))
        if not len(input.shape) == 4:
            raise ValueError("Invalid input shape, we expect BxNxHxW. Got: {}"
                             .format(input.shape))
        if not input.shape[-2:] == target.shape[-2:]:
            raise ValueError("input and target shapes must be the same. Got: {} and {}"
                             .format(input.shape, target.shape))
        if not input.device == target.device:
            raise ValueError(
                "input and target must be on the same device. Got: {} and {}".format(
                    input.device, target.device))

        # compute softmax over the classes axis
        input_soft = F.softmax(input, dim=1) + self.eps

        # create the labels one hot tensor
        target_one_hot = one_hot(target, num_classes=input.shape[1],
                                 device=input.device, dtype=input.dtype)

        # compute the actual focal loss
        weight = torch.pow(torch.tensor(1.) - input_soft,
                           self.gamma.to(input.dtype))
        focal = -self.alpha * weight * torch.log(input_soft)
        loss_tmp = torch.sum(target_one_hot * focal, dim=1)

        if self.reduction == 'none':
            loss = loss_tmp
        elif self.reduction == 'mean':
            loss = torch.mean(loss_tmp)
        elif self.reduction == 'sum':
            loss = torch.sum(loss_tmp)
        else:
            raise NotImplementedError("Invalid reduction mode: {}"
                                      .format(self.reduction))
        return loss
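A possible workaround, sketched here under the assumption that the failure comes from torch.pow receiving a CUDA base together with a CPU tensor exponent, is to pass gamma as a plain Python number or to move it onto the input's device as well as casting its dtype (input_soft and gamma below stand in for the values used in FocalLoss.forward):

import torch

# Hedged workaround sketch, not the kornia implementation: keep the exponent on
# the same device as the base, or pass a plain Python scalar, so torch.pow never
# mixes a CUDA base with a CPU tensor exponent.
input_soft = torch.rand(1, 5, 3, 5, device='cuda')  # softmax probabilities
gamma = torch.tensor(2.0)                            # CPU scalar, as in __init__

weight_scalar = torch.pow(1. - input_soft, float(gamma))            # Python scalar exponent
weight_tensor = torch.pow(1. - input_soft,
                          gamma.to(device=input_soft.device,
                                   dtype=input_soft.dtype))          # exponent on the GPU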
The caller code looks like this:
with torch.cuda.device('cuda:0'):
    # ... do stuff
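A fuller sketch of such a caller, filled in here only as an assumption (the report does not show the real training code, and the shapes follow the docstring example):

import torch
from kornia.losses import FocalLoss

# Assumed repro sketch: FocalLoss keeps self.gamma as a CPU tensor while the
# logits and targets live on the GPU, so forward ends up calling torch.pow with
# a CUDA base and a CPU tensor exponent.
criterion = FocalLoss(alpha=0.5, gamma=2.0, reduction='mean')
with torch.cuda.device('cuda:0'):
    logits = torch.randn(1, 5, 3, 5, device='cuda', requires_grad=True)
    target = torch.empty(1, 3, 5, dtype=torch.long, device='cuda').random_(5)
    loss = criterion(logits, target)  # raises the internal assert on PyTorch 1.4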
Expected behavior: no assertion error.

Environment:
PyTorch version: 1.4.0
Is debug build: No
CUDA used to build PyTorch: 10.1
OS: Ubuntu 18.04.2 LTS
GCC version: (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0
CMake version: version 3.10.2
Python version: 3.6
Is CUDA available: Yes
CUDA runtime version: 10.1.168
GPU models and configuration:
GPU 0: GeForce GTX 1080 Ti
GPU 1: GeForce GTX 1080 Ti
Nvidia driver version: 430.26
cuDNN version: Probably one of the following:
/usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.0
/usr/local/cuda-10.1/targets/x86_64-linux/lib/libcudnn.so.7
Versions of relevant libraries:
[pip3] numpy==1.18.1
[pip3] torch==1.4.0
[pip3] torchvision==0.5.0
[conda] Could not collect
@keven425 what is the dtype of input here? float32? I am trying to create a minimal repro. The offending line looks like self.gamma.to(input.dtype), but the following code works fine for me:
>>> with torch.cuda.device('cuda:0'):
...     x = torch.tensor(5.0)
...     y = torch.LongTensor(1)
...     x.to(y.dtype)
...     z = torch.zeros(2)
...     x.to(z.dtype)
...
@colesbury do you know why the arg cannot be on GPU?
@keven425 can you please provide a complete reproduction? Specifically, code that calls FocalLoss and triggers the error.
Never mind, here's a minimal repro:
import torch
x = torch.randn(10).cuda()
y = torch.tensor(3.0)
torch.pow(x, y)
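For contrast (a hedged note on the same setup): the call succeeds when the exponent is a plain Python number or is moved onto the same CUDA device as the base:

import torch

x = torch.randn(10).cuda()
torch.pow(x, 3.0)                       # Python scalar exponent: no assert
torch.pow(x, torch.tensor(3.0).cuda())  # exponent on the same device: no assert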
The same problem occurs if QAT is performed with the 'fbgemm' settings:
model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
torch.backends.quantized.engine = 'fbgemm'
but it works with the 'qnnpack' settings. The error is the following:
Traceback (most recent call last):
File "train_net.py", line 120, in <module>
args=(args,),
File "/root/some_detectron2/detectron2/engine/launch.py", line 52, in launch
main_func(*args)
File "train_net.py", line 78, in main
return trainer.train()
File "/root/some_detectron2/detectron2/engine/defaults.py", line 380, in train
super().train(self.start_iter, self.max_iter)
File "/root/some_detectron2/detectron2/engine/train_loop.py", line 132, in train
self.run_step()
File "/root/some_detectron2/detectron2/engine/train_loop.py", line 215, in run_step
loss_dict = self.model(data)
File "/root/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/root/some_detectron2/detectron2/modeling/meta_arch/rcnn.py", line 121, in forward
features = self.backbone(images.tensor)
File "/root/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/root/DensePose_ADASE/densepose/modeling/quantize.py", line 177, in new_forward
p5, p4, p3, p2 = self.bottom_up(x) # top->down
File "/root/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/root/DensePose_ADASE/densepose/modeling/quantize.py", line 130, in new_forward
return old_forward(self, x)
File "/root/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/timm/models/efficientnet.py", line 350, in forward
x = self.conv_stem(x)
File "/root/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/root/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/nn/intrinsic/qat/modules/conv_fused.py", line 243, in forward
return self.activation_post_process(F.relu(ConvBn2d._forward(self, input)))
File "/root/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/nn/intrinsic/qat/modules/conv_fused.py", line 95, in _forward
conv = self._conv_forward(input, self.weight_fake_quant(scaled_weight))
File "/root/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/root/anaconda2/envs/pytorch-gpu/lib/python3.7/site-packages/torch/quantization/fake_quantize.py", line 86, in forward
self.ch_axis, self.quant_min, self.quant_max)
RuntimeError: iter.device(arg).is_cuda() INTERNAL ASSERT FAILED at /opt/conda/conda-bld/pytorch_1587428266983/work/aten/src/ATen/native/cuda/Loops.cuh:56, please report a bug to PyTorch.
CUDA: 10.2
PyTorch: py3.7_cuda10.2.89_cudnn7.6.5_0
OS: Ubuntu 18
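One plausible reason why only 'fbgemm' is affected (hedged, inferred from the traceback ending in fake_quantize.py with a ch_axis argument): the default 'fbgemm' QAT qconfig fake-quantizes weights per channel, while 'qnnpack' uses per-tensor fake quantization, so only the 'fbgemm' path reaches the per-channel kernel that trips the assert. The two configurations can be compared directly:

import torch

# Compare the weight fake-quant settings of the two default QAT qconfigs;
# 'fbgemm' uses a per-channel scheme, 'qnnpack' a per-tensor one.
for backend in ('fbgemm', 'qnnpack'):
    qconfig = torch.quantization.get_default_qat_qconfig(backend)
    print(backend, qconfig.weight)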
@colesbury's reproduction is covered by this more recent (and high-priority) issue: https://github.com/pytorch/pytorch/issues/46037, which I believe is a bug in torch.pow's implementation.
Closing this because @colesbury's snippet is covered, but the other snippets still need a reproduction. @keven425, @zetyquickly, if you're still encountering this issue, would you provide a complete minimal snippet that can be copy-pasted and run as-is to demonstrate the issue?