sys.platform: linux
Python: 3.6.9 (default, Nov 7 2019, 10:44:02) [GCC 8.3.0]
CUDA available: True
CUDA_HOME: /usr/local/cuda
NVCC: Cuda compilation tools, release 10.1, V10.1.243
GPU 0: Tesla K80
GCC: gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
PyTorch: 1.4.0
PyTorch compiling details: PyTorch built with:
TorchVision: 0.5.0
OpenCV: 4.1.2
MMCV: 0.4.3
MMDetection: 1.1.0+a1c3aa4
MMDetection Compiler: GCC 7.5
2020-04-15 13:57:35,666 - mmdet - INFO - Distributed training: False
2020-04-15 13:57:35,666 - mmdet - INFO - Config:
/content/mmdetection/configs/ssd512_coco_humans.py
input_size = 512
model = dict(
    type='SingleStageDetector',
    pretrained=None,
    # pretrained='/home/neo/data/hemant_intern_stuff/SpatioTemporalCoherence/Autoencoder/Vgg16Autoencoder/VggAutoencoder_adani_params.pt',
    # pretrained='open-mmlab://vgg16_caffe',
    backbone=dict(
        type='SSDVGG',
        input_size=input_size,
        depth=16,
        with_last_pool=False,
        ceil_mode=True,
        out_indices=(3, 4),
        out_feature_indices=(22, 34),
        l2_norm_scale=20),
    neck=None,
    bbox_head=dict(
        type='SSDHead',
        input_size=input_size,
        in_channels=(512, 1024, 512, 256, 256, 256, 256),
        num_classes=2,
        anchor_strides=(8, 16, 32, 64, 128, 256, 512),
        basesize_ratio_range=(0.1, 0.9),
        anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]),
        target_means=(.0, .0, .0, .0),
        target_stds=(0.1, 0.1, 0.2, 0.2)))
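Note that in MMDetection 1.x the head's `num_classes` includes the background class (label 0), so `num_classes=2` leaves room for exactly one foreground category. A quick sanity check against a COCO-style annotation file can catch a mismatch before training; the helper below is hypothetical (not part of MMDetection), shown with an in-memory stand-in for the annotation dict:

```python
# Hedged sanity check (hypothetical helper, not part of MMDetection):
# in MMDetection 1.x, label 0 is background, so a head with
# num_classes=2 can hold at most one foreground category.
def fits_head(ann, num_classes):
    """ann: COCO-style dict with a 'categories' list."""
    cat_ids = {c['id'] for c in ann['categories']}
    return len(cat_ids) <= num_classes - 1

# In-memory stand-in for a one-class annotation file:
ann = {'categories': [{'id': 1, 'name': 'human'}]}
print(fits_head(ann, num_classes=2))  # one foreground class fits
print(fits_head(ann, num_classes=1))  # no room for any foreground class
```

If this returns `False` for your annotation file, the loss targets will exceed `n_classes` and trigger exactly the device-side assert seen below.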
cudnn_benchmark = True
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        min_pos_iou=0.,
        ignore_iof_thr=-1,
        gt_max_assign_all=False),
    smoothl1_beta=1.,
    allowed_border=-1,
    pos_weight=-1,
    neg_pos_ratio=3,
    debug=False)
test_cfg = dict(
    nms=dict(type='nms', iou_thr=0.45),
    min_bbox_size=0,
    score_thr=0.02,
    max_per_img=200)
dataset_type = 'DroneDataset'
data_root = None
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    dict(
        type='Expand',
        mean=img_norm_cfg['mean'],
        to_rgb=img_norm_cfg['to_rgb'],
        ratio_range=(1, 4)),
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
        min_crop_size=0.3),
    # dict(type='Resize', img_scale=(352, 640), keep_ratio=False),
    dict(type='Resize', img_scale=(512, 512), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='RandomFlip', flip_ratio=0.0),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(512, 512),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=False),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',
        times=5,
        dataset=dict(
            type=dataset_type,
            ann_file='/content/annotations/sushruth_72.json',
            img_prefix='/content/drive/My Drive/sushruth/',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        ann_file='/content/annotations/sushruth_72.json',
        img_prefix='/content/drive/My Drive/sushruth/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file='/content/annotations/sushruth_72.json',
        img_prefix='/content/drive/My Drive/sushruth/',
        pipeline=test_pipeline))
evaluation = dict(interval=1, metric='bbox')
optimizer = dict(type='SGD', lr=1e-4, momentum=0.9, weight_decay=1e-4)
optimizer_config = dict()
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=100,
    warmup_ratio=1.0 / 3,
    step=[50, 70])  # [6, 16, 22, 40]
checkpoint_config = dict(interval=1)
log_config = dict(
    interval=2,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
total_epochs = 10
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/ssd512_coco/10_epochs/'
load_from = None
resume_from = None
workflow = [('train', 1)]
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
2020-04-15 13:57:38,385 - mmdet - INFO - Start running, host: root@a6cb7eba6047, work_dir: /content/mmdetection/work_dirs/ssd512_coco/10_epochs
2020-04-15 13:57:38,385 - mmdet - INFO - workflow: [('train', 1)], max: 10 epochs
/pytorch/aten/src/THCUNN/ClassNLLCriterion.cu:57: void ClassNLLCriterion_updateOutput_no_reduce_kernel(...): Assertion `cur_target >= 0 && cur_target < n_classes` failed.
(the same assertion was printed 16 times)
Traceback (most recent call last):
  File "tools/train.py", line 151, in <module>
    main()
  File "tools/train.py", line 147, in main
    meta=meta)
  File "/content/mmdetection/mmdet/apis/train.py", line 165, in train_detector
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
  File "/usr/local/lib/python3.6/dist-packages/mmcv/runner/runner.py", line 359, in run
    epoch_runner(data_loaders[i], **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/mmcv/runner/runner.py", line 263, in train
    self.model, data_batch, train_mode=True, **kwargs)
  File "/content/mmdetection/mmdet/apis/train.py", line 75, in batch_processor
    losses = model(**data)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/data_parallel.py", line 150, in forward
    return self.module(*inputs[0], **kwargs[0])
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/content/mmdetection/mmdet/core/fp16/decorators.py", line 49, in new_func
    return old_func(*args, **kwargs)
  File "/content/mmdetection/mmdet/models/detectors/base.py", line 147, in forward
    return self.forward_train(img, img_metas, **kwargs)
  File "/content/mmdetection/mmdet/models/detectors/single_stage.py", line 71, in forward_train
    *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
  File "/content/mmdetection/mmdet/models/anchor_heads/ssd_head.py", line 200, in loss
    cfg=cfg)
  File "/content/mmdetection/mmdet/core/utils/misc.py", line 24, in multi_apply
    return tuple(map(list, zip(*map_results)))
  File "/content/mmdetection/mmdet/models/anchor_heads/ssd_head.py", line 115, in loss_single
    pos_inds = (labels > 0).nonzero().view(-1)
RuntimeError: copy_if failed to synchronize: cudaErrorAssert: device-side assert triggered
terminate called after throwing an instance of 'c10::Error'
what(): CUDA error: device-side assert triggered (insert_events at /pytorch/c10/cuda/CUDACachingAllocator.cpp:764)
frame #0: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x33 (0x7fdf5b4be193 in /usr/local/lib/python3.6/dist-packages/torch/lib/libc10.so)
(remaining stack frames omitted; symbol names were not resolved)
Please check whether your target labels are out of range. See the error info: Assertion `cur_target >= 0 && cur_target < n_classes` failed.
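The failing assertion is easy to reproduce on CPU, where the same out-of-range target raises a readable Python exception instead of an opaque device-side assert. A minimal sketch:

```python
import torch
import torch.nn.functional as F

# Two-class logits, but the last label falls outside [0, n_classes).
logits = torch.randn(4, 2)
labels = torch.tensor([0, 1, 1, 2])  # 2 is invalid when n_classes=2
try:
    F.cross_entropy(logits, labels)
except (IndexError, RuntimeError) as e:  # exact type varies by PyTorch version
    print('out-of-range target rejected:', type(e).__name__)
```

Running the failing batch on CPU like this is a common way to turn the CUDA assert into an actionable error message.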
Fixed it, @hellock. The issue was with the number of classes.
Thanks for the help!
Can you please elaborate? That would be really helpful. Thanks
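For later readers, a sketch of the likely fix (class names here are illustrative, not taken from the original poster's dataset): in MMDetection 1.x the head's `num_classes` includes the background class, while the dataset's `CLASSES` tuple lists only foreground classes, so the two must satisfy `num_classes == len(CLASSES) + 1`.

```python
# Illustrative one-class setup for MMDetection 1.x ('human' is an example name):
model = dict(
    bbox_head=dict(
        type='SSDHead',
        num_classes=2))  # 1 foreground class + 1 background class

class DroneDataset:  # sketch; the real class subclasses CustomDataset
    CLASSES = ('human',)  # exactly num_classes - 1 entries

# The invariant that, when violated, produces the assert above:
assert model['bbox_head']['num_classes'] == len(DroneDataset.CLASSES) + 1
```

If the dataset yields a label equal to or greater than `num_classes`, the loss kernel hits `cur_target < n_classes` and aborts, exactly as in the log above.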