My environment:
windows 10 x64
python 3.6.6
cuda 9.0, V9.0.176
cudnn 7.3.1
tensorflow 1.11.0
keras 2.2.4.
# NOTE: the pasted snippet uses os, sys, np, tensorflow, yaml and
# train_test_split without showing their imports; they are added here so the
# script can run on its own.
import os
import sys

import numpy as np
import tensorflow
import yaml
from sklearn.model_selection import train_test_split

# Ask TensorFlow to grow its GPU memory allocation on demand.
# NOTE(review): `session` is created but never handed to Keras in the visible
# code (e.g. via keras.backend.set_session) -- confirm that the allow_growth
# setting actually takes effect during training.
config = tensorflow.ConfigProto()
config.gpu_options.allow_growth = True
session = tensorflow.Session(config=config)

# Root directory of the local Mask R-CNN checkout.
ROOT_DIR = os.path.abspath("C:/tool/Mask_RCNN/")
sys.path.append(ROOT_DIR)  # To find local version of the library

# These imports must come after ROOT_DIR has been appended to sys.path.
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

# Directory for training logs and the saved weights file.
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path of the COCO pre-trained weights; downloaded if not present.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
if not os.path.exists(COCO_MODEL_PATH):
    print(COCO_MODEL_PATH)
    utils.download_trained_weights(COCO_MODEL_PATH)
class DrugDataset(utils.Dataset):
    """Dataset of image / mask / YAML triples registered under "Newton Ring".

    NOTE(review): no ``load_mask`` override is visible in this snippet. If the
    class really has none, the base ``utils.Dataset.load_mask`` is used, which
    presumably yields no instances for any image -- that would match the
    endless "no instance" output reported further down. Confirm against
    mrcnn/utils.py.
    """

    def get_obj_index(self, image):
        """Return the maximum value found in ``image`` (``np.max``).

        NOTE(review): presumably the number of objects in an index-coded
        mask -- confirm against the caller.
        """
        return np.max(image)

    def from_yaml_get_class(self, image_id):
        """Return the ``label_names`` of an image, without the first entry.

        image_id: index into ``self.image_info``; the entry must carry a
            ``yaml_path`` key pointing to a YAML file with a ``label_names``
            list.
        """
        info = self.image_info[image_id]
        with open(info['yaml_path']) as f:
            # safe_load replaces yaml.load() without a Loader: the latter can
            # construct arbitrary Python objects and is rejected by newer
            # PyYAML releases. A plain list of label names loads identically.
            temp = yaml.safe_load(f)
        # Drop the first label (presumably the background entry -- confirm).
        return temp['label_names'][1:]

    def load_shapes(self, img_number, count, height, width, img_folder,
                    mask_folder, imglist, dataset_root_path):
        """Register the "rainbow" class and ``count`` images with the dataset.

        img_number: 1-based indices into ``imglist``; the first ``count``
            entries are used.
        count: number of images to add.
        height, width: stored unchanged on every image entry.
        img_folder: folder with one sub-folder per sample holding
            ``imgrs.jpg``.
        mask_folder: folder with one sub-folder per sample holding
            ``maskrs.jpg`` and ``info.yaml``.
        imglist: list of sample sub-folder names.
        dataset_root_path: not used here; kept so existing callers still work.
        """
        self.add_class("Newton Ring", 1, "rainbow")
        for i in range(count):
            # img_number is 1-based, imglist is 0-based.
            filestr = imglist[img_number[i] - 1]
            sample_dir = mask_folder + "/" + filestr
            self.add_image(
                "Newton Ring",
                image_id=i,
                path=img_folder + "/" + filestr + "/imgrs.jpg",
                width=width,
                height=height,
                mask_path=sample_dir + "/maskrs.jpg",
                yaml_path=sample_dir + "/info.yaml",
            )
class ShapesConfig(Config):
    """Training configuration for the single-class "Newton Ring" dataset.

    Only the values below are overridden; everything else comes from the
    base ``Config``.
    """

    NAME = "Newton Ring"

    # Train on 1 GPU with 1 image per GPU.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Background + 1 object class ("rainbow").
    NUM_CLASSES = 1 + 1

    IMAGE_MIN_DIM = 1024
    IMAGE_MAX_DIM = 1024

    # Anchor side in pixels: (48, 96, 192, 384, 768).
    # Alternative kept from the original comment: (32, 64, 128, 256, 512).
    RPN_ANCHOR_SCALES = (8 * 6, 16 * 6, 32 * 6, 64 * 6, 128 * 6)

    TRAIN_ROIS_PER_IMAGE = 32
    STEPS_PER_EPOCH = 100
    VALIDATION_STEPS = 5
iter_num = 0  # NOTE(review): not referenced anywhere in the visible code.

# NOTE: this rebinds `config`, which held the TensorFlow ConfigProto above.
config = ShapesConfig()
config.display()

# Images, masks and YAML files all live under the same root folder.
dataset_root_path = "D:/PROJECT/Dataset/HSIR/imageandmask"
img_folder = dataset_root_path
mask_folder = dataset_root_path
# NOTE(review): os.listdir() order is not guaranteed, and the indices written
# to testdata.yml below are positions in this list -- whatever reads that
# file must list the folder in the same order.
imglist = os.listdir(img_folder)
count = len(imglist)
width = 1224
height = 1024

# 1-based sample indices: 98% train, and the remaining 2% split 20/80 into
# validation and test.
x = range(1, count + 1)
x_train, x_other = train_test_split(x, test_size=0.02, random_state=42)
x_val, x_test = train_test_split(x_other, test_size=0.8, random_state=42)

dataset_train = DrugDataset()
dataset_train.load_shapes(x_train, len(x_train), height, width, img_folder,
                          mask_folder, imglist, dataset_root_path)
dataset_train.prepare()

dataset_val = DrugDataset()
dataset_val.load_shapes(x_val, len(x_val), height, width, img_folder,
                        mask_folder, imglist, dataset_root_path)
dataset_val.prepare()

# Persist the held-out test indices to testdata.yml.
data = dict(
    dataname='rainbow_test',
    index_test=x_test,
)
with open('testdata.yml', 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)
print("config and divide the dataset")
# Build the model in training mode; logs are written under MODEL_DIR.
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

# Which weights to start from.
init_with = "coco"  # imagenet, coco, or last
if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Skip the listed head layers when loading the COCO weights
    # (presumably because their shapes depend on NUM_CLASSES -- confirm).
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    model.load_weights(model.find_last(), by_name=True)

print("start training")
# Stage 1: train with layers='heads' at the configured learning rate.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=5,
            layers='heads')
# Stage 2: train with layers="all" at a tenth of the learning rate.
# NOTE(review): in Matterport's MaskRCNN.train, `epochs` is presumably the
# total epoch count, so this call would run epochs 6-10 -- confirm.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE / 10,
            epochs=10,
            layers="all")

print("save model")
model_path = os.path.join(MODEL_DIR, "mask_rcnn_shapes.h5")
model.keras_model.save_weights(model_path)
I modified train_shapes.py and added cross-validation.
My dataset:
1896 images (1224x1024), with only 1 class and 1 object per image.
Training got stuck on the first epoch (1/5): GPU utilization stayed at almost 0% and nothing changed for hours.
Then I traced the code and found that it gets stuck in model.py at 'keras_model.fit_generator()'.
I tried the solutions in #287:
setting workers=1 and use_multiprocessing=False,
or updating Keras,
but nothing worked.
Has anyone encountered this problem and found another solution?
Please help me.
Someone told me the main problem is the Keras version,
but after I changed to tensorflow 1.6.0 and keras 2.1.6
it still did not work on a Mac with no GPU.
I tried to trace the code by printing something at every step.
Then I found that when I ran balloon.py,
it executed step by step, as follows:
Epoch 1/10
-1
build rpn
Init batch arrays
Add to batch
batch b= 1
1/100 [..............................] - ETA: 23:05 - loss: 4.0967 - rpn_class_loss: 0.0105 - rpn_bbox_loss: 0.2817 - mrcnn_class_loss: 2.6626 - mrcnn_bbox_loss: 0.5993 - mrcnn_mask_loss: 0.54260
build rpn
Init batch arrays
Add to batch
batch b= 1
2/100 [......
But when I executed my own code,
nothing was printed after "Epoch 1/5".
When I tried the following (with debug prints added):
b = 0 # batch item index
image_index = -1
image_ids = np.copy(dataset.image_ids)
error_count = 0
no_augmentation_sources = no_augmentation_sources or []
# Anchors
# [anchor_count, (y1, x1, y2, x2)]
print("initial")
backbone_shapes = compute_backbone_shapes(config, config.IMAGE_SHAPE)
anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
config.RPN_ANCHOR_RATIOS,
backbone_shapes,
config.BACKBONE_STRIDES,
config.RPN_ANCHOR_STRIDE)
print("initial2")
while True:
try:
# Increment index to pick next image. Shuffle if at the start of an epoch.
print(image_index)
image_index = (image_index + 1) % len(image_ids)
if shuffle and image_index == 0:
np.random.shuffle(image_ids)
# Get GT bounding boxes and masks for image.
image_id = image_ids[image_index]
# If the image source is not to be augmented pass None as augmentation
if dataset.image_info[image_id]['source'] in no_augmentation_sources:
image, image_meta, gt_class_ids, gt_boxes, gt_masks = \
load_image_gt(dataset, config, image_id, augment=augment,
augmentation=None,
use_mini_mask=config.USE_MINI_MASK)
print("no augment")
else:
image, image_meta, gt_class_ids, gt_boxes, gt_masks = \
load_image_gt(dataset, config, image_id, augment=augment,
augmentation=augmentation,
use_mini_mask=config.USE_MINI_MASK)
# Skip images that have no instances. This can happen in cases
# where we train on a subset of classes and the image doesn't
# have any of the classes we care about.
if not np.any(gt_class_ids > 0):
print("no instance")
continue
# RPN Targets
rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors,
gt_class_ids, gt_boxes, config)
print("build rpn")
it only printed:
Epoch 1/1
initial
initial2
-1
no instance
0
no instance
1
no instance
2
no instance
3
no instance
4
no instance
5
no instance
6
no instance
7
no instance
8
no instance
9
no instance
10
no instance
11
no instance
12
no instance
13
no instance
14
no instance
0
....
and so on, repeatedly.
I did add self.add_class("Newton Ring", 1, "rainbow") in my code, so why is there still no instance?
I ran into the same problem and found a solution here:
https://stackoverflow.com/questions/51176661/keras-seems-to-hang-after-call-to-fit-generator
Hope it helps.
Hey, did you find a solution?
I had the same problem and solved it by downgrading the graphics card driver.