Keras: weights saved/loaded incorrectly when weights frozen

Created on 26 Jul 2018 · 17 comments · Source: keras-team/keras

Given this code:

from keras.applications.vgg16 import VGG16
from keras.layers import Input, Flatten, Dense
from keras.models import Model

vgg = VGG16(weights='imagenet', include_top=False, input_shape=(360, 480, 3))

for layer in vgg.layers[:-4]:
    layer.trainable = False

inputs = Input(shape=(360, 480, 3))

encoder = vgg(inputs)
encoder = Flatten()(encoder)
encoder = Dense(16)(encoder)
encoder = Model(inputs, encoder)

in1 = Input(shape=(360, 480, 3))

comparator = Model(in1, encoder(in1))

comparator.save_weights('h.h5')
comparator.load_weights('h.h5')

I get this error:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-54-a45f35902442> in <module>()
     16 
     17 comparator.save_weights('h.h5')
---> 18 comparator.load_weights('h.h5')

c:\users\seanh\appdata\local\programs\python\python36\lib\site-packages\keras\engine\network.py in load_weights(self, filepath, by_name, skip_mismatch, reshape)
   1178             else:
   1179                 saving.load_weights_from_hdf5_group(
-> 1180                     f, self.layers, reshape=reshape)
   1181 
   1182     def _updated_config(self):

c:\users\seanh\appdata\local\programs\python\python36\lib\site-packages\keras\engine\saving.py in load_weights_from_hdf5_group(f, layers, reshape)
    914                                                        original_keras_version,
    915                                                        original_backend,
--> 916                                                        reshape=reshape)
    917         if len(weight_values) != len(symbolic_weights):
    918             raise ValueError('Layer #' + str(k) +

c:\users\seanh\appdata\local\programs\python\python36\lib\site-packages\keras\engine\saving.py in preprocess_weights_for_loading(layer, weights, original_keras_version, original_backend, reshape)
    555         weights = convert_nested_time_distributed(weights)
    556     elif layer.__class__.__name__ in ['Model', 'Sequential']:
--> 557         weights = convert_nested_model(weights)
    558 
    559     if original_keras_version == '1':

c:\users\seanh\appdata\local\programs\python\python36\lib\site-packages\keras\engine\saving.py in convert_nested_model(weights)
    531                     weights=weights[:num_weights],
    532                     original_keras_version=original_keras_version,
--> 533                     original_backend=original_backend))
    534                 weights = weights[num_weights:]
    535 

c:\users\seanh\appdata\local\programs\python\python36\lib\site-packages\keras\engine\saving.py in preprocess_weights_for_loading(layer, weights, original_keras_version, original_backend, reshape)
    555         weights = convert_nested_time_distributed(weights)
    556     elif layer.__class__.__name__ in ['Model', 'Sequential']:
--> 557         weights = convert_nested_model(weights)
    558 
    559     if original_keras_version == '1':

c:\users\seanh\appdata\local\programs\python\python36\lib\site-packages\keras\engine\saving.py in convert_nested_model(weights)
    543                     weights=weights[:num_weights],
    544                     original_keras_version=original_keras_version,
--> 545                     original_backend=original_backend))
    546                 weights = weights[num_weights:]
    547         return new_weights

c:\users\seanh\appdata\local\programs\python\python36\lib\site-packages\keras\engine\saving.py in preprocess_weights_for_loading(layer, weights, original_keras_version, original_backend, reshape)
    672                                  str(weights[0].size) + '. ')
    673             weights[0] = np.reshape(weights[0], layer_weights_shape)
--> 674         elif layer_weights_shape != weights[0].shape:
    675             weights[0] = np.transpose(weights[0], (3, 2, 0, 1))
    676             if layer.__class__.__name__ == 'ConvLSTM2D':

IndexError: list index out of range

but if the trainable = False part is taken out:

vgg = VGG16(weights='imagenet', include_top=False, input_shape=(360, 480, 3))

# for layer in vgg.layers[:-4]:
#     layer.trainable = False

inputs = Input(shape=(360, 480, 3))

encoder = vgg(inputs)
encoder = Flatten()(encoder)
encoder = Dense(16)(encoder)
encoder = Model(inputs, encoder)

in1 = Input(shape=(360, 480, 3))

comparator = Model(in1, encoder(in1))

comparator.save_weights('h.h5')
comparator.load_weights('h.h5')

No error occurs and it works. It also works if the comparator is omitted and just the encoder is saved and loaded:

vgg = VGG16(weights='imagenet', include_top=False, input_shape=(360, 480, 3))

for layer in vgg.layers[:-4]:
    layer.trainable = False

inputs = Input(shape=(360, 480, 3))

encoder = vgg(inputs)
encoder = Flatten()(encoder)
encoder = Dense(16)(encoder)
encoder = Model(inputs, encoder)

encoder.save_weights('h.h5')
encoder.load_weights('h.h5')

It seems like in the error case it is failing to save or load the non-trainable weights, but I don't see why that would be the case.

To put it into a question, what is the cause of this error and how can I get around it without skipping the comparator?
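
For anyone debugging this, a quick way to see what actually got written to the file is to walk the HDF5 groups directly (a diagnostic sketch; it assumes only the standard h5py API and the nested-group layout that Keras 2.x save_weights produces):

import h5py

# Print every group and dataset in the checkpoint, with dataset shapes,
# to check whether the frozen layers' weights made it into the file.
with h5py.File('h.h5', 'r') as f:
    f.visititems(lambda name, obj: print(name, getattr(obj, 'shape', '')))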


All 17 comments

I'm having trouble reproducing this issue. Can you try upgrading your version of Keras and your backend? (What backend are you using?)

Update us after you install the latest Keras and the latest version of your backend.
One thing that may be happening (depending on how you run your code: native Python? AWS? Jupyter?) is that the frozen layers effectively have no weights. If you never initialize a layer and it is not trainable, it ends up with empty weights. Usually you freeze a layer after training it, to preserve its weights through further changes.
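
For what it's worth, a frozen layer does still have weights once the model is built; trainable = False only moves them between collections. A quick check (assuming Keras 2.x semantics):

from keras.layers import Dense, Input
from keras.models import Model

inp = Input(shape=(4,))
layer = Dense(2)
model = Model(inp, layer(inp))            # building the model creates the weights
layer.trainable = False
print(len(layer.weights))                 # 2 (kernel + bias): the weights still exist
print(len(layer.trainable_weights))       # 0: they just stop being trainable
print(len(layer.non_trainable_weights))   # 2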

This is using Keras 2.2.0 and TensorFlow 1.9.0, both up to date according to pip.
I am running it in a Jupyter notebook, but I get the same results when running it as a .py file.

I discovered the issue because a model I had trained (VGG plus Dense layers, with three copies concatenated on different inputs) won't load from checkpoints. That one was trained, so I don't believe that to be the issue.

Raymond, if the provided code works for you, what Keras version and backend are you using?

Just tested on the master branch installed from the GitHub source; same error.

I'm using TensorFlow as well; can you try it with tf.keras? This doesn't seem to have any issues:

from tensorflow.python.keras.applications.vgg16 import VGG16
from tensorflow.python.keras import layers
from tensorflow.python.keras import models

vgg = VGG16(weights='imagenet', include_top=False, input_shape=(360, 480, 3))

for layer in vgg.layers[:-4]:
  layer.trainable = False

inputs = layers.Input(shape=(360, 480, 3))

encoder = vgg(inputs)
encoder = layers.Flatten()(encoder)
encoder = layers.Dense(16)(encoder)
encoder = models.Model(inputs, encoder)

in1 = layers.Input(shape=(360, 480, 3))

comparator = models.Model(in1, encoder(in1))

comparator.save_weights('h.h5')
comparator.load_weights('h.h5')

That does work, thank you! Closing, as the question was answered, but it is interesting that tensorflow.python.keras works for this while plain Keras doesn't. I guess I have some reading up to do.

I cannot make the code provided by @raymond-yuan work. It still ends up with an IndexError. Would you know how to fix it?

Keras 2.2.4
Tensorflow 1.13.0-rc0
Python 3.6

I have the same versions as zikaadam and the same issue. I can't load models that were saved with frozen weights using tf.keras.

  File "test.py", line 22, in <module>
    comparator.load_weights('h.h5')
  File "/home/phorous/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/network.py", line 1516, in load_weights
    saving.load_weights_from_hdf5_group(f, self.layers)
  File "/home/phorous/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/saving.py", line 810, in load_weights_from_hdf5_group
    layer, weight_values, original_keras_version, original_backend)
  File "/home/phorous/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/saving.py", line 468, in preprocess_weights_for_loading
    weights = convert_nested_model(weights)
  File "/home/phorous/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/saving.py", line 444, in convert_nested_model
    original_backend=original_backend))
  File "/home/phorous/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/saving.py", line 468, in preprocess_weights_for_loading
    weights = convert_nested_model(weights)
  File "/home/phorous/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/saving.py", line 456, in convert_nested_model
    original_backend=original_backend))
  File "/home/phorous/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/saving.py", line 549, in preprocess_weights_for_loading
    if K.int_shape(layer.weights[0]) != weights[0].shape:
IndexError: list index out of range

I got the same issue when saving a checkpoint and later loading the best-performing checkpoint.

model_names = checkpoint_models_path + 'model.{epoch:02d}-{val_loss:.4f}.hdf5'
model_checkpoint = ModelCheckpoint(
    model_names, monitor='val_loss', verbose=1, save_best_only=True)
callbacks = [tensor_board, model_checkpoint, early_stop, reduce_lr]
model.fit_generator(trainDataGenerator,
                    steps_per_epoch=len(trainDataGenerator),
                    validation_data=validDataGenerator,
                    validation_steps=len(validDataGenerator),
                    epochs=3,
                    callbacks=callbacks,
                    verbose=1)
pretrained_path = get_best_model()
model.load_weights(pretrained_path)

The failing line is:

if K.int_shape(layer.weights[0]) != weights[0].shape:

Python 3.6, TensorFlow 1.13.1
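
For context, that IndexError comes from indexing an empty list: the loader walks the flat list of saved arrays and hands each nested layer the number of arrays it currently expects, so if the trainable flags changed between save and load, the slicing drifts and some layer receives an empty chunk. A simplified, self-contained illustration (not the actual saving.py code):

# Toy model of the nested-model slicing, just to show the failure mode.
saved_arrays = ['conv_kernel', 'conv_bias']   # what the file contains
expected_counts = [2, 1]                      # what the rebuilt layers now expect
for count in expected_counts:
    chunk, saved_arrays = saved_arrays[:count], saved_arrays[count:]
    first = chunk[0]   # raises IndexError once a chunk comes back empty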

I think I found a solution to this: before loading weights, make sure to set trainable = True/False to the same state it was in when the weights were saved.
Also, I don't know if it's the case, but it might be that Keras saves only the weights of layers set to trainable=True; that would be terrible.
Either way, I think keras_model.save_weights() shouldn't make any distinction between trainable and frozen weights!
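
A minimal, self-contained sketch of that workaround, rebuilding the model with the same trainable flags before loading (the builder function is illustrative, not from the original post):

from keras.applications.vgg16 import VGG16
from keras.layers import Input, Flatten, Dense
from keras.models import Model

def build_comparator(freeze):
    # Rebuild the exact architecture; `freeze` must match the save-time state.
    vgg = VGG16(weights='imagenet', include_top=False, input_shape=(360, 480, 3))
    if freeze:
        for layer in vgg.layers[:-4]:
            layer.trainable = False
    inputs = Input(shape=(360, 480, 3))
    encoder = Model(inputs, Dense(16)(Flatten()(vgg(inputs))))
    in1 = Input(shape=(360, 480, 3))
    return Model(in1, encoder(in1))

build_comparator(freeze=True).save_weights('h.h5')

restored = build_comparator(freeze=True)   # same flags as at save time
restored.load_weights('h.h5')              # loads cleanly, per this workaround
# build_comparator(freeze=False).load_weights('h.h5')  # would hit the IndexError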

I have a similar issue as well: I can't load weights into a model if the model has a different trainable state. I agree with @veqtor that freezing or unfreezing layers should not have any effect on saving and loading weights.

@Sean-Hastings This issue should not be closed because, although the problem might be solved in the TensorFlow Keras fork, it is still an issue in the original Keras code.

I also ran into this problem. Keras seems to save the trainable weights first, which disrupts the normal ordering of layers. There are also some logic mistakes when loading weights by name: the function load_weights_from_hdf5_group_by_name actually loads weights by order, not by name.

I made some modifications to saving.py to deal with this problem; I hope these changes help.

I added a helper before load_weights_from_hdf5_group_by_name that loads weights by name:

def load_weights(weights_dict, layer,
                 weight_value_tuples,
                 original_keras_version=None,
                 original_backend=None,
                 reshape=False):
    # Recurse into nested models; leaf layers look up their own weights
    # by variable name instead of relying on positional order.
    if hasattr(layer, 'layers'):
        for l in layer.layers:
            load_weights(weights_dict, l,
                         weight_value_tuples=weight_value_tuples,
                         original_keras_version=original_keras_version,
                         original_backend=original_backend,
                         reshape=reshape)
    else:
        symbolic_weights = []
        weight_values = []
        for w in layer.weights:
            if w.name in weights_dict:
                symbolic_weights.append(w)
                weight_values.append(weights_dict[w.name])
        if weight_values:
            weight_values = preprocess_weights_for_loading(
                layer, weight_values,
                original_keras_version=original_keras_version,
                original_backend=original_backend,
                reshape=reshape)
            # Pair each value with the symbolic weight it was matched to,
            # so partially matched layers don't get misaligned assignments.
            for w, val in zip(symbolic_weights, weight_values):
                weight_value_tuples.append((w, val))

Modify load_weights_from_hdf5_group_by_name as follows:

def load_weights_from_hdf5_group_by_name(f, layers, skip_mismatch=False,
                                         reshape=False):

    """Implements name-based weight loading.

    (instead of topological weight loading).

    Layers that have no matching name are skipped.

    # Arguments
        f: A pointer to a HDF5 group.
        layers: A list of target layers.
        skip_mismatch: Boolean, whether to skip loading of layers
            where there is a mismatch in the number of weights,
            or a mismatch in the shape of the weights.
        reshape: Reshape weights to fit the layer when the correct number
            of values are present but the shape does not match.

    # Raises
        ValueError: in case of mismatch between provided layers
            and weights file and skip_mismatch=False.
    """
    if 'keras_version' in f.attrs:
        original_keras_version = f.attrs['keras_version'].decode('utf8')
    else:
        original_keras_version = '1'
    if 'backend' in f.attrs:
        original_backend = f.attrs['backend'].decode('utf8')
    else:
        original_backend = None

    # New file format.
    layer_names = load_attributes_from_hdf5_group(f, 'layer_names')

    # Build a dict mapping each weight's variable name to its saved value.
    weights_dict = {}
    for name in layer_names:
        g = f[name]
        weight_names = load_attributes_from_hdf5_group(g, 'weight_names')
        for weight_name in weight_names:
            weights_dict[weight_name] = np.asarray(g[weight_name])

    # Load weights by name, recursing into nested models.
    weight_value_tuples = []
    for layer in layers:
        load_weights(weights_dict, 
                     layer, 
                     weight_value_tuples,
                     original_keras_version,
                     original_backend,
                     reshape=reshape)

    K.batch_set_value(weight_value_tuples)

I also changed the weight-saving part, so that save_weights_to_hdf5_group keeps the order of layers:

def get_symbolic_weights(layer, symbolic_weights):
    # Flatten nested models depth-first so the saved order matches the layers.
    if not hasattr(layer, 'layers'):
        if len(layer.weights) > 0:
            symbolic_weights += layer.weights
    else:
        for l in layer.layers:
            get_symbolic_weights(l, symbolic_weights)



def save_weights_to_hdf5_group(f, layers):
    from .. import __version__ as keras_version

    save_attributes_to_hdf5_group(
        f, 'layer_names', [layer.name.encode('utf8') for layer in layers])
    f.attrs['backend'] = K.backend().encode('utf8')
    f.attrs['keras_version'] = str(keras_version).encode('utf8')

    for layer in layers:
        g = f.create_group(layer.name)
        symbolic_weights = []
        get_symbolic_weights(layer, symbolic_weights)
        # was: symbolic_weights = layer.weights
        weight_values = K.batch_get_value(symbolic_weights)
        weight_names = []
        for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)):
            if hasattr(w, 'name') and w.name:
                name = str(w.name)
            else:
                name = 'param_' + str(i)
            weight_names.append(name.encode('utf8'))
        save_attributes_to_hdf5_group(g, 'weight_names', weight_names)
        for name, val in zip(weight_names, weight_values):
            param_dset = g.create_dataset(name, val.shape,
                                          dtype=val.dtype)
            if not val.shape:
                # scalar
                param_dset[()] = val
            else:
                param_dset[:] = val

I cannot guarantee that these changes work for every Keras model. If there are mistakes, feel free to discuss.

Hi!

I get the same error when I try to load the following model after training it.
I have tried different versions of Keras (2.2.2, 2.2.3, 2.2.4, 2.2.5) as well as tensorflow.python.keras.
I also tried building the model from JSON and calling load_weights, but got the same result.

I don't get an error if the layers are trainable.
I don't get an error if, instead of using base_network + meta_network, I use the ranking function directly. (The problem then is that my branches don't share the same weights, of course.)

So it seems that the problem comes from using frozen layers in a nested model.

Any help or working configurations are welcome :)

def create_base_network(input_dim):
    """
    The main part of the network, the one that gives a score to each picture
    :param input_dim: dimension of input pictures during training
    :return: Keras model object
    """
    feature_extractor = VGG19(weights="imagenet", include_top=False, input_shape=input_dim)
    for layer in feature_extractor.layers[:17]:
        layer.trainable = False

    inp = Input(shape=input_dim, name='input_image')
    base = feature_extractor(inp)
    base = Flatten(name='Flatten')(base)
    base = Dense(64, activation='relu', name='Dense_1')(base)
    base = BatchNormalization(name='BN1')(base)
    base = Dropout(0.49, name='Drop_1')(base)
    base = Dense(1, name="Dense_Output")(base)

    base_network = Model(inp, base, name='Scoring_model')
    return base_network


def create_meta_network(input_dim):
    """
    Second network that teaches the first how to rank pictures

    :param input_dim: dimensions of pictures
    :return: Keras model object
    """
    input_left = Input(shape=input_dim, name='left_input')
    input_right = Input(shape=input_dim, name='right_input')

    base_network = create_base_network(input_dim)

    left_score = base_network(input_left)
    right_score = base_network(input_right)

    # Subtract scores
    diff = Subtract()([left_score, right_score])

    # Pass difference through sigmoid function.
    prob = Activation("sigmoid", name="Activation_sigmoid")(diff)
    model = Model(inputs=[input_left, input_right], outputs=prob, name="Meta_Model")
    sgd = SGD(lr=1e-5, decay=1e-6, momentum=0.393, nesterov=True)
    model.compile(optimizer=sgd, loss="binary_crossentropy", metrics=['accuracy'])

    return model

def ranking_network(input_dim):

    feature_extractor = VGG19(weights="imagenet", include_top=False, input_shape=input_dim)
    for layer in feature_extractor.layers[:17]:
        layer.trainable = False

    img_left = Input(shape=input_dim)
    base_left = feature_extractor(img_left)
    base_left = Flatten(name='Flatten_left')(base_left)
    base_left = Dense(64, activation='relu', name='Dense_1_left')(base_left)
    base_left = BatchNormalization(name='BN1_left')(base_left)
    base_left = Dropout(0.49, name='Drop_1_left')(base_left)
    base_left = Dense(1, name="Dense_Output_left")(base_left)

    img_right = Input(shape=input_dim)
    base_right = feature_extractor(img_right)
    base_right = Flatten(name='Flatten_right')(base_right)
    base_right = Dense(64, activation='relu', name='Dense_1_right')(base_right)
    base_right = BatchNormalization(name='BN1_right')(base_right)
    base_right = Dropout(0.49, name='Drop_1_right')(base_right)
    base_right = Dense(1, name="Dense_Output_right")(base_right)

    # Subtract scores
    diff = Subtract()([base_left, base_right])

    # Pass difference through sigmoid function.
    prob = Activation("sigmoid", name="Activ_sigmoid")(diff)
    model = Model(inputs=[img_left, img_right], outputs=prob)
    sgd = SGD(lr=1e-5, decay=1e-6, momentum=0.393, nesterov=True)
    model.compile(optimizer=sgd, loss="binary_crossentropy", metrics=['accuracy'])

    return model


Here is my configuration:
- Keras 2.2.5
- Tensorflow 1.14
- Tensorflow.python.keras 2.2.4-tf
- Python 3.6.8 |Anaconda, Inc.| (default, Feb 21 2019, 18:30:04) [MSC v.1916 64 bit (AMD64)]

Hi,
I hit the same problem using tensorflow-gpu 1.8/1.13 and Keras 2.2.4.

Traceback (most recent call last):
  File "train_08173d.py", line 753, in <module>
    train()
  File "train_08173d.py", line 581, in train
    model.load_weights('3dunet_pretrained_batch_12019091023110.0_1.0FN0.5FP0.5_5e-5_1.h5')
  File "/home/xybai/anaconda3/envs/tensorflow1.8/lib/python3.6/site-packages/keras/engine/network.py", line 1166, in load_weights
    f, self.layers, reshape=reshape)
  File "/home/xybai/anaconda3/envs/tensorflow1.8/lib/python3.6/site-packages/keras/engine/saving.py", line 1045, in load_weights_from_hdf5_group
    reshape=reshape)
  File "/home/xybai/anaconda3/envs/tensorflow1.8/lib/python3.6/site-packages/keras/engine/saving.py", line 682, in preprocess_weights_for_loading
    weights = convert_nested_model(weights)
  File "/home/xybai/anaconda3/envs/tensorflow1.8/lib/python3.6/site-packages/keras/engine/saving.py", line 658, in convert_nested_model
    original_backend=original_backend))
  File "/home/xybai/anaconda3/envs/tensorflow1.8/lib/python3.6/site-packages/keras/engine/saving.py", line 801, in preprocess_weights_for_loading
    weights[0] = np.transpose(weights[0], (3, 2, 0, 1))
  File "<__array_function__ internals>", line 6, in transpose
  File "/home/xybai/anaconda3/envs/tensorflow1.8/lib/python3.6/site-packages/numpy/core/fromnumeric.py", line 650, in transpose
    return _wrapfunc(a, 'transpose', axes)
  File "/home/xybai/anaconda3/envs/tensorflow1.8/lib/python3.6/site-packages/numpy/core/fromnumeric.py", line 61, in _wrapfunc
    return bound(*args, **kwds)
ValueError: axes don't match array

Hi,
Here is my solution.
First, I save the frozen part individually.
Then, I save the whole model.
When loading weights, I keep layer.trainable = False for the frozen part and load the whole model.
Next, I load the weights of the frozen part with load_weights(..., by_name=True) and set layer.trainable = True for the frozen part.
Finally, I compile the model.
It works for me.
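
A minimal sketch of that recipe (the model, file names, optimizer, and loss below are placeholders chosen to make the snippet self-contained):

from keras.applications.vgg16 import VGG16
from keras.layers import Input, Flatten, Dense
from keras.models import Model

vgg = VGG16(weights='imagenet', include_top=False, input_shape=(360, 480, 3))
for layer in vgg.layers[:-4]:
    layer.trainable = False                       # the frozen part
inputs = Input(shape=(360, 480, 3))
model = Model(inputs, Dense(16)(Flatten()(vgg(inputs))))

vgg.save_weights('frozen_part.h5')                # 1. save the frozen part alone
model.save_weights('whole_model.h5')              # 2. save the whole model

# ... later, with the same trainable flags still in place:
model.load_weights('whole_model.h5')              # 3. load the whole model
vgg.load_weights('frozen_part.h5', by_name=True)  # 4. restore the frozen part by name
for layer in vgg.layers[:-4]:
    layer.trainable = True                        # 5. unfreeze
model.compile(optimizer='sgd', loss='mse')        # 6. recompile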


I got exactly the same issue (frozen layers in a nested model, as described above). Do you have any progress on this?
