Spacy: EntityLinker, pipes.pyx KeyError: '0_12' using sample code given in guides

Created on 18 Oct 2019  ·  3 Comments  ·  Source: explosion/spaCy

Had to change
kb = KnowledgeBase(vocab=nlp.vocab)
to
kb = KnowledgeBase(vocab=nlp.vocab, entity_vector_length=64)

This is my kb_pretrain.py file. I made some changes to resolve an error that I got after copying and pasting the pretraining code from the guide's pretrain_kb.py:

#!/usr/bin/env python
# coding: utf8

"""Example of defining and (pre)training spaCy's knowledge base,
which is needed to implement entity linking functionality.

For more details, see the documentation:
* Knowledge base: https://spacy.io/api/kb
* Entity Linking: https://spacy.io/usage/linguistic-features#entity-linking

Compatible with: spaCy v2.2
Last tested with: v2.2
"""
from __future__ import unicode_literals, print_function

import plac
from pathlib import Path

from spacy.vocab import Vocab
import spacy
from spacy.kb import KnowledgeBase

from bin.wiki_entity_linking.train_descriptions import EntityEncoder


# Toy entity catalogue used to populate the KB. Each key is an entity ID and
# each value is a (description, frequency) pair, which is how `main` unpacks it.
# Q2146908 (Russ Cochran): American golfer
# Q7381115 (Russ Cochran): publisher
ENTITIES = {"Q2146908": ("American golfer", 342), "Q7381115": ("publisher", 17)}

INPUT_DIM = 300  # dimension of pretrained input vectors
# NOTE(review): vectors of this width are stored in the KB, so this presumably
# has to equal the KB's `entity_vector_length` - confirm against the KB API.
DESC_WIDTH = 64  # dimension of output entity vectors


@plac.annotations(
    vocab_path=("Path to the vocab for the kb", "option", "v", Path),
    model=("Model name, should have pretrained word embeddings", "option", "m", str),
    output_dir=("Optional output directory", "option", "o", Path),
    n_iter=("Number of training iterations", "option", "n", int),
)
def main(vocab_path=None, model=None, output_dir=None, n_iter=50):
    """Load the model, create the KB and pretrain the entity encodings.

    Either an nlp model or a vocab is needed to provide access to pretrained
    word embeddings. If both are given, the model is used.

    vocab_path (Path): Location of a serialized vocab, used when no model is given.
    model (str): Name of a spaCy model to load.
    output_dir (Path): If provided, the KB is stored there in a file 'kb'. When
        no `vocab_path` was given, the vocab is also written to a directory
        'vocab' inside `output_dir`. Missing parent directories are created.
    n_iter (int): Number of training iterations. Accepted on the command line
        but not used anywhere in this version of the script.

    RAISES (ValueError): If neither `model` nor `vocab_path` is specified.
    """
    if model is None and vocab_path is None:
        raise ValueError("Either the `nlp` model or the `vocab` should be specified.")

    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
        vocab = Vocab().from_disk(vocab_path)
        # create blank Language class with specified vocab
        nlp = spacy.blank("en", vocab=vocab)
        print("Created blank 'en' model with vocab from '%s'" % vocab_path)

    # the KB stores the encoder's output vectors, so its vector length is tied
    # to DESC_WIDTH rather than to a separately hard-coded number
    kb = KnowledgeBase(vocab=nlp.vocab, entity_vector_length=DESC_WIDTH)

    # set up the data as three parallel lists (same order for all of them)
    entity_ids = []
    descriptions = []
    freqs = []
    for entity_id, (desc, freq) in ENTITIES.items():
        entity_ids.append(entity_id)
        descriptions.append(desc)
        freqs.append(freq)

    # training entity description encodings
    # this part can easily be replaced with a custom entity encoder
    encoder = EntityEncoder(
        nlp=nlp,
        input_dim=INPUT_DIM,
        desc_width=DESC_WIDTH,
    )
    encoder.train(description_list=descriptions, to_print=True)

    # get the pretrained entity vectors
    embeddings = encoder.apply_encoder(descriptions)

    # set the entities, can also be done by calling `kb.add_entity` for each entity
    kb.set_entities(entity_list=entity_ids, freq_list=freqs, vector_list=embeddings)

    # adding aliases, the entities need to be defined in the KB beforehand
    kb.add_alias(
        alias="Russ Cochran",
        entities=["Q2146908", "Q7381115"],
        probabilities=[0.24, 0.7],  # the sum of these probabilities should not exceed 1
    )

    # test the trained model
    print()
    _print_kb(kb)

    # save model to output directory
    if output_dir is not None:
        output_dir = Path(output_dir)
        if not output_dir.exists():
            # parents=True so that a nested output path does not fail when
            # its parent directories are missing
            output_dir.mkdir(parents=True)
        kb_path = str(output_dir / "kb")
        kb.dump(kb_path)
        print()
        print("Saved KB to", kb_path)

        # only storing the vocab if we weren't already reading it from file
        if not vocab_path:
            vocab_path = output_dir / "vocab"
            kb.vocab.to_disk(vocab_path)
            print("Saved vocab to", vocab_path)

        print()

        # test the saved model
        # always reload a knowledge base with the same vocab instance!
        print("Loading vocab from", vocab_path)
        print("Loading KB from", kb_path)
        vocab2 = Vocab().from_disk(vocab_path)
        # must be created with the same vector length the KB was dumped with
        kb2 = KnowledgeBase(vocab=vocab2, entity_vector_length=DESC_WIDTH)
        kb2.load_bulk(kb_path)
        _print_kb(kb2)
        print()


def _print_kb(kb):
    """Print a two-line summary of a knowledge base.

    The first line shows the number of entities followed by their strings,
    the second line shows the same for the aliases.

    kb: Object providing `get_size_entities`, `get_entity_strings`,
        `get_size_aliases` and `get_alias_strings`.
    """
    entity_count = kb.get_size_entities()
    entity_names = kb.get_entity_strings()
    print(entity_count, "kb entities:", entity_names)

    alias_count = kb.get_size_aliases()
    alias_names = kb.get_alias_strings()
    print(alias_count, "kb aliases:", alias_names)


if __name__ == "__main__":
    # Script entry point: plac calls `main`, whose command-line options are
    # declared by the `@plac.annotations` decorator on it.
    plac.call(main)

    # Expected output (the summary printed by `_print_kb`):

    # 2 kb entities: ['Q2146908', 'Q7381115']
    # 1 kb aliases: ['Russ Cochran']

Running the kb_train.py file with:

python kb_train.py testKB/kb testKB/vocab -o trainedKB

Created blank 'en' model with vocab from 'testKB/vocab'
Loaded Knowledge Base from 'testKB/kb'
Traceback (most recent call last):
  File "kb_train.py", line 155, in <module>
    plac.call(main)
  File "/home/geek/anaconda3/lib/python3.7/site-packages/plac_core.py", line 328, in call
    cmd, result = parser.consume(arglist)
  File "/home/geek/anaconda3/lib/python3.7/site-packages/plac_core.py", line 207, in consume
    return cmd, self.func(*(args + varargs + extraopts), **kwargs)
  File "kb_train.py", line 115, in main
    sgd=optimizer,
  File "/home/geek/anaconda3/lib/python3.7/site-packages/spacy/language.py", line 475, in update
    proc.update(docs, golds, sgd=get_grads, losses=losses, **kwargs)
  File "pipes.pyx", line 1191, in spacy.pipeline.pipes.EntityLinker.update
KeyError: '0_12'

Info about spaCy

  • spaCy version: 2.1.8
  • Platform: Linux-5.0.0-31-generic-x86_64-with-debian-buster-sid
  • Python version: 3.7.3
feat / nel usage

Most helpful comment

It looks like the problem might be that you're running spaCy v2.1.8, but are trying to use the script compatible with v2.2 (see file header).

Here's the script for v2.1.8: https://github.com/explosion/spaCy/tree/v2.1.8/bin/wiki_entity_linking

The entity linking functionality is still new and updated frequently, though, so it's probably best to always use the latest version of spaCy if you want to experiment with entity linking.

All 3 comments

It looks like the problem might be that you're running spaCy v2.1.8, but are trying to use the script compatible with v2.2 (see file header).

Here's the script for v2.1.8: https://github.com/explosion/spaCy/tree/v2.1.8/bin/wiki_entity_linking

The entity linking functionality is still new and updated frequently, though, so it's probably best to always use the latest version of spaCy if you want to experiment with entity linking.

@ines Yes, upgrading spacy solves the problem. Thanks.

This thread has been automatically locked since there has not been any recent activity after it was closed. Please open a new issue for related bugs.

Was this page helpful?
0 / 5 - 0 ratings