Spacy: spacy train command produces confusing error if no parser annotations and -P flag not set

Created on 8 Jul 2018 · 2 Comments · Source: explosion/spaCy

How to reproduce the behaviour

I have converted my .iob format files into .json format files.

[
  {
    "id":0,
    "paragraphs":[
      {
        "sentences":[
          {
            "tokens":[
              {
                "tag":"JJ",
                "ner":"B-protein",
                "orth":"Tumor-associated"
              },
              {
                "tag":"NN",
                "ner":"I-protein",
                "orth":"NADH"
              },
              {
                "tag":"NN",
                "ner":"L-protein",
                "orth":"oxidase"
              },
              {
                "tag":"(",
                "ner":"O",
                "orth":"("
              },
              {
                "tag":"NN",
                "ner":"U-protein",
                "orth":"tNOX"
              },
              {
                "tag":",",
                "ner":"O",
                "orth":","
              },
              {
                "tag":"RB",
                "ner":"O",
                "orth":"also"
              },
              {
                "tag":"VBN",
                "ner":"O",
                "orth":"known"
              },
              {
                "tag":"IN",
                "ner":"O",
                "orth":"as"
              },
              {
                "tag":"NN",
                "ner":"U-protein",
                "orth":"ENOX2"
              },
              {
                "tag":")",
                "ner":"O",
                "orth":")"
              },
              {
                "tag":"VBZ",
                "ner":"O",
                "orth":"is"
              },
              {
                "tag":"DT",
                "ner":"O",
                "orth":"a"
              },
              {
                "tag":"JJ",
                "ner":"B-protein",
                "orth":"growth-related"
              },
              {
                "tag":"NN",
                "ner":"L-protein",
                "orth":"protein"
              },
              {
                "tag":"VBN",
                "ner":"O",
                "orth":"expressed"
              },
              {
                "tag":"IN",
                "ner":"O",
                "orth":"in"
              },
              {
                "tag":"VBN",
                "ner":"B-cell_line",
                "orth":"transformed"
              },
              {
                "tag":"NNS",
                "ner":"L-cell_line",
                "orth":"cells"
              },
              {
                "tag":".",
                "ner":"O",
                "orth":"."
              }
            ]
          }
        ]
      }
    ]
  },
  {
    "id":0,
    "paragraphs":[
      {
        "sentences":[
          {
            "tokens":[
              {
                "tag":"JJ",
                "ner":"O",
                "orth":"Previous"
              },
              {
                "tag":"NNS",
                "ner":"O",
                "orth":"reports"
              },
              {
                "tag":"VBP",
                "ner":"O",
                "orth":"have"
              },
              {
                "tag":"VBN",
                "ner":"O",
                "orth":"revealed"
              },
              {
                "tag":"IN",
                "ner":"O",
                "orth":"that"
              },
              {
                "tag":"DT",
                "ner":"O",
                "orth":"the"
              },
              {
                "tag":"NN",
                "ner":"O",
                "orth":"inhibition"
              },
              {
                "tag":"IN",
                "ner":"O",
                "orth":"of"
              },
              {
                "tag":"NN",
                "ner":"U-protein",
                "orth":"tNOX"
              },
              {
                "tag":"NN",
                "ner":"O",
                "orth":"activity"
              },
              {
                "tag":"IN",
                "ner":"O",
                "orth":"by"
              },
              {
                "tag":"DT",
                "ner":"O",
                "orth":"the"
              },
              {
                "tag":"JJ",
                "ner":"O",
                "orth":"anti-cancer"
              },
              {
                "tag":"NN",
                "ner":"O",
                "orth":"drug"
              },
              {
                "tag":",",
                "ner":"O",
                "orth":","
              },
              {
                "tag":"NN",
                "ner":"O",
                "orth":"capsaicin"
              },
              {
                "tag":",",
                "ner":"O",
                "orth":","
              },
              {
                "tag":"VBZ",
                "ner":"O",
                "orth":"correlates"
              },
              {
                "tag":"IN",
                "ner":"O",
                "orth":"with"
              },
              {
                "tag":"DT",
                "ner":"O",
                "orth":"a"
              },
              {
                "tag":"NN",
                "ner":"O",
                "orth":"reduction"
              },
              {
                "tag":"IN",
                "ner":"O",
                "orth":"in"
              },
              {
                "tag":"NN",
                "ner":"O",
                "orth":"growth"
              },
              {
                "tag":"IN",
                "ner":"O",
                "orth":"of"
              },
              {
                "tag":"NN",
                "ner":"B-cell_type",
                "orth":"cancer"
              },
              {
                "tag":"NNS",
                "ner":"L-cell_type",
                "orth":"cells"
              },
              {
                "tag":",",
                "ner":"O",
                "orth":","
              },
              {
                "tag":"VBG",
                "ner":"O",
                "orth":"indicating"
              },
              {
                "tag":"DT",
                "ner":"O",
                "orth":"a"
              },
              {
                "tag":"JJ",
                "ner":"O",
                "orth":"close"
              },
              {
                "tag":"NN",
                "ner":"O",
                "orth":"relationship"
              },
              {
                "tag":"IN",
                "ner":"O",
                "orth":"between"
              },
              {
                "tag":"NN",
                "ner":"U-protein",
                "orth":"tNOX"
              },
              {
                "tag":"NN",
                "ner":"O",
                "orth":"activity"
              },
              {
                "tag":"CC",
                "ner":"O",
                "orth":"and"
              },
              {
                "tag":"NN",
                "ner":"O",
                "orth":"cell"
              },
              {
                "tag":"NN",
                "ner":"O",
                "orth":"growth"
              },
              {
                "tag":".",
                "ner":"O",
                "orth":"."
              }
            ]
          }
        ]
      }
    ]
  },
......
]

and when I run this command
python -m spacy train en model corpustrain.json corpusdev.json
an error occurs as follows:

dropout_from = 0.2 by default
dropout_to = 0.2 by default
dropout_decay = 0.0 by default
batch_from = 1 by default
batch_to = 16 by default
batch_compound = 1.001 by default
max_doc_len = 5000 by default
beam_width = 1 by default
beam_density = 0.0 by default
beam_width = 1 by default
beam_density = 0.0 by default
Warning: Unnamed vectors -- this won't allow multiple vectors models to be loaded. (Shape: (0, 0))
learn_rate = 0.001 by default
optimizer_B1 = 0.9 by default
optimizer_B2 = 0.999 by default
optimizer_eps = 1e-08 by default
L2_penalty = 1e-06 by default
grad_norm_clip = 1.0 by default
embed_size = 7000 by default
token_vector_width = 128 by default
parser_hidden_depth = 1 by default
parser_maxout_pieces = 2 by default
token_vector_width = 128 by default
hidden_width = 200 by default
embed_size = 7000 by default
history_feats = 0 by default
history_width = 0 by default
parser_hidden_depth = 1 by default
parser_maxout_pieces = 2 by default
token_vector_width = 128 by default
hidden_width = 200 by default
embed_size = 7000 by default
history_feats = 0 by default
history_width = 0 by default
Itn.    P.Loss  N.Loss  UAS     NER P.  NER R.  NER F.  Tag %   Token %
Saving model...
Traceback (most recent call last):
  File "D:\Anaconda3\envs\tensorflow\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "D:\Anaconda3\envs\tensorflow\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "D:\Anaconda3\envs\tensorflow\lib\site-packages\spacy\__main__.py", line 31, in <module>
    plac.call(commands[command], sys.argv[1:])
  File "D:\Anaconda3\envs\tensorflow\lib\site-packages\plac_core.py", line 328, in call
    cmd, result = parser.consume(arglist)
  File "D:\Anaconda3\envs\tensorflow\lib\site-packages\plac_core.py", line 207, in consume
    return cmd, self.func(*(args + varargs + extraopts), **kwargs)
  File "D:\Anaconda3\envs\tensorflow\lib\site-packages\spacy\cli\train.py", line 133, in train
    drop=next(dropout_rates), losses=losses)
  File "D:\Anaconda3\envs\tensorflow\lib\site-packages\spacy\language.py", line 427, in update
    proc.update(docs, golds, drop=drop, sgd=get_grads, losses=losses)
  File "nn_parser.pyx", line 591, in spacy.syntax.nn_parser.Parser.update
  File "nn_parser.pyx", line 773, in spacy.syntax.nn_parser.Parser.get_batch_loss
  File "arc_eager.pyx", line 536, in spacy.syntax.arc_eager.ArcEager.set_costs
ValueError: [E021] Could not find a gold-standard action to supervise the dependency parser. The GoldParse was projective. The transition system has 3 actions. State at failure: __0 __0 After_0 | a purse-string

How to deal with it?
I'm looking forward to a solution. Thanks a lot.

Your Environment

Operating System: Windows 10 1803
Python Version Used: 3.5.3
spaCy Version Used: 2.0.11

bug feat / cli training

Source

czhou29

Most helpful comment

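It'd be good to throw a more useful error here, but fortunately the solution is simple: you need to add the -P flag to your training command, so that spaCy does not try to train the parser. For example: python -m spacy train en model corpustrain.json corpusdev.json -P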

The problem is that spaCy is trying to train the parser, and it hasn't noticed there are no sentences with dependency parse information (and therefore no sentences to add labels from). So the parser then has no actions added, which triggers the exception.