import torch
from transformers import BertTokenizer, BertModel, BertForMaskedLM
# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
text = "[CLS] For an unfamiliar eye, the Porsche Cayenne and the Cayenne Coupe would look similar"
tokenized_text = tokenizer.tokenize(text)
# Mask a token that we will try to predict back with `BertForMaskedLM`
masked_index = 3
tokenized_text[masked_index] = '[MASK]'
print(tokenized_text)
# Convert tokens to vocabulary indices
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
tokens = tokenizer.convert_ids_to_tokens(indexed_tokens)
string = tokenizer.convert_tokens_to_string(tokens)  # round-trip sanity check (unused below)
# Define sentence A and B indices associated to 1st and 2nd sentences (see paper);
# here there is a single sentence, so every segment id is 0
segments_ids = [0] * len(tokenized_text)

# Convert inputs to PyTorch tensors
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])
model = BertForMaskedLM.from_pretrained('bert-base-uncased', is_decoder=True)
model.eval()

# Predict all tokens
with torch.no_grad():
    outputs = model(tokens_tensor, token_type_ids=segments_tensors, encoder_hidden_states=tokens_tensor)
    predictions = outputs[0]
print('state_dict', len(model.state_dict()))
predicted_indices = []
# Confirm we can predict the masked token back
for i in range(len(tokenized_text)):
    predicted_indices.append(torch.argmax(predictions[0, i]).item())
predicted_token = tokenizer.convert_ids_to_tokens([predicted_indices[masked_index]])[0]
print('indexed_tokens', indexed_tokens)
print('predicted_indices', predicted_indices)
predicted_text = tokenizer.decode(predicted_indices)
print(predicted_text)
In modeling_bert it's mentioned:

To behave as a decoder the model needs to be initialized with the
`is_decoder` argument of the configuration set to `True`; an
`encoder_hidden_states` is expected as an input to the forward pass.

So I did the same in my code, but I get two errors:
INFO:transformers.modeling_utils:Weights of BertForMaskedLM not initialized from pretrained model: ['bert.encoder.layer.0.crossattention.self.query.weight
and
File "/Volumes/Data/transformers-master/transformers/modeling_bert.py", line 679, in forward
extended_attention_mask = causal_mask[:, None, :, :] * attention_mask[:, None, None, :]
RuntimeError: expected device cpu and dtype Float but got device cpu and dtype Bool
Am I missing something, or is this the wrong way to configure a BERT decoder? In general, I'd like to know how the encoder-decoder transformer works in BERT.
Hi, you're initializing a decoder but you're using it as an encoder. For the task you're showing here, you only need the encoder part, no need to initialize a decoder:
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
model.eval()
# Predict all tokens
with torch.no_grad():
    outputs = model(tokens_tensor, token_type_ids=segments_tensors)
    predictions = outputs[0]
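To then read the masked-token prediction back out (a short continuation of the snippet above, reusing `masked_index` from the original script):

# Take the highest-scoring vocabulary id at the masked position
predicted_index = torch.argmax(predictions[0, masked_index]).item()
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
print(predicted_token)  # ideally recovers the masked word, e.g. 'unfamiliar'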
You can see an example of the Model2Model architecture (encoder-decoder) based on BERT in the quickstart section of the documentation.
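For reference, the Model2Model usage in that quickstart looks roughly like this (a sketch based on the 2.x-era API; the `decoder_lm_labels` argument and the exact call signature may differ between releases):

import torch
from transformers import BertTokenizer, Model2Model

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Encoder and decoder are both initialized from bert-base-uncased;
# the decoder's cross-attention weights start out untrained
model = Model2Model.from_pretrained('bert-base-uncased')
model.eval()

question_tensor = torch.tensor([tokenizer.encode("Who was Jim Henson?")])
answer_tensor = torch.tensor([tokenizer.encode("Jim Henson was a puppeteer")])

with torch.no_grad():
    # The encoder reads the question; the decoder runs over the answer tokens
    outputs = model(question_tensor, answer_tensor, decoder_lm_labels=answer_tensor)
    lm_loss = outputs[0]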
Hi @LysandreJik,
I intend to use BERT with a generative head.
Can you give an example of using BERT with `is_decoder` set to `True`?
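A minimal sketch of the setup the docstring quoted above describes, assuming a transformers version where `BertModel.forward` accepts `encoder_hidden_states` (variable names here are illustrative; the INFO message about uninitialized weights is expected, since the decoder's cross-attention layers are freshly initialized):

import torch
from transformers import BertConfig, BertModel, BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
input_ids = torch.tensor([tokenizer.encode("The Porsche Cayenne is an SUV")])

# A plain BERT encoder produces the hidden states
encoder = BertModel.from_pretrained('bert-base-uncased')

# The decoder is the same architecture, but with is_decoder=True in its
# config, which enables causal masking and adds cross-attention layers
decoder_config = BertConfig.from_pretrained('bert-base-uncased')
decoder_config.is_decoder = True
decoder = BertModel.from_pretrained('bert-base-uncased', config=decoder_config)

encoder.eval()
decoder.eval()
with torch.no_grad():
    encoder_hidden = encoder(input_ids)[0]  # (batch, seq_len, hidden_size)
    # The decoder cross-attends to the encoder's hidden states,
    # not to raw token ids
    decoder_outputs = decoder(input_ids, encoder_hidden_states=encoder_hidden)

For a generative head on top, the same `is_decoder=True` config can be passed to `BertForMaskedLM` instead of `BertModel`, which adds the LM head over the decoder outputs.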