Following the "Implement a metric" documentation, I implemented the following metric:
class MRRMetric(TensorMetric):
    def forward(self, x1, x2):
        """
        Return the mean reciprocal rank using x1 as queries and x2 as retrieved results.
        :param x1: batch of query embeddings.
        :param x2: batch of result embeddings.
        :return: mean reciprocal rank.
        """
        return mrr(x1, x2)
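For reference, the mrr helper is not shown in the snippet above; a minimal sketch of one way it could be implemented is below. The cosine-similarity scoring and the assumption that the i-th query's correct result is the i-th entry of x2 are mine, not from the original code:

import torch
import torch.nn.functional as F

def mrr(x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
    """Mean reciprocal rank, assuming x2[i] is the correct result for query x1[i]."""
    # Cosine similarity between every query and every candidate result: (N, N)
    scores = F.normalize(x1, dim=-1) @ F.normalize(x2, dim=-1).t()
    # Score of the correct (diagonal) result for each query, shape (N, 1)
    target_scores = scores.diag().unsqueeze(1)
    # 1-based rank of the correct result within each row
    ranks = (scores > target_scores).sum(dim=1) + 1
    return (1.0 / ranks.float()).mean()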
However, it was not clear to me how to integrate the metric and display its value during the training, validation, and testing process.
For instance, given the following model:
class MyModel(LightningModule):
    """Encodes x1 and x2 into the same embedding space."""

    def __init__(self, config):
        super().__init__()
        self.config = config
        self.x1_encoder = Encoder(config)
        self.x2_encoder = Encoder(config)
        self.tokenizer = Tokenizer(config)
        self.loss_fn = NPairLoss()
        self.mrr = MRRMetric(name="MRR")

    def forward(self, x1, x2):
        x1 = self.x1_encoder(x1)
        x2 = self.x2_encoder(x2)
        return x1, x2

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-6, betas=(0.9, 0.999),
                                eps=1e-08, weight_decay=0, amsgrad=True)
    def training_step(self, batch, batch_idx):
        x1, x2 = batch["x1"], batch["x2"]
        predict = self(x1, x2)
        loss = self.loss_fn(predict, target)
        return {'loss': loss}

    def test_step(self, batch, batch_idx):
        x1, x2 = batch["x1"], batch["x2"]
        predict = self(x1, x2)
        loss = self.loss_fn(predict, target)
        return {'test_loss': loss}

    def test_epoch_end(self, outputs):
        avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
        return {'avg_test_loss': avg_loss}

    def validation_step(self, batch, batch_idx):
        x1, x2 = batch["x1"], batch["x2"]
        x1, x2 = self(x1, x2)
        mrr = self.mrr(x1, x2)
        return {'val_loss': mrr}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        return {'val_loss': avg_loss}
    def train_dataloader(self):
        train_dataset = CodeSearchDataset(
            path=self.config.dataset.train_path,
            tokenizer=self.tokenizer,
            max_length=self.config.preprocessing.max_length)
        return DataLoader(
            train_dataset,
            batch_size=self.config.train.batch_size,
            drop_last=True,
            num_workers=self.config.preprocessing.num_workers
        )

    def test_dataloader(self):
        test_dataset = CodeSearchDataset(
            path=self.config.dataset.train_path,
            tokenizer=self.tokenizer,
            max_length=self.config.preprocessing.max_length)
        return DataLoader(
            test_dataset,
            batch_size=self.config.train.batch_size,
            drop_last=True,
            num_workers=self.config.preprocessing.num_workers
        )

    def val_dataloader(self):
        val_dataset = CodeSearchDataset(
            path=self.config.dataset.train_path,
            tokenizer=self.tokenizer,
            max_length=self.config.preprocessing.max_length)
        return DataLoader(
            val_dataset,
            batch_size=self.config.train.batch_size,
            drop_last=True,
            num_workers=self.config.preprocessing.num_workers
        )
What is the best approach to logging something like this:
Epoch 1: 17%|██ | 43/252 [00:11<00:55, 3.78it/s, loss=4.528, mrr=0.725, v_num=0]
including the mrr metric value alongside the returned dictionary?
def training_step(self, batch, batch_idx):
    x1, x2 = batch["x1"], batch["x2"]
    predict = self(x1, x2)
    loss = self.loss_fn(predict, self.train_target)
    b_mrr = self.mrr(x1, x2)
    return {'loss': loss, 'mrr': b_mrr}
Hello @Ceceu
You could try using the progress_bar key to print it in the progress bar:
def training_step(self, batch, batch_idx):
    x, y, z = batch
    # implement your own
    out = self(x)
    loss = self.loss(out, x)

    logger_logs = {'training_loss': loss}  # optional (MUST ALL BE TENSORS)

    # if using TestTubeLogger or TensorBoardLogger you can nest scalars
    logger_logs = {'losses': logger_logs}  # optional (MUST ALL BE TENSORS)

    output = {
        'loss': loss,  # required
        'progress_bar': {'training_loss': loss},  # optional (MUST ALL BE TENSORS)
        'log': logger_logs
    }

    # return a dict
    return output

def training_epoch_end(self, outputs):
    train_acc_mean = 0
    for output in outputs:
        train_acc_mean += output['train_acc']
    train_acc_mean /= len(outputs)

    # log training accuracy at the end of an epoch
    results = {
        'log': {'train_acc': train_acc_mean.item()},
        'progress_bar': {'train_acc': train_acc_mean},
    }
    return results
Edited: You only need to put what you want to print out in the progress bar inside the progress_bar dictionary. loss is printed out by default.
Thanks @ydcjeff,
I was able to print the mrr metric following your instructions:
def training_step(self, batch, batch_idx):
    x1, x2 = batch["x1"], batch["x2"]
    predict = self(x1, x2)
    loss = self.loss_fn(predict, self.train_target)
    mrr = self.mrr(*predict)  # unpack the (x1, x2) embeddings for MRRMetric.forward
    return {'loss': loss, 'progress_bar': {'m': mrr}}
During training, for some reason, not all digits of the metric are shown (only the first few), as if there were a limit on the length of the line printed to the terminal.
Epoch 1: 1%| | 101/13632 [00:34<1:16:00, 2.97it/s, loss=2.576, v_num=17, m=0.
In the output above, m should end up looking like 0.785, but only the part up to the "." is shown.
That's weird. It didn't even show the closing ]. Are you on Jupyter or PyCharm?
PyCharm.
Mind sharing a minimal code example? It's working properly on Colab, though.
Indeed, it's working properly on Colab. Maybe this is a similar issue to #1399.
@Ceceu could you try something like this? You can replace 100 with an appropriate number for your terminal so the full progress bar is shown.
class LitProgressBar(ProgressBar):
    def init_sanity_tqdm(self):
        bar = super().init_sanity_tqdm()
        bar.ncols = 100
        return bar

    def init_training_tqdm(self):
        bar = super().init_training_tqdm()
        bar.ncols = 100
        return bar

    def init_validation_tqdm(self):
        bar = super().init_validation_tqdm()
        bar.ncols = 100
        return bar

    def init_test_tqdm(self):
        bar = super().init_test_tqdm()
        bar.ncols = 100
        return bar

bar = LitProgressBar()
trainer = Trainer(callbacks=[bar])
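If hard-coding a width is inconvenient, one alternative (my suggestion, not from the thread) is to derive ncols from the current terminal size with the standard library, mirroring the init_*_tqdm hooks used above; check that these hook names match the ProgressBar callback in your Lightning version:

import shutil

class AutoWidthProgressBar(ProgressBar):
    """Progress bar whose tqdm width follows the current terminal width."""

    @staticmethod
    def _fit_width(bar):
        # fallback is used when the width cannot be detected (e.g. some IDE consoles)
        bar.ncols = shutil.get_terminal_size(fallback=(120, 24)).columns
        return bar

    def init_sanity_tqdm(self):
        return self._fit_width(super().init_sanity_tqdm())

    def init_training_tqdm(self):
        return self._fit_width(super().init_training_tqdm())

    def init_validation_tqdm(self):
        return self._fit_width(super().init_validation_tqdm())

    def init_test_tqdm(self):
        return self._fit_width(super().init_test_tqdm())

trainer = Trainer(callbacks=[AutoWidthProgressBar()])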
Actually, I refactored to use Loggers.
Yeah, using loggers is fine too.
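For completeness, here is a minimal sketch of the logger-based variant using the 'log' key shown earlier together with a TensorBoardLogger. The metric names, save_dir, and experiment name are placeholders, and the step assumes the metric is computed on the encoded embeddings:

from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger

# inside the LightningModule
def training_step(self, batch, batch_idx):
    x1, x2 = batch["x1"], batch["x2"]
    emb1, emb2 = self(x1, x2)
    loss = self.loss_fn((emb1, emb2), self.train_target)
    mrr = self.mrr(emb1, emb2)
    return {
        'loss': loss,                                    # required
        'progress_bar': {'mrr': mrr},                    # shown in the progress bar
        'log': {'train/loss': loss, 'train/mrr': mrr},   # written by the logger
    }

# when building the trainer
logger = TensorBoardLogger(save_dir="logs", name="joint_encoder")
trainer = Trainer(logger=logger)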