When using the MLflow logger, the log_param() function requires run_id
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-23-d048545e1854> in <module>
9 trainer.fit(model=experiment,
10 train_dataloader=train_dl,
---> 11 val_dataloaders=test_dl)
~/anaconda3/envs/ns_dl_2020_torch/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloader, val_dataloaders, datamodule)
452 self.call_hook('on_fit_start')
453
--> 454 results = self.accelerator_backend.train()
455 self.accelerator_backend.teardown()
456
~/anaconda3/envs/ns_dl_2020_torch/lib/python3.7/site-packages/pytorch_lightning/accelerators/gpu_backend.py in train(self)
51
52 # train or test
---> 53 results = self.train_or_test()
54 return results
55
~/anaconda3/envs/ns_dl_2020_torch/lib/python3.7/site-packages/pytorch_lightning/accelerators/base_accelerator.py in train_or_test(self)
48 results = self.trainer.run_test()
49 else:
---> 50 results = self.trainer.train()
51 return results
52
~/anaconda3/envs/ns_dl_2020_torch/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py in train(self)
499
500 # run train epoch
--> 501 self.train_loop.run_training_epoch()
502
503 if self.max_steps and self.max_steps <= self.global_step:
~/anaconda3/envs/ns_dl_2020_torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in run_training_epoch(self)
525 # TRAINING_STEP + TRAINING_STEP_END
526 # ------------------------------------
--> 527 batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
528
529 # when returning -1 from train_step, we end epoch early
~/anaconda3/envs/ns_dl_2020_torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in run_training_batch(self, batch, batch_idx, dataloader_idx)
660 opt_idx,
661 optimizer,
--> 662 self.trainer.hiddens
663 )
664
~/anaconda3/envs/ns_dl_2020_torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in training_step_and_backward(self, split_batch, batch_idx, opt_idx, optimizer, hiddens)
739 """
740 # lightning module hook
--> 741 result = self.training_step(split_batch, batch_idx, opt_idx, hiddens)
742
743 if result is None:
~/anaconda3/envs/ns_dl_2020_torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in training_step(self, split_batch, batch_idx, opt_idx, hiddens)
300 with self.trainer.profiler.profile('model_forward'):
301 args = self.build_train_args(split_batch, batch_idx, opt_idx, hiddens)
--> 302 training_step_output = self.trainer.accelerator_backend.training_step(args)
303 training_step_output = self.trainer.call_hook('training_step_end', training_step_output)
304
~/anaconda3/envs/ns_dl_2020_torch/lib/python3.7/site-packages/pytorch_lightning/accelerators/gpu_backend.py in training_step(self, args)
59 output = self.__training_step(args)
60 else:
---> 61 output = self.__training_step(args)
62
63 return output
~/anaconda3/envs/ns_dl_2020_torch/lib/python3.7/site-packages/pytorch_lightning/accelerators/gpu_backend.py in __training_step(self, args)
67 batch = self.to_device(batch)
68 args[0] = batch
---> 69 output = self.trainer.model.training_step(*args)
70 return output
71
<ipython-input-21-31b6dc3ffd67> in training_step(self, batch, batch_idx, optimizer_idx)
28 for key, val in train_loss.items():
29 self.log(key, val.item())
---> 30 self.logger.experiment.log_param(key=key, value=val.item())
31
32 return train_loss
TypeError: log_param() missing 1 required positional argument: 'run_id'
The MLFlowLogger should behave the same as the MLflow API, where only the key and value arguments are needed for the log_param() function.
mlf_logger = MLFlowLogger(
    experiment_name='test',
    tracking_uri="file:./ml-runs"
)

class VAEexperiment(LightningModule):
    ...
    def training_step(self, batch, batch_idx, optimizer_idx=0):
        ...
        for key, val in train_loss.items():
            self.logger.experiment.log_param(key=key, value=val.item())
        ...
        return train_loss

trainer = Trainer(logger=mlf_logger,
                  default_root_dir='../logs',
                  early_stop_callback=False,
                  gpus=1,
                  auto_select_gpus=True,
                  max_epochs=40)

trainer.fit(model=experiment,
            train_dataloader=train_dl,
            val_dataloaders=test_dl)
pytorch-lightning==0.10.0
torch==1.6.0
torchsummary==1.5.1
torchvision==0.7.0
Hi! Thanks for your contribution, great first issue!
Here, self.logger.experiment is actually an MlflowClient object, so you'll need to use the function calls specified in this doc - https://www.mlflow.org/docs/latest/_modules/mlflow/tracking/client.html . These functions need run_id as the first argument, which can be accessed as self.logger.run_id.
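For anyone landing here, a minimal sketch of that workaround inside training_step, assuming the logger/module setup from the issue above (compute_losses is a hypothetical helper standing in for however train_loss is built):

from pytorch_lightning import LightningModule

class VAEexperiment(LightningModule):
    ...
    def training_step(self, batch, batch_idx, optimizer_idx=0):
        train_loss = self.compute_losses(batch)  # hypothetical helper returning a dict of losses
        for key, val in train_loss.items():
            self.log(key, val.item())
            # self.logger.experiment is an MlflowClient, so its methods take run_id first:
            # MlflowClient.log_param(run_id, key, value)
            self.logger.experiment.log_param(self.logger.run_id, key, val.item())
        return train_loss

Note that MLflow params are write-once per run, so for loss values that change every step, self.logger.experiment.log_metric(self.logger.run_id, key, val.item()) is usually the better fit.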
@nazim1021 thx for clarification! @qianyu-berkeley feel free to reopen if needed...
Thanks!