Ray: PBT update config in custom explore function

Created on 28 Jun 2019 · 5 Comments · Source: ray-project/ray

I am new to ray and tune. I want to use PBT with a dynamic config. For example,

def explore(config):
    # The problem being asked about: at this point config['varA'] is still
    # None, not the value assigned in TrainMNIST._setup.
    return config

class TrainMNIST(Trainable):
    def _setup(self, config):
        # `something` is a placeholder in the original post for the value
        # that gets assigned during setup.
        config['varA'] = something
        # Copy the filled-in entries into the trainable's own config.
        self.config.update(config)

# `...` stands for the remaining PopulationBasedTraining arguments, which the
# original post leaves out.
pbt = PopulationBasedTraining(
    ...,
    custom_explore_fn=explore)

tune.run(
    TrainMNIST,
    name="exp",
    scheduler=pbt,
    stop={
        "test_acc": 0.99,
        "training_iteration": 100,
    },
    resources_per_trial={
        "cpu": 2,
        "gpu": 0.25,
    },
    # varA is deliberately None here; TrainMNIST._setup assigns it later.
    config={
        "args": vars(args),
        "varA": None,
    },
)

How can I explore with config['varA'] = something?

question

Most helpful comment

I found a way to work around this by adding trial.config.update(result['config']) in on_trial_result. I use this to make the explore function adapt to each model's performance. For example, when a model performs better, I use a smaller step for its hyperparameters.

Still, I think synchronizing the config is essential for robustness.

    def on_trial_result(self, trial_runner, trial, result):
        trial.config.update(result['config'])
        ...

All 5 comments

Yes, you can do anything to the config in the explore function. I don't think changing it in setup makes sense though.

Yes, you can do anything to the config in the explore function. I don't think changing it in setup makes sense though.

Sorry for the confusion. I meant that config['varA'] is None when it is passed into tune.run. Then I set config['varA'] = 1 in TrainMNIST._setup. I want explore to see config['varA'] = 1. However, in explore I get
config['varA'] = None.

# Placeholder paths from the original post; replace them with real string
# paths before running.
MNIST_ROOT = path/to/mnist
LOCAL_DIR = path/to/results
# Passed to PopulationBasedTraining as `perturbation_interval` below.
perturbation_interval = 1


def explore(config):
    """Custom explore function that leaves the config untouched.

    The incoming config is printed so the values handed to this function
    can be inspected.

    Args:
      config: dictionary containing ray config params.

    Returns:
      The same config object that was passed in, unmodified (not a copy).
    """
    print(config)
    return config


class Net(nn.Module):
    """Minimal classifier: one 3x3 convolution followed by a linear layer.

    For a 1x28x28 input the convolution yields 3x26x26 and the 3x3 max-pool
    reduces that to 3x8x8, i.e. the 192 features the linear layer expects.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 3, kernel_size=3)
        self.fc = nn.Linear(192, 10)

    def forward(self, x):
        """Return the log-softmax over the 10 class scores of each sample."""
        pooled = F.max_pool2d(self.conv1(x), 3)
        # Flatten to rows of 192 features for the linear layer.
        features = F.relu(pooled).view(-1, 192)
        logits = self.fc(features)
        return F.log_softmax(logits, dim=1)


class TrainMNIST(Trainable):
    """Trainable that fits the small `Net` model on MNIST with SGD.

    Each training step runs one pass over the training set and reports the
    test-set accuracy under the key "test_acc".
    """

    def _setup(self, config):
        """Build data loaders, model, optimizer and LR schedule.

        Args:
            config: trial config dict. It must contain "args" (a dict that
                includes "seed") and "a".
        """
        # Remove "args" from config, then let the remaining config entries
        # override same-named entries in args.
        args = config.pop("args")
        args.update(config)

        # Seed the torch RNGs before the loaders and the model are created.
        self.is_cuda = torch.cuda.is_available()
        torch.manual_seed(args['seed'])
        if self.is_cuda:
            torch.cuda.manual_seed(args['seed'])

        # Extra DataLoader options are only passed when CUDA is available.
        self.kwargs = {"num_workers": 4, "pin_memory": True} if self.is_cuda else {}
        # NOTE(review): 0.1307 / 0.3081 are presumably the MNIST mean and
        # standard deviation — confirm.
        mnist_transforms = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize((0.1307,), (0.3081,))])

        # download=False: nothing is fetched here, so the data is expected to
        # already be present under MNIST_ROOT.
        self.trainset = datasets.MNIST(MNIST_ROOT, train=True, download=False, transform=mnist_transforms)
        self.testset = datasets.MNIST(MNIST_ROOT, train=False, download=False, transform=mnist_transforms)
        self.train_loader = torch.utils.data.DataLoader(self.trainset, batch_size=256, shuffle=True, **self.kwargs)
        # The test loader is shuffled as well; _test sums over every batch,
        # so the ordering does not change the reported accuracy.
        self.test_loader = torch.utils.data.DataLoader(self.testset, batch_size=256, shuffle=True, **self.kwargs)

        self.model = Net()
        if self.is_cuda:
            self.model.cuda()

        self.optimizer = optim.SGD(self.model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
        # Learning rate is scaled by 0.2 at each of the milestones 30, 60, 90.
        self.scheduler = MultiStepLR(self.optimizer, milestones=[30, 60, 90], gamma=0.2)
        self.args = args

        # If "a" is still None, fill in a value and store it in self.config.
        # NOTE(review): according to the discussion around this script, this
        # change is not reflected in the config handed to the custom explore
        # function — that is the behaviour being asked about.
        if config['a'] is None:
            config['a'] = 1111111
            self.config.update(config)

    def _train_iteration(self):
        """Run one pass over the training set, one optimizer step per batch."""
        self.model.train()
        for batch_idx, (data, target) in enumerate(self.train_loader):
            if self.is_cuda:
                data, target = data.cuda(), target.cuda()
            self.optimizer.zero_grad()
            output = self.model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            self.optimizer.step()

    def _test(self):
        """Evaluate on the test set.

        Returns:
            dict with key "test_acc": number of correct predictions divided
            by the size of the test dataset.
        """
        self.model.eval()
        correct = 0
        with torch.no_grad():
            for data, target in self.test_loader:
                if self.is_cuda:
                    data, target = data.cuda(), target.cuda()
                output = self.model(data)
                # get the index of the max log-probability
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(
                    target.data.view_as(pred)).long().cpu().sum()

        accuracy = correct.item() / len(self.test_loader.dataset)
        return {"test_acc": accuracy}

    def _train(self):
        """Train for one pass, step the LR scheduler, and return test metrics."""
        self._train_iteration()
        # self._iteration is not set in this class; presumably it is
        # maintained by the Trainable base class — verify.
        self.scheduler.step(epoch=self._iteration)
        return self._test()

    def _save(self, checkpoint_dir):
        """Write the model weights to `checkpoint_dir`/model.pth and return that path.

        Only the model's state_dict is saved; optimizer and LR-scheduler
        state are not part of the checkpoint.
        """
        checkpoint_path = os.path.join(checkpoint_dir, "model.pth")
        torch.save(self.model.state_dict(), checkpoint_path)
        return checkpoint_path

    def _restore(self, checkpoint_path):
        """Load model weights from the file written by `_save`."""
        self.model.load_state_dict(torch.load(checkpoint_path))

    def reset_config(self, new_config):
        """Merge `new_config` into self.config and return True.

        NOTE(review): the True return value presumably signals that the
        config was reset in place (tune.run below sets reuse_actors=True) —
        confirm against the Trainable API.
        """
        self.config.update(new_config)
        return True

if __name__ == "__main__":
    # Command-line options: optional Ray cluster address and the random seed.
    cli = argparse.ArgumentParser(description="PyTorch MNIST Example")
    cli.add_argument(
        "--ray-redis-address",
        help="Address of Ray cluster for seamless distributed execution.")
    cli.add_argument(
        "--seed",
        type=int,
        default=1,
        metavar="S",
        help="random seed (default: 1)")
    args = cli.parse_args()

    # Connect to an existing cluster only when an address was supplied.
    if args.ray_redis_address:
        ray.init(redis_address=args.ray_redis_address)

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="test_acc",
        perturbation_interval=perturbation_interval,
        custom_explore_fn=explore,
        log_config=True,
    )

    stop_criteria = {"test_acc": 0.95, "training_iteration": 100}
    trial_resources = {"cpu": 2, "gpu": 0.25}
    # "a" is deliberately None here; TrainMNIST._setup fills it in.
    trial_config = {"args": vars(args), "a": None}

    tune.run(
        TrainMNIST,
        name="exp",
        scheduler=pbt,
        stop=stop_criteria,
        resources_per_trial=trial_resources,
        reuse_actors=True,
        num_samples=2,
        checkpoint_freq=0,
        local_dir=LOCAL_DIR,
        config=trial_config,
    )

Hm, I think this is to be expected given how PBT internally tracks the config. The copy of the config it gives your trial is not connected to the original since the trial may be running on a different machine. Why not set a: 1 in the experiment config instead?

I found a way to work around this by adding trial.config.update(result['config']) in on_trial_result. I use this to make the explore function adapt to each model's performance. For example, when a model performs better, I use a smaller step for its hyperparameters.

Still, I think synchronizing the config is essential for robustness.

    def on_trial_result(self, trial_runner, trial, result):
        trial.config.update(result['config'])
        ...

update config when getting results

Was this page helpful?
0 / 5 - 0 ratings

Related issues

devin-petersohn picture devin-petersohn · 35 Comments

mattearllongshot picture mattearllongshot · 33 Comments

manishagarwal23 picture manishagarwal23 · 32 Comments

arsedler9 picture arsedler9 · 35 Comments

ankeshanand picture ankeshanand · 31 Comments