I tried to run the example of pysyft tutorial
link: https://github.com/OpenMined/PySyft/blob/dev/examples/tutorials/Part%206%20-%20Federated%20Learning%20on%20MNIST%20using%20a%20CNN.ipynb
But I get an error in the `.federate()` call.
Somehow it can't convert a torchvision.datasets dataset to a FederatedDataset.
I think the error is somewhere in the for loop of the `dataset_federate` function (path: syft/frameworks/torch/federated/dataset.py).
Thank you!
@LaRiffle Do you know why, for build tensor type, `first_layer` would be true but the length of `lambdas` would be < 1?
Hey @geochri are you running the tutorial "as is" or have you made any modifications?
Also, do you have gpus or cuda?
@LaRiffle I don't have gpu. I didn't make any modifications!
Today I tried this approach and it works for the conversion.
train_dataset = datasets.MNIST('.', train=True, download=True,
transform=transform)
train_base = sy.BaseDataset(data=train_dataset.data, targets=train_dataset.targets)
train_base_federated = train_base.federate((bob, alice))
federated_trainloader = sy.FederatedDataLoader(train_base_federated,
batch_size=64,
shuffle=True)
But it's strange... because now another error appears.
---------------------------------------------------------------------------
PureTorchTensorFoundError Traceback (most recent call last)
~/anaconda3/lib/python3.7/site-packages/syft/frameworks/torch/tensors/interpreters/native.py in handle_func_command(cls, command)
198 new_args, new_kwargs, new_type, args_type = syft.frameworks.torch.hook_args.hook_function_args(
--> 199 cmd, args, kwargs, return_args_type=True
200 )
~/anaconda3/lib/python3.7/site-packages/syft/frameworks/torch/hook/hook_args.py in hook_function_args(attr, args, kwargs, return_args_type)
147 # Try running it
--> 148 new_args = hook_args(args)
149
~/anaconda3/lib/python3.7/site-packages/syft/frameworks/torch/hook/hook_args.py in <lambda>(x)
341
--> 342 return lambda x: f(lambdas, x)
343
~/anaconda3/lib/python3.7/site-packages/syft/frameworks/torch/hook/hook_args.py in seven_fold(lambdas, args, **kwargs)
543 return (
--> 544 lambdas[0](args[0], **kwargs),
545 lambdas[1](args[1], **kwargs),
~/anaconda3/lib/python3.7/site-packages/syft/frameworks/torch/hook/hook_args.py in <lambda>(i)
319 # Last if not, rule is probably == 1 so use type to return the right transformation.
--> 320 else lambda i: forward_func[type(i)](i)
321 for a, r in zip(args, rules) # And do this for all the args / rules provided
~/anaconda3/lib/python3.7/site-packages/syft/frameworks/torch/hook/hook_args.py in <lambda>(i)
50 if hasattr(i, "child")
---> 51 else (_ for _ in ()).throw(PureTorchTensorFoundError),
52 torch.nn.Parameter: lambda i: i.child
~/anaconda3/lib/python3.7/site-packages/syft/frameworks/torch/hook/hook_args.py in <genexpr>(.0)
50 if hasattr(i, "child")
---> 51 else (_ for _ in ()).throw(PureTorchTensorFoundError),
52 torch.nn.Parameter: lambda i: i.child
PureTorchTensorFoundError:
During handling of the above exception, another exception occurred:
RuntimeError Traceback (most recent call last)
<timed exec> in <module>
<ipython-input-18-72ddef6ce5b6> in train(args, model, device, federated_train_loader, optimizer, epoch)
5 data, target = data.to(device), target.to(device)
6 optimizer.zero_grad()
----> 7 output = model(data)
8 loss = F.nll_loss(output, target)
9 loss.backward()
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
<ipython-input-17-ceb0955942ca> in forward(self, x)
8
9 def forward(self, x):
---> 10 x = F.relu(self.conv1(x))
11 x = F.max_pool2d(x, 2, 2)
12 x = F.relu(self.conv2(x))
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in forward(self, input)
336 _pair(0), self.dilation, self.groups)
337 return F.conv2d(input, self.weight, self.bias, self.stride,
--> 338 self.padding, self.dilation, self.groups)
339
340
~/anaconda3/lib/python3.7/site-packages/syft/frameworks/torch/hook/hook.py in overloaded_func(*args, **kwargs)
705 cmd_name = f"{attr.__module__}.{attr.__name__}"
706 command = (cmd_name, None, args, kwargs)
--> 707 response = TorchTensor.handle_func_command(command)
708 return response
709
~/anaconda3/lib/python3.7/site-packages/syft/frameworks/torch/tensors/interpreters/native.py in handle_func_command(cls, command)
207 new_command = (cmd, None, new_args, new_kwargs)
208 # Send it to the appropriate class and get the response
--> 209 response = new_type.handle_func_command(new_command)
210 # Put back the wrappers where needed
211 response = syft.frameworks.torch.hook_args.hook_response(
~/anaconda3/lib/python3.7/site-packages/syft/frameworks/torch/pointers/object_pointer.py in handle_func_command(cls, command)
86
87 # Send the command
---> 88 response = owner.send_command(location, command)
89
90 return response
~/anaconda3/lib/python3.7/site-packages/syft/workers/base.py in send_command(self, recipient, message, return_ids)
421
422 try:
--> 423 ret_val = self.send_msg(MSGTYPE.CMD, message, location=recipient)
424 except ResponseSignatureError as e:
425 ret_val = None
~/anaconda3/lib/python3.7/site-packages/syft/workers/base.py in send_msg(self, msg_type, message, location)
219
220 # Step 2: send the message and wait for a response
--> 221 bin_response = self._send_msg(bin_message, location)
222
223 # Step 3: deserialize the response
~/anaconda3/lib/python3.7/site-packages/syft/workers/virtual.py in _send_msg(self, message, location)
4 class VirtualWorker(BaseWorker):
5 def _send_msg(self, message: bin, location: BaseWorker) -> bin:
----> 6 return location._recv_msg(message)
7
8 def _recv_msg(self, message: bin) -> bin:
~/anaconda3/lib/python3.7/site-packages/syft/workers/virtual.py in _recv_msg(self, message)
7
8 def _recv_msg(self, message: bin) -> bin:
----> 9 return self.recv_msg(message)
~/anaconda3/lib/python3.7/site-packages/syft/workers/base.py in recv_msg(self, bin_message)
250 print(f"worker {self} received {sy.codes.code2MSGTYPE[msg_type]} {contents}")
251 # Step 1: route message to appropriate function
--> 252 response = self._message_router[msg_type](contents)
253
254 # Step 2: Serialize the message to simple python objects
~/anaconda3/lib/python3.7/site-packages/syft/workers/base.py in execute_command(self, message)
379 command = getattr(command, path)
380
--> 381 response = command(*args, **kwargs)
382
383 # some functions don't return anything (such as .backward())
~/anaconda3/lib/python3.7/site-packages/syft/frameworks/torch/hook/hook.py in overloaded_func(*args, **kwargs)
705 cmd_name = f"{attr.__module__}.{attr.__name__}"
706 command = (cmd_name, None, args, kwargs)
--> 707 response = TorchTensor.handle_func_command(command)
708 return response
709
~/anaconda3/lib/python3.7/site-packages/syft/frameworks/torch/tensors/interpreters/native.py in handle_func_command(cls, command)
224 # in the execute_command function
225 if isinstance(args, tuple):
--> 226 response = eval(cmd)(*args, **kwargs)
227 else:
228 response = eval(cmd)(args, **kwargs)
RuntimeError: Expected 4-dimensional input for 4-dimensional weight 20 1 5, but got 3-dimensional input of size [5, 28, 28] instead
This is a bit mysterious to me :/
First, why did you change the data path from `../data` to `.`?
Second thing, can you try replacing the cell
federated_train_loader = sy.FederatedDataLoader( # <-- this is now a FederatedDataLoader
datasets.MNIST('../data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
]))
.federate((bob, alice)), # <-- NEW: we distribute the dataset across all the workers, it's now a FederatedDataset
batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=args.test_batch_size, shuffle=True, **kwargs)
with
federated_train_loader = sy.FederatedDataLoader( # <-- this is now a FederatedDataLoader
datasets.MNIST('../data', train=True, download=True)
.federate((bob, alice)), # <-- NEW: we distribute the dataset across all the workers, it's now a FederatedDataset
batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=False),
batch_size=args.test_batch_size, shuffle=True, **kwargs)
What happens now?
I had moved the data to a different path, but this isn't the problem.
I restored the path on the data, to be exactly the same as the repo path, but I had the same error.
I replaced the cell with your suggestion, and here is the new error that occurred.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-4-7a53acbaa3ff> in <module>
1 federated_train_loader = sy.FederatedDataLoader( # <-- this is now a FederatedDataLoader
2 datasets.MNIST('../data', train=True, download=True)
----> 3 .federate((bob, alice)), # <-- NEW: we distribute the dataset across all the workers, it's now a FederatedDataset
4 batch_size=args.batch_size, shuffle=True, **kwargs)
5
~/anaconda3/lib/python3.7/site-packages/syft/frameworks/torch/federated/dataset.py in dataset_federate(dataset, workers)
159 datasets = []
160 data_loader = torch.utils.data.DataLoader(dataset, batch_size=data_size)
--> 161 for dataset_idx, (data, targets) in enumerate(data_loader):
162 worker = workers[dataset_idx % len(workers)]
163 logger.debug("Sending data to worker %s", worker.id)
~/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
558 if self.num_workers == 0: # same-process loading
559 indices = next(self.sample_iter) # may raise StopIteration
--> 560 batch = self.collate_fn([self.dataset[i] for i in indices])
561 if self.pin_memory:
562 batch = _utils.pin_memory.pin_memory_batch(batch)
~/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
66 elif isinstance(batch[0], container_abcs.Sequence):
67 transposed = zip(*batch)
---> 68 return [default_collate(samples) for samples in transposed]
69
70 raise TypeError((error_msg_fmt.format(type(batch[0]))))
~/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py in <listcomp>(.0)
66 elif isinstance(batch[0], container_abcs.Sequence):
67 transposed = zip(*batch)
---> 68 return [default_collate(samples) for samples in transposed]
69
70 raise TypeError((error_msg_fmt.format(type(batch[0]))))
~/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
68 return [default_collate(samples) for samples in transposed]
69
---> 70 raise TypeError((error_msg_fmt.format(type(batch[0]))))
TypeError: batch must contain tensors, numbers, dicts or lists; found <class 'PIL.Image.Image'>
This is now fixed in dev
and will be in master very soon.
The issue was caused by the torchvision update from 0.2.2 to 0.3.
Thank you!
Most helpful comment
This is now fixed in dev
and will be in master very soon.
The issue was caused by the torchvision update from 0.2.2 to 0.3.