I'm currently using PyTorch Geometric to solve a classification task for 3D objects. I was hoping to rework this small PyTorch Geometric example into PyTorch Lightning, but I run into the following data-type-related error when I reach the dataloader part:
TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'torch_geometric.data.data.Data'>.
As far as I understand, PyTorch Geometric's Data object simply stores PyTorch tensors in a specific structure.
I have two questions:
My example code currently looks like this:
import torch
from torch import nn
import pytorch_lightning as pl
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from torch.nn import functional as F
from torch_geometric.datasets import FAUST
import torch_geometric.transforms as T
from torch_geometric.nn import SplineConv
import os
import os.path as osp


class Net(pl.LightningModule):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = SplineConv(1, 32, dim=3, kernel_size=5, aggr="add")
        self.conv2 = SplineConv(32, 64, dim=3, kernel_size=5, aggr="add")
        self.conv3 = SplineConv(64, 64, dim=3, kernel_size=5, aggr="add")
        self.conv4 = SplineConv(64, 64, dim=3, kernel_size=5, aggr="add")
        self.conv5 = SplineConv(64, 64, dim=3, kernel_size=5, aggr="add")
        self.conv6 = SplineConv(64, 64, dim=3, kernel_size=5, aggr="add")
        self.lin1 = torch.nn.Linear(64, 256)
        self.lin2 = torch.nn.Linear(256, 6890)

    def forward(self, data):
        x, edge_index, pseudo = data.x, data.edge_index, data.edge_attr
        x = F.elu(self.conv1(x, edge_index, pseudo))
        x = F.elu(self.conv2(x, edge_index, pseudo))
        x = F.elu(self.conv3(x, edge_index, pseudo))
        x = F.elu(self.conv4(x, edge_index, pseudo))
        x = F.elu(self.conv5(x, edge_index, pseudo))
        x = F.elu(self.conv6(x, edge_index, pseudo))
        x = F.elu(self.lin1(x))
        x = F.dropout(x, training=self.training)
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)

    def cross_entropy_loss(self, logits, labels):
        return F.nll_loss(logits, labels)

    def training_step(self, train_batch, batch_idx):
        x, y = train_batch
        logits = self.forward(x)
        loss = self.cross_entropy_loss(logits, y)
        logs = {"train_loss": loss}
        return {"loss": loss, "log": logs}

    def validation_step(self, val_batch, batch_idx):
        x, y = val_batch
        logits = self.forward(x)
        loss = self.cross_entropy_loss(logits, y)
        return {"val_loss": loss}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        tensorboard_logs = {"val_loss": avg_loss}
        return {"avg_val_loss": avg_loss, "log": tensorboard_logs}

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

    def prepare_data(self):
        path = osp.join(osp.dirname(osp.realpath(__file__)), "..", "data", "FAUST")
        self.pre_transform = T.Compose([T.FaceToEdge(), T.Constant(value=1)])
        self.train_dataset = FAUST(path, True, T.Cartesian(), self.pre_transform)
        self.test_dataset = FAUST(path, False, T.Cartesian(), self.pre_transform)

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=1, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=1)


model = Net()
trainer = pl.Trainer(gpus=1)
trainer.fit(model)
I tried converting the data into plain tensors as well as into lists and dicts of tensors, but this resulted in a slew of errors from PyTorch Geometric. Unfortunately, I have not found a working PyTorch Geometric example within the PyTorch Lightning framework online.
Hi! Thanks for your contribution, great first issue!
This is the expected behavior of torch.utils.data.DataLoader when it is used with the default_collate function.
You have to implement a custom collate_fn to handle other types of data.
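For reference, here is a minimal sketch of such a collate_fn (assuming a PyTorch Geometric dataset such as the FAUST train_dataset from the example above; the helper name pyg_collate is just illustrative). Batch.from_data_list merges a list of Data objects into one disjoint batch graph:

from torch.utils.data import DataLoader
from torch_geometric.data import Batch

def pyg_collate(data_list):
    # Merge the list of Data objects into a single Batch (one big disjoint graph).
    return Batch.from_data_list(data_list)

# Hypothetical usage with the FAUST train_dataset built in prepare_data() above:
loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=pyg_collate)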
Thanks for your quick answer. After reading up on this topic, I found that you have to use the DataLoader from PyTorch Geometric; then it works fine. The adapted and working example now looks like this:
import torch
from torch import nn
import pytorch_lightning as pl
from torch.utils.data import random_split
from torchvision import datasets, transforms
from torch.nn import functional as F
from pytorch_lightning.callbacks import EarlyStopping
from torch_geometric.data import DataLoader
from torch_geometric.datasets import FAUST
import torch_geometric.transforms as T
from torch_geometric.nn import SplineConv
import os
import os.path as osp


class Net(pl.LightningModule):
    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        super(Net, self).__init__()
        self.conv1 = SplineConv(1, 32, dim=3, kernel_size=5, aggr="add")
        self.conv2 = SplineConv(32, 64, dim=3, kernel_size=5, aggr="add")
        self.conv3 = SplineConv(64, 64, dim=3, kernel_size=5, aggr="add")
        self.conv4 = SplineConv(64, 64, dim=3, kernel_size=5, aggr="add")
        self.conv5 = SplineConv(64, 64, dim=3, kernel_size=5, aggr="add")
        self.conv6 = SplineConv(64, 64, dim=3, kernel_size=5, aggr="add")
        self.lin1 = torch.nn.Linear(64, 256)
        # Every FAUST mesh has 6890 vertices in fixed correspondence,
        # so the network predicts one of 6890 classes per node.
        self.lin2 = torch.nn.Linear(256, 6890)

    def forward(self, data):
        x, edge_index, pseudo = data.x, data.edge_index, data.edge_attr
        x = F.elu(self.conv1(x, edge_index, pseudo))
        x = F.elu(self.conv2(x, edge_index, pseudo))
        x = F.elu(self.conv3(x, edge_index, pseudo))
        x = F.elu(self.conv4(x, edge_index, pseudo))
        x = F.elu(self.conv5(x, edge_index, pseudo))
        x = F.elu(self.conv6(x, edge_index, pseudo))
        x = F.elu(self.lin1(x))
        x = F.dropout(x, training=self.training)
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)

    def cross_entropy_loss(self, logits, labels):
        return F.nll_loss(logits, labels)

    def training_step(self, train_batch, batch_idx):
        logits = self.forward(train_batch)
        # self.target is created in validation_step; Lightning runs its
        # validation sanity check before training, so it already exists here.
        loss = self.cross_entropy_loss(logits, self.target)
        logs = {"train_loss": loss}
        return {"loss": loss, "log": logs}

    def validation_step(self, val_batch, batch_idx):
        logits = self.forward(val_batch)
        # The node correspondence target is identical for every FAUST mesh,
        # so it is derived from the first training sample.
        self.d = self.train_dataset[0]
        self.target = torch.arange(
            self.d.num_nodes, dtype=torch.long, device=self.device
        )
        loss = self.cross_entropy_loss(logits, self.target)
        return {"val_loss": loss}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        tensorboard_logs = {"val_loss": avg_loss}
        return {"avg_val_loss": avg_loss, "log": tensorboard_logs}

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

    def prepare_data(self):
        path = osp.join(osp.dirname(osp.realpath(__file__)), "..", "data", "FAUST")
        self.pre_transform = T.Compose([T.FaceToEdge(), T.Constant(value=1)])
        self.train_dataset = FAUST(path, True, T.Cartesian(), self.pre_transform)
        self.test_dataset = FAUST(path, False, T.Cartesian(), self.pre_transform)

    def train_dataloader(self):
        # torch_geometric.data.DataLoader collates Data objects into Batch
        # objects instead of calling torch's default_collate.
        return DataLoader(self.train_dataset, batch_size=1, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=1)


model = Net()
trainer = pl.Trainer(gpus=1, progress_bar_refresh_rate=50)
trainer.fit(model)
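As a quick sanity check, here is a sketch (assuming the FAUST data has already been downloaded to ./data/FAUST, a placeholder path) of what PyTorch Geometric's DataLoader actually yields: a Batch object holding the merged graph tensors, rather than a tuple that default_collate could handle:

import torch_geometric.transforms as T
from torch_geometric.data import DataLoader
from torch_geometric.datasets import FAUST

pre_transform = T.Compose([T.FaceToEdge(), T.Constant(value=1)])
dataset = FAUST("./data/FAUST", True, T.Cartesian(), pre_transform)
loader = DataLoader(dataset, batch_size=2, shuffle=True)

batch = next(iter(loader))
print(batch)        # a Batch object with x, edge_index, edge_attr, ...
print(batch.batch)  # per-node vector mapping each node to its graph in the batch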
Hi @MarkusRosen, sorry to open this up again, but I am trying to install torch-geometric on Google Colab while also using PyTorch Lightning. There seems to be a compatibility issue between the PyTorch versions each of them requires. Could you tell me which torch-geometric version you are using (e.g. cu101 for PyTorch 1.5, or the CPU version for PyTorch 1.4, etc.)? For me, using any CUDA version results in the following error: OSError: libc10_cuda.so: cannot open shared object file: No such file or directory. Attached are my installations.
There seem to be some CUDA-version-related errors that I cannot directly solve for you, but in my test above I used the following versions of PyTorch, Lightning and Geometric (Windows 10):
pytorch-lightning==0.7.3
torch==1.4.0
torch-cluster==1.5.4
torch-geometric==1.4.3
torch-scatter==2.0.4
torch-sparse==0.6.1
torch-spline-conv==1.2.0
torchvision==0.5.0
All running with CUDA 10.1.
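If it helps with debugging the libc10_cuda.so error, here is a quick sketch for checking which PyTorch build is actually installed (nothing specific to your setup, just the standard torch attributes):

import torch

print(torch.__version__)          # e.g. 1.4.0
print(torch.version.cuda)         # e.g. 10.1, or None for a CPU-only build
print(torch.cuda.is_available())

The CUDA suffix of the torch-scatter/torch-sparse/torch-spline-conv wheels has to match this build; a CPU-only torch combined with CUDA-built extensions is a common cause of that shared-object error.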
I ran into the same problem when I tried to upgrade to pytorch-lightning 0.9, so I had to downgrade.
The following configuration now seems to work for me:
cudatoolkit==10.1
torch==1.5.1
torchvision==0.6.0
torch-cluster==1.5.5
torch-scatter==2.0.5
torch-sparse==0.6.6
torch-spline-conv==1.2.0
torch-geometric==1.6.1
pytorch-lightning==0.8.1
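A small sketch to verify that a pinned configuration like the one above actually loads (assuming the listed packages are installed; a version or CUDA mismatch usually surfaces as an ImportError or OSError right at these imports):

import torch
import torch_scatter
import torch_sparse
import torch_cluster
import torch_spline_conv
import torch_geometric
import pytorch_lightning as pl

print(torch.__version__, torch.version.cuda)
print(torch_geometric.__version__, pl.__version__)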