INPUT_PATH = '../data/'
OUTPUT_PATH = '../models/'
model_path = '../models/'
import model_train as mt
import torch.nn.functional as F
import ray
import ray.tune as tune
from process_text import Dataset
import torch
ray.init()
ds = Dataset(INPUT_PATH, ['fileid','text'], 'tag')
tv_datafields = [("fileid", 'None'), ("text", 'TEXT'), ("tag", 'LABEL')]
trn_dl,vld_dl = ds.process_text(tv_datafields,'text',n_gpus=torch.device('cuda'))
args = {"dataset":ds,
"train_dl":trn_dl,
"valid_dl":vld_dl, "model_path":model_path}
def train_func(args, config, reporter):
model = mt.CNN(args['dataset'].text.vocab,dp=config['dp']).cuda()
optim = mt.get_optimizer(model,lr=config['lr'])
loss = mt.train_loop(model, config['epochs'], optim,args['train_dl'], args['valid_dl'], args["model_path"], gpu=True)
reporter(validation_loss=loss) # report metrics
tune.register_trainable("train_func",lambda cfg, rprtr: train_func(args,cfg, rprtr))
When I execute tune.register_trainable, I get the following errors:
AttributeError: Can't get local attribute 'wrap_function.
AttributeError: 'torch.dtype' object has no attribute '__module__'
AttributeError Traceback (most recent call last)
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save_global(self, obj, name)
917 module = sys.modules[module_name]
--> 918 obj2, parent = _getattribute(module, name)
919 except (ImportError, KeyError, AttributeError):
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in _getattribute(obj, name)
265 raise AttributeError("Can't get local attribute {!r} on {!r}"
--> 266 .format(name, obj))
267 try:
AttributeError: Can't get local attribute 'wrap_function.
During handling of the above exception, another exception occurred:
PicklingError Traceback (most recent call last)
~/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py in save_global(self, obj, name, pack)
638 try:
--> 639 return Pickler.save_global(self, obj, name=name)
640 except Exception:
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save_global(self, obj, name)
921 "Can't pickle %r: it's not found as %s.%s" %
--> 922 (obj, module_name, name))
923 else:
PicklingError: Can't pickle
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
----> 1 tune.register_trainable("train_func",lambda cfg, rprtr: train_func(args,cfg, rprtr))
~/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/tune/registry.py in register_trainable(name, trainable)
36 raise TypeError("Second argument must be convertable to Trainable",
37 trainable)
---> 38 _global_registry.register(TRAINABLE_CLASS, name, trainable)
39
40
~/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/tune/registry.py in register(self, category, key, value)
75 raise TuneError("Unknown category {} not among {}".format(
76 category, KNOWN_CATEGORIES))
---> 77 self._to_flush[(category, key)] = pickle.dumps(value)
78 if _internal_kv_initialized():
79 self.flush_values()
~/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py in dumps(obj, protocol)
879 try:
880 cp = CloudPickler(file, protocol=protocol)
--> 881 cp.dump(obj)
882 return file.getvalue()
883 finally:
~/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py in dump(self, obj)
266 self.inject_addons()
267 try:
--> 268 return Pickler.dump(self, obj)
269 except RuntimeError as e:
270 if 'recursion' in e.args[0]:
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in dump(self, obj)
407 if self.proto >= 4:
408 self.framer.start_framing()
--> 409 self.save(obj)
410 self.write(STOP)
411 self.framer.end_framing()
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
474 f = self.dispatch.get(t)
475 if f is not None:
--> 476 f(self, obj) # Call unbound method with explicit self
477 return
478
~/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py in save_global(self, obj, name, pack)
646 typ = type(obj)
647 if typ is not obj and isinstance(obj, (type, types.ClassType)):
--> 648 return self.save_dynamic_class(obj)
649
650 raise
~/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py in save_dynamic_class(self, obj)
493 # Now save the rest of obj's __dict__. Any references to obj
494 # encountered while saving will point to the skeleton class.
--> 495 save(clsdict)
496
497 # Write a tuple of (skeleton_class, clsdict).
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
474 f = self.dispatch.get(t)
475 if f is not None:
--> 476 f(self, obj) # Call unbound method with explicit self
477 return
478
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save_dict(self, obj)
819
820 self.memoize(obj)
--> 821 self._batch_setitems(obj.items())
822
823 dispatch[dict] = save_dict
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in _batch_setitems(self, items)
845 for k, v in tmp:
846 save(k)
--> 847 save(v)
848 write(SETITEMS)
849 elif n:
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
474 f = self.dispatch.get(t)
475 if f is not None:
--> 476 f(self, obj) # Call unbound method with explicit self
477 return
478
~/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py in save_function(self, obj, name)
408 # func is nested
409 if lookedup_by_name is None or lookedup_by_name is not obj:
--> 410 self.save_function_tuple(obj)
411 return
412
~/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py in save_function_tuple(self, func)
551 if hasattr(func, '__qualname__'):
552 state['qualname'] = func.__qualname__
--> 553 save(state)
554 write(pickle.TUPLE)
555 write(pickle.REDUCE) # applies _fill_function on the tuple
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
474 f = self.dispatch.get(t)
475 if f is not None:
--> 476 f(self, obj) # Call unbound method with explicit self
477 return
478
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save_dict(self, obj)
819
820 self.memoize(obj)
--> 821 self._batch_setitems(obj.items())
822
823 dispatch[dict] = save_dict
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in _batch_setitems(self, items)
845 for k, v in tmp:
846 save(k)
--> 847 save(v)
848 write(SETITEMS)
849 elif n:
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
474 f = self.dispatch.get(t)
475 if f is not None:
--> 476 f(self, obj) # Call unbound method with explicit self
477 return
478
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save_list(self, obj)
779
780 self.memoize(obj)
--> 781 self._batch_appends(obj)
782
783 dispatch[list] = save_list
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in _batch_appends(self, items)
806 write(APPENDS)
807 elif n:
--> 808 save(tmp[0])
809 write(APPEND)
810 # else tmp is empty, and we're done
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
474 f = self.dispatch.get(t)
475 if f is not None:
--> 476 f(self, obj) # Call unbound method with explicit self
477 return
478
~/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py in save_function(self, obj, name)
403 or getattr(obj.__code__, 'co_filename', None) == '
404 or themodule is None):
--> 405 self.save_function_tuple(obj)
406 return
407 else:
~/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py in save_function_tuple(self, func)
551 if hasattr(func, '__qualname__'):
552 state['qualname'] = func.__qualname__
--> 553 save(state)
554 write(pickle.TUPLE)
555 write(pickle.REDUCE) # applies _fill_function on the tuple
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
474 f = self.dispatch.get(t)
475 if f is not None:
--> 476 f(self, obj) # Call unbound method with explicit self
477 return
478
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save_dict(self, obj)
819
820 self.memoize(obj)
--> 821 self._batch_setitems(obj.items())
822
823 dispatch[dict] = save_dict
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in _batch_setitems(self, items)
845 for k, v in tmp:
846 save(k)
--> 847 save(v)
848 write(SETITEMS)
849 elif n:
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
474 f = self.dispatch.get(t)
475 if f is not None:
--> 476 f(self, obj) # Call unbound method with explicit self
477 return
478
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save_dict(self, obj)
819
820 self.memoize(obj)
--> 821 self._batch_setitems(obj.items())
822
823 dispatch[dict] = save_dict
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in _batch_setitems(self, items)
845 for k, v in tmp:
846 save(k)
--> 847 save(v)
848 write(SETITEMS)
849 elif n:
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
474 f = self.dispatch.get(t)
475 if f is not None:
--> 476 f(self, obj) # Call unbound method with explicit self
477 return
478
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save_dict(self, obj)
819
820 self.memoize(obj)
--> 821 self._batch_setitems(obj.items())
822
823 dispatch[dict] = save_dict
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in _batch_setitems(self, items)
845 for k, v in tmp:
846 save(k)
--> 847 save(v)
848 write(SETITEMS)
849 elif n:
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
519
520 # Save the reduce() output and finally memoize the object
--> 521 self.save_reduce(obj=obj, *rv)
522
523 def persistent_id(self, obj):
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
632
633 if state is not None:
--> 634 save(state)
635 write(BUILD)
636
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
474 f = self.dispatch.get(t)
475 if f is not None:
--> 476 f(self, obj) # Call unbound method with explicit self
477 return
478
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save_dict(self, obj)
819
820 self.memoize(obj)
--> 821 self._batch_setitems(obj.items())
822
823 dispatch[dict] = save_dict
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in _batch_setitems(self, items)
845 for k, v in tmp:
846 save(k)
--> 847 save(v)
848 write(SETITEMS)
849 elif n:
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
519
520 # Save the reduce() output and finally memoize the object
--> 521 self.save_reduce(obj=obj, *rv)
522
523 def persistent_id(self, obj):
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
632
633 if state is not None:
--> 634 save(state)
635 write(BUILD)
636
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
474 f = self.dispatch.get(t)
475 if f is not None:
--> 476 f(self, obj) # Call unbound method with explicit self
477 return
478
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save_dict(self, obj)
819
820 self.memoize(obj)
--> 821 self._batch_setitems(obj.items())
822
823 dispatch[dict] = save_dict
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in _batch_setitems(self, items)
845 for k, v in tmp:
846 save(k)
--> 847 save(v)
848 write(SETITEMS)
849 elif n:
~/src/anaconda3/envs/skorch/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
505 # Check for string returned by reduce(), meaning "save as global"
506 if isinstance(rv, str):
--> 507 self.save_global(obj, rv)
508 return
509
~/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py in save_global(self, obj, name, pack)
633 dispatched here.
634 """
--> 635 if obj.__module__ == "__main__":
636 return self.save_dynamic_class(obj)
637
AttributeError: 'torch.dtype' object has no attribute '__module__'
I think the issue is that your function is closing over variables that are not serializable, e.g., torch state. My guess is Dataset is the culprit: try moving the creation of that into the training function.
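For illustration, a rough sketch of that restructuring, reusing the names from the snippet above (Dataset.process_text and model_train are the user's own modules, so their details are assumptions):

import torch
import ray
import ray.tune as tune
import model_train as mt
from process_text import Dataset

INPUT_PATH = '../data/'
model_path = '../models/'

def train_func(config, reporter):
    # Create the Dataset and dataloaders inside the training function so the
    # registered trainable only closes over plain strings, not torch state.
    ds = Dataset(INPUT_PATH, ['fileid', 'text'], 'tag')
    tv_datafields = [("fileid", 'None'), ("text", 'TEXT'), ("tag", 'LABEL')]
    trn_dl, vld_dl = ds.process_text(tv_datafields, 'text', n_gpus=torch.device('cuda'))

    model = mt.CNN(ds.text.vocab, dp=config['dp']).cuda()
    optim = mt.get_optimizer(model, lr=config['lr'])
    loss = mt.train_loop(model, config['epochs'], optim, trn_dl, vld_dl, model_path, gpu=True)
    reporter(validation_loss=loss)

# The function no longer captures torch objects, so it can be registered directly.
tune.register_trainable("train_func", train_func)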
Thanks for your reply. Let me try that out and get back to you.
I changed it as per your suggestion and it works. I also changed it from the functional API to the class API, but the class is not recognizing the config I pass to it.
My code is given below:
from torchtext.data import Field, TabularDataset
from torchtext.data import Iterator, BucketIterator
import torch.nn as nn
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
from tqdm import tqdm, tnrange
import numpy as np
import spacy
nlp = spacy.load('en')
from ray.tune import Trainable
INPUT_PATH = '../data/'
model_path = '../models/'
class Dataset:
    def __init__(self, path, x_vars, y_vars, test_size=0.2, filename=None,
                 train_test_split=False):
        self.path = path
        self.x_vars = x_vars
        self.y_vars = y_vars
        if train_test_split == True:
            self._get_train_val_data(filename, test_size)

    def _get_train_val_data(self, filename, test_size):
        data = pd.read_csv(f'{self.path}filename')
        self.columns = data.columns
        X_train, X_test, y_train, y_test = train_test_split(data.loc[:, self.x_vars],
                                                            data.loc[:, self.y_vars],
                                                            test_size=test_size, random_state=1)
        train = pd.concat([X_train, y_train], axis=1)
        vld = pd.concat([X_test, y_test], axis=1)
        train.to_csv(f'{self.path}trn.csv', index=False)
        vld.to_csv(f'{self.path}vld.csv', index=False)

    def _tokenize_text(self, text):
        removal = ['PUNCT', 'SPACE']
        text_out_final = []
        for review in nlp(text).sents:
            text_out = []
            for token in review:
                if token.pos_ not in removal and token.is_punct == False:
                    lemma = token.lemma_
                    if lemma != '-PRON-':
                        text_out.append(lemma)
            text_out_final.extend(text_out)
        return text_out_final

    def _init_emb(self, num_special_toks=2):
        sweep_range = len(self.text.vocab)
        running_norm = 0.
        num_non_zero = 0
        total_words = 0
        nn.init.normal_(self.text.vocab.vectors[0], mean=0, std=0.05)
        for i in range(num_special_toks, sweep_range):
            if len(self.text.vocab.vectors[i, :].nonzero()) == 0:
                nn.init.normal_(self.text.vocab.vectors[i], mean=0, std=0.05)
            else:
                num_non_zero += 1
                running_norm += torch.norm(self.text.vocab.vectors[i])
            total_words += 1
        norm_glove = running_norm / num_non_zero
        print(f"Average GloVE norm is {norm_glove}",
              f"Number of known words are {num_non_zero}",
              f"Total number of words are {total_words}")

    def _create_dataset(self, col2field_tuple, embed_nme, min_wrd_freq):
        self.text = Field(sequential=True, tokenize=self._tokenize_text, pad_first=True)
        self.label = Field(sequential=False, use_vocab=False)
        txt2fld = {'TEXT': self.text, 'LABEL': self.label, 'None': None}
        tv_datafields = [(col, txt2fld[label]) for col, label in col2field_tuple]
        trn_ds, vld_ds = TabularDataset.splits(
            path=f'{self.path}',
            train='trn_sample.csv', validation="vld_sample.csv",
            format='csv',
            skip_header=True,
            fields=tv_datafields)
        self.text.build_vocab(trn_ds, vectors=embed_nme, min_freq=min_wrd_freq)
        self._init_emb()
        return trn_ds, vld_ds
class Dataloader_custom:
    def __init__(self, dl_iter, text_col_name, y_var):
        self.dl_iter = dl_iter
        self.x_var = text_col_name
        self.y_var = y_var

    def __iter__(self):
        for batch in self.dl_iter:
            x = getattr(batch, self.x_var)
            if x.size()[0] > 10:
                y = getattr(batch, self.y_var).unsqueeze(0).float()
                yield (x, y)

    def __len__(self):
        return len(self.dl_iter)
class CNN(nn.Module):
    def __init__(self, vocab, filters=[(3, 100), (4, 100), (5, 100), (7, 100)], dp=0.3, bn=True, nl_func=F.relu):
        super(CNN, self).__init__()
        V, D = vocab.vectors.size()
        self.embed = nn.Embedding(V, D)
        self.embed.weight.data.copy_(vocab.vectors)
        self.embed.weight.requires_grad = False
        self.conv_layers = nn.ModuleList([nn.Conv1d(in_channels=D, out_channels=n, kernel_size=ksz) for ksz, n in filters])
        self.amp = nn.AdaptiveMaxPool1d(1)
        self.dp = nn.Dropout(dp)
        self.nl_func = nl_func
        num_features = 0
        for _, n in filters:
            num_features += n
        self.bnz = nn.BatchNorm1d(num_features) if bn else lambda x: x
        self.fc = nn.Linear(num_features, 1)

    def forward(self, text):
        text_embed = self.embed(text).transpose(1, 2).transpose(0, 2)
        conv = [self.amp(self.nl_func(Conv_layer(text_embed))) for Conv_layer in self.conv_layers]
        concat_conv = torch.cat(conv, 2)
        concat_conv = concat_conv.view(concat_conv.size(0), -1)
        bn_concat_conv = self.bnz(concat_conv)
        dp_bn_concat_conv = self.dp(bn_concat_conv)
        out = self.fc(dp_bn_concat_conv)
        out = out.transpose(0, 1)
        return out
def get_optimizer(model, lr, wd=0):
    # Setting optimizer
    parameters = filter(lambda x: x.requires_grad, model.parameters())
    optim = torch.optim.Adam(parameters, lr=lr, weight_decay=wd)
    return optim
class TrainCNN(Trainable):
    def _setup(self, config):
        args = {}
        vars(args).update(config)
        print("Dataset creation started")
        ds = Dataset(INPUT_PATH, ['fileid', 'text'], 'tag')
        tv_datafields = [("fileid", 'None'), ("text", 'TEXT'), ("tag", 'LABEL')]
        self.trn_ds, self.vld_ds = ds._create_dataset(tv_datafields, 'glove.6B.300d', 2)
        print('Dataset is created')
        self.train_loader, self.test_loader = _create_dl_iterator(self)
        self.train_loader = Dataloader_custom(self.train_loader, 'text', 'tag')
        self.test_loader = Dataloader_custom(self.test_loader, 'text', 'tag')
        self.model = CNN(ds.text.vocab, filters=[(3, 100), (4, 100), (5, 100), (7, 100)], dp=0.3, bn=True, nl_func=F.relu).cuda()
        self.optimizer = get_optimizer(model, args.lr, wd=0)

    def _create_dl_iterator(self):
        train_iter, val_iter = BucketIterator.splits((self.trn_ds, self.vld_ds),
                                                     batch_sizes=(64, 64),
                                                     device=torch.device('cuda'),
                                                     sort_key=lambda x: len(getattr(x, text_col_name)),
                                                     sort_within_batch=False,
                                                     repeat=False)
        return train_iter, val_iter

    def _train_iteration(self):
        # Setting model in train mode
        gpu = True
        verbose = True
        self.model.train()
        total, sum_loss = 0, 0
        for x_text, label in tqdm(self.train_loader, desc='Training', dynamic_ncols=True):
            bsz = label.shape[0]
            y = label.float()
            if gpu:
                x_text = x_text.cuda()
                y = y.cuda()
            y_hat = self.model(x_text)
            loss = F.binary_cross_entropy_with_logits(y_hat, y)
            self.optim.zero_grad()
            loss.backward()
            self.optim.step()
            total += bsz
            sum_loss += bsz * (loss.item())
        if verbose: print(np.sqrt(sum_loss / total))
        return sum_loss / total

    def _test(self):
        gpu = True
        self.model.eval()
        total, sum_loss = 0, 0
        for x_text, label in tqdm(self.valid_dl, desc='Validation', dynamic_ncols=True):
            bsz = label.shape[0]
            y = label.float()
            if gpu:
                x_text = x_text.cuda()
                y = y.cuda()
            y_hat = self.model(x_text)
            loss = F.binary_cross_entropy_with_logits(y_hat, y)
            sum_loss += bsz * (loss.item())
            total += bsz
        return {"mean_loss": sum_loss / total}

    def _train(self):
        self._train_iteration()
        return self._test()

    def _save(self, path):
        torch.save(self.model.state_dict(), os.path.join(path, "model.pth"))
        return path

    def _restore(self, path):
        self.model.load_state_dict(os.path.join(path, "model.pth"))
if __name__ == '__main__':
    import numpy as np
    import ray
    from ray import tune

    ray.init()
    tune.run_experiments(
        {"exp": {
            "config": {"lr": tune.grid_search([0.1, 0.2])},
            "trial_resources": {
                "cpu": 3,
                "gpu": 1
            },
            "run": TrainCNN
        }})
It gives me the following error:
Remote function stop failed with:
Traceback (most recent call last):
  File "/home/ubuntu/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/worker.py", line 923, in _process_task
    self.reraise_actor_init_error()
  File "/home/ubuntu/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/worker.py", line 267, in reraise_actor_init_error
    raise self.actor_init_error
  File "/home/ubuntu/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/worker.py", line 923, in _process_task
    self.reraise_actor_init_error()
  File "/home/ubuntu/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/worker.py", line 267, in reraise_actor_init_error
    raise self.actor_init_error
  File "/home/ubuntu/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/worker.py", line 945, in _process_task
    *arguments)
  File "/home/ubuntu/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/actor.py", line 261, in actor_method_executor
    method_returns = method(actor, *args)
  File "/home/ubuntu/src/anaconda3/envs/skorch/lib/python3.6/site-packages/ray/tune/trainable.py", line 84, in __init__
    self._setup()
TypeError: _setup() missing 1 required positional argument: 'config'
We recently changed the signature of _setup() to _setup(config).
@richardliaw it looks like the documentation isn't updated to reflect this.
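For reference, a minimal sketch of a Trainable under the new signature (a hypothetical example, not the code from this thread):

import ray
from ray import tune
from ray.tune import Trainable

class MyTrainable(Trainable):
    def _setup(self, config):
        # config is now passed directly to _setup()
        self.lr = config["lr"]
        self.timestep = 0

    def _train(self):
        # One training iteration; return a dict of metrics for Tune.
        self.timestep += 1
        return {"mean_loss": 1.0 / (self.timestep * self.lr)}

if __name__ == "__main__":
    ray.init()
    tune.run_experiments({
        "exp": {
            "run": MyTrainable,
            "config": {"lr": tune.grid_search([0.1, 0.2])},
            "stop": {"training_iteration": 5},
        }
    })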
OK, updated in this PR -
https://github.com/ray-project/ray/pull/3081/files#diff-ac05b67effec14a8eabaddf2e45b6b8aR226
Also, Sooraj, to fix your issue, you should upgrade ray by installing the
latest wheels via pip:
https://ray.readthedocs.io/en/latest/installation.html
Let me know if you run into any issues.
Yes, it is working now. Thank you @richardliaw and @ericl. I will keep exploring more options in Ray Tune. I will close this issue for now and reopen it if any other error occurs. Once again, thanks for all the help, guys.