Hey guys,
I am a beginner to Deep Learning and am learning by following various tutorials that are available online. For the sake of practicing, I am trying to build a simple wine-quality algorithm and I am getting the following error:
File "<ipython-input-25-424cf38562b6>", line 19, in <module>
grid_search.fit(X_train, y_train)
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py", line 722, in fit
self._run_search(evaluate_candidates)
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py", line 1191, in _run_search
evaluate_candidates(ParameterGrid(self.param_grid))
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py", line 711, in evaluate_candidates
cv.split(X, y, groups)))
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py", line 983, in __call__
if self.dispatch_one_batch(iterator):
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py", line 825, in dispatch_one_batch
self._dispatch(tasks)
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py", line 782, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py", line 182, in apply_async
result = ImmediateResult(func)
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py", line 545, in __init__
self.results = batch()
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py", line 261, in __call__
for func, args, kwargs in self.items]
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py", line 261, in <listcomp>
for func, args, kwargs in self.items]
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 568, in _fit_and_score
test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 605, in _score
return _multimetric_score(estimator, X_test, y_test, scorer)
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 635, in _multimetric_score
score = scorer(estimator, X_test, y_test)
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py", line 98, in __call__
**self._kwargs)
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\metrics\classification.py", line 176, in accuracy_score
y_type, y_true, y_pred = _check_targets(y_true, y_pred)
File "C:\Users\kashy\Anaconda3\lib\site-packages\sklearn\metrics\classification.py", line 81, in _check_targets
"and {1} targets".format(type_true, type_pred))
ValueError: Classification metrics can't handle a mix of multilabel-indicator and multiclass targets
Since I am comparatively a beginner in this field, I am unable to find the error and I would appreciate if anyone could help me or guide me as to what my mistake is.
The code is:
import numpy as np
import pandas as pd
dataset = pd.read_csv('winequality-white.csv', delimiter = ';')
X_input = dataset.iloc[:,:11].values
y_input = dataset.iloc[:,-1].values
y_input = y_input.reshape(y_input.shape[0], 1)
from sklearn.preprocessing import OneHotEncoder
onehotencoder = OneHotEncoder(categorical_features = [-1])
y_input = onehotencoder.fit_transform(y_input).toarray()
#splitting data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_input, y_input, test_size = 0.25)
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range = (0,1))
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.utils import to_categorical
def build_classifier(optimizer):
classifier = Sequential()
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
classifier.add(Dense(units = 7, kernel_initializer = 'uniform', activation = 'softmax'))
classifier.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics = ['accuracy'])
return classifier
classifier = KerasClassifier(build_fn = build_classifier)
parameters = {'batch_size' : [32, 64],
'epochs' : [500, 100],
'optimizer' : ['adam', 'rmsprop']}
grid_search = GridSearchCV(estimator = classifier,
param_grid = parameters,
scoring = 'accuracy',
cv = 10)
grid_search.fit(X_train, y_train)
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_
Dataset: http://archive.ics.uci.edu/ml/datasets/Wine+Quality
Your targets are one-hot encoded that is why sklearn is confused and throwing up an error about multilabel indicator, you need to use raw arrays not one-hot encoded.
iuliakhomenko How to use raw arrays ?
iuliakhomenko How to use raw arrays ?
has anyone find any solution to this or to what @iuliakhomenko means by " raw arrays" ?
comments galat hai
answer Kisi ka Sahi Nahi hai
@iuliakhomenko might meant using the category number as input instead of one-hot encoded vector, such as using number 1 install of[0,1,0,0...]. hope helps.
Most helpful comment
Your targets are one-hot encoded that is why sklearn is confused and throwing up an error about multilabel indicator, you need to use raw arrays not one-hot encoded.