Grid search is a hyperparameter-tuning technique that looks for the best-performing combination of hyperparameter values for a model. It is an exhaustive search: the user specifies a set of candidate values for each hyperparameter, and every combination of those values (the parameter grid) is tried. Each combination is evaluated on the estimator/model with cross-validation, and the combination with the best cross-validated score is selected.
from google.colab import drive
# Mount Google Drive at /content/drive so notebooks stored there are reachable
# from the Colab filesystem.
drive.mount('/content/drive')
# Drive folder that holds the shared helper notebook.
functions_path = '/content/drive/MyDrive/Lib/'
# IPython magic: {functions_path} is expanded from the Python variable above,
# then the helper notebook is executed.
# NOTE(review): presumably this is what provides make_confusion_matrix,
# get_accuracy_and_recall_score, get_feature_importances_and_visualize and the
# np / metrics / DecisionTreeClassifier names used below — confirm.
%run {functions_path}myfunctions.ipynb
from sklearn.model_selection import GridSearchCV
# Base model to tune; the fixed seed keeps the fitted trees reproducible.
estimator = DecisionTreeClassifier(random_state=1)

# Candidate values for each hyperparameter explored by the grid search.
parameters = dict(
    max_depth=np.arange(1, 10),
    min_samples_leaf=[1, 2, 5, 7, 10, 15, 20],
    max_leaf_nodes=[2, 3, 5, 10],
    min_impurity_decrease=[0.001, 0.01, 0.1],
)
# Scorer used to rank parameter combinations. Note: this is a recall scorer,
# despite the `acc_` prefix in the variable name.
acc_scorer = metrics.make_scorer(metrics.recall_score)

# Exhaustive search over the grid, scored with 5-fold cross-validation.
grid_obj = GridSearchCV(
    estimator=estimator,
    param_grid=parameters,
    scoring=acc_scorer,
    cv=5,
)
grid_obj.fit(X_train, y_train)
# Take the estimator built from the best parameter combination. GridSearchCV
# defaults to refit=True, so best_estimator_ has already been retrained on the
# whole of (X_train, y_train); a further clf.fit(...) call would only rebuild
# the same tree (random_state is fixed), so it is omitted.
clf = grid_obj.best_estimator_

# Evaluate the tuned tree with the project helpers (not defined in this
# section — presumably loaded from the helper notebook; confirm).
make_confusion_matrix(clf, X_test, y_test)
get_accuracy_and_recall_score(clf, X_train, X_test, y_train, y_test)
get_feature_importances_and_visualize(clf, X_test)