博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
GridSearchCV和RandomizedSearchCV调参
阅读量:5067 次
发布时间:2019-06-12

本文共 4797 字,大约阅读时间需要 15 分钟。

1 GridSearchCV实际上可以看做是for循环输入一组参数后再比较哪种情况下最优.

使用GirdSearchCV模板

# Use scikit-learn's GridSearchCV to tune the optimizer of a small Keras
# binary classifier on the Pima-diabetes dataset.
import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
import pandas as pd
import os

# Pin the job to a single GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"


def create_model(optimizer='adam'):
    """Build-function required by KerasClassifier.

    Returns a compiled 8-input / 1-output binary classifier; *optimizer*
    is the hyperparameter being grid-searched.
    """
    model = Sequential()
    model.add(Dense(12, input_dim=8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=optimizer,
                  metrics=['accuracy'])
    return model


# Fix random seed for reproducibility.
seed = 7
numpy.random.seed(seed)

# Load dataset and split into input (X) and output (Y) variables.
dataset = pd.read_csv('diabetes.csv')
X = dataset[['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
             'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']]
Y = dataset['Outcome']

# Wrap the Keras model so it exposes the scikit-learn estimator API.
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10,
                        verbose=0)

# Define the grid of optimizers to try; n_jobs=-1 fits candidates in parallel.
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = dict(optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, Y)

# Summarize: best configuration first, then every candidate's CV mean/std.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
View Code

 

参考:https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/

          https://blog.csdn.net/weixin_41988628/article/details/83098130

2

利用随机搜索实现鸢尾花调参。

# Tune a KNN classifier on the iris dataset three ways: exhaustive grid
# search, randomized search, and randomized search with a custom scorer.
from sklearn.datasets import load_iris  # bundled sample dataset (unused; CSV is read instead)
from sklearn.neighbors import KNeighborsClassifier  # estimator whose k and weighting are tuned
import matplotlib.pyplot as plt  # plotting (kept for parity with the article)
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import pandas as pd

iris = pd.read_csv('../data/iris.csv')
print(iris.head())
print(iris.columns)
X = iris[['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width']]  # 150 samples, 4 features
y = iris['Species']  # 150 class labels

k_range = range(1, 31)  # candidate values for k
weight_options = ['uniform', 'distance']  # uniform vs. inverse-distance weighting
# Parameter grid: keys must match the estimator's constructor argument names.
param_grid = {'n_neighbors': k_range, 'weights': weight_options}
print(param_grid)

knn = KNeighborsClassifier(n_neighbors=5)  # base estimator; searched params override this

# ================================ grid search =================================
# 10-fold CV per candidate; scoring='accuracy' sets the selection metric.
grid = GridSearchCV(estimator=knn, param_grid=param_grid, cv=10, scoring='accuracy')
grid.fit(X, y)
print('网格搜索-度量记录:', grid.cv_results_)  # per-fit details
print('网格搜索-最佳度量值:', grid.best_score_)
print('网格搜索-最佳参数:', grid.best_params_)  # dict of winning parameter values
print('网格搜索-最佳模型:', grid.best_estimator_)

# Rebuild a model from the winning parameters and predict a new sample.
knn = KNeighborsClassifier(n_neighbors=grid.best_params_['n_neighbors'],
                           weights=grid.best_params_['weights'])
knn.fit(X, y)
print(knn.predict([[3, 5, 4, 2]]))

# ============================== randomized search =============================
rand = RandomizedSearchCV(knn, param_grid, cv=10, scoring='accuracy',
                          n_iter=10, random_state=5)
rand.fit(X, y)
# BUGFIX: the original printed grid.* here, reporting grid-search results
# under the random-search labels; use the fitted RandomizedSearchCV instead.
print('随机搜索-度量记录:', rand.cv_results_)
print('随机搜索-最佳度量值:', rand.best_score_)
print('随机搜索-最佳参数:', rand.best_params_)
print('随机搜索-最佳模型:', rand.best_estimator_)

# Rebuild from the random search's winning parameters and predict.
knn = KNeighborsClassifier(n_neighbors=rand.best_params_['n_neighbors'],
                           weights=rand.best_params_['weights'])
knn.fit(X, y)
print(knn.predict([[3, 5, 4, 2]]))

# =============================== custom metric ================================
from sklearn import metrics


def scorerfun(estimator, X, y):
    """Custom scorer with the (estimator, X, y) signature sklearn expects."""
    y_pred = estimator.predict(X)
    return metrics.accuracy_score(y, y_pred)


# BUGFIX: the original defined scorerfun but still passed scoring='accuracy'
# (and printed grid.best_score_); actually use the custom scorer here.
rand = RandomizedSearchCV(knn, param_grid, cv=10, scoring=scorerfun,
                          n_iter=10, random_state=5)
rand.fit(X, y)
print('随机搜索-最佳度量值:', rand.best_score_)
View Code

参考:https://blog.csdn.net/luanpeng825485697/article/details/79831703

转载于:https://www.cnblogs.com/xxswkl/p/11072795.html

你可能感兴趣的文章