Source code for nni.algorithms.hpo.gp_tuner.gp_tuner

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
GPTuner is a Bayesian Optimization method where Gaussian Process is used for modeling loss functions.

See :class:`GPTuner` for details.
"""

import warnings
import logging
import numpy as np
from schema import Schema, Optional

from sklearn.gaussian_process.kernels import Matern
from sklearn.gaussian_process import GaussianProcessRegressor

from nni import ClassArgsValidator
from nni.common.hpo_utils import validate_search_space
from nni.tuner import Tuner
from nni.utils import OptimizeMode, extract_scalar_reward

from .target_space import TargetSpace
from .util import UtilityFunction, acq_max

logger = logging.getLogger("GP_Tuner_AutoML")

class GPClassArgsValidator(ClassArgsValidator):
    def validate_class_args(self, **kwargs):
        Schema({
            Optional('optimize_mode'): self.choices('optimize_mode', 'maximize', 'minimize'),
            Optional('utility'): self.choices('utility', 'ei', 'ucb', 'poi'),
            Optional('kappa'): float,
            Optional('xi'): float,
            Optional('nu'): float,
            Optional('alpha'): float,
            Optional('cold_start_num'): int,
            Optional('selection_num_warm_up'): int,
            Optional('selection_num_starting_points'): int,
        }).validate(kwargs)
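
# Illustrative sketch (not part of the original module): the validator above is what
# NNI applies to ``config.tuner.class_args`` before constructing the tuner. Assuming a
# direct call, an unknown key or a wrongly typed value raises ``schema.SchemaError``::
#
#     GPClassArgsValidator().validate_class_args(optimize_mode='maximize', kappa=5.0)  # passes
#     GPClassArgsValidator().validate_class_args(kappa='high')                         # raises SchemaError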

class GPTuner(Tuner):
    """
    GP tuner is a Bayesian Optimization method where Gaussian Process is used for modeling loss functions.

    Bayesian optimization works by constructing a posterior distribution of functions (a Gaussian Process)
    that best describes the function you want to optimize. As the number of observations grows,
    the posterior distribution improves, and the algorithm becomes more certain of which regions
    in parameter space are worth exploring and which are not.

    GP tuner is designed to minimize the number of steps required to find a combination of parameters
    that is close to the optimal combination. To do so, this method uses a proxy optimization problem
    (finding the maximum of the acquisition function) that, albeit still a hard problem, is cheaper
    (in the computational sense) to solve and is amenable to common tools.
    Therefore, Bayesian Optimization is suggested for situations where sampling the function
    to be optimized is very expensive.

    Note that the only acceptable types in the :doc:`search space </hpo/search_space>` are
    ``randint``, ``uniform``, ``quniform``, ``loguniform``, ``qloguniform``, and numerical ``choice``.

    This optimization approach is described in Section 3 of the paper
    `Algorithms for Hyper-Parameter Optimization
    <https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf>`__
    (:footcite:t:`bergstra2011algorithms`).

    Examples
    --------

    .. code-block::

        config.tuner.name = 'GP'
        config.tuner.class_args = {
            'optimize_mode': 'maximize',
            'utility': 'ei',
            'kappa': 5.0,
            'xi': 0.0,
            'nu': 2.5,
            'alpha': 1e-6,
            'cold_start_num': 10,
            'selection_num_warm_up': 100000,
            'selection_num_starting_points': 250
        }

    Parameters
    ----------
    optimize_mode : str
        Optimize mode, 'maximize' or 'minimize'. By default 'maximize'.
    utility : str
        Utility function (also called 'acquisition function') to use, which can be 'ei', 'ucb' or 'poi'. By default 'ei'.
    kappa : float
        Used by utility function 'ucb'. The bigger kappa is, the more exploratory the tuner will be. By default 5.
    xi : float
        Used by utility functions 'ei' and 'poi'. The bigger xi is, the more exploratory the tuner will be. By default 0.
    nu : float
        Used to specify the Matern kernel. The smaller nu is, the less smooth the approximated function is. By default 2.5.
    alpha : float
        Used to specify the Gaussian Process Regressor. Larger values correspond to an increased noise level in the observations. By default 1e-6.
    cold_start_num : int
        Number of random explorations to perform before the Gaussian Process. By default 10.
    selection_num_warm_up : int
        Number of random points to evaluate when searching for the point that maximizes the acquisition function. By default 100000.
    selection_num_starting_points : int
        Number of times to run L-BFGS-B from a random starting point after the warm-up. By default 250.
    """

    def __init__(self, optimize_mode="maximize", utility='ei', kappa=5, xi=0, nu=2.5, alpha=1e-6,
                 cold_start_num=10, selection_num_warm_up=100000, selection_num_starting_points=250):
        self._optimize_mode = OptimizeMode(optimize_mode)

        # utility function related
        self._utility = utility
        self._kappa = kappa
        self._xi = xi

        # target space
        self._space = None

        self._random_state = np.random.RandomState()

        # nu, alpha are GPR related params
        self._gp = GaussianProcessRegressor(
            kernel=Matern(nu=nu),
            alpha=alpha,
            normalize_y=True,
            n_restarts_optimizer=25,
            random_state=self._random_state
        )
        # num of random evaluations before GPR
        self._cold_start_num = cold_start_num

        # params for acq_max
        self._selection_num_warm_up = selection_num_warm_up
        self._selection_num_starting_points = selection_num_starting_points

        # num of imported data
        self._supplement_data_num = 0

    def update_search_space(self, search_space):
        """
        Update self.bounds and self.types from the search_space.json file.

        Override of the abstract method in :class:`~nni.tuner.Tuner`.
        """
        validate_search_space(search_space, ['choice', 'randint', 'uniform', 'quniform', 'loguniform', 'qloguniform'])
        self._space = TargetSpace(search_space, self._random_state)

    def generate_parameters(self, parameter_id, **kwargs):
        """
        Method which provides one set of hyper-parameters.
        If the number of trial results is lower than ``cold_start_num``, GPTuner will first generate
        some parameters at random. Otherwise, the parameters are chosen by the Gaussian Process model.

        Override of the abstract method in :class:`~nni.tuner.Tuner`.
        """
        if self._space.len() < self._cold_start_num:
            results = self._space.random_sample()
        else:
            # Sklearn's GP throws a large number of warnings at times, but
            # we don't really need to see them here.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                self._gp.fit(self._space.params, self._space.target)

            util = UtilityFunction(
                kind=self._utility, kappa=self._kappa, xi=self._xi)

            results = acq_max(
                f_acq=util.utility,
                gp=self._gp,
                y_max=self._space.target.max(),
                bounds=self._space.bounds,
                space=self._space,
                num_warmup=self._selection_num_warm_up,
                num_starting_points=self._selection_num_starting_points
            )

        results = self._space.array_to_params(results)
        logger.info("Generated parameters:\n %s", results)
        return results

    def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
        """
        Method invoked when a trial reports its final result.

        Override of the abstract method in :class:`~nni.tuner.Tuner`.
        """
        value = extract_scalar_reward(value)
        if self._optimize_mode == OptimizeMode.Minimize:
            value = -value

        logger.info("Received trial result.")
        logger.info("value: %s", value)
        logger.info("parameter: %s", parameters)
        self._space.register(parameters, value)

    def import_data(self, data):
        """
        Import additional data for tuning.

        Override of the abstract method in :class:`~nni.tuner.Tuner`.
        """
        _completed_num = 0
        for trial_info in data:
            logger.info(
                "Importing data, current processing progress %s / %s", _completed_num, len(data))
            _completed_num += 1
            assert "parameter" in trial_info
            _params = trial_info["parameter"]
            assert "value" in trial_info
            _value = trial_info['value']
            if not _value:
                logger.info(
                    "Useless trial data, value is %s, skip this trial data.", _value)
                continue
            self._supplement_data_num += 1
            _parameter_id = '_'.join(
                ["ImportData", str(self._supplement_data_num)])
            self.receive_trial_result(
                parameter_id=_parameter_id, parameters=_params, value=_value)
        logger.info("Successfully imported data to GP tuner.")