# Source code for nni.algorithms.hpo.evolution_tuner

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
evolution_tuner.py
"""
from __future__ import annotations

import copy
import random
import logging

from collections import deque
import numpy as np
from schema import Schema, Optional

import nni
from nni import ClassArgsValidator
from nni.tuner import Tuner
from nni.utils import OptimizeMode, extract_scalar_reward, split_index, json2parameter, json2space

logger = logging.getLogger(__name__)

class Individual:
    """
    Plain record describing one member of the population.

    Parameters
    ----------
    config : optional, default = None
        The parameter configuration carried by this individual.
    info : optional, default = None
        Free-form information attached to the individual.
    result : optional, default = None
        The final metric of the individual; stays ``None`` when not given.
    """

    def __init__(self, config=None, info=None, result=None):
        self.info = info
        self.config = config
        self.result = result

    def __str__(self):
        # ``!s`` forces ``str()`` on every field, exactly like explicit
        # ``str(...)`` concatenation would.
        return f"info: {self.info!s}, config :{self.config!s}, result: {self.result!s}"

class EvolutionClassArgsValidator(ClassArgsValidator):
    """
    Validator for the ``class_args`` accepted by the evolution tuner.
    """

    def validate_class_args(self, **kwargs):
        """
        Validate the keyword arguments against the tuner's schema.

        ``optimize_mode`` is a required key built from the choices
        ``'maximize'`` / ``'minimize'``; ``population_size`` is an optional
        key built from an ``int`` range rule with bounds 0 and 99999.
        Whatever ``Schema.validate`` raises on a mismatch propagates to the
        caller.
        """
        mode_rule = self.choices('optimize_mode', 'maximize', 'minimize')
        size_rule = self.range('population_size', int, 0, 99999)
        schema = Schema({
            'optimize_mode': mode_rule,
            Optional('population_size'): size_rule,
        })
        schema.validate(kwargs)

class EvolutionTuner(Tuner):
    """
    Naive Evolution comes from `Large-Scale Evolution of Image Classifiers <https://arxiv.org/pdf/1703.01041.pdf>`__

    It randomly initializes a population based on the search space.
    For each generation, it chooses better ones and does some mutation
    (e.g., changes a hyperparameter, adds/removes one layer, etc.) on them to get the next generation.
    Naive Evolution requires many trials to work, but it's very simple and it's easily expanded with new features.

    Examples
    --------

    .. code-block::

        config.tuner.name = 'Evolution'
        config.tuner.class_args = {
            'optimize_mode': 'maximize',
            'population_size': 100
        }

    Parameters
    ----------
    optimize_mode: str
        Optimize mode, 'maximize' or 'minimize'.
        If 'maximize', the tuner will try to maximize metrics. If 'minimize', the tuner will try to minimize metrics.
    population_size: int
        The initial size of the population (trial num) in the evolution tuner (default=32).
        The larger population size, the better evolution performance.
        It's suggested that ``population_size`` be much larger than ``concurrency`` so users can get the most out of the algorithm.
        And at least ``concurrency``, or the tuner will fail on its first generation of parameters.
    """

    def __init__(self, optimize_mode='maximize', population_size=32):
        self.optimize_mode = OptimizeMode(optimize_mode)
        self.population_size = population_size

        self.searchspace_json = None
        # parameter_id -> Individual handed out for a trial and not yet reported
        self.running_trials = {}
        self.num_running_trials = 0
        self.random_state = None
        self.population = None
        self.space = None
        self.credit = 0  # record the unsatisfied trial requests
        self.send_trial_callback = None
        # parameter ids of the unsatisfied requests, in arrival order
        self.param_ids = deque()

    def update_search_space(self, search_space):
        """
        Update search space.

        Search_space contains the information that user pre-defined.
        The population is rebuilt from scratch with ``population_size``
        randomly generated individuals.

        Parameters
        ----------
        search_space : dict
        """
        self.searchspace_json = search_space
        self.space = json2space(self.searchspace_json)

        self.random_state = np.random.RandomState()
        self.population = []

        for _ in range(self.population_size):
            self._random_generate_individual()

    def trial_end(self, parameter_id, success, **kwargs):
        """
        To deal with trial failure. If a trial fails,
        random generate the parameters and add into the population.

        Afterwards, if there are queued (unsatisfied) trial requests, one of
        them is generated and sent through ``send_trial_callback``.

        Parameters
        ----------
        parameter_id : int
            Unique identifier for hyper-parameters used by this trial.
        success : bool
            True if the trial successfully completed; False if failed or terminated.
        **kwargs
            Not used
        """
        self.num_running_trials -= 1
        logger.info('trial (%d) end', parameter_id)

        if not success:
            if parameter_id in self.running_trials:
                # The individual never produced a result: drop it and refill
                # the population with a fresh random one.
                del self.running_trials[parameter_id]
                self._random_generate_individual()
            else:
                # The id is no longer tracked (``receive_trial_result`` removes
                # it once a result is recorded), so there is nothing to
                # replace. Previously this raised KeyError.
                logger.warning('trial (%s) failed but is not in running_trials, skip regeneration',
                               parameter_id)

        # NOTE(review): the threshold is ``> 1``, so a single queued request is
        # never dispatched from here -- confirm whether ``>= 1`` was intended.
        if self.credit > 1:
            param_id = self.param_ids.popleft()
            config = self._generate_individual(param_id)
            logger.debug('Send new trial (%d, %s) for reducing credit', param_id, config)
            self.send_trial_callback(param_id, config)
            self.credit -= 1
            self.num_running_trials += 1

    def generate_multiple_parameters(self, parameter_id_list, **kwargs):
        """
        Returns multiple sets of trial (hyper-)parameters, as iterable of serializable objects.

        Requests for which ``generate_parameters`` raises
        ``nni.NoMoreTrialError`` are skipped, so the returned list may be
        shorter than ``parameter_id_list``.

        Parameters
        ----------
        parameter_id_list : list of int
            Unique identifiers for each set of requested hyper-parameters.
        **kwargs
            ``st_callback``, when present, is stored as the send-trial callback.

        Returns
        -------
        list
            A list of newly generated configurations
        """
        result = []
        if 'st_callback' in kwargs:
            self.send_trial_callback = kwargs['st_callback']
        else:
            logger.warning('Send trial callback is not found in kwargs. Evolution tuner might not work properly.')

        for parameter_id in parameter_id_list:
            logger.debug("generating param for %s", parameter_id)
            try:
                res = self.generate_parameters(parameter_id, **kwargs)
            except nni.NoMoreTrialError:
                # The request has been queued by generate_parameters.
                continue
            self.num_running_trials += 1
            result.append(res)
        return result

    def _random_generate_individual(self):
        """
        Append one individual with every parameter randomly generated to the population.
        """
        is_rand = dict.fromkeys(self.space, True)

        config = json2parameter(self.searchspace_json, is_rand, self.random_state)
        self.population.append(Individual(config=config))

    def _generate_individual(self, parameter_id):
        """
        This function will generate the config for a trial.

        If the population still holds an individual without a result, that
        individual is taken out of the population and used as is.
        Otherwise, randomly choose a pair of individuals and compare their fitnesses.
        The worse of the pair is removed. The better of the pair stays in the
        population and a mutated version of it becomes the new individual.

        Parameters
        ----------
        parameter_id : int

        Returns
        -------
        dict
            A group of candidate parameters that evolution tuner generated.
        """
        # Index of the first individual that has not been evaluated yet, or -1.
        pos = next(
            (i for i, member in enumerate(self.population) if member.result is None),
            -1,
        )

        if pos != -1:
            indiv = copy.deepcopy(self.population[pos])
            self.population.pop(pos)
        else:
            random.shuffle(self.population)
            # After the shuffle, slots 0 and 1 are the randomly chosen pair.
            # Make slot 0 hold the better one (guarded so a population with
            # only 1 individual still works).
            if len(self.population) > 1 and self.population[0].result < self.population[1].result:
                self.population[0] = self.population[1]

            # Mutate exactly one randomly chosen position of the better individual.
            space = json2space(self.searchspace_json, self.population[0].config)
            mutation_pos = space[random.randint(0, len(space) - 1)]
            is_rand = {name: name == mutation_pos for name in self.space}
            config = json2parameter(
                self.searchspace_json, is_rand, self.random_state, self.population[0].config)

            # Slot 1 is either the worse individual or a duplicate reference
            # to the better one; either way it is dropped.
            if len(self.population) > 1:
                self.population.pop(1)

            indiv = Individual(config=config)

        # remove "_index" from config and save params-id
        self.running_trials[parameter_id] = indiv
        config = split_index(indiv.config)
        return config

    def generate_parameters(self, parameter_id, **kwargs):
        """
        This function will returns a dict of trial (hyper-)parameters.
        If no trial configuration is available for now, ``self.credit`` is increased by 1
        and the parameter id is queued so the config can be sent later.

        Parameters
        ----------
        parameter_id : int

        Returns
        -------
        dict
            One newly generated configuration.

        Raises
        ------
        RuntimeError
            If the population is empty.
        nni.NoMoreTrialError
            If the number of running trials has reached ``population_size``.
        """
        if not self.population:
            raise RuntimeError('The population is empty')

        if self.num_running_trials >= self.population_size:
            logger.warning("No enough trial config, population_size is suggested to be larger than trialConcurrency")
            self.credit += 1
            self.param_ids.append(parameter_id)
            raise nni.NoMoreTrialError('no more parameters now.')

        return self._generate_individual(parameter_id)

    def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
        """
        Record the result from a trial

        Parameters
        ----------
        parameter_id : int
        parameters : dict
        value : dict/float
            if value is dict, it should have "default" key.
            value is final metrics of the trial.

        Raises
        ------
        RuntimeError
            If ``parameter_id`` is not a currently running trial.
        """
        reward = extract_scalar_reward(value)

        if parameter_id not in self.running_trials:
            # Exception constructors do not %-format their arguments, so build
            # the message explicitly.
            raise RuntimeError(f'Received parameter_id {parameter_id} not in running_trials.')

        # restore the parameters containing "_index"
        config = self.running_trials.pop(parameter_id).config

        # Rewards are stored so that "larger is better" in both modes.
        if self.optimize_mode == OptimizeMode.Minimize:
            reward = -reward

        indiv = Individual(config=config, result=reward)
        self.population.append(indiv)

    def import_data(self, data):
        """
        Importing data is not supported by this tuner; the call is a no-op.
        """
        pass