Source code for nni.retiarii.evaluator.pytorch.lightning

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import os
import warnings
from pathlib import Path
from typing import Any, Dict, Union, Optional, List, Callable, Type

import pytorch_lightning as pl
import torch.nn as nn
import torch.nn.functional as nn_functional
import torch.optim as optim
import torchmetrics
import torch.utils.data as torch_data

import nni
from nni.common.serializer import is_traceable
try:
    from .cgo import trainer as cgo_trainer
    cgo_import_failed = False
except ImportError:
    cgo_import_failed = True

from nni.retiarii.graph import Evaluator
from nni.typehint import Literal


__all__ = ['LightningModule', 'Trainer', 'DataLoader', 'Lightning', 'Classification', 'Regression']


class LightningModule(pl.LightningModule):
    """
    Basic wrapper of generated model. Lightning modules used in NNI should inherit this class.

    It's a subclass of ``pytorch_lightning.LightningModule``.
    See https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html
    """

    running_mode: Literal['multi', 'oneshot'] = 'multi'
    """An indicator of whether the current module is running in a multi-trial experiment or a one-shot one.
    This flag should be set automatically by experiments when they start to run.
    """
    def set_model(self, model: Union[Callable[[], nn.Module], nn.Module]) -> None:
        """Set the inner model (architecture) to train / evaluate.

        Parameters
        ----------
        model : callable or nn.Module
            Either an ``nn.Module`` instance, or a callable that returns one.
        """
        if isinstance(model, nn.Module):
            self.model = model
        else:
            self.model = model()
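
# Illustrative sketch (not part of the original module): a minimal custom
# Lightning module. NNI calls ``set_model`` before training starts, after which
# the sampled architecture is available as ``self.model``. The class name is
# hypothetical.
class _ExampleModule(LightningModule):
    def training_step(self, batch, batch_idx):
        x, y = batch
        loss = nn_functional.cross_entropy(self.model(x), y)
        self.log('train_loss', loss)
        return loss

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=1e-3)
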

Trainer = nni.trace(pl.Trainer)
Trainer.__doc__ = """
Traced version of ``pytorch_lightning.Trainer``.
See https://pytorch-lightning.readthedocs.io/en/stable/common/trainer.html
"""

DataLoader = nni.trace(torch_data.DataLoader)
DataLoader.__doc__ = """
Traced version of ``torch.utils.data.DataLoader``.
See https://pytorch.org/docs/stable/data.html
"""
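
# Illustrative sketch (not part of the original module): objects created from the
# traced classes above record their init arguments, so NNI can serialize them and
# re-create them inside each trial. ``dataset`` is a placeholder argument.
def _example_traced_objects(dataset):
    train_loader = DataLoader(dataset, batch_size=64, shuffle=True)
    trainer = Trainer(max_epochs=10)
    # Both objects carry trace metadata, which the evaluator below checks for.
    assert is_traceable(train_loader) and is_traceable(trainer)
    return train_loader, trainer
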

@nni.trace
class Lightning(Evaluator):
    """
    Delegate the whole training to PyTorch Lightning.

    Since the arguments passed to the initialization need to be serialized, the
    ``LightningModule``, ``Trainer`` and ``DataLoader`` defined in this file should be used.
    Another option is to hide the dataloaders inside the Lightning module, in which case
    dataloaders are not required for this class to work.

    Following the programming style of Lightning, metrics sent to NNI should be obtained from
    ``callback_metrics`` in the trainer. Two hooks are added, at the end of the validation epoch
    and at the end of ``fit``, respectively. The metric name and type depend on the specific task.

    .. warning::

        The Lightning evaluator is stateful. If you reuse a previous Lightning evaluator,
        note that the inner ``lightning_module`` and ``trainer`` will be reused as well.

    Parameters
    ----------
    lightning_module
        Lightning module that defines the training logic.
    trainer
        Lightning trainer that handles the training.
    train_dataloaders
        Used in ``trainer.fit()``. A PyTorch DataLoader with training samples.
        If the ``lightning_module`` has a predefined ``train_dataloader`` method, this will be skipped.
        It can be `any type of dataloader supported by Lightning <https://pytorch-lightning.readthedocs.io/en/stable/guides/data.html>`__.
    val_dataloaders
        Used in ``trainer.fit()``. Either a single PyTorch DataLoader or a list of them, specifying validation samples.
        If the ``lightning_module`` has a predefined ``val_dataloaders`` method, this will be skipped.
        It can be `any type of dataloader supported by Lightning <https://pytorch-lightning.readthedocs.io/en/stable/guides/data.html>`__.
    """

    def __init__(self, lightning_module: LightningModule, trainer: Trainer,
                 train_dataloaders: Optional[Any] = None,
                 val_dataloaders: Optional[Any] = None,
                 train_dataloader: Optional[Any] = None):
        assert isinstance(lightning_module, LightningModule), \
            f'Lightning module must be an instance of {__name__}.LightningModule.'

        if train_dataloader is not None:
            warnings.warn('`train_dataloader` is deprecated and replaced with `train_dataloaders`.', DeprecationWarning)
            train_dataloaders = train_dataloader

        if cgo_import_failed:
            assert isinstance(trainer, pl.Trainer) and is_traceable(trainer), f'Trainer must be imported from {__name__}'
        else:
            # This is not `isinstance(trainer, Trainer)` because with a different trace call, it can be different.
            assert (isinstance(trainer, pl.Trainer) and is_traceable(trainer)) or isinstance(trainer, cgo_trainer.Trainer), \
                f'Trainer must be imported from {__name__} or nni.retiarii.evaluator.pytorch.cgo.trainer'

        if not _check_dataloader(train_dataloaders):
            warnings.warn(f'Please try to wrap PyTorch DataLoader with nni.trace or '
                          f'import DataLoader from {__name__}: {train_dataloaders}',
                          RuntimeWarning)
        if not _check_dataloader(val_dataloaders):
            warnings.warn(f'Please try to wrap PyTorch DataLoader with nni.trace or '
                          f'import DataLoader from {__name__}: {val_dataloaders}',
                          RuntimeWarning)

        self.module = lightning_module
        self.trainer = trainer
        self.train_dataloaders = train_dataloaders
        self.val_dataloaders = val_dataloaders

    @staticmethod
    def _load(ir):
        return Lightning(ir['module'], ir['trainer'], ir['train_dataloaders'], ir['val_dataloaders'])

    def _dump(self):
        return {
            'type': self.__class__,
            'module': self.module,
            'trainer': self.trainer,
            'train_dataloaders': self.train_dataloaders,
            'val_dataloaders': self.val_dataloaders
        }

    def _execute(self, model_cls):
        return self.fit(model_cls)

    @property
    def train_dataloader(self):
        warnings.warn('train_dataloader is deprecated, please use `train_dataloaders`.', DeprecationWarning)
        return self.train_dataloaders

    def __eq__(self, other):
        eq_func = False
        eq_args = False
        if other is None:
            return False
        if hasattr(self, "function") and hasattr(other, "function"):
            eq_func = getattr(self, "function") == getattr(other, "function")
        elif not (hasattr(self, "function") or hasattr(other, "function")):
            eq_func = True
        if hasattr(self, "arguments") and hasattr(other, "arguments"):
            eq_args = getattr(self, "arguments") == getattr(other, "arguments")
        elif not (hasattr(self, "arguments") or hasattr(other, "arguments")):
            eq_args = True
        return eq_func and eq_args

    def fit(self, model):
        """
        Fit the model with the provided dataloaders, using the Lightning trainer.

        Parameters
        ----------
        model : nn.Module
            The model to fit.
        """
        self.module.set_model(model)
        return self.trainer.fit(self.module, self.train_dataloaders, self.val_dataloaders)
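
# Illustrative usage sketch (not part of the original module), wiring the
# hypothetical ``_ExampleModule`` above together with traced ``Trainer`` and
# ``DataLoader`` instances; ``train_set`` and ``val_set`` are placeholder datasets.
def _example_lightning_evaluator(train_set, val_set):
    evaluator = Lightning(
        _ExampleModule(),
        Trainer(max_epochs=10),
        train_dataloaders=DataLoader(train_set, batch_size=64, shuffle=True),
        val_dataloaders=DataLoader(val_set, batch_size=64),
    )
    return evaluator
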

def _check_dataloader(dataloader):
    # Check the type of dataloader recursively.
    if isinstance(dataloader, list):
        return all(_check_dataloader(d) for d in dataloader)
    if isinstance(dataloader, dict):
        return all(_check_dataloader(v) for v in dataloader.values())
    if isinstance(dataloader, torch_data.DataLoader):
        return is_traceable(dataloader)
    return True


### The following are some commonly used Lightning modules ###

class _SupervisedLearningModule(LightningModule):

    trainer: pl.Trainer

    def __init__(self, criterion: Type[nn.Module], metrics: Dict[str, Type[torchmetrics.Metric]],
                 learning_rate: float = 0.001,
                 weight_decay: float = 0.,
                 optimizer: Type[optim.Optimizer] = optim.Adam,
                 export_onnx: Union[Path, str, bool, None] = None):
        super().__init__()
        self.save_hyperparameters('criterion', 'optimizer', 'learning_rate', 'weight_decay')
        self.criterion = criterion()
        self.optimizer = optimizer
        self.metrics = nn.ModuleDict({name: cls() for name, cls in metrics.items()})

        if export_onnx is None or export_onnx is True:
            self.export_onnx = Path(os.environ.get('NNI_OUTPUT_DIR', '.')) / 'model.onnx'
        elif export_onnx:
            self.export_onnx = Path(export_onnx)
        else:
            self.export_onnx = None

    def forward(self, x):
        y_hat = self.model(x)
        return y_hat

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log('train_loss', loss, prog_bar=True)
        for name, metric in self.metrics.items():
            self.log('train_' + name, metric(y_hat, y), prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        if self.running_mode == 'multi' and self.export_onnx is not None:
            self.export_onnx.parent.mkdir(exist_ok=True)
            try:
                self.to_onnx(self.export_onnx, x, export_params=True)
            except RuntimeError as e:
                warnings.warn(f'ONNX conversion failed. As a result, you might not be able to use visualization. '
                              f'Error message: {e}')
            self.export_onnx = None

        self.log('val_loss', self.criterion(y_hat, y), prog_bar=True)
        for name, metric in self.metrics.items():
            self.log('val_' + name, metric(y_hat, y), prog_bar=True)

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        self.log('test_loss', self.criterion(y_hat, y), prog_bar=True)
        for name, metric in self.metrics.items():
            self.log('test_' + name, metric(y_hat, y), prog_bar=True)

    def configure_optimizers(self):
        return self.optimizer(self.parameters(), lr=self.hparams.learning_rate,
                              weight_decay=self.hparams.weight_decay)  # type: ignore

    def on_validation_epoch_end(self):
        if not self.trainer.sanity_checking and self.running_mode == 'multi':
            # Don't report metric when sanity checking
            nni.report_intermediate_result(self._get_validation_metrics())

    def on_fit_end(self):
        if self.running_mode == 'multi':
            nni.report_final_result(self._get_validation_metrics())

    def _get_validation_metrics(self):
        if len(self.metrics) == 1:
            metric_name = next(iter(self.metrics))
            return self.trainer.callback_metrics['val_' + metric_name].item()
        else:
            warnings.warn('Multiple metrics without "default" is not supported by current framework.')
            return {name: self.trainer.callback_metrics['val_' + name].item() for name in self.metrics}


class _AccuracyWithLogits(torchmetrics.Accuracy):
    def update(self, pred, target):
        return super().update(nn_functional.softmax(pred, dim=-1), target)


@nni.trace
class _ClassificationModule(_SupervisedLearningModule):
    def __init__(self, criterion: Type[nn.Module] = nn.CrossEntropyLoss,
                 learning_rate: float = 0.001,
                 weight_decay: float = 0.,
                 optimizer: Type[optim.Optimizer] = optim.Adam,
                 export_onnx: bool = True):
        super().__init__(criterion, {'acc': _AccuracyWithLogits},
                         learning_rate=learning_rate, weight_decay=weight_decay,
                         optimizer=optimizer, export_onnx=export_onnx)
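
# Illustrative check (not part of the original module) for the ``_check_dataloader``
# helper defined above: it accepts traced dataloaders, including nested lists and
# dicts of them, and flags plain ``torch.utils.data.DataLoader`` instances created
# without tracing. ``dataset`` is a placeholder argument.
def _example_check_dataloader(dataset):
    traced = DataLoader(dataset, batch_size=8)             # traced class from this file
    plain = torch_data.DataLoader(dataset, batch_size=8)   # vanilla, not traced
    assert _check_dataloader([traced, {'extra': traced}])
    assert not _check_dataloader(plain)
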

class Classification(Lightning):
    """
    Evaluator that is used for classification.

    Parameters
    ----------
    criterion : nn.Module
        Class for criterion module (not an instance). Default: ``nn.CrossEntropyLoss``
    learning_rate : float
        Learning rate. Default: 0.001
    weight_decay : float
        L2 weight decay. Default: 0
    optimizer : Optimizer
        Class for optimizer (not an instance). Default: ``Adam``
    train_dataloaders : DataLoader
        Used in ``trainer.fit()``. A PyTorch DataLoader with training samples.
        If the ``lightning_module`` has a predefined ``train_dataloader`` method, this will be skipped.
    val_dataloaders : DataLoader or List of DataLoader
        Used in ``trainer.fit()``. Either a single PyTorch DataLoader or a list of them, specifying validation samples.
        If the ``lightning_module`` has a predefined ``val_dataloaders`` method, this will be skipped.
    export_onnx : bool
        If true, the model will be exported to ``model.onnx`` before training starts. Default: true
    trainer_kwargs : dict
        Optional keyword arguments passed to trainer. See
        `Lightning documentation <https://pytorch-lightning.readthedocs.io/en/stable/common/trainer.html>`__ for details.

    Examples
    --------
    >>> evaluator = Classification()

    To use a customized criterion and optimizer:

    >>> evaluator = Classification(nn.LabelSmoothingCrossEntropy, optimizer=torch.optim.SGD)

    Extra keyword arguments will be passed to the trainer, some of which might be necessary to enable GPU acceleration:

    >>> evaluator = Classification(accelerator='gpu', devices=2, strategy='ddp')
    """

    def __init__(self, criterion: Type[nn.Module] = nn.CrossEntropyLoss,
                 learning_rate: float = 0.001,
                 weight_decay: float = 0.,
                 optimizer: Type[optim.Optimizer] = optim.Adam,
                 train_dataloaders: Optional[DataLoader] = None,
                 val_dataloaders: Union[DataLoader, List[DataLoader], None] = None,
                 export_onnx: bool = True,
                 train_dataloader: Optional[DataLoader] = None,
                 **trainer_kwargs):
        if train_dataloader is not None:
            warnings.warn('`train_dataloader` is deprecated and replaced with `train_dataloaders`.', DeprecationWarning)
            train_dataloaders = train_dataloader
        module = _ClassificationModule(criterion=criterion, learning_rate=learning_rate,
                                       weight_decay=weight_decay, optimizer=optimizer,
                                       export_onnx=export_onnx)
        super().__init__(module, Trainer(**trainer_kwargs),
                         train_dataloaders=train_dataloaders, val_dataloaders=val_dataloaders)
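
# Illustrative sketch (not part of the original module): a typical classification
# setup. ``train_set`` and ``val_set`` are placeholder datasets; extra keyword
# arguments such as ``max_epochs`` are forwarded to the Lightning trainer.
def _example_classification(train_set, val_set):
    return Classification(
        learning_rate=1e-3,
        train_dataloaders=DataLoader(train_set, batch_size=64, shuffle=True),
        val_dataloaders=DataLoader(val_set, batch_size=64),
        max_epochs=10,
    )
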

@nni.trace
class _RegressionModule(_SupervisedLearningModule):
    def __init__(self, criterion: Type[nn.Module] = nn.MSELoss,
                 learning_rate: float = 0.001,
                 weight_decay: float = 0.,
                 optimizer: Type[optim.Optimizer] = optim.Adam,
                 export_onnx: bool = True):
        super().__init__(criterion, {'mse': torchmetrics.MeanSquaredError},
                         learning_rate=learning_rate, weight_decay=weight_decay,
                         optimizer=optimizer, export_onnx=export_onnx)

class Regression(Lightning):
    """
    Evaluator that is used for regression.

    Parameters
    ----------
    criterion : nn.Module
        Class for criterion module (not an instance). Default: ``nn.MSELoss``
    learning_rate : float
        Learning rate. Default: 0.001
    weight_decay : float
        L2 weight decay. Default: 0
    optimizer : Optimizer
        Class for optimizer (not an instance). Default: ``Adam``
    train_dataloaders : DataLoader
        Used in ``trainer.fit()``. A PyTorch DataLoader with training samples.
        If the ``lightning_module`` has a predefined ``train_dataloader`` method, this will be skipped.
    val_dataloaders : DataLoader or List of DataLoader
        Used in ``trainer.fit()``. Either a single PyTorch DataLoader or a list of them, specifying validation samples.
        If the ``lightning_module`` has a predefined ``val_dataloaders`` method, this will be skipped.
    export_onnx : bool
        If true, the model will be exported to ``model.onnx`` before training starts. Default: true
    trainer_kwargs : dict
        Optional keyword arguments passed to trainer. See
        `Lightning documentation <https://pytorch-lightning.readthedocs.io/en/stable/common/trainer.html>`__ for details.

    Examples
    --------
    >>> evaluator = Regression()

    Extra keyword arguments will be passed to the trainer, some of which might be necessary to enable GPU acceleration:

    >>> evaluator = Regression(gpus=1)
    """

    def __init__(self, criterion: Type[nn.Module] = nn.MSELoss,
                 learning_rate: float = 0.001,
                 weight_decay: float = 0.,
                 optimizer: Type[optim.Optimizer] = optim.Adam,
                 train_dataloaders: Optional[DataLoader] = None,
                 val_dataloaders: Union[DataLoader, List[DataLoader], None] = None,
                 export_onnx: bool = True,
                 train_dataloader: Optional[DataLoader] = None,
                 **trainer_kwargs):
        if train_dataloader is not None:
            warnings.warn('`train_dataloader` is deprecated and replaced with `train_dataloaders`.', DeprecationWarning)
            train_dataloaders = train_dataloader
        module = _RegressionModule(criterion=criterion, learning_rate=learning_rate,
                                   weight_decay=weight_decay, optimizer=optimizer,
                                   export_onnx=export_onnx)
        super().__init__(module, Trainer(**trainer_kwargs),
                         train_dataloaders=train_dataloaders, val_dataloaders=val_dataloaders)
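
# Illustrative sketch (not part of the original module): a regression evaluator
# with a custom optimizer class; the reported metric is derived from ``val_mse``
# logged by ``_RegressionModule`` above. ``train_set``/``val_set`` are placeholders.
def _example_regression(train_set, val_set):
    return Regression(
        optimizer=optim.SGD,
        learning_rate=1e-2,
        train_dataloaders=DataLoader(train_set, batch_size=32, shuffle=True),
        val_dataloaders=DataLoader(val_set, batch_size=32),
        max_epochs=5,
    )
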