# Source code for nni.compression.pytorch.quantization_speedup.calibrator

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import os
import logging
import tensorrt as trt
import pycuda.driver as cuda

logger = logging.getLogger(__name__)

class Calibrator(trt.IInt8Calibrator):
    """
    An INT8 calibrator that feeds batches of training data to TensorRT so it
    can collect activation statistics, and that persists the resulting
    calibration table to a cache file for reuse.
    """

    def __init__(self, training_data, cache_file, batch_size=64,
                 algorithm=trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2):
        """
        Parameters
        ----------
        training_data : numpy array
            The data using to calibrate quantization model
        cache_file : str
            The path user want to store calibrate cache file
        batch_size : int
            The batch_size of calibrating process
        algorithm : tensorrt.tensorrt.CalibrationAlgoType
            The algorithms of calibrating contains LEGACY_CALIBRATION,
            ENTROPY_CALIBRATION, ENTROPY_CALIBRATION_2, MINMAX_CALIBRATION.
            Please refer to https://docs.nvidia.com/deeplearning/tensorrt/api/
            python_api/infer/Int8/Calibrator.html for detail
        """
        trt.IInt8Calibrator.__init__(self)

        self.algorithm = algorithm
        self.cache_file = cache_file
        self.data = training_data
        self.batch_size = batch_size
        # Cursor into self.data marking the start of the next batch.
        self.current_index = 0

        # Allocate device memory once, large enough for a whole batch;
        # every get_batch() call reuses this buffer.
        self.device_input = cuda.mem_alloc(self.data[0].nbytes * self.batch_size)

    def get_algorithm(self):
        """Return the calibration algorithm chosen at construction time."""
        return self.algorithm

    def get_batch_size(self):
        """Return the number of samples fed to TensorRT per calibration batch."""
        return self.batch_size

    def get_batch(self, names):
        """
        This function is used to define the way of feeding calibrating data each batch.

        Parameters
        ----------
        names : str
            The names of the network inputs for each object in the bindings array

        Returns
        -------
        list
            A list of device memory pointers set to the memory containing each network
            input data, or an empty list if there are no more batches for calibration.
            You can allocate these device buffers with pycuda, for example,
            and then cast them to int to retrieve the pointer
        """
        # A trailing partial batch is dropped: returning None tells TensorRT
        # that calibration data is exhausted.
        if self.current_index + self.batch_size > self.data.shape[0]:
            return None

        # Integer floor division (was int(a / b)) — exact for any index size.
        current_batch = self.current_index // self.batch_size
        if current_batch % 10 == 0:
            logger.info("Calibrating batch %d, containing %d images", current_batch, self.batch_size)

        # Flatten the batch into a contiguous host buffer, then copy it into
        # the preallocated device buffer.
        batch = self.data[self.current_index:self.current_index + self.batch_size].ravel()
        cuda.memcpy_htod(self.device_input, batch)
        self.current_index += self.batch_size

        return [self.device_input]

    def read_calibration_cache(self):
        """
        If there is a cache, use it instead of calibrating again. Otherwise, implicitly return None.

        Returns
        -------
        cache object
            A cache object which contains calibration parameters for quantization
        """
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                return f.read()

    def write_calibration_cache(self, cache):
        """
        Write calibration cache to specific path.

        Parameters
        ----------
        cache : str
            The calibration cache to write
        """
        with open(self.cache_file, "wb") as f:
            f.write(cache)