# Source code for nni.compression.pytorch.quantization_speedup.calibrator

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import os
import logging
import tensorrt as trt
import pycuda.driver as cuda

logger = logging.getLogger(__name__)

class Calibrator(trt.IInt8Calibrator):
    """
    An INT8 calibrator that feeds batches of training data to TensorRT so it
    can collect activation statistics, and that persists the resulting
    calibration table to a cache file for reuse.
    """

    def __init__(self, training_data, cache_file, batch_size=64,
                 algorithm=trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2):
        """
        Parameters
        ----------
        training_data : numpy array
            The data using to calibrate quantization model
        cache_file : str
            The path user want to store calibrate cache file
        batch_size : int
            The batch_size of calibrating process
        algorithm : tensorrt.tensorrt.CalibrationAlgoType
            The algorithms of calibrating contains LEGACY_CALIBRATION,
            ENTROPY_CALIBRATION, ENTROPY_CALIBRATION_2, MINMAX_CALIBRATION.
            Please refer to https://docs.nvidia.com/deeplearning/tensorrt/api/
            python_api/infer/Int8/Calibrator.html for detail
        """
        trt.IInt8Calibrator.__init__(self)

        self.algorithm = algorithm
        self.cache_file = cache_file
        self.data = training_data
        self.batch_size = batch_size
        # Cursor into self.data marking the start of the next batch.
        self.current_index = 0

        # Allocate device memory once, large enough for a whole batch;
        # every get_batch() call reuses this buffer.
        self.device_input = cuda.mem_alloc(self.data[0].nbytes * self.batch_size)

    def get_algorithm(self):
        """Return the calibration algorithm chosen at construction time."""
        return self.algorithm

    def get_batch_size(self):
        """Return the number of samples fed to TensorRT per calibration batch."""
        return self.batch_size

    def get_batch(self, names):
        """
        This function is used to define the way of feeding calibrating data each batch.

        Parameters
        ----------
        names : str
            The names of the network inputs for each object in the bindings array

        Returns
        -------
        list
            A list of device memory pointers set to the memory containing each network
            input data, or an empty list if there are no more batches for calibration.
            You can allocate these device buffers with pycuda, for example,
            and then cast them to int to retrieve the pointer
        """
        # A trailing partial batch is dropped: returning None tells TensorRT
        # that calibration data is exhausted.
        if self.current_index + self.batch_size > self.data.shape[0]:
            return None

        # Integer floor division (was int(a / b)) — exact for any index size.
        current_batch = self.current_index // self.batch_size
        if current_batch % 10 == 0:
            logger.info("Calibrating batch %d, containing %d images", current_batch, self.batch_size)

        # Flatten the batch into a contiguous host buffer, then copy it into
        # the preallocated device buffer.
        batch = self.data[self.current_index:self.current_index + self.batch_size].ravel()
        cuda.memcpy_htod(self.device_input, batch)
        self.current_index += self.batch_size

        return [self.device_input]

    def read_calibration_cache(self):
        """
        If there is a cache, use it instead of calibrating again. Otherwise, implicitly return None.

        Returns
        -------
        cache object
            A cache object which contains calibration parameters for quantization
        """
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                return f.read()

    def write_calibration_cache(self, cache):
        """
        Write calibration cache to specific path.

        Parameters
        ----------
        cache : str
            The calibration cache to write
        """
        with open(self.cache_file, "wb") as f:
            f.write(cache)