Source code for nni.compression.pytorch.quantization_speedup.backend

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

class BaseModelSpeedup:
    """
    Base speedup class for backend engine
    """
    def __init__(self, model, config):
        """
        Parameters
        ----------
        model : pytorch model
            The model to speed up by quantization.
        config : dict
            Config recording bit number and name of layers.
        """
        self.model = model
        self.config = config

    def inference(self, test_data):
        """
        This function should be overridden by a subclass to provide inference
        ability, which should return both the output and the inference time.

        Parameters
        ----------
        test_data : numpy data
            test data given to the inference engine

        Returns
        -------
        numpy data
            output data generated by the inference
        float
            latency of the inference process
        """
        raise NotImplementedError('Backend engine must overload inference()')

    def compress(self):
        """
        This function should be overridden by a subclass to build the inference
        engine which will be used to process input data.
        """
        raise NotImplementedError('Backend engine must overload compress()')

    def export_quantized_model(self, path):
        """
        This function should be overridden by a subclass to export the quantized
        model to the given path.

        Parameters
        ----------
        path : str
            path where the quantized model will be exported
        """
        raise NotImplementedError('Backend engine must overload export_quantized_model()')
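
For reference, below is a minimal sketch of how a backend is expected to subclass BaseModelSpeedup. The class NaiveTorchSpeedup and everything inside it are illustrative assumptions, not part of the NNI source; a real backend (such as the TensorRT engine shipped with this package) would build a dedicated inference engine in compress() rather than reusing the PyTorch model directly.

# --- Illustrative sketch only; NaiveTorchSpeedup is not part of NNI. ---
import time

import torch


class NaiveTorchSpeedup(BaseModelSpeedup):
    def compress(self):
        # "Build" the inference engine: here we simply switch the PyTorch
        # model to eval mode instead of constructing a real engine.
        self.model.eval()

    def inference(self, test_data):
        # Run one forward pass on the numpy input and measure its latency.
        tensor = torch.from_numpy(test_data)
        start = time.time()
        with torch.no_grad():
            output = self.model(tensor)
        latency = time.time() - start
        return output.numpy(), latency

    def export_quantized_model(self, path):
        # Persist the model weights to the given path.
        torch.save(self.model.state_dict(), path)

A caller would instantiate the subclass with a model and config, call compress() once to build the engine, and then call inference() per batch, e.g. engine = NaiveTorchSpeedup(model, config); engine.compress(); output, latency = engine.inference(batch).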