TF-Lite & TF-TRT

TensorFlow Lite

> TF-Lite simple conversion

import tensorflow as tf
import pathlib

# Three ways to build a TFLiteConverter:
# 1) from an in-memory Keras model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# 2) from a SavedModel directory on disk
converter = tf.lite.TFLiteConverter.from_saved_model(tf_path_model)
# 3) from concrete functions (BUG FIX: method name was misspelled
#    "from_concrete_funcions"; note it expects a list of
#    tf.ConcreteFunction objects, not a path — TODO confirm the
#    argument actually holds concrete functions)
converter = tf.lite.TFLiteConverter.from_concrete_functions(tf_path_concrete_functions)

# Run the conversion (BUG FIX: the method is convert(), not converter())
tflite_model = converter.convert()

# Persist the FlatBuffer model to disk
tflite_model_file = pathlib.Path('./my_path')
tflite_model_file.write_bytes(tflite_model)

> Float16 quantization

import tensorflow as tf

# Float16 quantization: load a SavedModel and shrink its weights to
# half precision (saved_model_dir must point at an exported model).
fp16_converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
fp16_converter.optimizations = [tf.lite.Optimize.DEFAULT]
fp16_converter.target_spec.supported_types = [tf.float16]

tflite_quant_model = fp16_converter.convert()

> Dynamic range quantization

import tensorflow as tf

# Dynamic-range quantization: only the default optimization flag is set,
# no representative dataset and no target-spec constraints.
dr_converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
dr_converter.optimizations = [tf.lite.Optimize.DEFAULT]

tflite_quant_model = dr_converter.convert()

> Full integer quantization (with float fallback)

import tensorflow as tf

def representative_dataset_gen():
  # Feed num_calibration_steps sample batches to the calibrator.
  # Pre-process each sample exactly as during training.
  # NOTE(review): `input` is a placeholder for real calibration data —
  # replace it before running.
  step = 0
  while step < num_calibration_steps:
    yield [input]
    step += 1

# Full-integer quantization with float fallback: ops without an int8
# kernel stay in float.
int8_converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
int8_converter.optimizations = [tf.lite.Optimize.DEFAULT]
int8_converter.representative_dataset = representative_dataset_gen

tflite_quant_model = int8_converter.convert()

> Full integer quantization (integer only)

import tensorflow as tf

def representative_dataset_gen():
  # One list of input tensors per calibration step; pre-process exactly
  # as during training. NOTE(review): `input` is a placeholder for real
  # calibration data — replace it before running.
  for _step in range(num_calibration_steps):
    yield [input]

int_only = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
int_only.optimizations = [tf.lite.Optimize.DEFAULT]
int_only.representative_dataset = representative_dataset_gen

# Integer-only: restrict to int8 builtin kernels and make the model's
# input/output tensors integer as well.
int_only.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
int_only.inference_input_type = tf.int8  # or tf.uint8
int_only.inference_output_type = tf.int8  # or tf.uint8

tflite_quant_model = int_only.convert()

N.B.: Con versioni di TensorFlow precedenti alla 2.3.0 usare il codice seguente, altrimenti input e output non vengono effettivamente quantizzati.

import os

import tensorflow as tf

def representative_dataset_gen():
  # One list of input tensors per calibration step; pre-process exactly
  # as during training. NOTE(review): `input` is a placeholder for real
  # calibration data — replace it before running.
  for _ in range(num_calibration_steps):
    yield [input]

# Pre-TF-2.3.0 path: use the TF1-compat converter loaded directly from
# a Keras .h5 file. (BUG FIX: the snippet used os.path.join without
# importing os.)
converter = tf.compat.v1.lite.TFLiteConverter.from_keras_model_file(os.path.join(MODEL_DIR, 'model.h5'))

converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
converter.representative_dataset = representative_dataset_gen
converter.experimental_new_converter = True
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

tflite_full_integer_model = converter.convert()

Nel video qui sotto potete trovare una trattazione completa (purtroppo in inglese 🤯) di tutte le trasformazioni eseguibili con il convertitore di TF-Lite.

TensorRT

> FP32/FP16 quantization

import tensorflow as tf

# Choose the TensorRT precision: 'FP32' or 'FP16'.
FP = 'FP16'

trt_params = tf.experimental.tensorrt.ConversionParams(precision_mode=FP)

trt_converter = tf.experimental.tensorrt.Converter(
    input_saved_model_dir="my_dir", conversion_params=trt_params)

# Convert in place, then write the TRT-optimized SavedModel out.
trt_converter.convert()
trt_converter.save(output_saved_model_dir)

> FP32/FP16 quantization with pre-built engines

import tensorflow as tf

# Choose the TensorRT precision: 'FP32' or 'FP16'.
FP = 'FP16'

def representative_dataset_gen():
  # One list of sample numpy inputs per step, obtained however you like.
  step = 0
  while step < num_calibration_steps:
    yield [input]
    step += 1

# Cache enough engines that every input shape keeps its own engine.
trt_params = tf.experimental.tensorrt.ConversionParams(
    precision_mode=FP,
    maximum_cached_engines=16)
trt_converter = tf.experimental.tensorrt.Converter(
    input_saved_model_dir="my_dir", conversion_params=trt_params)

trt_converter.convert()

# Pre-build the TRT engines for the sample inputs, then save the model
# together with the generated engines.
trt_converter.build(input_fn=representative_dataset_gen)
trt_converter.save(output_saved_model_dir)

> Full integer quantization with pre-built engines

import tensorflow as tf

# INT8 TensorRT quantization with calibration and pre-built engines.
FP = 'INT8'

def representative_dataset_gen():
  # One list of sample numpy inputs per calibration step.
  # NOTE(review): `input` is a placeholder for real calibration data —
  # replace it before running.
  for _ in range(num_calibration_steps):
    yield [input]

params = tf.experimental.tensorrt.ConversionParams(
    precision_mode=FP,  # BUG FIX: this comma was missing -> SyntaxError
    # Currently only one INT8 engine is supported in this mode.
    maximum_cached_engines=1,
    use_calibration=True)
converter = tf.experimental.tensorrt.Converter(
    input_saved_model_dir="my_dir", conversion_params=params)

# INT8 requires a calibration pass, so the dataset is fed to convert().
converter.convert(calibration_input_fn=representative_dataset_gen)

# Pre-build the TRT engines for the sample inputs.
converter.build(input_fn=representative_dataset_gen)

# Save the converted SavedModel together with the generated engines.
converter.save(output_saved_model_dir)