import tensorflow as tf
import pathlib

# from keras model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# or from tf saved model
converter = tf.lite.TFLiteConverter.from_saved_model(tf_path_model)
# last from concrete functions
converter = tf.lite.TFLiteConverter.from_concrete_functions(tf_concrete_functions)  # list of ConcreteFunction objects
# start conversion
tflite_model = converter.convert()
# save model
tflite_model_file = pathlib.Path('./my_path')
tflite_model_file.write_bytes(tflite_model)
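Once saved, the .tflite file can be loaded back and executed with the TFLite interpreter. A minimal sketch (the input shape and dtype are read from the model itself):
import numpy as np
import tensorflow as tf

# Load the converted model and allocate its tensors
interpreter = tf.lite.Interpreter(model_path='./my_path')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Dummy input with the shape and dtype the model expects
dummy_input = np.zeros(input_details[0]['shape'], dtype=input_details[0]['dtype'])
interpreter.set_tensor(input_details[0]['index'], dummy_input)
interpreter.invoke()
prediction = interpreter.get_tensor(output_details[0]['index'])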
> Full integer quantization (with float fallback)
import tensorflow as tf

def representative_dataset_gen():
    for _ in range(num_calibration_steps):
        # Remember to pre-process your dataset as in training
        yield [input]
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
tflite_quant_model = converter.convert()
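For reference, a concrete representative_dataset_gen could look like the sketch below, where calibration_images is a hypothetical numpy array of a few hundred samples already pre-processed exactly as during training:
import numpy as np

# Hypothetical calibration set saved beforehand (file name is just an example)
calibration_images = np.load('calibration_images.npy').astype(np.float32)
num_calibration_steps = 100

def representative_dataset_gen():
    for i in range(num_calibration_steps):
        # One batch of size 1 per calibration step
        yield [calibration_images[i:i + 1]]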
> Full integer quantization (integer only)
import tensorflow as tf

def representative_dataset_gen():
    for _ in range(num_calibration_steps):
        # Remember to pre-process your dataset as in training
        yield [input]
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8 # or tf.uint8
converter.inference_output_type = tf.int8 # or tf.uint8
tflite_quant_model = converter.convert()
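A quick way to verify that the conversion really produced integer input and output is to inspect the quantized model with the interpreter:
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_content=tflite_quant_model)
print('input:', interpreter.get_input_details()[0]['dtype'])   # expected: int8
print('output:', interpreter.get_output_details()[0]['dtype'])  # expected: int8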
N.B.: With TensorFlow versions earlier than 2.3.0, follow the code below, otherwise, for rather mysterious reasons, the input and output will not be quantized.
import os
import tensorflow as tf

def representative_dataset_gen():
    for _ in range(num_calibration_steps):
        # Remember to pre-process your dataset as in training
        yield [input]
converter = tf.compat.v1.lite.TFLiteConverter.from_keras_model_file(os.path.join(MODEL_DIR, 'model.h5'))
converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
converter.representative_dataset = representative_dataset_gen
converter.experimental_new_converter = True
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8
tflite_full_integer_model = converter.convert()
In the video below you can find a complete overview (unfortunately in English) of all the transformations that can be performed with the TF-Lite converter.
TensorRT
> FP32/FP16 quantization
import tensorflow as tf

# select quantization format: 'FP32' or 'FP16'
FP = 'FP16'
params = tf.experimental.tensorrt.ConversionParams(
    precision_mode=FP)
converter = tf.experimental.tensorrt.Converter(
    input_saved_model_dir="my_dir", conversion_params=params)
converter.convert()
converter.save(output_saved_model_dir)
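The output is a regular SavedModel, so it can be loaded back and executed through its serving signature. A minimal sketch, assuming a single-input model exported with the default 'serving_default' signature and a hypothetical (1, 224, 224, 3) input:
import tensorflow as tf

saved_model_loaded = tf.saved_model.load(output_saved_model_dir)
infer = saved_model_loaded.signatures['serving_default']

# Hypothetical input shape: adapt it to your model; depending on how the model
# was exported, the input may have to be passed as a keyword argument matching
# the signature's input name.
x = tf.zeros((1, 224, 224, 3), dtype=tf.float32)
predictions = infer(x)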
> FP32/FP16 quantization with pre-built engines
import tensorflow as tf

# select quantization format: 'FP32' or 'FP16'
FP = 'FP16'

def representative_dataset_gen():
    for _ in range(num_calibration_steps):
        # Get sample input data as a numpy array in a method of your choosing.
        yield [input]
params = tf.experimental.tensorrt.ConversionParams(
    precision_mode=FP,
    # Set this to a large enough number so it can cache all the engines.
    maximum_cached_engines=16)
converter = tf.experimental.tensorrt.Converter(
    input_saved_model_dir="my_dir", conversion_params=params)
converter.convert()
converter.build(input_fn=representative_dataset_gen) # Generate corresponding TRT engines
converter.save(output_saved_model_dir) # Generated engines will be saved.
> Full integer quantization with pre-built engines
import tensorflow as tf

# select quantization format
FP = 'INT8'

def representative_dataset_gen():
    for _ in range(num_calibration_steps):
        # Get sample input data as a numpy array in a method of your choosing.
        yield [input]
params = tf.experimental.tensorrt.ConversionParams(
    precision_mode=FP,
    # Currently only one INT8 engine is supported in this mode.
    maximum_cached_engines=1,
    use_calibration=True)
converter = tf.experimental.tensorrt.Converter(
    input_saved_model_dir="my_dir", conversion_params=params)
converter.convert(calibration_input_fn=representative_dataset_gen)
converter.build(input_fn=representative_dataset_gen)
# Save the TRT engine and the engines.
converter.save(output_saved_model_dir)
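To get a rough idea of how much of the graph was actually converted, one option is to count the TRTEngineOp nodes in the reloaded model. A sketch, again assuming the default 'serving_default' signature:
import tensorflow as tf

loaded = tf.saved_model.load(output_saved_model_dir)
graph_def = loaded.signatures['serving_default'].graph.as_graph_def()

# TRT-converted subgraphs show up as TRTEngineOp nodes, either in the main
# graph or inside its function library
num_engines = sum(node.op == 'TRTEngineOp' for node in graph_def.node)
for func in graph_def.library.function:
    num_engines += sum(node.op == 'TRTEngineOp' for node in func.node_def)
print('TRTEngineOp nodes:', num_engines)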