import tensorflow as tf
# TensorRT precision mode handed to the converter below: 'FP32' or 'FP16'.
FP = "FP16"
def representative_dataset_gen():
    """Yield one single-element input list per calibration step.

    Used as the ``input_fn`` callback of ``converter.build`` in this script.

    NOTE(review): ``num_calibration_steps`` is not defined in the visible
    source; it must exist at module level before this generator is consumed.
    NOTE(review): ``input`` is a placeholder. Unless a module-level ``input``
    is bound to real sample data, the name resolves to the Python builtin
    function -- replace it with an actual sample before running.

    Yields:
        A list holding the single sample input for one step.
    """
    for _ in range(num_calibration_steps):
        # Get sample input data as a numpy array in a method of your choosing.
        yield [input]
# Conversion settings; the precision mode comes from the FP constant.
params = tf.experimental.tensorrt.ConversionParams(
    precision_mode=FP,
    # Set this to a large enough number so it can cache all the engines.
    maximum_cached_engines=16,
)
converter = tf.experimental.tensorrt.Converter(
    input_saved_model_dir="my_dir",
    conversion_params=params,
)
converter.convert()
# Generate the corresponding TRT engines from the sample inputs.
converter.build(input_fn=representative_dataset_gen)
# Generated engines will be saved.
# NOTE(review): `output_saved_model_dir` is not assigned anywhere in the
# visible source -- confirm it is defined before this line runs.
converter.save(output_saved_model_dir)