How to get quantized weights from TensorFlow's quantization aware training with experimental quantization
I am using TensorFlow's quantization-aware training API and want to deploy a model with an arbitrary bit-width. Since tflite deployment only supports 8-bit quantization, I will deploy with a custom inference algorithm, but I still need to access the model's weights at the correct size.
Currently, after quantization-aware training, my model is still in floating point, and as far as I know the only way to access the quantized weights is to convert the model to tflite format. However, that is impossible when using the experimental features.
Here are my quantization config classes:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import clone_model
import tensorflow_model_optimization as tfmot
from tensorflow_model_optimization.quantization.keras import (
    quantize_annotate_layer, quantize_apply, quantize_scope)

class Quantizer(tfmot.quantization.keras.QuantizeConfig):
    # Configure how to quantize weights.
    def get_weights_and_quantizers(self, layer):
        return [(layer.kernel, tfmot.quantization.keras.quantizers.LastValueQuantizer(
            num_bits=8, symmetric=True, narrow_range=False, per_axis=False))]

    # Configure how to quantize activations.
    def get_activations_and_quantizers(self, layer):
        return [(layer.activation, tfmot.quantization.keras.quantizers.MovingAverageQuantizer(
            num_bits=8, symmetric=False, narrow_range=False, per_axis=False))]

    def set_quantize_weights(self, layer, quantize_weights):
        # Add this line for each item returned in `get_weights_and_quantizers`,
        # in the same order.
        layer.kernel = quantize_weights[0]

    def set_quantize_activations(self, layer, quantize_activations):
        # Add this line for each item returned in `get_activations_and_quantizers`,
        # in the same order.
        layer.activation = quantize_activations[0]

    # Configure how to quantize outputs (may be equivalent to activations).
    def get_output_quantizers(self, layer):
        return []

    def get_config(self):
        return {}

class ModifiedQuantizer(Quantizer):
    # Configure weights to quantize with 4 bits instead of 8 bits.
    # `quantizer`, `bits`, `symmetric`, `narrow_range`, and `per_axis` are
    # set elsewhere, e.g. quantizer = LastValueQuantizer and bits = 4.
    def get_weights_and_quantizers(self, layer):
        return [(layer.kernel, quantizer(num_bits=bits, symmetric=symmetric,
                                         narrow_range=narrow_range, per_axis=per_axis))]

    # Configure how to quantize activations with the same bit-width.
    def get_activations_and_quantizers(self, layer):
        return [(layer.activation, tfmot.quantization.keras.quantizers.MovingAverageQuantizer(
            num_bits=bits, symmetric=False, narrow_range=False, per_axis=False))]
Here is how I quantize the model:
supported_layers = [
    tf.keras.layers.Conv2D,
    tf.keras.layers.DepthwiseConv2D
]
def quantize_all_layers(layer):
    for supported_layer in supported_layers:
        if isinstance(layer, supported_layer):
            return quantize_annotate_layer(layer, quantize_config=ModifiedQuantizer())
    return layer

annotated_model = clone_model(
    model,
    clone_function=quantize_all_layers)

with quantize_scope(
        {'Quantizer': Quantizer},
        {'ModifiedQuantizer': ModifiedQuantizer},
        {'_relu6': models._relu6}):
    q_aware_model = quantize_apply(annotated_model)

optimizer = keras.optimizers.Adam(learning_rate=0.001)
q_aware_model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=optimizer,
    metrics=['sparse_categorical_accuracy'])

train_images, train_labels, val_images, val_labels, _, _ = cifar10.load()
q_aware_model.fit(train_images, train_labels, batch_size=64, epochs=1, verbose=1,
                  validation_data=(val_images, val_labels))
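A quick way to see what the model actually stores after quantize_apply (a minimal sketch using the standard Keras weights API) is to list every variable; the kernels remain float32, and the quantizers only add min/max state variables:
for layer in q_aware_model.layers:
    for weight in layer.weights:
        print(weight.name, weight.dtype, weight.shape)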
As said before, when using e.g. bits=4 in the ModifiedQuantizer, the model is still saved in floating point, and I don't know how to access the quantized weights.
Thanks!
I suspect you could get the quantized weights by invoking LastValueQuantizer.__call__ on a given layer's weight tensor. How to invoke that method is the question.
The current signature is:
LastValueQuantizer.__call__(inputs, training, weights, **kwargs)
I assume that inputs is the layer's weights and that weights is the value returned by LastValueQuantizer.build. If you can get a reference to the weights returned by build, I would hope it would be straightforward to quantize the layer's weights directly using LastValueQuantizer.__call__.
[nav] In [1]: from tensorflow_model_optimization.quantization.keras.quantizers import LastValueQuantizer
INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2
[nav] In [2]: q = LastValueQuantizer(num_bits=3, per_axis=True, symmetric=True, narrow_range=True)
[ins] In [3]: ??q.__call__
Signature: q.__call__(inputs, training, weights, **kwargs)
Source:
def __call__(self, inputs, training, weights, **kwargs):
    """Quantize tensor.

    Args:
      inputs: Input tensor to be quantized.
      training: Whether the graph is currently training.
      weights: Dictionary of weights the quantizer can use to quantize the
        tensor. This contains the weights created in the `build` function.
      **kwargs: Additional variables which may be passed to the quantizer.

    Returns:
      Quantized tensor.
    """
    return quant_ops.LastValueQuantize(
        inputs,
        weights['min_var'],
        weights['max_var'],
        is_training=training,
        num_bits=self.num_bits,
        per_channel=self.per_axis,
        symmetric=self.symmetric,
        narrow_range=self.narrow_range
    )
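Putting that together, here is a minimal sketch (untested; eager mode assumed) of quantizing a trained kernel directly. The layer index and the throwaway host layer for the min/max variables are placeholders of mine, not requirements of the tfmot API:
import tensorflow as tf
from tensorflow_model_optimization.quantization.keras.quantizers import LastValueQuantizer

quantizer = LastValueQuantizer(num_bits=4, symmetric=True, narrow_range=False, per_axis=False)

# Assumption: layers[1] is one of the quantize-wrapped conv layers;
# `layer.layer` is the wrapped layer holding the float kernel.
kernel = q_aware_model.layers[1].layer.kernel

# `build` creates the min/max state variables the quantizer needs;
# any Keras layer can host them, so use a throwaway one here.
host = tf.keras.layers.Layer()
quantizer_vars = quantizer.build(kernel.shape, 'probe', host)

# With training=True the min/max variables are first updated from the
# kernel itself; the result is the fake-quantized kernel, i.e. float
# values snapped onto the 4-bit grid.
quantized_kernel = quantizer(kernel, training=True, weights=quantizer_vars)
Note that the result is still a float tensor. To recover the integer codes at the target bit-width, you would divide by the scale implied by quantizer_vars['min_var'] and quantizer_vars['max_var'] (assuming the usual affine fake-quant mapping) and round.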