| model.visual.patch_embed.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.patch_embed.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.patch_embed.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.0.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.1.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.2.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.3.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.4.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.5.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.6.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.7.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.8.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.9.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.10.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.11.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.12.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.13.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.14.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.15.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.16.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.17.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.18.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.19.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.20.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.21.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.22.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.23.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.24.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.25.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.attn.qkv.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.attn.qkv.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.attn.qkv.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.attn.proj.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.attn.proj.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.attn.proj.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.attn.k_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.attn.v_bmm_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.attn.softmax_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.blocks.26.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.visual.merger.linear_fc1.input_quantizer TensorQuantizer(disabled) |
| model.visual.merger.linear_fc1.output_quantizer TensorQuantizer(disabled) |
| model.visual.merger.linear_fc1.weight_quantizer TensorQuantizer(disabled) |
| model.visual.merger.linear_fc2.input_quantizer TensorQuantizer(disabled) |
| model.visual.merger.linear_fc2.output_quantizer TensorQuantizer(disabled) |
| model.visual.merger.linear_fc2.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=4.5938 calibrator=MaxCalibrator quant) |
| model.language_model.layers.0.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4062 calibrator=MaxCalibrator quant) |
| model.language_model.layers.0.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=4.5938 calibrator=MaxCalibrator quant) |
| model.language_model.layers.0.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1660 calibrator=MaxCalibrator quant) |
| model.language_model.layers.0.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=17.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.0.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.0.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9336 calibrator=MaxCalibrator quant) |
| model.language_model.layers.1.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=3.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.1.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2295 calibrator=MaxCalibrator quant) |
| model.language_model.layers.1.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=3.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.1.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2070 calibrator=MaxCalibrator quant) |
| model.language_model.layers.1.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=12.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.1.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.1.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4453 calibrator=MaxCalibrator quant) |
| model.language_model.layers.2.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=5.1250 calibrator=MaxCalibrator quant) |
| model.language_model.layers.2.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3359 calibrator=MaxCalibrator quant) |
| model.language_model.layers.2.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=5.1250 calibrator=MaxCalibrator quant) |
| model.language_model.layers.2.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2002 calibrator=MaxCalibrator quant) |
| model.language_model.layers.2.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=23.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.2.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.2.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.1250 calibrator=MaxCalibrator quant) |
| model.language_model.layers.3.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=11.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.3.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=13.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.3.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=6.0938 calibrator=MaxCalibrator quant) |
| model.language_model.layers.3.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1621 calibrator=MaxCalibrator quant) |
| model.language_model.layers.3.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=6.0938 calibrator=MaxCalibrator quant) |
| model.language_model.layers.3.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1768 calibrator=MaxCalibrator quant) |
| model.language_model.layers.3.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=32.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.3.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.3.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6797 calibrator=MaxCalibrator quant) |
| model.language_model.layers.4.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=6.9688 calibrator=MaxCalibrator quant) |
| model.language_model.layers.4.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1787 calibrator=MaxCalibrator quant) |
| model.language_model.layers.4.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=6.9688 calibrator=MaxCalibrator quant) |
| model.language_model.layers.4.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1611 calibrator=MaxCalibrator quant) |
| model.language_model.layers.4.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=30.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.4.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.4.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3184 calibrator=MaxCalibrator quant) |
| model.language_model.layers.5.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=7.4062 calibrator=MaxCalibrator quant) |
| model.language_model.layers.5.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1455 calibrator=MaxCalibrator quant) |
| model.language_model.layers.5.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=7.4062 calibrator=MaxCalibrator quant) |
| model.language_model.layers.5.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1270 calibrator=MaxCalibrator quant) |
| model.language_model.layers.5.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=40.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.5.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.5.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.6.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=10.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.6.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2275 calibrator=MaxCalibrator quant) |
| model.language_model.layers.6.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=10.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.6.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2002 calibrator=MaxCalibrator quant) |
| model.language_model.layers.6.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.8750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.6.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.6.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5625 calibrator=MaxCalibrator quant) |
| model.language_model.layers.7.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=14.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.7.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=14.3125 calibrator=MaxCalibrator quant) |
| model.language_model.layers.7.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=10.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.7.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2246 calibrator=MaxCalibrator quant) |
| model.language_model.layers.7.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=10.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.7.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1289 calibrator=MaxCalibrator quant) |
| model.language_model.layers.7.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.7.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.7.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3516 calibrator=MaxCalibrator quant) |
| model.language_model.layers.8.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=8.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.8.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2109 calibrator=MaxCalibrator quant) |
| model.language_model.layers.8.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=8.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.8.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1309 calibrator=MaxCalibrator quant) |
| model.language_model.layers.8.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.8.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.8.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4258 calibrator=MaxCalibrator quant) |
| model.language_model.layers.9.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=9.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.9.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1709 calibrator=MaxCalibrator quant) |
| model.language_model.layers.9.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=9.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.9.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1196 calibrator=MaxCalibrator quant) |
| model.language_model.layers.9.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.9.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.9.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4297 calibrator=MaxCalibrator quant) |
| model.language_model.layers.10.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=11.9375 calibrator=MaxCalibrator quant) |
| model.language_model.layers.10.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2373 calibrator=MaxCalibrator quant) |
| model.language_model.layers.10.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=11.9375 calibrator=MaxCalibrator quant) |
| model.language_model.layers.10.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1309 calibrator=MaxCalibrator quant) |
| model.language_model.layers.10.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.10.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.10.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2734 calibrator=MaxCalibrator quant) |
| model.language_model.layers.11.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=14.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.11.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=11.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.11.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=12.6250 calibrator=MaxCalibrator quant) |
| model.language_model.layers.11.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2002 calibrator=MaxCalibrator quant) |
| model.language_model.layers.11.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=12.6250 calibrator=MaxCalibrator quant) |
| model.language_model.layers.11.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1211 calibrator=MaxCalibrator quant) |
| model.language_model.layers.11.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=78.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.11.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.11.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5820 calibrator=MaxCalibrator quant) |
| model.language_model.layers.12.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=13.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.12.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1826 calibrator=MaxCalibrator quant) |
| model.language_model.layers.12.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=13.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.12.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1108 calibrator=MaxCalibrator quant) |
| model.language_model.layers.12.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=44.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.12.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.12.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2188 calibrator=MaxCalibrator quant) |
| model.language_model.layers.13.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=13.9375 calibrator=MaxCalibrator quant) |
| model.language_model.layers.13.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1572 calibrator=MaxCalibrator quant) |
| model.language_model.layers.13.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=13.9375 calibrator=MaxCalibrator quant) |
| model.language_model.layers.13.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1484 calibrator=MaxCalibrator quant) |
| model.language_model.layers.13.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=24.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.13.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.13.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2432 calibrator=MaxCalibrator quant) |
| model.language_model.layers.14.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=13.6875 calibrator=MaxCalibrator quant) |
| model.language_model.layers.14.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1245 calibrator=MaxCalibrator quant) |
| model.language_model.layers.14.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=13.6875 calibrator=MaxCalibrator quant) |
| model.language_model.layers.14.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1289 calibrator=MaxCalibrator quant) |
| model.language_model.layers.14.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=23.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.14.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.14.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2266 calibrator=MaxCalibrator quant) |
| model.language_model.layers.15.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=18.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.15.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=13.8750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.15.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=17.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.15.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1504 calibrator=MaxCalibrator quant) |
| model.language_model.layers.15.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=17.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.15.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1182 calibrator=MaxCalibrator quant) |
| model.language_model.layers.15.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=20.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.15.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.15.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2285 calibrator=MaxCalibrator quant) |
| model.language_model.layers.16.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=18.8750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.16.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1484 calibrator=MaxCalibrator quant) |
| model.language_model.layers.16.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=18.8750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.16.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1182 calibrator=MaxCalibrator quant) |
| model.language_model.layers.16.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=24.6250 calibrator=MaxCalibrator quant) |
| model.language_model.layers.16.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.16.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3320 calibrator=MaxCalibrator quant) |
| model.language_model.layers.17.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=19.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.17.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1934 calibrator=MaxCalibrator quant) |
| model.language_model.layers.17.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=19.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.17.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1387 calibrator=MaxCalibrator quant) |
| model.language_model.layers.17.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.17.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.17.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2109 calibrator=MaxCalibrator quant) |
| model.language_model.layers.18.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.18.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2559 calibrator=MaxCalibrator quant) |
| model.language_model.layers.18.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.18.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2236 calibrator=MaxCalibrator quant) |
| model.language_model.layers.18.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=27.6250 calibrator=MaxCalibrator quant) |
| model.language_model.layers.18.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.18.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5664 calibrator=MaxCalibrator quant) |
| model.language_model.layers.19.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=18.1250 calibrator=MaxCalibrator quant) |
| model.language_model.layers.19.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=14.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.19.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.19.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2051 calibrator=MaxCalibrator quant) |
| model.language_model.layers.19.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.19.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1387 calibrator=MaxCalibrator quant) |
| model.language_model.layers.19.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.19.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.19.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5312 calibrator=MaxCalibrator quant) |
| model.language_model.layers.20.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=20.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.20.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2393 calibrator=MaxCalibrator quant) |
| model.language_model.layers.20.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=20.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.20.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1338 calibrator=MaxCalibrator quant) |
| model.language_model.layers.20.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.20.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.20.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3574 calibrator=MaxCalibrator quant) |
| model.language_model.layers.21.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.21.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2539 calibrator=MaxCalibrator quant) |
| model.language_model.layers.21.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.21.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1387 calibrator=MaxCalibrator quant) |
| model.language_model.layers.21.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=65.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.21.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.21.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2441 calibrator=MaxCalibrator quant) |
| model.language_model.layers.22.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.22.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2295 calibrator=MaxCalibrator quant) |
| model.language_model.layers.22.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.22.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2168 calibrator=MaxCalibrator quant) |
| model.language_model.layers.22.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=61.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.22.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.22.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4863 calibrator=MaxCalibrator quant) |
| model.language_model.layers.23.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=20.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.23.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=19.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.23.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=27.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.23.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1309 calibrator=MaxCalibrator quant) |
| model.language_model.layers.23.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=27.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.23.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1147 calibrator=MaxCalibrator quant) |
| model.language_model.layers.23.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=95.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.23.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.23.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6016 calibrator=MaxCalibrator quant) |
| model.language_model.layers.24.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.24.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1523 calibrator=MaxCalibrator quant) |
| model.language_model.layers.24.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.24.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1768 calibrator=MaxCalibrator quant) |
| model.language_model.layers.24.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=59.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.24.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.24.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7188 calibrator=MaxCalibrator quant) |
| model.language_model.layers.25.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=29.1250 calibrator=MaxCalibrator quant) |
| model.language_model.layers.25.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1455 calibrator=MaxCalibrator quant) |
| model.language_model.layers.25.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=29.1250 calibrator=MaxCalibrator quant) |
| model.language_model.layers.25.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1455 calibrator=MaxCalibrator quant) |
| model.language_model.layers.25.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=53.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.25.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.25.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4941 calibrator=MaxCalibrator quant) |
| model.language_model.layers.26.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=36.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.26.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1592 calibrator=MaxCalibrator quant) |
| model.language_model.layers.26.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=36.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.26.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1855 calibrator=MaxCalibrator quant) |
| model.language_model.layers.26.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.26.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.26.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6289 calibrator=MaxCalibrator quant) |
| model.language_model.layers.27.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=21.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.27.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=26.1250 calibrator=MaxCalibrator quant) |
| model.language_model.layers.27.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.27.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1729 calibrator=MaxCalibrator quant) |
| model.language_model.layers.27.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.27.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1846 calibrator=MaxCalibrator quant) |
| model.language_model.layers.27.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=32.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.27.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.27.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5391 calibrator=MaxCalibrator quant) |
| model.language_model.layers.28.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.28.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2051 calibrator=MaxCalibrator quant) |
| model.language_model.layers.28.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.28.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1455 calibrator=MaxCalibrator quant) |
| model.language_model.layers.28.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.28.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.28.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3828 calibrator=MaxCalibrator quant) |
| model.language_model.layers.29.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=33.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.29.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1562 calibrator=MaxCalibrator quant) |
| model.language_model.layers.29.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=33.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.29.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2637 calibrator=MaxCalibrator quant) |
| model.language_model.layers.29.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.29.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.29.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3672 calibrator=MaxCalibrator quant) |
| model.language_model.layers.30.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.30.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2070 calibrator=MaxCalibrator quant) |
| model.language_model.layers.30.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.30.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3203 calibrator=MaxCalibrator quant) |
| model.language_model.layers.30.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=29.8750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.30.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.30.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6328 calibrator=MaxCalibrator quant) |
| model.language_model.layers.31.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=21.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.31.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=37.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.31.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.31.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1699 calibrator=MaxCalibrator quant) |
| model.language_model.layers.31.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.31.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1475 calibrator=MaxCalibrator quant) |
| model.language_model.layers.31.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=24.8750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.31.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.31.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6484 calibrator=MaxCalibrator quant) |
| model.language_model.layers.32.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.32.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3086 calibrator=MaxCalibrator quant) |
| model.language_model.layers.32.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.32.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1650 calibrator=MaxCalibrator quant) |
| model.language_model.layers.32.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.1250 calibrator=MaxCalibrator quant) |
| model.language_model.layers.32.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.32.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5469 calibrator=MaxCalibrator quant) |
| model.language_model.layers.33.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=35.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.33.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2461 calibrator=MaxCalibrator quant) |
| model.language_model.layers.33.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=35.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.33.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2852 calibrator=MaxCalibrator quant) |
| model.language_model.layers.33.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=40.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.33.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.33.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3184 calibrator=MaxCalibrator quant) |
| model.language_model.layers.34.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.34.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3418 calibrator=MaxCalibrator quant) |
| model.language_model.layers.34.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.34.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2256 calibrator=MaxCalibrator quant) |
| model.language_model.layers.34.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=29.8750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.34.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.34.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5586 calibrator=MaxCalibrator quant) |
| model.language_model.layers.35.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=19.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.35.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=33.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.35.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.35.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3105 calibrator=MaxCalibrator quant) |
| model.language_model.layers.35.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.35.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2178 calibrator=MaxCalibrator quant) |
| model.language_model.layers.35.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=46.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.35.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.35.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6367 calibrator=MaxCalibrator quant) |
| model.language_model.layers.36.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.36.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3262 calibrator=MaxCalibrator quant) |
| model.language_model.layers.36.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.36.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2236 calibrator=MaxCalibrator quant) |
| model.language_model.layers.36.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=49.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.36.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.36.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2559 calibrator=MaxCalibrator quant) |
| model.language_model.layers.37.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=29.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.37.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2480 calibrator=MaxCalibrator quant) |
| model.language_model.layers.37.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=29.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.37.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2129 calibrator=MaxCalibrator quant) |
| model.language_model.layers.37.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=48.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.37.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.37.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3008 calibrator=MaxCalibrator quant) |
| model.language_model.layers.38.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=37.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.38.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3262 calibrator=MaxCalibrator quant) |
| model.language_model.layers.38.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=37.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.38.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1699 calibrator=MaxCalibrator quant) |
| model.language_model.layers.38.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=67.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.38.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.38.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6797 calibrator=MaxCalibrator quant) |
| model.language_model.layers.39.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=19.8750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.39.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=25.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.39.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.39.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2139 calibrator=MaxCalibrator quant) |
| model.language_model.layers.39.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.39.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2002 calibrator=MaxCalibrator quant) |
| model.language_model.layers.39.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=59.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.39.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.39.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5820 calibrator=MaxCalibrator quant) |
| model.language_model.layers.40.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.40.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2656 calibrator=MaxCalibrator quant) |
| model.language_model.layers.40.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.40.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1670 calibrator=MaxCalibrator quant) |
| model.language_model.layers.40.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=76.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.40.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.40.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6562 calibrator=MaxCalibrator quant) |
| model.language_model.layers.41.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=37.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.41.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2461 calibrator=MaxCalibrator quant) |
| model.language_model.layers.41.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=37.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.41.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1699 calibrator=MaxCalibrator quant) |
| model.language_model.layers.41.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=90.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.41.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.41.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5391 calibrator=MaxCalibrator quant) |
| model.language_model.layers.42.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=56.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.42.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2812 calibrator=MaxCalibrator quant) |
| model.language_model.layers.42.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=56.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.42.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3535 calibrator=MaxCalibrator quant) |
| model.language_model.layers.42.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=100.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.42.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.42.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5625 calibrator=MaxCalibrator quant) |
| model.language_model.layers.43.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=20.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.43.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=49.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.43.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=40.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.43.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2051 calibrator=MaxCalibrator quant) |
| model.language_model.layers.43.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=40.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.43.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1592 calibrator=MaxCalibrator quant) |
| model.language_model.layers.43.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.43.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.43.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5859 calibrator=MaxCalibrator quant) |
| model.language_model.layers.44.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.44.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2100 calibrator=MaxCalibrator quant) |
| model.language_model.layers.44.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.44.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1748 calibrator=MaxCalibrator quant) |
| model.language_model.layers.44.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.44.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.44.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4707 calibrator=MaxCalibrator quant) |
| model.language_model.layers.45.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=36.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.45.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2539 calibrator=MaxCalibrator quant) |
| model.language_model.layers.45.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=36.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.45.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2324 calibrator=MaxCalibrator quant) |
| model.language_model.layers.45.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.45.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.45.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3828 calibrator=MaxCalibrator quant) |
| model.language_model.layers.46.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.46.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1973 calibrator=MaxCalibrator quant) |
| model.language_model.layers.46.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.46.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3262 calibrator=MaxCalibrator quant) |
| model.language_model.layers.46.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=44.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.46.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.46.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6055 calibrator=MaxCalibrator quant) |
| model.language_model.layers.47.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=18.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.47.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=49.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.47.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=52.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.47.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2061 calibrator=MaxCalibrator quant) |
| model.language_model.layers.47.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=52.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.47.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2910 calibrator=MaxCalibrator quant) |
| model.language_model.layers.47.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=49.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.47.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.47.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6055 calibrator=MaxCalibrator quant) |
| model.language_model.layers.48.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.48.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3516 calibrator=MaxCalibrator quant) |
| model.language_model.layers.48.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.48.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2637 calibrator=MaxCalibrator quant) |
| model.language_model.layers.48.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=64.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.48.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.48.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5859 calibrator=MaxCalibrator quant) |
| model.language_model.layers.49.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.49.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3672 calibrator=MaxCalibrator quant) |
| model.language_model.layers.49.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.49.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2402 calibrator=MaxCalibrator quant) |
| model.language_model.layers.49.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=96.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.49.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.49.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6328 calibrator=MaxCalibrator quant) |
| model.language_model.layers.50.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=55.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.50.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2930 calibrator=MaxCalibrator quant) |
| model.language_model.layers.50.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=55.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.50.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2246 calibrator=MaxCalibrator quant) |
| model.language_model.layers.50.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=63.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.50.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.50.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5703 calibrator=MaxCalibrator quant) |
| model.language_model.layers.51.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=17.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.51.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=49.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.51.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=49.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.51.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2852 calibrator=MaxCalibrator quant) |
| model.language_model.layers.51.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=49.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.51.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2676 calibrator=MaxCalibrator quant) |
| model.language_model.layers.51.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=88.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.51.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.51.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7188 calibrator=MaxCalibrator quant) |
| model.language_model.layers.52.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=46.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.52.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3438 calibrator=MaxCalibrator quant) |
| model.language_model.layers.52.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=46.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.52.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2324 calibrator=MaxCalibrator quant) |
| model.language_model.layers.52.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=96.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.52.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.52.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7734 calibrator=MaxCalibrator quant) |
| model.language_model.layers.53.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.53.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3594 calibrator=MaxCalibrator quant) |
| model.language_model.layers.53.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.53.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2715 calibrator=MaxCalibrator quant) |
| model.language_model.layers.53.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=113.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.53.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.53.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5781 calibrator=MaxCalibrator quant) |
| model.language_model.layers.54.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.54.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3770 calibrator=MaxCalibrator quant) |
| model.language_model.layers.54.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.54.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2002 calibrator=MaxCalibrator quant) |
| model.language_model.layers.54.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=552.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.54.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.54.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7031 calibrator=MaxCalibrator quant) |
| model.language_model.layers.55.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=16.1250 calibrator=MaxCalibrator quant) |
| model.language_model.layers.55.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=52.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.55.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.55.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3477 calibrator=MaxCalibrator quant) |
| model.language_model.layers.55.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.55.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2637 calibrator=MaxCalibrator quant) |
| model.language_model.layers.55.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=160.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.55.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.55.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7344 calibrator=MaxCalibrator quant) |
| model.language_model.layers.56.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.56.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3711 calibrator=MaxCalibrator quant) |
| model.language_model.layers.56.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.56.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1953 calibrator=MaxCalibrator quant) |
| model.language_model.layers.56.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=165.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.56.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.56.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7227 calibrator=MaxCalibrator quant) |
| model.language_model.layers.57.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.57.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5039 calibrator=MaxCalibrator quant) |
| model.language_model.layers.57.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.57.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1973 calibrator=MaxCalibrator quant) |
| model.language_model.layers.57.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=167.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.57.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.57.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6875 calibrator=MaxCalibrator quant) |
| model.language_model.layers.58.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=44.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.58.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4492 calibrator=MaxCalibrator quant) |
| model.language_model.layers.58.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=44.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.58.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2188 calibrator=MaxCalibrator quant) |
| model.language_model.layers.58.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=462.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.58.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.58.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7266 calibrator=MaxCalibrator quant) |
| model.language_model.layers.59.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=16.3750 calibrator=MaxCalibrator quant) |
| model.language_model.layers.59.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=104.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.59.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.59.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3438 calibrator=MaxCalibrator quant) |
| model.language_model.layers.59.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.59.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3574 calibrator=MaxCalibrator quant) |
| model.language_model.layers.59.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=536.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.59.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.59.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.8047 calibrator=MaxCalibrator quant) |
| model.language_model.layers.60.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=52.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.60.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4180 calibrator=MaxCalibrator quant) |
| model.language_model.layers.60.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=52.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.60.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2637 calibrator=MaxCalibrator quant) |
| model.language_model.layers.60.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=218.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.60.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.60.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.1328 calibrator=MaxCalibrator quant) |
| model.language_model.layers.61.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.61.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4668 calibrator=MaxCalibrator quant) |
| model.language_model.layers.61.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.2500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.61.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3477 calibrator=MaxCalibrator quant) |
| model.language_model.layers.61.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=253.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.61.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.61.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4805 calibrator=MaxCalibrator quant) |
| model.language_model.layers.62.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.out_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.out_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.in_proj_z.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.in_proj_z.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=54.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.62.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4180 calibrator=MaxCalibrator quant) |
| model.language_model.layers.62.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=54.5000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.62.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2910 calibrator=MaxCalibrator quant) |
| model.language_model.layers.62.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=332.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.62.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.62.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6562 calibrator=MaxCalibrator quant) |
| model.language_model.layers.63.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.self_attn.q_bmm_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=13.1875 calibrator=MaxCalibrator quant) |
| model.language_model.layers.63.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=149.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.63.self_attn.softmax_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=59.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.63.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3574 calibrator=MaxCalibrator quant) |
| model.language_model.layers.63.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=59.7500 calibrator=MaxCalibrator quant) |
| model.language_model.layers.63.mlp.up_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2891 calibrator=MaxCalibrator quant) |
| model.language_model.layers.63.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=544.0000 calibrator=MaxCalibrator quant) |
| model.language_model.layers.63.mlp.down_proj.output_quantizer TensorQuantizer(disabled) |
| model.language_model.layers.63.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5039 calibrator=MaxCalibrator quant) |
| lm_head.input_quantizer TensorQuantizer(disabled) |
| lm_head.output_quantizer TensorQuantizer(disabled) |
| lm_head.weight_quantizer TensorQuantizer(disabled) |
| 2140 TensorQuantizers found in model |
|
|