[model] update kt code (#9406)

2025-11-05 15:27:22 +08:00
parent 56f45e826f
commit eaf963f67f
28 changed files with 108 additions and 68 deletions
--- a/examples/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft.yaml
+++ b/examples/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft.yaml
@@ -6,7 +6,7 @@
      generate_device: "cuda"
      prefill_device: "cuda"
 - match:
-    name: "^model\\.layers\\.(?!.*self_attn\\.kv_b_proj).*$"  # regular expression 
+    name: "^model\\.layers\\.(?!.*self_attn\\.kv_b_proj).*$"  # regular expression
    class: torch.nn.Linear  # only match modules matching name and class simultaneously
  replace:
    class: ktransformers.operators.linear.KTransformersLinear  # optimized Kernel on quantized data types
@@ -65,4 +65,4 @@
    class: "default"
    kwargs:
      generate_device: "cpu"
-      prefill_device: "cpu"
+      prefill_device: "cpu"