[model] update kt code (#9406)
@@ -6,7 +6,7 @@
      generate_device: "cuda"
      prefill_device: "cuda"
- match:
    name: "^model\\.layers\\.(?!.*self_attn\\.kv_b_proj).*$" # regular expression
    class: torch.nn.Linear # only match modules matching name and class simultaneously
  replace:
    class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -66,4 +66,4 @@
    class: "default"
    kwargs:
      generate_device: "cpu"
      prefill_device: "cpu"
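A side note on the pattern that recurs throughout these hunks: the negative lookahead (?!.*self_attn\\.kv_b_proj) is what keeps kv_b_proj out of the generic torch.nn.Linear replacement. A minimal Python check (the module names below are illustrative, not taken from the commit):

import re

# The regex from the YAML above, with YAML's "\\" unescaped to "\".
pattern = re.compile(r"^model\.layers\.(?!.*self_attn\.kv_b_proj).*$")

# An ordinary layer submodule matches, so the rule would replace it ...
print(bool(pattern.match("model.layers.0.mlp.gate_proj")))        # True
# ... while kv_b_proj is excluded by the negative lookahead.
print(bool(pattern.match("model.layers.0.self_attn.kv_b_proj")))  # False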
@@ -6,7 +6,7 @@
      generate_device: "cuda"
      prefill_device: "cuda"
- match:
    name: "^model\\.layers\\.(?!.*self_attn\\.kv_b_proj).*$" # regular expression
    class: torch.nn.Linear # only match modules matching name and class simultaneously
  replace:
    class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -65,4 +65,4 @@
    class: "default"
    kwargs:
      generate_device: "cpu"
      prefill_device: "cpu"
@@ -24,7 +24,7 @@
      prefill_device: "cuda:1"

- match:
    name: "^model\\.layers\\.(0|[1-9])\\.(?!.*self_attn\\.kv_b_proj).*$" # regular expression
    class: torch.nn.Linear # only match modules matching name and class simultaneously
  replace:
    class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -35,7 +35,7 @@
      prefill_op: "KLinearTorch"

- match:
    name: "^model\\.layers\\.([12][0-9])\\.(?!.*self_attn\\.kv_b_proj).*$" # regular expression
    class: torch.nn.Linear # only match modules matching name and class simultaneously
  replace:
    class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -44,7 +44,7 @@
      prefill_device: "cuda:1"
      generate_op: "KLinearTorch"
      prefill_op: "KLinearTorch"

- match:
    name: "^model\\.layers\\.(0|[1-9])\\.mlp$"
    class: ktransformers.models.modeling_deepseek.DeepseekV2MoE
@@ -108,7 +108,7 @@
    class: "ktransformers.operators.models.KDeepseekV2Model"
    kwargs:
      per_layer_prefill_intput_threshold: 0 # 0 is close layer wise prefill
      transfer_map:
        10: "cuda:1"

- match:
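The two layer patterns in this multi-GPU config partition the stack at index 10, the same index used by the transfer_map entry (10: "cuda:1"), presumably so the two groups of layers can be placed on different devices. A quick sketch of how the alternations split the indices under plain re semantics (module names are illustrative):

import re

# Patterns from the hunks above (YAML escaping removed).
layers_0_9   = re.compile(r"^model\.layers\.(0|[1-9])\.(?!.*self_attn\.kv_b_proj).*$")
layers_10_29 = re.compile(r"^model\.layers\.([12][0-9])\.(?!.*self_attn\.kv_b_proj).*$")

def which_rule(name: str) -> str:
    # Report which of the two match rules a module name would fall under.
    if layers_0_9.match(name):
        return "layers 0-9 rule"
    if layers_10_29.match(name):
        return "layers 10-29 rule"
    return "no match"

print(which_rule("model.layers.9.mlp.down_proj"))   # layers 0-9 rule
print(which_rule("model.layers.10.mlp.down_proj"))  # layers 10-29 rule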
@@ -6,7 +6,7 @@
      generate_device: "cuda"
      prefill_device: "cuda"
- match:
    name: "^model\\.layers\\.(?!.*self_attn\\.kv_b_proj).*$" # regular expression
    class: torch.nn.Linear # only match modules matching name and class simultaneously
  replace:
    class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -66,4 +66,4 @@
    class: "default"
    kwargs:
      generate_device: "cpu"
      prefill_device: "cpu"
@@ -6,7 +6,7 @@
      generate_device: "cuda"
      prefill_device: "cuda"
- match:
    name: "^model\\.layers\\.(?!.*self_attn\\.kv_b_proj).*$" # regular expression
    class: torch.nn.Linear # only match modules matching name and class simultaneously
  replace:
    class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -65,4 +65,4 @@
    class: "default"
    kwargs:
      generate_device: "cpu"
      prefill_device: "cpu"
@@ -6,7 +6,7 @@
      generate_device: "cuda"
      prefill_device: "cuda"
- match:
    name: "^model\\.layers\\.(?!.*self_attn\\.kv_b_proj).*$" # regular expression
    class: torch.nn.Linear # only match modules matching name and class simultaneously
  replace:
    class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -65,4 +65,4 @@
    class: "default"
    kwargs:
      generate_device: "cpu"
      prefill_device: "cpu"
@@ -7,7 +7,7 @@
      prefill_device: "cuda"

- match:
    name: "^lm_head$" # regular expression
    class: torch.nn.Linear # only match modules matching name and class simultaneously
  replace:
    class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -18,7 +18,7 @@
      prefill_op: "KLinearTorch"

- match:
    name: "^model\\.layers\\.(?!.*self_attn\\.kv_b_proj).*$" # regular expression
    class: torch.nn.Linear # only match modules matching name and class simultaneously
  replace:
    class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -74,4 +74,4 @@
    class: "default"
    kwargs:
      generate_device: "cpu"
      prefill_device: "cpu"
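One detail of the ^lm_head$ rule above: both anchors make it an exact-name match, unlike the model\\.layers\\... patterns that match whole subtrees. A trivial check (the prefixed name is hypothetical):

import re

lm_head_rule = re.compile(r"^lm_head$")

print(bool(lm_head_rule.match("lm_head")))        # True  - exact name
print(bool(lm_head_rule.match("model.lm_head")))  # False - anchored, so prefixed names do not match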
@@ -24,7 +24,7 @@
      prefill_device: "cuda:1"

- match:
    name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.(?!self_attn\\.kv_b_proj).*$" # regular expression
    class: torch.nn.Linear # only match modules matching name and class simultaneously
  replace:
    class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -35,7 +35,7 @@
      prefill_op: "KLinearTorch"

- match:
    name: "^model\\.layers\\.([3456][0-9])\\.(?!self_attn\\.kv_b_proj).*$" # regular expression
    class: torch.nn.Linear # only match modules matching name and class simultaneously
  replace:
    class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -44,7 +44,7 @@
      prefill_device: "cuda:1"
      generate_op: "KLinearTorch"
      prefill_op: "KLinearTorch"

- match:
    name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp$"
    class: ktransformers.models.modeling_deepseek_v3.DeepseekV3MoE
@@ -125,7 +125,7 @@
    class: "ktransformers.operators.models.KDeepseekV2Model"
    kwargs:
      per_layer_prefill_intput_threshold: 0 # 0 is close layer wise prefill
      transfer_map:
        30: "cuda:1"

- match:
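The DeepseekV3 layout is the same idea with the cut at layer 30: layers 0-29 in one group, [3456][0-9] (30-69) in the other, matching transfer_map: 30: "cuda:1". If transfer_map means "from this layer index onward, run on the listed device" — an assumption, not something the diff states — the per-layer lookup reduces to something like:

# Hypothetical helper illustrating one plausible reading of transfer_map.
# The key and device below come from the hunk above; the semantics are assumed.
def device_for_layer(layer_idx: int, transfer_map: dict[int, str], default: str = "cuda:0") -> str:
    device = default
    for start in sorted(transfer_map):
        if layer_idx >= start:
            device = transfer_map[start]
    return device

print(device_for_layer(29, {30: "cuda:1"}))  # cuda:0
print(device_for_layer(30, {30: "cuda:1"}))  # cuda:1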
@@ -7,7 +7,7 @@
      prefill_device: "cuda"

- match:
    name: "^lm_head$" # regular expression
    class: torch.nn.Linear # only match modules matching name and class simultaneously
  replace:
    class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -18,7 +18,7 @@
      prefill_op: "KLinearTorch"

- match:
    name: "^model\\.layers\\.(?!.*self_attn\\.kv_b_proj).*$" # regular expression
    class: torch.nn.Linear # only match modules matching name and class simultaneously
  replace:
    class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -74,4 +74,4 @@
    class: "default"
    kwargs:
      generate_device: "cpu"
      prefill_device: "cpu"