[infer] support lora adapter for SGLang backend (#8067)

This commit is contained in:
Saiya
2025-05-16 23:33:47 +08:00
committed by GitHub
parent 52b23f9e56
commit ab41f7956c
3 changed files with 22 additions and 1 deletions

View File

@@ -364,6 +364,12 @@ class SGLangArguments:
default=None,
metadata={"help": "Config to initialize the SGLang engine. Please use JSON strings."},
)
# Kernel backend used to run the GEMM operations of LoRA modules.
# Accepted values are "triton" and "flashinfer"; the default is "triton",
# which the help text recommends for performance and stability.
sglang_lora_backend: Literal["triton", "flashinfer"] = field(
default="triton",
metadata={
"help": "The backend of running GEMM kernels for Lora modules. Recommend using the Triton LoRA backend for better performance and stability."
},
)
def __post_init__(self):
if isinstance(self.sglang_config, str) and self.sglang_config.startswith("{"):