[misc] support export ollama modelfile (#6899)
* support export ollama modelfile
* update config
* add system and num ctx

Former-commit-id: 8c2af7466f4015f300b51841db11bcd2505ebf20
This commit is contained in:
@@ -170,6 +170,12 @@ llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
|
||||
llamafactory-cli export examples/merge_lora/llama3_gptq.yaml
|
||||
```
|
||||
|
||||
### Save Ollama Modelfile
|
||||
|
||||
```bash
|
||||
llamafactory-cli export examples/merge_lora/llama3_full_sft.yaml
|
||||
```
|
||||
|
||||
### Inferring LoRA Fine-Tuned Models
|
||||
|
||||
#### Batch Generation using vLLM Tensor Parallel
|
||||
|
||||
@@ -170,6 +170,12 @@ llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
|
||||
llamafactory-cli export examples/merge_lora/llama3_gptq.yaml
|
||||
```
|
||||
|
||||
### 保存 Ollama 配置文件
|
||||
|
||||
```bash
|
||||
llamafactory-cli export examples/merge_lora/llama3_full_sft.yaml
|
||||
```
|
||||
|
||||
### 推理 LoRA 模型
|
||||
|
||||
#### 使用 vLLM+TP 批量推理
|
||||
|
||||
11
examples/merge_lora/llama3_full_sft.yaml
Normal file
11
examples/merge_lora/llama3_full_sft.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
### model
|
||||
model_name_or_path: saves/llama3-8b/full/sft
|
||||
template: llama3
|
||||
trust_remote_code: true
|
||||
infer_dtype: bfloat16
|
||||
|
||||
### export
|
||||
export_dir: output/llama3_full_sft
|
||||
export_size: 5
|
||||
export_device: cpu
|
||||
export_legacy_format: false
|
||||
@@ -4,9 +4,9 @@ template: llama3
|
||||
trust_remote_code: true
|
||||
|
||||
### export
|
||||
export_dir: models/llama3_gptq
|
||||
export_dir: output/llama3_gptq
|
||||
export_quantization_bit: 4
|
||||
export_quantization_dataset: data/c4_demo.json
|
||||
export_size: 2
|
||||
export_size: 5
|
||||
export_device: cpu
|
||||
export_legacy_format: false
|
||||
|
||||
@@ -4,11 +4,10 @@
|
||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||
adapter_name_or_path: saves/llama3-8b/lora/sft
|
||||
template: llama3
|
||||
finetuning_type: lora
|
||||
trust_remote_code: true
|
||||
|
||||
### export
|
||||
export_dir: models/llama3_lora_sft
|
||||
export_size: 2
|
||||
export_dir: output/llama3_lora_sft
|
||||
export_size: 5
|
||||
export_device: cpu
|
||||
export_legacy_format: false
|
||||
|
||||
@@ -4,11 +4,10 @@
|
||||
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
|
||||
adapter_name_or_path: saves/qwen2_vl-7b/lora/sft
|
||||
template: qwen2_vl
|
||||
finetuning_type: lora
|
||||
trust_remote_code: true
|
||||
|
||||
### export
|
||||
export_dir: models/qwen2_vl_lora_sft
|
||||
export_size: 2
|
||||
export_dir: output/qwen2_vl_lora_sft
|
||||
export_size: 5
|
||||
export_device: cpu
|
||||
export_legacy_format: false
|
||||
|
||||
Reference in New Issue
Block a user