[v1] add init on rank0 for fsdp2 (#10264)

This commit is contained in:
jiaqiw09
2026-03-27 14:54:03 +08:00
committed by GitHub
parent d02fcd3588
commit df2e6edb7e
9 changed files with 84 additions and 12 deletions

View File

@@ -1,5 +1,4 @@
model: Qwen/Qwen3-4B
trust_remote_code: true
model_class: llm
template: qwen3_nothink

View File

@@ -1,5 +1,4 @@
model: Qwen/Qwen3-0.6B
model_class: llm
template: qwen3_nothink

View File

@@ -1,5 +1,4 @@
model: Qwen/Qwen3-0.6B
trust_remote_code: true
model_class: llm
template: qwen3_nothink

View File

@@ -1,5 +1,4 @@
model: Qwen/Qwen3-4B
trust_remote_code: true
model_class: llm
template: qwen3_nothink
@@ -28,7 +27,6 @@ train_dataset: data/v1_sft_demo.yaml
### training
output_dir: ./outputs/test_lora
micro_batch_size: 1
global_batch_size: 4
cutoff_len: 2048
learning_rate: 1.0e-4
bf16: true

View File

@@ -0,0 +1,40 @@
model: Qwen/Qwen3-4B
model_class: llm
template: qwen3_nothink
# PEFT Configuration
peft_config:
name: lora
r: 16
lora_alpha: 32
lora_dropout: 0.05
target_modules: all
# Kernel Config
kernel_config:
name: auto
include_kernels: auto
# FSDP Config
dist_config:
name: fsdp2
dcp_path: null
init_config:
name: init_on_rank0
### data
train_dataset: data/v1_sft_demo.yaml
### training
output_dir: ./outputs/test_lora
micro_batch_size: 1
cutoff_len: 2048
learning_rate: 1.0e-4
bf16: true
max_steps: 10
### sample
sample_backend: hf
max_new_tokens: 128

View File

@@ -1,5 +1,4 @@
model: Qwen/Qwen3-0.6B
trust_remote_code: true
model_class: llm
template: qwen3_nothink