[misc] move wechat out (#9223)

Yaowei Zheng
2025-10-02 02:06:09 +08:00
committed by GitHub
parent 7dd910f067
commit bcc2c1fd8f
4 changed files with 7 additions and 7 deletions


@@ -6,7 +6,7 @@
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true
### Method configuration
stage: sft
do_train: true
finetuning_type: full
@@ -40,9 +40,9 @@ fp8: true
fp8_backend: torchao # Use TorchAO backend for FP8
fp8_enable_fsdp_float8_all_gather: false # Not used with DeepSpeed
### DeepSpeed configuration
deepspeed: examples/deepspeed/ds_z3_fp8_config.json
### Logging configuration
report_to: wandb
run_name: llama3_fp8_deepspeed_sft
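
The referenced examples/deepspeed/ds_z3_fp8_config.json is not shown in this diff. As a hedged sketch only, a ZeRO-3 DeepSpeed config for this setup could look like the following, using standard DeepSpeed fields and the HF Trainer "auto" convention; the actual file in the repo may differ:

{
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "gradient_accumulation_steps": "auto",
  "bf16": {
    "enabled": "auto"
  },
  "zero_optimization": {
    "stage": 3,
    "overlap_comm": true,
    "contiguous_gradients": true,
    "stage3_gather_16bit_weights_on_model_save": true
  }
}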


@@ -1,4 +1,4 @@
# FP8 training example with FSDP
# This config demonstrates FP8 mixed precision training using HuggingFace Accelerate
# with FSDP for distributed training and float8 all-gather optimization
@@ -48,4 +48,4 @@ fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
### Logging configuration
report_to: wandb
run_name: llama3_fp8_fsdp_sft
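
For orientation, the fp8_backend: torchao option and the fp8_enable_fsdp_float8_all_gather flag seen in the first file correspond to torchao's public float8 training API. A minimal Python sketch, assuming torchao's documented float8 interface and a toy stand-in model (an illustration, not LLaMA-Factory's actual code path; float8 compute itself requires a recent NVIDIA GPU):

import torch.nn as nn
from torchao.float8 import Float8LinearConfig, convert_to_float8_training

# Toy stand-in for the transformer; in practice this is the loaded LLM.
model = nn.Sequential(nn.Linear(4096, 4096), nn.ReLU(), nn.Linear(4096, 4096))

# Mirrors the YAML flag: with FSDP, parameters are all-gathered in
# float8 instead of bf16, cutting communication bandwidth.
config = Float8LinearConfig(enable_fsdp_float8_all_gather=True)

# Swap eligible nn.Linear modules for Float8Linear in place,
# before the model is wrapped with FSDP.
convert_to_float8_training(model, config=config)

This is also why the DeepSpeed variant above sets fp8_enable_fsdp_float8_all_gather: false; per its inline comment, the float8 all-gather path applies only to FSDP sharding.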