mirror of
https://github.com/hiyouga/LlamaFactory.git
synced 2026-03-16 22:33:09 +00:00
[tracker] Add Trackio Integration for LlamaFactory (#10165)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -100,6 +100,52 @@ def _parse_args(
|
||||
return tuple(parsed_args)
|
||||
|
||||
|
||||
def _verify_trackio_args(training_args: "TrainingArguments") -> None:
    """Validate the Trackio-related settings carried by ``training_args``.

    The check is a no-op unless ``"trackio"`` is listed in
    ``training_args.report_to``. When it is, a missing project is a hard
    error; everything else only produces advisory log messages.

    Args:
        training_args: TrainingArguments instance (not a dictionary). The
            attributes read here are ``report_to``, ``project``,
            ``trackio_space_id``, ``hub_private_repo`` and ``run_name``.

    Raises:
        ValueError: If Trackio reporting is requested but ``project`` is empty.
    """
    report_to = training_args.report_to
    if not report_to:
        return

    # A bare string must be wrapped, otherwise the membership test below
    # would do substring matching instead of comparing whole logger names.
    if isinstance(report_to, str):
        report_to = [report_to]

    if "trackio" not in report_to:
        return

    # --- Enforce project (required by Trackio) ---
    if not training_args.project:
        raise ValueError("`--project` must be specified when using Trackio.")

    # The advisory messages below go through the rank-0 logger helpers, as the
    # rest of this file does, so distributed runs do not repeat them per process.

    # --- Validate trackio_space_id format ---
    space_id = training_args.trackio_space_id
    if space_id and space_id != "trackio" and "/" not in space_id:
        logger.warning_rank0(
            f"trackio_space_id '{space_id}' should typically be in format "
            "'org/space' for Hugging Face Spaces deployment."
        )

    # --- Inform about default project usage ---
    if training_args.project == "huggingface":
        logger.info_rank0(
            "Using default project name 'huggingface'. "
            "Consider setting a custom project name with --project "
            "for better organization."
        )

    # --- Validate hub repo privacy flag ---
    if training_args.hub_private_repo:
        logger.info_rank0("Repository will be created as private on Hugging Face Hub.")

    # --- Recommend run_name for experiment clarity ---
    if not training_args.run_name:
        logger.warning_rank0("Consider setting --run_name for better experiment tracking clarity.")
|
||||
|
||||
|
||||
def _set_transformers_logging() -> None:
|
||||
if os.getenv("LLAMAFACTORY_VERBOSITY", "INFO") in ["DEBUG", "INFO"]:
|
||||
transformers.utils.logging.set_verbosity_info()
|
||||
@@ -278,8 +324,10 @@ def get_train_args(args: dict[str, Any] | list[str] | None = None) -> _TRAIN_CLS
|
||||
if finetuning_args.reward_model_type == "lora" and model_args.use_unsloth:
|
||||
raise ValueError("Unsloth does not support lora reward model.")
|
||||
|
||||
if training_args.report_to and training_args.report_to[0] not in ["wandb", "tensorboard"]:
|
||||
raise ValueError("PPO only accepts wandb or tensorboard logger.")
|
||||
if training_args.report_to and any(
|
||||
logger not in ("wandb", "tensorboard", "trackio", "none") for logger in training_args.report_to
|
||||
):
|
||||
raise ValueError("PPO only accepts wandb, tensorboard, or trackio logger.")
|
||||
|
||||
if not model_args.use_kt and training_args.parallel_mode == ParallelMode.NOT_DISTRIBUTED:
|
||||
raise ValueError("Please launch distributed training with `llamafactory-cli` or `torchrun`.")
|
||||
@@ -352,6 +400,7 @@ def get_train_args(args: dict[str, Any] | list[str] | None = None) -> _TRAIN_CLS
|
||||
_set_env_vars()
|
||||
_verify_model_args(model_args, data_args, finetuning_args)
|
||||
_check_extra_dependencies(model_args, finetuning_args, training_args)
|
||||
_verify_trackio_args(training_args)
|
||||
|
||||
if not finetuning_args.use_mca and training_args.fp8_enable_fsdp_float8_all_gather and not training_args.fp8:
|
||||
logger.warning_rank0("fp8_enable_fsdp_float8_all_gather requires fp8=True. Setting fp8=True.")
|
||||
|
||||
Reference in New Issue
Block a user