release v0.6.1
Former-commit-id: a59d823f554505b2e649e6e111b9dee8306d3ad8
This commit is contained in:
@@ -70,7 +70,7 @@ def create_modelcard_and_push(
|
||||
|
||||
def create_ref_model(
|
||||
model_args: "ModelArguments", finetuning_args: "FinetuningArguments", add_valuehead: bool = False
|
||||
-) -> Union["PreTrainedModel", "AutoModelForCausalLMWithValueHead"]:
|
||||
+) -> Optional[Union["PreTrainedModel", "AutoModelForCausalLMWithValueHead"]]:
|
||||
r"""
|
||||
Creates reference model for PPO/DPO training. Evaluation mode is not supported.
|
||||
|
||||
@@ -105,7 +105,7 @@ def create_ref_model(
|
||||
|
||||
def create_reward_model(
|
||||
model: "AutoModelForCausalLMWithValueHead", model_args: "ModelArguments", finetuning_args: "FinetuningArguments"
|
||||
-) -> "AutoModelForCausalLMWithValueHead":
|
||||
+) -> Optional["AutoModelForCausalLMWithValueHead"]:
|
||||
r"""
|
||||
Creates reward model for PPO training.
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user