mirror of
https://github.com/hiyouga/LlamaFactory.git
synced 2026-02-02 08:33:38 +00:00
change to right-padding, update reward score #803
Former-commit-id: baa90415bc8f5ebd423d001378b51c3a3a6c2ec7
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
# Inspired by:
|
||||
# https://github.com/lvwerra/trl/blob/main/examples/summarization/scripts/reward_summarization.py
|
||||
# https://github.com/CarperAI/trlx/blob/main/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py
|
||||
|
||||
from typing import TYPE_CHECKING, Optional, List
|
||||
|
||||
Reference in New Issue
Block a user