Former-commit-id: ba97550671811a27177306dd231bb427130b26fb
This commit is contained in:
hiyouga
2024-01-20 23:22:09 +08:00
parent 841fa0030f
commit 7543dc4a9d
5 changed files with 316 additions and 282 deletions

View File

@@ -38,10 +38,10 @@ def checksum(data_files: List[str], file_sha1: Optional[str] = None) -> None:
logger.warning("Checksum failed: mismatched SHA-1 hash value at {}.".format(data_files[0]))
# Pre-change form of infer_max_len as shown by this diff hunk: it reads cutoff_len and
# reserved_label_len from data_args instead of taking them as separate arguments.
# The target budget is cutoff_len scaled by target_len's share of the combined length
# (truncated by int()), raised to at least reserved_label_len; the source budget is
# whatever remains of cutoff_len.
def infer_max_len(source_len: int, target_len: int, data_args: "DataArguments") -> Tuple[int, int]:
max_target_len = int(data_args.cutoff_len * (target_len / (source_len + target_len)))
max_target_len = max(max_target_len, data_args.reserved_label_len)
max_source_len = data_args.cutoff_len - max_target_len
def infer_max_len(source_len: int, target_len: int, cutoff_len: int, reserved_label_len: int) -> Tuple[int, int]:
    """Split a total length budget between a source and a target sequence.

    The target receives a share of ``cutoff_len`` proportional to its part of
    the combined length (truncated by ``int()``), but never less than
    ``reserved_label_len``. The source receives whatever is left of
    ``cutoff_len``.

    Args:
        source_len: Length of the source sequence.
        target_len: Length of the target sequence.
        cutoff_len: Total budget shared by source and target.
        reserved_label_len: Minimum budget granted to the target.

    Returns:
        A ``(max_source_len, max_target_len)`` tuple whose elements always sum
        to ``cutoff_len``. ``max_source_len`` is negative when
        ``reserved_label_len`` exceeds ``cutoff_len``.
    """
    total_len = source_len + target_len
    # An empty pair (both lengths 0) has no meaningful ratio; treat the target
    # share as 0 instead of raising ZeroDivisionError, so only the reserved
    # minimum applies.
    target_ratio = target_len / total_len if total_len != 0 else 0.0
    max_target_len = max(int(cutoff_len * target_ratio), reserved_label_len)
    max_source_len = cutoff_len - max_target_len
    return max_source_len, max_target_len