add license
Former-commit-id: 69cfc98d7c81756a5ab6bf962240e393e449fef0
This commit is contained in:
@@ -1,7 +1,20 @@
|
||||
# coding=utf-8
|
||||
# Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.
|
||||
# Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16
|
||||
# Inspired by: https://github.com/imoneoi/openchat/blob/master/ochat/training_deepspeed/train.py
|
||||
# Copyright 2024 imoneoi and the LlamaFactory team.
|
||||
#
|
||||
# This code is inspired by imoneoi's OpenChat library.
|
||||
# https://github.com/imoneoi/openchat/blob/3.6.0/ochat/training_deepspeed/train.py
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import math
|
||||
from typing import Literal
|
||||
@@ -32,6 +45,10 @@ def calculate_lr(
|
||||
cutoff_len: int = 1024, # i.e. maximum input length during training
|
||||
is_mistral: bool = False, # mistral model uses a smaller learning rate
|
||||
):
|
||||
r"""
|
||||
Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.
|
||||
Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16
|
||||
"""
|
||||
model_args, data_args, training_args, _, _ = get_train_args(
|
||||
dict(
|
||||
stage=stage,
|
||||
|
||||
Reference in New Issue
Block a user