mirror of
https://github.com/karpathy/nanochat.git
synced 2026-01-30 04:22:02 +00:00
small tweaks
This commit is contained in:
@@ -61,7 +61,6 @@ for d in "${DEPTHS[@]}"; do
|
|||||||
# No --target-flops, let it use the default ratio from base_train
|
# No --target-flops, let it use the default ratio from base_train
|
||||||
torchrun --standalone --nproc_per_node=$NPROC_PER_NODE -m scripts.base_train -- \
|
torchrun --standalone --nproc_per_node=$NPROC_PER_NODE -m scripts.base_train -- \
|
||||||
--depth=$d \
|
--depth=$d \
|
||||||
--target-param-data-ratio=8 \
|
|
||||||
--run="${WANDB_RUN}_d${d}" \
|
--run="${WANDB_RUN}_d${d}" \
|
||||||
--model-tag="${TAG}" \
|
--model-tag="${TAG}" \
|
||||||
--core-metric-every=999999 \
|
--core-metric-every=999999 \
|
||||||
|
|||||||
@@ -7,7 +7,8 @@ FLOPS_BUDGETS=(
|
|||||||
3e18
|
3e18
|
||||||
6e18
|
6e18
|
||||||
)
|
)
|
||||||
DEPTHS=(8 10 12 14 16 18 20)
|
DEPTHS=(6 7 8 9 10 11 12 13 14)
|
||||||
|
|
||||||
NPROC_PER_NODE="${NPROC_PER_NODE:-8}"
|
NPROC_PER_NODE="${NPROC_PER_NODE:-8}"
|
||||||
WANDB_RUN="${WANDB_RUN:-scaling_${LABEL}}"
|
WANDB_RUN="${WANDB_RUN:-scaling_${LABEL}}"
|
||||||
EVAL_TOKENS=$((100 * 524288)) # ~100M tokens for final eval (default is ~10M)
|
EVAL_TOKENS=$((100 * 524288)) # ~100M tokens for final eval (default is ~10M)
|
||||||
|
|||||||
Reference in New Issue
Block a user