From babde18ce1cb59cb3d36f8874d1248983c7ba9c3 Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Sun, 18 Jan 2026 03:00:38 +0000 Subject: [PATCH] small tweaks --- miniseries.sh | 1 - scaling_laws.sh | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/miniseries.sh b/miniseries.sh index 9a4512b..c42544e 100644 --- a/miniseries.sh +++ b/miniseries.sh @@ -61,7 +61,6 @@ for d in "${DEPTHS[@]}"; do # No --target-flops, let it use the default ratio from base_train torchrun --standalone --nproc_per_node=$NPROC_PER_NODE -m scripts.base_train -- \ --depth=$d \ - --target-param-data-ratio=8 \ --run="${WANDB_RUN}_d${d}" \ --model-tag="${TAG}" \ --core-metric-every=999999 \ diff --git a/scaling_laws.sh b/scaling_laws.sh index 7c269c6..1f9dab8 100644 --- a/scaling_laws.sh +++ b/scaling_laws.sh @@ -7,7 +7,8 @@ FLOPS_BUDGETS=( 3e18 6e18 ) -DEPTHS=(8 10 12 14 16 18 20) +DEPTHS=(6 7 8 9 10 11 12 13 14) + NPROC_PER_NODE="${NPROC_PER_NODE:-8}" WANDB_RUN="${WANDB_RUN:-scaling_${LABEL}}" EVAL_TOKENS=$((100 * 524288)) # ~100M tokens for final eval (default is ~10M)