small tweaks

2026-01-30 04:22:02 +00:00 · 2026-01-18 03:00:38 +00:00
parent cf5c9e5b8e
commit babde18ce1
2 changed files with 2 additions and 2 deletions
--- a/miniseries.sh
+++ b/miniseries.sh
@@ -61,7 +61,6 @@ for d in "${DEPTHS[@]}"; do
    # No --target-flops, let it use the default ratio from base_train
    torchrun --standalone --nproc_per_node=$NPROC_PER_NODE -m scripts.base_train -- \
        --depth=$d \
        --target-param-data-ratio=8 \
        --run="${WANDB_RUN}_d${d}" \
        --model-tag="${TAG}" \
        --core-metric-every=999999 \
--- a/scaling_laws.sh
+++ b/scaling_laws.sh
@@ -7,7 +7,8 @@ FLOPS_BUDGETS=(
    3e18
    6e18
 )
-DEPTHS=(8 10 12 14 16 18 20)
+DEPTHS=(6 7 8 9 10 11 12 13 14)
 NPROC_PER_NODE="${NPROC_PER_NODE:-8}"
 WANDB_RUN="${WANDB_RUN:-scaling_${LABEL}}"
 EVAL_TOKENS=$((100 * 524288))  # 100 * 524288 = 52,428,800, i.e. ~52M tokens for final eval (default is ~10M)