something i've wanted to do for a while - move all .sh runs to their own directory so they don't pollute root dir

2026-01-30 04:22:02 +00:00 · 2026-01-18 15:27:41 +00:00
parent a91743c168
commit 63bb5831e2
6 changed files with 15 additions and 12 deletions
--- a/runs/runcpu.sh
+++ b/runs/runcpu.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Showing an example run for exercising some of the code paths on the CPU (or MPS on Macbooks)
+# This script was last updated/tuned on Jan 17, 2026.
+
+# Run as:
+# bash runs/runcpu.sh   (from the repository root)
+
+# NOTE: Training LLMs requires GPU compute and $$$. You will not get far on your Macbook.
+# Think of this run as educational/fun demo, not something you should expect to work well.
+# (This is why this script used to be hidden away in dev/; it now lives in runs/.)
+# You may also want to run this script manually and one by one, copy pasting commands into your terminal.
+
+# Setup: environment variables, base directory, uv, and the Python virtualenv.
+export OMP_NUM_THREADS=1
+export NANOCHAT_BASE_DIR="$HOME/.cache/nanochat"
+mkdir -p "$NANOCHAT_BASE_DIR"  # quoted so a $HOME with spaces stays one path
+# Install uv only if it is not already on PATH.
+command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
+[ -d ".venv" ] || uv venv
+uv sync --extra cpu
+source .venv/bin/activate
+# Use the run name "dummy" when WANDB_RUN is unset or empty.
+: "${WANDB_RUN:=dummy}"
+
+# train tokenizer on ~2B characters (~34 seconds on my MacBook Pro M3 Max)
+python -m nanochat.dataset -n 8
+python -m scripts.tok_train --max-chars=2000000000
+python -m scripts.tok_eval
+
+# train a small model (see --depth below), then run the base loss and eval scripts
+# I tuned this run to complete in about 30 minutes on my MacBook Pro M3 Max.
+# To get better results, try increasing num_iterations, or get other ideas from your favorite LLM.
+python -m scripts.base_train \
+    --depth=6 \
+    --head-dim=64 \
+    --window-pattern=L \
+    --max-seq-len=512 \
+    --device-batch-size=32 \
+    --total-batch-size=16384 \
+    --eval-every=100 \
+    --eval-tokens=524288 \
+    --core-metric-every=-1 \
+    --sample-every=100 \
+    --num-iterations=5000 \
+    --run="$WANDB_RUN"
+python -m scripts.base_loss --device-batch-size=1 --split-tokens=16384
+python -m scripts.base_eval --max-per-task=16
+
+# midtraining (~10 minutes on my MacBook Pro M3 Max); first download identity_conversations.jsonl into the base dir
+curl -L -o "$NANOCHAT_BASE_DIR/identity_conversations.jsonl" https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl
+python -m scripts.mid_train \
+    --max-seq-len=512 \
+    --device-batch-size=32 \
+    --total-batch-size=16384 \
+    --eval-every=200 \
+    --eval-tokens=524288 \
+    --num-iterations=1500 \
+    --run="$WANDB_RUN"
+
+# (it's ~ok to skip SFT)
+
+# Chat with the model over CLI
+# The model should be able to answer that it is Paris.
+# It might even know that the color of the sky is blue.
+# Sometimes the model likes it if you first say Hi before you ask it questions.
+# python -m scripts.chat_cli -i mid -p "What is the capital of France?"
+
+# Chat with the model over a pretty WebUI ChatGPT style
+# python -m scripts.chat_web -i mid