Add personality to nanochat. This breaks previous code on git pull and requires downloading a new file from S3, but there is a helpful error message, so hopefully it's OK.

This commit is contained in:
Andrej Karpathy
2025-10-21 15:04:58 +00:00
parent 0f007889dd
commit fe5aed940b
7 changed files with 468 additions and 2 deletions

View File

@@ -24,6 +24,7 @@ if [ ! -d "$NANOCHAT_BASE_DIR/eval_bundle" ]; then
rm eval_bundle.zip
mv eval_bundle $NANOCHAT_BASE_DIR
fi
# Download the identity conversations file into the nanochat base directory.
# -f makes curl exit non-zero on an HTTP error instead of saving the server's
# error page as the .jsonl file; the destination is quoted so a base directory
# containing spaces or glob characters is passed to curl as a single argument.
curl -fL -o "$NANOCHAT_BASE_DIR/identity_conversations.jsonl" https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl
# train tokenizer on ~4B characters and kick off download of the rest for pretraining
python -m nanochat.dataset -n 16