mirror of
https://github.com/karpathy/nanochat.git
synced 2026-01-30 04:22:02 +00:00
add explicit UTF-8 encoding
This commit is contained in:
@@ -32,7 +32,7 @@ class CustomJSON(Task):
|
||||
print("-" * 80)
|
||||
|
||||
else:
|
||||
with open(filepath, 'r') as f:
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line: # skip empty lines
|
||||
|
||||
@@ -119,7 +119,7 @@ class SpellingBee(Task):
|
||||
self.split = split
|
||||
filename = WORD_LIST_URL.split("/")[-1]
|
||||
word_list_path = download_file_with_lock(WORD_LIST_URL, filename)
|
||||
with open(word_list_path) as f:
|
||||
with open(word_list_path, 'r', encoding='utf-8') as f:
|
||||
words = [line.strip() for line in f]
|
||||
self.words = words
|
||||
|
||||
@@ -238,7 +238,7 @@ class SimpleSpelling(Task):
|
||||
self.split = split
|
||||
filename = WORD_LIST_URL.split("/")[-1]
|
||||
word_list_path = download_file_with_lock(WORD_LIST_URL, filename)
|
||||
with open(word_list_path) as f:
|
||||
with open(word_list_path, 'r', encoding='utf-8') as f:
|
||||
words = [line.strip() for line in f]
|
||||
rng = random.Random(42)
|
||||
rng.shuffle(words) # use a different word order than the SpellingBee task
|
||||
|
||||
Reference in New Issue
Block a user