From e7cb145f5dc50052ad72d9a107f3b698c2be43c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E3=82=86=E3=82=8A?=
Date: Tue, 3 Feb 2026 11:14:07 +0800
Subject: [PATCH] [logging] Fix race condition in LoggerHandler during multi-GPU training (#10156)

Co-authored-by: yurekami
---
 src/llamafactory/extras/logging.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/llamafactory/extras/logging.py b/src/llamafactory/extras/logging.py
index 6997200a3..35ff65bb6 100644
--- a/src/llamafactory/extras/logging.py
+++ b/src/llamafactory/extras/logging.py
@@ -41,12 +41,13 @@ class LoggerHandler(logging.Handler):
             datefmt="%Y-%m-%d %H:%M:%S",
         )
         self.setLevel(logging.INFO)
+        self.thread_pool = ThreadPoolExecutor(max_workers=1)
         os.makedirs(output_dir, exist_ok=True)
         self.running_log = os.path.join(output_dir, RUNNING_LOG)
-        if os.path.exists(self.running_log):
+        try:
             os.remove(self.running_log)
-
-        self.thread_pool = ThreadPoolExecutor(max_workers=1)
+        except OSError:
+            pass
 
     def _write_log(self, log_entry: str) -> None:
         with open(self.running_log, "a", encoding="utf-8") as f: