mirror of
https://github.com/anthropics/claude-code.git
synced 2026-01-30 04:02:03 +00:00
fix(hookify): add size limits to transcript reading to prevent OOM
When hookify rules use `field: transcript`, the entire transcript file was read into memory. For long-running sessions or ralph loops, transcripts can grow to gigabytes, causing OOM kills.

This fix adds:
- A 10MB maximum size limit for transcript reading
- For larger files, only the tail (the most recent content) is read
- A warning at 5MB to alert users to large transcripts
- Proper error handling, consolidated into a reusable function

This addresses memory issues reported in a monorepo, where CC was consuming 30-40GB of memory before being OOM-killed during startup/resume.
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""Rule evaluation engine for hookify plugin."""
|
"""Rule evaluation engine for hookify plugin."""
|
||||||
|
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
@@ -9,6 +10,13 @@ from typing import List, Dict, Any, Optional
|
|||||||
# Import from local module
|
# Import from local module
|
||||||
from hookify.core.config_loader import Rule, Condition
|
from hookify.core.config_loader import Rule, Condition
|
||||||
|
|
||||||
|
# Maximum transcript size to load into memory (10MB default)
|
||||||
|
# For larger files, only the tail is read to prevent OOM
|
||||||
|
MAX_TRANSCRIPT_SIZE_BYTES = 10 * 1024 * 1024 # 10MB
|
||||||
|
|
||||||
|
# Size threshold for warning about large transcripts
|
||||||
|
TRANSCRIPT_WARNING_SIZE_BYTES = 5 * 1024 * 1024 # 5MB
|
||||||
|
|
||||||
|
|
||||||
# Cache compiled regexes (max 128 patterns)
|
# Cache compiled regexes (max 128 patterns)
|
||||||
@lru_cache(maxsize=128)
|
@lru_cache(maxsize=128)
|
||||||
@@ -24,6 +32,58 @@ def compile_regex(pattern: str) -> re.Pattern:
|
|||||||
return re.compile(pattern, re.IGNORECASE)
|
return re.compile(pattern, re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
def read_transcript_safely(transcript_path: str) -> str:
    """Read a transcript file with size limits to prevent OOM.

    Files no larger than MAX_TRANSCRIPT_SIZE_BYTES are returned whole.
    For larger files only the last MAX_TRANSCRIPT_SIZE_BYTES bytes are
    read, and the (probably partial) first line of that window is
    discarded so the result starts on a line boundary. A warning is
    printed to stderr when the file exceeds TRANSCRIPT_WARNING_SIZE_BYTES,
    and a second one when the content is truncated.

    The file is opened in binary mode and decoded as UTF-8 afterwards.
    Seeking a text-mode stream to an arbitrary byte offset is undefined
    behaviour and fails with UnicodeDecodeError whenever the offset lands
    inside a multi-byte character, which would otherwise discard the
    whole transcript.

    Args:
        transcript_path: Path to the transcript file.

    Returns:
        Transcript content as a string, possibly truncated to its tail
        for large files. An empty string is returned (after a warning on
        stderr) if the file is missing, unreadable, or not valid UTF-8.
    """
    try:
        with open(transcript_path, 'rb') as f:
            # Measure the size through the open handle so the size and the
            # bytes read always refer to the same file.
            file_size = f.seek(0, os.SEEK_END)

            # Warn about large transcripts
            if file_size > TRANSCRIPT_WARNING_SIZE_BYTES:
                size_mb = file_size / (1024 * 1024)
                print(f"Warning: Large transcript ({size_mb:.1f}MB): {transcript_path}", file=sys.stderr)

            # Offset of the tail window; 0 means the whole file fits.
            start = max(0, file_size - MAX_TRANSCRIPT_SIZE_BYTES)
            if start > 0:
                size_mb = file_size / (1024 * 1024)
                limit_mb = MAX_TRANSCRIPT_SIZE_BYTES / (1024 * 1024)
                print(f"Warning: Transcript too large ({size_mb:.1f}MB), reading last {limit_mb:.0f}MB only", file=sys.stderr)

            f.seek(start)
            # Bounded read: the cap still holds if the file grows while
            # it is being read.
            raw = f.read(MAX_TRANSCRIPT_SIZE_BYTES)

        if start > 0:
            # Skip the partial line at the seek position. A newline byte
            # never occurs inside a multi-byte UTF-8 sequence, so what
            # remains starts on a character boundary. If the window holds
            # no newline at all, nothing is kept.
            _, _, raw = raw.partition(b'\n')

        text = raw.decode('utf-8')
        # Text-mode reads translate "\r\n" and "\r" to "\n"; keep doing so
        # so rule patterns see the same line endings as before.
        return text.replace('\r\n', '\n').replace('\r', '\n')

    except FileNotFoundError:
        print(f"Warning: Transcript file not found: {transcript_path}", file=sys.stderr)
        return ''
    except PermissionError:
        print(f"Warning: Permission denied reading transcript: {transcript_path}", file=sys.stderr)
        return ''
    except (IOError, OSError) as e:
        print(f"Warning: Error reading transcript {transcript_path}: {e}", file=sys.stderr)
        return ''
    except UnicodeDecodeError as e:
        print(f"Warning: Encoding error in transcript {transcript_path}: {e}", file=sys.stderr)
        return ''
|
||||||
|
|
||||||
|
|
||||||
class RuleEngine:
|
class RuleEngine:
|
||||||
"""Evaluates rules against hook input data."""
|
"""Evaluates rules against hook input data."""
|
||||||
|
|
||||||
@@ -205,24 +265,10 @@ class RuleEngine:
|
|||||||
if field == 'reason':
|
if field == 'reason':
|
||||||
return input_data.get('reason', '')
|
return input_data.get('reason', '')
|
||||||
elif field == 'transcript':
|
elif field == 'transcript':
|
||||||
# Read transcript file if path provided
|
# Read transcript file with size limits to prevent OOM
|
||||||
transcript_path = input_data.get('transcript_path')
|
transcript_path = input_data.get('transcript_path')
|
||||||
if transcript_path:
|
if transcript_path:
|
||||||
try:
|
return read_transcript_safely(transcript_path)
|
||||||
with open(transcript_path, 'r') as f:
|
|
||||||
return f.read()
|
|
||||||
except FileNotFoundError:
|
|
||||||
print(f"Warning: Transcript file not found: {transcript_path}", file=sys.stderr)
|
|
||||||
return ''
|
|
||||||
except PermissionError:
|
|
||||||
print(f"Warning: Permission denied reading transcript: {transcript_path}", file=sys.stderr)
|
|
||||||
return ''
|
|
||||||
except (IOError, OSError) as e:
|
|
||||||
print(f"Warning: Error reading transcript {transcript_path}: {e}", file=sys.stderr)
|
|
||||||
return ''
|
|
||||||
except UnicodeDecodeError as e:
|
|
||||||
print(f"Warning: Encoding error in transcript {transcript_path}: {e}", file=sys.stderr)
|
|
||||||
return ''
|
|
||||||
elif field == 'user_prompt':
|
elif field == 'user_prompt':
|
||||||
# For UserPromptSubmit events
|
# For UserPromptSubmit events
|
||||||
return input_data.get('user_prompt', '')
|
return input_data.get('user_prompt', '')
|
||||||
|
|||||||
Reference in New Issue
Block a user