feat: add thorough verification process and enhance agent output modal

- Introduced a new markdown file outlining a mandatory 3-pass verification process for code completion, focusing on correctness, edge cases, and maintainability.
- Updated the AgentInfoPanel to display a todo list for non-backlog features, ensuring users can see the agent's current tasks.
- Enhanced the AgentOutputModal to support a summary view, extracting and displaying summary content from raw log output.
- Improved the log parser to extract summaries from various formats, enhancing the overall user experience and information accessibility.
2026-02-04 09:13:08 +00:00 · 2026-01-04 01:56:45 -05:00
parent 32f859b927
commit e2206d7a96
5 changed files with 264 additions and 44 deletions
--- a/.claude/commands/thorough.md
+++ b/.claude/commands/thorough.md
@@ -0,0 +1,45 @@
 When you think you are done, you are NOT done.
 You must run a mandatory 3-pass verification before concluding:
 ## Pass 1: Correctness & Functionality
 - [ ] Verify logic matches requirements and specifications
 - [ ] Check type safety (TypeScript types are correct and complete)
 - [ ] Ensure imports are correct and follow project conventions
 - [ ] Verify all functions/classes work as intended
 - [ ] Check that return values and side effects are correct
 - [ ] Run relevant tests if they exist, or verify testability
 - [ ] Confirm integration with existing code works properly
 ## Pass 2: Edge Cases & Safety
 - [ ] Handle null/undefined inputs gracefully
 - [ ] Validate all user inputs and external data
 - [ ] Check error handling (try/catch, error boundaries, etc.)
 - [ ] Verify security considerations (no sensitive data exposure, proper auth checks)
 - [ ] Test boundary conditions (empty arrays, zero values, max lengths, etc.)
 - [ ] Ensure resource cleanup (file handles, connections, timers)
 - [ ] Check for potential race conditions or async issues
 - [ ] Verify file path security (no directory traversal vulnerabilities)
 ## Pass 3: Maintainability & Code Quality
 - [ ] Code follows project style guide and conventions
 - [ ] Functions/classes are single-purpose and well-named
 - [ ] Remove dead code, unused imports, and console.logs
 - [ ] Extract magic numbers/strings into named constants
 - [ ] Check for code duplication (DRY principle)
 - [ ] Verify appropriate abstraction levels (not over/under-engineered)
 - [ ] Add necessary comments for complex logic
 - [ ] Ensure consistent error messages and logging
 - [ ] Check that code is readable and self-documenting
 - [ ] Verify proper separation of concerns
 **For each pass, explicitly report:**
 - What you checked
 - Any issues found and how they were fixed
 - Any remaining concerns or trade-offs
 Only after completing all three passes with explicit findings may you conclude the work is done.
--- a/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx
+++ b/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx
@@ -255,6 +255,45 @@ export function AgentInfoPanel({
    );
  }
  // Show just the todo list for non-backlog features when showAgentInfo is false
  // This ensures users always see what the agent is working on
  if (!showAgentInfo && feature.status !== 'backlog' && agentInfo && agentInfo.todos.length > 0) {
    return (
      <div className="mb-3 space-y-1 overflow-hidden">
        <div className="flex items-center gap-1 text-[10px] text-muted-foreground/70">
          <ListTodo className="w-3 h-3" />
          <span>
            {agentInfo.todos.filter((t) => t.status === 'completed').length}/
            {agentInfo.todos.length} tasks
          </span>
        </div>
        <div className="space-y-0.5 max-h-24 overflow-y-auto">
          {agentInfo.todos.map((todo, idx) => (
            <div key={idx} className="flex items-center gap-1.5 text-[10px]">
              {todo.status === 'completed' ? (
                <CheckCircle2 className="w-2.5 h-2.5 text-[var(--status-success)] shrink-0" />
              ) : todo.status === 'in_progress' ? (
                <Loader2 className="w-2.5 h-2.5 text-[var(--status-warning)] animate-spin shrink-0" />
              ) : (
                <Circle className="w-2.5 h-2.5 text-muted-foreground/50 shrink-0" />
              )}
              <span
                className={cn(
                  'break-words hyphens-auto line-clamp-2 leading-relaxed',
                  todo.status === 'completed' && 'text-muted-foreground/60 line-through',
                  todo.status === 'in_progress' && 'text-[var(--status-warning)]',
                  todo.status === 'pending' && 'text-muted-foreground/80'
                )}
              >
                {todo.content}
              </span>
            </div>
          ))}
        </div>
      </div>
    );
  }
  // Always render SummaryDialog if showAgentInfo is true (even if no agentInfo yet)
  // This ensures the dialog can be opened from the expand button
  return (
--- a/apps/ui/src/components/views/board-view/dialogs/agent-output-modal.tsx
+++ b/apps/ui/src/components/views/board-view/dialogs/agent-output-modal.tsx
@@ -1,4 +1,4 @@
-import { useEffect, useRef, useState } from 'react';
+import { useEffect, useRef, useState, useMemo } from 'react';
 import {
  Dialog,
  DialogContent,
@@ -6,12 +6,14 @@ import {
  DialogHeader,
  DialogTitle,
 } from '@/components/ui/dialog';
-import { Loader2, List, FileText, GitBranch } from 'lucide-react';
+import { Loader2, List, FileText, GitBranch, ClipboardList } from 'lucide-react';
 import { getElectronAPI } from '@/lib/electron';
 import { LogViewer } from '@/components/ui/log-viewer';
 import { GitDiffPanel } from '@/components/ui/git-diff-panel';
 import { TaskProgressPanel } from '@/components/ui/task-progress-panel';
 import { Markdown } from '@/components/ui/markdown';
 import { useAppStore } from '@/store/app-store';
 import { extractSummary } from '@/lib/log-parser';
 import type { AutoModeEvent } from '@/types/electron';
 interface AgentOutputModalProps {
@@ -27,7 +29,7 @@ interface AgentOutputModalProps {
  projectPath?: string;
 }
-type ViewMode = 'parsed' | 'raw' | 'changes';
+type ViewMode = 'summary' | 'parsed' | 'raw' | 'changes';
 export function AgentOutputModal({
  open,
@@ -40,8 +42,14 @@ export function AgentOutputModal({
 }: AgentOutputModalProps) {
  const [output, setOutput] = useState<string>('');
  const [isLoading, setIsLoading] = useState(true);
-  const [viewMode, setViewMode] = useState<ViewMode>('parsed');
+  const [viewMode, setViewMode] = useState<ViewMode | null>(null);
  const [projectPath, setProjectPath] = useState<string>('');
  // Extract summary from output
  const summary = useMemo(() => extractSummary(output), [output]);
  // Determine the effective view mode - default to summary if available, otherwise parsed
  const effectiveViewMode = viewMode ?? (summary ? 'summary' : 'parsed');
  const scrollRef = useRef<HTMLDivElement>(null);
  const autoScrollRef = useRef(true);
  const projectPathRef = useRef<string>('');
@@ -299,8 +307,8 @@ export function AgentOutputModal({
        className="w-[60vw] max-w-[60vw] max-h-[80vh] flex flex-col"
        data-testid="agent-output-modal"
      >
-        <DialogHeader className="flex-shrink-0">
+        <DialogHeader className="shrink-0">
-          <div className="flex items-center justify-between">
+          <div className="flex items-center justify-between pr-8">
            <DialogTitle className="flex items-center gap-2">
              {featureStatus !== 'verified' && featureStatus !== 'waiting_approval' && (
                <Loader2 className="w-5 h-5 text-primary animate-spin" />
@@ -308,10 +316,24 @@ export function AgentOutputModal({
              Agent Output
            </DialogTitle>
            <div className="flex items-center gap-1 bg-muted rounded-lg p-1">
              {summary && (
                <button
                  onClick={() => setViewMode('summary')}
                  className={`flex items-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium transition-all ${
                    effectiveViewMode === 'summary'
                      ? 'bg-primary/20 text-primary shadow-sm'
                      : 'text-muted-foreground hover:text-foreground hover:bg-accent'
                  }`}
                  data-testid="view-mode-summary"
                >
                  <ClipboardList className="w-3.5 h-3.5" />
                  Summary
                </button>
              )}
              <button
                onClick={() => setViewMode('parsed')}
                className={`flex items-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium transition-all ${
-                  viewMode === 'parsed'
+                  effectiveViewMode === 'parsed'
                    ? 'bg-primary/20 text-primary shadow-sm'
                    : 'text-muted-foreground hover:text-foreground hover:bg-accent'
                }`}
@@ -323,7 +345,7 @@ export function AgentOutputModal({
              <button
                onClick={() => setViewMode('changes')}
                className={`flex items-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium transition-all ${
-                  viewMode === 'changes'
+                  effectiveViewMode === 'changes'
                    ? 'bg-primary/20 text-primary shadow-sm'
                    : 'text-muted-foreground hover:text-foreground hover:bg-accent'
                }`}
@@ -335,7 +357,7 @@ export function AgentOutputModal({
              <button
                onClick={() => setViewMode('raw')}
                className={`flex items-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium transition-all ${
-                  viewMode === 'raw'
+                  effectiveViewMode === 'raw'
                    ? 'bg-primary/20 text-primary shadow-sm'
                    : 'text-muted-foreground hover:text-foreground hover:bg-accent'
                }`}
@@ -361,7 +383,7 @@ export function AgentOutputModal({
          className="flex-shrink-0 mx-1"
        />
-        {viewMode === 'changes' ? (
+        {effectiveViewMode === 'changes' ? (
          <div className="flex-1 min-h-[400px] max-h-[60vh] overflow-y-auto scrollbar-visible">
            {projectPath ? (
              <GitDiffPanel
@@ -378,6 +400,10 @@ export function AgentOutputModal({
              </div>
            )}
          </div>
        ) : effectiveViewMode === 'summary' && summary ? (
          <div className="flex-1 overflow-y-auto bg-zinc-950 rounded-lg p-4 min-h-[400px] max-h-[60vh] scrollbar-visible">
            <Markdown>{summary}</Markdown>
          </div>
        ) : (
          <>
            <div
@@ -394,7 +420,7 @@ export function AgentOutputModal({
                <div className="flex items-center justify-center h-full text-muted-foreground">
                  No output yet. The agent will stream output here as it works.
                </div>
-              ) : viewMode === 'parsed' ? (
+              ) : effectiveViewMode === 'parsed' ? (
                <LogViewer output={output} />
              ) : (
                <div className="whitespace-pre-wrap break-words text-zinc-300">{output}</div>
--- a/apps/ui/src/lib/agent-context-parser.ts
+++ b/apps/ui/src/lib/agent-context-parser.ts
@@ -39,57 +39,120 @@ export function formatModelName(model: string): string {
  return model.split('-').slice(1, 3).join(' ');
 }
 /**
 * Pulls one brace-balanced JSON object out of `str`, beginning at `startIdx`.
 *
 * Braces that appear inside double-quoted strings are ignored, and a backslash
 * inside a string escapes the character that follows it.
 *
 * @param str - Text that contains the JSON object.
 * @param startIdx - Index of the opening `{` of the object.
 * @returns The substring from the opening brace through its matching closing
 *   brace, or `null` when `str[startIdx]` is not `{` or the object never closes.
 */
 function extractJsonObject(str: string, startIdx: number): string | null {
  if (str[startIdx] !== '{') {
    return null;
  }
  let openBraces = 0;
  let insideString = false;
  let pos = startIdx;
  while (pos < str.length) {
    const ch = str[pos];
    if (insideString) {
      if (ch === '\\') {
        // Skip the backslash together with the character it escapes
        pos += 2;
        continue;
      }
      if (ch === '"') {
        insideString = false;
      }
    } else if (ch === '"') {
      insideString = true;
    } else if (ch === '{') {
      openBraces += 1;
    } else if (ch === '}') {
      openBraces -= 1;
      if (openBraces === 0) {
        return str.slice(startIdx, pos + 1);
      }
    }
    pos += 1;
  }
  // Ran out of input before the outermost brace was closed
  return null;
 }
 /**
 * Extracts todos from the context content
 * Looks for TodoWrite tool calls in the format:
- * TodoWrite: [{"content": "...", "status": "..."}]
+ * 🔧 Tool: TodoWrite
 * Input: {"todos": [{"content": "...", "status": "..."}]}
 */
 function extractTodos(content: string): AgentTaskInfo['todos'] {
  const todos: AgentTaskInfo['todos'] = [];
-  // Look for TodoWrite tool inputs
+  // Find all occurrences of TodoWrite tool calls
-  const todoMatches = content.matchAll(
+  const todoWriteMarker = '🔧 Tool: TodoWrite';
-    /TodoWrite.*?(?:"todos"\s*:\s*)?(\[[\s\S]*?\](?=\s*(?:\}|$|🔧|📋|⚡|✅|❌)))/g
+  let searchStart = 0;
  );
-  for (const match of todoMatches) {
+  while (true) {
    const markerIdx = content.indexOf(todoWriteMarker, searchStart);
    if (markerIdx === -1) break;
    // Look for "Input:" after the marker
    const inputIdx = content.indexOf('Input:', markerIdx);
    if (inputIdx === -1 || inputIdx > markerIdx + 100) {
      searchStart = markerIdx + 1;
      continue;
    }
    // Find the start of the JSON object
    const jsonStart = content.indexOf('{', inputIdx);
    if (jsonStart === -1) {
      searchStart = markerIdx + 1;
      continue;
    }
    // Extract the complete JSON object
    const jsonStr = extractJsonObject(content, jsonStart);
    if (jsonStr) {
      try {
-      // Try to find JSON array in the match
+        const parsed = JSON.parse(jsonStr) as {
-      const jsonStr = match[1] || match[0];
+          todos?: Array<{ content: string; status: string }>;
-      const arrayMatch = jsonStr.match(/\[[\s\S]*?\]/);
+        };
-      if (arrayMatch) {
+        if (parsed.todos && Array.isArray(parsed.todos)) {
-        const parsed = JSON.parse(arrayMatch[0]);
+          // Clear previous todos - we want the latest state
-        if (Array.isArray(parsed)) {
+          todos.length = 0;
-          for (const item of parsed) {
+          for (const item of parsed.todos) {
            if (item.content && item.status) {
              // Check if this todo already exists (avoid duplicates)
              if (!todos.some((t) => t.content === item.content)) {
              todos.push({
                content: item.content,
-                  status: item.status,
+                status: item.status as 'pending' | 'in_progress' | 'completed',
              });
            }
          }
        }
        }
      }
      } catch {
        // Ignore parse errors
      }
    }
-  // Also try to extract from markdown task lists
+    searchStart = markerIdx + 1;
  }
  // Also try to extract from markdown task lists as fallback
  if (todos.length === 0) {
    const markdownTodos = content.matchAll(/- \[([ xX])\] (.+)/g);
    for (const match of markdownTodos) {
      const isCompleted = match[1].toLowerCase() === 'x';
-    const content = match[2].trim();
+      const todoContent = match[2].trim();
-    if (!todos.some((t) => t.content === content)) {
+      if (!todos.some((t) => t.content === todoContent)) {
        todos.push({
-        content,
+          content: todoContent,
          status: isCompleted ? 'completed' : 'pending',
        });
      }
    }
  }
  return todos;
 }
--- a/apps/ui/src/lib/log-parser.ts
+++ b/apps/ui/src/lib/log-parser.ts
@@ -664,6 +664,53 @@ function mergeConsecutiveEntries(entries: LogEntry[]): LogEntry[] {
  return merged;
 }
 /**
 * Extracts summary content from raw log output.
 *
 * Formats are tried in priority order and the first hit wins:
 *  1. `<summary>...</summary>` tags (preferred format)
 *  2. A markdown `## Summary` section (heading itself is dropped)
 *  3. A markdown `## Feature`, `## Changes` or `## Implementation` section
 *     (heading is kept in the returned text)
 *  4. A paragraph starting with "All tasks completed"
 *  5. A paragraph starting with "I've/I have ... completed/finished/implemented"
 *
 * Markdown sections run until the next `## ` heading or the end of the input;
 * introduction paragraphs run until the next tool/status marker line
 * (🔧, 📋, ⚡, ❌) or the end of the input.
 *
 * @param rawOutput - Raw agent log text.
 * @returns The trimmed summary text, or `null` when the input is empty/blank
 *   or no summary format is recognised.
 */
 export function extractSummary(rawOutput: string): string | null {
  if (!rawOutput || !rawOutput.trim()) {
    return null;
  }
  // Try to find <summary> tags first (preferred format)
  const summaryTagMatch = rawOutput.match(/<summary>([\s\S]*?)<\/summary>/);
  if (summaryTagMatch) {
    return summaryTagMatch[1].trim();
  }
  // Try to find markdown ## Summary section.
  // The `m` flag is needed so `^` can match the heading at any line start, but
  // it also makes `$` match at every line end, which would cut the lazy capture
  // off after the first line. `(?![\s\S])` matches only at the true end of input.
  const summaryHeaderMatch = rawOutput.match(
    /^##\s+Summary\s*\n([\s\S]*?)(?=\n##\s+|(?![\s\S]))/m
  );
  if (summaryHeaderMatch) {
    return summaryHeaderMatch[1].trim();
  }
  // Try other summary formats (Feature, Changes, Implementation)
  const otherHeaderMatch = rawOutput.match(
    /^##\s+(Feature|Changes|Implementation)\s*\n([\s\S]*?)(?=\n##\s+|(?![\s\S]))/m
  );
  if (otherHeaderMatch) {
    return `## ${otherHeaderMatch[1]}\n${otherHeaderMatch[2].trim()}`;
  }
  // Try to find summary introduction lines (no `m` flag here, so `$` is end of input)
  const introMatch = rawOutput.match(
    /(^|\n)(All tasks completed[\s\S]*?)(?=\n🔧|\n📋|\n⚡|\n❌|$)/
  );
  if (introMatch) {
    return introMatch[2].trim();
  }
  const completionMatch = rawOutput.match(
    /(^|\n)((I've|I have) (successfully |now )?(completed|finished|implemented)[\s\S]*?)(?=\n🔧|\n📋|\n⚡|\n❌|$)/
  );
  if (completionMatch) {
    return completionMatch[2].trim();
  }
  return null;
 }
 /**
 * Gets the color classes for a log entry type
 */