diff --git a/docs/technical/elevenlabs.md b/docs/technical/elevenlabs.md new file mode 100644 index 0000000..b71813b --- /dev/null +++ b/docs/technical/elevenlabs.md @@ -0,0 +1,112 @@ +--- +title: Text to Dialogue quickstart +subtitle: Learn how to generate immersive dialogue from text. +--- + +This guide will show you how to generate immersive, natural-sounding dialogue from text using the Text to Dialogue API. + +## Using the Text to Dialogue API + + + + [Create an API key in the dashboard here](https://elevenlabs.io/app/settings/api-keys), which you’ll use to securely [access the API](/docs/api-reference/authentication). + + Store the key as a managed secret and pass it to the SDKs either as an environment variable via an `.env` file, or directly in your app’s configuration depending on your preference. + + ```js title=".env" + ELEVENLABS_API_KEY= + ``` + + + + We'll also use the `dotenv` library to load our API key from an environment variable. + + + ```python + pip install elevenlabs + pip install python-dotenv + ``` + + ```typescript + npm install @elevenlabs/elevenlabs-js + npm install dotenv + ``` + + + + + + Create a new file named `example.py` or `example.mts`, depending on your language of choice, and add the following code: + + + ```python maxLines=0 + # example.py + from dotenv import load_dotenv + from elevenlabs.client import ElevenLabs + from elevenlabs.play import play + + load_dotenv() + + elevenlabs = ElevenLabs( + api_key=os.getenv("ELEVENLABS_API_KEY"), + ) + + audio = elevenlabs.text_to_dialogue.convert( + inputs=[ + { + "text": "[cheerfully] Hello, how are you?", + "voice_id": "9BWtsMINqrJLrRacOk9x", + }, + { + "text": "[stuttering] I'm... 
I'm doing well, thank you", + "voice_id": "IKne3meq5aSn9XLyUdCD", + } + ] + ) + + play(audio) + ``` + + ```typescript maxLines=0 + // example.mts + import { ElevenLabsClient, play } from "@elevenlabs/elevenlabs-js"; + import "dotenv/config"; + + const elevenlabs = new ElevenLabsClient(); + + const audio = await elevenlabs.textToDialogue.convert({ + inputs: [ + { + text: "[cheerfully] Hello, how are you?", + voiceId: "9BWtsMINqrJLrRacOk9x", + }, + { + text: "[stuttering] I'm... I'm doing well, thank you", + voiceId: "IKne3meq5aSn9XLyUdCD", + }, + ], + }); + + play(audio); + ``` + + + + + ```python + python example.py + ``` + + ```typescript + npx tsx example.mts + ``` + + + You should hear the dialogue audio play. + + + + +## Next steps + +Explore the [API reference](/docs/api-reference/text-to-dialogue/convert) for more information on the Text to Dialogue API and its options. diff --git a/package-lock.json b/package-lock.json index 5d19bd5..a855a72 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,6 +10,7 @@ "dependencies": { "@ai-sdk/mistral": "^2.0.15", "@ai-sdk/openai": "^2.0.32", + "@elevenlabs/elevenlabs-js": "^2.16.0", "@mendable/firecrawl-js": "^4.3.5", "@radix-ui/react-progress": "^1.1.7", "@radix-ui/react-slot": "^1.2.3", @@ -126,6 +127,19 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/@elevenlabs/elevenlabs-js": { + "version": "2.16.0", + "resolved": "https://registry.npmjs.org/@elevenlabs/elevenlabs-js/-/elevenlabs-js-2.16.0.tgz", + "integrity": "sha512-XJGZ1nBkZCBJd9EJs8Pjdc9DidMysgY+KP9Ik1pGCNBkYEZ39P67YaXVNwIUkNS474AbezaAfNsueUWs1DPs/g==", + "license": "MIT", + "dependencies": { + "command-exists": "^1.2.9", + "node-fetch": "^2.7.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/@emnapi/core": { "version": "1.5.0", "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.5.0.tgz", @@ -2608,6 +2622,12 @@ "node": ">= 0.8" } }, + "node_modules/command-exists": { + "version": "1.2.9", + 
"resolved": "https://registry.npmjs.org/command-exists/-/command-exists-1.2.9.tgz", + "integrity": "sha512-LTQ/SGc+s0Xc0Fu5WaKnR0YiygZkm9eKFvyS+fRsU7/ZWFF8ykFM6Pc9aCVf1+xasOOZpO3BAVgVrKvsqKHV7w==", + "license": "MIT" + }, "node_modules/concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -5045,6 +5065,26 @@ "node": "^10 || ^12 || >=14" } }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, "node_modules/object-assign": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", @@ -6131,6 +6171,12 @@ "node": ">=8.0" } }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, "node_modules/ts-api-utils": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.1.0.tgz", @@ -6355,6 +6401,22 @@ "punycode": "^2.1.0" } }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": 
"sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/package.json b/package.json index f44deab..3c51140 100644 --- a/package.json +++ b/package.json @@ -11,6 +11,7 @@ "dependencies": { "@ai-sdk/mistral": "^2.0.15", "@ai-sdk/openai": "^2.0.32", + "@elevenlabs/elevenlabs-js": "^2.16.0", "@mendable/firecrawl-js": "^4.3.5", "@radix-ui/react-progress": "^1.1.7", "@radix-ui/react-slot": "^1.2.3", diff --git a/src/app/api/generate-audio/route.ts b/src/app/api/generate-audio/route.ts new file mode 100644 index 0000000..2298f6d --- /dev/null +++ b/src/app/api/generate-audio/route.ts @@ -0,0 +1,140 @@ +import { NextRequest, NextResponse } from 'next/server'; +import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js"; + +// Helper function to convert stream to buffer +async function streamToBuffer(stream: ReadableStream): Promise { + const reader = stream.getReader(); + const chunks: Uint8Array[] = []; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + chunks.push(value); + } + + const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0); + const result = new Uint8Array(totalLength); + let offset = 0; + + for (const chunk of chunks) { + result.set(chunk, offset); + offset += chunk.length; + } + + return result.buffer; +} + +interface Message { + id: string; + speaker: 'host1' | 'host2'; + text: string; + timestamp: string; +} + +export async function POST(request: NextRequest) { + try { + const { messages } = await request.json(); + + if (!messages || !Array.isArray(messages) || messages.length === 0) { + console.error('❌ Invalid messages received:', messages); + return NextResponse.json( + { error: 'Messages are required and must be an array' }, + { status: 
400 } + ); + } + + console.log('🎵 Audio generation request received:', { + messageCount: messages.length, + firstMessage: messages[0]?.text?.substring(0, 50) + '...', + secondMessage: messages[1]?.text?.substring(0, 50) + '...', + hasApiKey: !!process.env.ELEVENLABS_API_KEY, + totalMessagesExpected: messages.length + }); + + // Log each message being processed + messages.forEach((msg, index) => { + console.log(`📝 Message ${index + 1}: Speaker=${msg.speaker}, Length=${msg.text.length}, Text="${msg.text.substring(0, 100)}..."`); + }); + + // Initialize ElevenLabs client + const elevenlabs = new ElevenLabsClient({ + apiKey: process.env.ELEVENLABS_API_KEY, + }); + + if (!process.env.ELEVENLABS_API_KEY) { + console.error('❌ ELEVENLABS_API_KEY is not set'); + return NextResponse.json( + { error: 'ElevenLabs API key is not configured' }, + { status: 500 } + ); + } + + // Voice IDs for the two hosts + const voiceIds = { + host1: "9BWtsMINqrJLrRacOk9x", // Cheerful, enthusiastic voice + host2: "IKne3meq5aSn9XLyUdCD", // More thoughtful, calm voice + }; + + // Convert messages to ElevenLabs format + const inputs = messages.map((message: Message) => ({ + text: message.text, + voiceId: voiceIds[message.speaker], + })); + + console.log('🎤 Converting messages to audio using regular TTS'); + + try { + // Generate audio using regular text-to-speech for each message + console.log('⏳ Starting ElevenLabs TTS conversion...'); + + const audioBuffers: ArrayBuffer[] = []; + + for (let i = 0; i < inputs.length; i++) { + const input = inputs[i]; + console.log(`🎵 Processing message ${i + 1}/${inputs.length} with voice ${input.voiceId}`); + + const audioStream = await elevenlabs.textToSpeech.convert(input.voiceId, { + model_id: "eleven_multilingual_v2", + text: input.text, + }); + + const audioBuffer = await streamToBuffer(audioStream); + audioBuffers.push(audioBuffer); + console.log(`✅ Message ${i + 1} completed, size: ${audioBuffer.byteLength} bytes`); + } + + // Combine all audio buffers 
+ const totalLength = audioBuffers.reduce((acc, buffer) => acc + buffer.byteLength, 0); + const combinedBuffer = new Uint8Array(totalLength); + let offset = 0; + + for (const buffer of audioBuffers) { + combinedBuffer.set(new Uint8Array(buffer), offset); + offset += buffer.byteLength; + } + + console.log('🎵 All audio generated successfully, total size:', totalLength, 'bytes'); + + return NextResponse.json({ + success: true, + audio: Buffer.from(combinedBuffer).toString('base64'), + messageCount: messages.length, + audioSize: totalLength, + }); + + } catch (elevenLabsError) { + console.error('❌ ElevenLabs API error:', elevenLabsError); + return NextResponse.json( + { error: `ElevenLabs API error: ${elevenLabsError instanceof Error ? elevenLabsError.message : 'Unknown error'}` }, + { status: 500 } + ); + } + + } catch (error) { + console.error('❌ Audio generation error:', error); + return NextResponse.json( + { error: error instanceof Error ? error.message : 'Failed to generate audio' }, + { status: 500 } + ); + } +} \ No newline at end of file diff --git a/src/app/page.tsx b/src/app/page.tsx index 725aa03..b4bb7e6 100644 --- a/src/app/page.tsx +++ b/src/app/page.tsx @@ -17,6 +17,7 @@ interface Message { export default function Home() { const [url, setUrl] = useState(''); const [isLoading, setIsLoading] = useState(false); + const [isGeneratingAudio, setIsGeneratingAudio] = useState(false); const [messages, setMessages] = useState([]); const [visibleMessages, setVisibleMessages] = useState([]); const scrollContainerRef = useRef(null); @@ -24,23 +25,42 @@ export default function Home() { const [currentTime, setCurrentTime] = useState(0); const [duration, setDuration] = useState(0); const [progressInterval, setProgressInterval] = useState(null); + const [audioUrl, setAudioUrl] = useState(null); + const [audioRef, setAudioRef] = useState(null); + + // Debug states + const [debugSteps, setDebugSteps] = useState([]); + const [audioStatus, setAudioStatus] = 
useState('Waiting to start...'); + const [apiResponse, setApiResponse] = useState<{success?: boolean; messageCount?: number; audioSize?: number} | null>(null); const [recentSources, setRecentSources] = useState>([]); + + // Debug helper function + const addDebugStep = (step: string) => { + console.log('🔍 DEBUG:', step); + setDebugSteps(prev => [...prev, `${new Date().toLocaleTimeString()}: ${step}`]); + }; const handleSubmit = async (e: React.FormEvent) => { e.preventDefault(); setIsLoading(true); + addDebugStep('🚀 Starting podcast generation'); // Clear existing conversation before starting new one setMessages([]); setVisibleMessages([]); setDuration(0); + setAudioUrl(null); + setDebugSteps([]); + setAudioStatus('Starting...'); + setApiResponse(null); try { + addDebugStep('🕷️ Starting web scraping...'); // Call Firecrawl API to scrape the website const response = await fetch('/api/scrape', { method: 'POST', @@ -50,9 +70,12 @@ export default function Home() { body: JSON.stringify({ url }), }); + addDebugStep('📡 Waiting for scrape response...'); const result = await response.json(); + addDebugStep(`✅ Scrape completed: ${result.success ? 'SUCCESS' : 'FAILED'}`); if (result.success) { + addDebugStep(`📄 Content scraped: ${result.data.content?.length || 0} chars`); // Add to recent sources setRecentSources(prev => [{ url: result.data.url, @@ -61,13 +84,15 @@ export default function Home() { scrapedAt: result.data.scrapedAt }, ...prev]); - + setAudioStatus('Generating conversation...'); // Generate conversation await generateConversation(result.data.content, result.data.title || 'Untitled', result.data.url); } else { + addDebugStep(`❌ Scrape failed: ${result.error}`); throw new Error(result.error || 'Failed to scrape website'); } } catch (error) { + addDebugStep(`❌ ERROR: ${error instanceof Error ? 
error.message : 'Unknown error'}`); console.error('Error:', error); setMessages([{ id: '1', @@ -87,7 +112,7 @@ export default function Home() { }; const generateConversation = async (content: string, title: string, url: string) => { - console.log('Starting conversation generation...'); + addDebugStep('💬 Starting conversation generation...'); try { const response = await fetch('/api/generate-conversation', { method: 'POST', @@ -97,9 +122,12 @@ export default function Home() { body: JSON.stringify({ content, title, url }), }); - console.log('Conversation API response status:', response.status); + addDebugStep('📡 Waiting for conversation API...'); + const status = response.status; + addDebugStep(`📡 Conversation API status: ${status}`); if (!response.ok) { + addDebugStep(`❌ Conversation generation failed: ${status}`); throw new Error('Failed to generate conversation'); } @@ -163,6 +191,9 @@ export default function Home() { } console.log('Conversation generation completed successfully'); + + // Generate audio after conversation is complete + await generateAudio(); } catch (error) { console.error('Conversation generation error:', error); @@ -181,31 +212,115 @@ export default function Home() { } }; + const generateAudio = async () => { + if (messages.length === 0) return; + + addDebugStep('🎵 Starting audio generation...'); + setAudioStatus('Generating audio...'); + setIsGeneratingAudio(true); + + try { + addDebugStep('📡 Sending audio generation request...'); + const response = await fetch('/api/generate-audio', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ messages }), + }); + + const status = response.status; + addDebugStep(`📡 Audio API response status: ${status}`); + + if (!response.ok) { + const errorText = await response.text(); + addDebugStep(`❌ Audio generation failed: ${status} - ${errorText}`); + throw new Error(`Failed to generate audio: ${response.status} - ${errorText}`); + } + + const result = await 
response.json(); + addDebugStep(`✅ Audio API response: success=${result.success}, messages=${result.messageCount}, size=${result.audioSize} bytes`); + setApiResponse(result); + + // Create audio URL from base64 data + addDebugStep(`🎵 Creating audio blob from ${result.audio?.length || 0} base64 chars...`); + const audioBuffer = Buffer.from(result.audio, 'base64'); + addDebugStep(`🎵 Audio buffer size: ${audioBuffer.length} bytes`); + + const audioBlob = new Blob([audioBuffer], { type: 'audio/mpeg' }); + addDebugStep(`🎵 Audio blob created, size: ${audioBlob.size} bytes`); + + const url = URL.createObjectURL(audioBlob); + addDebugStep(`🎵 Audio URL created: ${url.substring(0, 50)}...`); + setAudioUrl(url); + addDebugStep('🎵 Audio URL created successfully!'); + setAudioStatus('Audio ready!'); + } catch (error) { + const errorMessage = error instanceof Error ? error.message : 'Unknown error'; + addDebugStep(`❌ Audio generation ERROR: ${errorMessage}`); + + // Check if it's a quota exceeded error + if (errorMessage.includes('quota_exceeded') || errorMessage.includes('exceeds your quota')) { + setAudioStatus('Quota exceeded - please check your ElevenLabs account'); + // Show user-friendly error message + setMessages(prev => [...prev, { + id: `error-${Date.now()}`, + speaker: 'host1', + text: '⚠️ Audio generation failed: ElevenLabs quota exceeded. You have 79 credits remaining but need 140 credits. Please upgrade your plan or try with shorter content.', + timestamp: new Date().toISOString() + }]); + } else { + setAudioStatus('Audio generation failed'); + // Show generic error to user + setMessages(prev => [...prev, { + id: 'error-audio', + speaker: 'host1', + text: `[Error] Could not generate audio: ${error instanceof Error ? error.message : 'Unknown error'}`, + timestamp: '0:00' + }]); + } + } finally { + setIsGeneratingAudio(false); + } + }; + const togglePlay = () => { + if (!audioRef) return; + + addDebugStep(`🔊 Toggle play called. 
Current state: isPlaying=${isPlaying}, audioRef exists=${!!audioRef}`); + if (isPlaying) { + audioRef.pause(); setIsPlaying(false); if (progressInterval) { clearInterval(progressInterval); setProgressInterval(null); } + addDebugStep('🔊 Audio paused'); } else { - setIsPlaying(true); - const interval = setInterval(() => { - setCurrentTime(prev => { - if (prev >= duration) { - setIsPlaying(false); - clearInterval(interval); - setProgressInterval(null); - return duration; + addDebugStep(`🔊 Attempting to play audio. Current time: ${audioRef.currentTime}, duration: ${audioRef.duration}`); + audioRef.play().then(() => { + setIsPlaying(true); + addDebugStep('🔊 Audio playing successfully'); + const interval = setInterval(() => { + if (audioRef) { + setCurrentTime(audioRef.currentTime); + setDuration(audioRef.duration || 0); } - return prev + 1; - }); - }, 1000); - setProgressInterval(interval); + }, 1000); + setProgressInterval(interval); + }).catch(error => { + addDebugStep(`❌ Error playing audio: ${error.message}`); + console.error('Error playing audio:', error); + setIsPlaying(false); + }); } }; const restartAudio = () => { + if (!audioRef) return; + + audioRef.currentTime = 0; setCurrentTime(0); setIsPlaying(false); if (progressInterval) { @@ -223,6 +338,79 @@ export default function Home() { } }, [messages]); + // Setup audio element when audio URL is available + useEffect(() => { + if (audioUrl) { + addDebugStep(`🔊 Setting up audio element with URL: ${audioUrl.substring(0, 50)}...`); + const audio = new Audio(audioUrl); + + // Force audio to load + audio.load(); + addDebugStep(`🔊 Audio element created and loading...`); + + // Audio event listeners + const handleLoadedMetadata = () => { + setDuration(audio.duration); + addDebugStep(`🔊 Audio loaded, duration: ${audio.duration} seconds`); + console.log('Audio loaded, duration:', audio.duration); + }; + + const handleLoadedData = () => { + addDebugStep(`🔊 Audio data loaded, readyState: ${audio.readyState}`); + 
console.log('Audio data loaded, readyState:', audio.readyState); + }; + + const handleTimeUpdate = () => { + setCurrentTime(audio.currentTime); + }; + + const handleEnded = () => { + setIsPlaying(false); + setCurrentTime(0); + if (progressInterval) { + clearInterval(progressInterval); + setProgressInterval(null); + } + }; + + const handleCanPlay = () => { + addDebugStep('🔊 Audio can play'); + console.log('Audio can play'); + }; + + const handleError = (e: any) => { + addDebugStep(`❌ Audio error: ${e.message || 'Unknown error'}`); + console.error('Audio error:', e); + }; + + audio.addEventListener('loadedmetadata', handleLoadedMetadata); + audio.addEventListener('loadeddata', handleLoadedData); + audio.addEventListener('timeupdate', handleTimeUpdate); + audio.addEventListener('ended', handleEnded); + audio.addEventListener('canplay', handleCanPlay); + audio.addEventListener('error', handleError); + + setAudioRef(audio); + + // Cleanup + return () => { + audio.removeEventListener('loadedmetadata', handleLoadedMetadata); + audio.removeEventListener('loadeddata', handleLoadedData); + audio.removeEventListener('timeupdate', handleTimeUpdate); + audio.removeEventListener('ended', handleEnded); + audio.removeEventListener('canplay', handleCanPlay); + audio.removeEventListener('error', handleError); + audio.pause(); + }; + } + + return () => { + if (audioUrl) { + URL.revokeObjectURL(audioUrl); + } + }; + }, [audioUrl]); + const handleScroll = useCallback(() => { const element = scrollContainerRef.current; if (!element) return; @@ -402,17 +590,70 @@ export default function Home() { Audio Player + + {/* Debug Panel */} + + + 🔍 Debug Panel + + +
+ Status: {audioStatus} +
+ + {debugSteps.length > 0 && ( +
+
Steps:
+
+ {debugSteps.map((step, index) => ( +
+ {step} +
+ ))} +
+
+ )} + + {apiResponse && ( +
+
API Response:
+
+
✅ Success: {apiResponse.success}
+
📨 Messages: {apiResponse.messageCount}
+
📏 Size: {apiResponse.audioSize} bytes
+
+
+ )} +
+
{messages.length === 0 ? (

Generate a podcast to enable audio playback.

) : (
-
-

- AI-generated conversation ready! Click play to listen. -

-
+ {isGeneratingAudio ? ( +
+
+
+

+ Generating audio... +

+
+
+ ) : audioUrl ? ( +
+

+ Podcast audio ready! Click play to listen. +

+
+ ) : ( +
+

+ AI-generated conversation ready! Audio will be generated automatically. +

+
+ )} {/* Audio Controls */}
@@ -422,14 +663,16 @@ export default function Home() { size="icon" className="rounded-full transition-all duration-200 hover:scale-110" title="Restart" + disabled={!audioUrl} >
@@ -468,11 +717,28 @@ export default function Home() {

Episode Details

- Duration: {formatTime(duration)} + Messages: {messages.length} +

+

+ Audio Duration: {duration > 0 ? formatTime(duration) : 'Generating...'}

Speakers: Alex & Sarah

+

+ Status: {isGeneratingAudio ? 'Generating audio...' : audioUrl ? 'Ready to play' : 'Preparing audio...'} +

+ {messages.length > 0 && !audioUrl && ( + + )}