diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..ca70c82 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,19 @@ +# Overview +This is an AI Podcast Generation app that converts a URL into a natural-sounding podcast audio file. + +# Core Features + +## Scrape Website +The user enters a URL and the app then uses Firecrawl to extract the website content. + +## Generate Conversation +The app then uses the AI SDK v5 (OpenAI gpt-5-mini as the model) to create a conversation between 2 hosts discussing the scraped content. + +## Generate Podcast Audio +Use the ElevenLabs SDK to generate the dialog from the conversation text. + +# Important Rules! +- Always use shadcn components over custom components +- Never create the components yourself, always install them using the install command: + Example: npx shadcn@latest add button +- Always use the standard tailwind and shadcn colors. Never use inline custom colors. diff --git a/docs/business/overview.md b/docs/business/overview.md new file mode 100644 index 0000000..7f93e48 --- /dev/null +++ b/docs/business/overview.md @@ -0,0 +1,13 @@ +# Overview +This is an AI Podcast Generation app that converts a URL into a natural-sounding podcast audio file. + +# Core Features + +## Scrape Website +The user enters a URL and the app then uses Firecrawl to extract the website content. + +## Generate Conversation +The app then uses the AI SDK v5 (OpenAI gpt-5-mini as the model) to create a conversation between 2 hosts discussing the scraped content. + +## Generate Podcast Audio +Use the ElevenLabs SDK to generate the dialog from the conversation text. 
\ No newline at end of file diff --git a/docs/inspiration/notebooklm.png b/docs/inspiration/notebooklm.png new file mode 100644 index 0000000..3f53cd5 Binary files /dev/null and b/docs/inspiration/notebooklm.png differ diff --git a/docs/technical/firecrawl.md b/docs/technical/firecrawl.md new file mode 100644 index 0000000..2e4ed56 --- /dev/null +++ b/docs/technical/firecrawl.md @@ -0,0 +1,675 @@ +# Quickstart + +> Firecrawl allows you to turn entire websites into LLM-ready markdown + +Hero Light + +## Welcome to Firecrawl + +[Firecrawl](https://firecrawl.dev?ref=github) is an API service that takes a URL, crawls it, and converts it into clean markdown. We crawl all accessible subpages and give you clean markdown for each. No sitemap required. + +## How to use it? + +We provide an easy to use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.dev/playground). You can also self host the backend if you'd like. + +Check out the following resources to get started: + +* [x] **API**: [Documentation](https://docs.firecrawl.dev/api-reference/introduction) +* [x] **SDKs**: [Python](https://docs.firecrawl.dev/sdks/python), [Node](https://docs.firecrawl.dev/sdks/node) +* [x] **LLM Frameworks**: [Langchain (python)](https://python.langchain.com/docs/integrations/document_loaders/firecrawl/), [Langchain (js)](https://js.langchain.com/docs/integrations/document_loaders/web_loaders/firecrawl), [Llama Index](https://docs.llamaindex.ai/en/latest/examples/data_connectors/WebPageDemo/#using-firecrawl-reader), [Crew.ai](https://docs.crewai.com/), [Composio](https://composio.dev/tools/firecrawl/all), [PraisonAI](https://docs.praison.ai/firecrawl/), [Superinterface](https://superinterface.ai/docs/assistants/functions/firecrawl), [Vectorize](https://docs.vectorize.io/integrations/source-connectors/firecrawl) +* [x] **Low-code Frameworks**: [Dify](https://dify.ai/blog/dify-ai-blog-integrated-with-firecrawl), 
[Langflow](https://docs.langflow.org/), [Flowise AI](https://docs.flowiseai.com/integrations/langchain/document-loaders/firecrawl), [Cargo](https://docs.getcargo.io/integration/firecrawl), [Pipedream](https://pipedream.com/apps/firecrawl/) +* [x] **Community SDKs**: [Go](https://docs.firecrawl.dev/sdks/go), [Rust](https://docs.firecrawl.dev/sdks/rust) (v1) +* [x] **Others**: [Zapier](https://zapier.com/apps/firecrawl/integrations), [Pabbly Connect](https://www.pabbly.com/connect/integrations/firecrawl/) +* [ ] Want an SDK or Integration? Let us know by opening an issue. + +**Self-host:** To self-host refer to guide [here](/contributing/self-host). + +### API Key + +To use the API, you need to sign up on [Firecrawl](https://firecrawl.dev) and get an API key. + +### Features + +* [**Scrape**](#scraping): scrapes a URL and get its content in LLM-ready format (markdown, summary, structured data via [json mode](#json-mode), screenshot, html) +* [**Crawl**](#crawling): scrapes all the URLs of a web page and return content in LLM-ready format +* [**Map**](/features/map): input a website and get all the website urls - extremely fast +* [**Search**](/features/search): search the web and get full content from results +* [**Extract**](/features/extract): get structured data from single page, multiple pages or entire websites with AI. + +### Powerful Capabilities + +* **LLM-ready formats**: markdown, summary, structured data, screenshot, HTML, links, metadata +* **The hard stuff**: proxies, anti-bot mechanisms, dynamic content (js-rendered), output parsing, orchestration +* **Lightning fast**: Get results in seconds—built for speed and high-throughput use cases. +* **Customizability**: exclude tags, crawl behind auth walls with custom headers, max crawl depth, etc... +* **Media parsing**: pdfs, docx, images. +* **Reliability first**: designed to get the data you need - no matter how hard it is. 
+* **Actions**: click, scroll, input, wait and more before extracting data + +You can find all of Firecrawl's capabilities and how to use them in our [documentation](https://docs.firecrawl.dev/api-reference/v2-introduction) + +## Installing Firecrawl + + + ```python Python + # pip install firecrawl-py + + from firecrawl import Firecrawl + + firecrawl = Firecrawl(api_key="fc-YOUR-API-KEY") + ``` + + ```js Node + # npm install @mendable/firecrawl-js + + import Firecrawl from '@mendable/firecrawl-js'; + + const firecrawl = new Firecrawl({ apiKey: "fc-YOUR-API-KEY" }); + ``` + + +## Scraping + +To scrape a single URL, use the `scrape` method. It takes the URL as a parameter and returns the scraped data as a dictionary. + + + ```python Python + from firecrawl import Firecrawl + + firecrawl = Firecrawl(api_key="fc-YOUR-API-KEY") + + # Scrape a website: + doc = firecrawl.scrape("https://firecrawl.dev", formats=["markdown", "html"]) + print(doc) + ``` + + ```js Node + import Firecrawl from '@mendable/firecrawl-js'; + + const firecrawl = new Firecrawl({ apiKey: "fc-YOUR-API-KEY" }); + + // Scrape a website: + const doc = await firecrawl.scrape('https://firecrawl.dev', { formats: ['markdown', 'html'] }); + console.log(doc); + ``` + + ```bash cURL + curl -s -X POST "https://api.firecrawl.dev/v2/scrape" \ + -H "Authorization: Bearer $FIRECRAWL_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://firecrawl.dev", + "formats": ["markdown", "html"] + }' + ``` + + +### Response + +SDKs will return the data object directly. cURL will return the payload exactly as shown below. + +```json +{ + "success": true, + "data" : { + "markdown": "Launch Week I is here! 
[See our Day 2 Release 🚀](https://www.firecrawl.dev/blog/launch-week-i-day-2-doubled-rate-limits)[💥 Get 2 months free...", + "html": " + ```python Python + from firecrawl import Firecrawl + + firecrawl = Firecrawl(api_key="fc-YOUR-API-KEY") + + docs = firecrawl.crawl(url="https://docs.firecrawl.dev", limit=10) + print(docs) + ``` + + ```js Node + import Firecrawl from '@mendable/firecrawl-js'; + + const firecrawl = new Firecrawl({ apiKey: "fc-YOUR-API-KEY" }); + + const docs = await firecrawl.crawl('https://docs.firecrawl.dev', { limit: 10 }); + console.log(docs); + ``` + + ```bash cURL + curl -s -X POST "https://api.firecrawl.dev/v2/crawl" \ + -H "Authorization: Bearer $FIRECRAWL_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://docs.firecrawl.dev", + "limit": 10 + }' + ``` + + +If you're using our API directly, via cURL or the `start crawl` functions of the SDKs, this will return an `ID` that you can use to check the status of the crawl. + +```json +{ + "success": true, + "id": "123-456-789", + "url": "https://api.firecrawl.dev/v2/crawl/123-456-789" +} +``` + +### Get Crawl Status + +Used to check the status of a crawl job and get its result. + + + ```python Python + status = firecrawl.get_crawl_status("") + print(status) + ``` + + ```js Node + const status = await firecrawl.getCrawlStatus(""); + console.log(status); + ``` + + ```bash cURL + # After starting a crawl, poll status by jobId + curl -s -X GET "https://api.firecrawl.dev/v2/crawl/" \ + -H "Authorization: Bearer $FIRECRAWL_API_KEY" + ``` + + +#### Response + +The response will be different depending on the status of the crawl. For crawls that have not completed, or for responses larger than 10MB, a `next` URL parameter is provided. You must request this URL to retrieve the next 10MB of data. If the `next` parameter is absent, it indicates the end of the crawl data. 
+ + + ```json Scraping + { + "status": "scraping", + "total": 36, + "completed": 10, + "creditsUsed": 10, + "expiresAt": "2024-00-00T00:00:00.000Z", + "next": "https://api.firecrawl.dev/v2/crawl/123-456-789?skip=10", + "data": [ + { + "markdown": "[Firecrawl Docs home page![light logo](https://mintlify.s3-us-west-1.amazonaws.com/firecrawl/logo/light.svg)!...", + "html": "...", + "metadata": { + "title": "Build a 'Chat with website' using Groq Llama 3 | Firecrawl", + "language": "en", + "sourceURL": "https://docs.firecrawl.dev/learn/rag-llama3", + "description": "Learn how to use Firecrawl, Groq Llama 3, and Langchain to build a 'Chat with your website' bot.", + "ogLocaleAlternate": [], + "statusCode": 200 + } + }, + ... + ] + } + ``` + + ```json Completed + { + "status": "completed", + "total": 36, + "completed": 36, + "creditsUsed": 36, + "expiresAt": "2024-00-00T00:00:00.000Z", + "next": "https://api.firecrawl.dev/v2/crawl/123-456-789?skip=26", + "data": [ + { + "markdown": "[Firecrawl Docs home page![light logo](https://mintlify.s3-us-west-1.amazonaws.com/firecrawl/logo/light.svg)!...", + "html": "...", + "metadata": { + "title": "Build a 'Chat with website' using Groq Llama 3 | Firecrawl", + "language": "en", + "sourceURL": "https://docs.firecrawl.dev/learn/rag-llama3", + "description": "Learn how to use Firecrawl, Groq Llama 3, and Langchain to build a 'Chat with your website' bot.", + "ogLocaleAlternate": [], + "statusCode": 200 + } + }, + ... + ] + } + ``` + + +## JSON mode + +With JSON mode, you can easily extract structured data from any URL. We support pydantic schemas to make it easier for you too. 
Here is how to use it: + + + ```python Python + from firecrawl import Firecrawl + from pydantic import BaseModel + app = Firecrawl(api_key="fc-YOUR-API-KEY") + + class JsonSchema(BaseModel): + company_mission: str + supports_sso: bool + is_open_source: bool + is_in_yc: bool + + result = app.scrape( + 'https://firecrawl.dev', + formats=[{ + "type": "json", + "schema": JsonSchema + }], + only_main_content=False, + timeout=120000 + ) + + print(result) + ``` + + ```js Node + import FirecrawlApp from "@mendable/firecrawl-js"; + import { z } from "zod"; + + const app = new FirecrawlApp({ + apiKey: "fc-YOUR_API_KEY" + }); + + // Define schema to extract contents into + const schema = z.object({ + company_mission: z.string(), + supports_sso: z.boolean(), + is_open_source: z.boolean(), + is_in_yc: z.boolean() + }); + + const result = await app.scrape("https://docs.firecrawl.dev/", { + formats: [{ + type: "json", + schema: schema + }], + }); + + console.log(result); + ``` + + ```bash cURL + curl -X POST https://api.firecrawl.dev/v2/scrape \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer YOUR_API_KEY' \ + -d '{ + "url": "https://docs.firecrawl.dev/", + "formats": [ { + "type": "json", + "schema": { + "type": "object", + "properties": { + "company_mission": { + "type": "string" + }, + "supports_sso": { + "type": "boolean" + }, + "is_open_source": { + "type": "boolean" + }, + "is_in_yc": { + "type": "boolean" + } + }, + "required": [ + "company_mission", + "supports_sso", + "is_open_source", + "is_in_yc" + ] + } + } ] + }' + ``` + + +Output: + +```json JSON +{ + "success": true, + "data": { + "json": { + "company_mission": "AI-powered web scraping and data extraction", + "supports_sso": true, + "is_open_source": true, + "is_in_yc": true + }, + "metadata": { + "title": "Firecrawl", + "description": "AI-powered web scraping and data extraction", + "robots": "follow, index", + "ogTitle": "Firecrawl", + "ogDescription": "AI-powered web scraping and data 
extraction", + "ogUrl": "https://firecrawl.dev/", + "ogImage": "https://firecrawl.dev/og.png", + "ogLocaleAlternate": [], + "ogSiteName": "Firecrawl", + "sourceURL": "https://firecrawl.dev/" + }, + } +} +``` + +## Search + +Firecrawl's search API allows you to perform web searches and optionally scrape the search results in one operation. + +* Choose specific output formats (markdown, HTML, links, screenshots) +* Choose specific sources (web, news, images) +* Search the web with customizable parameters (location, etc.) + +For details, see the [Search Endpoint API Reference](/api-reference/endpoint/search). + + + ```python Python + from firecrawl import Firecrawl + + firecrawl = Firecrawl(api_key="fc-YOUR-API-KEY") + + results = firecrawl.search( + query="firecrawl", + limit=3, + ) + print(results) + ``` + + ```js Node + import Firecrawl from '@mendable/firecrawl-js'; + + const firecrawl = new Firecrawl({ apiKey: "fc-YOUR-API-KEY" }); + + const results = await firecrawl.search('firecrawl', { + limit: 3, + scrapeOptions: { formats: ['markdown'] } + }); + console.log(results); + ``` + + ```bash + curl -s -X POST "https://api.firecrawl.dev/v2/search" \ + -H "Authorization: Bearer $FIRECRAWL_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "query": "firecrawl", + "limit": 3 + }' + ``` + + +### Response + +SDKs will return the data object directly. cURL will return the complete payload. + +```json JSON +{ + "success": true, + "data": { + "web": [ + { + "url": "https://www.firecrawl.dev/", + "title": "Firecrawl - The Web Data API for AI", + "description": "The web crawling, scraping, and search API for AI. Built for scale. Firecrawl delivers the entire internet to AI agents and builders.", + "position": 1 + }, + { + "url": "https://github.com/mendableai/firecrawl", + "title": "mendableai/firecrawl: Turn entire websites into LLM-ready ... 
- GitHub", + "description": "Firecrawl is an API service that takes a URL, crawls it, and converts it into clean markdown or structured data.", + "position": 2 + }, + ... + ], + "images": [ + { + "title": "Quickstart | Firecrawl", + "imageUrl": "https://mintlify.s3.us-west-1.amazonaws.com/firecrawl/logo/logo.png", + "imageWidth": 5814, + "imageHeight": 1200, + "url": "https://docs.firecrawl.dev/", + "position": 1 + }, + ... + ], + "news": [ + { + "title": "Y Combinator startup Firecrawl is ready to pay $1M to hire three AI agents as employees", + "url": "https://techcrunch.com/2025/05/17/y-combinator-startup-firecrawl-is-ready-to-pay-1m-to-hire-three-ai-agents-as-employees/", + "snippet": "It's now placed three new ads on YC's job board for “AI agents only” and has set aside a $1 million budget total to make it happen.", + "date": "3 months ago", + "position": 1 + }, + ... + ] + } +} +``` + +### Extracting without schema + +You can now extract without a schema by just passing a `prompt` to the endpoint. The llm chooses the structure of the data. + + + ```python Python + from firecrawl import Firecrawl + + app = Firecrawl(api_key="fc-YOUR-API-KEY") + + result = app.scrape( + 'https://firecrawl.dev', + formats=[{ + "type": "json", + "prompt": "Extract the company mission from the page." + }], + only_main_content=False, + timeout=120000 + ) + + print(result) + ``` + + ```js Node + import FirecrawlApp from "@mendable/firecrawl-js"; + + const app = new FirecrawlApp({ + apiKey: "fc-YOUR_API_KEY" + }); + + const result = await app.scrape("https://docs.firecrawl.dev/", { + formats: [{ + type: "json", + prompt: "Extract the company mission from the page." 
+ }] + }); + + console.log(result); + ``` + + ```bash cURL + curl -X POST https://api.firecrawl.dev/v2/scrape \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer YOUR_API_KEY' \ + -d '{ + "url": "https://docs.firecrawl.dev/", + "formats": [{ + "type": "json", + "prompt": "Extract the company mission from the page." + }] + }' + ``` + + +Output: + +```json JSON +{ + "success": true, + "data": { + "json": { + "company_mission": "AI-powered web scraping and data extraction", + }, + "metadata": { + "title": "Firecrawl", + "description": "AI-powered web scraping and data extraction", + "robots": "follow, index", + "ogTitle": "Firecrawl", + "ogDescription": "AI-powered web scraping and data extraction", + "ogUrl": "https://firecrawl.dev/", + "ogImage": "https://firecrawl.dev/og.png", + "ogLocaleAlternate": [], + "ogSiteName": "Firecrawl", + "sourceURL": "https://firecrawl.dev/" + }, + } +} +``` + +## Interacting with the page with Actions + +Firecrawl allows you to perform various actions on a web page before scraping its content. This is particularly useful for interacting with dynamic content, navigating through pages, or accessing content that requires user interaction. + +Here is an example of how to use actions to navigate to google.com, search for Firecrawl, click on the first result, and take a screenshot. + +It is important to almost always use the `wait` action before/after executing other actions to give enough time for the page to load. 
+ +### Example + + + ```python Python + from firecrawl import Firecrawl + + firecrawl = Firecrawl(api_key="fc-YOUR-API-KEY") + + doc = firecrawl.scrape('https://example.com/login', { + formats=['markdown'], + actions=[ + { type: 'write', text: 'john@example.com' }, + { type: 'press', key: 'Tab' }, + { type: 'write', text: 'secret' }, + { type: 'click', selector: 'button[type="submit"]' }, + { type: 'wait', milliseconds: 1500 }, + { type: 'screenshot', fullPage: true }, + ], + }); + + print(doc.markdown, doc.screenshot); + ``` + + ```js Node + import Firecrawl from '@mendable/firecrawl-js'; + + const firecrawl = new Firecrawl({ apiKey: "fc-YOUR-API-KEY" }); + + const doc = await firecrawl.scrape('https://example.com/login', { + formats: ['markdown'], + actions: [ + { type: 'write', text: 'john@example.com' }, + { type: 'press', key: 'Tab' }, + { type: 'write', text: 'secret' }, + { type: 'click', selector: 'button[type="submit"]' }, + { type: 'wait', milliseconds: 1500 }, + { type: 'screenshot', fullPage: true }, + ], + }); + + console.log(doc.markdown, doc.screenshot); + ``` + + ```bash cURL + curl -X POST https://api.firecrawl.dev/v2/scrape \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer YOUR_API_KEY' \ + -d '{ + "url": "https://example.com/login", + "formats": ["markdown"], + "actions": [ + { "type": "write", "text": "john@example.com" }, + { "type": "press", "key": "Tab" }, + { "type": "write", "text": "secret" }, + { "type": "click", "selector": "button[type=\"submit\"]" }, + { "type": "wait", "milliseconds": 1500 }, + { "type": "screenshot", "fullPage": true }, + ], + }' + ``` + + +### Output + + + ```json JSON + { + "success": true, + "data": { + "markdown": "Our first Launch Week is over! 
[See the recap 🚀](blog/firecrawl-launch-week-1-recap)...", + "actions": { + "screenshots": [ + "https://alttmdsdujxrfnakrkyi.supabase.co/storage/v1/object/public/media/screenshot-75ef2d87-31e0-4349-a478-fb432a29e241.png" + ], + "scrapes": [ + { + "url": "https://www.firecrawl.dev/", + "html": "

Firecrawl

" + } + ] + }, + "metadata": { + "title": "Home - Firecrawl", + "description": "Firecrawl crawls and converts any website into clean markdown.", + "language": "en", + "keywords": "Firecrawl,Markdown,Data,Mendable,Langchain", + "robots": "follow, index", + "ogTitle": "Firecrawl", + "ogDescription": "Turn any website into LLM-ready data.", + "ogUrl": "https://www.firecrawl.dev/", + "ogImage": "https://www.firecrawl.dev/og.png?123", + "ogLocaleAlternate": [], + "ogSiteName": "Firecrawl", + "sourceURL": "http://google.com", + "statusCode": 200 + } + } + } + ``` +
+ +## Open Source vs Cloud + +Firecrawl is open source available under the [AGPL-3.0 license](https://github.com/mendableai/firecrawl/blob/main/LICENSE). + +To deliver the best possible product, we offer a hosted version of Firecrawl alongside our open-source offering. The cloud solution allows us to continuously innovate and maintain a high-quality, sustainable service for all users. + +Firecrawl Cloud is available at [firecrawl.dev](https://firecrawl.dev) and offers a range of features that are not available in the open source version: + +Firecrawl Cloud vs Open Source + +## Contributing + +We love contributions! Please read our [contributing guide](https://github.com/mendableai/firecrawl/blob/main/CONTRIBUTING.md) before submitting a pull request. diff --git a/package-lock.json b/package-lock.json index 623af58..1840cc3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,6 +8,7 @@ "name": "ai-podcast", "version": "0.1.0", "dependencies": { + "@mendable/firecrawl-js": "^4.3.5", "@radix-ui/react-progress": "^1.1.7", "@radix-ui/react-slot": "^1.2.3", "class-variance-authority": "^0.7.1", @@ -761,6 +762,21 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@mendable/firecrawl-js": { + "version": "4.3.5", + "resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-4.3.5.tgz", + "integrity": "sha512-6Vd0bEVD0hOS4SXlmrBnOkm86WWqEITHSTXwdnjm6SkTz6ZF6o85gGHUvOYoky4w7AoLctAv8yqrm6aZsy4lfQ==", + "license": "MIT", + "dependencies": { + "axios": "^1.12.2", + "typescript-event-target": "^1.1.1", + "zod": "^3.23.8", + "zod-to-json-schema": "^3.23.0" + }, + "engines": { + "node": ">=22.0.0" + } + }, "node_modules/@napi-rs/wasm-runtime": { "version": "0.2.12", "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-0.2.12.tgz", @@ -2229,6 +2245,12 @@ "node": ">= 0.4" } }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": 
"sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, "node_modules/available-typed-arrays": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz", @@ -2255,6 +2277,17 @@ "node": ">=4" } }, + "node_modules/axios": { + "version": "1.12.2", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.12.2.tgz", + "integrity": "sha512-vMJzPewAlRyOgxV2dU0Cuz2O8zzzx9VYtbJOaBgXFeLc4IV/Eg50n4LowmehOOR61S8ZMpc2K5Sa7g6A4jfkUw==", + "license": "MIT", + "dependencies": { + "follow-redirects": "^1.15.6", + "form-data": "^4.0.4", + "proxy-from-env": "^1.1.0" + } + }, "node_modules/axobject-query": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/axobject-query/-/axobject-query-4.1.0.tgz", @@ -2319,7 +2352,6 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0", @@ -2450,6 +2482,18 @@ "dev": true, "license": "MIT" }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -2601,6 +2645,15 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": 
"sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/detect-libc": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.0.tgz", @@ -2628,7 +2681,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", - "dev": true, "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.1", @@ -2733,7 +2785,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -2743,7 +2794,6 @@ "version": "1.3.0", "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -2781,7 +2831,6 @@ "version": "1.1.1", "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0" @@ -2794,7 +2843,6 @@ "version": "2.1.0", "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0", @@ -3401,6 +3449,26 @@ "dev": true, "license": "ISC" }, + "node_modules/follow-redirects": { + "version": "1.15.11", + "resolved": 
"https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, "node_modules/for-each": { "version": "0.3.5", "resolved": "https://registry.npmjs.org/for-each/-/for-each-0.3.5.tgz", @@ -3417,11 +3485,26 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/form-data": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz", + "integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/function-bind": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", - "dev": true, "license": "MIT", "funding": { "url": "https://github.com/sponsors/ljharb" @@ -3462,7 +3545,6 @@ "version": "1.3.0", "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", - "dev": true, "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.2", @@ -3487,7 +3569,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", - "dev": true, "license": 
"MIT", "dependencies": { "dunder-proto": "^1.0.1", @@ -3575,7 +3656,6 @@ "version": "1.2.0", "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -3654,7 +3734,6 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -3667,7 +3746,6 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", - "dev": true, "license": "MIT", "dependencies": { "has-symbols": "^1.0.3" @@ -3683,7 +3761,6 @@ "version": "2.0.2", "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", - "dev": true, "license": "MIT", "dependencies": { "function-bind": "^1.1.2" @@ -4599,7 +4676,6 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -4629,6 +4705,27 @@ "node": ">=8.6" } }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": 
"https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/minimatch": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -5130,6 +5227,12 @@ "react-is": "^16.13.1" } }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "license": "MIT" + }, "node_modules/punycode": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", @@ -6046,6 +6149,12 @@ "node": ">=14.17" } }, + "node_modules/typescript-event-target": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/typescript-event-target/-/typescript-event-target-1.1.1.tgz", + "integrity": "sha512-dFSOFBKV6uwaloBCCUhxlD3Pr/P1a/tJdcmPrTXCHlEFD3faj0mztjcGn6VBAhQ0/Bdy8K3VWrrqwbt/ffsYsg==", + "license": "MIT" + }, "node_modules/unbox-primitive": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.1.0.tgz", @@ -6254,6 +6363,24 @@ "funding": { "url": "https://github.com/sponsors/sindresorhus" } + }, + "node_modules/zod": { + "version": "3.25.76", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-to-json-schema": { + "version": "3.24.6", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.24.6.tgz", + "integrity": 
"sha512-h/z3PKvcTcTetyjl1fkj79MHNEjm+HpD6NXheWjzOekY7kV+lwDYnHw+ivHkijnCSMz1yJaWBD9vu/Fcmk+vEg==", + "license": "ISC", + "peerDependencies": { + "zod": "^3.24.1" + } } } } diff --git a/package.json b/package.json index 93dd610..47dd5b1 100644 --- a/package.json +++ b/package.json @@ -9,6 +9,7 @@ "lint": "eslint" }, "dependencies": { + "@mendable/firecrawl-js": "^4.3.5", "@radix-ui/react-progress": "^1.1.7", "@radix-ui/react-slot": "^1.2.3", "class-variance-authority": "^0.7.1", diff --git a/src/app/api/scrape/route.ts b/src/app/api/scrape/route.ts new file mode 100644 index 0000000..b3dc6d4 --- /dev/null +++ b/src/app/api/scrape/route.ts @@ -0,0 +1,59 @@ +import { NextRequest, NextResponse } from 'next/server'; +import Firecrawl from '@mendable/firecrawl-js'; + +export async function POST(request: NextRequest) { + try { + const { url } = await request.json(); + + if (!url) { + return NextResponse.json( + { error: 'URL is required' }, + { status: 400 } + ); + } + + // Initialize Firecrawl with API key from environment + const firecrawl = new Firecrawl({ + apiKey: process.env.FIRECRAWL_API_KEY + }); + + console.log('Attempting to scrape URL:', url); + + // Scrape the website + const result = await firecrawl.scrape(url, { + formats: ['markdown', 'html'] + }); + + console.log('Firecrawl result received'); + + // Check if we have the expected data structure + if (!result || !result.markdown) { + console.error('Invalid Firecrawl response:', result); + throw new Error('Invalid response from Firecrawl API'); + } + + // Create a truncated excerpt for display + const excerpt = result.markdown + ? result.markdown.substring(0, 200) + (result.markdown.length > 200 ? '...' 
: '') + : 'No content available'; + + return NextResponse.json({ + success: true, + data: { + url: result.metadata?.sourceURL || url, + title: result.metadata?.title || 'Untitled', + description: result.metadata?.description || '', + content: result.markdown || '', + excerpt: excerpt, + scrapedAt: new Date().toISOString() + } + }); + + } catch (error) { + console.error('Scraping error:', error); + return NextResponse.json( + { error: error instanceof Error ? error.message : 'Failed to scrape website' }, + { status: 500 } + ); + } +} \ No newline at end of file diff --git a/src/app/page.tsx b/src/app/page.tsx index a74fcd8..c53248d 100644 --- a/src/app/page.tsx +++ b/src/app/page.tsx @@ -22,17 +22,60 @@ export default function Home() { const [currentTime, setCurrentTime] = useState(0); const [duration, setDuration] = useState(0); const [progressInterval, setProgressInterval] = useState(null); + const [recentSources, setRecentSources] = useState>([]); const handleSubmit = async (e: React.FormEvent) => { e.preventDefault(); setIsLoading(true); - // Simulate API call - setTimeout(() => { - setMessages(mockMessages); + try { + // Call Firecrawl API to scrape the website + const response = await fetch('/api/scrape', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ url }), + }); + + const result = await response.json(); + + if (result.success) { + // Add to recent sources + setRecentSources(prev => [{ + url: result.data.url, + title: result.data.title || 'Untitled', + excerpt: result.data.excerpt || 'No content available', + scrapedAt: result.data.scrapedAt + }, ...prev]); + + // Show success message + setMessages([{ + id: '1', + speaker: 'host1', + text: `Great! I've successfully scraped the content from "${result.data.title || 'the website'}". 
The content has been processed and is ready for podcast generation.`, + timestamp: '0:15' + }]); + setDuration(320); + } else { + throw new Error(result.error || 'Failed to scrape website'); + } + } catch (error) { + console.error('Error:', error); + setMessages([{ + id: '1', + speaker: 'host1', + text: 'Sorry, I encountered an error while trying to scrape the website. Please check the URL and try again.', + timestamp: '0:15' + }]); + } finally { setIsLoading(false); - setDuration(320); // 5 minutes 20 seconds - }, 2000); + } }; const togglePlay = () => { @@ -110,10 +153,39 @@ export default function Home() { + {/* Recent Sources Section */} + {recentSources.length > 0 && ( +
+

Recent Sources

+
+ {recentSources.slice(0, 3).map((source, index) => ( + +
+
+

+ {source.title} +

+ + {new Date(source.scrapedAt).toLocaleTimeString()} + +
+

+ {source.excerpt} +

+

+ {source.url} +

+
+
+ ))} +
+
+ )} + {messages.length > 0 && (

- Podcast generated successfully! Listen to the conversation below. + Website scraped successfully! Content is ready for podcast generation.

)}