async

2025-08-13 18:19:00 +08:00 · 2025-03-13 16:27:59 -03:00 · 2025-03-13 16:27:59 -03:00 · 3641070ece
commit 3641070ece
parent 6a5a4e5b6f
4 changed files with 1762 additions and 183 deletions
--- a/apps/python-sdk/example.py
+++ b/apps/python-sdk/example.py
@ -47,7 +47,7 @@ while attempts > 0 and crawl_status['status'] != 'completed':
    attempts -= 1
    time.sleep(1)

-crawl_status = app.get_crawl_status(async_result['id'])
+crawl_status = app.check_crawl_status(async_result['id'])
 print(crawl_status)

 # LLM Extraction:
@ -155,4 +155,4 @@ async def start_crawl_and_watch():
    watcher.add_event_listener("done", on_done)

    # Start the watcher
-    await watcher.connect()
+    await watcher.connect()
--- a/apps/python-sdk/example_async.py
+++ b/apps/python-sdk/example_async.py
@ -0,0 +1,168 @@
+import time
+import nest_asyncio
+import uuid
+import asyncio
+from firecrawl.firecrawl import AsyncFirecrawlApp
+from pydantic import BaseModel, Field
+from typing import List
+
+# Module-level async client shared by every example coroutine below.
+# NOTE(review): "fc-" looks like a placeholder API key -- replace before running.
+app = AsyncFirecrawlApp(api_key="fc-")
+
+async def example_scrape():
+    """Scrape a single page and print the ``markdown`` entry of the result."""
+    page = await app.scrape_url('firecrawl.dev')
+    markdown = page['markdown']
+    print(markdown)
+
+async def example_batch_scrape():
+    """Batch-scrape two URLs through both batch entry points and print the results.
+
+    The same URL list and format options are passed to ``batch_scrape_urls``
+    (whose result is indexed for page data) and to ``async_batch_scrape_urls``
+    (whose result is printed exactly as returned).
+    """
+    targets = ['https://example.com', 'https://docs.firecrawl.dev']
+    options = {'formats': ['markdown', 'html']}
+
+    # "Synchronous" variant: the awaited result carries the scraped pages,
+    # so the first page's markdown can be printed directly.
+    completed = await app.batch_scrape_urls(targets, options)
+    print("Synchronous Batch Scrape Result:")
+    first_page = completed['data'][0]
+    print(first_page['markdown'])
+
+    # "Asynchronous" variant: print whatever the call returns
+    # (presumably a job descriptor rather than page data -- confirm in the SDK).
+    submitted = await app.async_batch_scrape_urls(targets, options)
+    print("\nAsynchronous Batch Scrape Result:")
+    print(submitted)
+
+async def example_crawl():
+    """Crawl a site, then start a second crawl job and poll its status.
+
+    The status of the second job is fetched once up front, re-fetched up to
+    15 times (one second apart) until it reports ``'completed'``, and fetched
+    a final time before being printed.
+    """
+    # Crawl a website, supplying an optional idempotency key.
+    # NOTE(review): the positional ``2`` is presumably the poll interval -- confirm.
+    request_key = str(uuid.uuid4())
+    finished_crawl = await app.crawl_url(
+        'firecrawl.dev',
+        {'excludePaths': ['blog/*']},
+        2,
+        request_key,
+    )
+    print(finished_crawl)
+
+    # Start a second crawl through the async entry point; the returned
+    # mapping's 'id' is used for the status checks below.
+    job = await app.async_crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, "")
+    print(job)
+
+    crawl_status = await app.check_crawl_status(job['id'])
+    print(crawl_status)
+
+    # Bounded polling: at most 15 further checks, stopping early on completion.
+    for _ in range(15):
+        if crawl_status['status'] == 'completed':
+            break
+        print(crawl_status)
+        crawl_status = await app.check_crawl_status(job['id'])
+        # asyncio.sleep (not time.sleep) so the event loop is not blocked.
+        await asyncio.sleep(1)
+
+    crawl_status = await app.check_crawl_status(job['id'])
+    print(crawl_status)
+
+async def example_llm_extraction():
+    """Run two LLM-extraction scrapes of Hacker News and print both results.
+
+    The first request describes the desired output with Pydantic models and
+    prints the ``extract`` entry of the result. The second request uses a
+    hand-written JSON schema against a client created with ``version="v0"``
+    and prints the whole result.
+    """
+    # Define schema to extract contents into using pydantic
+    class ArticleSchema(BaseModel):
+        title: str
+        points: int
+        by: str
+        commentsURL: str
+
+    class TopArticlesSchema(BaseModel):
+        top: List[ArticleSchema] = Field(..., description="Top 5 stories")
+
+    llm_extraction_result = await app.scrape_url('https://news.ycombinator.com', {
+        'formats': ['extract'],
+        'extract': {
+            'schema': TopArticlesSchema.model_json_schema()
+        }
+    })
+
+    print(llm_extraction_result['extract'])
+
+    # Define schema to extract contents into using json schema
+    json_schema = {
+        "type": "object",
+        "properties": {
+            "top": {
+                "type": "array",
+                "items": {
+                    "type": "object",
+                    "properties": {
+                        "title": {"type": "string"},
+                        "points": {"type": "number"},
+                        "by": {"type": "string"},
+                        "commentsURL": {"type": "string"}
+                    },
+                    "required": ["title", "points", "by", "commentsURL"]
+                },
+                "minItems": 5,
+                "maxItems": 5,
+                "description": "Top 5 stories on Hacker News"
+            }
+        },
+        "required": ["top"]
+    }
+
+    # NOTE(review): assumes AsyncFirecrawlApp accepts ``version`` -- confirm in the SDK.
+    app2 = AsyncFirecrawlApp(api_key="fc-", version="v0")
+
+    llm_extraction_result = await app2.scrape_url('https://news.ycombinator.com', {
+        'extractorOptions': {
+            'extractionSchema': json_schema,
+            'mode': 'llm-extraction'
+        },
+        'pageOptions': {
+            'onlyMainContent': True
+        }
+    })
+
+    # Previously this result was assigned and silently discarded; print it so
+    # the second extraction is visible like every other example's output.
+    print(llm_extraction_result)
+
+async def example_map_and_extract():
+    """Map a site filtered by a search term, then run a schema-guided extract.
+
+    Both results are printed as returned by the client.
+    """
+    # Map a website:
+    map_result = await app.map_url('https://firecrawl.dev', { 'search': 'blog' })
+    print(map_result)
+
+    # Extract URLs:
+    class ExtractSchema(BaseModel):
+        title: str
+        description: str
+        links: List[str]
+
+    # Build the JSON schema from the Pydantic model. Use model_json_schema()
+    # (as the LLM-extraction example in this file does) instead of the
+    # .schema() method, which is deprecated in Pydantic v2.
+    extract_schema = ExtractSchema.model_json_schema()
+
+    # Perform the extraction
+    extract_result = await app.extract(['https://firecrawl.dev'], {
+        'prompt': "Extract the title, description, and links from the website",
+        'schema': extract_schema
+    })
+    print(extract_result)
+
+# Define event handlers for websocket
+def on_document(detail):
+    """Print the payload passed to the "document" listener, tagged ``DOC``."""
+    label = "DOC"
+    print(label, detail)
+
+def on_error(detail):
+    """Print the ``error`` entry of the listener payload, tagged ``ERR``."""
+    error = detail['error']
+    print("ERR", error)
+
+def on_done(detail):
+    """Print the ``status`` entry of the listener payload, tagged ``DONE``."""
+    status = detail['status']
+    print("DONE", status)
+
+async def example_websocket_crawl():
+    """Start a crawl with a watcher, attach the three handlers, and connect."""
+    # Initiate the crawl job and get the watcher
+    crawl_options = { 'excludePaths': ['blog/*'], 'limit': 5 }
+    watcher = await app.crawl_url_and_watch('firecrawl.dev', crawl_options)
+
+    # Register the listeners in the same order: document, error, done.
+    listeners = (
+        ("document", on_document),
+        ("error", on_error),
+        ("done", on_done),
+    )
+    for event_name, handler in listeners:
+        watcher.add_event_listener(event_name, handler)
+
+    # Start the watcher
+    await watcher.connect()
+
+async def main():
+    """Run every example coroutine one after another, in a fixed order."""
+    # Apply nest_asyncio to allow nested event loops
+    nest_asyncio.apply()
+
+    # Each example is awaited to completion before the next one starts.
+    examples = (
+        example_scrape,
+        example_batch_scrape,
+        example_crawl,
+        example_llm_extraction,
+        example_map_and_extract,
+        example_websocket_crawl,
+    )
+    for example in examples:
+        await example()
+
+# Script entry point: run all examples on a fresh event loop when the file is
+# executed directly (nothing runs on import).
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/apps/python-sdk/firecrawl/firecrawl.py
+++ b/apps/python-sdk/firecrawl/firecrawl.py
--- a/apps/python-sdk/requirements.txt
+++ b/apps/python-sdk/requirements.txt
@ -3,4 +3,5 @@ pytest
 python-dotenv
 websockets
 nest-asyncio
-pydantic
+pydantic
+aiohttp