Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl (synced 2025-08-15 01:45:59 +08:00)
async

parent 6a5a4e5b6f
commit 3641070ece
@@ -47,7 +47,7 @@ while attempts > 0 and crawl_status['status'] != 'completed':
     attempts -= 1
     time.sleep(1)
 
-crawl_status = app.get_crawl_status(async_result['id'])
+crawl_status = app.check_crawl_status(async_result['id'])
 print(crawl_status)
 
 # LLM Extraction:
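The only change in this hunk is the status-polling call: the example now uses check_crawl_status instead of get_crawl_status. A minimal sketch of the updated synchronous polling loop, assuming the surrounding example's FirecrawlApp client and a placeholder API key (not taken from the diff):

import time

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key, assumption

# Start a crawl without blocking, then poll its status by id.
async_result = app.async_crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']})
crawl_status = app.check_crawl_status(async_result['id'])  # renamed from get_crawl_status

attempts = 15
while attempts > 0 and crawl_status['status'] != 'completed':
    attempts -= 1
    time.sleep(1)
    crawl_status = app.check_crawl_status(async_result['id'])

print(crawl_status)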
apps/python-sdk/example_async.py (new file, 168 lines)
@@ -0,0 +1,168 @@
import time
import nest_asyncio
import uuid
import asyncio
from firecrawl.firecrawl import AsyncFirecrawlApp
from pydantic import BaseModel, Field
from typing import List

app = AsyncFirecrawlApp(api_key="fc-")

async def example_scrape():
    # Scrape a website:
    scrape_result = await app.scrape_url('firecrawl.dev')
    print(scrape_result['markdown'])

async def example_batch_scrape():
    # Batch scrape
    urls = ['https://example.com', 'https://docs.firecrawl.dev']
    batch_scrape_params = {
        'formats': ['markdown', 'html'],
    }

    # Synchronous batch scrape
    batch_result = await app.batch_scrape_urls(urls, batch_scrape_params)
    print("Synchronous Batch Scrape Result:")
    print(batch_result['data'][0]['markdown'])

    # Asynchronous batch scrape
    async_batch_result = await app.async_batch_scrape_urls(urls, batch_scrape_params)
    print("\nAsynchronous Batch Scrape Result:")
    print(async_batch_result)

async def example_crawl():
    # Crawl a website:
    idempotency_key = str(uuid.uuid4())  # optional idempotency key
    crawl_result = await app.crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, 2, idempotency_key)
    print(crawl_result)

    # Asynchronous Crawl a website:
    async_result = await app.async_crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, "")
    print(async_result)

    crawl_status = await app.check_crawl_status(async_result['id'])
    print(crawl_status)

    attempts = 15
    while attempts > 0 and crawl_status['status'] != 'completed':
        print(crawl_status)
        crawl_status = await app.check_crawl_status(async_result['id'])
        attempts -= 1
        await asyncio.sleep(1)  # Use async sleep instead of time.sleep

    crawl_status = await app.check_crawl_status(async_result['id'])
    print(crawl_status)

async def example_llm_extraction():
    # Define schema to extract contents into using pydantic
    class ArticleSchema(BaseModel):
        title: str
        points: int
        by: str
        commentsURL: str

    class TopArticlesSchema(BaseModel):
        top: List[ArticleSchema] = Field(..., description="Top 5 stories")

    llm_extraction_result = await app.scrape_url('https://news.ycombinator.com', {
        'formats': ['extract'],
        'extract': {
            'schema': TopArticlesSchema.model_json_schema()
        }
    })

    print(llm_extraction_result['extract'])

    # Define schema to extract contents into using json schema
    json_schema = {
        "type": "object",
        "properties": {
            "top": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "title": {"type": "string"},
                        "points": {"type": "number"},
                        "by": {"type": "string"},
                        "commentsURL": {"type": "string"}
                    },
                    "required": ["title", "points", "by", "commentsURL"]
                },
                "minItems": 5,
                "maxItems": 5,
                "description": "Top 5 stories on Hacker News"
            }
        },
        "required": ["top"]
    }

    app2 = AsyncFirecrawlApp(api_key="fc-", version="v0")

    llm_extraction_result = await app2.scrape_url('https://news.ycombinator.com', {
        'extractorOptions': {
            'extractionSchema': json_schema,
            'mode': 'llm-extraction'
        },
        'pageOptions': {
            'onlyMainContent': True
        }
    })

async def example_map_and_extract():
    # Map a website:
    map_result = await app.map_url('https://firecrawl.dev', { 'search': 'blog' })
    print(map_result)

    # Extract URLs:
    class ExtractSchema(BaseModel):
        title: str
        description: str
        links: List[str]

    # Define the schema using Pydantic
    extract_schema = ExtractSchema.schema()

    # Perform the extraction
    extract_result = await app.extract(['https://firecrawl.dev'], {
        'prompt': "Extract the title, description, and links from the website",
        'schema': extract_schema
    })
    print(extract_result)

# Define event handlers for websocket
def on_document(detail):
    print("DOC", detail)

def on_error(detail):
    print("ERR", detail['error'])

def on_done(detail):
    print("DONE", detail['status'])

async def example_websocket_crawl():
    # Initiate the crawl job and get the watcher
    watcher = await app.crawl_url_and_watch('firecrawl.dev', { 'excludePaths': ['blog/*'], 'limit': 5 })

    # Add event listeners
    watcher.add_event_listener("document", on_document)
    watcher.add_event_listener("error", on_error)
    watcher.add_event_listener("done", on_done)

    # Start the watcher
    await watcher.connect()

async def main():
    # Apply nest_asyncio to allow nested event loops
    nest_asyncio.apply()

    # Run all the examples
    await example_scrape()
    await example_batch_scrape()
    await example_crawl()
    await example_llm_extraction()
    await example_map_and_extract()
    await example_websocket_crawl()

if __name__ == "__main__":
    asyncio.run(main())
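To run just one of the examples rather than the full main() sequence, each coroutine can be awaited on its own. A minimal sketch, assuming the file above is saved as example_async.py next to the caller and a real API key replaces the "fc-" placeholder:

import asyncio

import example_async  # the file added by this commit, assumed importable

# Run only the basic scrape example instead of every example in main().
asyncio.run(example_async.example_scrape())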
File diff suppressed because it is too large
@@ -4,3 +4,4 @@ python-dotenv
 websockets
 nest-asyncio
 pydantic
+aiohttp
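The new aiohttp entry presumably backs the async HTTP calls made by AsyncFirecrawlApp. A quick sanity check that the updated requirements resolve, assuming they were installed from the SDK's requirements file (presumably apps/python-sdk/requirements.txt) with pip:

# Assumes the requirements shown in the hunk above have been installed.
import aiohttp
import nest_asyncio
import pydantic
import websockets

print("aiohttp", aiohttp.__version__)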