rafaelmmiller 2025-04-18 13:37:09 -07:00
parent 79bc54c11e
commit 0aedef7210
3 changed files with 70 additions and 62 deletions

View File

@@ -42,23 +42,7 @@ while attempts > 0 and crawl_status.status != 'completed':
 crawl_status = app.check_crawl_status(async_result.id)
 print(crawl_status)
 
-# LLM Extraction:
-# Define schema to extract contents into using pydantic
-class ArticleSchema(BaseModel):
-    title: str
-    points: int
-    by: str
-    commentsURL: str
-
-class TopArticlesSchema(BaseModel):
-    top: List[ArticleSchema] = Field(..., description="Top 5 stories")
-
-extract_config = JsonConfig(schema=TopArticlesSchema.model_json_schema())
-llm_extraction_result = app.scrape_url('https://news.ycombinator.com', formats=["extract"], extract=extract_config)
-print(llm_extraction_result.extract)
-
+# JSON format:
 # Define schema to extract contents into using json schema
 json_schema = {
     "type": "object",
@@ -86,9 +70,6 @@ llm_extraction_result = app.scrape_url('https://news.ycombinator.com', formats=[
 print(llm_extraction_result.json)
-print(llm_extraction_result['llm_extraction'])
 
 # Map a website:
 map_result = app.map_url('https://firecrawl.dev', search="blog")
 print(map_result)
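The same file also retires dict-style result access; only the attribute form survives:

print(llm_extraction_result.json)               # kept: attribute access
# llm_extraction_result['llm_extraction']       # removed: legacy dict-style lookup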

View File

@@ -2,7 +2,7 @@ import time
 import nest_asyncio
 import uuid
 import asyncio
-from firecrawl.firecrawl import AsyncFirecrawlApp
+from firecrawl.firecrawl import AsyncFirecrawlApp, ScrapeOptions, JsonConfig
 from pydantic import BaseModel, Field
 from typing import List
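`ScrapeOptions` and `JsonConfig` join the import so the async examples can configure page scraping and JSON extraction. A sketch of typical `ScrapeOptions` usage, continuing the example script's module-level `app` and assuming the v1-style `crawl_url(url, limit=..., scrape_options=...)` call shape, which this diff does not show:

async def example_crawl_with_options():
    # Assumed call shape; only the ScrapeOptions import is visible in this commit
    crawl_result = await app.crawl_url(
        'https://firecrawl.dev',
        limit=5,
        scrape_options=ScrapeOptions(formats=['markdown', 'html'])
    )
    print(crawl_result)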
@@ -84,6 +84,20 @@ async def example_map_and_extract():
     extract_result = await app.extract(['https://firecrawl.dev'], prompt="Extract the title, description, and links from the website", schema=extract_schema)
     print(extract_result)
 
+async def example_deep_research():
+    # Deep research example
+    research_result = await app.deep_research(
+        "What are the latest developments in large language models?",
+        max_urls=4
+    )
+    print("Research Results:", research_result)
+
+async def example_generate_llms_text():
+    # Generate LLMs.txt example
+    llms_result = await app.generate_llms_text(
+        "https://firecrawl.dev")
+    print("LLMs.txt Results:", llms_result)
+
 # Define event handlers for websocket
 def on_document(detail):
     print("DOC", detail)
@@ -115,6 +129,8 @@ async def main():
     await example_llm_extraction()
     await example_map_and_extract()
     await example_websocket_crawl()
+    await example_deep_research()
+    await example_generate_llms_text()
 
 if __name__ == "__main__":
     asyncio.run(main())
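Both new coroutines rely on the example script's module-level `app`. A self-contained sketch that exercises just the additions; the constructor's `api_key` argument is assumed from the SDK's usual usage rather than shown in this diff:

import asyncio
from firecrawl.firecrawl import AsyncFirecrawlApp

async def run_new_examples():
    app = AsyncFirecrawlApp(api_key="fc-YOUR-API-KEY")  # api_key assumed, not shown in this diff
    # Deep research, capped at 4 URLs, as in the added example
    research_result = await app.deep_research(
        "What are the latest developments in large language models?",
        max_urls=4
    )
    print("Research Results:", research_result)
    # LLMs.txt generation for firecrawl.dev, as in the added example
    llms_result = await app.generate_llms_text("https://firecrawl.dev")
    print("LLMs.txt Results:", llms_result)

asyncio.run(run_new_examples())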

View File

@@ -1742,7 +1742,7 @@ class FirecrawlApp:
     def async_extract(
             self,
-            urls: List[str],
+            urls: Optional[List[str]] = None,
             *,
             prompt: Optional[str] = None,
             schema: Optional[Any] = None,
@@ -1750,8 +1750,7 @@ class FirecrawlApp:
             allow_external_links: Optional[bool] = False,
             enable_web_search: Optional[bool] = False,
             show_sources: Optional[bool] = False,
-            agent: Optional[Dict[str, Any]] = None,
-            idempotency_key: Optional[str] = None) -> ExtractResponse[Any]:
+            agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
         """
         Initiate an asynchronous extract job.
@@ -1775,7 +1774,7 @@ class FirecrawlApp:
         Raises:
             ValueError: If job initiation fails
         """
-        headers = self._prepare_headers(idempotency_key)
+        headers = self._prepare_headers()
 
         schema = schema
         if schema:
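With `idempotency_key` removed (here and, further down, from the `AsyncFirecrawlApp` variants), `_prepare_headers()` is called bare and callers pass plain keyword arguments. A sketch of the resulting call style; the constructor's `api_key` argument is assumed:

from firecrawl.firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")   # api_key assumed
job = app.async_extract(
    urls=['https://firecrawl.dev'],
    prompt="Extract the page title",
    show_sources=True
)
print(job)
# Passing idempotency_key=... now raises TypeError: unexpected keyword argument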
@@ -3457,27 +3456,28 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def extract(
             self,
-            urls: List[str],
-            params: Optional[ExtractParams] = None) -> ExtractResponse[Any]:
+            urls: Optional[List[str]] = None,
+            *,
+            prompt: Optional[str] = None,
+            schema: Optional[Any] = None,
+            system_prompt: Optional[str] = None,
+            allow_external_links: Optional[bool] = False,
+            enable_web_search: Optional[bool] = False,
+            show_sources: Optional[bool] = False,
+            agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
         """
         Asynchronously extract structured information from URLs.
 
         Args:
-            urls (List[str]): URLs to extract from
-            params (Optional[ExtractParams]): See ExtractParams model:
-                Extraction Config:
-                * prompt - Custom extraction prompt
-                * schema - JSON schema/Pydantic model
-                * systemPrompt - System context
-                Behavior Options:
-                * allowExternalLinks - Follow external links
-                * enableWebSearch - Enable web search
-                * includeSubdomains - Include subdomains
-                * showSources - Include source URLs
-                Scraping Options:
-                * scrapeOptions - Page scraping config
+            urls (Optional[List[str]]): URLs to extract from
+            prompt (Optional[str]): Custom extraction prompt
+            schema (Optional[Any]): JSON schema/Pydantic model
+            system_prompt (Optional[str]): System context
+            allow_external_links (Optional[bool]): Follow external links
+            enable_web_search (Optional[bool]): Enable web search
+            show_sources (Optional[bool]): Include source URLs
+            agent (Optional[Dict[str, Any]]): Agent configuration
 
         Returns:
             ExtractResponse with:
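Together with the now-optional `urls`, the validation added in the next hunk ("Either urls or prompt is required") makes both URL-driven and prompt-only calls expressible. A sketch of the reworked keyword style, using a Pydantic model that the method converts via `model_json_schema()`; the `api_key` argument is assumed:

import asyncio
from pydantic import BaseModel
from firecrawl.firecrawl import AsyncFirecrawlApp

class PageInfo(BaseModel):
    title: str
    description: str

async def main():
    app = AsyncFirecrawlApp(api_key="fc-YOUR-API-KEY")  # api_key assumed
    result = await app.extract(
        ['https://firecrawl.dev'],
        prompt="Extract the title and description",
        schema=PageInfo,    # converted to a JSON schema internally
        show_sources=True
    )
    print(result)

asyncio.run(main())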
@@ -3490,29 +3490,35 @@ class AsyncFirecrawlApp(FirecrawlApp):
         """
         headers = self._prepare_headers()
-        if not params or (not params.get('prompt') and not params.get('schema')):
+        if not prompt and not schema:
             raise ValueError("Either prompt or schema is required")
-        schema = params.get('schema')
+        if not urls and not prompt:
+            raise ValueError("Either urls or prompt is required")
 
         if schema:
             if hasattr(schema, 'model_json_schema'):
+                # Convert Pydantic model to JSON schema
                 schema = schema.model_json_schema()
+            # Otherwise assume it's already a JSON schema dict
 
-        request_data = ExtractResponse(
-            urls=urls,
-            allowExternalLinks=params.get('allow_external_links', params.get('allowExternalLinks', False)),
-            enableWebSearch=params.get('enable_web_search', params.get('enableWebSearch', False)),
-            showSources=params.get('show_sources', params.get('showSources', False)),
-            schema=schema,
-            origin=f'python-sdk@{version}'
-        )
-        if params.get('prompt'):
-            request_data['prompt'] = params['prompt']
-        if params.get('system_prompt'):
-            request_data['systemPrompt'] = params['system_prompt']
-        elif params.get('systemPrompt'):
-            request_data['systemPrompt'] = params['systemPrompt']
+        request_data = {
+            'urls': urls or [],
+            'allowExternalLinks': allow_external_links,
+            'enableWebSearch': enable_web_search,
+            'showSources': show_sources,
+            'schema': schema,
+            'origin': f'python-sdk@{get_version()}'
+        }
+        # Only add prompt and systemPrompt if they exist
+        if prompt:
+            request_data['prompt'] = prompt
+        if system_prompt:
+            request_data['systemPrompt'] = system_prompt
+        if agent:
+            request_data['agent'] = agent
 
         response = await self._async_post_request(
             f'{self.api_url}/v1/extract',
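The request body now mirrors the keyword arguments one-to-one rather than being round-tripped through an `ExtractResponse` (which the old code misused as a request container). For a prompt-only call, the dict built above comes out roughly as follows; the version string is illustrative:

# extract(urls=None, prompt="Extract pricing info", enable_web_search=True) builds:
{
    "urls": [],                     # falls back to [] when no URLs are given
    "allowExternalLinks": False,
    "enableWebSearch": True,
    "showSources": False,
    "schema": None,
    "origin": "python-sdk@x.y.z",   # illustrative version string
    "prompt": "Extract pricing info"
}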
@@ -3532,7 +3538,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
             )
             if status_data['status'] == 'completed':
-                return status_data
+                return ExtractResponse(**status_data)
             elif status_data['status'] in ['failed', 'cancelled']:
                 raise Exception(f'Extract job {status_data["status"]}. Error: {status_data["error"]}')
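Wrapping the completed payload in `ExtractResponse(**status_data)` hands callers a typed object instead of a raw dict. A sketch, with the field name assumed from the payload keys the code reads:

# Inside an async context, after this change:
result = await app.extract(['https://firecrawl.dev'], prompt="Extract the title")
print(result.status)    # typed attribute access ('status' assumed to be a model field)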
@@ -3715,8 +3721,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
             allow_external_links: Optional[bool] = False,
             enable_web_search: Optional[bool] = False,
             show_sources: Optional[bool] = False,
-            agent: Optional[Dict[str, Any]] = None,
-            idempotency_key: Optional[str] = None) -> ExtractResponse[Any]:
+            agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
         """
         Initiate an asynchronous extraction job without waiting for completion.
@@ -3740,7 +3745,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
         Raises:
             ValueError: If job initiation fails
         """
-        headers = self._prepare_headers(idempotency_key)
+        headers = self._prepare_headers()
 
         if not prompt and not schema:
             raise ValueError("Either prompt or schema is required")
@@ -3871,6 +3876,12 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if experimental_stream is not None:
             params['__experimental_stream'] = experimental_stream
 
+        params = GenerateLLMsTextParams(
+            maxUrls=max_urls,
+            showFullText=show_full_text,
+            __experimental_stream=experimental_stream
+        )
+
         headers = self._prepare_headers()
         json_data = {'url': url, **params.dict(exclude_none=True)}
         json_data['origin'] = f"python-sdk@{version}"
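The appended block re-normalizes the inputs through `GenerateLLMsTextParams` just before the request is built (overwriting the `params` dict mutated a few lines up). For a representative call, the resulting `json_data` looks roughly like this; the version string is illustrative:

params = GenerateLLMsTextParams(
    maxUrls=4,
    showFullText=False,
    __experimental_stream=None      # mirrors the diff; dropped by exclude_none below
)
json_data = {'url': 'https://firecrawl.dev', **params.dict(exclude_none=True)}
json_data['origin'] = "python-sdk@x.y.z"    # illustrative version
# json_data == {'url': 'https://firecrawl.dev', 'maxUrls': 4,
#               'showFullText': False, 'origin': 'python-sdk@x.y.z'}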