Nick: python working

2025-08-12 19:39:16 +08:00 · 2024-08-30 12:58:38 -03:00 · 2024-08-30 12:58:38 -03:00 · 45e33563eb
commit 45e33563eb
parent bb4808443c
3 changed files with 58 additions and 36 deletions
--- a/apps/js-sdk/firecrawl/src/index.ts
+++ b/apps/js-sdk/firecrawl/src/index.ts
@@ -75,6 +75,7 @@ export interface FirecrawlDocument {
  html?: string;
  rawHtml?: string;
  links?: string[];
  extract?: Record<any, any>;
  screenshot?: string;
  metadata: FirecrawlDocumentMetadata;
 }
@@ -344,12 +345,13 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
      Authorization: `Bearer ${this.apiKey}`,
    } as AxiosRequestHeaders;
    let jsonData: any = { url, ...params };
-    if (jsonData?.extractorOptions?.extractionSchema) {
+    if (jsonData?.extractorOptions?.extractionSchema || jsonData?.extract?.schema) {
-      let schema = jsonData.extractorOptions.extractionSchema;
+      let schema = jsonData.extractorOptions?.extractionSchema || jsonData.extract?.schema;
      // Check if schema is an instance of ZodSchema to correctly identify Zod schemas
      if (schema instanceof z.ZodSchema) {
        schema = zodToJsonSchema(schema);
      }
      if(this.version === 'v0') {
        jsonData = {
          ...jsonData,
          extractorOptions: {
@@ -358,6 +360,15 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
            mode: jsonData.extractorOptions.mode || "llm-extraction",
          },
        };
      } else {
        jsonData = {
          ...jsonData,
          extract: {
            ...jsonData.extract,
            schema: schema,
          },
        };
      }
    }
    try {
      const response: AxiosResponse = await axios.post(
--- a/apps/python-sdk/example.py
+++ b/apps/python-sdk/example.py
@@ -1,17 +1,18 @@
 import uuid
 from firecrawl.firecrawl import FirecrawlApp
-app = FirecrawlApp(api_key="fc-YOUR_API_KEY")
+app = FirecrawlApp(api_key="fc-")
 # Scrape a website:
 scrape_result = app.scrape_url('firecrawl.dev')
 print(scrape_result['markdown'])
 # Crawl a website:
-idempotency_key = str(uuid.uuid4()) # optional idempotency key
+crawl_result = app.crawl_url('docs.firecrawl.dev', {}, True, 2)
 crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, idempotency_key)
 print(crawl_result)
 # LLM Extraction:
 # Define schema to extract contents into using pydantic
 from pydantic import BaseModel, Field
@@ -27,18 +28,15 @@ class TopArticlesSchema(BaseModel):
    top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories")
 llm_extraction_result = app.scrape_url('https://news.ycombinator.com', {
-    'extractorOptions': {
+    'formats': ['extract'],
-        'extractionSchema': TopArticlesSchema.model_json_schema(),
+    'extract': {
-        'mode': 'llm-extraction'
+        'schema': TopArticlesSchema.model_json_schema()
    },
    'pageOptions':{
        'onlyMainContent': True
    }
 })
-print(llm_extraction_result['llm_extraction'])
+print(llm_extraction_result['extract'])
-# Define schema to extract contents into using json schema
+# # Define schema to extract contents into using json schema
 json_schema = {
  "type": "object",
  "properties": {
@@ -62,7 +60,10 @@ json_schema = {
  "required": ["top"]
 }
-llm_extraction_result = app.scrape_url('https://news.ycombinator.com', {
+app2 = FirecrawlApp(api_key="fc-", version="v0")
 llm_extraction_result = app2.scrape_url('https://news.ycombinator.com', {
    'extractorOptions': {
        'extractionSchema': json_schema,
        'mode': 'llm-extraction'
--- a/apps/python-sdk/firecrawl/firecrawl.py
+++ b/apps/python-sdk/firecrawl/firecrawl.py
@@ -58,20 +58,30 @@ class FirecrawlApp:
        # If there are additional params, process them
        if params:
-            # Initialize extractorOptions if present
+            if self.version == 'v0':
                # Handle extractorOptions (for v0 compatibility)
                extractor_options = params.get('extractorOptions', {})
-            # Check and convert the extractionSchema if it's a Pydantic model
+                if extractor_options:
-            if 'extractionSchema' in extractor_options:
+                    if 'extractionSchema' in extractor_options and hasattr(extractor_options['extractionSchema'], 'schema'):
                if hasattr(extractor_options['extractionSchema'], 'schema'):
                        extractor_options['extractionSchema'] = extractor_options['extractionSchema'].schema()
                # Ensure 'mode' is set, defaulting to 'llm-extraction' if not explicitly provided
                    extractor_options['mode'] = extractor_options.get('mode', 'llm-extraction')
                # Update the scrape_params with the processed extractorOptions
                    scrape_params['extractorOptions'] = extractor_options
                # Include any other params directly at the top level of scrape_params
                for key, value in params.items():
-                if key != 'extractorOptions':
+                    if key not in ['extractorOptions']:
                        scrape_params[key] = value
            elif self.version == 'v1':
                # Handle extract (for v1)
                extract = params.get('extract', {})
                if extract:
                    if 'schema' in extract and hasattr(extract['schema'], 'schema'):
                        extract['schema'] = extract['schema'].schema()
                    scrape_params['extract'] = extract
                # Include any other params directly at the top level of scrape_params
                for key, value in params.items():
                    if key not in ['extract']:
                        scrape_params[key] = value
        endpoint = f'/{self.version}/scrape'