commit 80d6cb16fb
parent a1c018fdb0
Author: rafaelmmiller
Date: 2024-11-14 15:51:27 -03:00

7 changed files with 167 additions and 7 deletions

View File

@@ -1,4 +1,5 @@
 import FirecrawlApp from 'firecrawl';
+import { z } from 'zod';

 const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
@@ -42,6 +43,18 @@ const main = async () => {
   const mapResult = await app.mapUrl('https://firecrawl.dev');
   console.log(mapResult)

+  // Extract information from a website using LLM:
+  const extractSchema = z.object({
+    title: z.string(),
+    description: z.string(),
+    links: z.array(z.string())
+  });
+  const extractResult = await app.extractUrls(['https://firecrawl.dev'], {
+    prompt: "Extract the title, description, and links from the website",
+    schema: extractSchema
+  });
+  console.log(extractResult);

   // Crawl a website with WebSockets:
   const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5});
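The new `extractUrls` call above returns `data` without a useful static type (the `ExtractResponse` interface added later in this commit infers from the generic `zt.ZodSchema`, which collapses to `any`), so the same zod schema can double as a runtime validator and a source of static types on the caller side. A minimal sketch of that pattern, using only plain zod; `parseExtracted` is an illustrative helper, not part of the SDK:

import { z } from 'zod';

const extractSchema = z.object({
  title: z.string(),
  description: z.string(),
  links: z.array(z.string())
});

// Static type derived from the schema:
// { title: string; description: string; links: string[] }
type ExtractedData = z.infer<typeof extractSchema>;

// Validate a loosely typed payload (e.g. extractResult.data from the
// example above) at runtime instead of trusting a cast.
function parseExtracted(payload: unknown): ExtractedData | null {
  const parsed = extractSchema.safeParse(payload);
  return parsed.success ? parsed.data : null;
}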

View File

@@ -42,6 +42,19 @@ const main = async () => {
   const mapResult = await app.mapUrl('https://firecrawl.dev');
   console.log(mapResult)

+  // // Extract information from a website using LLM:
+  // const extractSchema = z.object({
+  //   title: z.string(),
+  //   description: z.string(),
+  //   links: z.array(z.string())
+  // });
+  // const extractResult = await app.extractUrls(['https://firecrawl.dev'], {
+  //   prompt: "Extract the title, description, and links from the website",
+  //   schema: extractSchema
+  // });
+  // console.log(extractResult);

   // Crawl a website with WebSockets:
   const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5});

View File

@@ -1,6 +1,6 @@
 {
   "name": "@mendable/firecrawl-js",
-  "version": "1.8.2",
+  "version": "1.9.0",
   "description": "JavaScript SDK for Firecrawl API",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",

View File

@@ -234,6 +234,26 @@ export interface MapResponse {
   error?: string;
 }

+/**
+ * Parameters for extracting information from URLs.
+ * Defines options for extracting information from URLs.
+ */
+export interface ExtractParams {
+  prompt: string;
+  schema?: zt.ZodSchema;
+  systemPrompt?: string;
+}
+
+/**
+ * Response interface for extracting information from URLs.
+ * Defines the structure of the response received after extracting information from URLs.
+ */
+export interface ExtractResponse {
+  success: true;
+  data: zt.infer<zt.ZodSchema>;
+  error?: string;
+}
+
 /**
  * Error response interface.
  * Defines the structure of the response received when an error occurs.
@@ -243,7 +263,6 @@ export interface ErrorResponse {
   error: string;
 }
-
 /**
  * Custom error class for Firecrawl.
  * Extends the built-in Error class to include a status code.
@@ -675,6 +694,44 @@ export default class FirecrawlApp {
     return { success: false, error: "Internal server error." };
   }

+  /**
+   * Extracts information from URLs using the Firecrawl API.
+   * @param urls - The URLs to extract information from.
+   * @param params - Additional parameters for the extract request.
+   * @returns The response from the extract operation.
+   */
+  async extractUrls(urls: string[], params?: ExtractParams): Promise<ExtractResponse | ErrorResponse> {
+    const headers = this.prepareHeaders();
+    if (!params?.prompt) {
+      throw new FirecrawlError("Prompt is required", 400);
+    }
+    let jsonData: { urls: string[] } & ExtractParams = { urls, ...params };
+    let jsonSchema: any;
+    try {
+      jsonSchema = params?.schema ? zodToJsonSchema(params.schema) : undefined;
+    } catch (error: any) {
+      throw new FirecrawlError("Invalid schema. Use a valid Zod schema.", 400);
+    }
+    try {
+      const response: AxiosResponse = await this.postRequest(
+        this.apiUrl + `/v1/extract`,
+        { ...jsonData, schema: jsonSchema },
+        headers
+      );
+      if (response.status === 200) {
+        return response.data as ExtractResponse;
+      } else {
+        this.handleError(response, "extract");
+      }
+    } catch (error: any) {
+      throw new FirecrawlError(error.message, 500);
+    }
+    return { success: false, error: "Internal server error." };
+  }
+
   /**
    * Prepares the headers for an API request.
    * @param idempotencyKey - Optional key to ensure idempotency.

View File

@@ -2,6 +2,8 @@ import time
 import nest_asyncio
 import uuid
 from firecrawl.firecrawl import FirecrawlApp
+from pydantic import BaseModel, Field
+from typing import List

 app = FirecrawlApp(api_key="fc-")
@@ -50,9 +52,6 @@ print(crawl_status)
 # LLM Extraction:
 # Define schema to extract contents into using pydantic
-from pydantic import BaseModel, Field
-from typing import List
-
 class ArticleSchema(BaseModel):
     title: str
     points: int
@@ -115,6 +114,22 @@ llm_extraction_result = app2.scrape_url('https://news.ycombinator.com', {
 map_result = app.map_url('https://firecrawl.dev', { 'search': 'blog' })
 print(map_result)

+# Extract URLs:
+class ExtractSchema(BaseModel):
+    title: str
+    description: str
+    links: List[str]
+
+# Define the schema using Pydantic
+extract_schema = ExtractSchema.schema()
+
+# Perform the extraction
+extract_result = app.extract_urls(['https://firecrawl.dev'], {
+    'prompt': "Extract the title, description, and links from the website",
+    'schema': extract_schema
+})
+print(extract_result)

 # Crawl a website with WebSockets:
 # inside an async function...
 nest_asyncio.apply()

View File

@@ -13,7 +13,7 @@ import os
 from .firecrawl import FirecrawlApp  # noqa

-__version__ = "1.5.0"
+__version__ = "1.6.0"

 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")

View File

@@ -12,15 +12,39 @@ Classes:
 import logging
 import os
 import time
-from typing import Any, Dict, Optional, List
+from typing import Any, Dict, Optional, List, Union
 import json
 import requests
+import pydantic
 import websockets

 logger : logging.Logger = logging.getLogger("firecrawl")

 class FirecrawlApp:
+    class ExtractParams(pydantic.BaseModel):
+        """
+        Parameters for the extract operation.
+        """
+        prompt: str
+        schema: Optional[Any] = None
+        system_prompt: Optional[str] = None
+
+    class ExtractResponse(pydantic.BaseModel):
+        """
+        Response from the extract operation.
+        """
+        success: bool
+        data: Optional[Any] = None
+        error: Optional[str] = None
+
+    class ErrorResponse(pydantic.BaseModel):
+        """
+        Error response.
+        """
+        success: bool
+        error: str
+
     def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None:
         """
         Initialize the FirecrawlApp instance with API key, API URL.
@@ -434,6 +458,44 @@ class FirecrawlApp:
         else:
             self._handle_error(response, 'check batch scrape status')

+    def extract_urls(self, urls: List[str], params: Optional[Dict[str, Any]] = None) -> Union[ExtractResponse, ErrorResponse]:
+        """
+        Extracts information from URLs using the Firecrawl API.
+
+        Args:
+            urls (List[str]): The URLs to extract information from.
+            params (Optional[Dict[str, Any]]): Additional parameters for the extract request, mirroring ExtractParams.
+
+        Returns:
+            Union[ExtractResponse, ErrorResponse]: The response from the extract operation.
+        """
+        headers = self._prepare_headers()
+        if not params or not params.get('prompt'):
+            raise ValueError("Prompt is required")
+        if not params.get('schema'):
+            raise ValueError("Schema is required for extraction")
+        json_data = {'urls': urls, **params}
+        # Accept either a pydantic model (call .schema()) or an already-serialized dict,
+        # as passed in the example script.
+        schema = params['schema']
+        json_schema = schema.schema() if hasattr(schema, 'schema') else schema
+        try:
+            response = self._post_request(
+                f'{self.api_url}/v1/extract',
+                {**json_data, 'schema': json_schema},
+                headers
+            )
+            if response.status_code == 200:
+                return response.json()
+            else:
+                self._handle_error(response, "extract")
+        except Exception as e:
+            raise ValueError(str(e)) from e
+        return {'success': False, 'error': "Internal server error."}

     def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]:
         """
         Prepare the headers for API requests.