Nick: examples

2025-08-12 01:59:04 +08:00 · 2025-04-18 02:00:00 -07:00 · 2025-04-18 02:00:00 -07:00 · 16439b1c7e
commit 16439b1c7e
parent 4e67803863
1 changed files with 38 additions and 15 deletions
--- a/apps/python-sdk/example.py
+++ b/apps/python-sdk/example.py
@@ -1,27 +1,27 @@
-from firecrawl.firecrawl import ExtractConfig, FirecrawlApp
+from firecrawl import JsonConfig, FirecrawlApp
 from pydantic import BaseModel, Field
 from typing import List
 import time
 app = FirecrawlApp(api_url="https://api.firecrawl.dev")

-# # Scrape a website:
+# Scrape a website:
 scrape_result = app.scrape_url('example.com', formats=["markdown", "html"])
 print(scrape_result.markdown)


-# # Test batch scrapeq
+# # # Test batch scrape
 urls = ['https://example.com', 'https://docs.firecrawl.dev']
-# Synchronous batch scrape
+# # Synchronous batch scrape
 batch_result = app.batch_scrape_urls(urls, formats=["markdown", "html"])
 print("Synchronous Batch Scrape Result:")
 print(batch_result.data[0].markdown)

-# # Asynchronous batch scrape
+# # # Asynchronous batch scrape
 async_batch_result = app.async_batch_scrape_urls(urls, formats=["markdown", "html"])
 print("\nAsynchronous Batch Scrape Result:")
 print(async_batch_result)

-# Crawl a website:
+# # Crawl a website:
 crawl_result = app.crawl_url('firecrawl.dev', exclude_paths=['blog/*'])
 print(crawl_result.data[0].markdown)

@@ -53,13 +53,13 @@ class ArticleSchema(BaseModel):
 class TopArticlesSchema(BaseModel):
    top: List[ArticleSchema] = Field(..., description="Top 5 stories")

-extract_config = ExtractConfig(schema=TopArticlesSchema.model_json_schema())
+extract_config = JsonConfig(schema=TopArticlesSchema.model_json_schema())

 llm_extraction_result = app.scrape_url('https://news.ycombinator.com', formats=["extract"], extract=extract_config)

 print(llm_extraction_result.extract)

-# # Define schema to extract contents into using json schema
+# Define schema to extract contents into using json schema
 json_schema = {
  "type": "object",
  "properties": {
@@ -75,20 +75,18 @@ json_schema = {
        },
        "required": ["title", "points", "by", "commentsURL"]
      },
-      "minItems": 5,
-      "maxItems": 5,
      "description": "Top 5 stories on Hacker News"
    }
  },
  "required": ["top"]
 }

-extract_config = ExtractConfig(extractionSchema=json_schema, mode="llm-extraction", pageOptions={"onlyMainContent": True})
-llm_extraction_result = app.scrape_url('https://news.ycombinator.com', formats=["extract"], extract=extract_config)
+extract_config = JsonConfig(schema=json_schema)
+llm_extraction_result = app.scrape_url('https://news.ycombinator.com', formats=["json"], json_options=extract_config)

-print(llm_extraction_result.extract)
+print(llm_extraction_result.json)

-# print(llm_extraction_result['llm_extraction'])
+print(llm_extraction_result['llm_extraction'])


 # Map a website:
@@ -108,6 +106,20 @@ extract_schema = ExtractSchema.schema()
 extract_result = app.extract(['https://firecrawl.dev'], prompt="Extract the title, description, and links from the website", schema=extract_schema)
 print(extract_result)

+
+# Deep research example
+research_result = app.deep_research(
+    "What are the latest developments in large language models?",
+    max_urls=4
+)
+print("Research Results:", research_result)
+
+# Generate LLMs.txt example
+llms_result = app.generate_llms_text(
+    "https://firecrawl.dev")
+print("LLMs.txt Results:", llms_result)
+
+
 # Crawl a website with WebSockets:
 # inside an async function...
 import nest_asyncio
@@ -134,4 +146,15 @@ async def start_crawl_and_watch():
    watcher.add_event_listener("done", on_done)

    # Start the watcher
-    await watcher.connect()
+    await watcher.connect()
+
+
+class ExtractSchema(BaseModel):
+    company_mission: str
+    supports_sso: bool
+    is_open_source: bool
+    is_in_yc: bool
+
+extract_config = JsonConfig(schema=ExtractSchema.model_json_schema())
+data = app.scrape_url('https://docs.firecrawl.dev/', formats=['json'], json_options=extract_config)
+print(data.json)