Commit a3f3168212 ("Nick: python sdk 2.0")
Parent: f3522666db
@@ -6,51 +6,47 @@ from firecrawl.firecrawl import AsyncFirecrawlApp
 from pydantic import BaseModel, Field
 from typing import List

-app = AsyncFirecrawlApp(api_key="fc-")
+app = AsyncFirecrawlApp(api_url="https://api.firecrawl.dev")

 async def example_scrape():
     # Scrape a website:
-    scrape_result = await app.scrape_url('firecrawl.dev')
-    print(scrape_result['markdown'])
+    scrape_result = await app.scrape_url('example.com', formats=["markdown", "html"])
+    print(scrape_result.markdown)
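
For context, the new call shape in runnable form: a minimal sketch assuming the constructor still accepts an api_key argument (as on the pre-2.0 line above), with the key read from an environment variable whose name is illustrative:

    import asyncio
    import os

    from firecrawl.firecrawl import AsyncFirecrawlApp

    async def main():
        # api_key comes from the old example; reading it from the
        # environment is an assumption made for this sketch.
        app = AsyncFirecrawlApp(api_key=os.environ["FIRECRAWL_API_KEY"])
        # 2.0 style: options are keyword arguments, and results expose
        # attributes (result.markdown) instead of dict keys.
        result = await app.scrape_url('example.com', formats=["markdown", "html"])
        print(result.markdown)

    asyncio.run(main())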

 async def example_batch_scrape():
     # Batch scrape
     urls = ['https://example.com', 'https://docs.firecrawl.dev']
-    batch_scrape_params = {
-        'formats': ['markdown', 'html'],
-    }

     # Synchronous batch scrape
-    batch_result = await app.batch_scrape_urls(urls, batch_scrape_params)
+    batch_result = await app.batch_scrape_urls(urls, formats=["markdown", "html"])
     print("Synchronous Batch Scrape Result:")
-    print(batch_result['data'][0]['markdown'])
+    print(batch_result.data[0].markdown)

     # Asynchronous batch scrape
-    async_batch_result = await app.async_batch_scrape_urls(urls, batch_scrape_params)
+    async_batch_result = await app.async_batch_scrape_urls(urls, formats=["markdown", "html"])
     print("\nAsynchronous Batch Scrape Result:")
     print(async_batch_result)
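
The same attribute-style access applies to batch results. A minimal sketch of the 2.0 batch call; iterating result.data as a list of documents is an assumption based on the batch_result.data[0].markdown access shown above:

    async def batch_demo(app):
        urls = ['https://example.com', 'https://docs.firecrawl.dev']
        # formats moves from a params dict to a keyword argument in 2.0.
        result = await app.batch_scrape_urls(urls, formats=["markdown"])
        # Treating result.data as iterable is an assumption inferred
        # from the indexed access in the diff above.
        for doc in result.data:
            print(doc.markdown)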

 async def example_crawl():
     # Crawl a website:
-    idempotency_key = str(uuid.uuid4()) # optional idempotency key
-    crawl_result = await app.crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, 2, idempotency_key)
-    print(crawl_result)
+    crawl_result = await app.crawl_url('firecrawl.dev', exclude_paths=['blog/*'])
+    print(crawl_result.data[0].markdown)

     # Asynchronous Crawl a website:
-    async_result = await app.async_crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, "")
+    async_result = await app.async_crawl_url('firecrawl.dev', exclude_paths=['blog/*'])
     print(async_result)

-    crawl_status = await app.check_crawl_status(async_result['id'])
+    crawl_status = await app.check_crawl_status(async_result.id)
     print(crawl_status)

     attempts = 15
-    while attempts > 0 and crawl_status['status'] != 'completed':
+    while attempts > 0 and crawl_status.status != 'completed':
         print(crawl_status)
-        crawl_status = await app.check_crawl_status(async_result['id'])
+        crawl_status = await app.check_crawl_status(async_result.id)
         attempts -= 1
         await asyncio.sleep(1) # Use async sleep instead of time.sleep

-    crawl_status = await app.check_crawl_status(async_result['id'])
+    crawl_status = await app.check_crawl_status(async_result.id)
     print(crawl_status)

 async def example_llm_extraction():
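
The polling loop above factors naturally into a small helper. A sketch built only from calls shown in this hunk (check_crawl_status, the .status attribute, the 'completed' sentinel); the helper name and signature are illustrative:

    import asyncio

    async def wait_for_crawl(app, crawl_id, attempts=15, delay=1.0):
        # Poll until the crawl completes or we run out of attempts.
        status = await app.check_crawl_status(crawl_id)
        while attempts > 0 and status.status != 'completed':
            await asyncio.sleep(delay)  # async sleep, as in the example
            status = await app.check_crawl_status(crawl_id)
            attempts -= 1
        return status

Usage would mirror the example: job = await app.async_crawl_url('firecrawl.dev', exclude_paths=['blog/*']), then final = await wait_for_crawl(app, job.id).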
@@ -64,18 +60,15 @@ async def example_llm_extraction():
     class TopArticlesSchema(BaseModel):
         top: List[ArticleSchema] = Field(..., description="Top 5 stories")

-    llm_extraction_result = await app.scrape_url('https://news.ycombinator.com', {
-        'formats': ['extract'],
-        'extract': {
-            'schema': TopArticlesSchema.model_json_schema()
-        }
-    })
-
-    print(llm_extraction_result['extract'])
+    extract_config = ExtractConfig(schema=TopArticlesSchema.model_json_schema())
+    llm_extraction_result = await app.scrape_url('https://news.ycombinator.com', formats=["extract"], extract=extract_config)
+    print(llm_extraction_result.extract)
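
Put together, the 2.0 extraction flow looks like the sketch below. The ArticleSchema fields and the ExtractConfig import path are assumptions; the diff shows ExtractConfig being constructed with a JSON schema but not where it is imported from:

    from typing import List

    from pydantic import BaseModel, Field
    # Import path is an assumption made for this sketch.
    from firecrawl.firecrawl import AsyncFirecrawlApp, ExtractConfig

    class ArticleSchema(BaseModel):
        # Field names are illustrative; the diff does not show this model's body.
        title: str
        url: str

    class TopArticlesSchema(BaseModel):
        top: List[ArticleSchema] = Field(..., description="Top 5 stories")

    async def llm_extract_demo(app):
        config = ExtractConfig(schema=TopArticlesSchema.model_json_schema())
        result = await app.scrape_url('https://news.ycombinator.com',
                                      formats=["extract"], extract=config)
        print(result.extract)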

 async def example_map_and_extract():
     # Map a website:
-    map_result = await app.map_url('https://firecrawl.dev', { 'search': 'blog' })
+    map_result = await app.map_url('https://firecrawl.dev', search="blog")
     print(map_result)

     # Extract URLs:
@@ -88,10 +81,7 @@ async def example_map_and_extract():
     extract_schema = ExtractSchema.schema()

     # Perform the extraction
-    extract_result = await app.extract(['https://firecrawl.dev'], {
-        'prompt': "Extract the title, description, and links from the website",
-        'schema': extract_schema
-    })
+    extract_result = await app.extract(['https://firecrawl.dev'], prompt="Extract the title, description, and links from the website", schema=extract_schema)
     print(extract_result)

 # Define event handlers for websocket
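
A combined sketch of the 2.0 map-then-extract flow from this hunk and the previous one; ExtractSchema's body is not visible in the diff, so the model fields below are illustrative:

    from pydantic import BaseModel

    class ExtractSchema(BaseModel):
        # Illustrative fields matching the prompt in the example.
        title: str
        description: str

    async def map_and_extract_demo(app):
        # 2.0 style: options are keyword arguments rather than dicts.
        map_result = await app.map_url('https://firecrawl.dev', search="blog")
        print(map_result)
        extract_result = await app.extract(
            ['https://firecrawl.dev'],
            prompt="Extract the title, description, and links from the website",
            schema=ExtractSchema.schema(),
        )
        print(extract_result)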
@@ -13,7 +13,7 @@ import os

 from .firecrawl import FirecrawlApp # noqa

-__version__ = "1.17.0"
+__version__ = "2.0.0"

 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
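
This last hunk is from what appears to be the package __init__ (it defines __version__ and the logger), and the major-version bump doubles as a quick runtime check for which API style is installed:

    import firecrawl

    # Prints "2.0.0" once this commit is installed; per this diff, 1.x
    # returns dict-style results while 2.x returns attribute-style objects.
    print(firecrawl.__version__)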