mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 08:09:04 +08:00
Merge branch 'main' of https://github.com/mendableai/firecrawl
This commit is contained in:
commit
bc5c5a31d4
27
README.md
27
README.md
@ -481,17 +481,15 @@ app = FirecrawlApp(api_key="fc-YOUR_API_KEY")
|
|||||||
# Scrape a website:
|
# Scrape a website:
|
||||||
scrape_status = app.scrape_url(
|
scrape_status = app.scrape_url(
|
||||||
'https://firecrawl.dev',
|
'https://firecrawl.dev',
|
||||||
params={'formats': ['markdown', 'html']}
|
formats=["markdown", "html"]
|
||||||
)
|
)
|
||||||
print(scrape_status)
|
print(scrape_status)
|
||||||
|
|
||||||
# Crawl a website:
|
# Crawl a website:
|
||||||
crawl_status = app.crawl_url(
|
crawl_status = app.crawl_url(
|
||||||
'https://firecrawl.dev',
|
'https://firecrawl.dev',
|
||||||
params={
|
limit=100,
|
||||||
'limit': 100,
|
scrapeOptions={'formats': ['markdown', 'html']},
|
||||||
'scrapeOptions': {'formats': ['markdown', 'html']}
|
|
||||||
},
|
|
||||||
poll_interval=30
|
poll_interval=30
|
||||||
)
|
)
|
||||||
print(crawl_status)
|
print(crawl_status)
|
||||||
@ -502,11 +500,6 @@ print(crawl_status)
|
|||||||
With LLM extraction, you can easily extract structured data from any URL. We support pydantic schemas to make it easier for you too. Here is how to use it:
|
With LLM extraction, you can easily extract structured data from any URL. We support pydantic schemas to make it easier for you too. Here is how to use it:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
|
||||||
from firecrawl.firecrawl import FirecrawlApp
|
|
||||||
|
|
||||||
app = FirecrawlApp(api_key="fc-YOUR_API_KEY")
|
|
||||||
|
|
||||||
class ArticleSchema(BaseModel):
|
class ArticleSchema(BaseModel):
|
||||||
title: str
|
title: str
|
||||||
points: int
|
points: int
|
||||||
@ -514,15 +507,13 @@ class ArticleSchema(BaseModel):
|
|||||||
commentsURL: str
|
commentsURL: str
|
||||||
|
|
||||||
class TopArticlesSchema(BaseModel):
|
class TopArticlesSchema(BaseModel):
|
||||||
top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories")
|
top: List[ArticleSchema] = Field(..., description="Top 5 stories")
|
||||||
|
|
||||||
data = app.scrape_url('https://news.ycombinator.com', {
|
json_config = ExtractConfig(schema=TopArticlesSchema.model_json_schema())
|
||||||
'formats': ['json'],
|
|
||||||
'jsonOptions': {
|
llm_extraction_result = app.scrape_url('https://news.ycombinator.com', formats=["json"], json=json_config)
|
||||||
'schema': TopArticlesSchema.model_json_schema()
|
|
||||||
}
|
print(llm_extraction_result.json)
|
||||||
})
|
|
||||||
print(data["json"])
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Using the Node SDK
|
## Using the Node SDK
|
||||||
|
Loading…
x
Reference in New Issue
Block a user