Docs to API Spec

This commit is contained in:
Eric Ciarla 2024-09-06 15:26:33 -04:00
parent 0566e54d85
commit 2044e71fcf
27 changed files with 647 additions and 5198 deletions

View File

@ -1,771 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/crawl": {
"post": {
"/crawl/cancel/{jobId}": {
"/crawl/status/{jobId}": {
"get": {
"/scrape": {
"/search": {
"post": {
"components": {
"securitySchemes": {
"Authorization": {
"bearerFormat": "JWT",
"scheme": "bearer",
"type": "http"
}
}
},
"description": "Send a request to perform a web search and get scraped results from the top pages.",
"operationId": "searchWeb",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"pageOptions": {
"description": "Options for controlling the scraping behavior of search result pages.",
"properties": {
"fetchPageContent": {
"default": true,
"description": "Fetch the content of each page. If false, defaults to a basic, fast SERP API.",
"type": "boolean"
},
"includeHtml": {
"default": false,
"description": "Include the HTML version of the content on the page. Will output an html key in the response.",
"type": "boolean"
},
"includeRawHtml": {
"default": false,
"description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
"type": "boolean"
},
"onlyMainContent": {
"default": false,
"description": "Only return the main content of the page excluding headers, navs, footers, etc.",
"type": "boolean"
}
},
"type": "object"
},
"query": {
"description": "The search query.",
"required": true,
"type": "string"
},
"searchOptions": {
"description": "Options for controlling the search.",
"properties": {
"limit": {
"description": "Maximum number of search results to return.",
"type": "integer"
}
},
"type": "object"
}
},
"type": "object"
}
}
},
"responses": {
"200": {
"402": {
"description": "Payment required."
},
"429": {
"description": "Rate limit exceeded."
},
"500": {
"description": "Internal server error."
},
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"description": "An array of search results.",
"items": {
"properties": {
"content": {
"description": "Raw content of the search result page.",
"type": "string"
},
"markdown": {
"description": "Markdown content of the search result page.",
"type": "string"
},
"metadata": {
"description": "Metadata extracted from the search result page.",
"properties": {
"description": {
"description": "Page description.",
"type": "string"
},
"language": {
"description": "Page language.",
"nullable": true,
"type": "string"
},
"sourceURL": {
"description": "Source URL of the search result page.",
"type": "string"
},
"title": {
"description": "Page title.",
"type": "string"
}
},
"type": "object"
},
"url": {
"description": "URL of the search result.",
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"success": {
"description": "Indicates if the search was successful.",
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Web search completed successfully."
}
}
},
"summary": "Search the Web"
}
},
"post": {
"description": "Send a request to scrape a single URL and get its content.",
"operationId": "scrapeURL",
"parameters": [],
"requestBody": {
"402": {
"description": "Payment required."
},
"429": {
"description": "Rate limit exceeded."
},
"500": {
"description": "Internal server error."
},
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "Options for extraction of structured information from the page content. Note: LLM-based extraction is not performed by default and only occurs when explicitly configured. The 'markdown' mode simply returns the scraped markdown and is the default mode for scraping.",
"properties": {
"extractionPrompt": {
"description": "A prompt describing what information to extract from the page, applicable for LLM extraction modes.",
"type": "string"
},
"extractionSchema": {
"description": "The schema for the data to be extracted, required only for LLM extraction modes.",
"type": "object"
},
"mode": {
"default": "markdown",
"description": "The extraction mode to use. 'markdown': Returns the scraped markdown content, does not perform LLM extraction. 'llm-extraction': Extracts information from the cleaned and parsed content using LLM. 'llm-extraction-from-raw-html': Extracts information directly from the raw HTML using LLM. 'llm-extraction-from-markdown': Extracts information from the markdown content using LLM.",
"enum": [
"markdown",
"llm-extraction",
"llm-extraction-from-raw-html",
"llm-extraction-from-markdown"
],
"type": "string"
}
},
"type": "object"
},
"pageOptions": {
"description": "Options for controlling the scraping behavior.",
"properties": {
"fullPageScreenshot": {
"default": false,
"description": "Include a full page screenshot of the page that you are scraping.",
"type": "boolean"
},
"headers": {
"description": "Headers to send with the request. Can be used to send cookies, user-agent, etc.",
"type": "object"
},
"includeHtml": {
"default": false,
"description": "Include the HTML version of the content on the page. Will output an html key in the response.",
"type": "boolean"
},
"includeRawHtml": {
"default": false,
"description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
"type": "boolean"
},
"onlyIncludeTags": {
"description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'",
"items": {
"type": "string"
},
"type": "array"
},
"onlyMainContent": {
"default": false,
"description": "Only return the main content of the page excluding headers, navs, footers, etc.",
"type": "boolean"
},
"removeTags": {
"description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'",
"items": {
"type": "string"
},
"type": "array"
},
"replaceAllPathsWithAbsolutePaths": {
"default": false,
"description": "Replace all relative paths with absolute paths for images and links",
"type": "boolean"
},
"screenshot": {
"default": false,
"description": "Include a screenshot of the top of the page that you are scraping.",
"type": "boolean"
},
"waitFor": {
"default": 0,
"description": "Number of milliseconds to wait for the page to load before fetching content",
"type": "integer"
}
},
"type": "object"
},
"timeout": {
"default": 30000,
"description": "Timeout in milliseconds for the request",
"type": "integer"
},
"url": {
"description": "The URL to scrape.",
"required": true,
"type": "string"
}
},
"type": "object"
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"properties": {
"content": {
"description": "Raw content of the page.",
"type": "string"
},
"html": {
"description": "HTML version of the page content, only present if `includeHtml` was set to `true` in the request.",
"nullable": true,
"type": "string"
},
"llm_extraction": {
"description": "Extracted data from the page using the specified schema, only present if an LLM extraction mode was used.",
"nullable": true,
"type": "object"
},
"markdown": {
"description": "Markdown version of the page content.",
"type": "string"
},
"metadata": {
"properties": {
"<any other metadata> ": {
"description": "Any other extracted metadata.",
"type": "string"
},
"description": {
"description": "Page description.",
"type": "string"
},
"language": {
"description": "Page language.",
"nullable": true,
"type": "string"
},
"pageError": {
"description": "Error message if there was an error scraping the page.",
"nullable": true,
"type": "string"
},
"pageStatusCode": {
"description": "HTTP status code of the page.",
"type": "integer"
},
"sourceURL": {
"description": "Source URL of the page.",
"type": "string"
},
"title": {
"description": "Page title.",
"type": "string"
}
},
"type": "object"
},
"rawHtml": {
"description": "Raw HTML content of the page, only present if `includeRawHtml` was set to `true` in the request.",
"nullable": true,
"type": "string"
},
"warning": {
"description": "Warning message from the LLM extraction process, if any.",
"nullable": true,
"type": "string"
}
},
"type": "object"
},
"success": {
"description": "Indicates whether the scraping was successful.",
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "URL scraped successfully."
}
}
},
"summary": "Scrape a URL"
}
},
"description": "Send a request to get the status and results of a crawl job.",
"operationId": "getCrawlJobStatus",
"parameters": [
{
"description": "ID of the crawl job to check.",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {}
},
"responses": {
"200": {
"402": {
"description": "Payment required."
},
"429": {
"description": "Rate limit exceeded."
},
"500": {
"description": "Internal server error."
},
"content": {
"application/json": {
"schema": {
"properties": {
"current": {
"description": "The number of pages crawled so far.",
"type": "integer"
},
"data": {
"description": "The crawl results. Only available when the crawl job is completed.",
"items": {
"properties": {
"content": {
"description": "Raw content of the page.",
"type": "string"
},
"html": {
"description": "HTML version of the page content, only present if `includeHtml` was set to `true` in the crawl request.",
"type": "string"
},
"index": {
"description": "The index of the crawled page in the results.",
"type": "integer"
},
"markdown": {
"description": "Markdown content of the page.",
"type": "string"
},
"metadata": {
"description": "Metadata extracted from the page.",
"properties": {
"<any other metadata> ": {
"description": "Any other extracted metadata.",
"type": "string"
},
"description": {
"description": "Page description.",
"type": "string"
},
"language": {
"description": "Page language.",
"type": "string"
},
"pageError": {
"description": "Error message if there was an error scraping the page.",
"type": "string"
},
"pageStatusCode": {
"description": "HTTP status code of the page.",
"type": "integer"
},
"sourceURL": {
"description": "Source URL of the page.",
"type": "string"
},
"title": {
"description": "Page title.",
"type": "string"
}
},
"type": "object"
},
"rawHtml": {
"description": "Raw HTML content of the page, only present if `includeRawHtml` was set to `true` in the crawl request.",
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"partial_data": {
"description": "Partial results streamed as the crawl progresses. This feature is in alpha and may change.",
"items": {
"properties": {
"content": {
"description": "Raw content of the page.",
"type": "string"
},
"html": {
"description": "HTML version of the page content, only present if `includeHtml` was set to `true` in the crawl request.",
"type": "string"
},
"index": {
"description": "The index of the crawled page in the results.",
"type": "integer"
},
"markdown": {
"description": "Markdown content of the page.",
"type": "string"
},
"metadata": {
"description": "Metadata extracted from the page.",
"properties": {
"<any other metadata> ": {
"description": "Any other extracted metadata.",
"type": "string"
},
"description": {
"description": "Page description.",
"type": "string"
},
"language": {
"description": "Page language.",
"type": "string"
},
"pageError": {
"description": "Error message if there was an error scraping the page.",
"type": "string"
},
"pageStatusCode": {
"description": "HTTP status code of the page.",
"type": "integer"
},
"sourceURL": {
"description": "Source URL of the page.",
"type": "string"
},
"title": {
"description": "Page title.",
"type": "string"
}
},
"type": "object"
},
"rawHtml": {
"description": "Raw HTML content of the page, only present if `includeRawHtml` was set to `true` in the crawl request.",
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"status": {
"description": "Status of the crawl job. Can be 'completed', 'active', 'failed', or 'paused'.",
"enum": [
"completed",
"active",
"failed",
"paused"
],
"type": "string"
},
"total": {
"description": "The total estimated number of pages to crawl.",
"type": "integer"
}
},
"type": "object"
}
}
},
"description": "Crawl job status retrieved."
}
},
"summary": "Get Crawl Job Status"
}
},
"delete": {
"description": "Send a request to cancel a running crawl job.",
"operationId": "cancelCrawlJob",
"parameters": [
{
"description": "ID of the crawl job to cancel.",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"status": {
"description": "The status of the crawl job cancellation request, usually 'cancelled'.",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job cancellation request submitted."
},
"402": {
"description": "Payment required."
},
"429": {
"description": "Rate limit exceeded."
},
"500": {
"description": "Internal server error."
}
},
"summary": "Cancel a Crawl Job"
}
},
"description": "Send a request to crawl a URL and all accessible subpages. This submits a crawl job and returns a job ID to check the status of the crawl.",
"operationId": "crawlWebsite",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"description": "Options for controlling the crawling behavior.",
"properties": {
"allowBackwardCrawling": {
"default": false,
"description": "Enables the crawler to navigate from a specific URL to previously linked pages. For instance, from 'example.com/product/123' back to 'example.com/product'",
"type": "boolean"
},
"allowExternalContentLinks": {
"default": false,
"description": "Allows the crawler to follow links to external websites.",
"type": "boolean"
},
"excludes": {
"description": "URL patterns to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"generateImgAltText": {
"default": false,
"description": "Generate alt text for images using LLMs (must have a paid plan)",
"type": "boolean"
},
"ignoreSitemap": {
"default": false,
"description": "Ignore the website sitemap when crawling",
"type": "boolean"
},
"includes": {
"description": "URL patterns to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"default": 10000,
"description": "Maximum number of pages to crawl",
"type": "integer"
},
"maxDepth": {
"description": "Maximum depth to crawl relative to the entered URL. A maxDepth of 0 scrapes only the entered URL. A maxDepth of 1 scrapes the entered URL and all pages one level deep. A maxDepth of 2 scrapes the entered URL and all pages up to two levels deep. Higher values follow the same pattern.",
"type": "integer"
},
"mode": {
"default": "default",
"description": "The crawling mode to use. Fast mode crawls websites without a sitemap 4x faster, but may not be as accurate and shouldn't be used on heavily JS-rendered websites.",
"enum": [
"default",
"fast"
],
"type": "string"
},
"returnOnlyUrls": {
"default": false,
"description": "If true, returns only the URLs as a list on the crawl status. Attention: the return response will be a list of URLs inside the data, not a list of documents.",
"type": "boolean"
}
},
"type": "object"
},
"pageOptions": {
"description": "Options for controlling the scraping behavior of individual pages.",
"properties": {
"fullPageScreenshot": {
"default": false,
"description": "Include a full page screenshot of the page that you are scraping.",
"type": "boolean"
},
"headers": {
"description": "Headers to send with the request. Can be used to send cookies, user-agent, etc.",
"type": "object"
},
"includeHtml": {
"default": false,
"description": "Include the HTML version of the content on the page. Will output an html key in the response.",
"type": "boolean"
},
"includeRawHtml": {
"default": false,
"description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
"type": "boolean"
},
"onlyIncludeTags": {
"description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'",
"items": {
"type": "string"
},
"type": "array"
},
"onlyMainContent": {
"default": false,
"description": "Only return the main content of the page excluding headers, navs, footers, etc.",
"type": "boolean"
},
"removeTags": {
"description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'",
"items": {
"type": "string"
},
"type": "array"
},
"replaceAllPathsWithAbsolutePaths": {
"default": false,
"description": "Replace all relative paths with absolute paths for images and links",
"type": "boolean"
},
"screenshot": {
"default": false,
"description": "Include a screenshot of the top of the page that you are scraping.",
"type": "boolean"
},
"waitFor": {
"default": 0,
"description": "Number of milliseconds to wait for the page to load before fetching content",
"type": "integer"
}
},
"type": "object"
},
"url": {
"description": "The base URL to start crawling from",
"required": true,
"type": "string"
}
},
"type": "object"
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "The ID of the submitted crawl job.",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job submitted successfully."
},
"402": {
"description": "Payment required."
},
"429": {
"description": "Rate limit exceeded."
},
"500": {
"description": "Internal server error."
}
}
},
"summary": "Crawl a Website"
}
}
},
"servers": [
{
"url": "https://api.firecrawl.dev/v0"
}
]
}

View File

@ -0,0 +1,510 @@
{
"openapi": "3.0.0",
"info": {
"title": "Firecrawl API Specification (https://docs.firecrawl.dev/api-reference)",
"version": "1.0.0"
},
"paths": {
"/crawl": {
"post": {
"summary": "Crawl a website",
"requestBody": {
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Base URL to crawl"
},
"excludePaths": {
"type": "array",
"items": {
"type": "string"
},
"description": "URL patterns to exclude"
},
"includePaths": {
"type": "array",
"items": {
"type": "string"
},
"description": "URL patterns to include"
},
"maxDepth": {
"type": "integer",
"description": "Maximum crawl depth"
},
"ignoreSitemap": {
"type": "boolean",
"description": "Ignore sitemap?"
},
"limit": {
"type": "integer",
"description": "Maximum pages to crawl"
},
"allowBackwardLinks": {
"type": "boolean",
"description": "Allow backward links?"
},
"allowExternalLinks": {
"type": "boolean",
"description": "Allow external links?"
},
"webhook": {
"type": "string",
"description": "Webhook URL"
},
"scrapeOptions": {
"type": "object",
"properties": {
"formats": {
"type": "array",
"items": {
"type": "string"
},
"description": "Formats to include"
},
"headers": {
"type": "object",
"description": "Headers to send"
},
"includeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Tags to include"
},
"excludeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Tags to exclude"
},
"onlyMainContent": {
"type": "boolean",
"description": "Only main content?"
},
"waitFor": {
"type": "integer",
"description": "Wait time in ms"
}
}
}
}
}
}
}
},
"responses": {
"200": {
"description": "Crawl started",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"success": {
"type": "boolean"
},
"id": {
"type": "string"
},
"url": {
"type": "string"
}
}
}
}
}
}
},
"security": [
{
"Authorization": []
}
]
}
},
"/scrape": {
"post": {
"summary": "Scrape a webpage",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "URL to scrape"
},
"formats": {
"type": "array",
"description": "Output formats",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml",
"links",
"screenshot",
"extract",
"screenshot@fullPage"
]
}
},
"onlyMainContent": {
"type": "boolean",
"description": "Only main content"
},
"includeTags": {
"type": "array",
"description": "Tags to include",
"items": {
"type": "string"
}
},
"excludeTags": {
"type": "array",
"description": "Tags to exclude",
"items": {
"type": "string"
}
},
"headers": {
"type": "object",
"description": "Request headers"
},
"waitFor": {
"type": "integer",
"description": "Delay in ms"
},
"timeout": {
"type": "integer",
"description": "Timeout in ms"
},
"extract": {
"type": "object",
"description": "Extract object",
"properties": {
"schema": {
"type": "object",
"description": "Extraction schema"
},
"systemPrompt": {
"type": "string",
"description": "System prompt"
},
"prompt": {
"type": "string",
"description": "Extraction prompt"
}
}
}
}
}
}
}
},
"responses": {
"200": {
"description": "Successful scrape",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"success": {
"type": "boolean"
},
"data": {
"type": "object",
"properties": {
"markdown": {
"type": "string"
},
"html": {
"type": "string"
},
"rawHtml": {
"type": "string"
},
"screenshot": {
"type": "string"
},
"links": {
"type": "array",
"items": {
"type": "string"
}
},
"metadata": {
"type": "object",
"properties": {
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"statusCode": {
"type": "integer"
},
"error": {
"type": "string"
}
}
},
"llm_extraction": {
"type": "object"
},
"warning": {
"type": "string"
}
}
}
}
}
}
}
}
},
"security": [
{
"Bearer": []
}
]
}
},
"/v1/crawl/{id}": {
"get": {
"summary": "Get crawl status",
"parameters": [
{
"name": "id",
"in": "path",
"description": "ID of crawl job",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"description": "Crawl status",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"status": {
"type": "string",
"description": "Current status of crawl"
},
"total": {
"type": "integer",
"description": "Total number of pages to crawl"
},
"completed": {
"type": "integer",
"description": "Number of pages crawled so far"
},
"creditsUsed": {
"type": "integer",
"description": "Credits used"
},
"expiresAt": {
"type": "string",
"format": "date-time",
"description": "Crawl expiry"
},
"next": {
"type": "string",
"nullable": true,
"description": "URL for next data"
},
"data": {
"type": "array",
"description": "Data of the crawl",
"items": {
"type": "object",
"properties": {
"markdown": {
"type": "string"
},
"html": {
"type": "string"
},
"rawHtml": {
"type": "string"
},
"links": {
"type": "array",
"items": {
"type": "string"
}
},
"screenshot": {
"type": "string"
},
"metadata": {
"type": "object",
"properties": {
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"statusCode": {
"type": "integer"
},
"error": {
"type": "string"
}
}
}
}
}
}
}
}
}
}
}
},
"security": [
{
"Bearer": []
}
]
}
},
"/crawl/{id}": {
"delete": {
"summary": "Cancel crawl job",
"security": [
{
"bearerAuth": []
}
],
"parameters": [
{
"name": "id",
"in": "path",
"description": "ID of crawl job",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"description": "Crawl job cancelled",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"success": {
"type": "boolean"
},
"message": {
"type": "string"
}
}
}
}
}
}
}
}
},
"/map": {
"post": {
"summary": "Map website and return links",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Base URL to crawl"
},
"search": {
"type": "string",
"description": "Search query for mapping"
},
"ignoreSitemap": {
"type": "boolean",
"description": "Ignore sitemap?"
},
"includeSubdomains": {
"type": "boolean",
"description": "Include subdomains?"
},
"limit": {
"type": "integer",
"description": "Max links to return"
}
},
"required": [
"url"
]
}
}
}
},
"responses": {
"200": {
"description": "Successful mapping",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"success": {
"type": "boolean"
},
"links": {
"type": "array",
"items": {
"type": "string"
}
}
}
}
}
}
}
}
}
}
},
"components": {
"schemas": {}
}
}

View File

@ -1,164 +0,0 @@
{
"openapi": "3.0.0",
"info": {
"title": "Knowledge Base API",
"description": "API for managing knowledge bases and documents."
},
"paths": {
"/datasets": {
"post": {
"summary": "Create an Empty Dataset",
"description": "Only used to create an empty dataset",
"requestBody": {
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"name": {
"type": "string"
}
}
}
}
}
},
"responses": {}
},
"get": {
"summary": "Dataset List",
"parameters": [
{
"name": "page",
"in": "query",
"schema": {
"type": "integer"
}
},
{
"name": "limit",
"in": "query",
"schema": {
"type": "integer"
}
}
],
"responses": {}
}
},
"/datasets/{dataset_id}/document/create_by_text": {
"post": {
"summary": "Create Document by Text",
"requestBody": {
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"text": {
"type": "string"
},
"indexing_technique": {
"type": "string"
},
"process_rule": {
"type": "object"
}
}
}
}
}
},
"responses": {}
}
},
"/datasets/{dataset_id}/document/create_by_file": {
"post": {
"summary": "Create Document by File",
"requestBody": {
"content": {
"multipart/form-data": {
"schema": {
"type": "object",
"properties": {
"data": {
"type": "string"
},
"file": {
"type": "string",
"format": "binary"
}
}
}
}
}
},
"responses": {}
}
},
"/datasets/{dataset_id}/documents/{batch}/indexing-status": {
"get": {
"summary": "Get Document Embedding Status (Progress)",
"responses": {}
}
},
"/datasets/{dataset_id}/documents/{document_id}": {
"delete": {
"summary": "Delete Document",
"responses": {}
}
},
"/datasets/{dataset_id}/documents": {
"get": {
"summary": "Dataset Document List",
"responses": {}
}
},
"/datasets/{dataset_id}/documents/{document_id}/segments": {
"post": {
"summary": "Add Segments",
"requestBody": {
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"segments": {
"type": "array",
"items": {
"type": "object",
"properties": {
"content": {
"type": "string"
},
"answer": {
"type": "string"
},
"keywords": {
"type": "array",
"items": {
"type": "string"
}
}
}
}
}
}
}
}
}
},
"responses": {}
}
},
"/datasets/{dataset_id}/segments/{segment_id}": {
"delete": {
"summary": "Delete Document Segment",
"responses": {}
}
}
}
}

View File

@ -1,211 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/v0/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"description": "Crawling options.",
"properties": {
"excludes": {
"description": "URL patterns to exclude.",
"items": {
"type": "string"
},
"type": "array"
},
"includes": {
"description": "URL patterns to include.",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl.",
"type": "integer"
},
"maxDepth": {
"description": "Maximum crawl depth.",
"type": "integer"
},
"mode": {
"description": "Crawling mode.",
"enum": [
"default",
"fast"
],
"type": "string"
},
"returnOnlyUrls": {
"description": "Return only URLs.",
"type": "boolean"
}
},
"type": "object"
},
"pageOptions": {
"description": "Page scraping options.",
"properties": {
"includeHtml": {
"description": "Include HTML content.",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content.",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only main content.",
"type": "boolean"
},
"screenshot": {
"description": "Include page screenshot.",
"type": "boolean"
},
"waitFor": {
"description": "Wait time in milliseconds.",
"type": "integer"
}
},
"type": "object"
},
"url": {
"description": "Base URL to crawl.",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Crawl job ID.",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job initiated."
}
},
"summary": "Crawl multiple pages."
}
},
"/v0/crawl/status/{jobId}": {
"get": {
"parameters": [
{
"description": "Crawl job ID.",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"description": "Crawl job status."
}
},
"summary": "Check crawl job status."
}
},
"/v0/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "Data extraction options.",
"properties": {
"extractionPrompt": {
"description": "Prompt for data extraction.",
"type": "string"
},
"extractionSchema": {
"description": "Schema for data extraction.",
"type": "object"
},
"mode": {
"description": "Extraction mode.",
"enum": [
"llm-extraction",
"llm-extraction-from-raw-html"
],
"type": "string"
}
},
"type": "object"
},
"pageOptions": {
"description": "Page scraping options.",
"properties": {
"includeHtml": {
"description": "Include HTML content.",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content.",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only main content.",
"type": "boolean"
},
"screenshot": {
"description": "Include page screenshot.",
"type": "boolean"
},
"waitFor": {
"description": "Wait time in milliseconds.",
"type": "integer"
}
},
"type": "object"
},
"timeout": {
"description": "Timeout in milliseconds.",
"type": "integer"
},
"url": {
"description": "URL to scrape.",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"description": "Successful scraping."
}
},
"summary": "Scrape a single page."
}
}
}
}

View File

@ -1,165 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"properties": {
"allowBackwardCrawling": {
"description": "Allow backward crawling",
"type": "boolean"
},
"allowExternalContentLinks": {
"description": "Allow external links",
"type": "boolean"
},
"excludes": {
"description": "URL patterns to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"generateImgAltText": {
"description": "Generate alt text for images",
"type": "boolean"
},
"ignoreSitemap": {
"description": "Ignore website sitemap",
"type": "boolean"
},
"includes": {
"description": "URL patterns to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl",
"type": "integer"
},
"maxDepth": {
"description": "Maximum crawl depth",
"type": "integer"
},
"mode": {
"description": "Crawling mode",
"enum": [
"default",
"fast"
],
"type": "string"
},
"returnOnlyUrls": {
"description": "Return only crawled URLs",
"type": "boolean"
}
},
"type": "object"
},
"pageOptions": {
"properties": {
"fullPageScreenshot": {
"description": "Include full page screenshot",
"type": "boolean"
},
"headers": {
"description": "Headers for requests",
"type": "object"
},
"includeHtml": {
"description": "Include HTML content",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content",
"type": "boolean"
},
"onlyIncludeTags": {
"description": "Include only specific tags",
"items": {
"type": "string"
},
"type": "array"
},
"onlyMainContent": {
"description": "Return only main content",
"type": "boolean"
},
"removeTags": {
"description": "Remove specific tags",
"items": {
"type": "string"
},
"type": "array"
},
"replaceAllPathsWithAbsolutePaths": {
"description": "Use absolute paths",
"type": "boolean"
},
"screenshot": {
"description": "Include page screenshot",
"type": "boolean"
},
"waitFor": {
"description": "Wait for page load (ms)",
"type": "integer"
}
},
"type": "object"
},
"url": {
"description": "Base URL to crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Job ID of the crawl",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl request successful"
}
},
"security": [
{
"Bearer": []
}
],
"summary": "Crawl a website"
}
}
},
"securitySchemes": {
"Bearer": {
"bearerFormat": "JWT",
"scheme": "bearer",
"type": "http"
}
}
}

View File

@ -1,93 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/check_crawl_status": {
"post": {
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"current": {
"type": "integer"
},
"data": {
"items": {
"properties": {
"content": {
"type": "string"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"provider": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"status": {
"type": "string"
},
"total": {
"type": "integer"
}
},
"type": "object"
}
}
},
"description": "Crawl job status"
}
},
"summary": "Check crawl job status"
}
},
"/crawl": {
"post": {
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Job ID"
}
},
"summary": "Crawl URL and subpages"
}
}
}
}

View File

@ -1,131 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"example": {
"extractorOptions": {
"extractionPrompt": "Based on the information on the page, extract the information from the schema. ",
"extractionSchema": {
"properties": {
"company_mission": {
"type": "string"
},
"is_in_yc": {
"type": "boolean"
},
"is_open_source": {
"type": "boolean"
},
"supports_sso": {
"type": "boolean"
}
},
"required": [
"company_mission",
"supports_sso",
"is_open_source",
"is_in_yc"
],
"type": "object"
},
"mode": "llm-extraction"
},
"url": "https://docs.firecrawl.dev/"
},
"schema": {
"properties": {
"extractorOptions": {
"properties": {
"extractionPrompt": {
"description": "Prompt for extraction",
"type": "string"
},
"extractionSchema": {
"description": "Schema for data extraction",
"type": "object"
},
"mode": {
"description": "Extraction mode",
"type": "string"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"example": {
"data": {
"content": "Raw Content",
"llm_extraction": {
"company_mission": "Train a secure AI on your technical resources that answers customer and employee questions so your team doesn't have to",
"is_in_yc": true,
"is_open_source": false,
"supports_sso": true
},
"metadata": {
"description": "Mendable allows you to easily build AI chat applications. Ingest, customize, then deploy with one line of code anywhere you want. Brought to you by SideGuide",
"ogDescription": "Mendable allows you to easily build AI chat applications. Ingest, customize, then deploy with one line of code anywhere you want. Brought to you by SideGuide",
"ogImage": "https://docs.firecrawl.dev/mendable_new_og1.png",
"ogLocaleAlternate": [],
"ogSiteName": "Mendable",
"ogTitle": "Mendable",
"ogUrl": "https://docs.firecrawl.dev/",
"robots": "follow, index",
"sourceURL": "https://docs.firecrawl.dev/",
"title": "Mendable"
}
},
"success": true
},
"schema": {
"properties": {
"data": {
"properties": {
"content": {
"type": "string"
},
"llm_extraction": {
"type": "object"
},
"metadata": {
"type": "object"
}
},
"type": "object"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful scrape"
}
},
"summary": "Extract data from pages."
}
}
}
}

View File

@ -1,87 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/search": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"pageOptions": {
"properties": {
"fetchPageContent": {
"type": "boolean"
}
},
"type": "object"
},
"query": {
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"items": {
"properties": {
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"provider": {
"type": "string"
},
"url": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful search and scrape."
}
},
"summary": "Search web, scrape, return markdown."
}
}
}
}

View File

@ -1,83 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"url": {
"description": "Website URL to crawl.",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"items": {
"properties": {
"markdown": {
"description": "Markdown content.",
"type": "string"
}
},
"type": "object"
},
"type": "array"
}
}
},
"description": "Website crawled successfully."
}
},
"summary": "Crawl a website."
}
},
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"url": {
"description": "Page URL to scrape.",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"text/plain": {
"schema": {
"description": "Scraped content.",
"type": "string"
}
}
},
"description": "Page scraped successfully."
}
},
"summary": "Scrape a single page."
}
}
}
}

View File

@ -1,200 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawler_options": {
"properties": {
"exclude": {
"description": "URL patterns to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"generateImgAltText": {
"description": "Generate alt text for images",
"type": "boolean"
},
"includes": {
"description": "URL patterns to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Max pages to crawl",
"type": "integer"
},
"maxDepth": {
"description": "Maximum crawl depth",
"type": "integer"
},
"mode": {
"description": "Crawling mode",
"type": "string"
},
"returnOnlyUrls": {
"description": "Return only URLs",
"type": "boolean"
},
"timeout": {
"description": "Timeout in milliseconds",
"type": "integer"
}
},
"type": "object"
},
"page_options": {
"properties": {
"includeHtml": {
"description": "Include HTML content",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "Base URL to crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"description": "Crawl successful."
}
},
"summary": "Crawl a website."
}
},
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractor_options": {
"properties": {
"extractionPrompt": {
"description": "Prompt for extraction",
"type": "string"
},
"extractionSchema": {
"description": "Schema for extraction",
"type": "object"
},
"mode": {
"description": "Extraction mode",
"type": "string"
}
},
"type": "object"
},
"page_options": {
"properties": {
"includeHtml": {
"description": "Include HTML content",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only main content",
"type": "boolean"
}
},
"type": "object"
},
"timeout": {
"description": "Timeout in milliseconds",
"type": "integer"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"description": "Scrape successful."
}
},
"summary": "Scrape a website."
}
},
"/search": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"page_options": {
"properties": {
"fetchPageContent": {
"description": "Fetch full content",
"type": "boolean"
},
"includeHtml": {
"description": "Include HTML content",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only main content",
"type": "boolean"
}
},
"type": "object"
},
"query": {
"description": "Search query string",
"type": "string"
},
"search_options": {
"properties": {
"limit": {
"description": "Max results",
"type": "integer"
}
},
"type": "object"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"description": "Search successful."
}
},
"summary": "Search Firecrawl index."
}
}
}
}

View File

@ -1,54 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/crawl/cancel/{jobId}": {
"delete": {
"parameters": [
{
"description": "ID of crawl job",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"status": {
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Returns cancelled."
}
},
"security": [
{
"Bearer": []
}
],
"summary": "Cancel crawl job"
}
}
},
"components": {
"securitySchemes": {
"Bearer": {
"bearerFormat": "Bearer <token>",
"scheme": "bearer",
"type": "http"
}
}
}
}

View File

@ -1,166 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/check-crawl-status/{jobId}": {
"get": {
"parameters": [
{
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"current": {
"description": "Current progress",
"type": "integer"
},
"data": {
"items": {
"properties": {
"content": {
"description": "Raw content",
"type": "string"
},
"markdown": {
"description": "Markdown content",
"type": "string"
},
"metadata": {
"description": "Page metadata",
"type": "object"
},
"provider": {
"description": "Data provider",
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"status": {
"description": "Job status",
"type": "string"
},
"total": {
"description": "Total pages",
"type": "integer"
}
},
"type": "object"
}
}
},
"description": "Crawl job status."
}
},
"summary": "Check crawl job status."
}
},
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"description": "Crawler options",
"type": "object"
},
"url": {
"description": "URL to crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Job ID",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job submitted."
}
},
"summary": "Crawl a URL."
}
},
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "Extractor options",
"type": "object"
},
"pageOptions": {
"description": "Page options",
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"description": "Scraped data",
"type": "object"
},
"success": {
"description": "Success flag",
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Scraped data."
}
},
"summary": "Scrape a single URL."
}
}
}
}

View File

@ -1,229 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/v0/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"properties": {
"excludes": {
"description": "Paths to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"includes": {
"description": "Paths to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl",
"type": "integer"
},
"maxDepth": {
"description": "Maximum crawl depth",
"type": "integer"
},
"returnOnlyUrls": {
"description": "Only return URLs",
"type": "boolean"
}
},
"type": "object"
},
"pageOptions": {
"properties": {
"onlyMainContent": {
"description": "Extract main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Job ID",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job created"
}
},
"summary": "Crawl a website"
}
},
"/v0/crawl/status/{jobId}": {
"get": {
"parameters": [
{
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"current": {
"type": "integer"
},
"data": {
"items": {
"properties": {
"url": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"status": {
"description": "Job status",
"type": "string"
},
"total": {
"type": "integer"
}
},
"type": "object"
}
}
},
"description": "Crawl job status"
}
},
"summary": "Get crawl job status"
}
},
"/v0/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"pageOptions": {
"properties": {
"onlyMainContent": {
"description": "Extract main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"properties": {
"content": {
"type": "string"
},
"html": {
"type": "string"
},
"llm_extraction": {
"type": "object"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"pageError": {
"type": "string"
},
"pageStatusCode": {
"type": "integer"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"rawHtml": {
"type": "string"
},
"warning": {
"type": "string"
}
},
"type": "object"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Scrape results"
}
},
"summary": "Scrape a webpage"
}
}
}
}

View File

@ -1,115 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"example": {
"extractorOptions": {
"extractionPrompt": "Extract company info.",
"extractionSchema": {
"properties": {
"company_description": {
"type": "string"
},
"company_industry": {
"type": "string"
},
"who_they_serve": {
"type": "string"
}
},
"required": [
"company_description",
"company_industry",
"who_they_serve"
],
"type": "object"
},
"mode": "llm-extraction"
},
"pageOptions": {
"onlyMainContent": true
},
"url": "https://example.com"
},
"schema": {
"properties": {
"extractorOptions": {
"properties": {
"extractionPrompt": {
"description": "Prompt for LLM extraction.",
"type": "string"
},
"extractionSchema": {
"description": "JSON Schema describing the data to extract.",
"properties": {
"properties": {
"description": "Field definitions keyed by field name.",
"type": "object"
},
"required": {
"description": "Names of the fields that must be extracted.",
"items": {
"type": "string"
},
"type": "array"
},
"type": {
"type": "string"
}
},
"type": "object"
},
"mode": {
"description": "Extraction mode.",
"type": "string"
}
},
"type": "object"
},
"pageOptions": {
"properties": {
"onlyMainContent": {
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape.",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Successful scrape."
}
},
"summary": "Scrape data from URL."
}
}
}
}

View File

@ -1,185 +0,0 @@
{
"components": {
"securitySchemes": {
"bearerAuth": {
"scheme": "bearer",
"type": "http"
}
}
},
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/v0/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "Options for extraction",
"properties": {
"extractionPrompt": {
"description": "Prompt for LLM extraction",
"type": "string"
},
"extractionSchema": {
"description": "Schema for LLM extraction",
"type": "object"
},
"mode": {
"description": "Extraction mode",
"enum": [
"markdown",
"llm-extraction",
"llm-extraction-from-raw-html",
"llm-extraction-from-markdown"
],
"type": "string"
}
},
"type": "object"
},
"pageOptions": {
"properties": {
"fullPageScreenshot": {
"description": "Include full page screenshot",
"type": "boolean"
},
"headers": {
"description": "Headers for request",
"type": "object"
},
"includeHtml": {
"description": "Include HTML content",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content",
"type": "boolean"
},
"onlyIncludeTags": {
"description": "Include only these tags",
"items": {
"type": "string"
},
"type": "array"
},
"onlyMainContent": {
"description": "Only return main content",
"type": "boolean"
},
"removeTags": {
"description": "Remove these tags",
"items": {
"type": "string"
},
"type": "array"
},
"replaceAllPathsWithAbsolutePaths": {
"description": "Replace relative paths",
"type": "boolean"
},
"screenshot": {
"description": "Include screenshot",
"type": "boolean"
},
"waitFor": {
"description": "Wait time in ms",
"type": "integer"
}
},
"type": "object"
},
"timeout": {
"description": "Timeout in ms",
"type": "integer"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
},
"required": true
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"properties": {
"content": {
"type": "string"
},
"html": {
"type": "string"
},
"llm_extraction": {
"type": "object"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"pageError": {
"type": "string"
},
"pageStatusCode": {
"type": "integer"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"rawHtml": {
"type": "string"
},
"warning": {
"type": "string"
}
},
"type": "object"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful scrape"
}
},
"security": [
{
"bearerAuth": []
}
],
"summary": "Scrape a webpage"
}
}
}
}

View File

@ -1,212 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"description": "Crawl job options",
"properties": {
"excludes": {
"description": "Pages to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"includes": {
"description": "Pages to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Max pages to crawl",
"type": "integer"
}
},
"type": "object"
},
"pageOptions": {
"description": "Page scraping options",
"properties": {
"onlyMainContent": {
"description": "Only scrape main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to crawl",
"type": "string"
}
},
"required": [
"url"
],
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"description": "Crawl job result",
"type": "object"
}
}
},
"description": "Crawl job result"
}
},
"summary": "Crawl a website"
}
},
"/crawl/{jobId}/cancel": {
"post": {
"parameters": [
{
"description": "Crawl job ID",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"description": "Cancellation status",
"type": "object"
}
}
},
"description": "Cancellation status"
}
},
"summary": "Cancel crawl job"
}
},
"/crawl/{jobId}/status": {
"get": {
"parameters": [
{
"description": "Crawl job ID",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"description": "Crawl status",
"type": "object"
}
}
},
"description": "Crawl status"
}
},
"summary": "Check crawl status"
}
},
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "LLM extraction options",
"properties": {
"extractionSchema": {
"description": "JSON schema for extraction",
"type": "object"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"required": [
"url"
],
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"description": "Scraped data",
"type": "object"
}
}
},
"description": "Scraped data"
}
},
"summary": "Scrape a single URL"
}
},
"/search": {
"get": {
"parameters": [
{
"description": "Search query",
"in": "query",
"name": "query",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"description": "Search results",
"type": "object"
}
}
},
"description": "Search results"
}
},
"summary": "Search and scrape"
}
}
}
}

View File

@ -1,199 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"properties": {
"excludes": {
"description": "Paths to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"includes": {
"description": "Paths to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl",
"type": "integer"
}
},
"type": "object"
},
"pageOptions": {
"properties": {
"onlyMainContent": {
"description": "Extract only main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "Starting URL for crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Unique job identifier",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job started"
}
},
"summary": "Crawl a website"
}
},
"/crawl/{jobId}/status": {
"get": {
"parameters": [
{
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"status": {
"description": "Current job status",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job status"
}
},
"summary": "Check crawl status"
}
},
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"properties": {
"extractionSchema": {
"description": "Zod schema for extraction",
"type": "object"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"description": "Extracted data",
"type": "object"
}
},
"type": "object"
}
}
},
"description": "Scraped data"
}
},
"summary": "Scrape a single URL"
}
},
"/search": {
"get": {
"parameters": [
{
"in": "query",
"name": "query",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"items": {
"properties": {
"content": {
"description": "Page content (optional)",
"type": "string"
},
"url": {
"description": "Result URL",
"type": "string"
}
},
"type": "object"
},
"type": "array"
}
}
},
"description": "Search results"
}
},
"summary": "Search for a query"
}
}
}
}

View File

@ -1,202 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"description": "Options for crawling",
"properties": {
"excludes": {
"description": "URLs to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"includes": {
"description": "URLs to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl",
"type": "integer"
}
},
"type": "object"
},
"pageOptions": {
"description": "Options for page content",
"properties": {
"onlyMainContent": {
"description": "Extract only main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Unique crawl job ID",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job started."
}
},
"summary": "Crawl a website."
}
},
"/crawl/{jobId}": {
"get": {
"parameters": [
{
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"status": {
"description": "Current job status",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job status."
}
},
"summary": "Check crawl job status."
}
},
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "Options for data extraction",
"properties": {
"extractionSchema": {
"description": "Pydantic schema",
"type": "object"
},
"mode": {
"description": "Extraction mode",
"type": "string"
}
},
"type": "object"
},
"pageOptions": {
"description": "Options for page content",
"properties": {
"onlyMainContent": {
"description": "Extract only main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Scraped data."
}
},
"summary": "Scrape a single URL."
}
},
"/search": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"query": {
"description": "Search query",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Search results."
}
},
"summary": "Search the web."
}
}
}
}

View File

@ -1,201 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "0.1"
},
"openapi": "3.0.0",
"paths": {
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"description": "Crawl job options",
"properties": {
"excludes": {
"description": "URLs to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"includes": {
"description": "URLs to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl",
"type": "integer"
}
},
"type": "object"
},
"pageOptions": {
"description": "Page scraping options",
"properties": {
"onlyMainContent": {
"description": "Only scrape main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Crawl job started"
}
},
"summary": "Crawl a website."
}
},
"/crawl/{job_id}/cancel": {
"post": {
"parameters": [
{
"description": "Crawl job ID",
"in": "path",
"name": "job_id",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Cancellation status"
}
},
"summary": "Cancel crawl job."
}
},
"/crawl/{job_id}/status": {
"get": {
"parameters": [
{
"description": "Crawl job ID",
"in": "path",
"name": "job_id",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Crawl status"
}
},
"summary": "Check crawl status."
}
},
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "LLM extraction options",
"properties": {
"extractionSchema": {
"description": "JSON schema for extraction",
"type": "object"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Scraped data"
}
},
"summary": "Scrape a single URL."
}
},
"/search": {
"get": {
"parameters": [
{
"description": "Search query",
"in": "query",
"name": "query",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Search results"
}
},
"summary": "Search and scrape results."
}
}
}
}

View File

@ -1,245 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/check-crawl-status": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Crawl job ID",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"current": {
"description": "Current page count",
"type": "integer"
},
"data": {
"description": "Crawl data",
"items": {
"properties": {
"content": {
"description": "Raw content",
"type": "string"
},
"markdown": {
"description": "Markdown content",
"type": "string"
},
"metadata": {
"description": "Page metadata",
"properties": {
"description": {
"description": "Page description",
"type": "string"
},
"language": {
"description": "Page language",
"type": "string"
},
"sourceURL": {
"description": "Page URL",
"type": "string"
},
"title": {
"description": "Page title",
"type": "string"
}
},
"type": "object"
},
"provider": {
"description": "Content provider",
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"status": {
"description": "Crawl status",
"type": "string"
},
"total": {
"description": "Total page count",
"type": "integer"
}
},
"type": "object"
}
}
},
"description": "Crawl job status."
}
},
"summary": "Check crawl job status."
}
},
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"description": "Crawler options",
"properties": {
"excludes": {
"description": "URLs to exclude",
"items": {
"type": "string"
},
"type": "array"
}
},
"type": "object"
},
"url": {
"description": "URL to crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Job ID",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job submitted."
}
},
"summary": "Crawl a URL."
}
},
"/scrape-url": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "Extractor options",
"properties": {
"extractionSchema": {
"description": "Extraction schema",
"type": "object"
},
"mode": {
"description": "Extraction mode",
"type": "string"
}
},
"type": "object"
},
"pageOptions": {
"description": "Page options",
"properties": {
"onlyMainContent": {
"description": "Only main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"description": "Scraped data",
"properties": {
"content": {
"description": "Raw content",
"type": "string"
},
"html": {
"description": "HTML content",
"type": "string"
},
"llm_extraction": {
"description": "LLM extraction results",
"type": "object"
},
"markdown": {
"description": "Markdown content",
"type": "string"
},
"metadata": {
"description": "Page metadata",
"type": "object"
},
"rawHtml": {
"description": "Raw HTML content",
"type": "string"
},
"warning": {
"description": "Warning message",
"type": "string"
}
},
"type": "object"
},
"success": {
"description": "Request success",
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Scraped data."
}
},
"summary": "Scrape a single URL."
}
}
}
}

View File

@ -1,129 +0,0 @@
{
"components": {
"securitySchemes": {
"Bearer": {
"scheme": "bearer",
"type": "http"
}
}
},
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/search": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"pageOptions": {
"properties": {
"fetchPageContent": {
"description": "Fetch content of each page.",
"type": "boolean"
},
"includeHtml": {
"description": "Include HTML content.",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content.",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only return main content.",
"type": "boolean"
}
},
"type": "object"
},
"query": {
"description": "The query to search for",
"type": "string"
},
"searchOptions": {
"properties": {
"limit": {
"description": "Maximum number of results.",
"type": "integer"
}
},
"type": "object"
}
},
"type": "object"
}
}
},
"required": true
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"items": {
"properties": {
"content": {
"type": "string"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"url": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful search."
}
},
"security": [
{
"Bearer": []
}
],
"summary": "Search the web."
}
}
},
"servers": [
{
"url": "https://api.firecrawl.dev/v0"
}
]
}

View File

@ -1,186 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/crawl/status/{jobId}": {
"get": {
"parameters": [
{
"description": "ID of crawl job",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"current": {
"description": "Current page number",
"type": "integer"
},
"data": {
"description": "Data from the job",
"items": {
"properties": {
"content": {
"type": "string"
},
"html": {
"description": "HTML content",
"nullable": true,
"type": "string"
},
"index": {
"description": "Page number crawled",
"type": "integer"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"nullable": true,
"type": "string"
},
"pageError": {
"description": "Error message of page",
"nullable": true,
"type": "string"
},
"pageStatusCode": {
"description": "Status code of page",
"type": "integer"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
},
"{any other metadata}": {
"type": "string"
}
},
"type": "object"
},
"rawHtml": {
"description": "Raw HTML content",
"nullable": true,
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"partial_data": {
"description": "Partial documents (streaming)",
"items": {
"properties": {
"content": {
"type": "string"
},
"html": {
"description": "HTML content",
"nullable": true,
"type": "string"
},
"index": {
"description": "Page number crawled",
"type": "integer"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"nullable": true,
"type": "string"
},
"pageError": {
"description": "Error message of page",
"nullable": true,
"type": "string"
},
"pageStatusCode": {
"description": "Status code of page",
"type": "integer"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
},
"{any other metadata}": {
"type": "string"
}
},
"type": "object"
},
"rawHtml": {
"description": "Raw HTML content",
"nullable": true,
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"status": {
"description": "Status of the job",
"type": "string"
},
"total": {
"description": "Total number of pages",
"type": "integer"
}
},
"type": "object"
}
}
},
"description": "Successful operation"
}
},
"security": [
{
"Authorization": []
}
],
"summary": "Get crawl job status"
}
}
},
"components": {
"securitySchemes": {
"Authorization": {
"bearerFormat": "Bearer <token>",
"scheme": "bearer",
"type": "http"
}
}
},
"servers": [
{
"url": "https://api.firecrawl.dev/v0"
}
]
}

View File

@ -1,86 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/v0/search": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"pageOptions": {
"properties": {
"fetchPageContent": {
"description": "Fetch page content",
"type": "boolean"
}
},
"type": "object"
},
"query": {
"description": "Search term",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"items": {
"properties": {
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"url": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful search"
}
},
"summary": "Search and extract content"
}
}
}
}

View File

@ -1,59 +0,0 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/test": {
"get": {
"description": "Returns a test message.",
"responses": {
"200": {
"content": {
"text/plain": {
"schema": {
"example": "Hello, world!",
"type": "string"
}
}
},
"description": "Successful operation"
}
},
"summary": "Test endpoint"
}
},
"/v0/crawl": {
"post": {
"description": "Processes crawl job for URL.",
"requestBody": {
"content": {
"application/json": {
"example": {
"url": "https://docs.firecrawl.dev"
},
"schema": {
"properties": {
"url": {
"description": "Website URL",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "URL to crawl",
"required": true
},
"responses": {
"200": {
"description": "Crawl initiated."
}
},
"summary": "Crawl a given URL."
}
}
}
}

View File

@ -1,738 +0,0 @@
{
"components": {
"schemas": {}
},
"info": {
"title": "https://docs.firecrawl.dev API Specification",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/check_crawl_status": {
"post": {
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"current": {
"type": "integer"
},
"data": {
"items": {
"properties": {
"content": {
"type": "string"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"provider": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"status": {
"type": "string"
},
"total": {
"type": "integer"
}
},
"type": "object"
}
}
},
"description": "Crawl job status"
}
},
"summary": "Check crawl job status"
}
},
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"properties": {
"allowBackwardCrawling": {
"description": "Allow backward crawling",
"type": "boolean"
},
"allowExternalContentLinks": {
"description": "Allow external links",
"type": "boolean"
},
"excludes": {
"description": "URL patterns to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"generateImgAltText": {
"description": "Generate alt text for images",
"type": "boolean"
},
"ignoreSitemap": {
"description": "Ignore website sitemap",
"type": "boolean"
},
"includes": {
"description": "URL patterns to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl",
"type": "integer"
},
"maxDepth": {
"description": "Maximum crawl depth",
"type": "integer"
},
"mode": {
"description": "Crawling mode",
"enum": [
"default",
"fast"
],
"type": "string"
},
"returnOnlyUrls": {
"description": "Return only crawled URLs",
"type": "boolean"
}
},
"type": "object"
},
"pageOptions": {
"properties": {
"fullPageScreenshot": {
"description": "Include full page screenshot",
"type": "boolean"
},
"headers": {
"description": "Headers for requests",
"type": "object"
},
"includeHtml": {
"description": "Include HTML content",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content",
"type": "boolean"
},
"onlyIncludeTags": {
"description": "Include only specific tags",
"items": {
"type": "string"
},
"type": "array"
},
"onlyMainContent": {
"description": "Return only main content",
"type": "boolean"
},
"removeTags": {
"description": "Remove specific tags",
"items": {
"type": "string"
},
"type": "array"
},
"replaceAllPathsWithAbsolutePaths": {
"description": "Use absolute paths",
"type": "boolean"
},
"screenshot": {
"description": "Include page screenshot",
"type": "boolean"
},
"waitFor": {
"description": "Wait for page load (ms)",
"type": "integer"
}
},
"type": "object"
},
"url": {
"description": "Base URL to crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Job ID of the crawl",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl request successful"
}
},
"security": [
{
"Bearer": []
}
],
"summary": "Crawl a website"
}
},
"/crawl/cancel/{jobId}": {
"delete": {
"parameters": [
{
"description": "ID of crawl job",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"status": {
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Returns cancelled."
}
},
"security": [
{
"Bearer": []
}
],
"summary": "Cancel crawl job"
}
},
"/search": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"pageOptions": {
"properties": {
"fetchPageContent": {
"description": "Fetch content of each page.",
"type": "boolean"
},
"includeHtml": {
"description": "Include HTML content.",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content.",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only return main content.",
"type": "boolean"
}
},
"type": "object"
},
"query": {
"description": "The query to search for",
"type": "string"
},
"searchOptions": {
"properties": {
"limit": {
"description": "Maximum number of results.",
"type": "integer"
}
},
"type": "object"
}
},
"type": "object"
}
}
},
"required": true
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"items": {
"properties": {
"content": {
"type": "string"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"url": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful search."
}
},
"security": [
{
"Bearer": []
}
],
"summary": "Search the web."
}
},
"/v0/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"description": "Crawling options.",
"properties": {
"excludes": {
"description": "URL patterns to exclude.",
"items": {
"type": "string"
},
"type": "array"
},
"includes": {
"description": "URL patterns to include.",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl.",
"type": "integer"
},
"maxDepth": {
"description": "Maximum crawl depth.",
"type": "integer"
},
"mode": {
"description": "Crawling mode.",
"enum": [
"default",
"fast"
],
"type": "string"
},
"returnOnlyUrls": {
"description": "Return only URLs.",
"type": "boolean"
}
},
"type": "object"
},
"pageOptions": {
"description": "Page scraping options.",
"properties": {
"includeHtml": {
"description": "Include HTML content.",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content.",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only main content.",
"type": "boolean"
},
"screenshot": {
"description": "Include page screenshot.",
"type": "boolean"
},
"waitFor": {
"description": "Wait time in milliseconds.",
"type": "integer"
}
},
"type": "object"
},
"url": {
"description": "Base URL to crawl.",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Crawl job ID.",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job initiated."
}
},
"summary": "Crawl multiple pages."
}
},
"/v0/crawl/status/{jobId}": {
"get": {
"parameters": [
{
"description": "Crawl job ID.",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"description": "Crawl job status."
}
},
"summary": "Check crawl job status."
}
},
"/v0/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "Options for extraction",
"properties": {
"extractionPrompt": {
"description": "Prompt for LLM extraction",
"type": "string"
},
"extractionSchema": {
"description": "Schema for LLM extraction",
"type": "object"
},
"mode": {
"description": "Extraction mode",
"enum": [
"markdown",
"llm-extraction",
"llm-extraction-from-raw-html",
"llm-extraction-from-markdown"
],
"type": "string"
}
},
"type": "object"
},
"pageOptions": {
"properties": {
"fullPageScreenshot": {
"description": "Include full page screenshot",
"type": "boolean"
},
"headers": {
"description": "Headers for request",
"type": "object"
},
"includeHtml": {
"description": "Include HTML content",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content",
"type": "boolean"
},
"onlyIncludeTags": {
"description": "Include only these tags",
"items": {
"type": "string"
},
"type": "array"
},
"onlyMainContent": {
"description": "Only return main content",
"type": "boolean"
},
"removeTags": {
"description": "Remove these tags",
"items": {
"type": "string"
},
"type": "array"
},
"replaceAllPathsWithAbsolutePaths": {
"description": "Replace relative paths",
"type": "boolean"
},
"screenshot": {
"description": "Include screenshot",
"type": "boolean"
},
"waitFor": {
"description": "Wait time in ms",
"type": "integer"
}
},
"type": "object"
},
"timeout": {
"description": "Timeout in ms",
"type": "integer"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
},
"required": true
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"properties": {
"content": {
"type": "string"
},
"html": {
"type": "string"
},
"llm_extraction": {
"type": "object"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"pageError": {
"type": "string"
},
"pageStatusCode": {
"type": "integer"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"rawHtml": {
"type": "string"
},
"warning": {
"type": "string"
}
},
"type": "object"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful scrape"
}
},
"security": [
{
"bearerAuth": []
}
],
"summary": "Scrape a webpage"
}
},
"/v0/search": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"pageOptions": {
"properties": {
"fetchPageContent": {
"description": "Fetch page content",
"type": "boolean"
}
},
"type": "object"
},
"query": {
"description": "Search term",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"items": {
"properties": {
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"url": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful search"
}
},
"summary": "Search and extract content"
}
}
}
}

View File

@ -1,287 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/ericciarla/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import os\n",
"import datetime\n",
"import time\n",
"from firecrawl import FirecrawlApp\n",
"import json\n",
"import google.generativeai as genai\n",
"from dotenv import load_dotenv\n",
"\n",
"# Load environment variables\n",
"load_dotenv()\n",
"\n",
"# Retrieve API keys from environment variables\n",
"google_api_key = os.getenv(\"GOOGLE_API_KEY\")\n",
"firecrawl_api_key = os.getenv(\"FIRECRAWL_API_KEY\")\n",
"\n",
"# Configure the Google Generative AI module with the API key\n",
"genai.configure(api_key=google_api_key)\n",
"model = genai.GenerativeModel(\"gemini-1.5-pro-001\")\n",
"\n",
"# Set the docs URL\n",
"docs_url=\"https://docs.firecrawl.dev\"\n",
"\n",
"# Initialize the FirecrawlApp with your API key\n",
"app = FirecrawlApp(api_key=firecrawl_api_key)\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"36\n"
]
}
],
"source": [
"# Crawl all pages on docs\n",
"params = {\n",
" \"pageOptions\": {\n",
" \"onlyMainContent\": True\n",
" },\n",
"}\n",
"crawl_result = app.crawl_url(docs_url, params=params)\n",
"\n",
"print(len(crawl_result))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"prompt_instructions = f\"\"\"Given the following API documentation content, generate an OpenAPI 3.0 specification in JSON format ONLY if you are 100% confident and clear about all details. Focus on extracting the main endpoints, their HTTP methods, parameters, request bodies, and responses. The specification should follow OpenAPI 3.0 structure and conventions. Include only the 200 response for each endpoint. Limit all descriptions to 5 words or less.\n",
"\n",
"If there is ANY uncertainty, lack of complete information, or if you are not 100% confident about ANY part of the specification, return an empty JSON object {{}}.\n",
"\n",
"Do not make anything up. Only include information that is explicitly provided in the documentation. If any detail is unclear or missing, do not attempt to fill it in.\n",
"\n",
"API Documentation Content:\n",
"{{content}}\n",
"\n",
"Generate the OpenAPI 3.0 specification in JSON format ONLY if you are 100% confident about every single detail. Include only the JSON object, no additional text, and ensure it has no errors in the JSON format so it can be parsed. Remember to include only the 200 response for each endpoint and keep all descriptions to 5 words maximum.\n",
"\n",
"Once again, if there is ANY doubt, uncertainty, or lack of complete information, return an empty JSON object {{}}.\n",
"\n",
"To reiterate: accuracy is paramount. Do not make anything up. If you are not 100% clear or confident about the entire OpenAPI spec, return an empty JSON object {{}}.\n",
"\"\"\"\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"API specification saved to docs.firecrawl.dev/api_spec_0.json\n",
"API specification saved to docs.firecrawl.dev/api_spec_1.json\n",
"API specification saved to docs.firecrawl.dev/api_spec_2.json\n",
"API specification saved to docs.firecrawl.dev/api_spec_3.json\n",
"API specification saved to docs.firecrawl.dev/api_spec_4.json\n",
"An error occurred for page 5: 'content'\n",
"No API specification found for page 6\n",
"API specification saved to docs.firecrawl.dev/api_spec_7.json\n",
"No API specification found for page 8\n",
"No API specification found for page 9\n",
"API specification saved to docs.firecrawl.dev/api_spec_10.json\n",
"No API specification found for page 11\n",
"No API specification found for page 12\n",
"API specification saved to docs.firecrawl.dev/api_spec_13.json\n",
"No API specification found for page 14\n",
"No API specification found for page 15\n",
"No API specification found for page 16\n",
"No API specification found for page 17\n",
"No API specification found for page 18\n",
"No API specification found for page 19\n",
"No API specification found for page 20\n",
"No API specification found for page 21\n",
"No API specification found for page 22\n",
"No API specification found for page 23\n",
"No API specification found for page 24\n",
"No API specification found for page 25\n",
"No API specification found for page 26\n",
"No API specification found for page 27\n",
"No API specification found for page 28\n",
"No API specification found for page 29\n",
"No API specification found for page 30\n",
"No API specification found for page 31\n",
"No API specification found for page 32\n",
"No API specification found for page 33\n",
"No API specification found for page 34\n",
"No API specification found for page 35\n",
"Total API specifications collected: 8\n"
]
}
],
"source": [
"# Create a folder for storing API specs\n",
"import os\n",
"import urllib.parse\n",
"\n",
"folder_name = urllib.parse.urlparse(docs_url).netloc\n",
"os.makedirs(folder_name, exist_ok=True)\n",
"\n",
"# Initialize a list to store all API specs\n",
"all_api_specs = []\n",
"\n",
"# Process each page in crawl_result\n",
"for index, result in enumerate(crawl_result):\n",
" if 'content' in result:\n",
" # Update prompt_instructions with the current page's content\n",
" current_prompt = prompt_instructions.replace(\"{content}\", result['content'])\n",
" try:\n",
" # Query the model\n",
" response = model.generate_content([current_prompt])\n",
" response_dict = response.to_dict()\n",
" response_text = response_dict['candidates'][0]['content']['parts'][0]['text']\n",
" \n",
" # Remove the ```json code wrap if present\n",
" response_text = response_text.strip().removeprefix('```json').removesuffix('```').strip()\n",
" \n",
" # Parse JSON\n",
" json_data = json.loads(response_text)\n",
" \n",
" # Save non-empty API specs\n",
" if json_data != {}:\n",
" output_file = os.path.join(folder_name, f'api_spec_{index}.json')\n",
" with open(output_file, 'w') as f:\n",
" json.dump(json_data, f, indent=2, sort_keys=True)\n",
" print(f\"API specification saved to {output_file}\")\n",
" \n",
" # Add the API spec to the list\n",
" all_api_specs.append(json_data)\n",
" else:\n",
" print(f\"No API specification found for page {index}\")\n",
" \n",
" except json.JSONDecodeError:\n",
" print(f\"Error parsing JSON response for page {index}\")\n",
" except Exception as e:\n",
" print(f\"An error occurred for page {index}: {str(e)}\")\n",
"\n",
"# Print the total number of API specs collected\n",
"print(f\"Total API specifications collected: {len(all_api_specs)}\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Combined API specification saved to docs.firecrawl.dev/combined_api_spec.json\n",
"Total paths in combined spec: 8\n",
"Total schemas in combined spec: 0\n"
]
}
],
"source": [
"# Combine all API specs and keep the most filled out spec for each path and method\n",
"combined_spec = {\n",
" \"openapi\": \"3.0.0\",\n",
" \"info\": {\n",
" \"title\": f\"{docs_url} API Specification\",\n",
" \"version\": \"1.0.0\"\n",
" },\n",
" \"paths\": {},\n",
" \"components\": {\n",
" \"schemas\": {}\n",
" }\n",
"}\n",
"\n",
"def count_properties(obj):\n",
" if isinstance(obj, dict):\n",
" return sum(count_properties(v) for v in obj.values()) + len(obj)\n",
" elif isinstance(obj, list):\n",
" return sum(count_properties(item) for item in obj)\n",
" else:\n",
" return 1\n",
"\n",
"for spec in all_api_specs:\n",
" if \"paths\" in spec:\n",
" for path, methods in spec[\"paths\"].items():\n",
" if path not in combined_spec[\"paths\"]:\n",
" combined_spec[\"paths\"][path] = {}\n",
" for method, details in methods.items():\n",
" if method not in combined_spec[\"paths\"][path] or count_properties(details) > count_properties(combined_spec[\"paths\"][path][method]):\n",
" combined_spec[\"paths\"][path][method] = details\n",
"\n",
" if \"components\" in spec and \"schemas\" in spec[\"components\"]:\n",
" for schema_name, schema in spec[\"components\"][\"schemas\"].items():\n",
" if schema_name not in combined_spec[\"components\"][\"schemas\"] or count_properties(schema) > count_properties(combined_spec[\"components\"][\"schemas\"][schema_name]):\n",
" combined_spec[\"components\"][\"schemas\"][schema_name] = schema\n",
"\n",
"# Save the combined API spec\n",
"output_file = os.path.join(folder_name, 'combined_api_spec.json')\n",
"with open(output_file, 'w') as f:\n",
" json.dump(combined_spec, f, indent=2, sort_keys=True)\n",
"\n",
"print(f\"Combined API specification saved to {output_file}\")\n",
"print(f\"Total paths in combined spec: {len(combined_spec['paths'])}\")\n",
"print(f\"Total schemas in combined spec: {len(combined_spec['components']['schemas'])}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# note: turn this into a simple web app like roast my site\n",
"- select which methods you want to add\n",
"- generate a UI for each method\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,137 @@
# %%
import os
import datetime
import time
from firecrawl import FirecrawlApp
import json
import google.generativeai as genai
from dotenv import load_dotenv
# Load environment variables via python-dotenv before reading any of them
load_dotenv()
# Read both service credentials from the process environment
google_api_key = os.environ.get("GOOGLE_API_KEY")
firecrawl_api_key = os.environ.get("FIRECRAWL_API_KEY")
# Root of the documentation site that will be crawled
docs_url = "https://docs.firecrawl.dev/api-reference"
# Gemini client: configure the module with the key, then pick the model
genai.configure(api_key=google_api_key)
model = genai.GenerativeModel("gemini-1.5-pro-001")
# Firecrawl client used for crawling the docs
app = FirecrawlApp(api_key=firecrawl_api_key)
# %%
# Crawl the documentation site starting from docs_url and report how many
# page records came back under the result's 'data' key
crawl_result = app.crawl_url(docs_url)
page_count = len(crawl_result['data'])
print(f"Total pages crawled: {page_count}")
# %%
# Define the prompt instructions for generating OpenAPI specs.
# This is a plain (non-f) string, so braces are literal and must NOT be
# doubled: "{content}" is the placeholder that the processing loop swaps for a
# page's markdown with str.replace, and "{}" is the empty JSON object the model
# is asked to return when it is not confident. (Doubled braces would tell the
# model to answer "{{}}", which json.loads rejects, and would leave the page
# content wrapped in stray "{" / "}" characters.)
prompt_instructions = """
Given the following API documentation content, generate an OpenAPI 3.0 specification in JSON format ONLY if you are 100% confident and clear about all details. Focus on extracting the main endpoints, their HTTP methods, parameters, request bodies, and responses. The specification should follow OpenAPI 3.0 structure and conventions. Include only the 200 response for each endpoint. Limit all descriptions to 5 words or less.
If there is ANY uncertainty, lack of complete information, or if you are not 100% confident about ANY part of the specification, return an empty JSON object {}.
Do not make anything up. Only include information that is explicitly provided in the documentation. If any detail is unclear or missing, do not attempt to fill it in.
API Documentation Content:
{content}
Generate the OpenAPI 3.0 specification in JSON format ONLY if you are 100% confident about every single detail. Include only the JSON object, no additional text, and ensure it has no errors in the JSON format so it can be parsed. Remember to include only the 200 response for each endpoint and keep all descriptions to 5 words maximum.
Once again, if there is ANY doubt, uncertainty, or lack of complete information, return an empty JSON object {}.
To reiterate: accuracy is paramount. Do not make anything up. If you are not 100% clear or confident about the entire OpenAPI spec, return an empty JSON object {}.
"""
# %%
# Accumulates one parsed OpenAPI fragment per page that yields a non-empty spec
all_api_specs = []
# Walk every crawled page record
for index, page in enumerate(crawl_result['data']):
    # Records without a 'markdown' field are skipped without any message
    if 'markdown' not in page:
        continue
    # Substitute this page's markdown for the placeholder in the prompt
    current_prompt = prompt_instructions.replace("{content}", page['markdown'])
    try:
        # Ask the model and dig the text of the first candidate's first part
        # out of the dict form of the response
        response = model.generate_content([current_prompt])
        first_candidate = response.to_dict()['candidates'][0]
        response_text = first_candidate['content']['parts'][0]['text']
        # Drop a surrounding ```json ... ``` fence if the model added one
        response_text = response_text.strip().removeprefix('```json').removesuffix('```').strip()
        # Parse the remaining text as JSON
        json_data = json.loads(response_text)
        # An empty object is the model's "not confident" answer; keep the rest
        if json_data == {}:
            print(f"No API specification found for page {index}")
        else:
            all_api_specs.append(json_data)
            print(f"API specification generated for page {index}")
    except json.JSONDecodeError:
        print(f"Error parsing JSON response for page {index}")
    except Exception as e:
        print(f"An error occurred for page {index}: {str(e)}")
# Report how many fragments were kept
print(f"Total API specifications collected: {len(all_api_specs)}")
# %%
# Empty skeleton of the merged document; the per-page specs are folded into
# its "paths" and "components" sections further down, keeping the most filled
# out entry for each path and method
combined_spec = dict(
    openapi="3.0.0",
    info=dict(
        title=f"{docs_url} API Specification",
        version="1.0.0",
    ),
    paths={},
    components=dict(schemas={}),
)
# Helper used to rank competing definitions by how "filled out" they are
def count_properties(obj):
    """Return a size score for a nested JSON-like value.

    A dict scores its number of keys plus the scores of all its values, a
    list scores the sum of its items' scores, and anything else scores 1.
    """
    if isinstance(obj, list):
        return sum(map(count_properties, obj))
    if not isinstance(obj, dict):
        return 1
    total = len(obj)
    for value in obj.values():
        total += count_properties(value)
    return total
# Combine specs, keeping the most detailed version (highest count_properties
# score) of each path/method and of each reusable component
for spec in all_api_specs:
    # Combine paths
    if "paths" in spec:
        for path, methods in spec["paths"].items():
            merged_methods = combined_spec["paths"].setdefault(path, {})
            for method, details in methods.items():
                if method not in merged_methods or count_properties(details) > count_properties(merged_methods[method]):
                    merged_methods[method] = details
    # Combine every component section, not only "schemas": operations carry
    # "security" requirements that name entries under
    # components.securitySchemes, and dropping those definitions would leave
    # the combined document with dangling references
    if "components" in spec:
        for section, definitions in spec["components"].items():
            # Model output is not guaranteed to be well formed; ignore any
            # section that is not a name -> definition mapping
            if not isinstance(definitions, dict):
                continue
            merged_section = combined_spec["components"].setdefault(section, {})
            for name, definition in definitions.items():
                if name not in merged_section or count_properties(definition) > count_properties(merged_section[name]):
                    merged_section[name] = definition
# Print summary of combined spec
print("Combined API specification generated")
print(f"Total paths in combined spec: {len(combined_spec['paths'])}")
print(f"Total schemas in combined spec: {len(combined_spec['components']['schemas'])}")
# Save the combined spec to a JSON file in the same directory as the Python file
output_file = os.path.join(os.path.dirname(__file__), "combined_api_spec.json")
with open(output_file, "w") as f:
    json.dump(combined_spec, f, indent=2)
print(f"Combined API specification saved to {output_file}")