From db926a4146192c03a3e023a5a9e9b4d570bc21a7 Mon Sep 17 00:00:00 2001 From: tak-s Date: Sun, 4 Aug 2024 15:05:53 +0900 Subject: [PATCH 01/22] set LOGGING_LEVEL to environment --- docker-compose.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yaml b/docker-compose.yaml index 4974e8b8..ffcbc4ee 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -29,6 +29,7 @@ x-common-service: &common-service - SCRAPING_BEE_API_KEY=${SCRAPING_BEE_API_KEY} - HOST=${HOST:-0.0.0.0} - SELF_HOSTED_WEBHOOK_URL=${SELF_HOSTED_WEBHOOK_URL} + - LOGGING_LEVEL=${LOGGING_LEVEL} extra_hosts: - "host.docker.internal:host-gateway" From af9bc5c8bbe30d0c271697be95a58c19895f132e Mon Sep 17 00:00:00 2001 From: tak-s Date: Sun, 4 Aug 2024 15:09:36 +0900 Subject: [PATCH 02/22] Suppressed repetitive logs --- apps/api/src/services/supabase.ts | 8 -------- 1 file changed, 8 deletions(-) diff --git a/apps/api/src/services/supabase.ts b/apps/api/src/services/supabase.ts index d34f7b52..70ada12b 100644 --- a/apps/api/src/services/supabase.ts +++ b/apps/api/src/services/supabase.ts @@ -36,17 +36,9 @@ export const supabase_service: SupabaseClient = new Proxy( new SupabaseService(), { get: function (target, prop, receiver) { - if (process.env.USE_DB_AUTHENTICATION === "false") { - Logger.debug( - "Attempted to access Supabase client when it's not configured." - ); - } const client = target.getClient(); // If the Supabase client is not initialized, intercept property access to provide meaningful error feedback. if (client === null) { - Logger.error( - "Attempted to access Supabase client when it's not configured." - ); return () => { throw new Error("Supabase client is not configured."); }; From 1378ffc138a8d1b0165da722800c94d8e64281c1 Mon Sep 17 00:00:00 2001 From: KentHsu Date: Sun, 4 Aug 2024 17:14:55 +0800 Subject: [PATCH 03/22] feat: add go-sdk --- apps/go-sdk/.env.example | 2 + apps/go-sdk/LICENSE | 21 + apps/go-sdk/README.md | 189 ++++++++ apps/go-sdk/examples/example.go | 83 ++++ apps/go-sdk/examples/go.mod | 10 + apps/go-sdk/examples/go.sum | 12 + apps/go-sdk/firecrawl/firecrawl.go | 580 ++++++++++++++++++++++++ apps/go-sdk/firecrawl/firecrawl_test.go | 292 ++++++++++++ apps/go-sdk/go.mod | 15 + apps/go-sdk/go.sum | 14 + 10 files changed, 1218 insertions(+) create mode 100644 apps/go-sdk/.env.example create mode 100644 apps/go-sdk/LICENSE create mode 100644 apps/go-sdk/README.md create mode 100644 apps/go-sdk/examples/example.go create mode 100644 apps/go-sdk/examples/go.mod create mode 100644 apps/go-sdk/examples/go.sum create mode 100644 apps/go-sdk/firecrawl/firecrawl.go create mode 100644 apps/go-sdk/firecrawl/firecrawl_test.go create mode 100644 apps/go-sdk/go.mod create mode 100644 apps/go-sdk/go.sum diff --git a/apps/go-sdk/.env.example b/apps/go-sdk/.env.example new file mode 100644 index 00000000..772a6243 --- /dev/null +++ b/apps/go-sdk/.env.example @@ -0,0 +1,2 @@ +API_URL=http://localhost:3002 +TEST_API_KEY=fc-YOUR-API-KEY diff --git a/apps/go-sdk/LICENSE b/apps/go-sdk/LICENSE new file mode 100644 index 00000000..2635155f --- /dev/null +++ b/apps/go-sdk/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Sideguide Technologies Inc. 
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/apps/go-sdk/README.md b/apps/go-sdk/README.md
new file mode 100644
index 00000000..353d28d6
--- /dev/null
+++ b/apps/go-sdk/README.md
@@ -0,0 +1,189 @@
+# Firecrawl Go SDK
+
+The Firecrawl Go SDK is a library that allows you to easily scrape and crawl websites, and output the data in a format ready for use with language models (LLMs). It provides a simple and intuitive interface for interacting with the Firecrawl API.
+
+## Installation
+
+To install the Firecrawl Go SDK, use `go get`:
+
+```bash
+go get github.com/mendableai/firecrawl/go-sdk/firecrawl
+```
+
+## Usage
+
+1. Get an API key from [firecrawl.dev](https://firecrawl.dev)
+2. Set the API key as an environment variable named `FIRECRAWL_API_KEY`, or pass it as a parameter to the `NewFirecrawlApp` function.
+
+Here's an example of how to use the SDK with error handling:
+
+```go
+package main
+
+import (
+	"fmt"
+	"log"
+
+	"github.com/mendableai/firecrawl/go-sdk/firecrawl"
+)
+
+func main() {
+	// Initialize the FirecrawlApp with your API key; an empty API URL
+	// falls back to the default endpoint
+	app, err := firecrawl.NewFirecrawlApp("YOUR_API_KEY", "")
+	if err != nil {
+		log.Fatalf("Failed to initialize FirecrawlApp: %v", err)
+	}
+
+	// Scrape a single URL
+	url := "https://mendable.ai"
+	scrapedData, err := app.ScrapeURL(url, nil)
+	if err != nil {
+		log.Fatalf("Error occurred while scraping: %v", err)
+	}
+	fmt.Println(scrapedData)
+
+	// Crawl a website
+	crawlUrl := "https://mendable.ai"
+	params := map[string]any{
+		"pageOptions": map[string]any{
+			"onlyMainContent": true,
+		},
+	}
+
+	// Wait for completion, polling every 2 seconds, with no idempotency key
+	crawlResult, err := app.CrawlURL(crawlUrl, params, true, 2, "")
+	if err != nil {
+		log.Fatalf("Error occurred while crawling: %v", err)
+	}
+	fmt.Println(crawlResult)
+}
+```
+
+### Scraping a URL
+
+To scrape a single URL with error handling, use the `ScrapeURL` method. It takes the URL and an optional params map as parameters and returns the scraped data as a `*FirecrawlDocument`.
+
+```go
+url := "https://mendable.ai"
+scrapedData, err := app.ScrapeURL(url, nil)
+if err != nil {
+	log.Fatalf("Failed to scrape URL: %v", err)
+}
+fmt.Println(scrapedData)
+```
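+
+The params map can also tune page handling. As a minimal sketch (the `pageOptions.includeHtml` flag below mirrors the option exercised in this SDK's own tests), you can request the raw HTML alongside the scraped content:
+
+```go
+// Request raw HTML in addition to the default content/markdown.
+params := map[string]any{
+	"pageOptions": map[string]any{
+		"includeHtml": true,
+	},
+}
+scrapedData, err := app.ScrapeURL("https://mendable.ai", params)
+if err != nil {
+	log.Fatalf("Failed to scrape URL: %v", err)
+}
+fmt.Println(scrapedData.HTML)
+```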
+
+### Extracting structured data from a URL
+
+With LLM extraction, you can easily extract structured data from any URL. Here is how to use it:
+
+```go
+jsonSchema := map[string]any{
+	"type": "object",
+	"properties": map[string]any{
+		"top": map[string]any{
+			"type": "array",
+			"items": map[string]any{
+				"type": "object",
+				"properties": map[string]any{
+					"title":       map[string]string{"type": "string"},
+					"points":      map[string]string{"type": "number"},
+					"by":          map[string]string{"type": "string"},
+					"commentsURL": map[string]string{"type": "string"},
+				},
+				"required": []string{"title", "points", "by", "commentsURL"},
+			},
+			"minItems":    5,
+			"maxItems":    5,
+			"description": "Top 5 stories on Hacker News",
+		},
+	},
+	"required": []string{"top"},
+}
+
+llmExtractionParams := map[string]any{
+	"extractorOptions": firecrawl.ExtractorOptions{
+		ExtractionSchema: jsonSchema,
+	},
+}
+
+scrapeResult, err := app.ScrapeURL("https://news.ycombinator.com", llmExtractionParams)
+if err != nil {
+	log.Fatalf("Failed to perform LLM extraction: %v", err)
+}
+fmt.Println(scrapeResult)
+```
+
+### Search for a query
+
+To search the web, get the most relevant results, scrape each page, and return the markdown, use the `Search` method. It takes the query and an optional params map as parameters and returns the search results.
+
+```go
+query := "what is mendable?"
+searchResult, err := app.Search(query, nil)
+if err != nil {
+	log.Fatalf("Failed to search: %v", err)
+}
+fmt.Println(searchResult)
+```
+
+### Crawling a Website
+
+To crawl a website, use the `CrawlURL` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
+
+```go
+crawlParams := map[string]any{
+	"crawlerOptions": map[string]any{
+		"excludes": []string{"blog/*"},
+		"includes": []string{}, // leave empty for all pages
+		"limit":    1000,
+	},
+	"pageOptions": map[string]any{
+		"onlyMainContent": true,
+	},
+}
+idempotencyKey := "" // optional; set a unique value to make the request idempotent
+crawlResult, err := app.CrawlURL("mendable.ai", crawlParams, true, 2, idempotencyKey)
+if err != nil {
+	log.Fatalf("Failed to crawl URL: %v", err)
+}
+fmt.Println(crawlResult)
+```
+
+### Checking Crawl Status
+
+To check the status of a crawl job, use the `CheckCrawlStatus` method. It takes the job ID as a parameter and returns the current status of the crawl job.
+
+```go
+status, err := app.CheckCrawlStatus(jobId)
+if err != nil {
+	log.Fatalf("Failed to check crawl status: %v", err)
+}
+fmt.Println(status)
+```
+
+### Canceling a Crawl Job
+
+To cancel a crawl job, use the `CancelCrawlJob` method. It takes the job ID as a parameter and returns the cancellation status of the crawl job.
+
+```go
+canceled, err := app.CancelCrawlJob(jobId)
+if err != nil {
+	log.Fatalf("Failed to cancel crawl job: %v", err)
+}
+fmt.Println(canceled)
+```
+
+## Error Handling
+
+The SDK surfaces errors returned by the Firecrawl API as Go `error` values. If a request fails, the returned error carries a descriptive message.
+
+## Contributing
+
+Contributions to the Firecrawl Go SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository.
+
+## License
+
+The Firecrawl Go SDK is licensed under the MIT License.
This means you are free to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the SDK, subject to the following conditions: + +- The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +Please note that while this SDK is MIT licensed, it is part of a larger project which may be under different licensing terms. Always refer to the license information in the root directory of the main project for overall licensing details. diff --git a/apps/go-sdk/examples/example.go b/apps/go-sdk/examples/example.go new file mode 100644 index 00000000..ce8470e9 --- /dev/null +++ b/apps/go-sdk/examples/example.go @@ -0,0 +1,83 @@ +package main + +import ( + "encoding/json" + "fmt" + "log" + + "github.com/google/uuid" + "github.com/mendableai/firecrawl/go-sdk/firecrawl" +) + +func main() { + app, err := firecrawl.NewFirecrawlApp("fc-YOUR-API-KEY", "http://localhost:3002") + if err != nil { + log.Fatalf("Failed to create FirecrawlApp: %v", err) + } + + // Scrape a website + scrapeResult, err := app.ScrapeURL("firecrawl.dev", nil) + if err != nil { + log.Fatalf("Failed to scrape URL: %v", err) + } + fmt.Println(scrapeResult.Markdown) + + // Crawl a website + idempotencyKey := uuid.New().String() // optional idempotency key + crawlParams := map[string]any{ + "crawlerOptions": map[string]any{ + "excludes": []string{"blog/*"}, + }, + } + crawlResult, err := app.CrawlURL("mendable.ai", crawlParams, true, 2, idempotencyKey) + if err != nil { + log.Fatalf("Failed to crawl URL: %v", err) + } + fmt.Println(crawlResult) + + // LLM Extraction using JSON schema + jsonSchema := map[string]any{ + "type": "object", + "properties": map[string]any{ + "top": map[string]any{ + "type": "array", + "items": map[string]any{ + "type": "object", + "properties": map[string]any{ + "title": map[string]string{"type": "string"}, + "points": map[string]string{"type": "number"}, + "by": map[string]string{"type": "string"}, + "commentsURL": map[string]string{"type": "string"}, + }, + "required": []string{"title", "points", "by", "commentsURL"}, + }, + "minItems": 5, + "maxItems": 5, + "description": "Top 5 stories on Hacker News", + }, + }, + "required": []string{"top"}, + } + + llmExtractionParams := map[string]any{ + "extractorOptions": firecrawl.ExtractorOptions{ + ExtractionSchema: jsonSchema, + Mode: "llm-extraction", + }, + "pageOptions": map[string]any{ + "onlyMainContent": true, + }, + } + + llmExtractionResult, err := app.ScrapeURL("https://news.ycombinator.com", llmExtractionParams) + if err != nil { + log.Fatalf("Failed to perform LLM extraction: %v", err) + } + + // Pretty print the LLM extraction result + jsonResult, err := json.MarshalIndent(llmExtractionResult.LLMExtraction, "", " ") + if err != nil { + log.Fatalf("Failed to marshal LLM extraction result: %v", err) + } + fmt.Println(string(jsonResult)) +} diff --git a/apps/go-sdk/examples/go.mod b/apps/go-sdk/examples/go.mod new file mode 100644 index 00000000..e3c5335d --- /dev/null +++ b/apps/go-sdk/examples/go.mod @@ 
-0,0 +1,10 @@ +module github.com/mendableai/firecrawl/go-sdk/examples + +go 1.22.5 + +replace github.com/mendableai/firecrawl/go-sdk => ../ + +require ( + github.com/google/uuid v1.6.0 + github.com/mendableai/firecrawl/go-sdk v0.0.0-00010101000000-000000000000 +) diff --git a/apps/go-sdk/examples/go.sum b/apps/go-sdk/examples/go.sum new file mode 100644 index 00000000..e724cfb0 --- /dev/null +++ b/apps/go-sdk/examples/go.sum @@ -0,0 +1,12 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/apps/go-sdk/firecrawl/firecrawl.go b/apps/go-sdk/firecrawl/firecrawl.go new file mode 100644 index 00000000..b9f50f08 --- /dev/null +++ b/apps/go-sdk/firecrawl/firecrawl.go @@ -0,0 +1,580 @@ +// Package firecrawl provides a client for interacting with the Firecrawl API. +package firecrawl + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "math" + "net/http" + "os" + "time" +) + +// FirecrawlDocumentMetadata represents metadata for a Firecrawl document +type FirecrawlDocumentMetadata struct { + Title string `json:"title,omitempty"` + Description string `json:"description,omitempty"` + Language string `json:"language,omitempty"` + Keywords string `json:"keywords,omitempty"` + Robots string `json:"robots,omitempty"` + OGTitle string `json:"ogTitle,omitempty"` + OGDescription string `json:"ogDescription,omitempty"` + OGURL string `json:"ogUrl,omitempty"` + OGImage string `json:"ogImage,omitempty"` + OGAudio string `json:"ogAudio,omitempty"` + OGDeterminer string `json:"ogDeterminer,omitempty"` + OGLocale string `json:"ogLocale,omitempty"` + OGLocaleAlternate []string `json:"ogLocaleAlternate,omitempty"` + OGSiteName string `json:"ogSiteName,omitempty"` + OGVideo string `json:"ogVideo,omitempty"` + DCTermsCreated string `json:"dctermsCreated,omitempty"` + DCDateCreated string `json:"dcDateCreated,omitempty"` + DCDate string `json:"dcDate,omitempty"` + DCTermsType string `json:"dctermsType,omitempty"` + DCType string `json:"dcType,omitempty"` + DCTermsAudience string `json:"dctermsAudience,omitempty"` + DCTermsSubject string `json:"dctermsSubject,omitempty"` + DCSubject string `json:"dcSubject,omitempty"` + DCDescription string `json:"dcDescription,omitempty"` + DCTermsKeywords string `json:"dctermsKeywords,omitempty"` + ModifiedTime string `json:"modifiedTime,omitempty"` + PublishedTime string `json:"publishedTime,omitempty"` + ArticleTag string `json:"articleTag,omitempty"` + ArticleSection string `json:"articleSection,omitempty"` + SourceURL string `json:"sourceURL,omitempty"` + PageStatusCode int `json:"pageStatusCode,omitempty"` + 
PageError string `json:"pageError,omitempty"` +} + +// FirecrawlDocument represents a document in Firecrawl +type FirecrawlDocument struct { + ID string `json:"id,omitempty"` + URL string `json:"url,omitempty"` + Content string `json:"content"` + Markdown string `json:"markdown,omitempty"` + HTML string `json:"html,omitempty"` + LLMExtraction map[string]any `json:"llm_extraction,omitempty"` + CreatedAt *time.Time `json:"createdAt,omitempty"` + UpdatedAt *time.Time `json:"updatedAt,omitempty"` + Type string `json:"type,omitempty"` + Metadata *FirecrawlDocumentMetadata `json:"metadata,omitempty"` + ChildrenLinks []string `json:"childrenLinks,omitempty"` + Provider string `json:"provider,omitempty"` + Warning string `json:"warning,omitempty"` + Index int `json:"index,omitempty"` +} + +// ExtractorOptions represents options for extraction. +type ExtractorOptions struct { + Mode string `json:"mode,omitempty"` + ExtractionPrompt string `json:"extractionPrompt,omitempty"` + ExtractionSchema any `json:"extractionSchema,omitempty"` +} + +// ScrapeResponse represents the response for scraping operations +type ScrapeResponse struct { + Success bool `json:"success"` + Data *FirecrawlDocument `json:"data,omitempty"` +} + +// SearchResponse represents the response for searching operations +type SearchResponse struct { + Success bool `json:"success"` + Data []*FirecrawlDocument `json:"data,omitempty"` +} + +// CrawlResponse represents the response for crawling operations +type CrawlResponse struct { + Success bool `json:"success"` + JobID string `json:"jobId,omitempty"` + Data []*FirecrawlDocument `json:"data,omitempty"` +} + +// JobStatusResponse represents the response for checking crawl job status +type JobStatusResponse struct { + Success bool `json:"success"` + Status string `json:"status"` + Current int `json:"current,omitempty"` + CurrentURL string `json:"current_url,omitempty"` + CurrentStep string `json:"current_step,omitempty"` + Total int `json:"total,omitempty"` + JobID string `json:"jobId,omitempty"` + Data []*FirecrawlDocument `json:"data,omitempty"` + PartialData []*FirecrawlDocument `json:"partial_data,omitempty"` +} + +// CancelCrawlJobResponse represents the response for canceling a crawl job +type CancelCrawlJobResponse struct { + Success bool `json:"success"` + Status string `json:"status"` +} + +// requestOptions represents options for making requests. +type requestOptions struct { + retries int + backoff int +} + +// requestOption is a functional option type for requestOptions. +type requestOption func(*requestOptions) + +// newRequestOptions creates a new requestOptions instance with the provided options. +// +// Parameters: +// - opts: Optional request options. +// +// Returns: +// - *requestOptions: A new instance of requestOptions with the provided options. +func newRequestOptions(opts ...requestOption) *requestOptions { + options := &requestOptions{retries: 1} + for _, opt := range opts { + opt(options) + } + return options +} + +// withRetries sets the number of retries for a request. +// +// Parameters: +// - retries: The number of retries to be performed. +// +// Returns: +// - requestOption: A functional option that sets the number of retries for a request. +func withRetries(retries int) requestOption { + return func(opts *requestOptions) { + opts.retries = retries + } +} + +// withBackoff sets the backoff interval for a request. +// +// Parameters: +// - backoff: The backoff interval (in milliseconds) to be used for retries. 
+// +// Returns: +// - requestOption: A functional option that sets the backoff interval for a request. +func withBackoff(backoff int) requestOption { + return func(opts *requestOptions) { + opts.backoff = backoff + } +} + +// FirecrawlApp represents a client for the Firecrawl API. +type FirecrawlApp struct { + APIKey string + APIURL string + Client *http.Client +} + +// NewFirecrawlApp creates a new instance of FirecrawlApp with the provided API key and API URL. +// If the API key or API URL is not provided, it attempts to retrieve them from environment variables. +// If the API key is still not found, it returns an error. +// +// Parameters: +// - apiKey: The API key for authenticating with the Firecrawl API. If empty, it will be retrieved from the FIRECRAWL_API_KEY environment variable. +// - apiURL: The base URL for the Firecrawl API. If empty, it will be retrieved from the FIRECRAWL_API_URL environment variable, defaulting to "https://api.firecrawl.dev". +// +// Returns: +// - *FirecrawlApp: A new instance of FirecrawlApp configured with the provided or retrieved API key and API URL. +// - error: An error if the API key is not provided or retrieved. +func NewFirecrawlApp(apiKey, apiURL string) (*FirecrawlApp, error) { + if apiKey == "" { + apiKey = os.Getenv("FIRECRAWL_API_KEY") + if apiKey == "" { + return nil, fmt.Errorf("no API key provided") + } + } + + if apiURL == "" { + apiURL = os.Getenv("FIRECRAWL_API_URL") + if apiURL == "" { + apiURL = "https://api.firecrawl.dev" + } + } + + client := &http.Client{ + Timeout: 30 * time.Second, + } + + return &FirecrawlApp{ + APIKey: apiKey, + APIURL: apiURL, + Client: client, + }, nil +} + +// ScrapeURL scrapes the content of the specified URL using the Firecrawl API. +// +// Parameters: +// - url: The URL to be scraped. +// - params: Optional parameters for the scrape request, including extractor options for LLM extraction. +// +// Returns: +// - *FirecrawlDocument: The scraped document data. +// - error: An error if the scrape request fails. +func (app *FirecrawlApp) ScrapeURL(url string, params map[string]any) (*FirecrawlDocument, error) { + headers := app.prepareHeaders("") + scrapeBody := map[string]any{"url": url} + + if params != nil { + if extractorOptions, ok := params["extractorOptions"].(ExtractorOptions); ok { + if schema, ok := extractorOptions.ExtractionSchema.(interface{ schema() any }); ok { + extractorOptions.ExtractionSchema = schema.schema() + } + if extractorOptions.Mode == "" { + extractorOptions.Mode = "llm-extraction" + } + scrapeBody["extractorOptions"] = extractorOptions + } + + for key, value := range params { + if key != "extractorOptions" { + scrapeBody[key] = value + } + } + } + + resp, err := app.makeRequest( + http.MethodPost, + fmt.Sprintf("%s/v0/scrape", app.APIURL), + scrapeBody, + headers, + "scrape URL", + ) + if err != nil { + return nil, err + } + + var scrapeResponse ScrapeResponse + err = json.Unmarshal(resp, &scrapeResponse) + if err != nil { + return nil, err + } + + if scrapeResponse.Success { + return scrapeResponse.Data, nil + } + + return nil, fmt.Errorf("failed to scrape URL") +} + +// Search performs a search query using the Firecrawl API and returns the search results. +// +// Parameters: +// - query: The search query string. +// - params: Optional parameters for the search request. +// +// Returns: +// - []*FirecrawlDocument: A slice of FirecrawlDocument containing the search results. +// - error: An error if the search request fails. 
+func (app *FirecrawlApp) Search(query string, params map[string]any) ([]*FirecrawlDocument, error) { + headers := app.prepareHeaders("") + searchBody := map[string]any{"query": query} + for k, v := range params { + searchBody[k] = v + } + + resp, err := app.makeRequest( + http.MethodPost, + fmt.Sprintf("%s/v0/search", app.APIURL), + searchBody, + headers, + "search", + ) + if err != nil { + return nil, err + } + + var searchResponse SearchResponse + err = json.Unmarshal(resp, &searchResponse) + if err != nil { + return nil, err + } + + if searchResponse.Success { + return searchResponse.Data, nil + } + + return nil, fmt.Errorf("failed to search") +} + +// CrawlURL starts a crawl job for the specified URL using the Firecrawl API. +// +// Parameters: +// - url: The URL to crawl. +// - params: Optional parameters for the crawl request. +// - waitUntilDone: If true, the method will wait until the crawl job is completed before returning. +// - pollInterval: The interval (in seconds) at which to poll the job status if waitUntilDone is true. +// - idempotencyKey: An optional idempotency key to ensure the request is idempotent. +// +// Returns: +// - any: The job ID if waitUntilDone is false, or the crawl result if waitUntilDone is true. +// - error: An error if the crawl request fails. +func (app *FirecrawlApp) CrawlURL(url string, params map[string]any, waitUntilDone bool, pollInterval int, idempotencyKey string) (any, error) { + headers := app.prepareHeaders(idempotencyKey) + crawlBody := map[string]any{"url": url} + for k, v := range params { + crawlBody[k] = v + } + + resp, err := app.makeRequest( + http.MethodPost, + fmt.Sprintf("%s/v0/crawl", app.APIURL), + crawlBody, + headers, + "start crawl job", + withRetries(3), + withBackoff(500), + ) + if err != nil { + return nil, err + } + + var crawlResponse CrawlResponse + err = json.Unmarshal(resp, &crawlResponse) + if err != nil { + return nil, err + } + + if waitUntilDone { + return app.monitorJobStatus(crawlResponse.JobID, headers, pollInterval) + } + + if crawlResponse.JobID == "" { + return nil, fmt.Errorf("failed to get job ID") + } + + return crawlResponse.JobID, nil +} + +// CheckCrawlStatus checks the status of a crawl job using the Firecrawl API. +// +// Parameters: +// - jobID: The ID of the crawl job to check. +// +// Returns: +// - *JobStatusResponse: The status of the crawl job. +// - error: An error if the crawl status check request fails. +func (app *FirecrawlApp) CheckCrawlStatus(jobID string) (*JobStatusResponse, error) { + headers := app.prepareHeaders("") + resp, err := app.makeRequest( + http.MethodGet, + fmt.Sprintf("%s/v0/crawl/status/%s", app.APIURL, jobID), + nil, + headers, + "check crawl status", + withRetries(3), + withBackoff(500), + ) + if err != nil { + return nil, err + } + + var jobStatusResponse JobStatusResponse + err = json.Unmarshal(resp, &jobStatusResponse) + if err != nil { + return nil, err + } + + return &jobStatusResponse, nil +} + +// CancelCrawlJob cancels a crawl job using the Firecrawl API. +// +// Parameters: +// - jobID: The ID of the crawl job to cancel. +// +// Returns: +// - string: The status of the crawl job after cancellation. +// - error: An error if the crawl job cancellation request fails. 
+func (app *FirecrawlApp) CancelCrawlJob(jobID string) (string, error) { + headers := app.prepareHeaders("") + resp, err := app.makeRequest( + http.MethodDelete, + fmt.Sprintf("%s/v0/crawl/cancel/%s", app.APIURL, jobID), + nil, + headers, + "cancel crawl job", + ) + if err != nil { + return "", err + } + + var cancelCrawlJobResponse CancelCrawlJobResponse + err = json.Unmarshal(resp, &cancelCrawlJobResponse) + if err != nil { + return "", err + } + + return cancelCrawlJobResponse.Status, nil +} + +// prepareHeaders prepares the headers for an HTTP request. +// +// Parameters: +// - idempotencyKey: A string representing the idempotency key to be included in the headers. +// If the idempotency key is an empty string, it will not be included in the headers. +// +// Returns: +// - map[string]string: A map containing the headers for the HTTP request. +func (app *FirecrawlApp) prepareHeaders(idempotencyKey string) map[string]string { + headers := map[string]string{ + "Content-Type": "application/json", + "Authorization": fmt.Sprintf("Bearer %s", app.APIKey), + } + if idempotencyKey != "" { + headers["x-idempotency-key"] = idempotencyKey + } + return headers +} + +// makeRequest makes a request to the specified URL with the provided method, data, headers, and options. +// +// Parameters: +// - method: The HTTP method to use for the request (e.g., "GET", "POST", "DELETE"). +// - url: The URL to send the request to. +// - data: The data to be sent in the request body. +// - headers: The headers to be included in the request. +// - action: A string describing the action being performed. +// - opts: Optional request options. +// +// Returns: +// - []byte: The response body from the request. +// - error: An error if the request fails. +func (app *FirecrawlApp) makeRequest(method, url string, data map[string]any, headers map[string]string, action string, opts ...requestOption) ([]byte, error) { + var body []byte + var err error + if data != nil { + body, err = json.Marshal(data) + if err != nil { + return nil, err + } + } + + req, err := http.NewRequest(method, url, bytes.NewBuffer(body)) + if err != nil { + return nil, err + } + + for key, value := range headers { + req.Header.Set(key, value) + } + + var resp *http.Response + options := newRequestOptions(opts...) + for i := 0; i < options.retries; i++ { + resp, err = app.Client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != 502 { + break + } + + time.Sleep(time.Duration(math.Pow(2, float64(i))) * time.Duration(options.backoff) * time.Millisecond) + } + + respBody, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + statusCode := resp.StatusCode + if statusCode != 200 { + return nil, app.handleError(statusCode, respBody, action) + } + + return respBody, nil +} + +// monitorJobStatus monitors the status of a crawl job using the Firecrawl API. +// +// Parameters: +// - jobID: The ID of the crawl job to monitor. +// - headers: The headers to be included in the request. +// - pollInterval: The interval (in seconds) at which to poll the job status. +// +// Returns: +// - []*FirecrawlDocument: The crawl result if the job is completed. +// - error: An error if the crawl status check request fails. 
+func (app *FirecrawlApp) monitorJobStatus(jobID string, headers map[string]string, pollInterval int) ([]*FirecrawlDocument, error) { + for { + resp, err := app.makeRequest( + http.MethodGet, + fmt.Sprintf("%s/v0/crawl/status/%s", app.APIURL, jobID), + nil, + headers, + "check crawl status", + withRetries(3), + withBackoff(500), + ) + if err != nil { + return nil, err + } + + var statusData JobStatusResponse + err = json.Unmarshal(resp, &statusData) + if err != nil { + return nil, err + } + + status := statusData.Status + if status == "" { + return nil, fmt.Errorf("invalid status in response") + } + + if status == "completed" { + if statusData.Data != nil { + return statusData.Data, nil + } + return nil, fmt.Errorf("crawl job completed but no data was returned") + } else if status == "active" || status == "paused" || status == "pending" || status == "queued" || status == "waiting" { + pollInterval = max(pollInterval, 2) + time.Sleep(time.Duration(pollInterval) * time.Second) + } else { + return nil, fmt.Errorf("crawl job failed or was stopped. Status: %s", status) + } + } +} + +// handleError handles errors returned by the Firecrawl API. +// +// Parameters: +// - resp: The HTTP response object. +// - body: The response body from the HTTP response. +// - action: A string describing the action being performed. +// +// Returns: +// - error: An error describing the failure reason. +func (app *FirecrawlApp) handleError(statusCode int, body []byte, action string) error { + var errorData map[string]any + err := json.Unmarshal(body, &errorData) + if err != nil { + return fmt.Errorf("failed to parse error response: %v", err) + } + + errorMessage, _ := errorData["error"].(string) + if errorMessage == "" { + errorMessage = "No additional error details provided." + } + + var message string + switch statusCode { + case 402: + message = fmt.Sprintf("Payment Required: Failed to %s. %s", action, errorMessage) + case 408: + message = fmt.Sprintf("Request Timeout: Failed to %s as the request timed out. %s", action, errorMessage) + case 409: + message = fmt.Sprintf("Conflict: Failed to %s due to a conflict. %s", action, errorMessage) + case 500: + message = fmt.Sprintf("Internal Server Error: Failed to %s. %s", action, errorMessage) + default: + message = fmt.Sprintf("Unexpected error during %s: Status code %d. %s", action, statusCode, errorMessage) + } + + return fmt.Errorf(message) +} diff --git a/apps/go-sdk/firecrawl/firecrawl_test.go b/apps/go-sdk/firecrawl/firecrawl_test.go new file mode 100644 index 00000000..8a3aacb3 --- /dev/null +++ b/apps/go-sdk/firecrawl/firecrawl_test.go @@ -0,0 +1,292 @@ +package firecrawl + +import ( + "log" + "os" + "testing" + "time" + + "github.com/google/uuid" + "github.com/joho/godotenv" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var API_URL string +var TEST_API_KEY string + +func init() { + err := godotenv.Load() + if err != nil { + log.Fatalf("Error loading .env file: %v", err) + } + API_URL = os.Getenv("API_URL") + TEST_API_KEY = os.Getenv("TEST_API_KEY") +} + +func TestNoAPIKey(t *testing.T) { + _, err := NewFirecrawlApp("", API_URL) + assert.Error(t, err) + assert.Contains(t, err.Error(), "no API key provided") +} + +func TestScrapeURLInvalidAPIKey(t *testing.T) { + app, err := NewFirecrawlApp("invalid_api_key", API_URL) + require.NoError(t, err) + + _, err = app.ScrapeURL("https://firecrawl.dev", nil) + assert.Error(t, err) + assert.Contains(t, err.Error(), "Unexpected error during scrape URL: Status code 401. 
Unauthorized: Invalid token") +} + +func TestBlocklistedURL(t *testing.T) { + app, err := NewFirecrawlApp(TEST_API_KEY, API_URL) + require.NoError(t, err) + + _, err = app.ScrapeURL("https://facebook.com/fake-test", nil) + assert.Error(t, err) + assert.Contains(t, err.Error(), "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions.") +} + +func TestSuccessfulResponseWithValidPreviewToken(t *testing.T) { + app, err := NewFirecrawlApp("this_is_just_a_preview_token", API_URL) + require.NoError(t, err) + + response, err := app.ScrapeURL("https://roastmywebsite.ai", nil) + require.NoError(t, err) + assert.NotNil(t, response) + + assert.Contains(t, response.Content, "_Roast_") +} + +func TestScrapeURLE2E(t *testing.T) { + app, err := NewFirecrawlApp(TEST_API_KEY, API_URL) + require.NoError(t, err) + + response, err := app.ScrapeURL("https://roastmywebsite.ai", nil) + require.NoError(t, err) + assert.NotNil(t, response) + + assert.Contains(t, response.Content, "_Roast_") + assert.NotEqual(t, response.Markdown, "") + assert.NotNil(t, response.Metadata) + assert.Equal(t, response.HTML, "") +} + +func TestSuccessfulResponseWithValidAPIKeyAndIncludeHTML(t *testing.T) { + app, err := NewFirecrawlApp(TEST_API_KEY, API_URL) + require.NoError(t, err) + + params := map[string]any{ + "pageOptions": map[string]any{ + "includeHtml": true, + }, + } + response, err := app.ScrapeURL("https://roastmywebsite.ai", params) + require.NoError(t, err) + assert.NotNil(t, response) + + assert.Contains(t, response.Content, "_Roast_") + assert.Contains(t, response.Markdown, "_Roast_") + assert.Contains(t, response.HTML, " Date: Sun, 4 Aug 2024 17:15:31 +0800 Subject: [PATCH 04/22] chore: add go-sdk-tests job --- .github/workflows/fly.yml | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml index 3f34f860..1b9fe6b9 100644 --- a/.github/workflows/fly.yml +++ b/.github/workflows/fly.yml @@ -169,6 +169,41 @@ jobs: run: npm run test working-directory: ./apps/js-sdk/firecrawl + go-sdk-tests: + name: Go SDK Tests + needs: pre-deploy-e2e-tests + runs-on: ubuntu-latest + services: + redis: + image: redis + ports: + - 6379:6379 + steps: + - uses: actions/checkout@v3 + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: "go.mod" + - name: Install pnpm + run: npm install -g pnpm + - name: Install dependencies + run: pnpm install + working-directory: ./apps/api + - name: Start the application + run: npm start & + working-directory: ./apps/api + id: start_app + - name: Start workers + run: npm run workers & + working-directory: ./apps/api + id: start_workers + - name: Install dependencies for Go SDK + run: go mod tidy + working-directory: ./apps/go-sdk + - name: Run tests for Go SDK + run: go test -v ./... 
+ working-directory: ./apps/go-sdk/firecrawl + deploy: name: Deploy app runs-on: ubuntu-latest From b12e1157cc3628c109346753f54518b29b95168d Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 7 Aug 2024 10:40:00 -0400 Subject: [PATCH 05/22] Nick: v35 bump --- apps/js-sdk/firecrawl/build/esm/index.js | 14 +++++++------- apps/js-sdk/firecrawl/package-lock.json | 4 ++-- apps/js-sdk/firecrawl/package.json | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/apps/js-sdk/firecrawl/build/esm/index.js b/apps/js-sdk/firecrawl/build/esm/index.js index ef79f180..99de5e2b 100644 --- a/apps/js-sdk/firecrawl/build/esm/index.js +++ b/apps/js-sdk/firecrawl/build/esm/index.js @@ -31,9 +31,9 @@ export default class FirecrawlApp { * @param {Params | null} params - Additional parameters for the scrape request. * @returns {Promise} The response from the scrape operation. */ - scrapeUrl(url, params = null) { - var _a; - return __awaiter(this, void 0, void 0, function* () { + scrapeUrl(url_1) { + return __awaiter(this, arguments, void 0, function* (url, params = null) { + var _a; const headers = { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}`, @@ -74,8 +74,8 @@ export default class FirecrawlApp { * @param {Params | null} params - Additional parameters for the search request. * @returns {Promise} The response from the search operation. */ - search(query, params = null) { - return __awaiter(this, void 0, void 0, function* () { + search(query_1) { + return __awaiter(this, arguments, void 0, function* (query, params = null) { const headers = { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}`, @@ -114,8 +114,8 @@ export default class FirecrawlApp { * @param {string} idempotencyKey - Optional idempotency key for the request. * @returns {Promise} The response from the crawl operation. 
*/ - crawlUrl(url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) { - return __awaiter(this, void 0, void 0, function* () { + crawlUrl(url_1) { + return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) { const headers = this.prepareHeaders(idempotencyKey); let jsonData = { url }; if (params) { diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json index 25b0e305..c42d6ca7 100644 --- a/apps/js-sdk/firecrawl/package-lock.json +++ b/apps/js-sdk/firecrawl/package-lock.json @@ -1,12 +1,12 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.29", + "version": "0.0.34", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@mendable/firecrawl-js", - "version": "0.0.29", + "version": "0.0.34", "license": "MIT", "dependencies": { "axios": "^1.6.8", diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index f50e7a4e..a76748d4 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.34", + "version": "0.0.35", "description": "JavaScript SDK for Firecrawl API", "main": "build/cjs/index.js", "types": "types/index.d.ts", From f1f56050100ebcfa4d81357f96f0544d3bc616dd Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 8 Aug 2024 12:31:58 -0400 Subject: [PATCH 06/22] Update website_params.ts --- .../src/scraper/WebScraper/utils/custom/website_params.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts b/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts index 8583e614..fcd3f69b 100644 --- a/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts +++ b/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts @@ -240,4 +240,12 @@ export const urlSpecificParams = { }, }, }, + "digikey.com":{ + defaultScraper: "fire-engine", + params:{ + fireEngineOptions:{ + engine: "tlsclient", + }, + }, + } }; From b802ea02a11942169c5f1c97c402918cc56b432d Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 9 Aug 2024 11:13:14 -0300 Subject: [PATCH 07/22] small improvements - wait for getting results on crawl: sometimes crawl takes some a second to save the data on the db and this causes response.data to be empty - added timeout value to test script - increased http client timeout (llm extract was failing on e2e tests) - fixed env path on test script --- .github/workflows/fly.yml | 2 +- apps/go-sdk/examples/example.go | 6 +++++- apps/go-sdk/firecrawl/firecrawl.go | 8 ++++++-- apps/go-sdk/firecrawl/firecrawl_test.go | 2 +- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml index 1b9fe6b9..9e4b85a8 100644 --- a/.github/workflows/fly.yml +++ b/.github/workflows/fly.yml @@ -201,7 +201,7 @@ jobs: run: go mod tidy working-directory: ./apps/go-sdk - name: Run tests for Go SDK - run: go test -v ./... + run: go test -v ./... 
-timeout 180s working-directory: ./apps/go-sdk/firecrawl deploy: diff --git a/apps/go-sdk/examples/example.go b/apps/go-sdk/examples/example.go index ce8470e9..75194965 100644 --- a/apps/go-sdk/examples/example.go +++ b/apps/go-sdk/examples/example.go @@ -33,7 +33,11 @@ func main() { if err != nil { log.Fatalf("Failed to crawl URL: %v", err) } - fmt.Println(crawlResult) + jsonCrawlResult, err := json.MarshalIndent(crawlResult, "", " ") + if err != nil { + log.Fatalf("Failed to marshal crawl result: %v", err) + } + fmt.Println(string(jsonCrawlResult)) // LLM Extraction using JSON schema jsonSchema := map[string]any{ diff --git a/apps/go-sdk/firecrawl/firecrawl.go b/apps/go-sdk/firecrawl/firecrawl.go index b9f50f08..9a9dcfef 100644 --- a/apps/go-sdk/firecrawl/firecrawl.go +++ b/apps/go-sdk/firecrawl/firecrawl.go @@ -195,7 +195,7 @@ func NewFirecrawlApp(apiKey, apiURL string) (*FirecrawlApp, error) { } client := &http.Client{ - Timeout: 30 * time.Second, + Timeout: 60 * time.Second, } return &FirecrawlApp{ @@ -502,6 +502,7 @@ func (app *FirecrawlApp) makeRequest(method, url string, data map[string]any, he // - []*FirecrawlDocument: The crawl result if the job is completed. // - error: An error if the crawl status check request fails. func (app *FirecrawlApp) monitorJobStatus(jobID string, headers map[string]string, pollInterval int) ([]*FirecrawlDocument, error) { + attempts := 0 for { resp, err := app.makeRequest( http.MethodGet, @@ -531,7 +532,10 @@ func (app *FirecrawlApp) monitorJobStatus(jobID string, headers map[string]strin if statusData.Data != nil { return statusData.Data, nil } - return nil, fmt.Errorf("crawl job completed but no data was returned") + attempts++ + if attempts > 3 { + return nil, fmt.Errorf("crawl job completed but no data was returned") + } } else if status == "active" || status == "paused" || status == "pending" || status == "queued" || status == "waiting" { pollInterval = max(pollInterval, 2) time.Sleep(time.Duration(pollInterval) * time.Second) diff --git a/apps/go-sdk/firecrawl/firecrawl_test.go b/apps/go-sdk/firecrawl/firecrawl_test.go index 8a3aacb3..9d56c7ac 100644 --- a/apps/go-sdk/firecrawl/firecrawl_test.go +++ b/apps/go-sdk/firecrawl/firecrawl_test.go @@ -16,7 +16,7 @@ var API_URL string var TEST_API_KEY string func init() { - err := godotenv.Load() + err := godotenv.Load("../.env") if err != nil { log.Fatalf("Error loading .env file: %v", err) } From 0221872a70989fd79869d0d71b21184e5301d02f Mon Sep 17 00:00:00 2001 From: Quan Ming <116425066+wahpiangle@users.noreply.github.com> Date: Sat, 10 Aug 2024 00:40:11 +0800 Subject: [PATCH 08/22] Update redis urls in example .env --- apps/api/.env.example | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/.env.example b/apps/api/.env.example index 08ff7d7f..c62bac54 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -2,8 +2,8 @@ NUM_WORKERS_PER_QUEUE=8 PORT=3002 HOST=0.0.0.0 -REDIS_URL=redis://localhost:6379 -REDIS_RATE_LIMIT_URL=redis://localhost:6379 +REDIS_URL=redis://redis:6379 +REDIS_RATE_LIMIT_URL=redis://redis:6379 PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/html ## To turn on DB authentication, you need to set up supabase. 
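
The patch above points the example REDIS_URL at the docker-compose service name `redis` instead of `localhost`; which hostname resolves depends on where the process runs. As a standalone, hypothetical sketch (not part of this series), a plain TCP dial is a quick way to check both candidates from your runtime:

```go
package main

import (
	"fmt"
	"net"
	"time"
)

func main() {
	// "redis" resolves only inside the docker-compose network;
	// "localhost" only works when Redis is listening on the host itself.
	for _, addr := range []string{"redis:6379", "localhost:6379"} {
		conn, err := net.DialTimeout("tcp", addr, 2*time.Second)
		if err != nil {
			fmt.Printf("%s: unreachable (%v)\n", addr, err)
			continue
		}
		conn.Close()
		fmt.Printf("%s: reachable\n", addr)
	}
}
```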
From 0591000b64ad586a4d24c74298a5167bc296726e Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 9 Aug 2024 14:30:41 -0300 Subject: [PATCH 09/22] bugfix includes excludes --- apps/api/src/scraper/WebScraper/index.ts | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index e667fa6b..859127bd 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -168,11 +168,29 @@ export class WebScraperDataProvider { private async handleCrawlMode( inProgress?: (progress: Progress) => void ): Promise { + let includes: string[]; + if (Array.isArray(this.includes)) { + if (this.includes[0] != "") { + includes = this.includes; + } + } else { + includes = this.includes.split(','); + } + + let excludes: string[]; + if (Array.isArray(this.excludes)) { + if (this.excludes[0] != "") { + excludes = this.excludes; + } + } else { + excludes = this.excludes.split(','); + } + const crawler = new WebCrawler({ jobId: this.jobId, initialUrl: this.urls[0], - includes: Array.isArray(this.includes) ? this.includes : this.includes.split(','), - excludes: Array.isArray(this.excludes) ? this.excludes : this.excludes.split(','), + includes, + excludes, maxCrawledLinks: this.maxCrawledLinks, maxCrawledDepth: getAdjustedMaxDepth(this.urls[0], this.maxCrawledDepth), limit: this.limit, From 5a778f2c22a451f1eead5eb9733bcd462d3cd081 Mon Sep 17 00:00:00 2001 From: Gergo Moricz Date: Fri, 9 Aug 2024 20:05:29 +0200 Subject: [PATCH 10/22] fix(js-sdk): add type metadata to exports --- apps/js-sdk/firecrawl/package.json | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index a76748d4..380d972b 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -6,8 +6,14 @@ "types": "types/index.d.ts", "type": "module", "exports": { - "require": "./build/cjs/index.js", - "import": "./build/esm/index.js" + "require": { + "types": "./types/index.d.ts", + "default": "./build/cjs/index.js" + }, + "import": { + "types": "./types/index.d.ts", + "default": "./build/esm/index.js" + } }, "scripts": { "build": "tsc --module commonjs --moduleResolution node10 --outDir build/cjs/ && echo '{\"type\": \"commonjs\"}' > build/cjs/package.json && npx tsc --module NodeNext --moduleResolution NodeNext --outDir build/esm/ && echo '{\"type\": \"module\"}' > build/esm/package.json", From a96ad4b0e208f2c9faa3218cdfcb85ce526a23bf Mon Sep 17 00:00:00 2001 From: Quan Ming <116425066+wahpiangle@users.noreply.github.com> Date: Sat, 10 Aug 2024 12:33:26 +0800 Subject: [PATCH 11/22] Update redis url to use comment --- apps/api/.env.example | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/.env.example b/apps/api/.env.example index c62bac54..9f96e8a6 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -2,8 +2,8 @@ NUM_WORKERS_PER_QUEUE=8 PORT=3002 HOST=0.0.0.0 -REDIS_URL=redis://redis:6379 -REDIS_RATE_LIMIT_URL=redis://redis:6379 +REDIS_URL=redis://localhost:6379 #for self-hosting using docker, use redis://redis:6379. For running locally, use redis://localhost:6379 +REDIS_RATE_LIMIT_URL=redis://localhost:6379 #for self-hosting using docker, use redis://redis:6379. 
For running locally, use redis://localhost:6379
 PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/html
 
 ## To turn on DB authentication, you need to set up supabase.

From fe179d0cb14b7accd6b7da87451890faab51720a Mon Sep 17 00:00:00 2001
From: Quan Ming <116425066+wahpiangle@users.noreply.github.com>
Date: Sat, 10 Aug 2024 12:39:22 +0800
Subject: [PATCH 12/22] Update redis troubleshooting in self host guide

---
 SELF_HOST.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/SELF_HOST.md b/SELF_HOST.md
index 43bc3757..75066e48 100644
--- a/SELF_HOST.md
+++ b/SELF_HOST.md
@@ -160,7 +160,7 @@ Errors related to connecting to Redis, such as timeouts or "Connection refused".
 
 **Solution:**
 - Ensure that the Redis service is up and running in your Docker environment.
-- Verify that the REDIS_URL and REDIS_RATE_LIMIT_URL in your .env file point to the correct Redis instance.
+- Verify that the REDIS_URL and REDIS_RATE_LIMIT_URL in your .env file point to the correct Redis instance and match the URL used by the Redis service in the `docker-compose.yaml` file (`redis://redis:6379`).
 - Check network settings and firewall rules that may block the connection to the Redis port.
 
 ### API endpoint does not respond

From 36e4b2cf4945b3e1567d2f5aae18ab18a071bd9c Mon Sep 17 00:00:00 2001
From: Rafael Miller <150964962+rafaelsideguide@users.noreply.github.com>
Date: Mon, 12 Aug 2024 10:37:00 -0300
Subject: [PATCH 13/22] Update .env.example

---
 apps/api/.env.example | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/apps/api/.env.example b/apps/api/.env.example
index 9f96e8a6..87ecde77 100644
--- a/apps/api/.env.example
+++ b/apps/api/.env.example
@@ -2,8 +2,8 @@ NUM_WORKERS_PER_QUEUE=8
 PORT=3002
 HOST=0.0.0.0
-REDIS_URL=redis://localhost:6379 #for self-hosting using docker, use redis://redis:6379. For running locally, use redis://localhost:6379
-REDIS_RATE_LIMIT_URL=redis://localhost:6379 #for self-hosting using docker, use redis://redis:6379. For running locally, use redis://localhost:6379
+REDIS_URL=redis://redis:6379 #for self-hosting using docker, use redis://redis:6379. For running locally, use redis://localhost:6379
+REDIS_RATE_LIMIT_URL=redis://redis:6379 #for self-hosting using docker, use redis://redis:6379. For running locally, use redis://localhost:6379
 PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/html
 
 ## To turn on DB authentication, you need to set up supabase.
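
The next patch wraps the api-key-to-team lookup in a short-lived Redis lock (Redlock) with a cache in front, so concurrent requests presenting the same key result in a single database RPC. A minimal in-process sketch of that cache-behind-a-lock pattern, with a `sync.RWMutex` standing in for the distributed lock and illustrative names throughout:

```go
package main

import (
	"fmt"
	"sync"
)

// teamCache memoizes an expensive api-key -> team lookup so that
// concurrent callers with the same key perform it only once.
type teamCache struct {
	mu    sync.RWMutex
	cache map[string]string
}

func (c *teamCache) get(key string, fetch func(string) string) string {
	// Fast path: shared read lock, value already cached.
	c.mu.RLock()
	v, ok := c.cache[key]
	c.mu.RUnlock()
	if ok {
		return v
	}

	// Slow path: exclusive lock (Redlock across processes in the patch),
	// re-check, then fetch and populate the cache.
	c.mu.Lock()
	defer c.mu.Unlock()
	if v, ok := c.cache[key]; ok {
		return v
	}
	v = fetch(key)
	c.cache[key] = v
	return v
}

func main() {
	c := &teamCache{cache: map[string]string{}}
	fetches := 0
	fetch := func(k string) string { fetches++; return "team-1" }
	fmt.Println(c.get("api_key:abc", fetch))
	fmt.Println(c.get("api_key:abc", fetch))
	fmt.Println("fetches performed:", fetches) // 1
}
```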
From 25a899eae39ca201ce8b81d5dc783e8a1f221b6c Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 12 Aug 2024 13:37:47 -0400 Subject: [PATCH 14/22] Nick: redlock cache in auth --- apps/api/src/controllers/auth.ts | 105 +++++++++++++----- .../src/services/billing/credit_billing.ts | 29 +---- apps/api/src/services/redlock.ts | 29 +++++ apps/js-sdk/firecrawl/build/cjs/index.js | 14 +-- apps/js-sdk/firecrawl/package-lock.json | 4 +- apps/js-sdk/firecrawl/package.json | 2 +- 6 files changed, 119 insertions(+), 64 deletions(-) create mode 100644 apps/api/src/services/redlock.ts diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 5dff80b8..6887baea 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -7,7 +7,15 @@ import { RateLimiterRedis } from "rate-limiter-flexible"; import { setTraceAttributes } from '@hyperdx/node-opentelemetry'; import { sendNotification } from "../services/notification/email_notification"; import { Logger } from "../lib/logger"; +import { redlock } from "../../src/services/redlock"; +import { getValue } from "../../src/services/redis"; +import { setValue } from "../../src/services/redis"; +import { validate } from 'uuid'; +function normalizedApiIsUuid(potentialUuid: string): boolean { + // Check if the string is a valid UUID + return validate(potentialUuid); +} export async function authenticateUser(req, res, mode?: RateLimiterMode): Promise { return withAuth(supaAuthenticateUser)(req, res, mode); } @@ -54,17 +62,72 @@ export async function supaAuthenticateUser( let subscriptionData: { team_id: string, plan: string } | null = null; let normalizedApi: string; - let team_id: string; + let cacheKey= ""; + let redLockKey = ""; + const lockTTL = 5000; // 5 seconds + let teamId: string | null = null; + let priceId: string | null = null; if (token == "this_is_just_a_preview_token") { rateLimiter = getRateLimiter(RateLimiterMode.Preview, token); - team_id = "preview"; + teamId = "preview"; } else { normalizedApi = parseApi(token); + if(!normalizedApiIsUuid(normalizedApi)){ + return { + success: false, + error: "Unauthorized: Invalid token", + status: 401, + }; + } + cacheKey = `api_key:${normalizedApi}`; + redLockKey = `redlock:${cacheKey}`; + + try{ + const lock = await redlock.acquire([redLockKey], lockTTL); - const { data, error } = await supabase_service.rpc( - 'get_key_and_price_id_2', { api_key: normalizedApi } - ); + try{ + + const teamIdPriceId = await getValue(cacheKey); + if(teamIdPriceId){ + const { team_id, price_id } = JSON.parse(teamIdPriceId); + teamId = team_id; + priceId = price_id; + } + else{ + const { data, error } = await supabase_service.rpc( + 'get_key_and_price_id_2', { api_key: normalizedApi } + ); + if(error){ + Logger.error(`RPC ERROR (get_key_and_price_id_2): ${error.message}`); + return { + success: false, + error: "The server seems overloaded. Please contact hello@firecrawl.com if you aren't sending too many requests at once.", + status: 500, + }; + } + if (!data || data.length === 0) { + Logger.warn(`Error fetching api key: ${error.message} or data is empty`); + // TODO: change this error code ? 
+ return { + success: false, + error: "Unauthorized: Invalid token", + status: 401, + }; + } + else { + teamId = data[0].team_id; + priceId = data[0].price_id; + } + } + }finally{ + await lock.release(); + } + }catch(error){ + Logger.error(`Error acquiring the rate limiter lock: ${error}`); + } + + // get_key_and_price_id_2 rpc definition: // create or replace function get_key_and_price_id_2(api_key uuid) // returns table(key uuid, team_id uuid, price_id text) as $$ @@ -82,30 +145,12 @@ export async function supaAuthenticateUser( // end; // $$ language plpgsql; - if (error) { - Logger.warn(`Error fetching key and price_id: ${error.message}`); - } else { - // console.log('Key and Price ID:', data); - } - - - if (error || !data || data.length === 0) { - Logger.warn(`Error fetching api key: ${error.message} or data is empty`); - return { - success: false, - error: "Unauthorized: Invalid token", - status: 401, - }; - } - const internal_team_id = data[0].team_id; - team_id = internal_team_id; - - const plan = getPlanByPriceId(data[0].price_id); + const plan = getPlanByPriceId(priceId); // HyperDX Logging - setTrace(team_id, normalizedApi); + setTrace(teamId, normalizedApi); subscriptionData = { - team_id: team_id, + team_id: teamId, plan: plan } switch (mode) { @@ -134,7 +179,7 @@ export async function supaAuthenticateUser( } } - const team_endpoint_token = token === "this_is_just_a_preview_token" ? iptoken : team_id; + const team_endpoint_token = token === "this_is_just_a_preview_token" ? iptoken : teamId; try { await rateLimiter.consume(team_endpoint_token); @@ -147,7 +192,13 @@ export async function supaAuthenticateUser( const startDate = new Date(); const endDate = new Date(); endDate.setDate(endDate.getDate() + 7); + // await sendNotification(team_id, NotificationType.RATE_LIMIT_REACHED, startDate.toISOString(), endDate.toISOString()); + // TODO: cache 429 for a few minuts + if(teamId && priceId && mode !== RateLimiterMode.Preview){ + await setValue(cacheKey, JSON.stringify({team_id: teamId, price_id: priceId}), 60 * 5); + } + return { success: false, error: `Rate limit exceeded. Consumed points: ${rateLimiterRes.consumedPoints}, Remaining points: ${rateLimiterRes.remainingPoints}. Upgrade your plan at https://firecrawl.dev/pricing for increased rate limits or please retry after ${secs}s, resets at ${retryDate}`, diff --git a/apps/api/src/services/billing/credit_billing.ts b/apps/api/src/services/billing/credit_billing.ts index 765d028e..d25289b2 100644 --- a/apps/api/src/services/billing/credit_billing.ts +++ b/apps/api/src/services/billing/credit_billing.ts @@ -4,37 +4,12 @@ import { sendNotification } from "../notification/email_notification"; import { supabase_service } from "../supabase"; import { Logger } from "../../lib/logger"; import { getValue, setValue } from "../redis"; -import Redlock from "redlock"; -import Client from "ioredis"; +import { redlock } from "../redlock"; + const FREE_CREDITS = 500; -const redlock = new Redlock( - // You should have one client for each independent redis node - // or cluster. - [new Client(process.env.REDIS_RATE_LIMIT_URL)], - { - // The expected clock drift; for more details see: - // http://redis.io/topics/distlock - driftFactor: 0.01, // multiplied by lock ttl to determine drift time - // The max number of times Redlock will attempt to lock a resource - // before erroring. 
- retryCount: 5, - - // the time in ms between attempts - retryDelay: 100, // time in ms - - // the max time in ms randomly added to retries - // to improve performance under high contention - // see https://www.awsarchitectureblog.com/2015/03/backoff.html - retryJitter: 200, // time in ms - - // The minimum remaining time on a lock before an extension is automatically - // attempted with the `using` API. - automaticExtensionThreshold: 500, // time in ms - } -); export async function billTeam(team_id: string, credits: number) { return withAuth(supaBillTeam)(team_id, credits); } diff --git a/apps/api/src/services/redlock.ts b/apps/api/src/services/redlock.ts new file mode 100644 index 00000000..9cbfc1fc --- /dev/null +++ b/apps/api/src/services/redlock.ts @@ -0,0 +1,29 @@ +import Redlock from "redlock"; +import Client from "ioredis"; + +export const redlock = new Redlock( + // You should have one client for each independent redis node + // or cluster. + [new Client(process.env.REDIS_RATE_LIMIT_URL)], + { + // The expected clock drift; for more details see: + // http://redis.io/topics/distlock + driftFactor: 0.01, // multiplied by lock ttl to determine drift time + + // The max number of times Redlock will attempt to lock a resource + // before erroring. + retryCount: 5, + + // the time in ms between attempts + retryDelay: 100, // time in ms + + // the max time in ms randomly added to retries + // to improve performance under high contention + // see https://www.awsarchitectureblog.com/2015/03/backoff.html + retryJitter: 200, // time in ms + + // The minimum remaining time on a lock before an extension is automatically + // attempted with the `using` API. + automaticExtensionThreshold: 500, // time in ms + } +); diff --git a/apps/js-sdk/firecrawl/build/cjs/index.js b/apps/js-sdk/firecrawl/build/cjs/index.js index da340cae..dbc2d6b9 100644 --- a/apps/js-sdk/firecrawl/build/cjs/index.js +++ b/apps/js-sdk/firecrawl/build/cjs/index.js @@ -36,9 +36,9 @@ class FirecrawlApp { * @param {Params | null} params - Additional parameters for the scrape request. * @returns {Promise} The response from the scrape operation. */ - scrapeUrl(url, params = null) { - var _a; - return __awaiter(this, void 0, void 0, function* () { + scrapeUrl(url_1) { + return __awaiter(this, arguments, void 0, function* (url, params = null) { + var _a; const headers = { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}`, @@ -79,8 +79,8 @@ class FirecrawlApp { * @param {Params | null} params - Additional parameters for the search request. * @returns {Promise} The response from the search operation. */ - search(query, params = null) { - return __awaiter(this, void 0, void 0, function* () { + search(query_1) { + return __awaiter(this, arguments, void 0, function* (query, params = null) { const headers = { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}`, @@ -119,8 +119,8 @@ class FirecrawlApp { * @param {string} idempotencyKey - Optional idempotency key for the request. * @returns {Promise} The response from the crawl operation. 
*/ - crawlUrl(url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) { - return __awaiter(this, void 0, void 0, function* () { + crawlUrl(url_1) { + return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) { const headers = this.prepareHeaders(idempotencyKey); let jsonData = { url }; if (params) { diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json index c42d6ca7..4d9254ac 100644 --- a/apps/js-sdk/firecrawl/package-lock.json +++ b/apps/js-sdk/firecrawl/package-lock.json @@ -1,12 +1,12 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.34", + "version": "0.0.36", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@mendable/firecrawl-js", - "version": "0.0.34", + "version": "0.0.36", "license": "MIT", "dependencies": { "axios": "^1.6.8", diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 380d972b..4b857b65 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.35", + "version": "0.0.36", "description": "JavaScript SDK for Firecrawl API", "main": "build/cjs/index.js", "types": "types/index.d.ts", From 0bd1a820eef738df0a85db8f85ce122918302342 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 12 Aug 2024 13:42:09 -0400 Subject: [PATCH 15/22] Update auth.ts --- apps/api/src/controllers/auth.ts | 131 +++++++++++++++++++------------ 1 file changed, 81 insertions(+), 50 deletions(-) diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 6887baea..a2bc1ea1 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -1,34 +1,41 @@ import { parseApi } from "../../src/lib/parseApi"; -import { getRateLimiter, } from "../../src/services/rate-limiter"; -import { AuthResponse, NotificationType, RateLimiterMode } from "../../src/types"; +import { getRateLimiter } from "../../src/services/rate-limiter"; +import { + AuthResponse, + NotificationType, + RateLimiterMode, +} from "../../src/types"; import { supabase_service } from "../../src/services/supabase"; import { withAuth } from "../../src/lib/withAuth"; import { RateLimiterRedis } from "rate-limiter-flexible"; -import { setTraceAttributes } from '@hyperdx/node-opentelemetry'; +import { setTraceAttributes } from "@hyperdx/node-opentelemetry"; import { sendNotification } from "../services/notification/email_notification"; import { Logger } from "../lib/logger"; import { redlock } from "../../src/services/redlock"; import { getValue } from "../../src/services/redis"; import { setValue } from "../../src/services/redis"; -import { validate } from 'uuid'; +import { validate } from "uuid"; function normalizedApiIsUuid(potentialUuid: string): boolean { // Check if the string is a valid UUID return validate(potentialUuid); } -export async function authenticateUser(req, res, mode?: RateLimiterMode): Promise { +export async function authenticateUser( + req, + res, + mode?: RateLimiterMode +): Promise { return withAuth(supaAuthenticateUser)(req, res, mode); } function setTrace(team_id: string, api_key: string) { try { setTraceAttributes({ team_id, - api_key + api_key, }); } catch (error) { Logger.error(`Error setting trace attributes: ${error.message}`); } - } export async function supaAuthenticateUser( req, @@ -59,10 +66,10 @@ export async function supaAuthenticateUser( const iptoken = incomingIP + token; let 
rateLimiter: RateLimiterRedis; - let subscriptionData: { team_id: string, plan: string } | null = null; + let subscriptionData: { team_id: string; plan: string } | null = null; let normalizedApi: string; - let cacheKey= ""; + let cacheKey = ""; let redLockKey = ""; const lockTTL = 5000; // 5 seconds let teamId: string | null = null; @@ -73,7 +80,7 @@ export async function supaAuthenticateUser( teamId = "preview"; } else { normalizedApi = parseApi(token); - if(!normalizedApiIsUuid(normalizedApi)){ + if (!normalizedApiIsUuid(normalizedApi)) { return { success: false, error: "Unauthorized: Invalid token", @@ -82,52 +89,56 @@ export async function supaAuthenticateUser( } cacheKey = `api_key:${normalizedApi}`; redLockKey = `redlock:${cacheKey}`; - - try{ + + try { const lock = await redlock.acquire([redLockKey], lockTTL); - try{ - + try { const teamIdPriceId = await getValue(cacheKey); - if(teamIdPriceId){ + if (teamIdPriceId) { const { team_id, price_id } = JSON.parse(teamIdPriceId); teamId = team_id; priceId = price_id; - } - else{ + } else { const { data, error } = await supabase_service.rpc( - 'get_key_and_price_id_2', { api_key: normalizedApi } + "get_key_and_price_id_2", + { api_key: normalizedApi } ); - if(error){ - Logger.error(`RPC ERROR (get_key_and_price_id_2): ${error.message}`); + if (error) { + Logger.error( + `RPC ERROR (get_key_and_price_id_2): ${error.message}` + ); return { success: false, - error: "The server seems overloaded. Please contact hello@firecrawl.com if you aren't sending too many requests at once.", + error: + "The server seems overloaded. Please contact hello@firecrawl.com if you aren't sending too many requests at once.", status: 500, }; } if (!data || data.length === 0) { - Logger.warn(`Error fetching api key: ${error.message} or data is empty`); + Logger.warn( + `Error fetching api key: ${error.message} or data is empty` + ); // TODO: change this error code ? 
return { success: false, error: "Unauthorized: Invalid token", status: 401, }; - } - else { + } else { teamId = data[0].team_id; priceId = data[0].price_id; } } - }finally{ + } catch (error) { + Logger.error(`Error with auth function: ${error.message}`); + } finally { await lock.release(); } - }catch(error){ + } catch (error) { Logger.error(`Error acquiring the rate limiter lock: ${error}`); } - // get_key_and_price_id_2 rpc definition: // create or replace function get_key_and_price_id_2(api_key uuid) // returns table(key uuid, team_id uuid, price_id text) as $$ @@ -145,28 +156,39 @@ export async function supaAuthenticateUser( // end; // $$ language plpgsql; - const plan = getPlanByPriceId(priceId); // HyperDX Logging setTrace(teamId, normalizedApi); subscriptionData = { team_id: teamId, - plan: plan - } + plan: plan, + }; switch (mode) { case RateLimiterMode.Crawl: - rateLimiter = getRateLimiter(RateLimiterMode.Crawl, token, subscriptionData.plan); + rateLimiter = getRateLimiter( + RateLimiterMode.Crawl, + token, + subscriptionData.plan + ); break; case RateLimiterMode.Scrape: - rateLimiter = getRateLimiter(RateLimiterMode.Scrape, token, subscriptionData.plan); + rateLimiter = getRateLimiter( + RateLimiterMode.Scrape, + token, + subscriptionData.plan + ); break; case RateLimiterMode.Search: - rateLimiter = getRateLimiter(RateLimiterMode.Search, token, subscriptionData.plan); + rateLimiter = getRateLimiter( + RateLimiterMode.Search, + token, + subscriptionData.plan + ); break; case RateLimiterMode.CrawlStatus: rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token); break; - + case RateLimiterMode.Preview: rateLimiter = getRateLimiter(RateLimiterMode.Preview, token); break; @@ -179,7 +201,8 @@ export async function supaAuthenticateUser( } } - const team_endpoint_token = token === "this_is_just_a_preview_token" ? iptoken : teamId; + const team_endpoint_token = + token === "this_is_just_a_preview_token" ? iptoken : teamId; try { await rateLimiter.consume(team_endpoint_token); @@ -192,11 +215,15 @@ export async function supaAuthenticateUser( const startDate = new Date(); const endDate = new Date(); endDate.setDate(endDate.getDate() + 7); - + // await sendNotification(team_id, NotificationType.RATE_LIMIT_REACHED, startDate.toISOString(), endDate.toISOString()); // TODO: cache 429 for a few minuts - if(teamId && priceId && mode !== RateLimiterMode.Preview){ - await setValue(cacheKey, JSON.stringify({team_id: teamId, price_id: priceId}), 60 * 5); + if (teamId && priceId && mode !== RateLimiterMode.Preview) { + await setValue( + cacheKey, + JSON.stringify({ team_id: teamId, price_id: priceId }), + 60 * 5 + ); } return { @@ -208,7 +235,9 @@ export async function supaAuthenticateUser( if ( token === "this_is_just_a_preview_token" && - (mode === RateLimiterMode.Scrape || mode === RateLimiterMode.Preview || mode === RateLimiterMode.Search) + (mode === RateLimiterMode.Scrape || + mode === RateLimiterMode.Preview || + mode === RateLimiterMode.Search) ) { return { success: true, team_id: "preview" }; // check the origin of the request and make sure its from firecrawl.dev @@ -232,8 +261,6 @@ export async function supaAuthenticateUser( .select("*") .eq("key", normalizedApi); - - if (error || !data || data.length === 0) { Logger.warn(`Error fetching api key: ${error.message} or data is empty`); return { @@ -246,26 +273,30 @@ export async function supaAuthenticateUser( subscriptionData = data[0]; } - return { success: true, team_id: subscriptionData.team_id, plan: subscriptionData.plan ?? 
""}; + return { + success: true, + team_id: subscriptionData.team_id, + plan: subscriptionData.plan ?? "", + }; } function getPlanByPriceId(price_id: string) { switch (price_id) { case process.env.STRIPE_PRICE_ID_STARTER: - return 'starter'; + return "starter"; case process.env.STRIPE_PRICE_ID_STANDARD: - return 'standard'; + return "standard"; case process.env.STRIPE_PRICE_ID_SCALE: - return 'scale'; + return "scale"; case process.env.STRIPE_PRICE_ID_HOBBY: case process.env.STRIPE_PRICE_ID_HOBBY_YEARLY: - return 'hobby'; + return "hobby"; case process.env.STRIPE_PRICE_ID_STANDARD_NEW: case process.env.STRIPE_PRICE_ID_STANDARD_NEW_YEARLY: - return 'standardnew'; + return "standardnew"; case process.env.STRIPE_PRICE_ID_GROWTH: case process.env.STRIPE_PRICE_ID_GROWTH_YEARLY: - return 'growth'; + return "growth"; default: - return 'free'; + return "free"; } -} \ No newline at end of file +} From 74a512518503c797185138129e14281bda413a03 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 12 Aug 2024 15:07:30 -0400 Subject: [PATCH 16/22] Nick: removed redlock --- apps/api/src/controllers/auth.ts | 134 ++++++++++++++++++++----------- apps/api/src/index.ts | 2 + 2 files changed, 89 insertions(+), 47 deletions(-) diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index a2bc1ea1..e18a8a7c 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -37,6 +37,42 @@ function setTrace(team_id: string, api_key: string) { Logger.error(`Error setting trace attributes: ${error.message}`); } } + +async function getKeyAndPriceId(normalizedApi: string): Promise<{ + success: boolean; + teamId?: string; + priceId?: string; + error?: string; + status?: number; +}> { + const { data, error } = await supabase_service.rpc("get_key_and_price_id_2", { + api_key: normalizedApi, + }); + if (error) { + Logger.error(`RPC ERROR (get_key_and_price_id_2): ${error.message}`); + return { + success: false, + error: + "The server seems overloaded. Please contact hello@firecrawl.com if you aren't sending too many requests at once.", + status: 500, + }; + } + if (!data || data.length === 0) { + Logger.warn(`Error fetching api key: ${error.message} or data is empty`); + // TODO: change this error code ? + return { + success: false, + error: "Unauthorized: Invalid token", + status: 401, + }; + } else { + return { + success: true, + teamId: data[0].team_id, + priceId: data[0].price_id, + }; + } +} export async function supaAuthenticateUser( req, res, @@ -71,7 +107,7 @@ export async function supaAuthenticateUser( let cacheKey = ""; let redLockKey = ""; - const lockTTL = 5000; // 5 seconds + const lockTTL = 15000; // 10 seconds let teamId: string | null = null; let priceId: string | null = null; @@ -87,56 +123,60 @@ export async function supaAuthenticateUser( status: 401, }; } + cacheKey = `api_key:${normalizedApi}`; - redLockKey = `redlock:${cacheKey}`; try { - const lock = await redlock.acquire([redLockKey], lockTTL); - - try { - const teamIdPriceId = await getValue(cacheKey); - if (teamIdPriceId) { - const { team_id, price_id } = JSON.parse(teamIdPriceId); - teamId = team_id; - priceId = price_id; - } else { - const { data, error } = await supabase_service.rpc( - "get_key_and_price_id_2", - { api_key: normalizedApi } - ); - if (error) { - Logger.error( - `RPC ERROR (get_key_and_price_id_2): ${error.message}` - ); - return { - success: false, - error: - "The server seems overloaded. 
Please contact hello@firecrawl.com if you aren't sending too many requests at once.",
+      status: 500,
+    };
+  }
+  if (!data || data.length === 0) {
+    // `error` is null on this branch (a truthy error returns above), so don't dereference it
+    Logger.warn("Error fetching api key: data is empty");
+    // TODO: change this error code ?
+    return {
+      success: false,
+      error: "Unauthorized: Invalid token",
+      status: 401,
+    };
+  } else {
+    return {
+      success: true,
+      teamId: data[0].team_id,
+      priceId: data[0].price_id,
+    };
+  }
+}
 export async function supaAuthenticateUser(
   req,
   res,
@@ -71,7 +107,7 @@ export async function supaAuthenticateUser(
 
     let cacheKey = "";
     let redLockKey = "";
-    const lockTTL = 5000; // 5 seconds
+    const lockTTL = 15000; // 15 seconds
     let teamId: string | null = null;
    let priceId: string | null = null;

@@ -87,56 +123,48 @@ export async function supaAuthenticateUser(
         status: 401,
       };
     }
+
     cacheKey = `api_key:${normalizedApi}`;
-    redLockKey = `redlock:${cacheKey}`;
 
     try {
-      const lock = await redlock.acquire([redLockKey], lockTTL);
-
-      try {
-        const teamIdPriceId = await getValue(cacheKey);
-        if (teamIdPriceId) {
-          const { team_id, price_id } = JSON.parse(teamIdPriceId);
-          teamId = team_id;
-          priceId = price_id;
-        } else {
-          const { data, error } = await supabase_service.rpc(
-            "get_key_and_price_id_2",
-            { api_key: normalizedApi }
-          );
-          if (error) {
-            Logger.error(
-              `RPC ERROR (get_key_and_price_id_2): ${error.message}`
-            );
-            return {
-              success: false,
-              error:
-                "The server seems overloaded. Please contact hello@firecrawl.com if you aren't sending too many requests at once.",
-              status: 500,
-            };
-          }
-          if (!data || data.length === 0) {
-            Logger.warn(
-              `Error fetching api key: ${error.message} or data is empty`
-            );
-            // TODO: change this error code ?
-            return {
-              success: false,
-              error: "Unauthorized: Invalid token",
-              status: 401,
-            };
-          } else {
-            teamId = data[0].team_id;
-            priceId = data[0].price_id;
-          }
+      const teamIdPriceId = await getValue(cacheKey);
+      if (teamIdPriceId) {
+        const { team_id, price_id } = JSON.parse(teamIdPriceId);
+        teamId = team_id;
+        priceId = price_id;
+      } else {
+        const {
+          success,
+          teamId: tId,
+          priceId: pId,
+          error,
+          status,
+        } = await getKeyAndPriceId(normalizedApi);
+        if (!success) {
+          return { success, error, status };
        }
-      } catch (error) {
-        Logger.error(`Error with auth function: ${error.message}`);
-      } finally {
-        await lock.release();
+        teamId = tId;
+        priceId = pId;
+        await setValue(
+          cacheKey,
+          JSON.stringify({ team_id: teamId, price_id: priceId }),
+          10 // cache for 10 seconds
+        );
      }
     } catch (error) {
-      Logger.error(`Error acquiring the rate limiter lock: ${error}`);
+      Logger.error(`Error with auth function: ${error.message}`);
+      // const {
+      //   success,
+      //   teamId: tId,
+      //   priceId: pId,
+      //   error: e,
+      //   status,
+      // } = await getKeyAndPriceId(normalizedApi);
+      // if (!success) {
+      //   return { success, error: e, status };
+      // }
+      // teamId = tId;
+      // priceId = pId;
     }
 
     // get_key_and_price_id_2 rpc definition:
@@ -217,12 +257,12 @@ export async function supaAuthenticateUser(
       endDate.setDate(endDate.getDate() + 7);
 
       // await sendNotification(team_id, NotificationType.RATE_LIMIT_REACHED, startDate.toISOString(), endDate.toISOString());
-      // TODO: cache 429 for a few minuts
+      // Cache longer for 429s
      if (teamId && priceId && mode !== RateLimiterMode.Preview) {
        await setValue(
          cacheKey,
          JSON.stringify({ team_id: teamId, price_id: priceId }),
-          60 * 5
+          60 // 60 seconds, cache for everything
        );
      }
 
diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts
index ebe6ef38..de80bb33 100644
--- a/apps/api/src/index.ts
+++ b/apps/api/src/index.ts
@@ -189,3 +189,5 @@ wsq.on("completed", j => ScrapeEvents.logJobEvent(j, "completed"));
 wsq.on("paused", j => ScrapeEvents.logJobEvent(j, "paused"));
 wsq.on("resumed", j => ScrapeEvents.logJobEvent(j, "resumed"));
 wsq.on("removed", j => ScrapeEvents.logJobEvent(j, "removed"));
+
+

From 33aa5cf0de7c4734901999af8f00d3e83c7d3977 Mon Sep 17 00:00:00 2001
From: Kevin Swiber
Date: Mon, 12 Aug 2024 12:24:46 -0700
Subject: [PATCH 17/22] Moving comments of .env.example values from
 end-of-line to above-line.

Self-host docs suggest using .env.example as a base. However, Docker
doesn't respect end-of-line comments. It sets the comment as the actual
value of the variable. This fix prevents that.
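
For example, with the old format, a line like

    TEST_API_KEY= # use if you've set up authentication and want to test with a real API key

would leave TEST_API_KEY set to the literal string "# use if you've set
up authentication and want to test with a real API key" rather than an
empty value. With the comment moved to its own line, the variable stays
empty as intended.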
---
 apps/api/.env.example | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/apps/api/.env.example b/apps/api/.env.example
index 08ff7d7f..2e8891ce 100644
--- a/apps/api/.env.example
+++ b/apps/api/.env.example
@@ -17,18 +17,29 @@ SUPABASE_URL=
 SUPABASE_SERVICE_TOKEN=
 
 # Other Optionals
-TEST_API_KEY= # use if you've set up authentication and want to test with a real API key
-RATE_LIMIT_TEST_API_KEY_SCRAPE= # set if you'd like to test the scraping rate limit
-RATE_LIMIT_TEST_API_KEY_CRAWL= # set if you'd like to test the crawling rate limit
-SCRAPING_BEE_API_KEY= #Set if you'd like to use scraping Be to handle JS blocking
-OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.)
-BULL_AUTH_KEY= @
-LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
-LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
-SERPER_API_KEY= #Set if you have a serper key you'd like to use as a search api
-SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
-POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs
-POSTHOG_HOST= # set if you'd like to send posthog events like job logs
+# use if you've set up authentication and want to test with a real API key
+TEST_API_KEY=
+# set if you'd like to test the scraping rate limit
+RATE_LIMIT_TEST_API_KEY_SCRAPE=
+# set if you'd like to test the crawling rate limit
+RATE_LIMIT_TEST_API_KEY_CRAWL=
+# set if you'd like to use ScrapingBee to handle JS blocking
+SCRAPING_BEE_API_KEY=
+# add for LLM dependent features (image alt generation, etc.)
+OPENAI_API_KEY=
+BULL_AUTH_KEY=@
+# use if you're configuring basic logging with logtail
+LOGTAIL_KEY=
+# set if you have a llamaparse key you'd like to use to parse pdfs
+LLAMAPARSE_API_KEY=
+# set if you have a serper key you'd like to use as a search api
+SERPER_API_KEY=
+# set if you'd like to send slack server health status messages
+SLACK_WEBHOOK_URL=
+# set if you'd like to send posthog events like job logs
+POSTHOG_API_KEY=
+# set if you'd like to send posthog events like job logs
+POSTHOG_HOST=
 
 STRIPE_PRICE_ID_STANDARD=
 STRIPE_PRICE_ID_SCALE=
@@ -43,7 +54,8 @@ STRIPE_PRICE_ID_GROWTH_YEARLY=
 HYPERDX_API_KEY=
 HDX_NODE_BETA_MODE=1
 
-FIRE_ENGINE_BETA_URL= # set if you'd like to use the fire engine closed beta
+# set if you'd like to use the fire engine closed beta
+FIRE_ENGINE_BETA_URL=
 
 # Proxy Settings for Playwright (Alternative you can can use a proxy service like oxylabs, which rotates IPs for you on every request)
 PROXY_SERVER=

From 98be29c963054837af230155372399efa99bc33a Mon Sep 17 00:00:00 2001
From: Thomas Kosmas
Date: Mon, 12 Aug 2024 22:49:28 +0300
Subject: [PATCH 18/22] Update parameters for platform.openai.com

---
 .../WebScraper/utils/custom/website_params.ts | 25 +++++--------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts b/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts
index fcd3f69b..e74e2c4c 100644
--- a/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts
+++ b/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts
@@ -1,24 +1,11 @@
 export const urlSpecificParams = {
   "platform.openai.com": {
-    params: {
-      wait_browser: "networkidle2",
-      block_resources: false,
-    },
-    headers: {
-      "User-Agent":
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
-      
"sec-fetch-site": "same-origin", - "sec-fetch-mode": "cors", - "sec-fetch-dest": "empty", - referer: "https://www.google.com/", - "accept-language": "en-US,en;q=0.9", - "accept-encoding": "gzip, deflate, br", - accept: - "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", - }, - cookies: { - __cf_bm: - "mC1On8P2GWT3A5UeSYH6z_MP94xcTAdZ5jfNi9IT2U0-1714327136-1.0.1.1-ILAP5pSX_Oo9PPo2iHEYCYX.p9a0yRBNLr58GHyrzYNDJ537xYpG50MXxUYVdfrD.h3FV5O7oMlRKGA0scbxaQ", + defaultScraper: "fire-engine", + params:{ + wait: 3000, + fireEngineOptions:{ + engine: "chrome-cdp" + }, }, }, "support.greenpay.me":{ From 681033827108a72224fc13d8bc63c30950e3fe41 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 12 Aug 2024 16:51:43 -0400 Subject: [PATCH 19/22] Update search.ts --- apps/api/src/controllers/search.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts index dfd9b8b9..1a70f25e 100644 --- a/apps/api/src/controllers/search.ts +++ b/apps/api/src/controllers/search.ts @@ -80,7 +80,7 @@ export async function searchHelper( await a.setOptions({ jobId, mode: "single_urls", - urls: res.map((r) => r.url).slice(0, searchOptions.limit ?? 7), + urls: res.map((r) => r.url).slice(0, Math.min(searchOptions.limit ?? 5, 5)), crawlerOptions: { ...crawlerOptions, }, @@ -150,7 +150,8 @@ export async function searchController(req: Request, res: Response) { }; const origin = req.body.origin ?? "api"; - const searchOptions = req.body.searchOptions ?? { limit: 7 }; + const searchOptions = req.body.searchOptions ?? { limit: 5 }; + const jobId = uuidv4(); From c6bf78cfb44e82c47712c18c6a9e447459e517a7 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 12 Aug 2024 21:14:00 -0400 Subject: [PATCH 20/22] Update fly-direct.yml --- .github/workflows/fly-direct.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/fly-direct.yml b/.github/workflows/fly-direct.yml index f846098d..aea0a48e 100644 --- a/.github/workflows/fly-direct.yml +++ b/.github/workflows/fly-direct.yml @@ -1,7 +1,7 @@ name: Fly Deploy Direct on: schedule: - - cron: '0 */6 * * *' + - cron: '0 */2 * * *' env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} From fd060c7ef15602d02d21183da5fd67d58871eeb6 Mon Sep 17 00:00:00 2001 From: KentHsu Date: Tue, 13 Aug 2024 09:52:41 +0800 Subject: [PATCH 21/22] fix: go-sdk module name --- apps/go-sdk/README.md | 4 ++-- apps/go-sdk/examples/example.go | 2 +- apps/go-sdk/examples/go.mod | 6 +++--- apps/go-sdk/go.mod | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/apps/go-sdk/README.md b/apps/go-sdk/README.md index 353d28d6..7e17c10f 100644 --- a/apps/go-sdk/README.md +++ b/apps/go-sdk/README.md @@ -7,7 +7,7 @@ The Firecrawl Go SDK is a library that allows you to easily scrape and crawl web To install the Firecrawl Go SDK, you can ```bash -go get github.com/mendableai/firecrawl/go-sdk/firecrawl +go get github.com/mendableai/firecrawl ``` ## Usage @@ -23,7 +23,7 @@ import ( "fmt" "log" - "github.com/mendableai/firecrawl/go-sdk/firecrawl" + "github.com/mendableai/firecrawl/firecrawl" ) func main() { diff --git a/apps/go-sdk/examples/example.go b/apps/go-sdk/examples/example.go index 75194965..6097d22e 100644 --- a/apps/go-sdk/examples/example.go +++ b/apps/go-sdk/examples/example.go @@ -6,7 +6,7 @@ import ( "log" "github.com/google/uuid" - "github.com/mendableai/firecrawl/go-sdk/firecrawl" + 
"github.com/mendableai/firecrawl/firecrawl" ) func main() { diff --git a/apps/go-sdk/examples/go.mod b/apps/go-sdk/examples/go.mod index e3c5335d..8e47049c 100644 --- a/apps/go-sdk/examples/go.mod +++ b/apps/go-sdk/examples/go.mod @@ -1,10 +1,10 @@ -module github.com/mendableai/firecrawl/go-sdk/examples +module github.com/mendableai/firecrawl/apps/go-sdk/examples go 1.22.5 -replace github.com/mendableai/firecrawl/go-sdk => ../ +replace github.com/mendableai/firecrawl => ../ require ( github.com/google/uuid v1.6.0 - github.com/mendableai/firecrawl/go-sdk v0.0.0-00010101000000-000000000000 + github.com/mendableai/firecrawl v0.0.0-00010101000000-000000000000 ) diff --git a/apps/go-sdk/go.mod b/apps/go-sdk/go.mod index d792e0fe..c8a508d6 100644 --- a/apps/go-sdk/go.mod +++ b/apps/go-sdk/go.mod @@ -1,4 +1,4 @@ -module github.com/mendableai/firecrawl/go-sdk +module github.com/mendableai/firecrawl/apps/go-sdk go 1.22.5 From a4be95ac2706e3e810ecf9bd95c71b5b03b366a9 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 13 Aug 2024 13:42:26 -0300 Subject: [PATCH 22/22] fixed tests --- .../src/__tests__/e2e_withAuth/index.test.ts | 85 +++++-------------- 1 file changed, 22 insertions(+), 63 deletions(-) diff --git a/apps/api/src/__tests__/e2e_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts index b5bc54a5..7519508d 100644 --- a/apps/api/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_withAuth/index.test.ts @@ -311,7 +311,10 @@ describe("E2E Tests for API Routes", () => { } } - const completedResponse = response; + await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database + const completedResponse = await request(TEST_URL) + .get(`/v0/crawl/status/${crawlResponse.body.jobId}`) + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); const urls = completedResponse.body.data.map( (item: any) => item.metadata?.sourceURL @@ -363,7 +366,10 @@ describe("E2E Tests for API Routes", () => { } } - const completedResponse: FirecrawlCrawlStatusResponse = response; + await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database + const completedResponse: FirecrawlCrawlStatusResponse = await request(TEST_URL) + .get(`/v0/crawl/status/${crawlResponse.body.jobId}`) + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); const urls = completedResponse.body.data.map( (item: any) => item.metadata?.sourceURL @@ -481,7 +487,7 @@ describe("E2E Tests for API Routes", () => { expect(response.body).toHaveProperty("success"); expect(response.body.success).toBe(true); expect(response.body).toHaveProperty("data"); - }, 30000); // 30 seconds timeout + }, 60000); // 60 seconds timeout }); describe("GET /v0/crawl/status/:jobId", () => { @@ -513,7 +519,6 @@ describe("E2E Tests for API Routes", () => { expect(crawlResponse.statusCode).toBe(200); let isCompleted = false; - let completedResponse; while (!isCompleted) { const response = await request(TEST_URL) @@ -524,11 +529,16 @@ describe("E2E Tests for API Routes", () => { if (response.body.status === "completed") { isCompleted = true; - completedResponse = response; } else { await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again } } + + await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database + const completedResponse = await request(TEST_URL) + 
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`) + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); + expect(completedResponse.body).toHaveProperty("status"); expect(completedResponse.body.status).toBe("completed"); expect(completedResponse.body).toHaveProperty("data"); @@ -619,7 +629,13 @@ describe("E2E Tests for API Routes", () => { expect(completedResponse.body).toHaveProperty("status"); expect(completedResponse.body.status).toBe("failed"); expect(completedResponse.body).toHaveProperty("data"); - expect(completedResponse.body.data).toBeNull(); + + let isNullOrEmptyArray = false; + if (completedResponse.body.data === null || completedResponse.body.data.length === 0) { + isNullOrEmptyArray = true; + } + expect(isNullOrEmptyArray).toBe(true); + expect(completedResponse.body.data).toEqual(expect.arrayContaining([])); expect(completedResponse.body).toHaveProperty("partial_data"); expect(completedResponse.body.partial_data[0]).toHaveProperty("content"); expect(completedResponse.body.partial_data[0]).toHaveProperty("markdown"); @@ -679,61 +695,4 @@ describe("E2E Tests for API Routes", () => { expect(typeof llmExtraction.is_open_source).toBe("boolean"); }, 60000); // 60 secs }); - - describe("POST /v0/crawl with fast mode", () => { - it.concurrent("should complete the crawl under 20 seconds", async () => { - const startTime = Date.now(); - - const crawlResponse = await request(TEST_URL) - .post("/v0/crawl") - .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) - .set("Content-Type", "application/json") - .send({ - url: "https://flutterbricks.com", - crawlerOptions: { - mode: "fast" - } - }); - - expect(crawlResponse.statusCode).toBe(200); - - const jobId = crawlResponse.body.jobId; - let statusResponse; - let isFinished = false; - - while (!isFinished) { - statusResponse = await request(TEST_URL) - .get(`/v0/crawl/status/${jobId}`) - .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); - - expect(statusResponse.statusCode).toBe(200); - isFinished = statusResponse.body.status === "completed"; - - if (!isFinished) { - await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again - } - } - - // const endTime = Date.now(); - // const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds - - // console.log(`Time elapsed: ${timeElapsed} seconds`); - - expect(statusResponse.body.status).toBe("completed"); - expect(statusResponse.body).toHaveProperty("data"); - expect(statusResponse.body.data[0]).toHaveProperty("content"); - expect(statusResponse.body.data[0]).toHaveProperty("markdown"); - expect(statusResponse.body.data[0]).toHaveProperty("metadata"); - expect(statusResponse.body.data[0].metadata.pageStatusCode).toBe(200); - expect(statusResponse.body.data[0].metadata.pageError).toBeUndefined(); - - const results = statusResponse.body.data; - // results.forEach((result, i) => { - // console.log(result.metadata.sourceURL); - // }); - expect(results.length).toBeGreaterThanOrEqual(10); - expect(results.length).toBeLessThanOrEqual(15); - - }, 20000); - }); });