From b802ea02a11942169c5f1c97c402918cc56b432d Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 9 Aug 2024 11:13:14 -0300 Subject: [PATCH] small improvements - wait for getting results on crawl: sometimes the crawl takes a second to save the data to the db, which causes response.data to be empty - added timeout value to test script - increased http client timeout (llm extract was failing on e2e tests) - fixed env path on test script --- .github/workflows/fly.yml | 2 +- apps/go-sdk/examples/example.go | 6 +++++- apps/go-sdk/firecrawl/firecrawl.go | 8 ++++++-- apps/go-sdk/firecrawl/firecrawl_test.go | 2 +- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml index 1b9fe6b9..9e4b85a8 100644 --- a/.github/workflows/fly.yml +++ b/.github/workflows/fly.yml @@ -201,7 +201,7 @@ jobs: run: go mod tidy working-directory: ./apps/go-sdk - name: Run tests for Go SDK - run: go test -v ./... + run: go test -v ./... 
-timeout 180s working-directory: ./apps/go-sdk/firecrawl deploy: diff --git a/apps/go-sdk/examples/example.go b/apps/go-sdk/examples/example.go index ce8470e9..75194965 100644 --- a/apps/go-sdk/examples/example.go +++ b/apps/go-sdk/examples/example.go @@ -33,7 +33,11 @@ func main() { if err != nil { log.Fatalf("Failed to crawl URL: %v", err) } - fmt.Println(crawlResult) + jsonCrawlResult, err := json.MarshalIndent(crawlResult, "", " ") + if err != nil { + log.Fatalf("Failed to marshal crawl result: %v", err) + } + fmt.Println(string(jsonCrawlResult)) // LLM Extraction using JSON schema jsonSchema := map[string]any{ diff --git a/apps/go-sdk/firecrawl/firecrawl.go b/apps/go-sdk/firecrawl/firecrawl.go index b9f50f08..9a9dcfef 100644 --- a/apps/go-sdk/firecrawl/firecrawl.go +++ b/apps/go-sdk/firecrawl/firecrawl.go @@ -195,7 +195,7 @@ func NewFirecrawlApp(apiKey, apiURL string) (*FirecrawlApp, error) { } client := &http.Client{ - Timeout: 30 * time.Second, + Timeout: 60 * time.Second, } return &FirecrawlApp{ @@ -502,6 +502,7 @@ func (app *FirecrawlApp) makeRequest(method, url string, data map[string]any, he // - []*FirecrawlDocument: The crawl result if the job is completed. // - error: An error if the crawl status check request fails. 
func (app *FirecrawlApp) monitorJobStatus(jobID string, headers map[string]string, pollInterval int) ([]*FirecrawlDocument, error) { + attempts := 0 for { resp, err := app.makeRequest( http.MethodGet, @@ -531,7 +532,10 @@ func (app *FirecrawlApp) monitorJobStatus(jobID string, headers map[string]strin if statusData.Data != nil { return statusData.Data, nil } - return nil, fmt.Errorf("crawl job completed but no data was returned") + attempts++ + if attempts > 3 { + return nil, fmt.Errorf("crawl job completed but no data was returned") + } } else if status == "active" || status == "paused" || status == "pending" || status == "queued" || status == "waiting" { pollInterval = max(pollInterval, 2) time.Sleep(time.Duration(pollInterval) * time.Second) diff --git a/apps/go-sdk/firecrawl/firecrawl_test.go b/apps/go-sdk/firecrawl/firecrawl_test.go index 8a3aacb3..9d56c7ac 100644 --- a/apps/go-sdk/firecrawl/firecrawl_test.go +++ b/apps/go-sdk/firecrawl/firecrawl_test.go @@ -16,7 +16,7 @@ var API_URL string var TEST_API_KEY string func init() { - err := godotenv.Load() + err := godotenv.Load("../.env") if err != nil { log.Fatalf("Error loading .env file: %v", err) }