From 115b6b61c4537d344b9a3fd0a2c96cc610c1ddee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Tue, 25 Feb 2025 14:28:06 +0100
Subject: [PATCH] add initial codeowners

---
 .github/CODEOWNERS                     | 94 ++++++++++++++++++++++++++
 apps/api/fly.staging.toml              | 66 -------------------
 apps/api/fly.toml                      | 63 ------------------
 apps/api/utils/find_uncovered_files.sh | 30 +++++++++
 4 files changed, 124 insertions(+), 129 deletions(-)
 create mode 100644 .github/CODEOWNERS
 delete mode 100644 apps/api/fly.staging.toml
 delete mode 100644 apps/api/fly.toml
 create mode 100755 apps/api/utils/find_uncovered_files.sh

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 00000000..0cfdd19a
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1,94 @@
+# Firecrawl Ownership Chart
+#
+# The last matching pattern wins. A trailing "/" assigns a directory and everything
+# beneath it, whereas "dir/*" would only cover files directly inside dir.
+
+# api
+
+## V1 API
+/apps/api/src/routes/v1.ts @mogery @nickscamara
+
+### /scrape
+/apps/api/src/controllers/v1/scrape* @mogery
+
+### /crawl
+/apps/api/src/controllers/v1/crawl* @mogery
+
+### /batch/scrape
+/apps/api/src/controllers/v1/batch-scrape* @mogery
+
+### /extract
+/apps/api/src/controllers/v1/extract* @nickscamara
+/apps/api/src/lib/extract/ @nickscamara
+/apps/api/src/lib/generic-ai.ts @mogery @nickscamara # (AI SDK)
+
+### /map
+/apps/api/src/controllers/v1/map* @nickscamara
+/apps/api/src/lib/map-cosine.ts @nickscamara
+
+### /search
+/apps/api/src/controllers/v1/search* @nickscamara
+/apps/api/src/search/ @nickscamara
+
+### /llmstxt
+/apps/api/src/controllers/v1/generate-llmstxt* @ericciarla
+/apps/api/src/lib/generate-llmstxt/ @ericciarla
+
+### /deep-research
+/apps/api/src/controllers/v1/deep-research* @nickscamara
+/apps/api/src/lib/deep-research/ @nickscamara
+
+### Input Validation/Zod
+/apps/api/src/controllers/v1/types.ts @mogery
+
+## V0 API, deprecated
+/apps/api/src/controllers/v0/ @mogery @nickscamara
+/apps/api/src/routes/v0.ts @mogery @nickscamara
+
+# Worker
+
+## scrapeURL
+/apps/api/src/scraper/scrapeURL/ @mogery
+
+### crawler
+/apps/api/src/lib/crawl-redis* @mogery
+
+### remnants of WebScraper/WebCrawler
+/apps/api/src/scraper/WebScraper/ @mogery @nickscamara
+
+## concurrency limits
+/apps/api/src/lib/concurrency-limit.ts @mogery @nickscamara
+
+## BullMQ-related code
+/apps/api/src/services/queue-worker.ts @mogery @nickscamara
+/apps/api/src/main/runWebScraper.ts @mogery @nickscamara
+/apps/api/src/services/queue* @mogery @nickscamara
+/apps/api/src/lib/job-priority.ts @nickscamara @mogery
+
+# Shared Libraries
+/apps/api/sharedLibs/go-html-to-md/ @tomkosm
+/apps/api/src/lib/html-to-markdown.ts @tomkosm
+/apps/api/sharedLibs/html-transformer/ @mogery
+/apps/api/src/lib/html-transformer.ts @mogery
+
+# playwright-service-ts
+/apps/playwright-service-ts/ @mogery
+
+# self-hosting
+/docker-compose.yaml @mogery
+/SELF_HOST.md @mogery
+
+# SDKs
+/apps/python-sdk/ @rafaelsideguide @nickscamara
+/apps/js-sdk/ @mogery @nickscamara
+/apps/rust-sdk/ @mogery
+/apps/go-sdk/ @rafaelsideguide
+
+# CI/CD and GitHub Workflows
+/.github/ @mogery @rafaelsideguide
+
+# Tests
+/apps/api/src/__tests__/snips/ @mogery
+
+# Examples
+/examples/ @ericciarla @nickscamara
diff --git a/apps/api/fly.staging.toml b/apps/api/fly.staging.toml
deleted file mode 100644
index db1ed183..00000000
--- a/apps/api/fly.staging.toml
+++ /dev/null
@@ -1,66 +0,0 @@
-# fly.toml app configuration file generated for firecrawl-scraper-js on 2024-04-07T21:09:59-03:00
-#
-# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
-# - -app = 'staging-firecrawl-scraper-js' -primary_region = 'mia' -kill_signal = 'SIGINT' -kill_timeout = '30s' - -[build] - -[processes] - app = 'node dist/src/index.js' - worker = 'node dist/src/services/queue-worker.js' - -[http_service] - internal_port = 8080 - force_https = true - auto_stop_machines = true - auto_start_machines = true - min_machines_running = 2 - processes = ['app'] - -[http_service.concurrency] - type = "requests" - # hard_limit = 100 - soft_limit = 100 - -[[http_service.checks]] - grace_period = "10s" - interval = "30s" - method = "GET" - timeout = "5s" - path = "/" - - -[[services]] - protocol = 'tcp' - internal_port = 8080 - processes = ['worker'] - -[[services.ports]] - port = 80 - handlers = ['http'] - force_https = true - -[[services.ports]] - port = 443 - handlers = ['tls', 'http'] - - [services.concurrency] - type = 'connections' - # hard_limit = 25 - soft_limit = 100 - -[[vm]] - size = 'performance-2x' - processes = ['app','worker'] - memory = 8192 - - - - - - diff --git a/apps/api/fly.toml b/apps/api/fly.toml deleted file mode 100644 index c0c87401..00000000 --- a/apps/api/fly.toml +++ /dev/null @@ -1,63 +0,0 @@ -# fly.toml app configuration file generated for firecrawl-scraper-js on 2024-04-07T21:09:59-03:00 -# -# See https://fly.io/docs/reference/configuration/ for information about how to use this file. 
-#
-
-app = 'firecrawl-scraper-js'
-primary_region = 'iad'
-kill_signal = 'SIGINT'
-kill_timeout = '30s'
-
-[build]
-
-[processes]
- app = 'node --max-old-space-size=8192 dist/src/index.js'
- worker = 'node --max-old-space-size=8192 dist/src/services/queue-worker.js'
-
-[http_service]
- internal_port = 8080
- force_https = true
- auto_stop_machines = false
- auto_start_machines = true
- min_machines_running = 2
- processes = ['app']
-
-[http_service.concurrency]
- type = "requests"
- # hard_limit = 200
- soft_limit = 200
-
-[[http_service.checks]]
- grace_period = "20s"
- interval = "30s"
- method = "GET"
- timeout = "15s"
- path = "/"
-
-[[services]]
- protocol = 'tcp'
- internal_port = 8080
- processes = ['app']
-
-[[services.ports]]
- port = 80
- handlers = ['http']
- force_https = true
-
-[[services.ports]]
- port = 443
- handlers = ['tls', 'http']
-
- [services.concurrency]
- type = 'connections'
- # hard_limit = 30
- soft_limit = 200
-
-[[vm]]
- size = 'performance-4x'
- processes = ['app']
-
-
-
-
-
diff --git a/apps/api/utils/find_uncovered_files.sh b/apps/api/utils/find_uncovered_files.sh
new file mode 100755
index 00000000..32204dec
--- /dev/null
+++ b/apps/api/utils/find_uncovered_files.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Print every git-tracked file that no pattern in .github/CODEOWNERS matches.
+#
+# NOTE(review): patterns are passed to `git ls-files` as pathspecs, where `*`
+# also matches `/`. CODEOWNERS treats `dir/*` as direct children only, so
+# nested files may be reported as covered here when GitHub sees no owner.
+set -euo pipefail
+
+# Byte-order collation: `comm` needs both inputs sorted the same way.
+export LC_ALL=C
+
+# CODEOWNERS patterns are repo-root relative, so resolve everything from there.
+cd "$(git rev-parse --show-toplevel)"
+
+# Private scratch dir, removed on every exit path (no stale or shared /tmp files).
+scratch=$(mktemp -d)
+trap 'rm -rf "$scratch"' EXIT
+: > "$scratch/covered"
+
+# First field of each line is the pattern; `||` keeps an unterminated last line.
+while read -r pattern _ || [[ -n "$pattern" ]]; do
+    # Skip blank lines and comments.
+    [[ -z "$pattern" || "$pattern" == \#* ]] && continue
+    # Strip the leading root anchor; a failing pattern must not abort the scan.
+    git ls-files -- "${pattern#/}" 2>/dev/null >> "$scratch/covered" || true
+done < .github/CODEOWNERS
+
+# Tracked files minus covered files.
+git ls-files | sort -u > "$scratch/all"
+sort -u "$scratch/covered" | comm -23 "$scratch/all" -