add initial codeowners

This commit is contained in:
Gergő Móricz 2025-02-25 14:28:06 +01:00
parent 51bc7757ef
commit 115b6b61c4
4 changed files with 121 additions and 129 deletions

91
.github/CODEOWNERS vendored Normal file
View File

@ -0,0 +1,91 @@
# Firecrawl Ownership Chart
# api
## V1 API
/apps/api/src/routes/v1.ts @mogery @nickscamara
### /scrape
/apps/api/src/controllers/v1/scrape* @mogery
### /crawl
/apps/api/src/controllers/v1/crawl* @mogery
### /batch/scrape
/apps/api/src/controllers/v1/batch-scrape* @mogery
### /extract
/apps/api/src/controllers/v1/extract* @nickscamara
/apps/api/src/lib/extract/* @nickscamara
/apps/api/src/lib/generic-ai.ts @mogery @nickscamara # (AI SDK)
### /map
/apps/api/src/controllers/v1/map* @nickscamara
/apps/api/src/lib/map-cosine.ts @nickscamara
### /search
/apps/api/src/controllers/v1/search* @nickscamara
/apps/api/src/search/* @nickscamara
### /llmstxt
/apps/api/src/controllers/v1/generate-llmstxt* @ericciarla
/apps/api/src/lib/generate-llmstxt/* @ericciarla
### /deep-research
/apps/api/src/controllers/v1/deep-research* @nickscamara
/apps/api/src/lib/deep-research/* @nickscamara
### Input Validation/Zod
/apps/api/src/controllers/v1/types.ts @mogery
## V0 API, deprecated
/apps/api/src/controllers/v0/* @mogery @nickscamara
/apps/api/src/routes/v0.ts @mogery @nickscamara
# Worker
## scrapeURL
/apps/api/src/scraper/scrapeURL/* @mogery
### crawler
/apps/api/src/lib/crawl-redis* @mogery
### remnants of WebScraper/WebCrawler
/apps/api/src/scraper/WebScraper/* @mogery @nickscamara
## concurrency limits
/apps/api/src/lib/concurrency-limit.ts @mogery @nickscamara
## BullMQ-related code
/apps/api/src/services/queue-worker.ts @mogery @nickscamara
/apps/api/src/main/runWebScraper.ts @mogery @nickscamara
/apps/api/src/services/queue* @mogery @nickscamara
/apps/api/src/lib/job-priority.ts @nickscamara @mogery
# Shared Libraries
/apps/api/sharedLibs/go-html-to-md/* @tomkosm
/apps/api/src/lib/html-to-markdown.ts @tomkosm
/apps/api/sharedLibs/html-transformer/* @mogery
/apps/api/src/lib/html-transformer.ts @mogery
# playwright-service-ts
/apps/playwright-service-ts/* @mogery
# self-hosting
/docker-compose.yaml @mogery
/SELF_HOST.md @mogery
# SDKs
/apps/python-sdk/* @rafaelsideguide @nickscamara
/apps/js-sdk/* @mogery @nickscamara
/apps/rust-sdk/* @mogery
/apps/go-sdk/* @rafaelsideguide
# CI/CD and GitHub Workflows
/.github/* @mogery @rafaelsideguide
# Tests
/apps/api/src/__tests__/snips/* @mogery
# Examples
/examples/* @ericciarla @nickscamara

View File

@ -1,66 +0,0 @@
# fly.toml app configuration file generated for firecrawl-scraper-js on 2024-04-07T21:09:59-03:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#
app = 'staging-firecrawl-scraper-js'
primary_region = 'mia'
kill_signal = 'SIGINT'
kill_timeout = '30s'

[build]

[processes]
app = 'node dist/src/index.js'
worker = 'node dist/src/services/queue-worker.js'

[http_service]
internal_port = 8080
force_https = true
auto_stop_machines = true
auto_start_machines = true
min_machines_running = 2
processes = ['app']

[http_service.concurrency]
type = 'requests'
# hard_limit = 100
soft_limit = 100

[[http_service.checks]]
grace_period = '10s'
interval = '30s'
method = 'GET'
timeout = '5s'
path = '/'

[[services]]
protocol = 'tcp'
internal_port = 8080
processes = ['worker']

[[services.ports]]
port = 80
handlers = ['http']
force_https = true

[[services.ports]]
port = 443
handlers = ['tls', 'http']

[services.concurrency]
type = 'connections'
# hard_limit = 25
soft_limit = 100

[[vm]]
size = 'performance-2x'
processes = ['app', 'worker']
memory = 8192

View File

@ -1,63 +0,0 @@
# fly.toml app configuration file generated for firecrawl-scraper-js on 2024-04-07T21:09:59-03:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#
app = 'firecrawl-scraper-js'
primary_region = 'iad'
kill_signal = 'SIGINT'
kill_timeout = '30s'

[build]

[processes]
app = 'node --max-old-space-size=8192 dist/src/index.js'
worker = 'node --max-old-space-size=8192 dist/src/services/queue-worker.js'

[http_service]
internal_port = 8080
force_https = true
auto_stop_machines = false
auto_start_machines = true
min_machines_running = 2
processes = ['app']

[http_service.concurrency]
type = 'requests'
# hard_limit = 200
soft_limit = 200

[[http_service.checks]]
grace_period = '20s'
interval = '30s'
method = 'GET'
timeout = '15s'
path = '/'

[[services]]
protocol = 'tcp'
internal_port = 8080
processes = ['app']

[[services.ports]]
port = 80
handlers = ['http']
force_https = true

[[services.ports]]
port = 443
handlers = ['tls', 'http']

[services.concurrency]
type = 'connections'
# hard_limit = 30
soft_limit = 200

[[vm]]
size = 'performance-4x'
processes = ['app']

View File

@ -0,0 +1,30 @@
#!/bin/bash
# Report git-tracked files that are NOT matched by any CODEOWNERS pattern.
#
# Fixes over the original:
#   - Uses mktemp instead of fixed /tmp names: the old `>> /tmp/covered_files.txt`
#     appended to stale output from a previous run, silently corrupting results,
#     and fixed names race between concurrent users.
#   - `trap ... EXIT` guarantees cleanup even on early failure; the old `rm`
#     also failed outright when no pattern ever matched (file never created).
#   - Forces LC_ALL=C so `sort` and `comm` use the same bytewise collation
#     that `git ls-files` emits; with a non-C locale, `comm` input ordering
#     could disagree and produce garbage.
set -euo pipefail
export LC_ALL=C

all_files=$(mktemp)
covered_files=$(mktemp)
covered_unique=$(mktemp)
trap 'rm -f "$all_files" "$covered_files" "$covered_unique"' EXIT

# All files tracked by git (output is bytewise-sorted, matching LC_ALL=C).
git ls-files > "$all_files"

# Collect every file matched by a CODEOWNERS path pattern.
while read -r line; do
    # Skip comments and empty lines
    [[ "$line" =~ ^#.*$ ]] && continue
    [[ -z "$line" ]] && continue
    # First whitespace-separated field is the path pattern; the rest are owners.
    pattern=$(echo "$line" | awk '{print $1}')
    # Strip a leading slash so git treats the pattern as repo-relative.
    pattern=${pattern#/}
    # `|| true`: a pattern matching nothing must not abort under `set -e`.
    git ls-files "$pattern" 2>/dev/null >> "$covered_files" || true
done < .github/CODEOWNERS

# De-duplicate (a file can be matched by several patterns).
sort -u "$covered_files" > "$covered_unique"

# Lines unique to all_files = tracked files with no CODEOWNERS coverage.
comm -23 "$all_files" "$covered_unique"