mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 00:09:01 +08:00
Add searxng for search endpoint (#1193)
* add searxng.ts * update to add searxng endpoint * Apply suggestions from code review * feat(ci/self-host): add tests with searxng * feat(ci/self-host): bootstrap searxng for testing * feat(ci): improvements in syntax --------- Co-authored-by: Gergő Móricz <mo.geryy@gmail.com>
This commit is contained in:
parent
04218de2b0
commit
100168ddf3
49
.github/workflows/test-server-self-host.yml
vendored
49
.github/workflows/test-server-self-host.yml
vendored
@ -21,9 +21,9 @@ jobs:
|
||||
name: Run tests
|
||||
strategy:
|
||||
matrix:
|
||||
openai: [true, false]
|
||||
serper: [true, false]
|
||||
playwright: [true, false]
|
||||
ai: ["openai", "no-ai"]
|
||||
search: ["searxng", "google"]
|
||||
engine: ["playwright", "fetch"]
|
||||
fail-fast: false
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
@ -32,9 +32,9 @@ jobs:
|
||||
ports:
|
||||
- 6379:6379
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ matrix.openai == true && secrets.OPENAI_API_KEY || '' }}
|
||||
SERPER_API_KEY: ${{ matrix.serper == true && secrets.SERPER_API_KEY || '' }}
|
||||
PLAYWRIGHT_MICROSERVICE_URL: ${{ matrix.playwright == true && 'http://localhost:3003/scrape' || '' }}
|
||||
OPENAI_API_KEY: ${{ matrix.ai == 'openai' && secrets.OPENAI_API_KEY || '' }}
|
||||
SEARXNG_ENDPOINT: ${{ matrix.search == 'searxng' && 'http://localhost:3434' || '' }}
|
||||
PLAYWRIGHT_MICROSERVICE_URL: ${{ matrix.engine == 'playwright' && 'http://localhost:3003/scrape' || '' }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install pnpm
|
||||
@ -51,7 +51,7 @@ jobs:
|
||||
run: pnpm install
|
||||
working-directory: ./apps/api
|
||||
- name: Install Playwright dependencies
|
||||
if: matrix.playwright == true
|
||||
if: matrix.engine == 'playwright'
|
||||
run: |
|
||||
pnpm install
|
||||
pnpm exec playwright install-deps
|
||||
@ -68,6 +68,20 @@ jobs:
|
||||
go build -o html-to-markdown.so -buildmode=c-shared html-to-markdown.go
|
||||
chmod +x html-to-markdown.so
|
||||
working-directory: ./apps/api/sharedLibs/go-html-to-md
|
||||
- name: Set up SearXNG
|
||||
if: matrix.search == 'searxng'
|
||||
run: |
|
||||
mkdir searxng
|
||||
|
||||
echo "use_default_settings: true
|
||||
search:
|
||||
formats: [html, json, csv]
|
||||
server:
|
||||
secret_key: 'fcsecret'" > searxng/settings.yml
|
||||
|
||||
docker run -d -p 3434:8080 -v "${PWD}/searxng:/etc/searxng" --name searxng searxng/searxng
|
||||
pnpx wait-on tcp:3434 -t 30s
|
||||
working-directory: ./
|
||||
- name: Start server
|
||||
run: npm start > api.log 2>&1 &
|
||||
working-directory: ./apps/api
|
||||
@ -75,7 +89,7 @@ jobs:
|
||||
run: npm run workers > worker.log 2>&1 &
|
||||
working-directory: ./apps/api
|
||||
- name: Start playwright
|
||||
if: matrix.playwright == true
|
||||
if: matrix.engine == 'playwright'
|
||||
run: npm run dev > playwright.log 2>&1 &
|
||||
working-directory: ./apps/playwright-service-ts
|
||||
env:
|
||||
@ -83,7 +97,7 @@ jobs:
|
||||
- name: Wait for server
|
||||
run: pnpx wait-on tcp:3002 -t 15s
|
||||
- name: Wait for playwright
|
||||
if: matrix.playwright == true
|
||||
if: matrix.engine == 'playwright'
|
||||
run: pnpx wait-on tcp:3003 -t 15s
|
||||
- name: Run snippet tests
|
||||
run: |
|
||||
@ -92,16 +106,29 @@ jobs:
|
||||
- name: Kill instances
|
||||
if: always()
|
||||
run: pkill -9 node
|
||||
- name: Kill SearXNG
|
||||
if: always() && matrix.search == 'searxng'
|
||||
run: |
|
||||
docker logs searxng > searxng/searxng.log 2>&1
|
||||
docker kill searxng
|
||||
working-directory: ./
|
||||
- uses: actions/upload-artifact@v4
|
||||
if: always()
|
||||
with:
|
||||
name: Logs (openai ${{ matrix.openai }}, serper ${{ matrix.serper }}, playwright ${{ matrix.playwright }})
|
||||
name: Logs (${{ matrix.ai }}, ${{ matrix.search }}, ${{ matrix.engine }})
|
||||
path: |
|
||||
./apps/api/api.log
|
||||
./apps/api/worker.log
|
||||
- uses: actions/upload-artifact@v4
|
||||
# Upload Playwright logs only for matrix legs that run the Playwright engine.
# The matrix axis is `engine` ("playwright" | "fetch"); the former boolean
# `matrix.playwright` key no longer exists and would always evaluate as falsy.
if: always() && matrix.engine == 'playwright'
|
||||
with:
|
||||
name: Playwright Logs (openai ${{ matrix.openai }}, serper ${{ matrix.serper }})
|
||||
name: Playwright Logs (${{ matrix.ai }}, ${{ matrix.search }})
|
||||
path: |
|
||||
./apps/playwright-service-ts/playwright.log
|
||||
- uses: actions/upload-artifact@v4
|
||||
if: always() && matrix.search == 'searxng'
|
||||
with:
|
||||
name: SearXNG (${{ matrix.ai }}, ${{ matrix.engine }})
|
||||
path: |
|
||||
./searxng/searxng.log
|
||||
./searxng/settings.yml
|
||||
|
@ -4,6 +4,7 @@ import { googleSearch } from "./googlesearch";
|
||||
import { fireEngineMap } from "./fireEngine";
|
||||
import { searchapi_search } from "./searchapi";
|
||||
import { serper_search } from "./serper";
|
||||
import { searxng_search } from "./searxng";
|
||||
|
||||
export async function search({
|
||||
query,
|
||||
@ -51,6 +52,16 @@ export async function search({
|
||||
location,
|
||||
});
|
||||
}
|
||||
if (process.env.SEARXNG_ENDPOINT) {
|
||||
return await searxng_search(query, {
|
||||
num_results,
|
||||
tbs,
|
||||
filter,
|
||||
lang,
|
||||
country,
|
||||
location,
|
||||
});
|
||||
}
|
||||
return await googleSearch(
|
||||
query,
|
||||
advanced,
|
||||
|
64
apps/api/src/search/searxng.ts
Normal file
64
apps/api/src/search/searxng.ts
Normal file
@ -0,0 +1,64 @@
|
||||
import axios from "axios";
|
||||
import dotenv from "dotenv";
|
||||
import { SearchResult } from "../../src/lib/entities";
|
||||
import { logger } from "../lib/logger"
|
||||
|
||||
dotenv.config();
|
||||
|
||||
/**
 * Options accepted by `searxng_search`.
 *
 * The shape mirrors what the search dispatcher passes to every backend.
 * `searxng_search` does not forward `tbs`, `filter`, `country` or `location`
 * in its request parameters.
 */
interface SearchOptions {
  /** Number of results requested by the caller. */
  num_results: number;
  /** Result page to request; `searxng_search` sends 1 when omitted. */
  page?: number;
  /** Language code, sent to SearXNG as the `language` query parameter. */
  lang?: string;
  /** Country hint; not sent to SearXNG. */
  country?: string;
  /** Location hint; not sent to SearXNG. */
  location?: string;
  /** Time-based search filter; not sent to SearXNG. */
  tbs?: string;
  /** Result filter; not sent to SearXNG. */
  filter?: string;
}
|
||||
|
||||
/**
 * Runs a web search against a SearXNG instance and maps the hits to
 * `SearchResult`-shaped objects (`url`, `title`, `description`).
 *
 * Configuration is read from the environment:
 * - `SEARXNG_ENDPOINT`   – base URL of the instance; `/search` is appended.
 * - `SEARXNG_ENGINES`    – value of the `engines` query parameter (default `""`).
 * - `SEARXNG_CATEGORIES` – value of the `categories` query parameter
 *   (default `"general"`).
 *
 * From `options`, only `lang` (sent as `language`), `page` (sent as `pageno`,
 * default 1) and `num_results` are used. SearXNG has no request parameter for
 * the result count, so the returned list is truncated to `num_results` on the
 * client side.
 *
 * @param q - The search query.
 * @param options - Search options; see above for which fields are honoured.
 * @returns The mapped results. An empty array is returned when the endpoint
 *   is not configured, the request fails, or the response carries no
 *   `results` array.
 */
export async function searxng_search(
  q: string,
  options: SearchOptions,
): Promise<SearchResult[]> {
  const endpoint = process.env.SEARXNG_ENDPOINT;
  if (!endpoint) {
    // Follow the same log-and-return-empty convention as request failures
    // instead of throwing a TypeError on an undefined endpoint.
    logger.error(`SEARXNG_ENDPOINT is not set; cannot search with SearXNG`);
    return [];
  }

  // Strip any trailing slashes so the path is not doubled ("//search").
  const finalUrl = endpoint.replace(/\/+$/, "") + "/search";

  const params = {
    q: q,
    language: options.lang,
    // gl: options.country, //not possible with SearXNG
    // location: options.location, //not possible with SearXNG
    // num: options.num_results, //not possible with SearXNG (capped below)
    engines: process.env.SEARXNG_ENGINES || "",
    categories: process.env.SEARXNG_CATEGORIES || "general",
    pageno: options.page ?? 1,
    format: "json",
  };

  // Only cap when the caller passed a usable, non-negative limit.
  const limit = options.num_results;
  const hasLimit = Number.isFinite(limit) && limit >= 0;

  try {
    const response = await axios.get(finalUrl, {
      headers: {
        "Content-Type": "application/json",
      },
      params: params,
    });

    const data = response.data;
    if (!data || !Array.isArray(data.results)) {
      return [];
    }

    const hits = hasLimit ? data.results.slice(0, limit) : data.results;

    // SearXNG calls the snippet `content`; callers expect `description`.
    return hits.map((a: any) => ({
      url: a.url,
      title: a.title,
      description: a.content,
    }));
  } catch (error) {
    logger.error(`There was an error searching for content`, { error });
    return [];
  }
}
|
Loading…
x
Reference in New Issue
Block a user