mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-11 23:49:02 +08:00
Add searxng for search endpoint (#1193)
* add searxng.ts * update to add searxng endpoint * Apply suggestions from code review * feat(ci/self-host): add tests with searxng * feat(ci/self-host): bootstrap searxng for testing * feat(ci): improvements in syntax --------- Co-authored-by: Gergő Móricz <mo.geryy@gmail.com>
This commit is contained in:
parent
04218de2b0
commit
100168ddf3
49
.github/workflows/test-server-self-host.yml
vendored
49
.github/workflows/test-server-self-host.yml
vendored
@ -21,9 +21,9 @@ jobs:
|
|||||||
name: Run tests
|
name: Run tests
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
openai: [true, false]
|
ai: ["openai", "no-ai"]
|
||||||
serper: [true, false]
|
search: ["searxng", "google"]
|
||||||
playwright: [true, false]
|
engine: ["playwright", "fetch"]
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
services:
|
services:
|
||||||
@ -32,9 +32,9 @@ jobs:
|
|||||||
ports:
|
ports:
|
||||||
- 6379:6379
|
- 6379:6379
|
||||||
env:
|
env:
|
||||||
OPENAI_API_KEY: ${{ matrix.openai == true && secrets.OPENAI_API_KEY || '' }}
|
OPENAI_API_KEY: ${{ matrix.ai == 'openai' && secrets.OPENAI_API_KEY || '' }}
|
||||||
SERPER_API_KEY: ${{ matrix.serper == true && secrets.SERPER_API_KEY || '' }}
|
SEARXNG_ENDPOINT: ${{ matrix.search == 'searxng' && 'http://localhost:3434' || '' }}
|
||||||
PLAYWRIGHT_MICROSERVICE_URL: ${{ matrix.playwright == true && 'http://localhost:3003/scrape' || '' }}
|
PLAYWRIGHT_MICROSERVICE_URL: ${{ matrix.engine == 'playwright' && 'http://localhost:3003/scrape' || '' }}
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
- name: Install pnpm
|
- name: Install pnpm
|
||||||
@ -51,7 +51,7 @@ jobs:
|
|||||||
run: pnpm install
|
run: pnpm install
|
||||||
working-directory: ./apps/api
|
working-directory: ./apps/api
|
||||||
- name: Install Playwright dependencies
|
- name: Install Playwright dependencies
|
||||||
if: matrix.playwright == true
|
if: matrix.engine == 'playwright'
|
||||||
run: |
|
run: |
|
||||||
pnpm install
|
pnpm install
|
||||||
pnpm exec playwright install-deps
|
pnpm exec playwright install-deps
|
||||||
@ -68,6 +68,20 @@ jobs:
|
|||||||
go build -o html-to-markdown.so -buildmode=c-shared html-to-markdown.go
|
go build -o html-to-markdown.so -buildmode=c-shared html-to-markdown.go
|
||||||
chmod +x html-to-markdown.so
|
chmod +x html-to-markdown.so
|
||||||
working-directory: ./apps/api/sharedLibs/go-html-to-md
|
working-directory: ./apps/api/sharedLibs/go-html-to-md
|
||||||
|
- name: Set up SearXNG
|
||||||
|
if: matrix.search == 'searxng'
|
||||||
|
run: |
|
||||||
|
mkdir searxng
|
||||||
|
|
||||||
|
echo "use_default_settings: true
|
||||||
|
search:
|
||||||
|
formats: [html, json, csv]
|
||||||
|
server:
|
||||||
|
secret_key: 'fcsecret'" > searxng/settings.yml
|
||||||
|
|
||||||
|
docker run -d -p 3434:8080 -v "${PWD}/searxng:/etc/searxng" --name searxng searxng/searxng
|
||||||
|
pnpx wait-on tcp:3434 -t 30s
|
||||||
|
working-directory: ./
|
||||||
- name: Start server
|
- name: Start server
|
||||||
run: npm start > api.log 2>&1 &
|
run: npm start > api.log 2>&1 &
|
||||||
working-directory: ./apps/api
|
working-directory: ./apps/api
|
||||||
@ -75,7 +89,7 @@ jobs:
|
|||||||
run: npm run workers > worker.log 2>&1 &
|
run: npm run workers > worker.log 2>&1 &
|
||||||
working-directory: ./apps/api
|
working-directory: ./apps/api
|
||||||
- name: Start playwright
|
- name: Start playwright
|
||||||
if: matrix.playwright == true
|
if: matrix.engine == 'playwright'
|
||||||
run: npm run dev > playwright.log 2>&1 &
|
run: npm run dev > playwright.log 2>&1 &
|
||||||
working-directory: ./apps/playwright-service-ts
|
working-directory: ./apps/playwright-service-ts
|
||||||
env:
|
env:
|
||||||
@ -83,7 +97,7 @@ jobs:
|
|||||||
- name: Wait for server
|
- name: Wait for server
|
||||||
run: pnpx wait-on tcp:3002 -t 15s
|
run: pnpx wait-on tcp:3002 -t 15s
|
||||||
- name: Wait for playwright
|
- name: Wait for playwright
|
||||||
if: matrix.playwright == true
|
if: matrix.engine == 'playwright'
|
||||||
run: pnpx wait-on tcp:3003 -t 15s
|
run: pnpx wait-on tcp:3003 -t 15s
|
||||||
- name: Run snippet tests
|
- name: Run snippet tests
|
||||||
run: |
|
run: |
|
||||||
@ -92,16 +106,29 @@ jobs:
|
|||||||
- name: Kill instances
|
- name: Kill instances
|
||||||
if: always()
|
if: always()
|
||||||
run: pkill -9 node
|
run: pkill -9 node
|
||||||
|
- name: Kill SearXNG
|
||||||
|
if: always() && matrix.search == 'searxng'
|
||||||
|
run: |
|
||||||
|
docker logs searxng > searxng/searxng.log 2>&1
|
||||||
|
docker kill searxng
|
||||||
|
working-directory: ./
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
if: always()
|
if: always()
|
||||||
with:
|
with:
|
||||||
name: Logs (openai ${{ matrix.openai }}, serper ${{ matrix.serper }}, playwright ${{ matrix.playwright }})
|
name: Logs (${{ matrix.ai }}, ${{ matrix.search }}, ${{ matrix.engine }})
|
||||||
path: |
|
path: |
|
||||||
./apps/api/api.log
|
./apps/api/api.log
|
||||||
./apps/api/worker.log
|
./apps/api/worker.log
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
if: always() && matrix.playwright
|
# The matrix axis is `engine` ("playwright" | "fetch"); `matrix.playwright` no longer exists and is always falsy, which skipped this upload.
if: always() && matrix.engine == 'playwright'
|
||||||
with:
|
with:
|
||||||
name: Playwright Logs (openai ${{ matrix.openai }}, serper ${{ matrix.serper }})
|
name: Playwright Logs (${{ matrix.ai }}, ${{ matrix.search }})
|
||||||
path: |
|
path: |
|
||||||
./apps/playwright-service-ts/playwright.log
|
./apps/playwright-service-ts/playwright.log
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
if: always() && matrix.search == 'searxng'
|
||||||
|
with:
|
||||||
|
name: SearXNG (${{ matrix.ai }}, ${{ matrix.engine }})
|
||||||
|
path: |
|
||||||
|
./searxng/searxng.log
|
||||||
|
./searxng/settings.yml
|
||||||
|
@ -4,6 +4,7 @@ import { googleSearch } from "./googlesearch";
|
|||||||
import { fireEngineMap } from "./fireEngine";
|
import { fireEngineMap } from "./fireEngine";
|
||||||
import { searchapi_search } from "./searchapi";
|
import { searchapi_search } from "./searchapi";
|
||||||
import { serper_search } from "./serper";
|
import { serper_search } from "./serper";
|
||||||
|
import { searxng_search } from "./searxng";
|
||||||
|
|
||||||
export async function search({
|
export async function search({
|
||||||
query,
|
query,
|
||||||
@ -51,6 +52,16 @@ export async function search({
|
|||||||
location,
|
location,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
if (process.env.SEARXNG_ENDPOINT) {
|
||||||
|
return await searxng_search(query, {
|
||||||
|
num_results,
|
||||||
|
tbs,
|
||||||
|
filter,
|
||||||
|
lang,
|
||||||
|
country,
|
||||||
|
location,
|
||||||
|
});
|
||||||
|
}
|
||||||
return await googleSearch(
|
return await googleSearch(
|
||||||
query,
|
query,
|
||||||
advanced,
|
advanced,
|
||||||
|
64
apps/api/src/search/searxng.ts
Normal file
64
apps/api/src/search/searxng.ts
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
import axios from "axios";
|
||||||
|
import dotenv from "dotenv";
|
||||||
|
import { SearchResult } from "../../src/lib/entities";
|
||||||
|
import { logger } from "../lib/logger"
|
||||||
|
|
||||||
|
dotenv.config();
|
||||||
|
|
||||||
|
/**
 * Options accepted by `searxng_search`.
 *
 * The shape matches what the `search` dispatcher passes to this provider;
 * only some of the fields have a SearXNG request parameter to map to.
 */
interface SearchOptions {
  /**
   * Number of results requested by the caller. SearXNG has no matching
   * request parameter, so it is not forwarded in the query string.
   */
  num_results: number;
  /** Result page to fetch; sent as `pageno` and defaults to 1 when omitted. */
  page?: number;
  /** Language of the results; sent as the `language` query parameter. */
  lang?: string;
  /** Country hint; not forwarded (no SearXNG equivalent). */
  country?: string;
  /** Location hint; not forwarded (no SearXNG equivalent). */
  location?: string;
  /** Time-based search filter passed by the dispatcher; not forwarded by this provider. */
  tbs?: string;
  /** Result filter passed by the dispatcher; not forwarded by this provider. */
  filter?: string;
}
|
||||||
|
|
||||||
|
/**
 * Runs a web search against the SearXNG instance configured through the
 * `SEARXNG_ENDPOINT` environment variable and maps each hit to a
 * `SearchResult`-shaped object (`url`, `title`, `description`).
 *
 * Optional environment variables:
 * - `SEARXNG_ENGINES`: value for the `engines` query parameter (default `""`).
 * - `SEARXNG_CATEGORIES`: value for the `categories` query parameter
 *   (default `"general"`).
 *
 * This function never rejects: a missing endpoint, a request failure, or a
 * response without a `results` array is logged (where applicable) and
 * reported as an empty list.
 *
 * @param q - The search query.
 * @param options - Search options. `lang` and `page` are forwarded to
 *   SearXNG; `num_results` caps the returned list locally because SearXNG has
 *   no per-request result-count parameter; the remaining fields are ignored.
 * @returns At most `options.num_results` results, or `[]` on any failure.
 */
export async function searxng_search(
  q: string,
  options: SearchOptions,
): Promise<SearchResult[]> {
  const endpoint = process.env.SEARXNG_ENDPOINT;
  if (!endpoint) {
    // Keep the best-effort contract instead of throwing a raw TypeError on an
    // undefined endpoint before the try block is even entered.
    logger.error(`There was an error searching for content`, {
      error: new Error("SEARXNG_ENDPOINT is not set"),
    });
    return [];
  }

  const params = {
    q: q,
    language: options.lang,
    // gl: options.country, //not possible with SearXNG
    // location: options.location, //not possible with SearXNG
    // num: options.num_results, //not possible with SearXNG (capped locally below)
    engines: process.env.SEARXNG_ENGINES || "",
    categories: process.env.SEARXNG_CATEGORIES || "general",
    pageno: options.page ?? 1,
    format: "json",
  };

  // Drop every trailing slash so "http://host/" and "http://host//" both
  // resolve to "http://host/search".
  const finalUrl = endpoint.replace(/\/+$/, "") + "/search";

  try {
    const response = await axios.get(finalUrl, {
      headers: {
        "Content-Type": "application/json",
      },
      params: params,
    });

    const data = response.data;
    if (!data || !Array.isArray(data.results)) {
      return [];
    }

    // SearXNG returns a full page of hits; honour the caller's requested
    // count here. A non-positive or non-integer limit leaves the page as is.
    const limit = options.num_results;
    const hits =
      Number.isInteger(limit) && limit > 0
        ? data.results.slice(0, limit)
        : data.results;

    return hits.map((hit: { url: string; title: string; content: string }) => ({
      url: hit.url,
      title: hit.title,
      description: hit.content,
    }));
  } catch (error) {
    logger.error(`There was an error searching for content`, { error });
    return [];
  }
}
|
Loading…
x
Reference in New Issue
Block a user