Add searxng for search endpoint (#1193)

* add searxng.ts

* update to add searxng endpoint

* Apply suggestions from code review

* feat(ci/self-host): add tests with searxng

* feat(ci/self-host): bootstrap searxng for testing

* feat(ci): improvements in syntax

---------

Co-authored-by: Gergő Móricz <mo.geryy@gmail.com>
This commit is contained in:
Loris 2025-02-20 12:36:53 +01:00 committed by GitHub
parent 04218de2b0
commit 100168ddf3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 113 additions and 11 deletions

View File

@ -21,9 +21,9 @@ jobs:
name: Run tests name: Run tests
strategy: strategy:
matrix: matrix:
openai: [true, false] ai: ["openai", "no-ai"]
serper: [true, false] search: ["searxng", "google"]
playwright: [true, false] engine: ["playwright", "fetch"]
fail-fast: false fail-fast: false
runs-on: ubuntu-latest runs-on: ubuntu-latest
services: services:
@ -32,9 +32,9 @@ jobs:
ports: ports:
- 6379:6379 - 6379:6379
env: env:
OPENAI_API_KEY: ${{ matrix.openai == true && secrets.OPENAI_API_KEY || '' }} OPENAI_API_KEY: ${{ matrix.ai == 'openai' && secrets.OPENAI_API_KEY || '' }}
SERPER_API_KEY: ${{ matrix.serper == true && secrets.SERPER_API_KEY || '' }} SEARXNG_ENDPOINT: ${{ matrix.search == 'searxng' && 'http://localhost:3434' || '' }}
PLAYWRIGHT_MICROSERVICE_URL: ${{ matrix.playwright == true && 'http://localhost:3003/scrape' || '' }} PLAYWRIGHT_MICROSERVICE_URL: ${{ matrix.engine == 'playwright' && 'http://localhost:3003/scrape' || '' }}
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- name: Install pnpm - name: Install pnpm
@ -51,7 +51,7 @@ jobs:
run: pnpm install run: pnpm install
working-directory: ./apps/api working-directory: ./apps/api
- name: Install Playwright dependencies - name: Install Playwright dependencies
if: matrix.playwright == true if: matrix.engine == 'playwright'
run: | run: |
pnpm install pnpm install
pnpm exec playwright install-deps pnpm exec playwright install-deps
@ -68,6 +68,20 @@ jobs:
go build -o html-to-markdown.so -buildmode=c-shared html-to-markdown.go go build -o html-to-markdown.so -buildmode=c-shared html-to-markdown.go
chmod +x html-to-markdown.so chmod +x html-to-markdown.so
working-directory: ./apps/api/sharedLibs/go-html-to-md working-directory: ./apps/api/sharedLibs/go-html-to-md
- name: Set up SearXNG
if: matrix.search == 'searxng'
run: |
mkdir searxng
echo "use_default_settings: true
search:
formats: [html, json, csv]
server:
secret_key: 'fcsecret'" > searxng/settings.yml
docker run -d -p 3434:8080 -v "${PWD}/searxng:/etc/searxng" --name searxng searxng/searxng
pnpx wait-on tcp:3434 -t 30s
working-directory: ./
- name: Start server - name: Start server
run: npm start > api.log 2>&1 & run: npm start > api.log 2>&1 &
working-directory: ./apps/api working-directory: ./apps/api
@ -75,7 +89,7 @@ jobs:
run: npm run workers > worker.log 2>&1 & run: npm run workers > worker.log 2>&1 &
working-directory: ./apps/api working-directory: ./apps/api
- name: Start playwright - name: Start playwright
if: matrix.playwright == true if: matrix.engine == 'playwright'
run: npm run dev > playwright.log 2>&1 & run: npm run dev > playwright.log 2>&1 &
working-directory: ./apps/playwright-service-ts working-directory: ./apps/playwright-service-ts
env: env:
@ -83,7 +97,7 @@ jobs:
- name: Wait for server - name: Wait for server
run: pnpx wait-on tcp:3002 -t 15s run: pnpx wait-on tcp:3002 -t 15s
- name: Wait for playwright - name: Wait for playwright
if: matrix.playwright == true if: matrix.engine == 'playwright'
run: pnpx wait-on tcp:3003 -t 15s run: pnpx wait-on tcp:3003 -t 15s
- name: Run snippet tests - name: Run snippet tests
run: | run: |
@ -92,16 +106,29 @@ jobs:
- name: Kill instances - name: Kill instances
if: always() if: always()
run: pkill -9 node run: pkill -9 node
- name: Kill SearXNG
if: always() && matrix.search == 'searxng'
run: |
docker logs searxng > searxng/searxng.log 2>&1
docker kill searxng
working-directory: ./
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
if: always() if: always()
with: with:
name: Logs (openai ${{ matrix.openai }}, serper ${{ matrix.serper }}, playwright ${{ matrix.playwright }}) name: Logs (${{ matrix.ai }}, ${{ matrix.search }}, ${{ matrix.engine }})
path: | path: |
./apps/api/api.log ./apps/api/api.log
./apps/api/worker.log ./apps/api/worker.log
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
if: always() && matrix.playwright if: always() && matrix.engine == 'playwright'
with: with:
name: Playwright Logs (openai ${{ matrix.openai }}, serper ${{ matrix.serper }}) name: Playwright Logs (${{ matrix.ai }}, ${{ matrix.search }})
path: | path: |
./apps/playwright-service-ts/playwright.log ./apps/playwright-service-ts/playwright.log
- uses: actions/upload-artifact@v4
if: always() && matrix.search == 'searxng'
with:
name: SearXNG (${{ matrix.ai }}, ${{ matrix.engine }})
path: |
./searxng/searxng.log
./searxng/settings.yml

View File

@ -4,6 +4,7 @@ import { googleSearch } from "./googlesearch";
import { fireEngineMap } from "./fireEngine"; import { fireEngineMap } from "./fireEngine";
import { searchapi_search } from "./searchapi"; import { searchapi_search } from "./searchapi";
import { serper_search } from "./serper"; import { serper_search } from "./serper";
import { searxng_search } from "./searxng";
export async function search({ export async function search({
query, query,
@ -51,6 +52,16 @@ export async function search({
location, location,
}); });
} }
if (process.env.SEARXNG_ENDPOINT) {
return await searxng_search(query, {
num_results,
tbs,
filter,
lang,
country,
location,
});
}
return await googleSearch( return await googleSearch(
query, query,
advanced, advanced,

View File

@ -0,0 +1,64 @@
import axios from "axios";
import dotenv from "dotenv";
import { SearchResult } from "../../src/lib/entities";
import { logger } from "../lib/logger"
dotenv.config();
/**
 * Options accepted by the SearXNG search backend.
 *
 * The shape mirrors what the search dispatcher passes to its other providers;
 * several fields are accepted only for that parity and are not forwarded to
 * SearXNG by `searxng_search`.
 */
interface SearchOptions {
  /** Number of results the caller asked for. */
  num_results: number;
  /** Result page, forwarded as `pageno`; page 1 is used when omitted. */
  page?: number;
  /** Language code, forwarded as the `language` query parameter. */
  lang?: string;
  /** Accepted for call-site parity; not forwarded to SearXNG. */
  country?: string;
  /** Accepted for call-site parity; not forwarded to SearXNG. */
  location?: string;
  /** Accepted for call-site parity; not forwarded to SearXNG. */
  tbs?: string;
  /** Accepted for call-site parity; not forwarded to SearXNG. */
  filter?: string;
}
/**
 * Queries a SearXNG instance's `/search` endpoint and maps the JSON response
 * to `SearchResult`-shaped entries (`url`, `title`, `description`).
 *
 * Configuration is read from the environment:
 * - `SEARXNG_ENDPOINT`   – base URL of the instance (required).
 * - `SEARXNG_ENGINES`    – forwarded as `engines` (defaults to "").
 * - `SEARXNG_CATEGORIES` – forwarded as `categories` (defaults to "general").
 *
 * @param q - The search query.
 * @param options - Only `lang`, `page` and `num_results` are read here.
 * @returns At most `options.num_results` mapped results. An empty array is
 *   returned when the endpoint is not configured, when the response has no
 *   `results` array, or when the request fails (the failure is logged).
 */
export async function searxng_search(
  q: string,
  options: SearchOptions,
): Promise<SearchResult[]> {
  const endpoint = process.env.SEARXNG_ENDPOINT;
  if (!endpoint) {
    // Follow the same log-and-return-empty convention as a failed request
    // instead of throwing a TypeError on an undefined base URL.
    logger.error(`There was an error searching for content`, {
      error: new Error("SEARXNG_ENDPOINT is not set"),
    });
    return [];
  }

  const params = {
    q: q,
    language: options.lang,
    // gl: options.country, //not possible with SearXNG
    // location: options.location, //not possible with SearXNG
    // SearXNG has no per-request result count; the limit is applied to the
    // response below instead.
    engines: process.env.SEARXNG_ENGINES || "",
    categories: process.env.SEARXNG_CATEGORIES || "general",
    pageno: options.page ?? 1,
    format: "json",
  };

  // Drop every trailing slash so the joined URL never contains "//search".
  const finalUrl = endpoint.replace(/\/+$/, "") + "/search";

  try {
    const response = await axios.get(finalUrl, {
      headers: {
        "Content-Type": "application/json",
      },
      params: params,
    });

    const data = response.data;
    if (!data || !Array.isArray(data.results)) {
      return [];
    }

    // Honour the requested count client-side. A non-finite or negative value
    // leaves the list untouched rather than silently dropping results.
    const limit = options.num_results;
    const rows =
      Number.isFinite(limit) && limit >= 0
        ? data.results.slice(0, limit)
        : data.results;

    // The payload is untyped JSON from an external service.
    return rows.map((a: any) => ({
      url: a.url,
      title: a.title,
      description: a.content,
    }));
  } catch (error) {
    logger.error(`There was an error searching for content`, { error });
    return [];
  }
}