From 8eee95119d646302e173b05575b09a1e86332755 Mon Sep 17 00:00:00 2001 From: "yanlong.wang" Date: Thu, 23 May 2024 12:06:07 +0800 Subject: [PATCH] feat: index brief in JSON format --- .../functions/src/cloud-functions/crawler.ts | 45 ++++++++++++++----- .../functions/src/cloud-functions/searcher.ts | 10 ++--- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/backend/functions/src/cloud-functions/crawler.ts b/backend/functions/src/cloud-functions/crawler.ts index 3337333..0f64296 100644 --- a/backend/functions/src/cloud-functions/crawler.ts +++ b/backend/functions/src/cloud-functions/crawler.ts @@ -21,6 +21,7 @@ import { randomUUID } from 'crypto'; import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth'; import { countGPTToken as estimateToken } from '../shared/utils/openai'; +import { JinaEmbeddingsTokenAccount } from '../shared/db/jina-embeddings-token-account'; const md5Hasher = new HashManager('md5', 'hex'); @@ -44,6 +45,16 @@ export interface FormattedPage { toString: () => string; } +const indexProto = { + toString: function (): string { + return _(this) + .toPairs() + .map(([k, v]) => k ? `[${_.upperFirst(_.lowerCase(k))}] ${v}` : '') + .value() + .join('\n') + '\n'; + } +}; + @singleton() export class CrawlerHost extends RPCHost { logger = this.globalLogger.child({ service: this.constructor.name }); @@ -54,12 +65,6 @@ export class CrawlerHost extends RPCHost { cacheValidMs = 1000 * 3600; urlValidMs = 1000 * 3600 * 4; - indexText = `[Usage1] https://r.jina.ai/YOUR_URL -[Usage2] https://s.jina.ai/YOUR_SEARCH_QUERY -[Homepage] https://jina.ai/reader -[Source code] https://github.com/jina-ai/reader -`; - constructor( protected globalLogger: Logger, protected puppeteerControl: PuppeteerControl, @@ -89,6 +94,25 @@ export class CrawlerHost extends RPCHost { this.emit('ready'); } + getIndex(user?: JinaEmbeddingsTokenAccount) { + const indexObject: Record = Object.create(indexProto); + + Object.assign(indexObject, { + usage1: 'https://r.jina.ai/YOUR_URL', + usage2: 'https://s.jina.ai/YOUR_SEARCH_QUERY', + homepage: 'https://jina.ai/reader', + sourceCode: 'https://github.com/jina-ai/reader', + }); + + if (user) { + indexObject[''] = undefined; + indexObject.authenticatedAs = `${user.user_id} (${user.full_name})`; + indexObject.balanceLeft = user.wallet.total_balance; + } + + return indexObject; + } + getTurndown(noRules?: boolean | string) { const turnDownService = new TurndownService(); if (!noRules) { @@ -497,12 +521,11 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`; const noSlashURL = ctx.req.url.slice(1); if (!noSlashURL) { const latestUser = uid ? await auth.assertUser() : undefined; - const authMixin = latestUser ? ` -[Authenticated as] ${latestUser.user_id} (${latestUser.full_name}) -[Balance left] ${latestUser.wallet.total_balance} -` : ''; + if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) { + return this.getIndex(latestUser); + } - return assignTransferProtocolMeta(`${this.indexText}${authMixin}`, + return assignTransferProtocolMeta(`${this.getIndex(latestUser)}`, { contentType: 'text/plain', envelope: null } ); } diff --git a/backend/functions/src/cloud-functions/searcher.ts b/backend/functions/src/cloud-functions/searcher.ts index 126226d..e42c3fa 100644 --- a/backend/functions/src/cloud-functions/searcher.ts +++ b/backend/functions/src/cloud-functions/searcher.ts @@ -152,12 +152,12 @@ export class SearcherHost extends RPCHost { const noSlashPath = ctx.req.url.slice(1); if (!noSlashPath) { const latestUser = uid ? await auth.assertUser() : undefined; - const authMixin = latestUser ? ` -[Authenticated as] ${latestUser.user_id} (${latestUser.full_name}) -[Balance left] ${latestUser.wallet.total_balance} -` : ''; + if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) { - return assignTransferProtocolMeta(`${this.crawler.indexText}${authMixin}`, + return this.crawler.getIndex(latestUser); + } + + return assignTransferProtocolMeta(`${this.crawler.getIndex(latestUser)}`, { contentType: 'text/plain', envelope: null } ); }