feat: index brief in JSON format

This commit is contained in:
yanlong.wang 2024-05-23 12:06:07 +08:00
parent 1c944562f7
commit 8eee95119d
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
2 changed files with 39 additions and 16 deletions

View File

@ -21,6 +21,7 @@ import { randomUUID } from 'crypto';
import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth'; import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth';
import { countGPTToken as estimateToken } from '../shared/utils/openai'; import { countGPTToken as estimateToken } from '../shared/utils/openai';
import { JinaEmbeddingsTokenAccount } from '../shared/db/jina-embeddings-token-account';
const md5Hasher = new HashManager('md5', 'hex'); const md5Hasher = new HashManager('md5', 'hex');
@ -44,6 +45,16 @@ export interface FormattedPage {
toString: () => string; toString: () => string;
} }
/**
 * Prototype object supplying a plain-text rendering for index objects.
 */
const indexProto = {
    /**
     * Renders the receiver's key/value pairs (as enumerated by lodash `toPairs`)
     * one per line in the form `[Label] value`, followed by a trailing newline.
     *
     * The label is the key passed through lodash `lowerCase` and then `upperFirst`.
     * A pair whose key is empty (falsy) is rendered as an empty line.
     *
     * @returns the newline-joined text, always ending with `\n`.
     */
    toString: function (): string {
        const renderedLines = _.toPairs(this).map(([key, value]) => {
            if (!key) {
                // Empty key: emit an empty line instead of a `[Label] value` entry.
                return '';
            }

            return `[${_.upperFirst(_.lowerCase(key))}] ${value}`;
        });

        return `${renderedLines.join('\n')}\n`;
    }
};
@singleton() @singleton()
export class CrawlerHost extends RPCHost { export class CrawlerHost extends RPCHost {
logger = this.globalLogger.child({ service: this.constructor.name }); logger = this.globalLogger.child({ service: this.constructor.name });
@ -54,12 +65,6 @@ export class CrawlerHost extends RPCHost {
cacheValidMs = 1000 * 3600; cacheValidMs = 1000 * 3600;
urlValidMs = 1000 * 3600 * 4; urlValidMs = 1000 * 3600 * 4;
indexText = `[Usage1] https://r.jina.ai/YOUR_URL
[Usage2] https://s.jina.ai/YOUR_SEARCH_QUERY
[Homepage] https://jina.ai/reader
[Source code] https://github.com/jina-ai/reader
`;
constructor( constructor(
protected globalLogger: Logger, protected globalLogger: Logger,
protected puppeteerControl: PuppeteerControl, protected puppeteerControl: PuppeteerControl,
@ -89,6 +94,25 @@ export class CrawlerHost extends RPCHost {
this.emit('ready'); this.emit('ready');
} }
/**
 * Builds the index brief: usage URLs, homepage and source code link, plus
 * account details when a user is supplied.
 *
 * The returned object is created with `indexProto` as its prototype, so it
 * inherits that prototype's `toString` while the data fields remain its own
 * properties.
 *
 * @param user - optional account; when given, `authenticatedAs` and
 *   `balanceLeft` are added after an empty-key entry.
 * @returns the index object.
 */
getIndex(user?: JinaEmbeddingsTokenAccount) {
    const brief: Record<string, string | number | undefined> = Object.assign(
        Object.create(indexProto),
        {
            usage1: 'https://r.jina.ai/YOUR_URL',
            usage2: 'https://s.jina.ai/YOUR_SEARCH_QUERY',
            homepage: 'https://jina.ai/reader',
            sourceCode: 'https://github.com/jina-ai/reader',
        }
    );

    if (!user) {
        return brief;
    }

    // Empty-key entry: indexProto.toString renders a falsy key as an empty line,
    // which separates the static links from the account details.
    brief[''] = undefined;
    brief.authenticatedAs = `${user.user_id} (${user.full_name})`;
    brief.balanceLeft = user.wallet.total_balance;

    return brief;
}
getTurndown(noRules?: boolean | string) { getTurndown(noRules?: boolean | string) {
const turnDownService = new TurndownService(); const turnDownService = new TurndownService();
if (!noRules) { if (!noRules) {
@ -497,12 +521,11 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
const noSlashURL = ctx.req.url.slice(1); const noSlashURL = ctx.req.url.slice(1);
if (!noSlashURL) { if (!noSlashURL) {
const latestUser = uid ? await auth.assertUser() : undefined; const latestUser = uid ? await auth.assertUser() : undefined;
const authMixin = latestUser ? ` if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
[Authenticated as] ${latestUser.user_id} (${latestUser.full_name}) return this.getIndex(latestUser);
[Balance left] ${latestUser.wallet.total_balance} }
` : '';
return assignTransferProtocolMeta(`${this.indexText}${authMixin}`, return assignTransferProtocolMeta(`${this.getIndex(latestUser)}`,
{ contentType: 'text/plain', envelope: null } { contentType: 'text/plain', envelope: null }
); );
} }

View File

@ -152,12 +152,12 @@ export class SearcherHost extends RPCHost {
const noSlashPath = ctx.req.url.slice(1); const noSlashPath = ctx.req.url.slice(1);
if (!noSlashPath) { if (!noSlashPath) {
const latestUser = uid ? await auth.assertUser() : undefined; const latestUser = uid ? await auth.assertUser() : undefined;
const authMixin = latestUser ? ` if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
[Authenticated as] ${latestUser.user_id} (${latestUser.full_name})
[Balance left] ${latestUser.wallet.total_balance}
` : '';
return assignTransferProtocolMeta(`${this.crawler.indexText}${authMixin}`, return this.crawler.getIndex(latestUser);
}
return assignTransferProtocolMeta(`${this.crawler.getIndex(latestUser)}`,
{ contentType: 'text/plain', envelope: null } { contentType: 'text/plain', envelope: null }
); );
} }