mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader.git
synced 2025-08-19 22:39:11 +08:00
feat: index brief in JSON format
This commit is contained in:
parent
1c944562f7
commit
8eee95119d
@ -21,6 +21,7 @@ import { randomUUID } from 'crypto';
|
|||||||
import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth';
|
import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth';
|
||||||
|
|
||||||
import { countGPTToken as estimateToken } from '../shared/utils/openai';
|
import { countGPTToken as estimateToken } from '../shared/utils/openai';
|
||||||
|
import { JinaEmbeddingsTokenAccount } from '../shared/db/jina-embeddings-token-account';
|
||||||
|
|
||||||
const md5Hasher = new HashManager('md5', 'hex');
|
const md5Hasher = new HashManager('md5', 'hex');
|
||||||
|
|
||||||
@ -44,6 +45,16 @@ export interface FormattedPage {
|
|||||||
toString: () => string;
|
toString: () => string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Prototype object that supplies a plain-text rendering (`toString`) for
 * index objects created with `Object.create(indexProto)`.
 *
 * `toString` emits one line per key/value pair of `this`, formatted as
 * `[Label] value`, where the label is the key passed through
 * `_.lowerCase` and then `_.upperFirst`. Lines are joined with '\n' and a
 * trailing '\n' is appended.
 *
 * NOTE(review): lodash documents `toPairs` as returning own enumerable
 * string-keyed pairs, so this inherited `toString` itself should not show up
 * in the output - confirm against the lodash version in use.
 */
const indexProto = {
|
||||||
|
toString: function (): string {
|
||||||
|
return _(this)
|
||||||
|
.toPairs()
|
||||||
|
// A falsy (empty) key maps to an empty string, which turns into a blank
// separator line once the pieces are joined below.
.map(([k, v]) => k ? `[${_.upperFirst(_.lowerCase(k))}] ${v}` : '')
|
||||||
|
.value()
|
||||||
|
.join('\n') + '\n';
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
@singleton()
|
@singleton()
|
||||||
export class CrawlerHost extends RPCHost {
|
export class CrawlerHost extends RPCHost {
|
||||||
logger = this.globalLogger.child({ service: this.constructor.name });
|
logger = this.globalLogger.child({ service: this.constructor.name });
|
||||||
@ -54,12 +65,6 @@ export class CrawlerHost extends RPCHost {
|
|||||||
cacheValidMs = 1000 * 3600;
|
cacheValidMs = 1000 * 3600;
|
||||||
urlValidMs = 1000 * 3600 * 4;
|
urlValidMs = 1000 * 3600 * 4;
|
||||||
|
|
||||||
indexText = `[Usage1] https://r.jina.ai/YOUR_URL
|
|
||||||
[Usage2] https://s.jina.ai/YOUR_SEARCH_QUERY
|
|
||||||
[Homepage] https://jina.ai/reader
|
|
||||||
[Source code] https://github.com/jina-ai/reader
|
|
||||||
`;
|
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: Logger,
|
||||||
protected puppeteerControl: PuppeteerControl,
|
protected puppeteerControl: PuppeteerControl,
|
||||||
@ -89,6 +94,25 @@ export class CrawlerHost extends RPCHost {
|
|||||||
this.emit('ready');
|
this.emit('ready');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the index (landing) object describing the service endpoints.
 *
 * The object is created with `indexProto` as its prototype, so interpolating
 * it into a string goes through `indexProto.toString`, while the object
 * itself carries the fields as plain properties.
 *
 * @param user Optional account; when given, the authenticated identity and
 *             the wallet balance are added to the result.
 * @returns The index object with `usage1`, `usage2`, `homepage`,
 *          `sourceCode`, plus `authenticatedAs` and `balanceLeft` when
 *          `user` is provided.
 */
getIndex(user?: JinaEmbeddingsTokenAccount) {
|
||||||
|
const indexObject: Record<string, string | number | undefined> = Object.create(indexProto);
|
||||||
|
|
||||||
|
Object.assign(indexObject, {
|
||||||
|
usage1: 'https://r.jina.ai/YOUR_URL',
|
||||||
|
usage2: 'https://s.jina.ai/YOUR_SEARCH_QUERY',
|
||||||
|
homepage: 'https://jina.ai/reader',
|
||||||
|
sourceCode: 'https://github.com/jina-ai/reader',
|
||||||
|
});
|
||||||
|
|
||||||
|
if (user) {
|
||||||
|
// The empty-string key is a separator: indexProto.toString renders an empty
// key as a blank line.
// NOTE(review): presumably the undefined value is dropped when the object is
// serialized as JSON - confirm against the response serializer.
indexObject[''] = undefined;
|
||||||
|
indexObject.authenticatedAs = `${user.user_id} (${user.full_name})`;
|
||||||
|
indexObject.balanceLeft = user.wallet.total_balance;
|
||||||
|
}
|
||||||
|
|
||||||
|
return indexObject;
|
||||||
|
}
|
||||||
|
|
||||||
getTurndown(noRules?: boolean | string) {
|
getTurndown(noRules?: boolean | string) {
|
||||||
const turnDownService = new TurndownService();
|
const turnDownService = new TurndownService();
|
||||||
if (!noRules) {
|
if (!noRules) {
|
||||||
@ -497,12 +521,11 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||||||
const noSlashURL = ctx.req.url.slice(1);
|
const noSlashURL = ctx.req.url.slice(1);
|
||||||
if (!noSlashURL) {
|
if (!noSlashURL) {
|
||||||
const latestUser = uid ? await auth.assertUser() : undefined;
|
const latestUser = uid ? await auth.assertUser() : undefined;
|
||||||
const authMixin = latestUser ? `
|
if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
|
||||||
[Authenticated as] ${latestUser.user_id} (${latestUser.full_name})
|
return this.getIndex(latestUser);
|
||||||
[Balance left] ${latestUser.wallet.total_balance}
|
}
|
||||||
` : '';
|
|
||||||
|
|
||||||
return assignTransferProtocolMeta(`${this.indexText}${authMixin}`,
|
return assignTransferProtocolMeta(`${this.getIndex(latestUser)}`,
|
||||||
{ contentType: 'text/plain', envelope: null }
|
{ contentType: 'text/plain', envelope: null }
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -152,12 +152,12 @@ export class SearcherHost extends RPCHost {
|
|||||||
const noSlashPath = ctx.req.url.slice(1);
|
const noSlashPath = ctx.req.url.slice(1);
|
||||||
if (!noSlashPath) {
|
if (!noSlashPath) {
|
||||||
const latestUser = uid ? await auth.assertUser() : undefined;
|
const latestUser = uid ? await auth.assertUser() : undefined;
|
||||||
const authMixin = latestUser ? `
|
if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
|
||||||
[Authenticated as] ${latestUser.user_id} (${latestUser.full_name})
|
|
||||||
[Balance left] ${latestUser.wallet.total_balance}
|
|
||||||
` : '';
|
|
||||||
|
|
||||||
return assignTransferProtocolMeta(`${this.crawler.indexText}${authMixin}`,
|
return this.crawler.getIndex(latestUser);
|
||||||
|
}
|
||||||
|
|
||||||
|
return assignTransferProtocolMeta(`${this.crawler.getIndex(latestUser)}`,
|
||||||
{ contentType: 'text/plain', envelope: null }
|
{ contentType: 'text/plain', envelope: null }
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user