mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader.git
synced 2025-08-19 15:09:10 +08:00
cleanup: use local project code as much as possible
This commit is contained in:
parent
512f225692
commit
66db31788e
@ -32,14 +32,16 @@ import { GlobalLogger } from '../services/logger';
|
|||||||
import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit';
|
import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit';
|
||||||
import { AsyncLocalContext } from '../services/async-context';
|
import { AsyncLocalContext } from '../services/async-context';
|
||||||
import { Context, Ctx, Method, Param, RPCReflect } from '../services/registry';
|
import { Context, Ctx, Method, Param, RPCReflect } from '../services/registry';
|
||||||
import { BudgetExceededError, InsufficientBalanceError, SecurityCompromiseError } from '../services/errors';
|
import {
|
||||||
|
BudgetExceededError, InsufficientBalanceError,
|
||||||
|
SecurityCompromiseError, ServiceBadApproachError, ServiceBadAttemptError
|
||||||
|
} from '../services/errors';
|
||||||
|
|
||||||
import { countGPTToken as estimateToken } from '../shared/utils/openai';
|
import { countGPTToken as estimateToken } from '../shared/utils/openai';
|
||||||
import { ProxyProvider } from '../shared/services/proxy-provider';
|
import { ProxyProvider } from '../shared/services/proxy-provider';
|
||||||
import { FirebaseStorageBucketControl } from '../shared/services/firebase-storage-bucket';
|
import { FirebaseStorageBucketControl } from '../shared/services/firebase-storage-bucket';
|
||||||
import { JinaEmbeddingsAuthDTO } from '../dto/jina-embeddings-auth';
|
import { JinaEmbeddingsAuthDTO } from '../dto/jina-embeddings-auth';
|
||||||
import { RobotsTxtService } from '../services/robots-text';
|
import { RobotsTxtService } from '../services/robots-text';
|
||||||
import { ServiceBadAttemptError } from '../shared/lib/errors';
|
|
||||||
|
|
||||||
export interface ExtraScrappingOptions extends ScrappingOptions {
|
export interface ExtraScrappingOptions extends ScrappingOptions {
|
||||||
withIframe?: boolean | 'quoted';
|
withIframe?: boolean | 'quoted';
|
||||||
@ -758,7 +760,9 @@ export class CrawlerHost extends RPCHost {
|
|||||||
let analyzed = await this.jsdomControl.analyzeHTMLTextLite(draftSnapshot.html);
|
let analyzed = await this.jsdomControl.analyzeHTMLTextLite(draftSnapshot.html);
|
||||||
draftSnapshot.title ??= analyzed.title;
|
draftSnapshot.title ??= analyzed.title;
|
||||||
let fallbackProxyIsUsed = false;
|
let fallbackProxyIsUsed = false;
|
||||||
if ((!crawlOpts?.allocProxy && !crawlOpts?.proxyUrl) && (analyzed.tokens < 42 || sideLoaded.status !== 200)) {
|
if (((!crawlOpts?.allocProxy || crawlOpts.allocProxy === 'none') && !crawlOpts?.proxyUrl) &&
|
||||||
|
(analyzed.tokens < 42 || sideLoaded.status !== 200)
|
||||||
|
) {
|
||||||
const proxyLoaded = await this.sideLoadWithAllocatedProxy(urlToCrawl, altOpts);
|
const proxyLoaded = await this.sideLoadWithAllocatedProxy(urlToCrawl, altOpts);
|
||||||
if (!proxyLoaded.file) {
|
if (!proxyLoaded.file) {
|
||||||
throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`);
|
throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`);
|
||||||
@ -904,7 +908,7 @@ export class CrawlerHost extends RPCHost {
|
|||||||
}
|
}
|
||||||
this.threadLocal.set('retainImages', opts.retainImages);
|
this.threadLocal.set('retainImages', opts.retainImages);
|
||||||
this.threadLocal.set('noGfm', opts.noGfm);
|
this.threadLocal.set('noGfm', opts.noGfm);
|
||||||
this.threadLocal.set('DNT', Boolean(opts.doNotTrack))
|
this.threadLocal.set('DNT', Boolean(opts.doNotTrack));
|
||||||
|
|
||||||
const crawlOpts: ExtraScrappingOptions = {
|
const crawlOpts: ExtraScrappingOptions = {
|
||||||
proxyUrl: opts.proxyUrl,
|
proxyUrl: opts.proxyUrl,
|
||||||
@ -1146,6 +1150,9 @@ export class CrawlerHost extends RPCHost {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@retryWith((err) => {
|
@retryWith((err) => {
|
||||||
|
if (err instanceof ServiceBadApproachError) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (err instanceof ServiceBadAttemptError) {
|
if (err instanceof ServiceBadAttemptError) {
|
||||||
// Keep trying
|
// Keep trying
|
||||||
return true;
|
return true;
|
||||||
@ -1157,6 +1164,9 @@ export class CrawlerHost extends RPCHost {
|
|||||||
return undefined;
|
return undefined;
|
||||||
}, 3)
|
}, 3)
|
||||||
async sideLoadWithAllocatedProxy(url: URL, opts?: ExtraScrappingOptions) {
|
async sideLoadWithAllocatedProxy(url: URL, opts?: ExtraScrappingOptions) {
|
||||||
|
if (opts?.allocProxy === 'none') {
|
||||||
|
return this.curlControl.sideLoad(url, opts);
|
||||||
|
}
|
||||||
const proxy = await this.proxyProvider.alloc(opts?.allocProxy);
|
const proxy = await this.proxyProvider.alloc(opts?.allocProxy);
|
||||||
const r = await this.curlControl.sideLoad(url, {
|
const r = await this.curlControl.sideLoad(url, {
|
||||||
...opts,
|
...opts,
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import { AssertionFailureError, AsyncService, HashManager } from 'civkit';
|
import { AssertionFailureError, AsyncService, HashManager } from 'civkit';
|
||||||
import { singleton } from 'tsyringe';
|
import { singleton } from 'tsyringe';
|
||||||
import { Logger } from '../shared/services/logger';
|
import { GlobalLogger } from './logger';
|
||||||
import { CanvasService } from '../shared/services/canvas';
|
import { CanvasService } from '../shared/services/canvas';
|
||||||
import { ImageInterrogationManager } from '../shared/services/common-iminterrogate';
|
import { ImageInterrogationManager } from '../shared/services/common-iminterrogate';
|
||||||
import { ImgBrief } from './puppeteer';
|
import { ImgBrief } from './puppeteer';
|
||||||
@ -16,7 +16,7 @@ export class AltTextService extends AsyncService {
|
|||||||
logger = this.globalLogger.child({ service: this.constructor.name });
|
logger = this.globalLogger.child({ service: this.constructor.name });
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: GlobalLogger,
|
||||||
protected imageInterrogator: ImageInterrogationManager,
|
protected imageInterrogator: ImageInterrogationManager,
|
||||||
protected canvasService: CanvasService,
|
protected canvasService: CanvasService,
|
||||||
protected asyncLocalContext: AsyncLocalContext
|
protected asyncLocalContext: AsyncLocalContext
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
import { AsyncService, AutoCastable, DownstreamServiceFailureError, Prop, RPC_CALL_ENVIRONMENT, delay, marshalErrorLike } from 'civkit';
|
import { AsyncService, AutoCastable, DownstreamServiceFailureError, Prop, RPC_CALL_ENVIRONMENT, delay, marshalErrorLike } from 'civkit';
|
||||||
import { singleton } from 'tsyringe';
|
import { singleton } from 'tsyringe';
|
||||||
import { Logger } from '../shared/services/logger';
|
import { GlobalLogger } from './logger';
|
||||||
import { SecretExposer } from '../shared/services/secrets';
|
import { SecretExposer } from '../shared/services/secrets';
|
||||||
import { BraveSearchHTTP, WebSearchQueryParams } from '../shared/3rd-party/brave-search';
|
import { BraveSearchHTTP, WebSearchQueryParams } from '../shared/3rd-party/brave-search';
|
||||||
import { GEOIP_SUPPORTED_LANGUAGES, GeoIPService } from './geoip';
|
import { GEOIP_SUPPORTED_LANGUAGES, GeoIPService } from './geoip';
|
||||||
import { AsyncContext } from '../shared';
|
import { AsyncLocalContext } from './async-context';
|
||||||
import { WebSearchOptionalHeaderOptions } from '../shared/3rd-party/brave-types';
|
import { WebSearchOptionalHeaderOptions } from '../shared/3rd-party/brave-types';
|
||||||
import type { Request, Response } from 'express';
|
import type { Request, Response } from 'express';
|
||||||
import { BlackHoleDetector } from './blackhole-detector';
|
import { BlackHoleDetector } from './blackhole-detector';
|
||||||
@ -17,10 +17,10 @@ export class BraveSearchService extends AsyncService {
|
|||||||
braveSearchHTTP!: BraveSearchHTTP;
|
braveSearchHTTP!: BraveSearchHTTP;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: GlobalLogger,
|
||||||
protected secretExposer: SecretExposer,
|
protected secretExposer: SecretExposer,
|
||||||
protected geoipControl: GeoIPService,
|
protected geoipControl: GeoIPService,
|
||||||
protected threadLocal: AsyncContext,
|
protected threadLocal: AsyncLocalContext,
|
||||||
protected blackHoleDetector: BlackHoleDetector,
|
protected blackHoleDetector: BlackHoleDetector,
|
||||||
) {
|
) {
|
||||||
super(...arguments);
|
super(...arguments);
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import { container, singleton } from 'tsyringe';
|
import { container, singleton } from 'tsyringe';
|
||||||
import { AsyncService } from 'civkit/async-service';
|
import { AsyncService } from 'civkit/async-service';
|
||||||
import { Logger, SecretExposer } from '../shared';
|
import { SecretExposer } from '../shared/services/secrets';
|
||||||
|
import { GlobalLogger } from './logger';
|
||||||
import { CloudFlareHTTP } from '../shared/3rd-party/cloud-flare';
|
import { CloudFlareHTTP } from '../shared/3rd-party/cloud-flare';
|
||||||
|
|
||||||
@singleton()
|
@singleton()
|
||||||
@ -10,7 +11,7 @@ export class CFBrowserRendering extends AsyncService {
|
|||||||
client!: CloudFlareHTTP;
|
client!: CloudFlareHTTP;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: GlobalLogger,
|
||||||
protected secretExposer: SecretExposer,
|
protected secretExposer: SecretExposer,
|
||||||
) {
|
) {
|
||||||
super(...arguments);
|
super(...arguments);
|
||||||
|
@ -5,9 +5,10 @@ import { Curl, CurlCode, CurlFeature, HeaderInfo } from 'node-libcurl';
|
|||||||
import { parseString as parseSetCookieString } from 'set-cookie-parser';
|
import { parseString as parseSetCookieString } from 'set-cookie-parser';
|
||||||
|
|
||||||
import { ScrappingOptions } from './puppeteer';
|
import { ScrappingOptions } from './puppeteer';
|
||||||
import { Logger } from '../shared/services/logger';
|
import { GlobalLogger } from './logger';
|
||||||
import { AssertionFailureError, FancyFile } from 'civkit';
|
import { AssertionFailureError, FancyFile } from 'civkit';
|
||||||
import { ServiceBadAttemptError, TempFileManager } from '../shared';
|
import { ServiceBadAttemptError, ServiceBadApproachError } from './errors';
|
||||||
|
import { TempFileManager } from '../services/temp-file';
|
||||||
import { createBrotliDecompress, createInflate, createGunzip } from 'zlib';
|
import { createBrotliDecompress, createInflate, createGunzip } from 'zlib';
|
||||||
import { ZSTDDecompress } from 'simple-zstd';
|
import { ZSTDDecompress } from 'simple-zstd';
|
||||||
import _ from 'lodash';
|
import _ from 'lodash';
|
||||||
@ -32,7 +33,7 @@ export class CurlControl extends AsyncService {
|
|||||||
lifeCycleTrack = new WeakMap();
|
lifeCycleTrack = new WeakMap();
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: GlobalLogger,
|
||||||
protected tempFileManager: TempFileManager,
|
protected tempFileManager: TempFileManager,
|
||||||
protected asyncLocalContext: AsyncLocalContext,
|
protected asyncLocalContext: AsyncLocalContext,
|
||||||
) {
|
) {
|
||||||
@ -328,7 +329,7 @@ export class CurlControl extends AsyncService {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
if (!location && cookieRedirects > 1) {
|
if (!location && cookieRedirects > 1) {
|
||||||
throw new ServiceBadAttemptError(`Failed to access ${urlToCrawl}: Browser required to solve complex cookie preconditions.`);
|
throw new ServiceBadApproachError(`Failed to access ${urlToCrawl}: Browser required to solve complex cookie preconditions.`);
|
||||||
}
|
}
|
||||||
|
|
||||||
nextHopUrl = new URL(location || '', nextHopUrl);
|
nextHopUrl = new URL(location || '', nextHopUrl);
|
||||||
|
@ -14,6 +14,12 @@ export class ServiceCrashedError extends ApplicationError { }
|
|||||||
@StatusCode(50303)
|
@StatusCode(50303)
|
||||||
export class ServiceNodeResourceDrainError extends ApplicationError { }
|
export class ServiceNodeResourceDrainError extends ApplicationError { }
|
||||||
|
|
||||||
|
@StatusCode(50304)
|
||||||
|
export class ServiceBadAttemptError extends ApplicationError { }
|
||||||
|
|
||||||
|
@StatusCode(50305)
|
||||||
|
export class ServiceBadApproachError extends ServiceBadAttemptError { }
|
||||||
|
|
||||||
@StatusCode(40104)
|
@StatusCode(40104)
|
||||||
export class EmailUnverifiedError extends ApplicationError { }
|
export class EmailUnverifiedError extends ApplicationError { }
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ import { container, singleton } from 'tsyringe';
|
|||||||
import fsp from 'fs/promises';
|
import fsp from 'fs/promises';
|
||||||
import { CityResponse, Reader } from 'maxmind';
|
import { CityResponse, Reader } from 'maxmind';
|
||||||
import { AsyncService, AutoCastable, Prop, runOnce } from 'civkit';
|
import { AsyncService, AutoCastable, Prop, runOnce } from 'civkit';
|
||||||
import { Logger } from '../shared';
|
import { GlobalLogger } from './logger';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
|
|
||||||
export enum GEOIP_SUPPORTED_LANGUAGES {
|
export enum GEOIP_SUPPORTED_LANGUAGES {
|
||||||
@ -61,7 +61,7 @@ export class GeoIPService extends AsyncService {
|
|||||||
mmdbCity!: Reader<CityResponse>;
|
mmdbCity!: Reader<CityResponse>;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: GlobalLogger,
|
||||||
) {
|
) {
|
||||||
super(...arguments);
|
super(...arguments);
|
||||||
}
|
}
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
import { container, singleton } from 'tsyringe';
|
import { container, singleton } from 'tsyringe';
|
||||||
import { AsyncService, marshalErrorLike } from 'civkit';
|
import { AsyncService, marshalErrorLike } from 'civkit';
|
||||||
import { Logger } from '../shared/services/logger';
|
import { GlobalLogger } from './logger';
|
||||||
import { ExtendedSnapshot, ImgBrief, PageSnapshot } from './puppeteer';
|
import { ExtendedSnapshot, ImgBrief, PageSnapshot } from './puppeteer';
|
||||||
import { Readability } from '@mozilla/readability';
|
import { Readability } from '@mozilla/readability';
|
||||||
import TurndownService from 'turndown';
|
import TurndownService from 'turndown';
|
||||||
import { Threaded } from '../services/threaded';
|
import { Threaded } from '../services/threaded';
|
||||||
import type { ExtraScrappingOptions } from '../api/crawler';
|
import type { ExtraScrappingOptions } from '../api/crawler';
|
||||||
import { tailwindClasses } from '../utils/tailwind-classes';
|
import { tailwindClasses } from '../utils/tailwind-classes';
|
||||||
import { countGPTToken } from '../shared';
|
import { countGPTToken } from '../shared/utils/openai';
|
||||||
|
|
||||||
const pLinkedom = import('linkedom');
|
const pLinkedom = import('linkedom');
|
||||||
|
|
||||||
@ -19,7 +19,7 @@ export class JSDomControl extends AsyncService {
|
|||||||
linkedom!: Awaited<typeof pLinkedom>;
|
linkedom!: Awaited<typeof pLinkedom>;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: GlobalLogger,
|
||||||
) {
|
) {
|
||||||
super(...arguments);
|
super(...arguments);
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@ import { AsyncService } from 'civkit/async-service';
|
|||||||
import { singleton } from 'tsyringe';
|
import { singleton } from 'tsyringe';
|
||||||
|
|
||||||
import { PageSnapshot } from './puppeteer';
|
import { PageSnapshot } from './puppeteer';
|
||||||
import { Logger } from '../shared/services/logger';
|
import { GlobalLogger } from './logger';
|
||||||
import _ from 'lodash';
|
import _ from 'lodash';
|
||||||
import { AssertionFailureError } from 'civkit';
|
import { AssertionFailureError } from 'civkit';
|
||||||
import { LLMManager } from '../shared/services/common-llm';
|
import { LLMManager } from '../shared/services/common-llm';
|
||||||
@ -16,7 +16,7 @@ export class LmControl extends AsyncService {
|
|||||||
logger = this.globalLogger.child({ service: this.constructor.name });
|
logger = this.globalLogger.child({ service: this.constructor.name });
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: GlobalLogger,
|
||||||
protected commonLLM: LLMManager,
|
protected commonLLM: LLMManager,
|
||||||
protected jsdomControl: JSDomControl,
|
protected jsdomControl: JSDomControl,
|
||||||
) {
|
) {
|
||||||
|
@ -3,10 +3,10 @@ import { singleton } from 'tsyringe';
|
|||||||
import _ from 'lodash';
|
import _ from 'lodash';
|
||||||
import { TextItem } from 'pdfjs-dist/types/src/display/api';
|
import { TextItem } from 'pdfjs-dist/types/src/display/api';
|
||||||
import { AsyncService, HashManager } from 'civkit';
|
import { AsyncService, HashManager } from 'civkit';
|
||||||
import { Logger } from '../shared/services/logger';
|
import { GlobalLogger } from './logger';
|
||||||
import { PDFContent } from '../db/pdf';
|
import { PDFContent } from '../db/pdf';
|
||||||
import dayjs from 'dayjs';
|
import dayjs from 'dayjs';
|
||||||
import { FirebaseStorageBucketControl } from '../shared';
|
import { FirebaseStorageBucketControl } from '../shared/services/firebase-storage-bucket';
|
||||||
import { randomUUID } from 'crypto';
|
import { randomUUID } from 'crypto';
|
||||||
import type { PDFDocumentLoadingTask } from 'pdfjs-dist';
|
import type { PDFDocumentLoadingTask } from 'pdfjs-dist';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
@ -55,7 +55,7 @@ export class PDFExtractor extends AsyncService {
|
|||||||
cacheRetentionMs = 1000 * 3600 * 24 * 7;
|
cacheRetentionMs = 1000 * 3600 * 24 * 7;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: GlobalLogger,
|
||||||
protected firebaseObjectStorage: FirebaseStorageBucketControl,
|
protected firebaseObjectStorage: FirebaseStorageBucketControl,
|
||||||
protected asyncLocalContext: AsyncLocalContext,
|
protected asyncLocalContext: AsyncLocalContext,
|
||||||
) {
|
) {
|
||||||
|
@ -2,14 +2,13 @@ import os from 'os';
|
|||||||
import fs from 'fs';
|
import fs from 'fs';
|
||||||
import { container, singleton } from 'tsyringe';
|
import { container, singleton } from 'tsyringe';
|
||||||
import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, Deferred, perNextTick, ParamValidationError, FancyFile } from 'civkit';
|
import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, Deferred, perNextTick, ParamValidationError, FancyFile } from 'civkit';
|
||||||
import { Logger } from '../shared/services/logger';
|
import { GlobalLogger } from './logger';
|
||||||
|
|
||||||
import type { Browser, CookieParam, GoToOptions, HTTPResponse, Page, Viewport } from 'puppeteer';
|
import type { Browser, CookieParam, GoToOptions, HTTPResponse, Page, Viewport } from 'puppeteer';
|
||||||
import type { Cookie } from 'set-cookie-parser';
|
import type { Cookie } from 'set-cookie-parser';
|
||||||
import puppeteer from 'puppeteer-extra';
|
import puppeteer from 'puppeteer-extra';
|
||||||
|
|
||||||
import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
|
import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
|
||||||
import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy';
|
|
||||||
import { SecurityCompromiseError, ServiceCrashedError, ServiceNodeResourceDrainError } from '../shared/lib/errors';
|
import { SecurityCompromiseError, ServiceCrashedError, ServiceNodeResourceDrainError } from '../shared/lib/errors';
|
||||||
import { TimeoutError } from 'puppeteer';
|
import { TimeoutError } from 'puppeteer';
|
||||||
import _ from 'lodash';
|
import _ from 'lodash';
|
||||||
@ -108,9 +107,6 @@ puppeteer.use(puppeteerBlockResources({
|
|||||||
blockedTypes: new Set(['media']),
|
blockedTypes: new Set(['media']),
|
||||||
interceptResolutionPriority: 1,
|
interceptResolutionPriority: 1,
|
||||||
}));
|
}));
|
||||||
puppeteer.use(puppeteerPageProxy({
|
|
||||||
interceptResolutionPriority: 1,
|
|
||||||
}));
|
|
||||||
|
|
||||||
const SIMULATE_SCROLL = `
|
const SIMULATE_SCROLL = `
|
||||||
(function () {
|
(function () {
|
||||||
@ -472,7 +468,7 @@ export class PuppeteerControl extends AsyncService {
|
|||||||
lifeCycleTrack = new WeakMap();
|
lifeCycleTrack = new WeakMap();
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: GlobalLogger,
|
||||||
protected asyncLocalContext: AsyncLocalContext,
|
protected asyncLocalContext: AsyncLocalContext,
|
||||||
protected curlControl: CurlControl,
|
protected curlControl: CurlControl,
|
||||||
protected blackHoleDetector: BlackHoleDetector,
|
protected blackHoleDetector: BlackHoleDetector,
|
||||||
|
@ -1,13 +1,12 @@
|
|||||||
import { singleton } from 'tsyringe';
|
import { singleton } from 'tsyringe';
|
||||||
|
import { URL } from 'url';
|
||||||
import { DownstreamServiceFailureError, ResourcePolicyDenyError } from 'civkit/civ-rpc';
|
import { DownstreamServiceFailureError, ResourcePolicyDenyError } from 'civkit/civ-rpc';
|
||||||
import { AsyncService } from 'civkit/async-service';
|
import { AsyncService } from 'civkit/async-service';
|
||||||
import { HashManager } from 'civkit/hash';
|
import { HashManager } from 'civkit/hash';
|
||||||
import { marshalErrorLike } from 'civkit/lang';
|
import { marshalErrorLike } from 'civkit/lang';
|
||||||
|
|
||||||
import { Logger } from '../shared/services/logger';
|
import { GlobalLogger } from './logger';
|
||||||
import { BraveSearchHTTP } from '../shared/3rd-party/brave-search';
|
import { FirebaseStorageBucketControl } from '../shared/services/firebase-storage-bucket';
|
||||||
import { FirebaseStorageBucketControl } from '../shared';
|
|
||||||
import { URL } from 'url';
|
|
||||||
import { Threaded } from '../services/threaded';
|
import { Threaded } from '../services/threaded';
|
||||||
|
|
||||||
|
|
||||||
@ -18,10 +17,8 @@ export class RobotsTxtService extends AsyncService {
|
|||||||
|
|
||||||
logger = this.globalLogger.child({ service: this.constructor.name });
|
logger = this.globalLogger.child({ service: this.constructor.name });
|
||||||
|
|
||||||
braveSearchHTTP!: BraveSearchHTTP;
|
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: GlobalLogger,
|
||||||
protected firebaseStorageBucketControl: FirebaseStorageBucketControl,
|
protected firebaseStorageBucketControl: FirebaseStorageBucketControl,
|
||||||
) {
|
) {
|
||||||
super(...arguments);
|
super(...arguments);
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
import { AsyncService, AutoCastable, DownstreamServiceFailureError, Prop, RPC_CALL_ENVIRONMENT, delay, marshalErrorLike } from 'civkit';
|
import { AsyncService, AutoCastable, DownstreamServiceFailureError, Prop, RPC_CALL_ENVIRONMENT, delay, marshalErrorLike } from 'civkit';
|
||||||
import { singleton } from 'tsyringe';
|
import { singleton } from 'tsyringe';
|
||||||
import { Logger } from '../shared/services/logger';
|
import { GlobalLogger } from './logger';
|
||||||
import { SecretExposer } from '../shared/services/secrets';
|
import { SecretExposer } from '../shared/services/secrets';
|
||||||
import { GEOIP_SUPPORTED_LANGUAGES, GeoIPService } from './geoip';
|
import { GEOIP_SUPPORTED_LANGUAGES, GeoIPService } from './geoip';
|
||||||
import { AsyncContext } from '../shared';
|
import { AsyncLocalContext } from './async-context';
|
||||||
import { SerperGoogleHTTP, SerperSearchQueryParams, WORLD_COUNTRIES } from '../shared/3rd-party/serper-search';
|
import { SerperGoogleHTTP, SerperSearchQueryParams, WORLD_COUNTRIES } from '../shared/3rd-party/serper-search';
|
||||||
import { BlackHoleDetector } from './blackhole-detector';
|
import { BlackHoleDetector } from './blackhole-detector';
|
||||||
import { Context } from './registry';
|
import { Context } from './registry';
|
||||||
@ -16,10 +16,10 @@ export class SerperSearchService extends AsyncService {
|
|||||||
serperSearchHTTP!: SerperGoogleHTTP;
|
serperSearchHTTP!: SerperGoogleHTTP;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: GlobalLogger,
|
||||||
protected secretExposer: SecretExposer,
|
protected secretExposer: SecretExposer,
|
||||||
protected geoipControl: GeoIPService,
|
protected geoipControl: GeoIPService,
|
||||||
protected threadLocal: AsyncContext,
|
protected threadLocal: AsyncLocalContext,
|
||||||
protected blackHoleDetector: BlackHoleDetector,
|
protected blackHoleDetector: BlackHoleDetector,
|
||||||
) {
|
) {
|
||||||
super(...arguments);
|
super(...arguments);
|
||||||
|
@ -2,7 +2,7 @@ import { randomUUID } from 'crypto';
|
|||||||
import { container, singleton } from 'tsyringe';
|
import { container, singleton } from 'tsyringe';
|
||||||
import { AssertionFailureError, AsyncService, FancyFile, HashManager, marshalErrorLike } from 'civkit';
|
import { AssertionFailureError, AsyncService, FancyFile, HashManager, marshalErrorLike } from 'civkit';
|
||||||
import TurndownService, { Filter, Rule } from 'turndown';
|
import TurndownService, { Filter, Rule } from 'turndown';
|
||||||
import { Logger } from '../shared/services/logger';
|
import { GlobalLogger } from './logger';
|
||||||
import { PageSnapshot } from './puppeteer';
|
import { PageSnapshot } from './puppeteer';
|
||||||
import { FirebaseStorageBucketControl } from '../shared/services/firebase-storage-bucket';
|
import { FirebaseStorageBucketControl } from '../shared/services/firebase-storage-bucket';
|
||||||
import { AsyncContext } from '../shared/services/async-context';
|
import { AsyncContext } from '../shared/services/async-context';
|
||||||
@ -16,7 +16,7 @@ import { STATUS_CODES } from 'http';
|
|||||||
import type { CrawlerOptions } from '../dto/crawler-options';
|
import type { CrawlerOptions } from '../dto/crawler-options';
|
||||||
import { readFile } from 'fs/promises';
|
import { readFile } from 'fs/promises';
|
||||||
import { pathToFileURL } from 'url';
|
import { pathToFileURL } from 'url';
|
||||||
import { countGPTToken } from '../shared';
|
import { countGPTToken } from '../shared/utils/openai';
|
||||||
|
|
||||||
|
|
||||||
export interface FormattedPage {
|
export interface FormattedPage {
|
||||||
@ -82,7 +82,7 @@ export class SnapshotFormatter extends AsyncService {
|
|||||||
gfmNoTable = [highlightedCodeBlock, gfmPlugin.strikethrough, gfmPlugin.taskListItems];
|
gfmNoTable = [highlightedCodeBlock, gfmPlugin.strikethrough, gfmPlugin.taskListItems];
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: GlobalLogger,
|
||||||
protected jsdomControl: JSDomControl,
|
protected jsdomControl: JSDomControl,
|
||||||
protected altTextService: AltTextService,
|
protected altTextService: AltTextService,
|
||||||
protected pdfExtractor: PDFExtractor,
|
protected pdfExtractor: PDFExtractor,
|
||||||
|
@ -1 +1 @@
|
|||||||
Subproject commit 20417f5bb7f8c773a835304f0624a180b558ff65
|
Subproject commit 755639081df7640733bb5f704460892a1a9059e7
|
Loading…
x
Reference in New Issue
Block a user