serp: new trick

Yanlong Wang 2025-05-08 15:21:20 +08:00
parent 5f07900eab
commit a178723579
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
15 changed files with 784 additions and 142 deletions

.vscode/launch.json vendored

@ -90,6 +90,7 @@
],
"env": {
"GCLOUD_PROJECT": "reader-6b7dc",
"PREFERRED_PROXY_COUNTRY": "us",
"LD_PRELOAD": "/usr/local/lib/libcurl-impersonate-chrome.dylib"
},
"cwd": "${workspaceFolder}",
@ -110,8 +111,8 @@
],
"env": {
"GCLOUD_PROJECT": "reader-6b7dc",
"PREFERRED_PROXY_COUNTRY": "hk",
"OVERRIDE_GOOGLE_DOMAIN": "www.google.com.hk",
"PREFERRED_PROXY_COUNTRY": "us",
// "OVERRIDE_GOOGLE_DOMAIN": "www.google.com.hk",
"LD_PRELOAD": "/usr/local/lib/libcurl-impersonate-chrome.dylib"
},
"cwd": "${workspaceFolder}",


@ -18,10 +18,13 @@ Or just visit these URLs (**Read**) https://r.jina.ai/https://github.com/jina-ai
## Updates
- **2024-10-08**: Introduced an `adaptive crawler`. It can recursively crawl a website and extract the pages most relevant to a given webpage.
- **2024-07-15**: To restrict the results of `s.jina.ai` to a certain domain/website, you can set e.g. `site=jina.ai` in the query parameters, which enables in-site search (see the sketch after this list). For more options, [try our updated live-demo](https://jina.ai/reader/#apiform).
- **2024-07-01**: We have resolved a DDoS attack and other traffic abuse ongoing since June 27th. We also found a bug introduced on June 28th that could cause higher latency for some websites. Both the attack and the bug have been resolved; if you experienced high latency on r.jina.ai between June 27th and 30th, it should be back to normal now.
- **2024-05-30**: Reader can now read arbitrary PDFs from any URL! Check out [this PDF result from NASA.gov](https://r.jina.ai/https://www.nasa.gov/wp-content/uploads/2023/01/55583main_vision_space_exploration2.pdf) vs [the original](https://www.nasa.gov/wp-content/uploads/2023/01/55583main_vision_space_exploration2.pdf).
- **2024-05-15**: We introduced a new endpoint `s.jina.ai` that searches the web and returns the top-5 results, each in an LLM-friendly format. [Read more about this new feature here](https://jina.ai/news/jina-reader-for-search-grounding-to-improve-factuality-of-llms).
- **2024-05-08**: Image captioning is off by default for better latency. To turn it on, set `x-with-generated-alt: true` in the request header.
- **2024-05-03**: We finally resolved a DDoS attack that had been ongoing since April 29th. Our API is now more reliable and scalable than ever!
- **2024-04-24**: You now have more fine-grained control over the Reader API [using headers](#using-request-headers), e.g. forwarding cookies and using an HTTP proxy.
- **2024-04-15**: Reader now supports image reading! It captions all images at the specified URL and adds `Image [idx]: [caption]` as an alt tag (if they initially lack one), enabling downstream LLMs to use the images in reasoning, summarizing, etc. [See example here](https://x.com/JinaAI_/status/1780094402071023926).
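
A minimal sketch of the in-site search mentioned in the **2024-07-15** note (any HTTP client works; plain `fetch` shown here):

```ts
// Restrict s.jina.ai results to a single site via the `site` query parameter.
const res = await fetch('https://s.jina.ai/When%20was%20Jina%20AI%20founded%3F?site=jina.ai');
console.log(await res.text()); // top results, as LLM-friendly text, from jina.ai only
```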
@ -151,8 +154,15 @@ All images in that page that lack `alt` tag can be auto-captioned by a VLM (visi
curl -H "X-With-Generated-Alt: true" https://r.jina.ai/https://en.m.wikipedia.org/wiki/Main_Page
```
## How it works
[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/jina-ai/reader)
## Install
You will need the following tools to run the project:
- Node v18 (the build fails for Node versions >18)
```bash
git clone git@github.com:jina-ai/reader.git
npm install
```
## What is `thinapps-shared` submodule?

package-lock.json generated

@ -23,6 +23,7 @@
"express": "^4.19.2",
"firebase-admin": "^12.1.0",
"firebase-functions": "^6.1.1",
"generic-pool": "^3.9.0",
"htmlparser2": "^9.0.0",
"jose": "^5.1.0",
"koa": "^2.16.0",
@ -6249,7 +6250,7 @@
"version": "3.9.0",
"resolved": "https://registry.npmjs.org/generic-pool/-/generic-pool-3.9.0.tgz",
"integrity": "sha512-hymDOu5B53XvN4QT9dBmZxPX4CWhBPPLguTZ9MMFeFa/Kg0xWVfylOVNlJji/E7yTZWFd/q9GO5TxDLq156D7g==",
"optional": true,
"license": "MIT",
"engines": {
"node": ">= 4"
}


@ -32,6 +32,7 @@
"express": "^4.19.2",
"firebase-admin": "^12.1.0",
"firebase-functions": "^6.1.1",
"generic-pool": "^3.9.0",
"htmlparser2": "^9.0.0",
"jose": "^5.1.0",
"koa": "^2.16.0",


@ -49,6 +49,7 @@ import { TempFileManager } from '../services/temp-file';
import { MiscService } from '../services/misc';
import { HTTPServiceError } from 'civkit/http';
import { GeoIPService } from '../services/geoip';
import { writeFile } from 'fs/promises';
export interface ExtraScrappingOptions extends ScrappingOptions {
withIframe?: boolean | 'quoted';
@ -1145,7 +1146,16 @@ export class CrawlerHost extends RPCHost {
if (pdfUrl.startsWith('http')) {
const sideLoaded = scrappingOptions?.sideLoad?.impersonate[pdfUrl];
if (sideLoaded?.status === 200 && sideLoaded.body) {
snapshotCopy.pdfs[0] = pathToFileURL(await sideLoaded?.body.filePath).href;
let filePath = '';
if (sideLoaded.body instanceof Blob) {
const tmpPath = this.tempFileManager.alloc();
await writeFile(tmpPath, sideLoaded.body.stream());
this.tempFileManager.bindPathTo(this.threadLocal.ctx, tmpPath);
filePath = tmpPath;
} else {
filePath = await sideLoaded.body.filePath;
}
snapshotCopy.pdfs[0] = pathToFileURL(filePath).href;
return this.snapshotFormatter.formatSnapshot(mode, snapshotCopy, nominalUrl, urlValidMs);
}
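
A minimal sketch of the Blob-to-temp-file spill above, assuming Node 18+ where `fs/promises.writeFile` accepts a web `ReadableStream` (the helper name is illustrative):

```ts
import { writeFile } from 'fs/promises';

// Side-loaded PDF bodies may now arrive in memory as a Blob, but the PDF
// pipeline needs a real file path, so the Blob is streamed to a temp file.
async function spillBlobToFile(blob: Blob, tmpPath: string): Promise<string> {
    await writeFile(tmpPath, blob.stream());
    return tmpPath;
}
```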


@ -27,8 +27,10 @@ import { LRUCache } from 'lru-cache';
import { API_CALL_STATUS } from '../shared/db/api-roll';
import { SERPResult } from '../db/searched';
import { SerperSearchQueryParams, WORLD_COUNTRIES, WORLD_LANGUAGES } from '../shared/3rd-party/serper-search';
import { InternalJinaSerpService } from '../services/serp/internal';
import { WebSearchEntry } from '../services/serp/compat';
import { CommonGoogleSERP } from '../services/serp/common-serp';
import { GoogleSERP, GoogleSERPOldFashion } from '../services/serp/google';
import { InternalJinaSerpService } from '../services/serp/internal';
const WORLD_COUNTRY_CODES = Object.keys(WORLD_COUNTRIES).map((x) => x.toLowerCase());
@ -72,6 +74,9 @@ export class SearcherHost extends RPCHost {
protected serperGoogle: SerperGoogleSearchService,
protected serperBing: SerperBingSearchService,
protected jinaSerp: InternalJinaSerpService,
protected commonGoogleSerp: CommonGoogleSERP,
protected googleSERP: GoogleSERP,
protected googleSERPOld: GoogleSERPOldFashion,
) {
super(...arguments);
@ -715,26 +720,32 @@ export class SearcherHost extends RPCHost {
}
}
*iterProviders(preference?: string, variant?: string) {
*iterProviders(preference?: string, _variant?: string) {
if (preference === 'bing') {
yield this.serperBing;
yield variant === 'web' ? this.jinaSerp : this.serperGoogle;
yield this.googleSERP;
yield this.jinaSerp;
yield this.serperGoogle;
yield this.commonGoogleSerp;
return;
}
if (preference === 'google') {
yield variant === 'web' ? this.jinaSerp : this.serperGoogle;
yield this.serperGoogle;
yield this.googleSERP;
yield this.jinaSerp;
yield this.serperGoogle;
yield this.commonGoogleSerp;
yield this.googleSERPOld;
return;
}
yield variant === 'web' ? this.jinaSerp : this.serperGoogle;
yield this.serperGoogle;
yield this.googleSERP;
yield this.jinaSerp;
yield this.serperGoogle;
yield this.commonGoogleSerp;
yield this.googleSERPOld;
}
async cachedSearch(variant: 'web' | 'news' | 'images', query: Record<string, any>, noCache?: boolean): Promise<WebSearchEntry[]> {
@ -767,22 +778,27 @@ export class SearcherHost extends RPCHost {
outerLoop:
for (const client of this.iterProviders(provider, variant)) {
const t0 = Date.now();
try {
let func;
switch (variant) {
case 'images': {
r = await Reflect.apply(client.imageSearch, client, [query]);
func = Reflect.get(client, 'imageSearch');
break;
}
case 'news': {
r = await Reflect.apply(client.newsSearch, client, [query]);
func = Reflect.get(client, 'newsSearch');
break;
}
case 'web':
default: {
r = await Reflect.apply(client.webSearch, client, [query]);
func = Reflect.get(client, 'webSearch');
break;
}
}
if (!func) {
continue;
}
try {
r = await Reflect.apply(func, client, [query]);
const dt = Date.now() - t0;
this.logger.info(`Search took ${dt}ms, ${client.constructor.name}(${variant})`, { searchDt: dt, variant, client: client.constructor.name });
break outerLoop;
@ -806,6 +822,8 @@ export class SearcherHost extends RPCHost {
this.batchedCaches.push(record);
} else if (lastError) {
throw lastError;
} else if (!r) {
throw new AssertionFailureError(`No provider can do ${variant} search atm.`);
}
return r as WebSearchEntry[];
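
The dispatch above replaces direct method calls with a `Reflect.get` lookup, so providers that do not implement a variant are skipped instead of failing the request. A self-contained sketch of the pattern shared by `SearcherHost` here and `SerpHost` below (illustrative names, not the repo's actual types):

```ts
type SearchFn = (query: Record<string, any>) => Promise<unknown[]>;

interface SearchProvider {
    webSearch?: SearchFn;
    newsSearch?: SearchFn;
    imageSearch?: SearchFn;
}

const METHOD_BY_VARIANT = {
    web: 'webSearch',
    news: 'newsSearch',
    images: 'imageSearch',
} as const;

async function firstSuccessful(
    providers: Iterable<SearchProvider>,
    variant: keyof typeof METHOD_BY_VARIANT,
    query: Record<string, any>,
): Promise<unknown[]> {
    let lastError: unknown;
    for (const client of providers) {
        const func = Reflect.get(client, METHOD_BY_VARIANT[variant]) as SearchFn | undefined;
        if (!func) {
            continue; // provider cannot serve this variant; try the next one
        }
        try {
            return await Reflect.apply(func, client, [query]);
        } catch (err) {
            lastError = err; // remember and fall through to the next provider
        }
    }
    throw lastError ?? new Error(`No provider can do ${variant} search atm.`);
}
```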


@ -3,6 +3,7 @@ import {
RPCHost, RPCReflection, assignMeta, RawString,
ParamValidationError,
assignTransferProtocolMeta,
AssertionFailureError,
} from 'civkit/civ-rpc';
import { marshalErrorLike } from 'civkit/lang';
import _ from 'lodash';
@ -16,7 +17,7 @@ import { OutputServerEventStream } from '../lib/transform-server-event-stream';
import { JinaEmbeddingsAuthDTO } from '../dto/jina-embeddings-auth';
import { InsufficientBalanceError } from '../services/errors';
import { WORLD_COUNTRIES, WORLD_LANGUAGES } from '../shared/3rd-party/serper-search';
import { GoogleSERP } from '../services/serp/google';
import { GoogleSERP, GoogleSERPOldFashion } from '../services/serp/google';
import { WebSearchEntry } from '../services/serp/compat';
import { CrawlerOptions } from '../dto/crawler-options';
import { ScrappingOptions } from '../services/serp/puppeteer';
@ -26,6 +27,7 @@ import { SerperBingSearchService, SerperGoogleSearchService } from '../services/
import type { JinaEmbeddingsTokenAccount } from '../shared/db/jina-embeddings-token-account';
import { LRUCache } from 'lru-cache';
import { API_CALL_STATUS } from '../shared/db/api-roll';
import { CommonGoogleSERP } from '../services/serp/common-serp';
import { InternalJinaSerpService } from '../services/serp/internal';
const WORLD_COUNTRY_CODES = Object.keys(WORLD_COUNTRIES).map((x) => x.toLowerCase());
@ -91,8 +93,10 @@ export class SerpHost extends RPCHost {
protected rateLimitControl: RateLimitControl,
protected threadLocal: AsyncLocalContext,
protected googleSerp: GoogleSERP,
protected googleSerpOld: GoogleSERPOldFashion,
protected serperGoogle: SerperGoogleSearchService,
protected serperBing: SerperBingSearchService,
protected commonGoogleSerp: CommonGoogleSERP,
protected jinaSerp: InternalJinaSerpService,
) {
super(...arguments);
@ -157,7 +161,7 @@ export class SerpHost extends RPCHost {
@Param('num', { validate: (v: number) => v >= 0 && v <= 20 })
num?: number,
@Param('gl', { validate: (v: string) => WORLD_COUNTRY_CODES.includes(v?.toLowerCase()) }) gl?: string,
@Param('hl', { validate: (v: string) => WORLD_LANGUAGES.some(l => l.code === v) }) _hl?: string,
@Param('hl', { validate: (v: string) => WORLD_LANGUAGES.some(l => l.code === v) }) hl?: string,
@Param('location') location?: string,
@Param('page') page?: number,
@Param('fallback') fallback?: boolean,
@ -318,7 +322,7 @@ export class SerpHost extends RPCHost {
q,
num,
gl,
// hl,
hl,
location,
page,
}, crawlerOptions);
@ -451,27 +455,32 @@ export class SerpHost extends RPCHost {
return result;
}
*iterProviders(preference?: string, variant?: string) {
*iterProviders(preference?: string, _variant?: string) {
if (preference === 'bing') {
yield this.serperBing;
yield this.serperGoogle;
yield this.googleSerp;
yield this.jinaSerp;
yield this.serperGoogle;
yield this.commonGoogleSerp;
yield this.googleSerpOld;
return;
}
if (preference === 'google') {
yield this.googleSerp;
yield this.googleSerp;
yield this.serperGoogle;
yield this.commonGoogleSerp;
yield this.googleSerpOld;
return;
}
// yield variant === 'web' ? this.jinaSerp : this.serperGoogle;
yield this.serperGoogle
yield this.serperGoogle;
yield this.googleSerp;
yield this.jinaSerp;
yield this.serperGoogle;
yield this.commonGoogleSerp;
yield this.googleSerpOld;
}
async cachedSearch(variant: 'web' | 'news' | 'images', query: Record<string, any>, opts: CrawlerOptions) {
@ -506,22 +515,27 @@ export class SerpHost extends RPCHost {
outerLoop:
for (const client of this.iterProviders(provider, variant)) {
const t0 = Date.now();
try {
let func;
switch (variant) {
case 'images': {
r = await Reflect.apply(client.imageSearch, client, [query, scrappingOptions]);
func = Reflect.get(client, 'imageSearch');
break;
}
case 'news': {
r = await Reflect.apply(client.newsSearch, client, [query, scrappingOptions]);
func = Reflect.get(client, 'newsSearch');
break;
}
case 'web':
default: {
r = await Reflect.apply(client.webSearch, client, [query, scrappingOptions]);
func = Reflect.get(client, 'webSearch');
break;
}
}
if (!func) {
continue;
}
try {
r = await Reflect.apply(func, client, [query, scrappingOptions]);
const dt = Date.now() - t0;
this.logger.info(`Search took ${dt}ms, ${client.constructor.name}(${variant})`, { searchDt: dt, variant, client: client.constructor.name });
break outerLoop;
@ -544,6 +558,8 @@ export class SerpHost extends RPCHost {
this.batchedCaches.push(record);
} else if (lastError) {
throw lastError;
} else if (!r) {
throw new AssertionFailureError(`No provider can do ${variant} search atm.`);
}
return r;


@ -16,7 +16,7 @@ import { Readable } from 'stream';
import { AsyncLocalContext } from './async-context';
import { BlackHoleDetector } from './blackhole-detector';
export interface CURLScrappingOptions extends ScrappingOptions {
export interface CURLScrappingOptions<T = any> extends ScrappingOptions<T> {
method?: string;
body?: string | Buffer;
}
@ -75,7 +75,7 @@ export class CurlControl extends AsyncService {
}
const mixinHeaders: Record<string, string> = {
'Sec-Ch-Ua': `Not A(Brand";v="8", "Chromium";v="${this.chromeVersion}", "Google Chrome";v="${this.chromeVersion}"`,
'Sec-Ch-Ua': `"Google Chrome";v="${this.chromeVersion}", "Not-A.Brand";v="8", "Chromium";v="${this.chromeVersion}"`,
'Sec-Ch-Ua-Mobile': '?0',
'Sec-Ch-Ua-Platform': `"${uaPlatform}"`,
'Upgrade-Insecure-Requests': '1',
@ -108,11 +108,11 @@ export class CurlControl extends AsyncService {
return curl;
}
urlToFile1Shot(urlToCrawl: URL, crawlOpts?: CURLScrappingOptions) {
urlToStream(urlToCrawl: URL, crawlOpts?: CURLScrappingOptions) {
return new Promise<{
statusCode: number,
statusText?: string,
data?: FancyFile,
data?: Readable,
headers: HeaderInfo[],
}>((resolve, reject) => {
let contentType = '';
@ -193,7 +193,7 @@ export class CurlControl extends AsyncService {
});
curl.setOpt(Curl.option.MAXFILESIZE, 4 * 1024 * 1024 * 1024); // 4GB
let status = -1;
let statusText: string|undefined;
let statusText: string | undefined;
let contentEncoding = '';
curl.once('end', () => {
if (curlStream) {
@ -302,13 +302,10 @@ export class CurlControl extends AsyncService {
}
}
const fpath = this.tempFileManager.alloc();
const fancyFile = FancyFile.auto(stream, fpath);
this.tempFileManager.bindPathTo(fancyFile, fpath);
resolve({
statusCode: status,
statusText,
data: fancyFile,
data: stream,
headers: headers as HeaderInfo[],
});
});
@ -324,7 +321,19 @@ export class CurlControl extends AsyncService {
let nextHopUrl = urlToCrawl;
const fakeHeaderInfos: HeaderInfo[] = [];
do {
const r = await this.urlToFile1Shot(nextHopUrl, opts);
const s = await this.urlToStream(nextHopUrl, opts);
const r = { ...s } as {
statusCode: number,
statusText?: string,
data?: FancyFile,
headers: HeaderInfo[],
};
if (r.data) {
const fpath = this.tempFileManager.alloc();
const fancyFile = FancyFile.auto(r.data, fpath);
this.tempFileManager.bindPathTo(fancyFile, fpath);
r.data = fancyFile;
}
if ([301, 302, 303, 307, 308].includes(r.statusCode)) {
fakeHeaderInfos.push(...r.headers);
@ -375,7 +384,7 @@ export class CurlControl extends AsyncService {
const curlResult = await this.urlToFile(targetUrl, crawlOpts);
this.blackHoleDetector.itWorked();
let finalURL = targetUrl;
const sideLoadOpts: CURLScrappingOptions['sideLoad'] = {
const sideLoadOpts: CURLScrappingOptions<FancyFile>['sideLoad'] = {
impersonate: {},
proxyOrigin: {},
};
@ -421,6 +430,140 @@ export class CurlControl extends AsyncService {
};
}
async urlToBlob(urlToCrawl: URL, crawlOpts?: CURLScrappingOptions) {
let leftRedirection = 6;
let cookieRedirects = 0;
let opts = { ...crawlOpts };
let nextHopUrl = urlToCrawl;
const fakeHeaderInfos: HeaderInfo[] = [];
do {
const s = await this.urlToStream(nextHopUrl, opts);
const r = { ...s } as {
statusCode: number,
statusText?: string,
data?: Blob,
headers: HeaderInfo[],
};
const headers = r.headers[r.headers.length - 1];
if ([301, 302, 303, 307, 308].includes(r.statusCode)) {
fakeHeaderInfos.push(...r.headers);
const location: string | undefined = headers.Location || headers.location;
const setCookieHeader = headers['Set-Cookie'] || headers['set-cookie'];
if (setCookieHeader) {
const cookieAssignments = Array.isArray(setCookieHeader) ? setCookieHeader : [setCookieHeader];
const parsed = cookieAssignments.filter(Boolean).map((x) => parseSetCookieString(x, { decodeValues: true }));
if (parsed.length) {
opts.cookies = [...(opts.cookies || []), ...parsed];
}
if (!location) {
cookieRedirects += 1;
}
}
if (!location && !setCookieHeader) {
// Follow curl behavior
if (s.data) {
const chunks: Buffer[] = [];
s.data.on('data', (chunk) => {
chunks.push(chunk);
});
await new Promise((resolve, reject) => {
s.data!.once('end', resolve);
s.data!.once('error', reject);
});
r.data = new Blob(chunks, { type: headers['Content-Type'] || headers['content-type'] });
}
return {
statusCode: r.statusCode,
data: r.data,
headers: fakeHeaderInfos.concat(r.headers),
};
}
if (!location && cookieRedirects > 1) {
throw new ServiceBadApproachError(`Failed to access ${urlToCrawl}: Browser required to solve complex cookie preconditions.`);
}
nextHopUrl = new URL(location || '', nextHopUrl);
leftRedirection -= 1;
continue;
}
if (s.data) {
const chunks: Buffer[] = [];
s.data.on('data', (chunk) => {
chunks.push(chunk);
});
await new Promise((resolve, reject) => {
s.data!.once('end', resolve);
s.data!.once('error', reject);
});
r.data = new Blob(chunks, { type: headers['Content-Type'] || headers['content-type'] });
}
return {
statusCode: r.statusCode,
statusText: r.statusText,
data: r.data,
headers: fakeHeaderInfos.concat(r.headers),
};
} while (leftRedirection > 0);
throw new ServiceBadAttemptError(`Failed to access ${urlToCrawl}: Too many redirections.`);
}
async sideLoadBlob(targetUrl: URL, crawlOpts?: CURLScrappingOptions) {
const curlResult = await this.urlToBlob(targetUrl, crawlOpts);
this.blackHoleDetector.itWorked();
let finalURL = targetUrl;
const sideLoadOpts: CURLScrappingOptions<Blob>['sideLoad'] = {
impersonate: {},
proxyOrigin: {},
};
for (const headers of curlResult.headers) {
sideLoadOpts.impersonate[finalURL.href] = {
status: headers.result?.code || -1,
headers: _.omit(headers, 'result'),
contentType: headers['Content-Type'] || headers['content-type'],
};
if (crawlOpts?.proxyUrl) {
sideLoadOpts.proxyOrigin[finalURL.origin] = crawlOpts.proxyUrl;
}
if (headers.result?.code && [301, 302, 307, 308].includes(headers.result.code)) {
const location = headers.Location || headers.location;
if (location) {
finalURL = new URL(location, finalURL);
}
}
}
const lastHeaders = curlResult.headers[curlResult.headers.length - 1];
const contentType = (lastHeaders['Content-Type'] || lastHeaders['content-type'])?.toLowerCase() || (curlResult.data?.type) || 'application/octet-stream';
const contentDisposition = lastHeaders['Content-Disposition'] || lastHeaders['content-disposition'];
const fileName = contentDisposition?.match(/filename="([^"]+)"/i)?.[1] || finalURL.pathname.split('/').pop();
if (sideLoadOpts.impersonate[finalURL.href] && (curlResult.data?.size)) {
sideLoadOpts.impersonate[finalURL.href].body = curlResult.data;
}
// This should keep the file from being garbage collected and deleted until this asyncContext/request is done.
this.lifeCycleTrack.set(this.asyncLocalContext.ctx, curlResult.data);
return {
finalURL,
sideLoadOpts,
chain: curlResult.headers,
status: curlResult.statusCode,
statusText: curlResult.statusText,
headers: lastHeaders,
contentType,
contentDisposition,
fileName,
file: curlResult.data
};
}
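
A hedged usage sketch of the new `sideLoadBlob` (the wrapper function is illustrative; the field names come from the return value above):

```ts
// Fetch a resource once over curl-impersonate, keep the body in memory as a
// Blob, and forward the recorded response chain so a downstream browser can
// replay it instead of re-downloading.
async function fetchAsBlob(curl: CurlControl, target: string) {
    const r = await curl.sideLoadBlob(new URL(target));
    if (r.status !== 200 || !r.file) {
        throw new Error(`Fetch failed: ${r.status} ${r.statusText ?? ''}`);
    }
    return {
        blob: r.file,               // in-memory Blob
        contentType: r.contentType, // from the last response headers
        fileName: r.fileName,       // from Content-Disposition or the URL path
        sideLoad: r.sideLoadOpts,   // usable as ScrappingOptions['sideLoad']
    };
}
```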
digestCurlCode(code: CurlCode, msg: string) {
switch (code) {
// 400 User errors


@ -79,7 +79,7 @@ export interface ExtendedSnapshot extends PageSnapshot {
imgs: ImgBrief[];
}
export interface ScrappingOptions {
export interface ScrappingOptions<T = FancyFile | Blob> {
proxyUrl?: string;
cookies?: Cookie[];
favorScreenshot?: boolean;
@ -101,7 +101,7 @@ export interface ScrappingOptions {
status: number;
headers: { [k: string]: string | string[]; };
contentType?: string;
body?: FancyFile;
body?: T;
};
};
proxyOrigin: { [origin: string]: string; };
@ -912,7 +912,11 @@ export class PuppeteerControl extends AsyncService {
if (impersonate) {
let body;
if (impersonate.body) {
if (impersonate.body instanceof Blob) {
body = new Uint8Array(await impersonate.body.arrayBuffer());
} else {
body = await readFile(await impersonate.body.filePath);
}
if (req.isInterceptResolutionHandled()) {
return;
}


@ -0,0 +1,133 @@
import { singleton } from 'tsyringe';
import { AsyncService } from 'civkit/async-service';
import { GlobalLogger } from '../logger';
import { JSDomControl } from '../jsdom';
import _ from 'lodash';
import { WebSearchEntry } from './compat';
import { ServiceBadAttemptError } from '../errors';
import commonSerpClients, { CommonSerpImageResponse, CommonSerpNewsResponse, CommonSerpWebResponse } from '../../shared/3rd-party/common-serp';
import { AsyncLocalContext } from '../async-context';
@singleton()
export class CommonGoogleSERP extends AsyncService {
logger = this.globalLogger.child({ service: this.constructor.name });
googleDomain = process.env.OVERRIDE_GOOGLE_DOMAIN || 'www.google.com';
protected ctxIteratorMap = new WeakMap<object, ReturnType<CommonGoogleSERP['iterClients']>>();
constructor(
protected globalLogger: GlobalLogger,
protected jsDomControl: JSDomControl,
protected asyncContext: AsyncLocalContext,
) {
super(...arguments);
}
override async init() {
await this.dependencyReady();
this.emit('ready');
}
*iterClients() {
if (!commonSerpClients.length) {
return;
}
while (true) {
yield* commonSerpClients;
}
}
getClient() {
const ctx = this.asyncContext.ctx;
const it = this.ctxIteratorMap.get(ctx) || this.iterClients();
this.ctxIteratorMap.set(ctx, it);
const client = it.next().value;
if (!client) {
throw new ServiceBadAttemptError('No client available');
}
return client;
}
digestQuery(query: { [k: string]: any; }) {
const url = new URL(`https://${this.googleDomain}/search`);
const clone = { ...query };
const num = clone.num || 10;
if (clone.page) {
const page = parseInt(clone.page);
delete clone.page;
clone.start = (page - 1) * num;
if (clone.start === 0) {
delete clone.start;
}
}
if (clone.location) {
delete clone.location;
}
for (const [k, v] of Object.entries(clone)) {
if (v === undefined || v === null) {
continue;
}
url.searchParams.set(k, `${v}`);
}
return url;
}
async webSearch(query: { [k: string]: any; }) {
const url = this.digestQuery(query);
const client = this.getClient();
const r = await client.queryJSON(url.href) as CommonSerpWebResponse;
return r.organic.map((x)=> ({
link: x.link,
title: x.title,
snippet: x.description,
variant: 'web',
})) as WebSearchEntry[];
}
async newsSearch(query: { [k: string]: any; }) {
const url = this.digestQuery(query);
url.searchParams.set('tbm', 'nws');
const client = this.getClient();
const r = await client.queryJSON(url.href) as CommonSerpNewsResponse;
return r.news.map((x)=> ({
link: x.link,
title: x.title,
snippet: x.description,
source: x.source,
date: x.date,
imageUrl: x.image,
variant: 'news',
})) as WebSearchEntry[];
}
async imageSearch(query: { [k: string]: any; }) {
const url = this.digestQuery(query);
url.searchParams.set('tbm', 'isch');
const client = this.getClient();
const r = await client.queryJSON(url.href) as CommonSerpImageResponse;
return r.images.map((x)=> ({
link: x.link,
title: x.title,
snippet: x.image_alt,
source: x.source,
imageUrl: x.image,
variant: 'images',
})) as WebSearchEntry[];
}
}
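
A worked example of `digestQuery`'s pagination mapping (the query values are made up): `page: 3` with `num: 10` becomes `start = (3 - 1) * 10 = 20`, `page: 1` yields no `start` parameter at all, and `location` is always dropped.

```ts
// Assuming an instance resolved e.g. via tsyringe's container:
const serp = container.resolve(CommonGoogleSERP);
const url = serp.digestQuery({ q: 'jina reader', page: 3, num: 10 });
console.log(url.href);
// → https://www.google.com/search?q=jina+reader&num=10&start=20
```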


@ -7,24 +7,74 @@ import _ from 'lodash';
import { WebSearchEntry } from './compat';
import { ScrappingOptions, SERPSpecializedPuppeteerControl } from './puppeteer';
import { CurlControl } from '../curl';
import { readFile } from 'fs/promises';
import { ApplicationError } from 'civkit/civ-rpc';
import { ServiceBadApproachError, ServiceBadAttemptError } from '../errors';
import { parseJSONText } from 'civkit/vectorize';
import { retryWith } from 'civkit/decorators';
import { ProxyProviderService } from '../../shared/services/proxy-provider';
import { retry, retryWith } from 'civkit/decorators';
import { SERPProxyProviderService } from '../../shared/services/proxy-provider';
import { readBlob } from '../../utils/encoding';
import { createContext, Script } from 'vm';
import { BrowserContext } from 'puppeteer';
import { createPool } from 'generic-pool';
import { randomBytes } from 'crypto';
import { AsyncLocalContext } from '../async-context';
interface SerpContext {
proxyUrl?: string;
browserContext?: BrowserContext;
validTill?: Date;
magicId?: string;
}
@singleton()
export class GoogleSERP extends AsyncService {
logger = this.globalLogger.child({ service: this.constructor.name });
googleDomain = process.env.OVERRIDE_GOOGLE_DOMAIN || 'www.google.com';
nativeIPHealthy = true;
contextPool = createPool({
create: () => {
return this.createContext();
},
destroy: async (ctx: SerpContext) => {
if (ctx.browserContext) {
try {
await ctx.browserContext.close();
} catch (err) {
this.logger.warn('Error closing browser context', { err });
}
}
},
validate: async (ctx: SerpContext) => {
if (ctx.validTill && ctx.validTill > (new Date(Date.now() + 5 * 60 * 1000))) {
return true;
}
return !ctx.proxyUrl;
},
}, {
max: 3_000,
testOnBorrow: true,
});
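// With testOnBorrow enabled, acquire() runs validate() first: a context stays
// valid while its validTill is more than 5 minutes away; past that, only
// proxyless contexts pass, and proxy-backed ones are destroyed and recreated.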
protected async createContext() {
await this.serviceReady();
this.asyncLocalContext.ctx.ctxIsNew = true;
return {
magicId: randomBytes(17).toString('base64url'),
validTill: new Date(Date.now() + 30 * 60 * 1000),
} as SerpContext;
}
constructor(
protected globalLogger: GlobalLogger,
protected puppeteerControl: SERPSpecializedPuppeteerControl,
protected jsDomControl: JSDomControl,
protected curlControl: CurlControl,
protected proxyProvider: ProxyProviderService,
protected proxyProvider: SERPProxyProviderService,
protected asyncLocalContext: AsyncLocalContext,
) {
const filteredDeps = isMainThread ? arguments : _.without(arguments, puppeteerControl);
super(...filteredDeps);
@ -36,6 +86,15 @@ export class GoogleSERP extends AsyncService {
this.emit('ready');
}
nativeIPBlocked() {
this.nativeIPHealthy = false;
this.logger.warn('Native IP is not healthy.');
setTimeout(() => {
this.nativeIPHealthy = true;
this.logger.debug('Presume native IP healthy again after timeout.');
}, 1000 * 60 * 60);
}
@retryWith((err) => {
if (err instanceof ServiceBadApproachError) {
return false;
@ -51,15 +110,13 @@ export class GoogleSERP extends AsyncService {
return undefined;
}, 3)
async sideLoadWithAllocatedProxy(url: URL, opts?: ScrappingOptions) {
if (opts?.allocProxy === 'none') {
return this.curlControl.sideLoad(url, opts);
if (opts?.allocProxy === 'none' || opts?.proxyUrl) {
return this.curlControl.sideLoadBlob(url, opts);
}
const proxy = await this.proxyProvider.alloc(
process.env.PREFERRED_PROXY_COUNTRY || 'auto'
);
const proxy = await this.proxyProvider.alloc();
this.logger.debug(`Proxy allocated`, { proxy: proxy.href });
const r = await this.curlControl.sideLoad(url, {
const r = await this.curlControl.sideLoadBlob(url, {
...opts,
proxyUrl: proxy.href,
});
@ -101,48 +158,126 @@ export class GoogleSERP extends AsyncService {
return url;
}
@retry(2)
async webSearch(query: { [k: string]: any; }, opts?: ScrappingOptions) {
const url = this.digestQuery(query);
const origHref = url.href;
if (!url.searchParams.has('start')) {
url.searchParams.set('start', '0');
}
url.searchParams.set('asearch', 'arc');
const ctx = await this.contextPool.acquire();
url.searchParams.set('async', getMagicAsyncParam(query.start, ctx.magicId));
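// The new trick: hit Google's progressive-render endpoint (asearch=arc plus
// _fmt:prog in the async param) over plain curl, so the SERP comes back as an
// HTML fragment that linkedom parses below, with no full browser render.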
const t0 = performance.now();
const sideLoaded = await this.sideLoadWithAllocatedProxy(url, {
...opts,
allocProxy: opts?.allocProxy || (this.nativeIPHealthy ? 'none' : 'auto'),
proxyUrl: ctx.proxyUrl,
timeoutMs: 3_700
}).catch((err) => {
this.contextPool.destroy(ctx);
return Promise.reject(err);
});
const dt = performance.now() - t0;
if ('proxy' in sideLoaded) {
ctx.proxyUrl = sideLoaded.proxy.href;
ctx.validTill = new Date(Date.now() + 30 * 60 * 1000);
}
if (sideLoaded.status === 200) {
if (dt < 1_700) {
this.contextPool.release(ctx);
} else {
this.contextPool.destroy(ctx);
}
} else {
if (this.nativeIPHealthy && this.asyncLocalContext.ctx.ctxIsNew) {
this.nativeIPBlocked();
}
this.contextPool.destroy(ctx);
throw new ServiceBadAttemptError({
message: 'Google returned an error page. This may happen due to various reasons, including rate limiting or other issues.',
});
}
const sideLoaded = await this.sideLoadWithAllocatedProxy(url, opts);
if (opts && sideLoaded.sideLoadOpts) {
opts.sideLoad = sideLoaded.sideLoadOpts;
}
const snapshot = await this.puppeteerControl.controlledScrap(url, getWebSearchResults, opts);
if (!sideLoaded.file) {
throw new ServiceBadAttemptError('Google returned an error page. This may happen due to various reasons, including rate limiting or other issues.');
}
const contentType = sideLoaded.contentType;
const encoding: string | undefined = contentType.includes('charset=') ? contentType.split('charset=')[1]?.trim().toLowerCase() : 'utf-8';
return snapshot;
let html = await readBlob(sideLoaded.file, encoding);
let innerCharset;
const peek = html.slice(0, 1024);
innerCharset ??= peek.match(/<meta[^>]+text\/html;\s*?charset=([^>"]+)/i)?.[1]?.toLowerCase();
innerCharset ??= peek.match(/<meta[^>]+charset="([^>"]+)\"/i)?.[1]?.toLowerCase();
if (innerCharset && innerCharset !== encoding) {
html = await readBlob(sideLoaded.file, innerCharset);
}
async newsSearch(query: { [k: string]: any; }, opts?: ScrappingOptions) {
const url = this.digestQuery(query);
url.searchParams.set('tbm', 'nws');
const sideLoaded = await this.sideLoadWithAllocatedProxy(url, opts);
if (opts && sideLoaded.sideLoadOpts) {
opts.sideLoad = sideLoaded.sideLoadOpts;
const jsdom = this.jsDomControl.linkedom.parseHTML(html, { location: { href: origHref } });
try {
const r = runGetWebSearchResultsScript(createContext(jsdom));
if (!Array.isArray(r)) {
throw new Error('Failed to parse response as SERP results');
}
const snapshot = await this.puppeteerControl.controlledScrap(url, getNewsSearchResults, opts);
return snapshot;
return r;
} catch (err) {
throw new ServiceBadAttemptError({
message: 'Google returned an error page. This may happen due to various reasons, including rate limiting or other issues.',
err
});
}
}
@retry(2)
async imageSearch(query: { [k: string]: any; }, opts?: ScrappingOptions) {
const url = this.digestQuery(query);
url.searchParams.set('tbm', 'isch');
url.searchParams.set('asearch', 'isch');
url.searchParams.set('async', `_fmt:json,p:1,ijn:${query.start ? Math.floor(query.start / (query.num || 10)) : 0}`);
const ctx = await this.contextPool.acquire();
const sideLoaded = await this.sideLoadWithAllocatedProxy(url, opts);
const sideLoaded = await this.sideLoadWithAllocatedProxy(url, {
...opts,
proxyUrl: ctx.proxyUrl,
allocProxy: opts?.allocProxy || (this.nativeIPHealthy ? 'none' : 'auto'),
}).catch((err) => {
this.contextPool.destroy(ctx);
return Promise.reject(err);
});
if ('proxy' in sideLoaded) {
ctx.proxyUrl = sideLoaded.proxy.href;
ctx.validTill = new Date(Date.now() + 30 * 60 * 1000);
}
if (sideLoaded.status === 200) {
this.contextPool.release(ctx);
} else {
this.contextPool.destroy(ctx);
if (this.nativeIPHealthy && this.asyncLocalContext.ctx.ctxIsNew) {
this.nativeIPBlocked();
}
}
if (sideLoaded.status !== 200 || !sideLoaded.file) {
throw new ServiceBadAttemptError('Google returned an error page. This may happen due to various reasons, including rate limiting or other issues.');
}
const jsonTxt = (await readFile((await sideLoaded.file.filePath))).toString();
const jsonTxt = (await readBlob(sideLoaded.file)).toString();
const rJSON = parseJSONText(jsonTxt.slice(jsonTxt.indexOf('{"ischj":')));
return _.get(rJSON, 'ischj.metadata').map((x: any) => {
@ -161,6 +296,95 @@ export class GoogleSERP extends AsyncService {
}
}
@singleton()
export class GoogleSERPOldFashion extends GoogleSERP {
override async createContext() {
await this.serviceReady();
this.asyncLocalContext.ctx.ctxIsNew = true;
return {
browserContext: await this.puppeteerControl.browser.createBrowserContext(),
magicId: randomBytes(17).toString('base64url'),
validTill: new Date(Date.now() + 30 * 60 * 1000),
} as SerpContext;
}
override async webSearch(query: { [k: string]: any; }, opts?: ScrappingOptions) {
const url = this.digestQuery(query);
const ctx = await this.contextPool.acquire();
const sideLoaded = await this.sideLoadWithAllocatedProxy(url, {
...opts,
proxyUrl: ctx.proxyUrl,
}).catch((err) => {
this.contextPool.destroy(ctx);
return Promise.reject(err);
});
if ('proxy' in sideLoaded) {
ctx.proxyUrl = sideLoaded.proxy.href;
ctx.validTill = new Date(Date.now() + 30 * 60 * 1000);
}
if (sideLoaded.status === 200) {
this.contextPool.release(ctx);
} else {
this.contextPool.destroy(ctx);
}
if (opts && sideLoaded.sideLoadOpts) {
opts.sideLoad = sideLoaded.sideLoadOpts;
}
const snapshot = await this.puppeteerControl.controlledScrap(url, getWebSearchResults, opts);
if (!Array.isArray(snapshot)) {
throw new ServiceBadAttemptError('Google returned an error page. This may happen due to various reasons, including rate limiting or other issues.');
}
return snapshot;
}
async newsSearch(query: { [k: string]: any; }, opts?: ScrappingOptions) {
const url = this.digestQuery(query);
url.searchParams.set('tbm', 'nws');
const ctx = await this.contextPool.acquire();
const sideLoaded = await this.sideLoadWithAllocatedProxy(url, {
...opts,
proxyUrl: ctx.proxyUrl,
}).catch((err) => {
this.contextPool.destroy(ctx);
return Promise.reject(err);
});
if ('proxy' in sideLoaded) {
ctx.proxyUrl = sideLoaded.proxy.href;
ctx.validTill = new Date(Date.now() + 30 * 60 * 1000);
}
const snapshot = await this.puppeteerControl.controlledScrap(url, getNewsSearchResults, {
...opts,
proxyUrl: ctx.proxyUrl,
browserContext: ctx.browserContext,
}).catch((err) => {
this.contextPool.destroy(ctx);
return Promise.reject(err);
});
this.contextPool.release(ctx);
return snapshot;
}
}
async function getWebSearchResults() {
if (location.pathname.startsWith('/sorry') || location.pathname.startsWith('/error')) {
throw new Error('Google returned an error page. This may happen due to various reasons, including rate limiting or other issues.');
@ -254,6 +478,96 @@ async function getWebSearchResults() {
};
}).filter(Boolean) as WebSearchEntry[];
}
function getWebSearchResultsSync() {
const wrapper1 = document.querySelector('div[data-async-context^="query"]');
if (!wrapper1) {
return undefined;
}
const query = decodeURIComponent(wrapper1.getAttribute('data-async-context')?.split('query:')[1] || '');
if (!query) {
return undefined;
}
const candidates = Array.from(wrapper1.querySelectorAll('div[lang],div[data-surl]'));
return candidates.map((x, pos) => {
const primaryLink = x.querySelector('a:not([href="#"])');
if (!primaryLink) {
return undefined;
}
const url = primaryLink.getAttribute('href');
if (primaryLink.querySelector('div[role="heading"]')) {
// const spans = primaryLink.querySelectorAll('span');
// const title = spans[0]?.textContent;
// const source = spans[1]?.textContent;
// const date = spans[spans.length - 1].textContent;
// return {
// link: url,
// title,
// source,
// date,
// variant: 'video'
// };
return undefined;
}
const title = primaryLink.querySelector('h3')?.textContent;
const source = Array.from(primaryLink.querySelectorAll('span')).find((x) => x.textContent)?.textContent;
const cite = primaryLink.querySelector('cite[role=text]')?.textContent;
let date = cite?.split('·')[1]?.trim();
const snippets = Array.from(x.querySelectorAll('div[data-sncf*="1"] span'));
let snippet = snippets[snippets.length - 1]?.textContent;
if (!snippet) {
snippet = x.querySelector('div.IsZvec')?.textContent?.trim() || null;
}
date ??= snippets[snippets.length - 2]?.textContent?.trim();
const imageUrl = x.querySelector('div[data-sncf*="1"] img[src]:not(img[src^="data"])')?.getAttribute('src');
let siteLinks = Array.from(x.querySelectorAll('div[data-sncf*="3"] a[href]')).map((l) => {
return {
link: l.getAttribute('href'),
title: l.textContent,
};
});
const perhapsParent = x.parentElement?.closest('div[data-hveid]');
if (!siteLinks?.length && perhapsParent) {
const candidates = Array.from(perhapsParent.querySelectorAll('td h3'));
if (candidates.length) {
siteLinks = candidates.map((l) => {
const link = l.querySelector('a');
if (!link) {
return undefined;
}
const snippet = l.nextElementSibling?.textContent;
return {
link: link.getAttribute('href'),
title: link.textContent,
snippet,
};
}).filter(Boolean) as any;
}
}
return {
link: url,
title,
source,
date,
snippet: snippet ?? undefined,
imageUrl: imageUrl?.startsWith('data:') ? undefined : imageUrl,
siteLinks: siteLinks.length ? siteLinks : undefined,
variant: 'web',
};
}).filter(Boolean) as WebSearchEntry[];
}
const script = new Script(`(${getWebSearchResultsSync.toString()})()`);
function runGetWebSearchResultsScript(ctx: object) {
return script.runInContext(ctx);
}
async function getNewsSearchResults() {
if (location.pathname.startsWith('/sorry') || location.pathname.startsWith('/error')) {
@ -306,3 +620,9 @@ async function getNewsSearchResults() {
};
}).filter(Boolean) as WebSearchEntry[];
}
function getMagicAsyncParam(start: number = 0, inputArcid?: string) {
const arcid = inputArcid || randomBytes(17).toString('base64url');
return `arc_id:srp_${arcid}_1${start.toString().padStart(2, '0')},use_ac:true,_fmt:prog`;
}
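
For reference, the `async` parameter this helper produces looks like the following (fixed `inputArcid` for illustration):

```ts
getMagicAsyncParam(0, 'abc');  // 'arc_id:srp_abc_100,use_ac:true,_fmt:prog'
getMagicAsyncParam(10, 'abc'); // 'arc_id:srp_abc_110,use_ac:true,_fmt:prog'
// Without inputArcid, a random 17-byte base64url id is generated per call.
```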


@ -67,11 +67,5 @@ export class InternalJinaSerpService extends AsyncService {
async webSearch(query: SerperSearchQueryParams) {
return this.doSearch('web', query);
}
async imageSearch(query: SerperSearchQueryParams) {
return this.doSearch('images', query);
}
async newsSearch(query: SerperSearchQueryParams) {
return this.doSearch('news', query);
}
}


@ -2,7 +2,7 @@ import _ from 'lodash';
import { readFile } from 'fs/promises';
import { container, singleton } from 'tsyringe';
import type { Browser, CookieParam, GoToOptions, Page, Viewport } from 'puppeteer';
import type { Browser, BrowserContext, CookieParam, GoToOptions, Page, Viewport } from 'puppeteer';
import type { Cookie } from 'set-cookie-parser';
import puppeteer, { TimeoutError } from 'puppeteer';
@ -21,6 +21,7 @@ import { BlackHoleDetector } from '../blackhole-detector';
export interface ScrappingOptions {
browserContext?: BrowserContext;
proxyUrl?: string;
cookies?: Cookie[];
overrideUserAgent?: string;
@ -38,7 +39,7 @@ export interface ScrappingOptions {
status: number;
headers: { [k: string]: string | string[]; };
contentType?: string;
body?: FancyFile;
body?: FancyFile | Blob;
};
};
proxyOrigin: { [origin: string]: string; };
@ -226,8 +227,6 @@ export class SERPSpecializedPuppeteerControl extends AsyncService {
browser!: Browser;
logger = this.globalLogger.child({ service: this.constructor.name });
__loadedPage: Page[] = [];
finalizerMap = new WeakMap<Page, ReturnType<typeof setTimeout>>();
snMap = new WeakMap<Page, number>();
livePages = new Set<Page>();
@ -251,7 +250,6 @@ export class SERPSpecializedPuppeteerControl extends AsyncService {
let crippledTimes = 0;
this.on('crippled', () => {
crippledTimes += 1;
this.__loadedPage.length = 0;
this.livePages.clear();
if (crippledTimes > 5) {
process.nextTick(() => {
@ -303,20 +301,16 @@ export class SERPSpecializedPuppeteerControl extends AsyncService {
this.effectiveUA = this.ua.replace(/Headless/i, '').replace('Mozilla/5.0 (X11; Linux x86_64)', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)');
this.curlControl.impersonateChrome(this.effectiveUA);
await this.newPage('beware_deadlock').then((r) => this.__loadedPage.push(r));
this.emit('ready');
}
async newPage<T>(bewareDeadLock: any = false) {
if (!bewareDeadLock) {
async newPage<T>(context?: BrowserContext) {
await this.serviceReady();
}
const sn = this._sn++;
let page;
context ??= await this.browser.createBrowserContext();
try {
const dedicatedContext = await this.browser.createBrowserContext();
page = await dedicatedContext.newPage();
page = await context.newPage();
} catch (err: any) {
this.logger.warn(`Failed to create page ${sn}`, { err });
this.browser.process()?.kill('SIGKILL');
@ -347,23 +341,9 @@ export class SERPSpecializedPuppeteerControl extends AsyncService {
return page;
}
async getNextPage() {
let thePage: Page | undefined;
if (this.__loadedPage.length) {
thePage = this.__loadedPage.shift();
if (this.__loadedPage.length <= 1) {
process.nextTick(() => {
this.newPage()
.then((r) => this.__loadedPage.push(r))
.catch((err) => {
this.logger.warn(`Failed to load new page ahead of time`, { err });
});
});
}
}
async getNextPage(context?: BrowserContext) {
const thePage = await this.newPage(context);
if (!thePage) {
thePage = await this.newPage();
}
const timer = setTimeout(() => {
@ -387,14 +367,7 @@ export class SERPSpecializedPuppeteerControl extends AsyncService {
const sn = this.snMap.get(page);
this.logger.debug(`Closing page ${sn}`);
await Promise.race([
(async () => {
const ctx = page.browserContext();
try {
await page.close();
} finally {
await ctx.close();
}
})(),
page.close(),
delay(5000)
]).catch((err) => {
this.logger.error(`Failed to destroy page ${sn}`, { err });
@ -405,7 +378,7 @@ export class SERPSpecializedPuppeteerControl extends AsyncService {
async controlledScrap<T>(parsedUrl: URL, func: (this: void) => Promise<T>, options: ScrappingOptions = {}): Promise<T> {
// parsedUrl.search = '';
const url = parsedUrl.toString();
const page = await this.getNextPage();
const page = await this.getNextPage(options.browserContext);
this.lifeCycleTrack.set(page, this.asyncLocalContext.ctx);
page.on('response', (_resp) => {
this.blackHoleDetector.itWorked();
@ -452,7 +425,11 @@ export class SERPSpecializedPuppeteerControl extends AsyncService {
if (impersonate) {
let body;
if (impersonate.body) {
if (impersonate.body instanceof Blob) {
body = new Uint8Array(await impersonate.body.arrayBuffer());
} else {
body = await readFile(await impersonate.body.filePath);
}
if (req.isInterceptResolutionHandled()) {
return;
}


@ -1,13 +1,12 @@
import { createReadStream } from 'fs';
import { Readable } from 'stream';
import { TextDecoderStream } from 'stream/web';
export async function decodeFileStream(
fileStream: Readable,
encoding: string = 'utf-8',
): Promise<string> {
const decodeStream = new TextDecoderStream(encoding, { fatal: false, ignoreBOM: false });
Readable.toWeb(fileStream).pipeThrough(decodeStream);
(Readable.toWeb(fileStream) as ReadableStream).pipeThrough(decodeStream);
const chunks = [];
for await (const chunk of decodeStream.readable) {
@ -23,7 +22,22 @@ export async function readFile(
encoding: string = 'utf-8',
): Promise<string> {
const decodeStream = new TextDecoderStream(encoding, { fatal: false, ignoreBOM: false });
Readable.toWeb(createReadStream(filePath)).pipeThrough(decodeStream);
(Readable.toWeb(createReadStream(filePath)) as ReadableStream).pipeThrough(decodeStream);
const chunks = [];
for await (const chunk of decodeStream.readable) {
chunks.push(chunk);
}
return chunks.join('');
}
export async function readBlob(
blob: Blob,
encoding: string = 'utf-8',
): Promise<string> {
const decodeStream = new TextDecoderStream(encoding, { fatal: false, ignoreBOM: false });
blob.stream().pipeThrough(decodeStream);
const chunks = [];
for await (const chunk of decodeStream.readable) {
chunks.push(chunk);
}
return chunks.join('');
}
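
A quick usage sketch (mirrors how `google.ts` above decodes side-loaded HTML; the charset value is illustrative):

```ts
declare const blob: Blob; // e.g. the `file` returned by CurlControl.sideLoadBlob

// Decode using the charset advertised in the Content-Type header,
// falling back to utf-8 when omitted.
const html = await readBlob(blob, 'gb2312');
```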

@ -1 +1 @@
Subproject commit 02279d88bc3940a08a92cb18cf8877d57cb49b82
Subproject commit 8b78eab54d78868d44065bfc59c413fe6bd4929d