mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-03 04:50:40 +08:00
use thomas's url validation
This commit is contained in:
parent
e3279274f1
commit
dad9d353d9
@ -12,6 +12,7 @@ import { v4 as uuidv4 } from "uuid";
|
||||
import { Logger } from "../../src/lib/logger";
|
||||
import { addCrawlJob, addCrawlJobs, crawlToCrawler, lockURL, lockURLs, saveCrawl, StoredCrawl } from "../../src/lib/crawl-redis";
|
||||
import { getScrapeQueue } from "../../src/services/queue-service";
|
||||
import { checkAndUpdateURL } from "../../src/lib/validateUrl";
|
||||
|
||||
export async function crawlController(req: Request, res: Response) {
|
||||
try {
|
||||
@ -43,10 +44,17 @@ export async function crawlController(req: Request, res: Response) {
|
||||
return res.status(402).json({ error: "Insufficient credits" });
|
||||
}
|
||||
|
||||
const url = req.body.url;
|
||||
let url = req.body.url;
|
||||
if (!url) {
|
||||
return res.status(400).json({ error: "Url is required" });
|
||||
}
|
||||
try {
|
||||
url = checkAndUpdateURL(url).url;
|
||||
} catch (e) {
|
||||
return res
|
||||
.status(e instanceof Error && e.message === "Invalid URL" ? 400 : 500)
|
||||
.json({ error: e.message ?? e });
|
||||
}
|
||||
|
||||
if (isUrlBlocked(url)) {
|
||||
return res
|
||||
|
38
apps/api/src/lib/validateUrl.ts
Normal file
38
apps/api/src/lib/validateUrl.ts
Normal file
@ -0,0 +1,38 @@
|
||||
|
||||
/**
 * Reports whether `url` already starts with an explicit `scheme://` prefix.
 *
 * The scheme must begin with a letter and may contain only letters, digits,
 * `+` and `-`. Dots are deliberately excluded so that `://` has to appear
 * before any `.`, and leading whitespace is tolerated. Restricting the prefix
 * to real scheme characters keeps a dot-less host whose path or query embeds
 * another URL (e.g. `localhost/cb?next=http://a.b`) from being mistaken for a
 * URL that already carries a protocol.
 *
 * @param url - Raw URL text, possibly missing its protocol.
 * @returns `true` when a `scheme://` prefix is present, otherwise `false`.
 */
const protocolIncluded = (url: string): boolean => {
  // When this is false the URL is assumed to be http (maybe https?).
  return /^\s*[a-z][a-z0-9+-]*:\/\//i.test(url);
};
|
||||
|
||||
/**
 * Outcome of parsing a string with the `URL` constructor: either a parsed
 * `URL`, or an error flag paired with an empty placeholder object.
 */
type ParsedUrlResult =
  | { error: false; urlObj: URL }
  | { error: true; urlObj: Record<string, never> };

/**
 * Attempts to parse `s` with `new URL` without letting the exception escape.
 *
 * @param s - Candidate URL text; it must already include a protocol.
 * @returns `{ error: false, urlObj }` with the parsed `URL` on success, or
 *   `{ error: true, urlObj: {} }` when the constructor throws.
 */
const getURLobj = (s: string): ParsedUrlResult => {
  try {
    return { error: false, urlObj: new URL(s) };
  } catch {
    // `new URL` fails if the protocol is not included, e.g. `google.com`.
    return { error: true, urlObj: {} };
  }
};
|
||||
|
||||
/**
 * Validates a user-supplied URL, adding `http://` when no protocol is present.
 *
 * Surrounding whitespace is trimmed before validation. Only `http:` and
 * `https:` URLs are accepted.
 *
 * @param url - Raw URL text, with or without a protocol.
 * @returns The parsed `URL` object together with the trimmed, possibly
 *   protocol-prefixed URL string it was parsed from.
 * @throws Error with message `"Invalid URL"` when the value is not a string,
 *   cannot be parsed, or uses a protocol other than http/https.
 */
export const checkAndUpdateURL = (url: string): { urlObj: URL; url: string } => {
  // The value may originate from an untyped request body, so the runtime type
  // is checked even though the signature says `string`.
  if (typeof url !== "string") {
    throw new Error("Invalid URL");
  }

  // Work on a local copy instead of reassigning the parameter.
  let candidate = url.trim();
  if (!protocolIncluded(candidate)) {
    candidate = `http://${candidate}`;
  }

  const { error, urlObj } = getURLobj(candidate);
  // The `instanceof` check narrows the parsed value to `URL` without a cast.
  if (error || !(urlObj instanceof URL)) {
    throw new Error("Invalid URL");
  }

  if (urlObj.protocol !== "http:" && urlObj.protocol !== "https:") {
    throw new Error("Invalid URL");
  }

  return { urlObj, url: candidate };
};
|
Loading…
x
Reference in New Issue
Block a user