mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-13 08:29:04 +08:00
Merge branch 'main' of https://github.com/mendableai/firecrawl
This commit is contained in:
commit
ad06cde422
@ -128,7 +128,9 @@ pub unsafe extern "C" fn extract_metadata(html: *const libc::c_char) -> *mut i8
|
|||||||
if let Some(v) = out.get(name) {
|
if let Some(v) = out.get(name) {
|
||||||
match v {
|
match v {
|
||||||
Value::String(_) => {
|
Value::String(_) => {
|
||||||
out.insert(name.to_string(), Value::Array(vec! [v.clone(), Value::String(content.to_string())]));
|
if name != "title" { // preserve title tag in metadata
|
||||||
|
out.insert(name.to_string(), Value::Array(vec! [v.clone(), Value::String(content.to_string())]));
|
||||||
|
}
|
||||||
},
|
},
|
||||||
Value::Array(_) => {
|
Value::Array(_) => {
|
||||||
match out.get_mut(name) {
|
match out.get_mut(name) {
|
||||||
|
@ -34,7 +34,7 @@ export const url = z.preprocess(
|
|||||||
.url()
|
.url()
|
||||||
.regex(/^https?:\/\//, "URL uses unsupported protocol")
|
.regex(/^https?:\/\//, "URL uses unsupported protocol")
|
||||||
.refine(
|
.refine(
|
||||||
(x) => /\.[a-z]{2,}(:\d+)?([\/?#]|$)/i.test(x),
|
(x) => /\.[a-zA-Z\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F]{2,}(:\d+)?([\/?#]|$)/i.test(x),
|
||||||
"URL must have a valid top-level domain or be a valid path",
|
"URL must have a valid top-level domain or be a valid path",
|
||||||
)
|
)
|
||||||
.refine((x) => {
|
.refine((x) => {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user