diff --git a/apps/api/sharedLibs/html-transformer/Cargo.lock b/apps/api/sharedLibs/html-transformer/Cargo.lock
index 43696071..5e5e8b53 100644
--- a/apps/api/sharedLibs/html-transformer/Cargo.lock
+++ b/apps/api/sharedLibs/html-transformer/Cargo.lock
@@ -101,6 +101,17 @@ dependencies = [
"syn 2.0.96",
]
+[[package]]
+name = "displaydoc"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.96",
+]
+
[[package]]
name = "dtoa"
version = "1.0.9"
@@ -137,6 +148,15 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f"
+[[package]]
+name = "form_urlencoded"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
+dependencies = [
+ "percent-encoding",
+]
+
[[package]]
name = "futf"
version = "0.1.5"
@@ -204,6 +224,7 @@ dependencies = [
"lol_html",
"serde",
"serde_json",
+ "url",
]
[[package]]
@@ -220,6 +241,145 @@ dependencies = [
"syn 1.0.109",
]
+[[package]]
+name = "icu_collections"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526"
+dependencies = [
+ "displaydoc",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locid"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637"
+dependencies = [
+ "displaydoc",
+ "litemap",
+ "tinystr",
+ "writeable",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locid_transform"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e"
+dependencies = [
+ "displaydoc",
+ "icu_locid",
+ "icu_locid_transform_data",
+ "icu_provider",
+ "tinystr",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locid_transform_data"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
+
+[[package]]
+name = "icu_normalizer"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f"
+dependencies = [
+ "displaydoc",
+ "icu_collections",
+ "icu_normalizer_data",
+ "icu_properties",
+ "icu_provider",
+ "smallvec",
+ "utf16_iter",
+ "utf8_iter",
+ "write16",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_normalizer_data"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
+
+[[package]]
+name = "icu_properties"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5"
+dependencies = [
+ "displaydoc",
+ "icu_collections",
+ "icu_locid_transform",
+ "icu_properties_data",
+ "icu_provider",
+ "tinystr",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_properties_data"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
+
+[[package]]
+name = "icu_provider"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9"
+dependencies = [
+ "displaydoc",
+ "icu_locid",
+ "icu_provider_macros",
+ "stable_deref_trait",
+ "tinystr",
+ "writeable",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_provider_macros"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.96",
+]
+
+[[package]]
+name = "idna"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e"
+dependencies = [
+ "idna_adapter",
+ "smallvec",
+ "utf8_iter",
+]
+
+[[package]]
+name = "idna_adapter"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71"
+dependencies = [
+ "icu_normalizer",
+ "icu_properties",
+]
+
[[package]]
name = "indexmap"
version = "1.9.3"
@@ -261,6 +421,12 @@ version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
+[[package]]
+name = "litemap"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
+
[[package]]
name = "lock_api"
version = "0.4.12"
@@ -373,6 +539,12 @@ dependencies = [
"windows-targets",
]
+[[package]]
+name = "percent-encoding"
+version = "2.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
+
[[package]]
name = "phf"
version = "0.8.0"
@@ -793,6 +965,17 @@ dependencies = [
"unicode-ident",
]
+[[package]]
+name = "synstructure"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.96",
+]
+
[[package]]
name = "tendril"
version = "0.4.3"
@@ -830,18 +1013,51 @@ dependencies = [
"syn 2.0.96",
]
+[[package]]
+name = "tinystr"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f"
+dependencies = [
+ "displaydoc",
+ "zerovec",
+]
+
[[package]]
name = "unicode-ident"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11cd88e12b17c6494200a9c1b683a04fcac9573ed74cd1b62aeb2727c5592243"
+[[package]]
+name = "url"
+version = "2.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60"
+dependencies = [
+ "form_urlencoded",
+ "idna",
+ "percent-encoding",
+]
+
[[package]]
name = "utf-8"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
+[[package]]
+name = "utf16_iter"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
+
+[[package]]
+name = "utf8_iter"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
+
[[package]]
name = "wasi"
version = "0.9.0+wasi-snapshot-preview1"
@@ -918,6 +1134,42 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+[[package]]
+name = "write16"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
+
+[[package]]
+name = "writeable"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
+
+[[package]]
+name = "yoke"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40"
+dependencies = [
+ "serde",
+ "stable_deref_trait",
+ "yoke-derive",
+ "zerofrom",
+]
+
+[[package]]
+name = "yoke-derive"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.96",
+ "synstructure",
+]
+
[[package]]
name = "zerocopy"
version = "0.7.35"
@@ -938,3 +1190,46 @@ dependencies = [
"quote",
"syn 2.0.96",
]
+
+[[package]]
+name = "zerofrom"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e"
+dependencies = [
+ "zerofrom-derive",
+]
+
+[[package]]
+name = "zerofrom-derive"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.96",
+ "synstructure",
+]
+
+[[package]]
+name = "zerovec"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079"
+dependencies = [
+ "yoke",
+ "zerofrom",
+ "zerovec-derive",
+]
+
+[[package]]
+name = "zerovec-derive"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.96",
+]
diff --git a/apps/api/sharedLibs/html-transformer/Cargo.toml b/apps/api/sharedLibs/html-transformer/Cargo.toml
index 9e242060..0cd74dc6 100644
--- a/apps/api/sharedLibs/html-transformer/Cargo.toml
+++ b/apps/api/sharedLibs/html-transformer/Cargo.toml
@@ -9,6 +9,7 @@ lol_html = "2.2.0"
kuchikiki = "0.8.2"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
+url = "2.5.4"
[lib]
crate-type = ["cdylib"]
diff --git a/apps/api/sharedLibs/html-transformer/src/lib.rs b/apps/api/sharedLibs/html-transformer/src/lib.rs
index 290e910b..13d86bfe 100644
--- a/apps/api/sharedLibs/html-transformer/src/lib.rs
+++ b/apps/api/sharedLibs/html-transformer/src/lib.rs
@@ -1,28 +1,9 @@
use std::{collections::HashMap, ffi::{CStr, CString}};
use kuchikiki::{parse_html, traits::TendrilSink};
+use serde::Deserialize;
use serde_json::Value;
-
-// #[no_mangle]
-// pub extern "C" fn extract_links(html: *const libc::c_char) -> *mut i8 {
-// let html = unsafe { CStr::from_ptr(html) }.to_str().unwrap();
-
-// let mut output = vec![];
-
-// let mut rewriter = HtmlRewriter::new(
-// Settings {
-// element_content_handlers: vec! [
-// element!("")
-// ],
-// ..Settings::new()
-// },
-// |c: &[u8]| output.extend_from_slice(c)
-// );
-
-// rewriter.write(html.as_bytes()).unwrap();
-
-// CString::new(String::from_utf8(output).unwrap()).unwrap().into_raw()
-// }
+use url::Url;
#[no_mangle]
pub extern "C" fn extract_links(html: *const libc::c_char) -> *mut i8 {
@@ -162,6 +143,190 @@ pub extern "C" fn extract_metadata(html: *const libc::c_char) -> *mut i8 {
CString::new(serde_json::ser::to_string(&out).unwrap()).unwrap().into_raw()
}
+/// CSS selectors for page chrome (headers, footers, navigation, sidebars, modals, ads,
+/// language pickers, social/share widgets, cookie banners, ...).
+///
+/// When `only_main_content` is set, `_transform_html_inner` detaches every element
+/// matching one of these selectors, unless a `select` on that element finds a
+/// `FORCE_INCLUDE_MAIN_TAGS` match.
+const EXCLUDE_NON_MAIN_TAGS: [&str; 41] = [
+ "header",
+ "footer",
+ "nav",
+ "aside",
+ ".header",
+ ".top",
+ ".navbar",
+ "#header",
+ ".footer",
+ ".bottom",
+ "#footer",
+ ".sidebar",
+ ".side",
+ ".aside",
+ "#sidebar",
+ ".modal",
+ ".popup",
+ "#modal",
+ ".overlay",
+ ".ad",
+ ".ads",
+ ".advert",
+ "#ad",
+ ".lang-selector",
+ ".language",
+ "#language-selector",
+ ".social",
+ ".social-media",
+ ".social-links",
+ "#social",
+ ".menu",
+ ".navigation",
+ "#nav",
+ ".breadcrumbs",
+ "#breadcrumbs",
+ ".share",
+ "#share",
+ ".widget",
+ "#widget",
+ ".cookie",
+ "#cookie",
+];
+
+/// Selectors that shield an element from `only_main_content` stripping: an element
+/// matched by `EXCLUDE_NON_MAIN_TAGS` is kept when a `select` on it with any of these
+/// selectors yields at least one result.
+const FORCE_INCLUDE_MAIN_TAGS: [&str; 1] = [
+ "#main"
+];
+
+/// Options for `_transform_html_inner`, deserialized from the JSON string handed to
+/// `transform_html`.
+///
+/// NOTE(review): the type name is misspelled ("Tranform"); it is kept as-is because
+/// other items in this file refer to it by this name.
+#[derive(Deserialize)]
+struct TranformHTMLOptions {
+    /// Raw HTML document to transform.
+    html: String,
+    /// Base URL used to resolve relative `img[src]` and `a[href]` values.
+    url: String,
+    /// CSS selectors; when non-empty, only matching elements are kept.
+    include_tags: Vec<String>,
+    /// CSS selectors whose matching elements are removed.
+    exclude_tags: Vec<String>,
+    /// When true, elements matching `EXCLUDE_NON_MAIN_TAGS` are removed as well.
+    only_main_content: bool,
+}
+
+/// One candidate image considered when choosing the `src` of an `<img srcset>`.
+struct ImageSource {
+ /// Candidate URL (first token of the srcset entry, or the element's existing `src`).
+ url: String,
+ /// Integer part of the descriptor, i.e. the descriptor with its last character removed.
+ size: i32,
+ /// True when the descriptor ends with "x".
+ is_x: bool,
+}
+
+/// Detaches every element matching `selector` from `document`.
+///
+/// The loop ends as soon as `select_first` returns `Err`, which is what it does once
+/// nothing matches any more (and, presumably, when the selector cannot be parsed — in
+/// which case nothing is removed).
+fn _detach_all(document: &kuchikiki::NodeRef, selector: &str) {
+    while let Ok(found) = document.select_first(selector) {
+        found.as_node().detach();
+    }
+}
+
+/// Parses one comma-separated `srcset` candidate of the form `"<url> [<descriptor>]"`.
+///
+/// A missing descriptor is treated as `"1x"`. Returns `None` for an empty candidate or
+/// when the descriptor minus its last character is not an integer (e.g. `"1.5x"`).
+fn _parse_srcset_candidate(candidate: &str) -> Option<ImageSource> {
+    // split_whitespace tolerates repeated spaces, tabs and newlines between tokens.
+    let mut tokens = candidate.split_whitespace();
+    let url = tokens.next()?;
+    let descriptor = tokens.next().unwrap_or("1x");
+
+    // Split off the unit on a char boundary so odd input cannot cause a slicing panic.
+    let (unit_at, unit) = descriptor.char_indices().last()?;
+    let size = descriptor[..unit_at].parse::<i32>().ok()?;
+
+    Some(ImageSource {
+        url: url.to_string(),
+        size,
+        is_x: unit == 'x',
+    })
+}
+
+/// Cleans up an HTML document according to `opts` and returns the serialized result.
+///
+/// Steps, in order:
+/// 1. If `include_tags` is non-empty, keep only the elements matching those selectors
+///    (moved under a fresh `<div>` in a new document).
+/// 2. Remove `head`, `meta`, `noscript`, `style` and `script` elements.
+/// 3. Remove elements matching `exclude_tags`.
+/// 4. If `only_main_content`, remove elements matching `EXCLUDE_NON_MAIN_TAGS` unless
+///    they contain a `FORCE_INCLUDE_MAIN_TAGS` match.
+/// 5. For `img[srcset]`, set `src` to the candidate with the largest descriptor value.
+/// 6. Resolve `img[src]` and `a[href]` against `opts.url`.
+///
+/// Returns `Err(())` when an `include_tags` selector (or a built-in selector) is
+/// rejected by `select`, or when `opts.url` cannot be parsed.
+fn _transform_html_inner(opts: TranformHTMLOptions) -> Result<String, ()> {
+    let mut document = parse_html().one(opts.html);
+
+    if !opts.include_tags.is_empty() {
+        // The container must actually hold a <div>, otherwise select_first fails.
+        let new_document = parse_html().one("<div></div>");
+        let root = new_document.select_first("div")?;
+
+        for selector in opts.include_tags.iter() {
+            // Collect before moving: `append` re-parents the node, and re-parenting
+            // during a lazy traversal would make the iterator wander into the new tree
+            // and skip the remaining matches.
+            let matches: Vec<_> = document.select(selector)?.collect();
+            for tag in matches {
+                root.as_node().append(tag.as_node().clone());
+            }
+        }
+
+        document = new_document;
+    }
+
+    for selector in ["head", "meta", "noscript", "style", "script"].iter() {
+        _detach_all(&document, selector);
+    }
+
+    for selector in opts.exclude_tags.iter() {
+        // TODO: implement weird version
+        _detach_all(&document, selector);
+    }
+
+    if opts.only_main_content {
+        for selector in EXCLUDE_NON_MAIN_TAGS.iter() {
+            let candidates: Vec<_> = document.select(selector)?.collect();
+            for tag in candidates {
+                let holds_main_content = FORCE_INCLUDE_MAIN_TAGS.iter().any(|forced| {
+                    tag.as_node()
+                        .select(forced)
+                        .is_ok_and(|mut hits| hits.next().is_some())
+                });
+                if !holds_main_content {
+                    tag.as_node().detach();
+                }
+            }
+        }
+    }
+
+    for img in document.select("img[srcset]")? {
+        // Copy the attribute out so no borrow of `attributes` is held while mutating it.
+        let srcset = img.attributes.borrow().get("srcset").ok_or(())?.to_string();
+        let mut sizes: Vec<ImageSource> = srcset
+            .split(',')
+            .filter_map(_parse_srcset_candidate)
+            .collect();
+
+        // With only density ("x") descriptors, the plain `src` competes as the 1x image.
+        if sizes.iter().all(|candidate| candidate.is_x) {
+            let fallback = img.attributes.borrow().get("src").map(|src| src.to_string());
+            if let Some(src) = fallback {
+                sizes.push(ImageSource {
+                    url: src,
+                    size: 1,
+                    is_x: true,
+                });
+            }
+        }
+
+        // Largest first; the sort is stable, so ties keep their srcset order.
+        sizes.sort_by(|a, b| b.size.cmp(&a.size));
+
+        if let Some(biggest) = sizes.first() {
+            img.attributes.borrow_mut().insert("src", biggest.url.clone());
+        }
+    }
+
+    let url = Url::parse(&opts.url).map_err(|_| ())?;
+
+    for img in document.select("img[src]")? {
+        let old = img.attributes.borrow().get("src").map(|x| x.to_string()).ok_or(())?;
+        // Values that cannot be joined with the base URL are left untouched.
+        if let Ok(new) = url.join(&old) {
+            img.attributes.borrow_mut().insert("src", new.to_string());
+        }
+    }
+
+    for anchor in document.select("a[href]")? {
+        let old = anchor.attributes.borrow().get("href").map(|x| x.to_string()).ok_or(())?;
+        if let Ok(new) = url.join(&old) {
+            anchor.attributes.borrow_mut().insert("href", new.to_string());
+        }
+    }
+
+    Ok(document.to_string())
+}
+
+/// FFI entry point: transforms HTML according to JSON-encoded `TranformHTMLOptions`.
+///
+/// `opts` must be a NUL-terminated C string containing the JSON options. The result is
+/// a newly allocated C string produced by `CString::into_raw`; it holds either the
+/// transformed HTML or the sentinel `"RUSTFC:ERROR"`.
+///
+/// Every failure (null pointer, invalid UTF-8, malformed JSON, transformation error,
+/// interior NUL in the output) is reported through the sentinel instead of panicking,
+/// because a panic must not escape an `extern "C"` function.
+#[no_mangle]
+pub extern "C" fn transform_html(opts: *const libc::c_char) -> *mut i8 {
+    const ERROR_SENTINEL: &str = "RUSTFC:ERROR";
+
+    let transformed = if opts.is_null() {
+        None
+    } else {
+        unsafe { CStr::from_ptr(opts) }
+            .to_str()
+            .ok()
+            .and_then(|raw| serde_json::de::from_str::<TranformHTMLOptions>(raw).ok())
+            .and_then(|parsed| _transform_html_inner(parsed).ok())
+    };
+
+    let out = transformed.unwrap_or_else(|| ERROR_SENTINEL.to_string());
+
+    CString::new(out)
+        .unwrap_or_else(|_| CString::new(ERROR_SENTINEL).expect("sentinel contains no NUL byte"))
+        .into_raw()
+}
+
#[no_mangle]
pub extern "C" fn free_string(ptr: *mut i8) {
drop(unsafe { CString::from_raw(ptr) })
diff --git a/apps/api/src/lib/html-transformer.ts b/apps/api/src/lib/html-transformer.ts
index f7d58fb9..bab991c9 100644
--- a/apps/api/src/lib/html-transformer.ts
+++ b/apps/api/src/lib/html-transformer.ts
@@ -10,10 +10,19 @@ const rustExecutablePath = join(
platform() === "darwin" ? "libhtml_transformer.dylib" : "libhtml_transformer.so"
);
+/**
+ * Options passed to the native `transform_html` function. The object is serialized
+ * with `JSON.stringify`, so the snake_case keys are part of the wire format.
+ */
+type TransformHtmlOptions = {
+ /** Raw HTML to transform. */
+ html: string,
+ /** URL of the page the HTML came from. */
+ url: string,
+ /** Selectors to include. */
+ include_tags: string[],
+ /** Selectors to exclude. */
+ exclude_tags: string[],
+ /** Whether only the main content should be kept. */
+ only_main_content: boolean,
+};
+
class RustHTMLTransformer {
private static instance: RustHTMLTransformer;
private _extractLinks: KoffiFunction;
private _extractMetadata: KoffiFunction;
+ private _transformHtml: KoffiFunction;
private _freeString: KoffiFunction;
private constructor() {
@@ -23,6 +32,7 @@ class RustHTMLTransformer {
const freedResultString = koffi.disposable(cstn, "string", this._freeString);
this._extractLinks = lib.func("extract_links", freedResultString, ["string"]);
this._extractMetadata = lib.func("extract_metadata", freedResultString, ["string"]);
+ this._transformHtml = lib.func("transform_html", freedResultString, ["string"]);
}
public static async getInstance(): Promise {
@@ -60,6 +70,22 @@ class RustHTMLTransformer {
});
});
}
+
+ /**
+  * Runs the native `transform_html` function asynchronously.
+  *
+  * @param opts - Options, sent to the native side as a JSON string.
+  * @returns The transformed HTML.
+  * @throws Rejects with the koffi error if the call fails, or with a generic `Error`
+  * when the native side returns the `"RUSTFC:ERROR"` sentinel.
+  */
+ public async transformHtml(opts: TransformHtmlOptions): Promise<string> {
+   return new Promise<string>((resolve, reject) => {
+     this._transformHtml.async(JSON.stringify(opts), (err: Error | null, res: string) => {
+       if (err) {
+         reject(err);
+         return;
+       }
+       if (res === "RUSTFC:ERROR") {
+         reject(new Error("Something went wrong on the Rust side."));
+         return;
+       }
+       resolve(res);
+     });
+   });
+ }
}
export async function extractLinks(
@@ -82,4 +108,11 @@ export async function extractMetadata(
const converter = await RustHTMLTransformer.getInstance();
return await converter.extractMetadata(html);
-}
\ No newline at end of file
+}
+
+/**
+ * Transforms HTML through the shared `RustHTMLTransformer` instance.
+ *
+ * @param opts - Transformation options forwarded to the native library.
+ * @returns The transformed HTML.
+ * @throws Rejects when the native call fails or reports an error.
+ */
+export async function transformHtml(
+  opts: TransformHtmlOptions,
+): Promise<string> {
+  const converter = await RustHTMLTransformer.getInstance();
+  return await converter.transformHtml(opts);
+}
diff --git a/apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts b/apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts
index 4edb21f8..9e5ac215 100644
--- a/apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts
+++ b/apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts
@@ -1,7 +1,9 @@
// TODO: refactor
-import { AnyNode, Cheerio, load } from "cheerio"; // TODO: rustify
+import { AnyNode, Cheerio, load } from "cheerio"; // rustified
import { ScrapeOptions } from "../../../controllers/v1/types";
+import { transformHtml } from "../../../lib/html-transformer";
+import { logger } from "../../../lib/logger";
const excludeNonMainTags = [
"header",
@@ -49,11 +51,26 @@ const excludeNonMainTags = [
const forceIncludeMainTags = ["#main"];
-export const htmlTransform = (
+export const htmlTransform = async (
html: string,
url: string,
scrapeOptions: ScrapeOptions,
) => {
+ // Try the native transformer first; on any failure, log it and fall through to the
+ // cheerio-based code that follows.
+ try {
+   return await transformHtml({
+     html,
+     url,
+     // Blank selectors are dropped before they reach the native side.
+     include_tags: (scrapeOptions.includeTags ?? []).map(x => x.trim()).filter((x) => x.length !== 0),
+     exclude_tags: (scrapeOptions.excludeTags ?? []).map(x => x.trim()).filter((x) => x.length !== 0),
+     only_main_content: scrapeOptions.onlyMainContent,
+   })
+ } catch (error) {
+   logger.error("Failed to call html-transformer! Falling back to cheerio...", {
+     error,
+     module: "scrapeURL", method: "htmlTransform"
+   });
+ }
+
let soup = load(html);
// remove unwanted elements
diff --git a/apps/api/src/scraper/scrapeURL/transformers/index.ts b/apps/api/src/scraper/scrapeURL/transformers/index.ts
index fe132ffd..2775afcc 100644
--- a/apps/api/src/scraper/scrapeURL/transformers/index.ts
+++ b/apps/api/src/scraper/scrapeURL/transformers/index.ts
@@ -31,17 +31,17 @@ export async function deriveMetadataFromRawHTML(
return document;
}
-export function deriveHTMLFromRawHTML(
+export async function deriveHTMLFromRawHTML(
meta: Meta,
document: Document,
-): Document {
+): Promise {
if (document.rawHtml === undefined) {
throw new Error(
"rawHtml is undefined -- this transformer is being called out of order",
);
}
- document.html = htmlTransform(
+ document.html = await htmlTransform(
document.rawHtml,
document.metadata.url ?? document.metadata.sourceURL ?? meta.url,
meta.options,